diff --git a/.github/workflows/tests_01.yml b/.github/workflows/tests_01.yml
new file mode 100644
index 00000000..3951257b
--- /dev/null
+++ b/.github/workflows/tests_01.yml
@@ -0,0 +1,38 @@
+name: "Python bindings tests 01"
+
+on:
+  pull_request:
+    paths-ignore:
+      - 'docs/**'
+  workflow_dispatch:
+
+jobs:
+  tests-01:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python: [3.12]
+    env:
+      BIGML_USERNAME: ${{ secrets.BIGML_USERNAME }}
+      BIGML_API_KEY: ${{ secrets.BIGML_API_KEY }}
+      BIGML_ORGANIZATION: ${{ secrets.BIGML_ORGANIZATION }}
+      BIGML_EXTERNAL_CONN_HOST: ${{ secrets.BIGML_EXTERNAL_CONN_HOST }}
+      BIGML_EXTERNAL_CONN_PORT: ${{ secrets.BIGML_EXTERNAL_CONN_PORT }}
+      BIGML_EXTERNAL_CONN_DB: ${{ secrets.BIGML_EXTERNAL_CONN_DB }}
+      BIGML_EXTERNAL_CONN_USER: ${{ secrets.BIGML_EXTERNAL_CONN_USER }}
+      BIGML_EXTERNAL_CONN_PWD: ${{ secrets.BIGML_EXTERNAL_CONN_PWD }}
+      BIGML_DELTA: ${{ vars.BIGML_DELTA }}
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v3
+    - run: |
+        pip install .[topics]
+
+    - name: Run tests *01 04 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 41 38 99*
+      run: |
+        pip3 install pytest
+        export TESTS=$(for t in "01" "04" "06" "07" "08" "09" "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" "20" "21" "41" "38" "99"; do ls bigml/tests/*$t*.py;done|paste -sd " ")
+        echo $TESTS
+        pytest -s $TESTS
diff --git a/.github/workflows/tests_05.yml b/.github/workflows/tests_05.yml
new file mode 100644
index 00000000..ed1cac5f
--- /dev/null
+++ b/.github/workflows/tests_05.yml
@@ -0,0 +1,37 @@
+name: "Python bindings tests 05"
+
+on:
+  pull_request:
+    paths-ignore:
+      - 'docs/**'
+  workflow_dispatch:
+
+jobs:
+  tests-05:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python: [3.12]
+    env:
+      BIGML_USERNAME: ${{ secrets.BIGML_USERNAME }}
+      BIGML_API_KEY: ${{ secrets.BIGML_API_KEY }}
+      BIGML_ORGANIZATION: ${{ secrets.BIGML_ORGANIZATION }}
+      BIGML_EXTERNAL_CONN_HOST: ${{ secrets.BIGML_EXTERNAL_CONN_HOST }}
+      BIGML_EXTERNAL_CONN_PORT: ${{ secrets.BIGML_EXTERNAL_CONN_PORT }}
+      BIGML_EXTERNAL_CONN_DB: ${{ secrets.BIGML_EXTERNAL_CONN_DB }}
+      BIGML_EXTERNAL_CONN_USER: ${{ secrets.BIGML_EXTERNAL_CONN_USER }}
+      BIGML_EXTERNAL_CONN_PWD: ${{ secrets.BIGML_EXTERNAL_CONN_PWD }}
+      BIGML_DELTA: ${{ vars.BIGML_DELTA }}
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v3
+    - run: |
+        pip install .[topics]
+
+    - name: Run tests *05 40 45 99*
+      run: |
+        pip3 install pytest
+        export TESTS=$(for t in "05" "40" "45" "99"; do ls bigml/tests/*$t*.py;done|paste -sd " ")
+        echo $TESTS
+        pytest -s $TESTS
diff --git a/.github/workflows/tests_22.yml b/.github/workflows/tests_22.yml
new file mode 100644
index 00000000..46784de2
--- /dev/null
+++ b/.github/workflows/tests_22.yml
@@ -0,0 +1,38 @@
+name: "Python bindings tests 22"
+
+on:
+  pull_request:
+    paths-ignore:
+      - 'docs/**'
+  workflow_dispatch:
+
+jobs:
+  tests-22:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python: [3.12]
+    env:
+      BIGML_USERNAME: ${{ secrets.BIGML_USERNAME }}
+      BIGML_API_KEY: ${{ secrets.BIGML_API_KEY }}
+      BIGML_ORGANIZATION: ${{ secrets.BIGML_ORGANIZATION }}
+      BIGML_EXTERNAL_CONN_HOST: ${{ secrets.BIGML_EXTERNAL_CONN_HOST }}
+      BIGML_EXTERNAL_CONN_PORT: ${{ secrets.BIGML_EXTERNAL_CONN_PORT }}
+      BIGML_EXTERNAL_CONN_DB: ${{ secrets.BIGML_EXTERNAL_CONN_DB }}
+      BIGML_EXTERNAL_CONN_USER: ${{ secrets.BIGML_EXTERNAL_CONN_USER }}
+      BIGML_EXTERNAL_CONN_PWD: ${{ secrets.BIGML_EXTERNAL_CONN_PWD }}
+      BIGML_DELTA: ${{ vars.BIGML_DELTA }}
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v3
+    - run: |
+        pip install .[topics]
+
+    - name: Run tests *22 24 25 26 27 28 29 30 31 32 34 39 43 42 44 99*
+      run: |
+        pip3 install pytest
+        export TESTS=$(for t in "22" "24" "25" "26" "27" "28" "29" "30" "31" "32" "34" "39" "43" "42" "44" "99"; do ls bigml/tests/*$t*.py;done|paste -sd " ")
+        echo $TESTS
+        pytest -s $TESTS
diff --git a/.github/workflows/tests_23.yml b/.github/workflows/tests_23.yml
new file mode 100644
index 00000000..892a73d6
--- /dev/null
+++ b/.github/workflows/tests_23.yml
@@ -0,0 +1,38 @@
+name: "Python bindings tests 23"
+
+on:
+  pull_request:
+    paths-ignore:
+      - 'docs/**'
+  workflow_dispatch:
+
+jobs:
+  tests-23:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python: [3.12]
+    env:
+      BIGML_USERNAME: ${{ secrets.BIGML_USERNAME }}
+      BIGML_API_KEY: ${{ secrets.BIGML_API_KEY }}
+      BIGML_ORGANIZATION: ${{ secrets.BIGML_ORGANIZATION }}
+      BIGML_EXTERNAL_CONN_HOST: ${{ secrets.BIGML_EXTERNAL_CONN_HOST }}
+      BIGML_EXTERNAL_CONN_PORT: ${{ secrets.BIGML_EXTERNAL_CONN_PORT }}
+      BIGML_EXTERNAL_CONN_DB: ${{ secrets.BIGML_EXTERNAL_CONN_DB }}
+      BIGML_EXTERNAL_CONN_USER: ${{ secrets.BIGML_EXTERNAL_CONN_USER }}
+      BIGML_EXTERNAL_CONN_PWD: ${{ secrets.BIGML_EXTERNAL_CONN_PWD }}
+      BIGML_DELTA: ${{ vars.BIGML_DELTA }}
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v3
+    - run: |
+        pip install .[full]
+
+    - name: Run tests *23 03 37 35 47 48 49 99*
+      run: |
+        pip3 install pytest
+        export TESTS=$(for t in "23" "03" "37" "35" "47" "48" "49" "99"; do ls bigml/tests/*$t*.py;done|paste -sd " ")
+        echo $TESTS
+        pytest -s $TESTS
diff --git a/.github/workflows/tests_36.yml b/.github/workflows/tests_36.yml
new file mode 100644
index 00000000..a766fa97
--- /dev/null
+++ b/.github/workflows/tests_36.yml
@@ -0,0 +1,38 @@
+name: "Python bindings tests 36"
+
+on:
+  pull_request:
+    paths-ignore:
+      - 'docs/**'
+  workflow_dispatch:
+
+jobs:
+  tests-36:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python: [3.12]
+    env:
+      BIGML_USERNAME: ${{ secrets.BIGML_USERNAME }}
+      BIGML_API_KEY: ${{ secrets.BIGML_API_KEY }}
+      BIGML_ORGANIZATION: ${{ secrets.BIGML_ORGANIZATION }}
+      BIGML_EXTERNAL_CONN_HOST: ${{ secrets.BIGML_EXTERNAL_CONN_HOST }}
+      BIGML_EXTERNAL_CONN_PORT: ${{ secrets.BIGML_EXTERNAL_CONN_PORT }}
+      BIGML_EXTERNAL_CONN_DB: ${{ secrets.BIGML_EXTERNAL_CONN_DB }}
+      BIGML_EXTERNAL_CONN_USER: ${{ secrets.BIGML_EXTERNAL_CONN_USER }}
+      BIGML_EXTERNAL_CONN_PWD: ${{ secrets.BIGML_EXTERNAL_CONN_PWD }}
+      BIGML_DELTA: ${{ vars.BIGML_DELTA }}
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v3
+    - run: |
+        pip install .[full]
+
+    - name: Run tests *36 33 99*
+      run: |
+        pip3 install pytest
+        export TESTS=$(for t in "36" "33" "99"; do ls bigml/tests/*$t*.py;done|paste -sd " ")
+        echo $TESTS
+        pytest -s $TESTS
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 00000000..d74e663d
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,22 @@
+# .readthedocs.yaml
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Set the version of Python and other tools you might need
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.12"
+
+# Build documentation in the docs/ directory with Sphinx
+sphinx:
+  configuration: docs/conf.py
+
+# We recommend specifying your dependencies to enable reproducible builds:
+# https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
+python:
+  install:
+    - requirements: docs/requirements.txt
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index ee3d447f..00000000
--- a/.travis.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-language: python
-python:
-    - "2.7"
-    - "3.6"
-env:
-    - BML_TESTS="05"
-    - BML_TESTS="35 33"
-    - BML_TESTS="01 02 03 04 06 07 08 09 10 11 12"
-    - BML_TESTS="36"
-    - BML_TESTS="13 14 15 16 17 18 19 20 21 22 24 25 26 27 28 29 30 31 32 34 39"
-    - BML_TESTS="23 37 38 40 41 42 43 44"
-install:
-    - pip install numpy
-    - pip install scipy
-    - pip install pystemmer
-    - pip install .
-script:
-    - export TESTS=$(for t in $BML_TESTS; do ls bigml/tests/*$t*;done|paste -sd ",")
-    - echo $TESTS
-    - python setup.py nosetests --nocapture --tests=$TESTS
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 2808229b..7c3d33bd 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -1,17 +1,19 @@
 Thank you to all who have contributed to this project! If you contributed
 and are not listed below please let us know.
 
-Adam Ashenfelter ("ashenfad")
-Ben Letchford ("benletchford")
-Chee Sing Lee ("cheesinglee")
-Francisco J Martin ("aficionado")
-jao ("jaor")
+Marc Abramowitz ("msabramo")
 Javier Alperte ("xalperte")
+Adam Ashenfelter ("ashenfad")
+Kamal Galrani ("KamalGalrani")
 Krishan Gupta ("krishangupta")
 Leon Hwang ("leonhwang")
-Marc Abramowitz ("msabramo")
+jao ("jaor")
+Sean Kelly ("skelliest")
+Chee Sing Lee ("cheesinglee")
+Ben Letchford ("benletchford")
+Francisco J Martin ("aficionado")
 Mercè Martín Prats ("mmerce")
-Nick Wilson ("njwilson")
-Oscar Rovira ("osroca")
+Charles Parker ("charlesparker")
 Poul Petersen ("petersen-poul")
-Kamal Galrani("KamalGalrani")
+Oscar Rovira ("osroca")
+Nick Wilson ("njwilson")
diff --git a/HISTORY.rst b/HISTORY.rst
index 47f7b18b..6c85c8cd 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -3,6 +3,477 @@ History
 -------
 
+9.8.3 (2025-03-27)
+------------------
+
+- Fixing annotations update for regions as lists.
+
+9.8.2 (2025-03-21)
+------------------
+
+- Retrying annotations update to avoid temporary concurrency issues in
+  source composites updates.
+
+9.8.1 (2025-01-14)
+------------------
+
+- Fixing annotations update in images composite sources.
+
+9.8.0 (2024-10-02)
+------------------
+
+- Fixing the get_leaves function for local decision trees.
+- Fixing setup issues in Python 3.12.
+- Changing documentation templates.
+
+9.8.0.dev1 (2024-02-28)
+-----------------------
+
+- Documenting and removing partially the need for Node.js in Pipelines.
+
+9.8.0.dev (2024-02-19)
+----------------------
+
+- Upgrading libraries to avoid failures in Apple M1 machines.
+- Fixing local predictions input data preprocessing for missings.
+
+9.7.1 (2023-12-08)
+------------------
+
+- Fixing readthedocs configuration.
+
+9.7.0 (2023-12-06)
+------------------
+
+- Changing query string separator in internal API calls.
+
+9.6.2 (2023-08-02)
+------------------
+
+- Extending cloning to all available models and WhizzML scripts.
+- Fixing shared resources cloning.
+
+9.6.1 (2023-08-01)
+------------------
+
+- Adding shared resources cloning.
+
+9.6.0 (2023-07-20)
+------------------
+
+- Adding ShapWrapper to enable local Shap values computation with the Shap
+  library.
+- Adding Evaluation object.
+- Improving Field class to allow field values encoding as numpy arrays.
+
+9.5.0 (2023-06-16)
+------------------
+
+- Extending Local Fusions output to include confidence.
+
+9.4.0 (2023-06-14)
+------------------
+
+- Extending LocalModel class to handle Time Series locally.
+
+9.3.0 (2023-06-09)
+------------------
+
+- Adding a LocalModel class to handle any type of BigML model locally.
+
+9.2.0 (2023-04-11)
+------------------
+
+- Extending all delete methods to allow additional query strings.
+
+9.1.4 (2023-02-10)
+------------------
+
+- Providing local deepnet predictions default for Windows OS that cannot
+  handle image predictions.
+
+9.1.3 (2022-12-22)
+------------------
+
+- Changing user's status endpoint retrieval to a lightweight version.
+
+9.1.2 (2022-11-26)
+------------------
+
+- Removing all nose dependencies in tests.
+
+9.1.1 (2022-11-18)
+------------------
+
+- Removing traces and refactoring Flatline interpreter invocation.
+- Migrating tests to pytest.
+
+9.1.0 (2022-11-09)
+------------------
+
+- Enabling local models to generate the transformations pipeline used in
+  training to be able to apply it locally to input data for predictions.
+- Refactoring local models to bring the feature extraction capabilities to
+  the transformations pipeline.
+- Rounding local numeric predictions and scores to 5 digits.
+- Improving Pipelines and Image Processing documentation.
+
+9.0.1 (2022-11-02)
+------------------
+
+- Fixing local batch_prediction method on DataFrames.
+- Improving the training examples method in the Fields class to produce
+  examples of categorical field values following their original distribution.
+
+9.0.0 (2022-10-12)
+------------------
+
+- Renaming Pipeline class to BMLPipeline and refactoring to allow scikit-learn
+  and Pandas pipelines to be also part of the Pipeline transformations.
+- Adding several DataTransformers (BMLDataTransformer, SKDataTransformer and
+  DFDataTransformer) as wrappers for transformation generators.
+
+8.2.3 (2022-10-07)
+------------------
+
+- Fixing dump methods for all local resources.
+- Refactoring output options in batch_predict methods.
+- Adding name and description to local resources.
+
+8.2.2 (2022-09-29)
+------------------
+
+- Fixing locale check.
+- Documenting the new ``.batch_predict`` method added to local models to
+  homogenize local batch predictions and accept Pandas' DataFrame as input.
+
+8.2.1 (2022-09-23)
+------------------
+
+- Fixing update annotations function when creating annotated images sources.
+
+8.2.0 (2022-09-22)
+------------------
+
+- Adding new option to api.ok to report progress via a function callback.
+
+8.1.1 (2022-09-21)
+------------------
+
+- Fixing bug in api.ok: response to failing API calls.
+
+8.1.0 (2022-09-16)
+------------------
+
+- Upgrading dependencies.
+
+8.0.0 (2022-09-13)
+------------------
+
+- Adding new local Pipeline object to encapsulate and run prediction
+  workflows.
+- Changing ``api.ok`` response to return as False when retrieving faulty
+  resources. Previously, only problems retrieving the API response caused
+  ``api.ok`` to return False.
+- Adding FaultyResource exception to inform about that type of failure when
+  using the ``raise_on_error`` flag in ``api.ok``.
+- Fixing bug in local LogisticRegression when predicting with operating points.
+
+7.7.0 (2022-08-30)
+~~~~~~~~~~~~~~~~~~
+
+- Adding local Dataset class to handle Flatline transformations.
+
+7.6.0 (2022-08-18)
+~~~~~~~~~~~~~~~~~~
+
+- Adding feature expansion to image fields in local predictors.
+
+7.5.1 (2022-08-08)
+~~~~~~~~~~~~~~~~~~
+
+- Adding utility to reproduce the backend images preprocess in local
+  deepnets.
+
+7.5.0 (2022-07-09)
+~~~~~~~~~~~~~~~~~~
+
+- Adding the new capabilities of the last API release: object detection.
+
+7.4.2 (2022-05-26)
+~~~~~~~~~~~~~~~~~~
+
+- Improved webhooks hashing check.
+- Adapting connection to allow API version setting, needed for compatibility
+  with the Predict Server syntax.
+
+7.4.1 (2022-05-16)
+~~~~~~~~~~~~~~~~~~
+
+- Adding support for webhooks hashing check.
+
+7.4.0 (2022-05-13)
+~~~~~~~~~~~~~~~~~~
+
+- Adding extras to setup, to allow a basic installation and extra dependencies
+  handling for topics and images.
+
+7.3.0 (2022-04-22)
+~~~~~~~~~~~~~~~~~~
+
+- Improving local ensembles to be generated from a list of ensemble plus
+  models info.
+- Allowing BigML connection class to retrieve models from storage without
+  any credentials.
+
+7.2.2 (2022-04-20)
+~~~~~~~~~~~~~~~~~~
+
+- Enabling dump from SupervisedModel class.
+
+
+7.2.1 (2022-04-19)
+~~~~~~~~~~~~~~~~~~
+
+- Fixing windows setup.
+
+7.2.0 (2022-04-14)
+~~~~~~~~~~~~~~~~~~
+
+- Upgrading sensenet library and giving fallback for local deepnet predictions
+  on OS not supported by tensorflow.
+- Adding support for pandas' DataFrame when creating sources.
+
+7.1.2 (2022-03-17)
+~~~~~~~~~~~~~~~~~~
+
+- Fixing local deepnet predictions if no missing_count info is found.
+
+7.1.1 (2022-03-11)
+~~~~~~~~~~~~~~~~~~
+
+- Improving local predictions for shared fusions and documenting.
+
+7.1.0 (2022-03-10)
+~~~~~~~~~~~~~~~~~~
+
+- Adding the ability to predict locally from shared composed resources, like
+  Ensembles and Fusions.
+
+7.0.1 (2022-03-04)
+~~~~~~~~~~~~~~~~~~
+
+- Fixing local Supervised Model to accept stored models as input.
+- Improving tests to minimize resource creation.
+- Updating dependencies versions.
+
+7.0.0 (2021-08-03)
+~~~~~~~~~~~~~~~~~~
+
+- Adding composite sources to the API REST interface methods.
+- Adding predictions for images.
+
+6.3.2 (2021-12-02)
+------------------
+
+- Fixing local deepnet prediction method to fit new deepnet structure.
+
+6.3.1 (2021-11-03)
+------------------
+
+- Improving local ensembles to handle faulty models with no root.
+
+6.3.0 (2021-10-21)
+------------------
+
+- Adding activators to local deepnets prediction functions.
+
+6.2.2 (2021-10-11)
+------------------
+
+- Fixing underlying dependency used in datetime fields.
+
+6.2.1 (2021-08-10)
+------------------
+
+- Fixing predictions for local models with text fields.
+
+6.2.0 (2021-07-22)
+------------------
+
+- Minimizing local model information when text or items fields are used.
+
+6.1.2 (2021-06-16)
+------------------
+
+- Changing tests mechanism to GitHub Actions.
+
+6.1.1 (2021-06-11)
+~~~~~~~~~~~~~~~~~~
+
+- Fixing api `get` method. The query string argument was lost.
+- Improving api `get`, `create` and `update` methods to avoid unneeded
+  iterations.
+- Improving docs.
+
+6.1.0 (2021-03-06)
+~~~~~~~~~~~~~~~~~~
+
+- Fixing local anomalies when normalize_repeats has been used.
+
+6.0.3 (2021-02-06)
+~~~~~~~~~~~~~~~~~~
+
+- Improving local models with default numeric values to automatically fill in
+  missings in input data.
+- Raising exceptions if trying to update a failed resource.
+
+6.0.2 (2020-12-21)
+~~~~~~~~~~~~~~~~~~
+
+- Fixing bug in local anomaly filter.
+
+6.0.1 (2020-11-11)
+~~~~~~~~~~~~~~~~~~
+
+- Changing local anomaly filter.
+- Fixing update and download methods to wait for the resource to be finished.
+
+6.0.0 (2020-09-02)
+~~~~~~~~~~~~~~~~~~
+
+- New compact format for local models.
+- Adding cache managing capacities for all local predictors.
+
+5.2.0 (2020-08-19)
+~~~~~~~~~~~~~~~~~~
+
+- Adding syntactic sugar methods to the BigML class to help manage resources.
+
+5.1.1 (2020-08-11)
+~~~~~~~~~~~~~~~~~~
+
+- Fixing module directory inclusion and improving docs on local anomalies.
+
+5.1.0 (2020-08-07)
+~~~~~~~~~~~~~~~~~~
+
+- Refactoring local anomaly to reduce memory requirements.
+
+
+5.0.1 (2020-08-05)
+~~~~~~~~~~~~~~~~~~
+
+- Fixing bug in get_tasks_status to get information about transient net
+  errors.
+
+5.0.0 (2020-07-31)
+~~~~~~~~~~~~~~~~~~
+
+- Deprecating support for Python 2.7.X versions. Only Python 3 supported
+  from this version on.
+
+4.32.3 (2020-07-15)
+~~~~~~~~~~~~~~~~~~~
+
+- Extending the Fields class to check the attributes that can be updated in
+  a source or dataset fields structure to avoid failing fields updates.
+
+4.32.2 (2020-06-15)
+~~~~~~~~~~~~~~~~~~~
+
+- Fixing local anomaly scores for new anomaly detectors with feedback and
+  setting the maximum input data precision to five digits.
+
+4.32.1 (2020-06-10)
+~~~~~~~~~~~~~~~~~~~
+
+- Fixing local anomaly scores prediction for corner cases of samples with
+  one row.
+
+4.32.0 (2020-05-19)
+~~~~~~~~~~~~~~~~~~~
+
+- Allowing scripts to be created from gists using the create_script method.
+- Improving training examples generation in Fields class.
+
+4.31.2 (2020-05-14)
+~~~~~~~~~~~~~~~~~~~
+
+- Fixing problems creating ephemeral prediction resources.
+
+4.31.1 (2020-05-06)
+~~~~~~~~~~~~~~~~~~~
+
+- Improving the api.ok method to add an estimated wait time.
+- Improving docs and adding TOC for new structure.
+
+4.31.0 (2020-04-22)
+~~~~~~~~~~~~~~~~~~~
+
+- Adding REST methods to manage external data connections.
+
+4.30.2 (2020-04-20)
+~~~~~~~~~~~~~~~~~~~
+
+- Fixing local anomaly scores for datasets with significant amounts of missings.
+- Fixing input data modification for local predictions when fields are not
+  used in the models.
+
+4.30.1 (2020-04-16)
+~~~~~~~~~~~~~~~~~~~
+
+- Fixing tasks status info for organizations.
+
+4.30.0 (2020-04-10)
+~~~~~~~~~~~~~~~~~~~
+
+- Allowing the BigML class to retrieve any resource from local storage and
+  extract its fields.
+
+4.29.2 (2020-03-20)
+~~~~~~~~~~~~~~~~~~~
+
+- Improving exception handling when retrieving resources.
+
+4.29.1 (2020-03-03)
+~~~~~~~~~~~~~~~~~~~
+
+- Fixing bug when disabling SSL verification in predictions only.
+
+4.29.0 (2020-02-29)
+~~~~~~~~~~~~~~~~~~~
+
+- Improving api.ok method to allow retries to avoid transient HTTP failures.
+- Deprecating the `retries` argument in api.ok.
+- Fixing local predictions confidence for weighted models.
+
+4.28.1 (2020-02-04)
+~~~~~~~~~~~~~~~~~~~
+
+- Changing api.ok method to avoid raising exceptions when retrieving a
+  faulty resource.
+- Adding call stack info to local Execution class.
+- Fixing docs builder.
+
+4.28.0 (2020-01-23)
+~~~~~~~~~~~~~~~~~~~
+
+- Adding Execution local utility to extract the outputs and results from an
+  execution.
+
+4.27.3 (2020-01-15)
+~~~~~~~~~~~~~~~~~~~
+
+- Fixing local Fusion class to allow using linear regressions.
+
+4.27.2 (2020-01-03)
+~~~~~~~~~~~~~~~~~~~
+
+- Fixing warning message and template files in generated code for hadoop
+  actionable models.
+- Fixing local ensembles that asked for credentials before needing them.
+
 4.27.1 (2019-12-19)
 ~~~~~~~~~~~~~~~~~~~
diff --git a/MANIFEST.in b/MANIFEST.in
index c2aa50b9..4f3fd0ba 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -3,7 +3,6 @@ include HISTORY.rst
 include README.rst
 recursive-include data *
 recursive-include docs *
-recursive-include tests *
-recursive-exclude tests *.pyc
-recursive-exclude tests *.pyo
+recursive-include bigml/tests *
+recursive-exclude bigml/tests *.pyc
 prune docs/_build
diff --git a/README.rst b/README.rst
index 0a9fe9c2..89da1cf6 100644
--- a/README.rst
+++ b/README.rst
@@ -32,49 +32,85 @@ in our `Campfire chatroom `_.
 Requirements
 ------------
 
-Python 2.7 and Python 3 are currently supported by these bindings.
+Only ``Python 3`` versions are currently supported by these bindings.
+Support for Python 2.7.X ended in version ``4.32.3``.
 
 The basic third-party dependencies are the
-`requests `_,
-`poster `_,
-`unidecode `_ and
-`requests-toolbelt `_
-`bigml-chronos `_
-libraries. These
-libraries are automatically installed during the setup. Support for Google
-App Engine has been added as of version 3.0.0, using the `urlfetch` package
-instead of `requests`.
+`requests `_,
+`unidecode `_,
+`requests-toolbelt `_,
+`bigml-chronos `_,
+`msgpack `_,
+`numpy `_ and
+`scipy `_ libraries. These
+libraries are automatically installed during the basic setup.
+Support for Google App Engine has been added as of version 3.0.0,
+using the `urlfetch` package instead of `requests`.
 
 The bindings will also use ``simplejson`` if you happen to have it
 installed, but that is optional: we fall back to Python's built-in JSON
 libraries if ``simplejson`` is not found.
 
-Additional `numpy `_ and
-`scipy `_ libraries are needed in case you want to use
-local predictions for regression models (including the error information)
-using proportional missing strategy. As these are quite heavy libraries and
-they are not heavily used in these bindings,
-they are not included in the automatic installation
-dependencies. The test suite includes some tests that will need these
-libraries to be installed.
-
-Also in order to use local `Topic Model` predictions, you will need to install
-`pystemmer `_. Using the `pip install`
-command for this library can produce an error if your system lacks the
-correct developer tools to compile it. In Windows, the error message
-will include a link pointing to the needed Visual Studio version and in
-OSX you'll need to install the Xcode developer tools.
+The bindings provide support to use the ``BigML`` platform to create, update,
+get and delete resources, but also to produce local predictions using the
+models created in ``BigML``. Most of them will be actionable with the basic
+installation, but some additional dependencies are needed to use local
+``Topic Models`` and Image Processing models. Please refer to the
+`Installation <#installation>`_ section for details.
+
+OS Requirements
+~~~~~~~~~~~~~~~
+
+The basic installation of the bindings is compatible and can be used
+on Linux and Windows based Operating Systems.
+However, the extra options that allow working with
+image processing models (``[images]`` and ``[full]``) are only supported
+and tested on Linux-based Operating Systems.
+For image models, Windows OS is not recommended and cannot be supported out of
+the box, because the specific compiler versions or dlls required are
+unavailable in general.
 
 Installation
 ------------
 
-To install the latest stable release with
-`pip `_
+To install the basic latest stable release with
+`pip `_,
 please use:
 
 .. code-block:: bash
 
     $ pip install bigml
 
+Support for local Topic Distributions (Topic Models' predictions)
+and local predictions for datasets that include Images will only be
+available as extras, because the libraries used for that are not
+usually available in all Operating Systems. If you need to support those,
+please check the `Installation Extras <#installation-extras>`_ section.
+
+Installation Extras
+-------------------
+
+Local Topic Distributions support can be installed using:
+
+.. code-block:: bash
+
+    pip install bigml[topics]
+
+Images local predictions support can be installed using:
+
+.. code-block:: bash
+
+    pip install bigml[images]
+
+The full set of features can be installed using:
+
+.. code-block:: bash
+
+    pip install bigml[full]
+
+
+WARNING: Mind that installing these extras can require some extra work, as
+explained in the `Requirements <#requirements>`_ section.
+
 You can also install the development version of the bindings directly
 from the Git repository
 
 .. code-block:: bash
 
     $ pip install -e git://github.com/bigmlcom/python.git#egg=bigml_python
 
+
 Running the Tests
 -----------------
 
-The test will be run using `nose `_ ,
-that is installed on setup, and you'll need to set up your authentication
+The tests will be run using `pytest `_.
+You'll need to set up your authentication
 via environment variables, as explained
-below. With that in place, you can run the test suite simply by issuing
+in the authentication section. Also some of the tests need other environment
+variables like ``BIGML_ORGANIZATION`` to test calls when used by Organization
+members and ``BIGML_EXTERNAL_CONN_HOST``, ``BIGML_EXTERNAL_CONN_PORT``,
+``BIGML_EXTERNAL_CONN_DB``, ``BIGML_EXTERNAL_CONN_USER``,
+``BIGML_EXTERNAL_CONN_PWD`` and ``BIGML_EXTERNAL_CONN_SOURCE``
+in order to test external data connectors.
+
+With that in place, you can run the test suite simply by issuing
 
 .. code-block:: bash
 
-    $ python setup.py nosetests
+    $ pytest
+
+Additionally, `Tox `_ can be used to
+automatically run the test suite in virtual environments for all
+supported Python versions. To install Tox:
+
+.. code-block:: bash
+
+    $ pip install tox
+
+Then run the tests from the top-level project directory:
+
+.. code-block:: bash
 
-Some tests need the `numpy `_ and
-`scipy `_ libraries to be installed too. They are not
-automatically installed as a dependency, as they are quite heavy and very
-seldom used.
+    $ tox
 
 Importing the module
 --------------------
diff --git a/bigml/__init__.py b/bigml/__init__.py
index 69e3be50..818decaa 100644
--- a/bigml/__init__.py
+++ b/bigml/__init__.py
@@ -1 +1,2 @@
+from bigml.version import __version__
 __path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/bigml/anomaly.py b/bigml/anomaly.py
index 9bcc66f5..4a345724 100644
--- a/bigml/anomaly.py
+++ b/bigml/anomaly.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
-#!/usr/bin/env python
 #
-# Copyright 2014-2019 BigML
+# Copyright 2020-2025 BigML
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may
 # not use this file except in compliance with the License. You may obtain
-"""A local Predictive Anomaly Detector.
-
-This module defines an Anomaly Detector to score anomalies in a dataset locally
+"""
+A fast building local Predictive Anomaly Detector.
+This module defines an Anomaly Detector to score anomalies in a dataset locally
 or embedded into your application without needing to send requests to
 BigML.io.
-
-This module cannot only save you a few credits, but also enormously
-reduce the latency for each prediction and let you use your models
-offline.
-
+The module is also designed for situations when it is desirable to be able to
+build the anomaly detector very quickly from an external representation.
+It also offers the ability to load its contents from a cache system like
+Redis or memcache. The `get` method of the cache system has to be passed
+in the `cache_get` argument and the hash for the storage should be the
+corresponding anomaly ID.
 
 Example usage (assuming that you have previously set up the BIGML_USERNAME
 and BIGML_API_KEY environment variables and that you own the model/id below):
 
-from bigml.api import BigML
-from bigml.anomaly import Anomaly
-
-api = BigML()
-
+from bigml.anomaly import Anomaly
+import redis
+r = redis.Redis()
+# First build as you would any core Anomaly object:
 anomaly = Anomaly('anomaly/5126965515526876630001b2')
+# Store a serialized version in Redis
+anomaly.dump(cache_set=r.set)
+# (retrieve the external rep from its convenient place)
+# Speedy Build from external rep
+anomaly = Anomaly('anomaly/5126965515526876630001b2', cache_get=r.get)
+# Get scores same as always:
 anomaly.anomaly_score({"src_bytes": 350})
-
 """
-import logging
+
+
 import math
-import json
 
+from bigml.predicate_utils.utils import OPERATOR_CODE, PREDICATE_INFO_LENGTH
+from bigml.predicate_utils.utils import apply_predicates
 from bigml.api import FINISHED
-from bigml.api import get_status, get_api_connection
-from bigml.util import cast
+from bigml.api import get_status, get_api_connection, get_anomaly_id
 from bigml.basemodel import get_resource_dict
-from bigml.modelfields import ModelFields
-from bigml.anomalytree import AnomalyTree
-
+from bigml.modelfields import ModelFields, NUMERIC
+from bigml.util import cast, use_cache, load, get_data_format, \
+    get_formatted_data, format_data, get_data_transformations
+from bigml.constants import OUT_NEW_HEADERS, INTERNAL, DECIMALS
 
-LOGGER = logging.getLogger('BigML')
 
 DEPTH_FACTOR = 0.5772156649
+PREDICATES_OFFSET = 3
+DFT_OUTPUTS = ["score"]
+
+
+#pylint: disable=locally-disabled,invalid-name
+def get_repeat_depth(population):
+    """Computes the correction to depth used to normalize repeats
+
+    """
+    repeat_depth = 0
+    if population > 1:
+        h = DEPTH_FACTOR + math.log(population - 1)
+        repeat_depth = max([1.0,
+                            2 * (h - (float(population - 1) / population))])
+    return repeat_depth
 
 
-class Anomaly(ModelFields):
-    """ A lightweight wrapper around an anomaly detector.
-    Uses a BigML remote anomaly detector model to build a local version that
-    can be used to generate anomaly scores locally.
+def build_tree(node, add_population=False):
+    """Builds a compressed version of the tree structure as a list of
+    lists. Starting from the root node, each node
+    is represented by a list whose elements are:
+    [weight, len(predicates), operator_code, field, value, term, missing,
+     ..., len(children), children_nodes_list*]
+    When the normalize_repeats flag is set to True, we need to add the
+    population of the node: [weight, population, len(predicates), ...]
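+
+    As an illustrative sketch (the field id and threshold are made up, and
+    the operator code is whatever OPERATOR_CODE maps ">" to): a node with
+    weight 1, a repeat correction of d, a single "field000 > 10" predicate
+    and no children would be encoded, when add_population is True, as:
+        [1, d, 1, OPERATOR_CODE[">"], "field000", 10, None, False, 0]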
""" + outer = [] + outer.append(node.get('weight', 1)) + if add_population: + outer.append(get_repeat_depth(node.get("population", 0))) + build_predicates(node, outer) + children = node.get("children", []) + outer.append(len(children)) + + if not children: + return outer + + for child in children: + outer.append(build_tree(child, add_population=add_population)) + + return outer + + +def build_predicates(node, encoded_node): + """Build the minified version of the predicate in a node""" + predicates = node.get('predicates') + if predicates and not (predicates is True or predicates == [True]): + predicates = [x for x in predicates if x is not True] + encoded_node.append(len(predicates)) + for pred in predicates: + operation = pred.get('op') + value = pred.get('value') + missing = False + if operation.endswith("*"): + operation = operation[0: -1] + missing = True + elif operation == 'in' and None in value: + missing = True + + encoded_node.append(OPERATOR_CODE.get(operation)) + encoded_node.append(pred.get('field')) + encoded_node.append(value) + encoded_node.append(pred.get('term')) + encoded_node.append(missing) + else: + encoded_node.append(0) # no predicates + + return encoded_node + + +def calculate_depth(node, input_data, fields, depth=0, + normalize_repeats=False): + """Computes the depth in the tree for the input data + + """ + + weight = node[0] + shift = 0 + repeat_depth = 0 + if normalize_repeats: + shift = 1 + repeat_depth = node[1] + + num_predicates = node[1 + shift] + num_children = node[2 + shift + (5 * num_predicates)] + + predicates_ok = 0 + + if num_predicates > 0: + predicates_ok = apply_predicates(node, input_data, fields, + normalize_repeats=normalize_repeats) + + + # some of the predicates where met and depth > 1 in a leaf + if num_predicates > 0 and 0 < predicates_ok < num_predicates and \ + depth > 1 and num_children == 0: + return depth + repeat_depth + - def __init__(self, anomaly, api=None): + if num_predicates > 0 and predicates_ok != num_predicates: + return depth + + depth += weight + + if num_children > 0: + start = PREDICATES_OFFSET + (PREDICATE_INFO_LENGTH * num_predicates) \ + + shift + end = PREDICATES_OFFSET + num_children + ( \ + PREDICATE_INFO_LENGTH * num_predicates) + shift + children = node[slice(start, end)] + for child in children: + num_predicates = child[1 + shift] + predicates_ok = apply_predicates( \ + child, input_data, fields, + normalize_repeats=normalize_repeats) + if predicates_ok == num_predicates: + return calculate_depth(child, input_data, fields, depth, + normalize_repeats=normalize_repeats) + else: + depth += repeat_depth + + return depth + + +class Anomaly(ModelFields): + """ A minimal anomaly detector designed to build quickly from a + specialized external representation. See file documentation, above, + for usage. 
+ + """ + + def __init__(self, anomaly, api=None, cache_get=None): + + if use_cache(cache_get): + # using a cache to store the Minomaly attributes + self.__dict__ = load(get_anomaly_id(anomaly), cache_get) + return self.resource_id = None + self.name = None + self.description = None + self.parent_id = None self.sample_size = None self.input_fields = None + self.default_numeric_value = None self.mean_depth = None self.expected_mean_depth = None + self.normalize_repeats = None self.iforest = None - self.top_anomalies = None self.id_fields = [] - self.api = get_api_connection(api) - self.resource_id, anomaly = get_resource_dict( \ - anomaly, "anomaly", api=self.api) + api = get_api_connection(api) + self.resource_id, anomaly = get_resource_dict( + anomaly, "anomaly", api=api) if 'object' in anomaly and isinstance(anomaly['object'], dict): anomaly = anomaly['object'] + try: + self.parent_id = anomaly.get('dataset') + self.name = anomaly.get("name") + self.description = anomaly.get("description") self.sample_size = anomaly.get('sample_size') self.input_fields = anomaly.get('input_fields') + self.default_numeric_value = anomaly.get('default_numeric_value') + self.normalize_repeats = anomaly.get('normalize_repeats', False) self.id_fields = anomaly.get('id_fields', []) + except AttributeError: + raise ValueError("Failed to find the expected " + "JSON structure. Check your arguments.") + if 'model' in anomaly and isinstance(anomaly['model'], dict): - ModelFields.__init__( \ - self, anomaly['model'].get('fields'), \ + ModelFields.__init__( + self, anomaly['model'].get('fields'), missing_tokens=anomaly['model'].get('missing_tokens')) - if ('top_anomalies' in anomaly['model'] and - isinstance(anomaly['model']['top_anomalies'], list)): - self.mean_depth = anomaly['model'].get('mean_depth') - status = get_status(anomaly) - if 'code' in status and status['code'] == FINISHED: - self.expected_mean_depth = None - if self.mean_depth is None or self.sample_size is None: - raise Exception("The anomaly data is not complete. " - "Score will" - " not be available") - else: - default_depth = ( - 2 * (DEPTH_FACTOR + \ - math.log(self.sample_size - 1) - \ - (float(self.sample_size - 1) / self.sample_size))) - self.expected_mean_depth = min(self.mean_depth, - default_depth) - iforest = anomaly['model'].get('trees', []) - if iforest: - self.iforest = [ - AnomalyTree(anomaly_tree['root'], self.fields) - for anomaly_tree in iforest] - self.top_anomalies = anomaly['model']['top_anomalies'] - else: - raise Exception("The anomaly isn't finished yet") + self.mean_depth = anomaly['model'].get('mean_depth') + self.normalization_factor = anomaly['model'].get( + 'normalization_factor') + self.nodes_mean_depth = anomaly['model'].get( + 'nodes_mean_depth') + status = get_status(anomaly) + if 'code' in status and status['code'] == FINISHED: + self.expected_mean_depth = None + if self.mean_depth is None or self.sample_size is None: + raise Exception("The anomaly data is not complete. " + "Score will not be available") + self.norm = self.normalization_factor if \ + self.normalization_factor is not None else \ + self.norm_factor() + iforest = anomaly['model'].get('trees', []) + self.iforest = [] + if iforest: + self.iforest = [ + build_tree(anomaly_tree['root'], + add_population=self.normalize_repeats) + for anomaly_tree in iforest] + self.top_anomalies = anomaly['model']['top_anomalies'] else: - raise Exception("Cannot create the Anomaly instance. 
Could not" - " find the 'top_anomalies' key in the" - " resource:\n\n%s" % anomaly['model'].keys()) + raise Exception("The anomaly isn't finished yet") + + def norm_factor(self): + """Computing the normalization factor for simple anomaly detectors""" + if self.mean_depth is not None: + default_depth = self.mean_depth if self.sample_size == 1 else \ + (2 * (DEPTH_FACTOR + math.log(self.sample_size - 1) - + (float(self.sample_size - 1) / self.sample_size))) + return min(self.mean_depth, default_depth) + return None + + def data_transformations(self): + """Returns the pipeline transformations previous to the modeling + step as a pipeline, so that they can be used in local predictions. + Avoiding to set it in a Mixin to maintain the current dump function. + """ + return get_data_transformations(self.resource_id, self.parent_id) def anomaly_score(self, input_data): """Returns the anomaly score given by the iforest - To produce an anomaly score, we evaluate each tree in the iforest for its depth result (see the depth method in the AnomalyTree object for details). We find the average of these depths @@ -128,53 +284,107 @@ def anomaly_score(self, input_data): value between 0 and 1. """ - + # corner case with only one record + if self.sample_size == 1 and self.normalization_factor is None: + return 1 # Checks and cleans input_data leaving the fields used in the model - input_data = self.filter_input_data(input_data) - + norm_input_data = self.filter_input_data(input_data) # Strips affixes for numeric values and casts to the final field type - cast(input_data, self.fields) + cast(norm_input_data, self.fields) depth_sum = 0 + if self.iforest is None: raise Exception("We could not find the iforest information to " "compute the anomaly score. Please, rebuild your " "Anomaly object from a complete anomaly detector " "resource.") for tree in self.iforest: - depth_sum += tree.depth(input_data)[0] + tree_depth = calculate_depth( + tree, + norm_input_data, self.fields, + normalize_repeats=self.normalize_repeats) + depth_sum += tree_depth + observed_mean_depth = float(depth_sum) / len(self.iforest) - return math.pow(2, - observed_mean_depth / self.expected_mean_depth) + return round(math.pow(2, - observed_mean_depth / self.norm), + DECIMALS) def anomalies_filter(self, include=True): """Returns the LISP expression needed to filter the subset of top anomalies. When include is set to True, only the top anomalies are selected by the filter. If set to False, only the rest of the dataset is selected. - """ anomaly_filters = [] for anomaly in self.top_anomalies: - filter_rules = [] - row = anomaly.get('row', []) - for index, value in enumerate(row): - field_id = self.input_fields[index] - if field_id in self.id_fields: - continue - if value is None or value is "": - filter_rules.append('(missing? 
"%s")' % field_id) - else: - if (self.fields[field_id]["optype"] - in ["categorical", "text"]): - value = json.dumps(value) - filter_rules.append('(= (f "%s") %s)' % (field_id, value)) - if filter_rules: - anomaly_filters.append("(and %s)" % " ".join(filter_rules)) + row = anomaly.get('row_number') + if row is not None: + anomaly_filters.append('(= (row-number) %s)' % row) anomalies_filter = " ".join(anomaly_filters) - if include: - if len(anomaly_filters) == 1: + if len(anomaly_filters) == 1: + if include: return anomalies_filter + return "(not %s)" % anomalies_filter + if include: return "(or %s)" % anomalies_filter - else: - return "(not (or %s))" % anomalies_filter + return "(not (or %s))" % anomalies_filter + + def fill_numeric_defaults(self, input_data): + """Checks whether input data is missing a numeric field and + fills it with the average quantity set in default_numeric_value + + """ + + for field_id, field in list(self.fields.items()): + if field_id not in self.id_fields and \ + field['optype'] == NUMERIC and \ + field_id not in input_data and \ + self.default_numeric_value is not None: + default_value = 0 if self.default_numeric_value == "zero" \ + else field['summary'].get(self.default_numeric_value) + input_data[field_id] = default_value + return input_data + + def predict(self, input_data, full=False): + """Method to homogeneize the local models interface for all BigML + models. It returns the anomaly_score method result. If full is set + to True, then the result is returned as a dictionary. + """ + score = self.anomaly_score(input_data) + if full: + return {DFT_OUTPUTS[0]: score} + return score + + def batch_predict(self, input_data_list, outputs=None, **kwargs): + """Creates a batch anomaly score for a list of inputs using the local + anomaly detector. Allows to define some output settings to decide the + name of the header used for the score in the result. To homogeneize + the behaviour of supervised batch_predict method, the outputs argument + accepts a dictionary with keys: "output_fields" and "output_headers". + In this case, output_fields is ignored, as only the score can be + obtained from the anomaly_score method, and only "output_headers" is + considered to allow changing the header associated to that new field. + + :param input_data_list: List of input data to be predicted + :type input_data_list: list or Panda's dataframe + :param dict outputs: properties that define the headers and fields to + be added to the input data + :return: the list of input data plus the predicted values + :rtype: list or Panda's dataframe depending on the input type in + input_data_list + + """ + if outputs is None: + outputs = {} + new_headers = outputs.get(OUT_NEW_HEADERS, DFT_OUTPUTS) + data_format = get_data_format(input_data_list) + inner_data_list = get_formatted_data(input_data_list, INTERNAL) + for input_data in inner_data_list: + prediction = {"score": self.anomaly_score(input_data, **kwargs)} + for index, key in enumerate(DFT_OUTPUTS): + input_data[new_headers[index]] = prediction[key] + if data_format != INTERNAL: + return format_data(inner_data_list, out_format=data_format) + return inner_data_list diff --git a/bigml/anomalytree.py b/bigml/anomalytree.py deleted file mode 100644 index 24e5de1b..00000000 --- a/bigml/anomalytree.py +++ /dev/null @@ -1,85 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2014-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -"""Tree structure for the BigML local Anomaly Detector - -This module defines an auxiliary Tree structure that is used in the local -Anomaly Detector to score anomalies locally or embedded into your application -without needing to send requests to BigML.io. - -""" -from bigml.predicates import Predicates -from bigml.util import sort_fields, utf8 - - -class AnomalyTree(object): - """An anomaly tree-like predictive model. - - """ - - def __init__(self, tree, fields): - - self.fields = fields - - if tree['predicates'] is True: - self.predicates = Predicates([True]) - else: - self.predicates = Predicates(tree['predicates']) - self.id = None - - children = [] - if 'children' in tree: - for child in tree['children']: - children.append(AnomalyTree(child, self.fields)) - self.children = children - - def list_fields(self, out): - """Lists a description of the model's fields. - - """ - - for field in [(val['name'], val['optype']) for _, val in - sort_fields(self.fields)]: - out.write(utf8(u'[%-32s : %s]\n' % (field[0], field[1]))) - out.flush() - return self.fields - - def depth(self, input_data, path=None, depth=0): - """Returns the depth of the node that reaches the input data instance - when ran through the tree, and the associated set of rules. - - If a node has any children whose - predicates are all true given the instance, then the instance will - flow through that child. If the node has no children or no - children with all valid predicates, then it outputs the depth of the - node. - """ - - if path is None: - path = [] - # root node: if predicates are met, depth becomes 1, otherwise is 0 - if depth == 0: - if not self.predicates.apply(input_data, self.fields): - return depth, path - depth += 1 - - if self.children: - for child in self.children: - if child.predicates.apply(input_data, self.fields): - path.append(child.predicates.to_rule(self.fields)) - return child.depth(input_data, path=path, depth=depth + 1) - return depth, path diff --git a/bigml/api.py b/bigml/api.py index 4c3b5d17..55b1e591 100644 --- a/bigml/api.py +++ b/bigml/api.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=too-many-ancestors,non-parent-init-called, unused-import, no-member # -# Copyright 2012-2019 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -34,49 +34,56 @@ """ import sys import pprint +import os +import json from bigml.bigmlconnection import BigMLConnection from bigml.domain import BIGML_PROTOCOL -from bigml.resourcehandler import ResourceHandler -from bigml.sourcehandler import SourceHandler -from bigml.datasethandler import DatasetHandler -from bigml.modelhandler import ModelHandler -from bigml.ensemblehandler import EnsembleHandler -from bigml.predictionhandler import PredictionHandler -from bigml.clusterhandler import ClusterHandler -from bigml.centroidhandler import CentroidHandler -from bigml.anomalyhandler import AnomalyHandler -from bigml.anomalyscorehandler import AnomalyScoreHandler -from bigml.evaluationhandler import EvaluationHandler -from bigml.batchpredictionhandler import BatchPredictionHandler -from bigml.batchcentroidhandler import BatchCentroidHandler -from bigml.batchanomalyscorehandler import BatchAnomalyScoreHandler -from bigml.projecthandler import ProjectHandler -from bigml.samplehandler import SampleHandler -from bigml.correlationhandler import CorrelationHandler -from bigml.statisticaltesthandler import StatisticalTestHandler -from bigml.logistichandler import LogisticRegressionHandler -from bigml.associationhandler import AssociationHandler -from bigml.associationsethandler import AssociationSetHandler -from bigml.configurationhandler import ConfigurationHandler -from bigml.topicmodelhandler import TopicModelHandler -from bigml.topicdistributionhandler import TopicDistributionHandler -from bigml.batchtopicdistributionhandler import BatchTopicDistributionHandler -from bigml.timeserieshandler import TimeSeriesHandler -from bigml.forecasthandler import ForecastHandler -from bigml.deepnethandler import DeepnetHandler -from bigml.optimlhandler import OptimlHandler -from bigml.fusionhandler import FusionHandler -from bigml.pcahandler import PCAHandler -from bigml.projectionhandler import ProjectionHandler -from bigml.linearhandler import LinearRegressionHandler -from bigml.batchprojectionhandler import BatchProjectionHandler -from bigml.scripthandler import ScriptHandler -from bigml.executionhandler import ExecutionHandler -from bigml.libraryhandler import LibraryHandler -from bigml.constants import STORAGE - +from bigml.constants import STORAGE, ALL_FIELDS, TINY_RESOURCE, TASKS_QS +from bigml.util import is_in_progress, is_image +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.sourcehandler import SourceHandlerMixin +from bigml.api_handlers.datasethandler import DatasetHandlerMixin +from bigml.api_handlers.modelhandler import ModelHandlerMixin +from bigml.api_handlers.ensemblehandler import EnsembleHandlerMixin +from bigml.api_handlers.predictionhandler import PredictionHandlerMixin +from bigml.api_handlers.clusterhandler import ClusterHandlerMixin +from bigml.api_handlers.centroidhandler import CentroidHandlerMixin +from bigml.api_handlers.anomalyhandler import AnomalyHandlerMixin +from bigml.api_handlers.anomalyscorehandler import AnomalyScoreHandlerMixin +from bigml.api_handlers.evaluationhandler import EvaluationHandlerMixin +from bigml.api_handlers.batchpredictionhandler import BatchPredictionHandlerMixin +from bigml.api_handlers.batchcentroidhandler import BatchCentroidHandlerMixin +from bigml.api_handlers.batchanomalyscorehandler \ + import BatchAnomalyScoreHandlerMixin +from bigml.api_handlers.projecthandler import ProjectHandlerMixin +from bigml.api_handlers.samplehandler import SampleHandlerMixin +from 
bigml.api_handlers.correlationhandler import CorrelationHandlerMixin +from bigml.api_handlers.statisticaltesthandler import StatisticalTestHandlerMixin +from bigml.api_handlers.logistichandler import LogisticRegressionHandlerMixin +from bigml.api_handlers.associationhandler import AssociationHandlerMixin +from bigml.api_handlers.associationsethandler import AssociationSetHandlerMixin +from bigml.api_handlers.configurationhandler import ConfigurationHandlerMixin +from bigml.api_handlers.topicmodelhandler import TopicModelHandlerMixin +from bigml.api_handlers.topicdistributionhandler \ + import TopicDistributionHandlerMixin +from bigml.api_handlers.batchtopicdistributionhandler \ + import BatchTopicDistributionHandlerMixin +from bigml.api_handlers.timeserieshandler import TimeSeriesHandlerMixin +from bigml.api_handlers.forecasthandler import ForecastHandlerMixin +from bigml.api_handlers.deepnethandler import DeepnetHandlerMixin +from bigml.api_handlers.optimlhandler import OptimlHandlerMixin +from bigml.api_handlers.fusionhandler import FusionHandlerMixin +from bigml.api_handlers.pcahandler import PCAHandlerMixin +from bigml.api_handlers.projectionhandler import ProjectionHandlerMixin +from bigml.api_handlers.linearhandler import LinearRegressionHandlerMixin +from bigml.api_handlers.batchprojectionhandler import BatchProjectionHandlerMixin +from bigml.api_handlers.scripthandler import ScriptHandlerMixin +from bigml.api_handlers.executionhandler import ExecutionHandlerMixin +from bigml.api_handlers.libraryhandler import LibraryHandlerMixin +from bigml.api_handlers.externalconnectorhandler import \ + ExternalConnectorHandlerMixin # Repeating constants and functions for backwards compatibility @@ -111,9 +118,10 @@ BATCH_PROJECTION_PATH, BATCH_PROJECTION_RE, LINEAR_REGRESSION_PATH, LINEAR_REGRESSION_RE, SCRIPT_PATH, SCRIPT_RE, EXECUTION_PATH, EXECUTION_RE, LIBRARY_PATH, LIBRARY_RE, STATUS_PATH, - IRREGULAR_PLURALS) + IRREGULAR_PLURALS, RESOURCES_WITH_FIELDS, FIELDS_PARENT, + EXTERNAL_CONNECTOR_PATH, EXTERNAL_CONNECTOR_RE, CLONABLE_PATHS) -from bigml.resourcehandler import ( +from bigml.api_handlers.resourcehandler import ( get_resource, get_resource_type, check_resource_type, get_source_id, get_dataset_id, get_model_id, get_ensemble_id, get_evaluation_id, get_cluster_id, get_centroid_id, get_anomaly_id, get_anomaly_score_id, @@ -125,8 +133,8 @@ get_topic_distribution_id, get_batch_topic_distribution_id, get_time_series_id, get_forecast_id, get_deepnet_id, get_optiml_id, get_fusion_id, get_pca_id, get_projection_id, get_batch_projection_id, - get_configuration_id, get_linear_regression_id, - get_script_id, get_execution_id, get_library_id) + get_configuration_id, get_linear_regression_id, get_fields, + get_script_id, get_execution_id, get_library_id, get_external_connector_id) # Map status codes to labels @@ -179,34 +187,101 @@ LINEAR_REGRESSION_PATH: get_linear_regression_id, SCRIPT_PATH: get_script_id, LIBRARY_PATH: get_library_id, - EXECUTION_PATH: get_execution_id + EXECUTION_PATH: get_execution_id, + EXTERNAL_CONNECTOR_PATH: get_external_connector_id } +PREDICTIONS = [PREDICTION_RE , PROJECTION_RE, ANOMALY_SCORE_RE, + CENTROID_RE, TOPIC_DISTRIBUTION_RE, ASSOCIATION_SET_RE] + +PREDICTION_LABELS = { + "anomalyscore": "score", + "topicdistribution": "topic distribution", + "associationset": "association set"} + + +def get_resources_re(exceptions=None): + """Returning the patterns that correspond to a filtered subset of + resources. 
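+    For example, ``get_resources_re(PREDICTIONS)``, the call used below to
+    define ``NON_PREDICTIONS``, returns the patterns for every resource
+    type except the prediction-like ones.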
+ """ + if exceptions is None: + exceptions = {} + resources_re = list(RESOURCE_RE.values()) + for res_re in exceptions: + resources_re.remove(res_re) + return resources_re + + +NON_PREDICTIONS = get_resources_re(PREDICTIONS) + + +def get_prediction_label(resource_id): + """Gets the label to be prepended to predictions according to their type""" + resource_type = get_resource_type(resource_id) + return PREDICTION_LABELS.get(resource_type, resource_type) + + +#pylint: disable=locally-disabled,too-many-return-statements +def get_prediction_attr(resource): + """Getting the attribute that contains the prediction, score, etc. """ + if PREDICTION_RE.match(resource["resource"]): + return resource['object']['prediction'][ + resource['object']['objective_fields'][0]] + if PROJECTION_RE.match(resource["resource"]): + return resource["object"]["projection"]["result"] + if ANOMALY_SCORE_RE.match(resource["resource"]): + return resource["object"]["score"] + if CENTROID_RE.match(resource["resource"]): + return resource["object"]["centroid_name"] + if TOPIC_DISTRIBUTION_RE.match(resource["resource"]): + return resource["object"]["topic_distribution"]["result"] + if ASSOCIATION_SET_RE.match(resource["resource"]): + return resource["object"]["association_set"]["result"] + return "" + + def count(listing): """Count of existing resources """ if 'meta' in listing and 'query_total' in listing['meta']: return listing['meta']['query_total'] + return None -class BigML(LinearRegressionHandler, BatchProjectionHandler, - ProjectionHandler, PCAHandler, - ConfigurationHandler, FusionHandler, - OptimlHandler, - DeepnetHandler, ForecastHandler, TimeSeriesHandler, - BatchTopicDistributionHandler, TopicDistributionHandler, - TopicModelHandler, LibraryHandler, ExecutionHandler, ScriptHandler, - AssociationSetHandler, AssociationHandler, - LogisticRegressionHandler, - StatisticalTestHandler, CorrelationHandler, - SampleHandler, ProjectHandler, - BatchAnomalyScoreHandler, BatchCentroidHandler, - BatchPredictionHandler, EvaluationHandler, AnomalyScoreHandler, - AnomalyHandler, CentroidHandler, ClusterHandler, PredictionHandler, - EnsembleHandler, ModelHandler, DatasetHandler, - SourceHandler, ResourceHandler, BigMLConnection): +def filter_kwargs(kwargs, list_of_keys, out=False): + """Creates a new dict with the selected list of keys if present + If `out` is set to True, the keys in the list are removed + If `out` is set to False, only the keys in the list are kept + + """ + new_kwargs = {} + for key in kwargs: + if (key not in list_of_keys and out) or \ + (key in list_of_keys and not out): + new_kwargs[key] = kwargs[key] + return new_kwargs + + +class BigML(BigMLConnection,ExternalConnectorHandlerMixin, + LinearRegressionHandlerMixin, BatchProjectionHandlerMixin, + ProjectionHandlerMixin, PCAHandlerMixin, + ConfigurationHandlerMixin, FusionHandlerMixin, + OptimlHandlerMixin, + DeepnetHandlerMixin, ForecastHandlerMixin, TimeSeriesHandlerMixin, + BatchTopicDistributionHandlerMixin, TopicDistributionHandlerMixin, + TopicModelHandlerMixin, LibraryHandlerMixin, ExecutionHandlerMixin, + ScriptHandlerMixin, AssociationSetHandlerMixin, + AssociationHandlerMixin, LogisticRegressionHandlerMixin, + StatisticalTestHandlerMixin, CorrelationHandlerMixin, + SampleHandlerMixin, ProjectHandlerMixin, + BatchAnomalyScoreHandlerMixin, BatchCentroidHandlerMixin, + BatchPredictionHandlerMixin, EvaluationHandlerMixin, + AnomalyScoreHandlerMixin, AnomalyHandlerMixin, + CentroidHandlerMixin, ClusterHandlerMixin, PredictionHandlerMixin, + 
EnsembleHandlerMixin, ModelHandlerMixin, DatasetHandlerMixin, + SourceHandlerMixin, ResourceHandlerMixin): """Entry point to create, retrieve, list, update, and delete BigML resources. @@ -221,18 +296,17 @@ class BigML(LinearRegressionHandler, BatchProjectionHandler, error: An error code and message """ - def __init__(self, username=None, api_key=None, dev_mode=False, + def __init__(self, username=None, api_key=None, debug=False, set_locale=False, storage=None, domain=None, - project=None, organization=None): + project=None, organization=None, short_debug=False): """Initializes the BigML API. If left unspecified, `username` and `api_key` will default to the values of the `BIGML_USERNAME` and `BIGML_API_KEY` environment variables respectively. - If `dev_mode` is set to `True`, the API will be used in development - mode where the size of your datasets are limited but you are not - charged any credits. + `dev_mode` has been deprecated. All resources now coexist in the + same production environment. If storage is set to a directory name, the resources obtained in CRU operations will be stored in the given directory. @@ -255,49 +329,52 @@ def __init__(self, username=None, api_key=None, dev_mode=False, given by the organization administrator. """ - - BigMLConnection.__init__(self, username=username, api_key=api_key, - dev_mode=dev_mode, debug=debug, - set_locale=set_locale, storage=storage, - domain=domain, project=project, - organization=organization) - ResourceHandler.__init__(self) - SourceHandler.__init__(self) - DatasetHandler.__init__(self) - ModelHandler.__init__(self) - EnsembleHandler.__init__(self) - PredictionHandler.__init__(self) - ClusterHandler.__init__(self) - CentroidHandler.__init__(self) - AnomalyHandler.__init__(self) - AnomalyScoreHandler.__init__(self) - EvaluationHandler.__init__(self) - BatchPredictionHandler.__init__(self) - BatchCentroidHandler.__init__(self) - BatchAnomalyScoreHandler.__init__(self) - ProjectHandler.__init__(self) - SampleHandler.__init__(self) - CorrelationHandler.__init__(self) - StatisticalTestHandler.__init__(self) - LogisticRegressionHandler.__init__(self) - AssociationHandler.__init__(self) - AssociationSetHandler.__init__(self) - ScriptHandler.__init__(self) - ExecutionHandler.__init__(self) - LibraryHandler.__init__(self) - TopicModelHandler.__init__(self) - TopicDistributionHandler.__init__(self) - BatchTopicDistributionHandler.__init__(self) - TimeSeriesHandler.__init__(self) - ForecastHandler.__init__(self) - DeepnetHandler.__init__(self) - OptimlHandler.__init__(self) - FusionHandler.__init__(self) - ConfigurationHandler.__init__(self) - PCAHandler.__init__(self) - ProjectionHandler.__init__(self) - BatchProjectionHandler.__init__(self) - LinearRegressionHandler.__init__(self) + # BigMLConnection must be initialized first + super().__init__(username=username, api_key=api_key, + debug=debug, + set_locale=set_locale, storage=storage, + domain=domain, project=project, + organization=organization, + short_debug=short_debug) + # adding mixin properties + ResourceHandlerMixin.__init__(self) + SourceHandlerMixin.__init__(self) + DatasetHandlerMixin.__init__(self) + ModelHandlerMixin.__init__(self) + EnsembleHandlerMixin.__init__(self) + PredictionHandlerMixin.__init__(self) + ClusterHandlerMixin.__init__(self) + CentroidHandlerMixin.__init__(self) + AnomalyHandlerMixin.__init__(self) + AnomalyScoreHandlerMixin.__init__(self) + EvaluationHandlerMixin.__init__(self) + BatchPredictionHandlerMixin.__init__(self) + BatchCentroidHandlerMixin.__init__(self)
+ BatchAnomalyScoreHandlerMixin.__init__(self) + ProjectHandlerMixin.__init__(self) + SampleHandlerMixin.__init__(self) + CorrelationHandlerMixin.__init__(self) + StatisticalTestHandlerMixin.__init__(self) + LogisticRegressionHandlerMixin.__init__(self) + AssociationHandlerMixin.__init__(self) + AssociationSetHandlerMixin.__init__(self) + ScriptHandlerMixin.__init__(self) + ExecutionHandlerMixin.__init__(self) + LibraryHandlerMixin.__init__(self) + TopicModelHandlerMixin.__init__(self) + TopicDistributionHandlerMixin.__init__(self) + BatchTopicDistributionHandlerMixin.__init__(self) + TimeSeriesHandlerMixin.__init__(self) + ForecastHandlerMixin.__init__(self) + DeepnetHandlerMixin.__init__(self) + OptimlHandlerMixin.__init__(self) + FusionHandlerMixin.__init__(self) + ConfigurationHandlerMixin.__init__(self) + PCAHandlerMixin.__init__(self) + ProjectionHandlerMixin.__init__(self) + BatchProjectionHandlerMixin.__init__(self) + LinearRegressionHandlerMixin.__init__(self) + ExternalConnectorHandlerMixin.__init__(self) self.status_url = "%s%s" % (self.url, STATUS_PATH) @@ -305,11 +382,12 @@ def __init__(self, username=None, api_key=None, dev_mode=False, for resource_type in RESOURCE_RE: method_name = RENAMED_RESOURCES.get(resource_type, resource_type) self.getters[resource_type] = getattr(self, "get_%s" % method_name) - self.creaters = {} + self.creators = {} for resource_type in RESOURCE_RE: method_name = RENAMED_RESOURCES.get(resource_type, resource_type) - self.creaters[resource_type] = getattr(self, + self.creators[resource_type] = getattr(self, "create_%s" % method_name) + self.creaters = self.creators # to be deprecated self.updaters = {} for resource_type in RESOURCE_RE: method_name = RENAMED_RESOURCES.get(resource_type, resource_type) @@ -327,44 +405,161 @@ def __init__(self, username=None, api_key=None, dev_mode=False, resource_type, resource_type)) self.listers[resource_type] = getattr(self, "list_%s" % method_name) + self.cloners = {} + for resource_type in CLONABLE_PATHS: + method_name = RENAMED_RESOURCES.get(resource_type, resource_type) + self.cloners[resource_type] = getattr(self, + "clone_%s" % method_name) + + def prepare_image_fields(self, model_info, input_data): + """Creating a source for each image field used by the model + that is found in input_data + + """ + new_input_data = {} + new_input_data.update(input_data) + #pylint: disable=locally-disabled,broad-except + try: + fields = self.get_fields(model_info) + image_fields = [field_pair for field_pair in fields.items() + if field_pair[1]["optype"] == "image"] + for image_field, value in image_fields: + if image_field in input_data: + key = image_field + filename = input_data[key] + elif value["name"] in input_data: + key = value["name"] + filename = input_data[key] + source = self.create_source(filename) + source = self.check_resource(source, + query_string=TINY_RESOURCE, + raise_on_error=True) + new_input_data[key] = source["resource"] + except Exception: + # Predict Server does not return the fields info, so we infer + for field, value in input_data.items(): + if isinstance(value, str) and os.path.isfile(value) and \ + is_image(value): + source = self.create_source(value) + source = self.check_resource(source, + query_string=TINY_RESOURCE, + raise_on_error=True) + new_input_data[field] = source["resource"] + + return new_input_data + + def create(self, resource_type, *args, **kwargs): + """Create resources + + """ + finished = kwargs.get('finished', True) + create_kwargs = filter_kwargs(kwargs, + ['query_string', 
'finished'], + out=True) + try: + resource_info = self.creators[resource_type](*args, + **create_kwargs) + except KeyError: + raise ValueError("Failed to create %s. This kind of resource" + " does not exist." % resource_type) + if finished and is_in_progress(resource_info): + ok_kwargs = filter_kwargs(kwargs, ['query_string']) + ok_kwargs.update({"error_retries": 5, "debug": self.debug}) + self.ok(resource_info, **ok_kwargs) + return resource_info + + def get(self, resource, **kwargs): + """Method to get resources + + """ + finished = kwargs.get('finished', True) + get_kwargs = filter_kwargs(kwargs, + ['finished'], + out=True) + try: + resource_type = get_resource_type(resource) + resource_info = self.getters[resource_type](resource, **get_kwargs) + except KeyError: + raise ValueError("%s is not a resource or ID." % resource) + if finished and is_in_progress(resource_info): + ok_kwargs = filter_kwargs(kwargs, ['query_string']) + ok_kwargs.update({"error_retries": 5, "debug": self.debug}) + self.ok(resource_info, **ok_kwargs) + return resource_info + + def update(self, resource, changes, **kwargs): + """Method to update resources + + """ + finished = kwargs.get('finished', True) + try: + resource_type = get_resource_type(resource) + update_kwargs = filter_kwargs(kwargs, + ['query_string', 'finished'], + out=True) + resource_info = self.updaters[resource_type](resource, changes, + **update_kwargs) + except KeyError: + raise ValueError("%s is not a resource or ID." % resource) + if finished and is_in_progress(resource_info): + ok_kwargs = filter_kwargs(kwargs, ['query_string']) + ok_kwargs.update({"error_retries": 5, "debug": self.debug}) + self.ok(resource_info, **ok_kwargs) + return resource_info + + def delete(self, resource, **kwargs): + """Method to delete resources + + """ + try: + resource_type = get_resource_type(resource) + return self.deleters[resource_type](resource, **kwargs) + except KeyError: + raise ValueError("%s is not a resource." % resource) def connection_info(self): """Printable string: domain where the connection is bound and the credentials used. 
""" - info = u"Connecting to:\n" - info += u" %s\n" % self.general_domain - if self.general_protocol != BIGML_PROTOCOL: - info += u" using %s protocol\n" % self.general_protocol - info += u" SSL verification %s\n" % ( - "on" if self.verify else "off") + info = "Connecting to:\n" + info += " %s (%s)\n" % (self.domain.general_domain, + self.domain.api_version) + if self.domain.general_protocol != BIGML_PROTOCOL: + info += " using %s protocol\n" % self.domain.general_protocol + info += " SSL verification %s\n" % ( + "on" if self.domain.verify else "off") + short = "(shortened)" if self.short_debug else "" if self.debug: - info += u" Debug on\n" - if self.general_domain != self.prediction_domain: - info += u" %s (predictions only)\n" % self.prediction_domain - if self.prediction_protocol != BIGML_PROTOCOL: - info += u" using %s protocol\n" % self.prediction_protocol - info += u" SSL verification %s\n" % ( - "on" if self.verify_prediction else "off") + info += " Debug on %s\n" % short + if self.domain.general_domain != self.domain.prediction_domain: + info += " %s (predictions only)\n" % \ + self.domain.prediction_domain + if self.domain.prediction_protocol != BIGML_PROTOCOL: + info += " using %s protocol\n" % \ + self.domain.prediction_protocol + info += " SSL verification %s\n" % ( + "on" if self.domain.verify_prediction else "off") if self.project or self.organization: - info += u" Scope info: %s\n" % \ - u"%s\n %s" % (self.organization or "", - self.project or "") - - - info += u"\nAuthentication string:\n" - info += u" %s\n" % self.auth[1:] + info += " Scope info: %s\n" % \ + "%s\n %s" % (self.organization or "", + self.project or "") + info += "\nAuthentication string:\n" + info += " %s\n" % self.auth[1:] return info - def get_account_status(self): + def get_account_status(self, query_string=''): """Retrieve the account information: tasks, available_tasks, max_tasks, . Returns a dictionary with the summarized information about the account """ - return self._status(self.status_url) + if self.organization is not None: + return self._status(self.status_url, + query_string=query_string, + organization=self.organization) + return self._status(self.status_url, query_string=query_string) def get_tasks_status(self): """Retrieve the tasks information of the account @@ -372,13 +567,24 @@ def get_tasks_status(self): Returns a dictionary with the summarized information about the tasks """ - status = self._status(self.status_url)["object"] + status = self.get_account_status(query_string=TASKS_QS) + if status["error"] is None: + status = status.get("object", {}) + return { + "tasks": status.get("tasks"), + "max_tasks": status.get("subscription", {}).get("max_tasks"), + "available_tasks": (status.get("subscription", + {}).get("max_tasks") + - status.get("tasks")), + "tasks_in_progress": status.get("tasks_in_progress"), + "error": None} + return { - "tasks": status["tasks"], - "max_tasks": status["subscription"]["max_tasks"], - "available_tasks": (status["subscription"]["max_tasks"] - - status["tasks"]), - "tasks_in_progress": status["tasks_in_progress"]} + "tasks": 0, + "max_tasks": 0, + "available_tasks": 0, + "tasks_in_progress": 0, + "error": status["error"]} def get_fields(self, resource): """Retrieve fields used by a resource. 
@@ -388,63 +594,27 @@ def get_fields(self, resource): """ - def _get_fields_key(resource): - """Returns the fields key from a resource dict - - """ - if resource['code'] in [HTTP_OK, HTTP_ACCEPTED]: - if (MODEL_RE.match(resource_id) or - ANOMALY_RE.match(resource_id)): - return resource['object']['model']['model_fields'] - elif CLUSTER_RE.match(resource_id): - return resource['object']['clusters']['fields'] - elif CORRELATION_RE.match(resource_id): - return resource['object']['correlations']['fields'] - elif STATISTICAL_TEST_RE.match(resource_id): - return resource['object']['statistical_tests']['fields'] - elif LOGISTIC_REGRESSION_RE.match(resource_id): - return resource['object']['logistic_regression']['fields'] - elif ASSOCIATION_RE.match(resource_id): - return resource['object']['associations']['fields'] - elif TOPIC_MODEL_RE.match(resource_id): - return resource['object']['topic_model']['fields'] - elif TIME_SERIES_RE.match(resource_id): - return resource['object']['time_series']['fields'] - elif DEEPNET_RE.match(resource_id): - return resource['object']['deepnet']['fields'] - elif SAMPLE_RE.match(resource_id): - return dict([(field['id'], field) for field in - resource['object']['sample']['fields']]) - elif PCA_RE.match(resource_id): - return dict([(field['id'], field) for field in - resource['object']['pca']['fields']]) - elif LINEAR_REGRESSION_RE.match(resource_id): - return resource['object']['linear_regression']['fields'] - else: - return resource['object']['fields'] - return None - if isinstance(resource, dict) and 'resource' in resource: resource_id = resource['resource'] - elif (isinstance(resource, basestring) and ( - SOURCE_RE.match(resource) or DATASET_RE.match(resource) or - MODEL_RE.match(resource) or PREDICTION_RE.match(resource))): + elif isinstance(resource, str) and get_resource_type(resource) \ + in RESOURCES_WITH_FIELDS: resource_id = resource - resource = self._get("%s%s" % (self.url, resource_id)) + resource = self.retrieve_resource(resource, + query_string=ALL_FIELDS) else: LOGGER.error("Wrong resource id") - return + return None # Tries to extract fields information from resource dict. If it fails, # a get remote call is used to retrieve the resource by id. fields = None try: - fields = _get_fields_key(resource) + fields = get_fields(resource) except KeyError: resource = self._get("%s%s" % (self.url, resource_id)) - fields = _get_fields_key(resource) - + fields = get_fields(resource) return fields + #pylint: disable=locally-disabled,no-self-use def pprint(self, resource, out=sys.stdout): """Pretty prints a resource or part of it. 
@@ -455,38 +625,27 @@ def pprint(self, resource, out=sys.stdout): and 'resource' in resource): resource_id = resource['resource'] - if (SOURCE_RE.match(resource_id) or DATASET_RE.match(resource_id) - or MODEL_RE.match(resource_id) - or EVALUATION_RE.match(resource_id) - or ENSEMBLE_RE.match(resource_id) - or CLUSTER_RE.match(resource_id) - or ANOMALY_RE.match(resource_id) - or TOPIC_MODEL_RE.match(resource_id) - or LOGISTIC_REGRESSION_RE.match(resource_id) - or TIME_SERIES_RE.match(resource_id) - or DEEPNET_RE.match(resource_id) - or FUSION_RE.match(resource_id) - or PCA_RE.match(resource_id) - or LINEAR_REGRESSION_RE.match(resource_id) - or OPTIML_RE.match(resource_id)): + if (any(getattr(res_re, "match")(resource_id) for res_re + in NON_PREDICTIONS)): out.write("%s (%s bytes)\n" % (resource['object']['name'], resource['object']['size'])) elif PREDICTION_RE.match(resource['resource']): - objective_field_name = ( - resource['object']['fields'][ - resource['object']['objective_fields'][0]]['name']) input_data = {} - for key, value in resource['object']['input_data'].items(): + for key, value in list(resource['object']['input_data'].items()): try: name = resource['object']['fields'][key]['name'] except KeyError: name = key input_data[name] = value - - prediction = ( - resource['object']['prediction'][ - resource['object']['objective_fields'][0]]) - out.write("%s for %s is %s\n" % (objective_field_name, + try: + prediction_label = ( + resource['object']['fields'][ + resource['object']['objective_fields'][0]]['name']) + except IndexError: + prediction_label = get_prediction_label( + resource["resource"]) + prediction = get_prediction_attr(resource) + out.write("%s for %s is %s\n" % (prediction_label, input_data, prediction)) out.flush() @@ -503,20 +662,22 @@ def status(self, resource): status = get_status(resource) code = status['code'] return STATUSES.get(code, "UNKNOWN") - else: - status = get_status(resource) - if status['code'] != UPLOADING: - LOGGER.error("Wrong resource id") - return - return STATUSES[UPLOADING] + + status = get_status(resource) + if status['code'] != UPLOADING: + LOGGER.error("Wrong resource id") + return None + return STATUSES[UPLOADING] def check_resource(self, resource, - query_string='', wait_time=1): + query_string='', wait_time=1, retries=None, + raise_on_error=False): """Check resource method. 
""" return check_resource(resource, query_string=query_string, wait_time=wait_time, + retries=retries, raise_on_error=raise_on_error, api=self) def source_from_batch_prediction(self, batch_prediction, args=None): @@ -530,6 +691,41 @@ def source_from_batch_prediction(self, batch_prediction, args=None): download_url = "%s%s%s%s" % (self.url, batch_prediction_id, DOWNLOAD_DIR, self.auth) return self._create_remote_source(download_url, args=args) + return None + + def retrieve_resource(self, resource_id, query_string=None, + check_local_fn=None, retries=None): + """ Retrieves resource info either from the local repo or + from the remote server + + """ + if query_string is None: + query_string = '' + if self.storage is not None: + try: + stored_resource = os.path.join(self.storage, + resource_id.replace("/", "_")) + with open(stored_resource) as resource_file: + resource = json.loads(resource_file.read()) + # we check that the stored resource has the information + # needed (for instance, input_fields for predicting) + if check_local_fn is None or check_local_fn(resource): + return resource + except ValueError: + raise ValueError("The file %s contains no JSON" % + stored_resource) + except IOError: + pass + if self.auth == '?username=;api_key=;': + raise ValueError("The credentials information is missing. This" + " information is needed to download resource %s" + " for the first time and store it locally for further" + " use. Please export BIGML_USERNAME" + " and BIGML_API_KEY." % resource_id) + + resource = check_resource(resource_id, query_string=query_string, + api=self, retries=retries) + return resource def get_api_connection(api, store=True, context=None): @@ -544,7 +740,8 @@ def get_api_connection(api, store=True, context=None): if api is None or not isinstance(api, BigML): if context is None: context = {} - context.update({"storage": STORAGE} if store else {}) + storage = context.get("storage") or STORAGE + context.update({"storage": storage} if store else {}) try: api = BigML(**context) except AttributeError: diff --git a/bigml/out_model/__init__.py b/bigml/api_handlers/__init__.py similarity index 100% rename from bigml/out_model/__init__.py rename to bigml/api_handlers/__init__.py diff --git a/bigml/anomalyhandler.py b/bigml/api_handlers/anomalyhandler.py similarity index 76% rename from bigml/anomalyhandler.py rename to bigml/api_handlers/anomalyhandler.py index 5606f89b..03ece5e2 100644 --- a/bigml/anomalyhandler.py +++ b/bigml/api_handlers/anomalyhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,13 +27,13 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, resource_is_ready, - get_anomaly_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + resource_is_ready from bigml.constants import ANOMALY_PATH -class AnomalyHandler(ResourceHandler): +class AnomalyHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. 
@@ -74,12 +74,11 @@ def get_anomaly(self, anomaly, query_string='', """ check_resource_type(anomaly, ANOMALY_PATH, message="A anomaly id is needed.") - anomaly_id = get_anomaly_id(anomaly) - if anomaly_id: - return self._get("%s%s" % (self.url, anomaly_id), - query_string=query_string, - shared_username=shared_username, - shared_api_key=shared_api_key) + return self.get_resource( \ + anomaly, + query_string=query_string, + shared_username=shared_username, + shared_api_key=shared_api_key) def anomaly_is_ready(self, anomaly, **kwargs): """Checks whether an anomaly detector's status is FINISHED. @@ -102,17 +101,24 @@ def update_anomaly(self, anomaly, changes): """ check_resource_type(anomaly, ANOMALY_PATH, message="An anomaly detector id is needed.") - anomaly_id = get_anomaly_id(anomaly) - if anomaly_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, anomaly_id), body) + return self.update_resource(anomaly, changes) - def delete_anomaly(self, anomaly): + def delete_anomaly(self, anomaly, query_string=''): """Deletes an anomaly detector. """ check_resource_type(anomaly, ANOMALY_PATH, message="An anomaly detector id is needed.") - anomaly_id = get_anomaly_id(anomaly) - if anomaly_id: - return self._delete("%s%s" % (self.url, anomaly_id)) + return self.delete_resource(anomaly, query_string=query_string) + + def clone_anomaly(self, anomaly, + args=None, wait_time=3, retries=10): + """Creates a cloned anomaly from an existing `anomaly` + + """ + create_args = self._set_clone_from_args( + anomaly, "anomaly", args=args, wait_time=wait_time, + retries=retries) + + body = json.dumps(create_args) + return self._create(self.anomaly_url, body) diff --git a/bigml/anomalyscorehandler.py b/bigml/api_handlers/anomalyscorehandler.py similarity index 61% rename from bigml/anomalyscorehandler.py rename to bigml/api_handlers/anomalyscorehandler.py index 3101d4c0..1398d539 100644 --- a/bigml/anomalyscorehandler.py +++ b/bigml/api_handlers/anomalyscorehandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,15 +27,14 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, get_resource_type, - check_resource, - get_anomaly_score_id, get_anomaly_id) -from bigml.constants import (ANOMALY_SCORE_PATH, ANOMALY_PATH, - TINY_RESOURCE) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + get_resource_type, check_resource, get_anomaly_id +from bigml.constants import ANOMALY_SCORE_PATH, ANOMALY_PATH, \ + IMAGE_FIELDS_FILTER, SPECIFIC_EXCLUDES -class AnomalyScoreHandler(ResourceHandler): +class AnomalyScoreHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. 
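
The `clone_*` methods added throughout this diff share one pattern: `_set_clone_from_args` checks the origin resource and builds the creation arguments, and `_create` posts them to the same resource URL. A sketch of calling the new `clone_anomaly` (the id is hypothetical):

    # clone an existing anomaly detector under a new name
    clone = api.clone_anomaly("anomaly/5af06df94e17277501000022",
                              args={"name": "cloned anomaly detector"})
    api.ok(clone)  # wait until the clone reaches FINISHED
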
@@ -57,29 +56,42 @@ def create_anomaly_score(self, anomaly, input_data=None, """ anomaly_id = None resource_type = get_resource_type(anomaly) - if resource_type == ANOMALY_PATH: - anomaly_id = get_anomaly_id(anomaly) - check_resource(anomaly_id, - query_string=TINY_RESOURCE, - wait_time=wait_time, retries=retries, - raise_on_error=True, api=self) - else: + if resource_type != ANOMALY_PATH: raise Exception("An anomaly detector id is needed to create an" " anomaly score. %s found." % resource_type) + anomaly_id = get_anomaly_id(anomaly) + if anomaly_id is None: + raise Exception("Failed to detect a correct anomaly detector " + "structure in %s." % anomaly) + + if isinstance(anomaly, dict) and anomaly.get("resource") is not None: + # retrieving fields info from model structure + model_info = anomaly + else: + # minimal info to check status and prepare image fields + image_fields_filter = IMAGE_FIELDS_FILTER + "," + \ + ",".join(SPECIFIC_EXCLUDES[resource_type]) + model_info = check_resource(anomaly_id, + query_string=image_fields_filter, + wait_time=wait_time, + retries=retries, + raise_on_error=True, + api=self) + if input_data is None: input_data = {} create_args = {} if args is not None: create_args.update(args) create_args.update({ - "input_data": input_data}) + "input_data": self.prepare_image_fields(model_info, input_data)}) create_args.update({ "anomaly": anomaly_id}) body = json.dumps(create_args) return self._create(self.anomaly_score_url, body, - verify=self.verify) + verify=self.domain.verify_prediction) def get_anomaly_score(self, anomaly_score, query_string=''): """Retrieves an anomaly score. @@ -87,10 +99,7 @@ def get_anomaly_score(self, anomaly_score, query_string=''): """ check_resource_type(anomaly_score, ANOMALY_SCORE_PATH, message="An anomaly score id is needed.") - anomaly_score_id = get_anomaly_score_id(anomaly_score) - if anomaly_score_id: - return self._get("%s%s" % (self.url, anomaly_score_id), - query_string=query_string) + return self.get_resource(anomaly_score, query_string=query_string) def list_anomaly_scores(self, query_string=''): """Lists all your anomaly_scores. @@ -104,17 +113,12 @@ def update_anomaly_score(self, anomaly_score, changes): """ check_resource_type(anomaly_score, ANOMALY_SCORE_PATH, message="An anomaly_score id is needed.") - anomaly_score_id = get_anomaly_score_id(anomaly_score) - if anomaly_score_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, anomaly_score_id), body) + return self.update_resource(anomaly_score, changes) - def delete_anomaly_score(self, anomaly_score): + def delete_anomaly_score(self, anomaly_score, query_string=''): """Deletes an anomaly_score. 
""" check_resource_type(anomaly_score, ANOMALY_SCORE_PATH, message="An anomaly_score id is needed.") - anomaly_score_id = get_anomaly_score_id(anomaly_score) - if anomaly_score_id: - return self._delete("%s%s" % (self.url, anomaly_score_id)) + return self.delete_resource(anomaly_score, query_string=query_string) diff --git a/bigml/associationhandler.py b/bigml/api_handlers/associationhandler.py similarity index 75% rename from bigml/associationhandler.py rename to bigml/api_handlers/associationhandler.py index 3cb0e769..994a0050 100644 --- a/bigml/associationhandler.py +++ b/bigml/api_handlers/associationhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,13 +27,12 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, - get_association_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type from bigml.constants import ASSOCIATION_PATH -class AssociationHandler(ResourceHandler): +class AssociationHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the correlations' REST calls. It should not be instantiated independently. @@ -70,10 +69,7 @@ def get_association(self, association, query_string=''): """ check_resource_type(association, ASSOCIATION_PATH, message="An association id is needed.") - association_id = get_association_id(association) - if association_id: - return self._get("%s%s" % (self.url, association_id), - query_string=query_string) + return self.get_resource(association, query_string=query_string) def list_associations(self, query_string=''): """Lists all your associations. @@ -87,17 +83,24 @@ def update_association(self, association, changes): """ check_resource_type(association, ASSOCIATION_PATH, message="An association id is needed.") - association_id = get_association_id(association) - if association_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, association_id), body) + return self.update_resource(association, changes) - def delete_association(self, association): + def delete_association(self, association, query_string=''): """Deletes an association. 
""" check_resource_type(association, ASSOCIATION_PATH, message="An association id is needed.") - association_id = get_association_id(association) - if association_id: - return self._delete("%s%s" % (self.url, association_id)) + return self.delete_resource(association, query_string=query_string) + + def clone_association(self, association, + args=None, wait_time=3, retries=10): + """Creates a cloned association from an existing `association` + + """ + create_args = self._set_clone_from_args( + association, "association", args=args, wait_time=wait_time, + retries=retries) + + body = json.dumps(create_args) + return self._create(self.association_url, body) diff --git a/bigml/associationsethandler.py b/bigml/api_handlers/associationsethandler.py similarity index 60% rename from bigml/associationsethandler.py rename to bigml/api_handlers/associationsethandler.py index f957cad1..f1c13bb1 100644 --- a/bigml/associationsethandler.py +++ b/bigml/api_handlers/associationsethandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,15 +27,14 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, get_resource_type, - check_resource, - get_association_set_id, get_association_id) -from bigml.constants import (ASSOCIATION_SET_PATH, ASSOCIATION_PATH, - TINY_RESOURCE) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + get_resource_type, check_resource, get_association_id +from bigml.constants import ASSOCIATION_SET_PATH, ASSOCIATION_PATH, \ + IMAGE_FIELDS_FILTER, SPECIFIC_EXCLUDES -class AssociationSetHandler(ResourceHandler): +class AssociationSetHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -57,29 +56,42 @@ def create_association_set(self, association, input_data=None, """ association_id = None resource_type = get_resource_type(association) - if resource_type == ASSOCIATION_PATH: - association_id = get_association_id(association) - check_resource(association_id, - query_string=TINY_RESOURCE, - wait_time=wait_time, retries=retries, - raise_on_error=True, api=self) - else: - raise Exception("A association id is needed to create an" + if resource_type != ASSOCIATION_PATH: + raise Exception("An association id is needed to create an" " association set. %s found." % resource_type) + association_id = get_association_id(association) + if association_id is None: + raise Exception("Failed to detect a correct association " + "structure in %s." 
% association) + + if isinstance(association, dict) and \ + association.get("resource") is not None: + # retrieving fields info from model structure + model_info = association + else: + image_fields_filter = IMAGE_FIELDS_FILTER + "," + \ + ",".join(SPECIFIC_EXCLUDES[resource_type]) + model_info = check_resource(association_id, + query_string=image_fields_filter, + wait_time=wait_time, + retries=retries, + raise_on_error=True, + api=self) + if input_data is None: input_data = {} create_args = {} if args is not None: create_args.update(args) create_args.update({ - "input_data": input_data}) + "input_data": self.prepare_image_fields(model_info, input_data)}) create_args.update({ "association": association_id}) body = json.dumps(create_args) return self._create(self.association_set_url, body, - verify=self.verify) + verify=self.domain.verify_prediction) def get_association_set(self, association_set, query_string=''): """Retrieves an association set. @@ -87,10 +99,7 @@ def get_association_set(self, association_set, query_string=''): """ check_resource_type(association_set, ASSOCIATION_SET_PATH, message="An association set id is needed.") - association_set_id = get_association_set_id(association_set) - if association_set_id: - return self._get("%s%s" % (self.url, association_set_id), - query_string) + return self.get_resource(association_set, query_string=query_string) def list_association_sets(self, query_string=''): """Lists all your association sets. @@ -104,17 +113,12 @@ def update_association_set(self, association_set, changes): """ check_resource_type(association_set, ASSOCIATION_SET_PATH, message="An association set id is needed.") - association_set_id = get_association_set_id(association_set) - if association_set_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, association_set_id), body) + return self.update_resource(association_set, changes) - def delete_association_set(self, association_set): + def delete_association_set(self, association_set, query_string=''): """Deletes an association set. """ check_resource_type(association_set, ASSOCIATION_SET_PATH, message="An association set id is needed.") - association_set_id = get_association_set_id(association_set) - if association_set_id: - return self._delete("%s%s" % (self.url, association_set_id)) + return self.delete_resource(association_set, query_string=query_string) diff --git a/bigml/batchanomalyscorehandler.py b/bigml/api_handlers/batchanomalyscorehandler.py similarity index 73% rename from bigml/batchanomalyscorehandler.py rename to bigml/api_handlers/batchanomalyscorehandler.py index 188326e3..07516a27 100644 --- a/bigml/batchanomalyscorehandler.py +++ b/bigml/api_handlers/batchanomalyscorehandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -26,14 +26,12 @@ except ImportError: import json -from bigml.bigmlconnection import DOWNLOAD_DIR -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, - get_batch_anomaly_score_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type from bigml.constants import BATCH_ANOMALY_SCORE_PATH, ANOMALY_PATH -class BatchAnomalyScoreHandler(ResourceHandler): +class BatchAnomalyScoreHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -66,6 +64,7 @@ def create_batch_anomaly_score(self, anomaly, dataset, if origin_resources_checked: body = json.dumps(create_args) return self._create(self.batch_anomaly_score_url, body) + return None def get_batch_anomaly_score(self, batch_anomaly_score, query_string=''): """Retrieves a batch anomaly score. @@ -80,13 +79,11 @@ def get_batch_anomaly_score(self, batch_anomaly_score, query_string=''): """ check_resource_type(batch_anomaly_score, BATCH_ANOMALY_SCORE_PATH, message="A batch anomaly score id is needed.") - batch_anomaly_score_id = get_batch_anomaly_score_id( - batch_anomaly_score) - if batch_anomaly_score_id: - return self._get("%s%s" % (self.url, batch_anomaly_score_id), - query_string=query_string) + return self.get_resource(batch_anomaly_score, + query_string=query_string) - def download_batch_anomaly_score(self, batch_anomaly_score, filename=None): + def download_batch_anomaly_score(self, batch_anomaly_score, filename=None, + retries=10): """Retrieves the batch anomaly score file. Downloads anomaly scores, that are stored in a remote CSV file. If @@ -95,11 +92,9 @@ def download_batch_anomaly_score(self, batch_anomaly_score, filename=None): """ check_resource_type(batch_anomaly_score, BATCH_ANOMALY_SCORE_PATH, message="A batch anomaly score id is needed.") - batch_anomaly_score_id = get_batch_anomaly_score_id( - batch_anomaly_score) - if batch_anomaly_score_id: - return self._download("%s%s%s" % (self.url, batch_anomaly_score_id, - DOWNLOAD_DIR), filename=filename) + return self._download_resource(batch_anomaly_score, + filename, + retries=retries) def list_batch_anomaly_scores(self, query_string=''): """Lists all your batch anomaly scores. @@ -113,20 +108,13 @@ def update_batch_anomaly_score(self, batch_anomaly_score, changes): """ check_resource_type(batch_anomaly_score, BATCH_ANOMALY_SCORE_PATH, message="A batch anomaly score id is needed.") - batch_anomaly_score_id = get_batch_anomaly_score_id( - batch_anomaly_score) - if batch_anomaly_score_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, - batch_anomaly_score_id), body) - - def delete_batch_anomaly_score(self, batch_anomaly_score): + return self.update_resource(batch_anomaly_score, changes) + + def delete_batch_anomaly_score(self, batch_anomaly_score, query_string=''): """Deletes a batch anomaly score. 
""" check_resource_type(batch_anomaly_score, BATCH_ANOMALY_SCORE_PATH, message="A batch anomaly score id is needed.") - batch_anomaly_score_id = get_batch_anomaly_score_id( - batch_anomaly_score) - if batch_anomaly_score_id: - return self._delete("%s%s" % (self.url, batch_anomaly_score_id)) + return self.delete_resource(batch_anomaly_score, + query_string=query_string) diff --git a/bigml/batchcentroidhandler.py b/bigml/api_handlers/batchcentroidhandler.py similarity index 75% rename from bigml/batchcentroidhandler.py rename to bigml/api_handlers/batchcentroidhandler.py index f05e7f92..79c25f52 100644 --- a/bigml/batchcentroidhandler.py +++ b/bigml/api_handlers/batchcentroidhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -26,14 +26,12 @@ except ImportError: import json -from bigml.bigmlconnection import DOWNLOAD_DIR -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, - get_batch_centroid_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type from bigml.constants import BATCH_CENTROID_PATH, CLUSTER_PATH -class BatchCentroidHandler(ResourceHandler): +class BatchCentroidHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -66,6 +64,7 @@ def create_batch_centroid(self, cluster, dataset, if origin_resources_checked: body = json.dumps(create_args) return self._create(self.batch_centroid_url, body) + return None def get_batch_centroid(self, batch_centroid, query_string=''): """Retrieves a batch centroid. @@ -79,12 +78,10 @@ def get_batch_centroid(self, batch_centroid, query_string=''): """ check_resource_type(batch_centroid, BATCH_CENTROID_PATH, message="A batch centroid id is needed.") - batch_centroid_id = get_batch_centroid_id(batch_centroid) - if batch_centroid_id: - return self._get("%s%s" % (self.url, batch_centroid_id), - query_string=query_string) + return self.get_resource(batch_centroid, query_string=query_string) - def download_batch_centroid(self, batch_centroid, filename=None): + def download_batch_centroid(self, batch_centroid, filename=None, + retries=10): """Retrieves the batch centroid file. Downloads centroids, that are stored in a remote CSV file. If @@ -93,10 +90,8 @@ def download_batch_centroid(self, batch_centroid, filename=None): """ check_resource_type(batch_centroid, BATCH_CENTROID_PATH, message="A batch centroid id is needed.") - batch_centroid_id = get_batch_centroid_id(batch_centroid) - if batch_centroid_id: - return self._download("%s%s%s" % (self.url, batch_centroid_id, - DOWNLOAD_DIR), filename=filename) + return self._download_resource(batch_centroid, filename, + retries=retries) def list_batch_centroids(self, query_string=''): """Lists all your batch centroids. 
@@ -110,17 +105,12 @@ def update_batch_centroid(self, batch_centroid, changes): """ check_resource_type(batch_centroid, BATCH_CENTROID_PATH, message="A batch centroid id is needed.") - batch_centroid_id = get_batch_centroid_id(batch_centroid) - if batch_centroid_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, batch_centroid_id), body) + return self.update_resource(batch_centroid, changes) - def delete_batch_centroid(self, batch_centroid): + def delete_batch_centroid(self, batch_centroid, query_string=''): """Deletes a batch centroid. """ check_resource_type(batch_centroid, BATCH_CENTROID_PATH, message="A batch centroid id is needed.") - batch_centroid_id = get_batch_centroid_id(batch_centroid) - if batch_centroid_id: - return self._delete("%s%s" % (self.url, batch_centroid_id)) + return self.delete_resource(batch_centroid, query_string=query_string) diff --git a/bigml/batchpredictionhandler.py b/bigml/api_handlers/batchpredictionhandler.py similarity index 75% rename from bigml/batchpredictionhandler.py rename to bigml/api_handlers/batchpredictionhandler.py index 4bae407e..462d127a 100644 --- a/bigml/batchpredictionhandler.py +++ b/bigml/api_handlers/batchpredictionhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -26,14 +26,12 @@ except ImportError: import json -from bigml.bigmlconnection import DOWNLOAD_DIR -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, - get_batch_prediction_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type from bigml.constants import BATCH_PREDICTION_PATH, SUPERVISED_PATHS -class BatchPredictionHandler(ResourceHandler): +class BatchPredictionHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -68,6 +66,7 @@ def create_batch_prediction(self, model, dataset, if origin_resources_checked: body = json.dumps(create_args) return self._create(self.batch_prediction_url, body) + return None def get_batch_prediction(self, batch_prediction, query_string=''): """Retrieves a batch prediction. @@ -81,12 +80,10 @@ def get_batch_prediction(self, batch_prediction, query_string=''): """ check_resource_type(batch_prediction, BATCH_PREDICTION_PATH, message="A batch prediction id is needed.") - batch_prediction_id = get_batch_prediction_id(batch_prediction) - if batch_prediction_id: - return self._get("%s%s" % (self.url, batch_prediction_id), - query_string=query_string) + return self.get_resource(batch_prediction, query_string=query_string) - def download_batch_prediction(self, batch_prediction, filename=None): + def download_batch_prediction(self, batch_prediction, filename=None, + retries=10): """Retrieves the batch predictions file. Downloads predictions, that are stored in a remote CSV file. 
If @@ -95,10 +92,8 @@ def download_batch_prediction(self, batch_prediction, filename=None): """ check_resource_type(batch_prediction, BATCH_PREDICTION_PATH, message="A batch prediction id is needed.") - batch_prediction_id = get_batch_prediction_id(batch_prediction) - if batch_prediction_id: - return self._download("%s%s%s" % (self.url, batch_prediction_id, - DOWNLOAD_DIR), filename=filename) + return self._download_resource(batch_prediction, filename, + retries=retries) def list_batch_predictions(self, query_string=''): """Lists all your batch predictions. @@ -112,17 +107,13 @@ def update_batch_prediction(self, batch_prediction, changes): """ check_resource_type(batch_prediction, BATCH_PREDICTION_PATH, message="A batch prediction id is needed.") - batch_prediction_id = get_batch_prediction_id(batch_prediction) - if batch_prediction_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, batch_prediction_id), body) + return self.update_resource(batch_prediction, changes) - def delete_batch_prediction(self, batch_prediction): + def delete_batch_prediction(self, batch_prediction, query_string=''): """Deletes a batch prediction. """ check_resource_type(batch_prediction, BATCH_PREDICTION_PATH, message="A batch prediction id is needed.") - batch_prediction_id = get_batch_prediction_id(batch_prediction) - if batch_prediction_id: - return self._delete("%s%s" % (self.url, batch_prediction_id)) + return self.delete_resource(batch_prediction, + query_string=query_string) diff --git a/bigml/batchprojectionhandler.py b/bigml/api_handlers/batchprojectionhandler.py similarity index 75% rename from bigml/batchprojectionhandler.py rename to bigml/api_handlers/batchprojectionhandler.py index 354a8937..bfb05228 100644 --- a/bigml/batchprojectionhandler.py +++ b/bigml/api_handlers/batchprojectionhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2018-2019 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -26,14 +26,12 @@ except ImportError: import json -from bigml.bigmlconnection import DOWNLOAD_DIR -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, - get_batch_projection_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type from bigml.constants import BATCH_PROJECTION_PATH, PCA_PATH -class BatchProjectionHandler(ResourceHandler): +class BatchProjectionHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -66,6 +64,7 @@ def create_batch_projection(self, pca, dataset, if origin_resources_checked: body = json.dumps(create_args) return self._create(self.batch_projection_url, body) + return None def get_batch_projection(self, batch_projection, query_string=''): """Retrieves a batch projection. 
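
The public workflow for batch resources is unchanged by the refactoring. A sketch of the usual create-wait-download sequence, assuming existing model and dataset ids:

    batch = api.create_batch_prediction("model/5af06df94e17277501000044",
                                        "dataset/5af06df94e17277501000055",
                                        {"name": "my batch prediction"})
    api.ok(batch)
    api.download_batch_prediction(batch, filename="predictions.csv")
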
@@ -79,12 +78,10 @@ def get_batch_projection(self, batch_projection, query_string=''): """ check_resource_type(batch_projection, BATCH_PROJECTION_PATH, message="A batch projection id is needed.") - batch_projection_id = get_batch_projection_id(batch_projection) - if batch_projection_id: - return self._get("%s%s" % (self.url, batch_projection_id), - query_string=query_string) + return self.get_resource(batch_projection, query_string=query_string) - def download_batch_projection(self, batch_projection, filename=None): + def download_batch_projection(self, batch_projection, filename=None, + retries=10): """Retrieves the batch projections file. Downloads projections, that are stored in a remote CSV file. If @@ -93,10 +90,8 @@ def download_batch_projection(self, batch_projection, filename=None): """ check_resource_type(batch_projection, BATCH_PROJECTION_PATH, message="A batch projection id is needed.") - batch_projection_id = get_batch_projection_id(batch_projection) - if batch_projection_id: - return self._download("%s%s%s" % (self.url, batch_projection_id, - DOWNLOAD_DIR), filename=filename) + return self._download_resource(batch_projection, filename, + retries=retries) def list_batch_projections(self, query_string=''): """Lists all your batch projections. @@ -110,17 +105,13 @@ def update_batch_projection(self, batch_projection, changes): """ check_resource_type(batch_projection, BATCH_PROJECTION_PATH, message="A batch projection id is needed.") - batch_projection_id = get_batch_projection_id(batch_projection) - if batch_projection_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, batch_projection_id), body) + return self.update_resource(batch_projection, changes) - def delete_batch_projection(self, batch_projection): + def delete_batch_projection(self, batch_projection, query_string=''): """Deletes a batch projection. """ check_resource_type(batch_projection, BATCH_PROJECTION_PATH, message="A batch projection id is needed.") - batch_projection_id = get_batch_projection_id(batch_projection) - if batch_projection_id: - return self._delete("%s%s" % (self.url, batch_projection_id)) + return self.delete_resource(batch_projection, + query_string=query_string) diff --git a/bigml/batchtopicdistributionhandler.py b/bigml/api_handlers/batchtopicdistributionhandler.py similarity index 73% rename from bigml/batchtopicdistributionhandler.py rename to bigml/api_handlers/batchtopicdistributionhandler.py index aa16be79..2a1bd204 100644 --- a/bigml/batchtopicdistributionhandler.py +++ b/bigml/api_handlers/batchtopicdistributionhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2016-2019 BigML +# Copyright 2016-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -26,14 +26,12 @@ except ImportError: import json -from bigml.bigmlconnection import DOWNLOAD_DIR -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, - get_batch_topic_distribution_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type from bigml.constants import BATCH_TOPIC_DISTRIBUTION_PATH, TOPIC_MODEL_PATH -class BatchTopicDistributionHandler(ResourceHandler): +class BatchTopicDistributionHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -65,6 +63,7 @@ def create_batch_topic_distribution(self, topic_model, dataset, if origin_resources_checked: body = json.dumps(create_args) return self._create(self.batch_topic_distribution_url, body) + return None def get_batch_topic_distribution(self, batch_topic_distribution, query_string=''): @@ -81,15 +80,12 @@ def get_batch_topic_distribution(self, batch_topic_distribution, check_resource_type(batch_topic_distribution, BATCH_TOPIC_DISTRIBUTION_PATH, message="A batch topic distribution id is needed.") - batch_topic_distribution_id = get_batch_topic_distribution_id( \ - batch_topic_distribution) - if batch_topic_distribution_id: - return self._get("%s%s" % (self.url, batch_topic_distribution_id), - query_string=query_string) + return self.get_resource(batch_topic_distribution, + query_string=query_string) def download_batch_topic_distribution(self, batch_topic_distribution, - filename=None): + filename=None, retries=10): """Retrieves the batch topic distribution file. Downloads topic distributions, that are stored in a remote CSV file. @@ -100,12 +96,8 @@ def download_batch_topic_distribution(self, check_resource_type(batch_topic_distribution, BATCH_TOPIC_DISTRIBUTION_PATH, message="A batch topic distribution id is needed.") - batch_topic_distribution_id = get_batch_topic_distribution_id( \ - batch_topic_distribution) - if batch_topic_distribution_id: - return self._download("%s%s%s" % \ - (self.url, batch_topic_distribution_id, DOWNLOAD_DIR), \ - filename=filename) + return self._download_resource(batch_topic_distribution, filename, + retries=retries) def list_batch_topic_distributions(self, query_string=''): """Lists all your batch topic distributions. @@ -121,22 +113,15 @@ def update_batch_topic_distribution(self, batch_topic_distribution, check_resource_type(batch_topic_distribution, BATCH_TOPIC_DISTRIBUTION_PATH, message="A batch topic distribution id is needed.") - batch_topic_distribution_id = get_batch_topic_distribution_id( \ - batch_topic_distribution) - if batch_topic_distribution_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, - batch_topic_distribution_id), body) - - def delete_batch_topic_distribution(self, batch_topic_distribution): + return self.update_resource(batch_topic_distribution, changes) + + def delete_batch_topic_distribution(self, batch_topic_distribution, + query_string=''): """Deletes a batch topic distribution. 
""" check_resource_type(batch_topic_distribution, BATCH_TOPIC_DISTRIBUTION_PATH, message="A batch topic distribution id is needed.") - batch_topic_distribution_id = get_batch_topic_distribution_id( \ - batch_topic_distribution) - if batch_topic_distribution_id: - return self._delete("%s%s" % (self.url, - batch_topic_distribution_id)) + return self.delete_resource(batch_topic_distribution, + query_string=query_string) diff --git a/bigml/centroidhandler.py b/bigml/api_handlers/centroidhandler.py similarity index 61% rename from bigml/centroidhandler.py rename to bigml/api_handlers/centroidhandler.py index 84eb4327..d0455649 100644 --- a/bigml/centroidhandler.py +++ b/bigml/api_handlers/centroidhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,14 +27,14 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, get_resource_type, - check_resource, - get_centroid_id, get_cluster_id) -from bigml.constants import CENTROID_PATH, CLUSTER_PATH, TINY_RESOURCE +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + get_resource_type, check_resource, get_cluster_id +from bigml.constants import CENTROID_PATH, CLUSTER_PATH, SPECIFIC_EXCLUDES, \ + IMAGE_FIELDS_FILTER -class CentroidHandler(ResourceHandler): +class CentroidHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -56,29 +56,41 @@ def create_centroid(self, cluster, input_data=None, """ cluster_id = None resource_type = get_resource_type(cluster) - if resource_type == CLUSTER_PATH: - cluster_id = get_cluster_id(cluster) - check_resource(cluster_id, - query_string=TINY_RESOURCE, - wait_time=wait_time, retries=retries, - raise_on_error=True, api=self) - else: + if resource_type != CLUSTER_PATH: raise Exception("A cluster id is needed to create a" " centroid. %s found." % resource_type) + cluster_id = get_cluster_id(cluster) + if cluster_id is None: + raise Exception("Failed to detect a correct cluster " + "structure in %s." % cluster) + + if isinstance(cluster, dict) and cluster.get("resource") is not None: + # retrieving fields info from model structure + model_info = cluster + else: + image_fields_filter = IMAGE_FIELDS_FILTER + "," + \ + ",".join(SPECIFIC_EXCLUDES[resource_type]) + model_info = check_resource(cluster_id, + query_string=image_fields_filter, + wait_time=wait_time, + retries=retries, + raise_on_error=True, + api=self) + if input_data is None: input_data = {} create_args = {} if args is not None: create_args.update(args) create_args.update({ - "input_data": input_data}) + "input_data": self.prepare_image_fields(model_info, input_data)}) create_args.update({ "cluster": cluster_id}) body = json.dumps(create_args) return self._create(self.centroid_url, body, - verify=self.verify) + verify=self.domain.verify_prediction) def get_centroid(self, centroid, query_string=''): """Retrieves a centroid. 
@@ -86,10 +98,7 @@ def get_centroid(self, centroid, query_string=''): """ check_resource_type(centroid, CENTROID_PATH, message="A centroid id is needed.") - centroid_id = get_centroid_id(centroid) - if centroid_id: - return self._get("%s%s" % (self.url, centroid_id), - query_string=query_string) + return self.get_resource(centroid, query_string=query_string) def list_centroids(self, query_string=''): """Lists all your centroids. @@ -103,17 +112,12 @@ def update_centroid(self, centroid, changes): """ check_resource_type(centroid, CENTROID_PATH, message="A centroid id is needed.") - centroid_id = get_centroid_id(centroid) - if centroid_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, centroid_id), body) + return self.update_resource(centroid, changes) - def delete_centroid(self, centroid): + def delete_centroid(self, centroid, query_string=''): """Deletes a centroid. """ check_resource_type(centroid, CENTROID_PATH, message="A centroid id is needed.") - centroid_id = get_centroid_id(centroid) - if centroid_id: - return self._delete("%s%s" % (self.url, centroid_id)) + return self.delete_resource(centroid, query_string=query_string) diff --git a/bigml/clusterhandler.py b/bigml/api_handlers/clusterhandler.py similarity index 75% rename from bigml/clusterhandler.py rename to bigml/api_handlers/clusterhandler.py index ee5f0c13..ffc833eb 100644 --- a/bigml/clusterhandler.py +++ b/bigml/api_handlers/clusterhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,13 +27,13 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, resource_is_ready, - get_cluster_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + resource_is_ready from bigml.constants import CLUSTER_PATH -class ClusterHandler(ResourceHandler): +class ClusterHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -74,12 +74,10 @@ def get_cluster(self, cluster, query_string='', """ check_resource_type(cluster, CLUSTER_PATH, message="A cluster id is needed.") - cluster_id = get_cluster_id(cluster) - if cluster_id: - return self._get("%s%s" % (self.url, cluster_id), - query_string=query_string, - shared_username=shared_username, - shared_api_key=shared_api_key) + return self.get_resource(cluster, + query_string=query_string, + shared_username=shared_username, + shared_api_key=shared_api_key) def cluster_is_ready(self, cluster, **kwargs): """Checks whether a cluster's status is FINISHED. @@ -102,17 +100,24 @@ def update_cluster(self, cluster, changes): """ check_resource_type(cluster, CLUSTER_PATH, message="A cluster id is needed.") - cluster_id = get_cluster_id(cluster) - if cluster_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, cluster_id), body) + return self.update_resource(cluster, changes) - def delete_cluster(self, cluster): + def delete_cluster(self, cluster, query_string=''): """Deletes a cluster. 
""" check_resource_type(cluster, CLUSTER_PATH, message="A cluster id is needed.") - cluster_id = get_cluster_id(cluster) - if cluster_id: - return self._delete("%s%s" % (self.url, cluster_id)) + return self.delete_resource(cluster, query_string=query_string) + + def clone_cluster(self, cluster, + args=None, wait_time=3, retries=10): + """Creates a cloned cluster from an existing `cluster` + + """ + create_args = self._set_clone_from_args( + cluster, "cluster", args=args, wait_time=wait_time, + retries=retries) + + body = json.dumps(create_args) + return self._create(self.cluster_url, body) diff --git a/bigml/configurationhandler.py b/bigml/api_handlers/configurationhandler.py similarity index 76% rename from bigml/configurationhandler.py rename to bigml/api_handlers/configurationhandler.py index 3c92bbd8..4e2e1ae1 100644 --- a/bigml/configurationhandler.py +++ b/bigml/api_handlers/configurationhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,13 +27,12 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, - get_configuration_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type from bigml.constants import CONFIGURATION_PATH -class ConfigurationHandler(ResourceHandler): +class ConfigurationHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls. It should not be instantiated independently. @@ -72,10 +71,7 @@ def get_configuration(self, configuration, query_string=''): """ check_resource_type(configuration, CONFIGURATION_PATH, message="A configuration id is needed.") - configuration_id = get_configuration_id(configuration) - if configuration_id: - return self._get("%s%s" % (self.url, configuration_id), - query_string=query_string) + return self.get_resource(configuration, query_string=query_string) def list_configurations(self, query_string=''): """Lists all your configurations. @@ -89,17 +85,12 @@ def update_configuration(self, configuration, changes): """ check_resource_type(configuration, CONFIGURATION_PATH, message="A configuration id is needed.") - configuration_id = get_configuration_id(configuration) - if configuration_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, configuration_id), body) + return self.update_resource(configuration, changes) - def delete_configuration(self, configuration): + def delete_configuration(self, configuration, query_string=''): """Deletes a configuration. 
""" check_resource_type(configuration, CONFIGURATION_PATH, message="A configuration id is needed.") - configuration_id = get_configuration_id(configuration) - if configuration_id: - return self._delete("%s%s" % (self.url, configuration_id)) + return self.delete_resource(configuration, query_string=query_string) diff --git a/bigml/correlationhandler.py b/bigml/api_handlers/correlationhandler.py similarity index 78% rename from bigml/correlationhandler.py rename to bigml/api_handlers/correlationhandler.py index 6313e13c..29fedc23 100644 --- a/bigml/correlationhandler.py +++ b/bigml/api_handlers/correlationhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,15 +27,14 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, - get_correlation_id, get_resource_type, - get_dataset_id, check_resource) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + get_resource_type, get_dataset_id, check_resource from bigml.constants import (CORRELATION_PATH, DATASET_PATH, TINY_RESOURCE) -class CorrelationHandler(ResourceHandler): +class CorrelationHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the correlations' REST calls. It should not be instantiated independently. @@ -87,10 +86,7 @@ def get_correlation(self, correlation, query_string=''): """ check_resource_type(correlation, CORRELATION_PATH, message="A correlation id is needed.") - correlation_id = get_correlation_id(correlation) - if correlation_id: - return self._get("%s%s" % (self.url, correlation_id), - query_string=query_string) + return self.get_resource(correlation, query_string=query_string) def list_correlations(self, query_string=''): """Lists all your correlations. @@ -104,17 +100,12 @@ def update_correlation(self, correlation, changes): """ check_resource_type(correlation, CORRELATION_PATH, message="A correlation id is needed.") - correlation_id = get_correlation_id(correlation) - if correlation_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, correlation_id), body) + return self.update_resource(correlation, changes) - def delete_correlation(self, correlation): + def delete_correlation(self, correlation, query_string=''): """Deletes a correlation. """ check_resource_type(correlation, CORRELATION_PATH, message="A correlation id is needed.") - correlation_id = get_correlation_id(correlation) - if correlation_id: - return self._delete("%s%s" % (self.url, correlation_id)) + return self.delete_resource(correlation, query_string=query_string) diff --git a/bigml/datasethandler.py b/bigml/api_handlers/datasethandler.py similarity index 84% rename from bigml/datasethandler.py rename to bigml/api_handlers/datasethandler.py index c1c62293..04ac3ec6 100644 --- a/bigml/datasethandler.py +++ b/bigml/api_handlers/datasethandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -26,18 +26,15 @@ except ImportError: import json - -from bigml.bigmlconnection import DOWNLOAD_DIR -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, - get_resource_type, resource_is_ready, - check_resource, get_source_id, - get_dataset_id, get_cluster_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + get_resource_type, resource_is_ready, check_resource, get_source_id, \ + get_dataset_id, get_cluster_id from bigml.constants import (DATASET_PATH, SOURCE_PATH, TINY_RESOURCE, CLUSTER_PATH) -class DatasetHandler(ResourceHandler): +class DatasetHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls to datasets. It should not be instantiated independently. @@ -107,8 +104,8 @@ def create_dataset(self, origin_resource, args=None, raise_on_error=True, api=self) if 'centroid' not in create_args: try: - centroid = cluster['object'][ - 'cluster_datasets_ids'].keys()[0] + centroid = list(cluster['object'][ + 'cluster_datasets_ids'].keys())[0] create_args.update({'centroid': centroid}) except KeyError: raise KeyError("Failed to generate the dataset. A " @@ -137,10 +134,7 @@ def get_dataset(self, dataset, query_string=''): """ check_resource_type(dataset, DATASET_PATH, message="A dataset id is needed.") - dataset_id = get_dataset_id(dataset) - if dataset_id: - return self._get("%s%s" % (self.url, dataset_id), - query_string=query_string) + return self.get_resource(dataset, query_string=query_string) def dataset_is_ready(self, dataset): """Checks whether a dataset's status is FINISHED. @@ -163,20 +157,15 @@ def update_dataset(self, dataset, changes): """ check_resource_type(dataset, DATASET_PATH, message="A dataset id is needed.") - dataset_id = get_dataset_id(dataset) - if dataset_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, dataset_id), body) + return self.update_resource(dataset, changes) - def delete_dataset(self, dataset): + def delete_dataset(self, dataset, query_string=''): """Deletes a dataset. """ check_resource_type(dataset, DATASET_PATH, message="A dataset id is needed.") - dataset_id = get_dataset_id(dataset) - if dataset_id: - return self._delete("%s%s" % (self.url, dataset_id)) + return self.delete_resource(dataset, query_string=query_string) def error_counts(self, dataset, raise_on_error=True): """Returns the ids of the fields that contain errors and their number.
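The `centroid = list(cluster['object']['cluster_datasets_ids'].keys())[0]` change in create_dataset above is a Python 3 migration fix: dict .keys() now returns a view object that cannot be indexed. A toy illustration with made-up contents:

    # In Python 3, dict views are not subscriptable; the old Python 2
    # expression raises TypeError unless wrapped in list() first.
    cluster_datasets_ids = {"000005": "dataset/5af06df94e17277501000001"}
    try:
        centroid = cluster_datasets_ids.keys()[0]        # Python 2 only
    except TypeError:
        centroid = list(cluster_datasets_ids.keys())[0]  # Python 3 form
    print(centroid)  # -> 000005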
@@ -204,16 +193,24 @@ def error_counts(self, dataset, raise_on_error=True): errors_dict[field_id] = errors[field_id]['total'] return errors_dict - def download_dataset(self, dataset, filename=None, retries=10): """Downloads dataset contents to a CSV file or file object """ check_resource_type(dataset, DATASET_PATH, message="A dataset id is needed.") - dataset_id = get_dataset_id(dataset) - if dataset_id: - return self._download("%s%s%s" % (self.url, dataset_id, - DOWNLOAD_DIR), - filename=filename, - retries=retries) + return self._download_resource(dataset, + filename, + retries=retries) + + def clone_dataset(self, dataset, + args=None, wait_time=3, retries=10): + """Creates a cloned dataset from an existing `dataset` + + """ + create_args = self._set_clone_from_args( + dataset, "dataset", args=args, wait_time=wait_time, + retries=retries) + + body = json.dumps(create_args) + return self._create(self.dataset_url, body) diff --git a/bigml/deepnethandler.py b/bigml/api_handlers/deepnethandler.py similarity index 75% rename from bigml/deepnethandler.py rename to bigml/api_handlers/deepnethandler.py index 7c94c704..ff966793 100644 --- a/bigml/deepnethandler.py +++ b/bigml/api_handlers/deepnethandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2017-2019 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,13 +27,13 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, resource_is_ready, - get_deepnet_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + resource_is_ready from bigml.constants import DEEPNET_PATH -class DeepnetHandler(ResourceHandler): +class DeepnetHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -77,12 +77,10 @@ def get_deepnet(self, deepnet, query_string='', """ check_resource_type(deepnet, DEEPNET_PATH, message="A deepnet id is needed.") - deepnet_id = get_deepnet_id(deepnet) - if deepnet_id: - return self._get("%s%s" % (self.url, deepnet_id), - query_string=query_string, - shared_username=shared_username, - shared_api_key=shared_api_key) + return self.get_resource(deepnet, + query_string=query_string, + shared_username=shared_username, + shared_api_key=shared_api_key) def deepnet_is_ready(self, deepnet, **kwargs): """Checks whether a deepnet's status is FINISHED. @@ -105,18 +103,24 @@ def update_deepnet(self, deepnet, changes): """ check_resource_type(deepnet, DEEPNET_PATH, message="A deepnet id is needed.") - deepnet_id = get_deepnet_id(deepnet) - if deepnet_id: - body = json.dumps(changes) - return self._update( - "%s%s" % (self.url, deepnet_id), body) + return self.update_resource(deepnet, changes) - def delete_deepnet(self, deepnet): + def delete_deepnet(self, deepnet, query_string=''): """Deletes a deepnet.
""" check_resource_type(deepnet, DEEPNET_PATH, message="A deepnet id is needed.") - deepnet_id = get_deepnet_id(deepnet) - if deepnet_id: - return self._delete("%s%s" % (self.url, deepnet_id)) + return self.delete_resource(deepnet, query_string=query_string) + + def clone_deepnet(self, deepnet, + args=None, wait_time=3, retries=10): + """Creates a cloned deepnet from an existing `deepnet` + + """ + create_args = self._set_clone_from_args( + deepnet, "deepnet", args=args, wait_time=wait_time, + retries=retries) + + body = json.dumps(create_args) + return self._create(self.deepnet_url, body) diff --git a/bigml/ensemblehandler.py b/bigml/api_handlers/ensemblehandler.py similarity index 72% rename from bigml/ensemblehandler.py rename to bigml/api_handlers/ensemblehandler.py index e19107e6..6ebd035e 100644 --- a/bigml/ensemblehandler.py +++ b/bigml/api_handlers/ensemblehandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,13 +27,13 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, resource_is_ready, - get_ensemble_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + resource_is_ready from bigml.constants import ENSEMBLE_PATH -class EnsembleHandler(ResourceHandler): +class EnsembleHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -59,7 +59,8 @@ def create_ensemble(self, datasets, args=None, wait_time=3, retries=10): body = json.dumps(create_args) return self._create(self.ensemble_url, body) - def get_ensemble(self, ensemble, query_string=''): + def get_ensemble(self, ensemble, query_string='', + shared_username=None, shared_api_key=None): """Retrieves an ensemble. The ensemble parameter should be a string containing the @@ -71,10 +72,9 @@ def get_ensemble(self, ensemble, query_string=''): """ check_resource_type(ensemble, ENSEMBLE_PATH, message="An ensemble id is needed.") - ensemble_id = get_ensemble_id(ensemble) - if ensemble_id: - return self._get("%s%s" % (self.url, ensemble_id), - query_string=query_string) + return self.get_resource(ensemble, query_string=query_string, + shared_username=shared_username, + shared_api_key=shared_api_key) def ensemble_is_ready(self, ensemble): """Checks whether a ensemble's status is FINISHED. @@ -97,17 +97,24 @@ def update_ensemble(self, ensemble, changes): """ check_resource_type(ensemble, ENSEMBLE_PATH, message="An ensemble id is needed.") - ensemble_id = get_ensemble_id(ensemble) - if ensemble_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, ensemble_id), body) + return self.update_resource(ensemble, changes) - def delete_ensemble(self, ensemble): + def delete_ensemble(self, ensemble, query_string=''): """Deletes a ensemble. 
""" check_resource_type(ensemble, ENSEMBLE_PATH, message="An ensemble id is needed.") - ensemble_id = get_ensemble_id(ensemble) - if ensemble_id: - return self._delete("%s%s" % (self.url, ensemble_id)) + return self.delete_resource(ensemble, query_string=query_string) + + def clone_ensemble(self, ensemble, + args=None, wait_time=3, retries=10): + """Creates a cloned ensemble from an existing `ensemble` + + """ + create_args = self._set_clone_from_args( + ensemble, "ensemble", args=args, wait_time=wait_time, + retries=retries) + + body = json.dumps(create_args) + return self._create(self.ensemble_url, body) diff --git a/bigml/evaluationhandler.py b/bigml/api_handlers/evaluationhandler.py similarity index 79% rename from bigml/evaluationhandler.py rename to bigml/api_handlers/evaluationhandler.py index c044bd89..82b224d4 100644 --- a/bigml/evaluationhandler.py +++ b/bigml/api_handlers/evaluationhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,13 +27,12 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, - get_evaluation_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type from bigml.constants import SUPERVISED_PATHS, TIME_SERIES_PATH, EVALUATION_PATH -class EvaluationHandler(ResourceHandler): +class EvaluationHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -67,6 +66,7 @@ def create_evaluation(self, model, dataset, if origin_resources_checked: body = json.dumps(create_args) return self._create(self.evaluation_url, body) + return None def get_evaluation(self, evaluation, query_string=''): """Retrieves an evaluation. @@ -80,10 +80,7 @@ def get_evaluation(self, evaluation, query_string=''): """ check_resource_type(evaluation, EVALUATION_PATH, message="An evaluation id is needed.") - evaluation_id = get_evaluation_id(evaluation) - if evaluation_id: - return self._get("%s%s" % (self.url, evaluation_id), - query_string=query_string) + return self.get_resource(evaluation, query_string=query_string) def list_evaluations(self, query_string=''): """Lists all your evaluations. @@ -97,17 +94,12 @@ def update_evaluation(self, evaluation, changes): """ check_resource_type(evaluation, EVALUATION_PATH, message="An evaluation id is needed.") - evaluation_id = get_evaluation_id(evaluation) - if evaluation_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, evaluation_id), body) + return self.update_resource(evaluation, changes) - def delete_evaluation(self, evaluation): + def delete_evaluation(self, evaluation, query_string=''): """Deletes an evaluation. 
""" check_resource_type(evaluation, EVALUATION_PATH, message="An evaluation id is needed.") - evaluation_id = get_evaluation_id(evaluation) - if evaluation_id: - return self._delete("%s%s" % (self.url, evaluation_id)) + return self.delete_resource(evaluation, query_string=query_string) diff --git a/bigml/executionhandler.py b/bigml/api_handlers/executionhandler.py similarity index 78% rename from bigml/executionhandler.py rename to bigml/api_handlers/executionhandler.py index a29cda2a..2fbf6f7e 100644 --- a/bigml/executionhandler.py +++ b/bigml/api_handlers/executionhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2015 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,15 +27,14 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, - get_execution_id, get_resource_type, - get_script_id, check_resource) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + get_resource_type, get_script_id, check_resource from bigml.constants import (EXECUTION_PATH, SCRIPT_PATH, TINY_RESOURCE) -class ExecutionHandler(ResourceHandler): +class ExecutionHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the executions' REST calls. It should not be instantiated independently. @@ -60,8 +59,7 @@ def create_execution(self, origin_resource, args=None, if args is not None: create_args.update(args) - if (isinstance(origin_resource, basestring) or - isinstance(origin_resource, dict)): + if isinstance(origin_resource, (dict, str)): # single script scripts = [origin_resource] else: @@ -73,8 +71,8 @@ def create_execution(self, origin_resource, args=None, " a script execution. %s found." % get_resource_type(origin_resource)) - if all([get_resource_type(script_id) == SCRIPT_PATH for - script_id in script_ids]): + if all(get_resource_type(script_id) == SCRIPT_PATH for + script_id in script_ids): for script in scripts: check_resource(script, query_string=TINY_RESOURCE, @@ -107,10 +105,7 @@ def get_execution(self, execution, query_string=''): """ check_resource_type(execution, EXECUTION_PATH, message="An execution id is needed.") - execution_id = get_execution_id(execution) - if execution_id: - return self._get("%s%s" % (self.url, execution_id), - query_string=query_string) + return self.get_resource(execution, query_string=query_string) def list_executions(self, query_string=''): """Lists all your executions. @@ -124,10 +119,7 @@ def update_execution(self, execution, changes): """ check_resource_type(execution, EXECUTION_PATH, message="An execution id is needed.") - execution_id = get_execution_id(execution) - if execution_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, execution_id), body) + return self.update_resource(execution, changes) def delete_execution(self, execution, query_string=''): """Deletes an execution. 
@@ -135,7 +127,4 @@ def delete_execution(self, execution, query_string=''): """ check_resource_type(execution, EXECUTION_PATH, message="An execution id is needed.") - execution_id = get_execution_id(execution) - if execution_id: - return self._delete("%s%s" % (self.url, execution_id), - query_string=query_string) + return self.delete_resource(execution, query_string=query_string) diff --git a/bigml/api_handlers/externalconnectorhandler.py b/bigml/api_handlers/externalconnectorhandler.py new file mode 100644 index 00000000..7d33a58e --- /dev/null +++ b/bigml/api_handlers/externalconnectorhandler.py @@ -0,0 +1,130 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Base class for external connectors' REST calls + + https://bigml.com/api/externalconnectors + +""" +import os +try: + import simplejson as json +except ImportError: + import json + + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type +from bigml.constants import EXTERNAL_CONNECTOR_PATH, \ + EXTERNAL_CONNECTION_ATTRS + + +def get_env_connection_info(): + """Retrieves the information to use in the external connection from + environment variables. + + """ + # try to use environment variable values + connection_info = {} + for external_key in list(EXTERNAL_CONNECTION_ATTRS.keys()): + if os.environ.get(external_key): + connection_info.update( \ + {EXTERNAL_CONNECTION_ATTRS[external_key]: + os.environ.get(external_key)}) + return connection_info + + +class ExternalConnectorHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the external connectors' REST calls. It should not + be instantiated independently. + + """ + def __init__(self): + """Initializes the ExternalConnectorHandler. This class is intended to + be used as a mixin on ResourceHandler, which inherits its + attributes and basic methods from BigMLConnection, and must not be + instantiated independently. + + """ + self.external_connector_url = self.url + EXTERNAL_CONNECTOR_PATH + + def create_external_connector(self, connection_info, args=None): + """Creates an external connector from a dictionary containing the + connection information. + + """ + + create_args = {} + if args is not None: + create_args.update(args) + + if connection_info is None: + connection_info = get_env_connection_info() + + if not isinstance(connection_info, dict): + raise Exception("To create an external connector you need to" + " provide a dictionary with the connection" + " information.
Please refer to the API external" + " connector docs for details.") + + source = connection_info.get("source", "postgresql") + if "source" in connection_info: + del connection_info["source"] + + create_args.update({"connection": connection_info}) + create_args.update({"source": source}) + body = json.dumps(create_args) + return self._create(self.external_connector_url, body) + + def get_external_connector(self, external_connector, query_string=''): + """Retrieves an external connector. + + The external connector parameter should be a string containing the + external connector id or the dict returned by + create_external_connector. + As an external connector is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, the function will + return a dict that encloses the connector contents and state info + available at the time it is called. + """ + check_resource_type(external_connector, EXTERNAL_CONNECTOR_PATH, + message="An external connector id is needed.") + return self.get_resource(external_connector, query_string=query_string) + + def list_external_connectors(self, query_string=''): + """Lists all your external connectors. + + """ + return self._list(self.external_connector_url, query_string) + + def update_external_connector(self, external_connector, changes): + """Updates an external connector. + + """ + check_resource_type(external_connector, EXTERNAL_CONNECTOR_PATH, + message="An external connector id is needed.") + return self.update_resource(external_connector, changes) + + def delete_external_connector(self, external_connector, query_string=''): + """Deletes an external connector. + + """ + check_resource_type(external_connector, EXTERNAL_CONNECTOR_PATH, + message="An external connector id is needed.") + return self.delete_resource(external_connector, + query_string=query_string) diff --git a/bigml/forecasthandler.py b/bigml/api_handlers/forecasthandler.py similarity index 77% rename from bigml/forecasthandler.py rename to bigml/api_handlers/forecasthandler.py index a999e8ea..cfaba279 100644 --- a/bigml/forecasthandler.py +++ b/bigml/api_handlers/forecasthandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2017-2019 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,13 +27,12 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, get_forecast_id, - check_resource, get_time_series_id, - get_resource_type) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + check_resource, get_time_series_id, get_resource_type from bigml.constants import (FORECAST_PATH, TIME_SERIES_PATH, TINY_RESOURCE) -class ForecastHandler(ResourceHandler): +class ForecastHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -77,7 +76,7 @@ def create_forecast(self, time_series, input_data=None, body = json.dumps(create_args) return self._create(self.forecast_url, body, - verify=self.verify_prediction) + verify=self.domain.verify_prediction) def get_forecast(self, forecast, query_string=''): """Retrieves a forecast. 
@@ -85,10 +84,7 @@ def get_forecast(self, forecast, query_string=''): """ check_resource_type(forecast, FORECAST_PATH, message="A forecast id is needed.") - forecast_id = get_forecast_id(forecast) - if forecast_id: - return self._get("%s%s" % (self.url, forecast_id), - query_string=query_string) + return self.get_resource(forecast, query_string=query_string) def list_forecasts(self, query_string=''): """Lists all your forecasts. @@ -102,17 +98,12 @@ def update_forecast(self, forecast, changes): """ check_resource_type(forecast, FORECAST_PATH, message="A forecast id is needed.") - forecast_id = get_forecast_id(forecast) - if forecast_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, forecast_id), body) + return self.update_resource(forecast, changes) - def delete_forecast(self, forecast): + def delete_forecast(self, forecast, query_string=''): """Deletes a forecast. """ check_resource_type(forecast, FORECAST_PATH, message="A forecast id is needed.") - forecast_id = get_forecast_id(forecast) - if forecast_id: - return self._delete("%s%s" % (self.url, forecast_id)) + return self.delete_resource(forecast, query_string=query_string) diff --git a/bigml/fusionhandler.py b/bigml/api_handlers/fusionhandler.py similarity index 75% rename from bigml/fusionhandler.py rename to bigml/api_handlers/fusionhandler.py index 33194983..90e22ee7 100644 --- a/bigml/fusionhandler.py +++ b/bigml/api_handlers/fusionhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2018-2019 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,13 +27,13 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, resource_is_ready, - get_fusion_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + resource_is_ready from bigml.constants import FUSION_PATH, SUPERVISED_PATHS -class FusionHandler(ResourceHandler): +class FusionHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -77,12 +77,10 @@ def get_fusion(self, fusion, query_string='', """ check_resource_type(fusion, FUSION_PATH, message="A fusion id is needed.") - fusion_id = get_fusion_id(fusion) - if fusion_id: - return self._get("%s%s" % (self.url, fusion_id), - query_string=query_string, - shared_username=shared_username, - shared_api_key=shared_api_key) + return self.get_resource(fusion, + query_string=query_string, + shared_username=shared_username, + shared_api_key=shared_api_key) def fusion_is_ready(self, fusion, **kwargs): """Checks whether a fusion's status is FINISHED. 
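get_fusion, like get_ensemble, get_deepnet and the other getters touched in this diff, now routes shared_username and shared_api_key through the generic get_resource, so resources shared by another user stay readable. A hypothetical call with a made-up id and credentials:

    from bigml.api import BigML

    api = BigML()
    # your own fusion: the id (or the creation dict) is enough
    fusion = api.get_fusion("fusion/5af06df94e17277501000002")
    # a fusion shared by another user: add the sharing credentials
    shared_fusion = api.get_fusion("fusion/5af06df94e17277501000003",
                                   shared_username="alice",
                                   shared_api_key="0123456789abcdef")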
@@ -105,18 +103,24 @@ def update_fusion(self, fusion, changes): """ check_resource_type(fusion, FUSION_PATH, message="A fusion id is needed.") - fusion_id = get_fusion_id(fusion) - if fusion_id: - body = json.dumps(changes) - return self._update( - "%s%s" % (self.url, fusion_id), body) + return self.update_resource(fusion, changes) - def delete_fusion(self, fusion): + def clone_fusion(self, fusion, + args=None, wait_time=3, retries=10): + """Creates a cloned fusion from an existing `fusion` + + """ + create_args = self._set_clone_from_args( + fusion, "fusion", args=args, wait_time=wait_time, + retries=retries) + + body = json.dumps(create_args) + return self._create(self.fusion_url, body) + + def delete_fusion(self, fusion, query_string=''): """Deletes a fusion. """ check_resource_type(fusion, FUSION_PATH, message="A fusion id is needed.") - fusion_id = get_fusion_id(fusion) - if fusion_id: - return self._delete("%s%s" % (self.url, fusion_id)) + return self.delete_resource(fusion, query_string=query_string) diff --git a/bigml/libraryhandler.py b/bigml/api_handlers/libraryhandler.py similarity index 82% rename from bigml/libraryhandler.py rename to bigml/api_handlers/libraryhandler.py index dc0e9b6d..36055eee 100644 --- a/bigml/libraryhandler.py +++ b/bigml/api_handlers/libraryhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2015 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,14 +27,13 @@ import json import os -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, - get_library_id, get_resource_type, - check_resource) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + get_library_id, get_resource_type, check_resource from bigml.constants import LIBRARY_PATH, TINY_RESOURCE -class LibraryHandler(ResourceHandler): +class LibraryHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the whizzml libraries' REST calls. It should not be instantiated independently. @@ -75,7 +74,7 @@ def create_library(self, source_code=None, args=None, raise_on_error=True, api=self) create_args.update({ "origin": library_id}) - elif isinstance(source_code, basestring): + elif isinstance(source_code, str): try: if os.path.exists(source_code): with open(source_code) as code_file: @@ -106,10 +105,7 @@ def get_library(self, library, query_string=''): """ check_resource_type(library, LIBRARY_PATH, message="A library id is needed.") - library_id = get_library_id(library) - if library_id: - return self._get("%s%s" % (self.url, library_id), - query_string=query_string) + return self.get_resource(library, query_string=query_string) def list_libraries(self, query_string=''): """Lists all your libraries. @@ -123,17 +119,12 @@ def update_library(self, library, changes): """ check_resource_type(library, LIBRARY_PATH, message="A library id is needed.") - library_id = get_library_id(library) - if library_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, library_id), body) + return self.update_resource(library, changes) - def delete_library(self, library): + def delete_library(self, library, query_string=''): """Deletes a library. 
""" check_resource_type(library, LIBRARY_PATH, message="A library id is needed.") - library_id = get_library_id(library) - if library_id: - return self._delete("%s%s" % (self.url, library_id)) + return self.delete_resource(library, query_string=query_string) diff --git a/bigml/linearhandler.py b/bigml/api_handlers/linearhandler.py similarity index 74% rename from bigml/linearhandler.py rename to bigml/api_handlers/linearhandler.py index 76b1961c..3f24a5f8 100644 --- a/bigml/linearhandler.py +++ b/bigml/api_handlers/linearhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2019 BigML +# Copyright 2019-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,13 +27,13 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, resource_is_ready, - get_linear_regression_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + resource_is_ready from bigml.constants import LINEAR_REGRESSION_PATH -class LinearRegressionHandler(ResourceHandler): +class LinearRegressionHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -77,13 +77,10 @@ def get_linear_regression(self, linear_regression, query_string='', """ check_resource_type(linear_regression, LINEAR_REGRESSION_PATH, message="A linear regression id is needed.") - linear_regression_id = get_linear_regression_id( - linear_regression) - if linear_regression_id: - return self._get("%s%s" % (self.url, linear_regression_id), - query_string=query_string, - shared_username=shared_username, - shared_api_key=shared_api_key) + return self.get_resource(linear_regression, + query_string=query_string, + shared_username=shared_username, + shared_api_key=shared_api_key) def linear_regression_is_ready(self, linear_regression, **kwargs): """Checks whether a linear regressioin's status is FINISHED. @@ -106,20 +103,25 @@ def update_linear_regression(self, linear_regression, changes): """ check_resource_type(linear_regression, LINEAR_REGRESSION_PATH, message="A linear regression id is needed.") - linear_regression_id = get_linear_regression_id( - linear_regression) - if linear_regression_id: - body = json.dumps(changes) - return self._update( - "%s%s" % (self.url, linear_regression_id), body) - - def delete_linear_regression(self, linear_regression): + return self.update_resource(linear_regression, changes) + + def delete_linear_regression(self, linear_regression, query_string=''): """Deletes a linear regression. 
""" check_resource_type(linear_regression, LINEAR_REGRESSION_PATH, message="A linear regression id is needed.") - linear_regression_id = get_linear_regression_id( - linear_regression) - if linear_regression_id: - return self._delete("%s%s" % (self.url, linear_regression_id)) + return self.delete_resource(linear_regression, + query_string=query_string) + + def clone_linear_regression(self, linear_regression, + args=None, wait_time=3, retries=10): + """Creates a cloned linear regression from an existing `linear regression` + + """ + create_args = self._set_clone_from_args( + linear_regression, "linearregression", + args=args, wait_time=wait_time, retries=retries) + + body = json.dumps(create_args) + return self._create(self.linear_regression_url, body) diff --git a/bigml/logistichandler.py b/bigml/api_handlers/logistichandler.py similarity index 74% rename from bigml/logistichandler.py rename to bigml/api_handlers/logistichandler.py index 2c1715b3..744422bf 100644 --- a/bigml/logistichandler.py +++ b/bigml/api_handlers/logistichandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,13 +27,13 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, resource_is_ready, - get_logistic_regression_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + resource_is_ready from bigml.constants import LOGISTIC_REGRESSION_PATH -class LogisticRegressionHandler(ResourceHandler): +class LogisticRegressionHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -77,13 +77,10 @@ def get_logistic_regression(self, logistic_regression, query_string='', """ check_resource_type(logistic_regression, LOGISTIC_REGRESSION_PATH, message="A logistic regression id is needed.") - logistic_regression_id = get_logistic_regression_id( - logistic_regression) - if logistic_regression_id: - return self._get("%s%s" % (self.url, logistic_regression_id), - query_string=query_string, - shared_username=shared_username, - shared_api_key=shared_api_key) + return self.get_resource(logistic_regression, + query_string=query_string, + shared_username=shared_username, + shared_api_key=shared_api_key) def logistic_regression_is_ready(self, logistic_regression, **kwargs): """Checks whether a logistic regressioin's status is FINISHED. @@ -106,20 +103,25 @@ def update_logistic_regression(self, logistic_regression, changes): """ check_resource_type(logistic_regression, LOGISTIC_REGRESSION_PATH, message="A logistic regression id is needed.") - logistic_regression_id = get_logistic_regression_id( - logistic_regression) - if logistic_regression_id: - body = json.dumps(changes) - return self._update( - "%s%s" % (self.url, logistic_regression_id), body) - - def delete_logistic_regression(self, logistic_regression): + return self.update_resource(logistic_regression, changes) + + def delete_logistic_regression(self, logistic_regression, query_string=''): """Deletes a logistic regression. 
""" check_resource_type(logistic_regression, LOGISTIC_REGRESSION_PATH, message="A logistic regression id is needed.") - logistic_regression_id = get_logistic_regression_id( - logistic_regression) - if logistic_regression_id: - return self._delete("%s%s" % (self.url, logistic_regression_id)) + return self.delete_resource(logistic_regression, + query_string=query_string) + + def clone_logistic_regression(self, logistic_regression, + args=None, wait_time=3, retries=10): + """Creates a cloned logistic regression from an existing `logistic regression` + + """ + create_args = self._set_clone_from_args( + logistic_regression, "logisticregression", + args=args, wait_time=wait_time, retries=retries) + + body = json.dumps(create_args) + return self._create(self.logistic_regression_url, body) diff --git a/bigml/modelhandler.py b/bigml/api_handlers/modelhandler.py similarity index 80% rename from bigml/modelhandler.py rename to bigml/api_handlers/modelhandler.py index b7871fc5..0a94d342 100644 --- a/bigml/modelhandler.py +++ b/bigml/api_handlers/modelhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,15 +27,15 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, resource_is_ready, - get_resource_type, check_resource, - get_model_id, get_cluster_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + resource_is_ready, get_resource_type, check_resource, \ + get_cluster_id from bigml.constants import (MODEL_PATH, CLUSTER_PATH, DATASET_PATH, TINY_RESOURCE) -class ModelHandler(ResourceHandler): +class ModelHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -85,8 +85,8 @@ def create_model(self, origin_resource, args=None, wait_time=3, retries=10): raise_on_error=True, api=self) if 'centroid' not in create_args: try: - centroid = cluster['object'][ - 'cluster_models'].keys()[0] + centroid = list(cluster['object'][ + 'cluster_models'].keys())[0] create_args.update({'centroid': centroid}) except KeyError: raise KeyError("Failed to generate the model. A " @@ -120,15 +120,15 @@ def get_model(self, model, query_string='', If this is a shared model, the username and sharing api key must also be provided. + If it's a model inside an ensemble or fusion, the shared_ref is + needed. """ check_resource_type(model, MODEL_PATH, message="A model id is needed.") - model_id = get_model_id(model) - if model_id: - return self._get("%s%s" % (self.url, model_id), - query_string=query_string, - shared_username=shared_username, - shared_api_key=shared_api_key) + return self.get_resource(model, + query_string=query_string, + shared_username=shared_username, + shared_api_key=shared_api_key) def model_is_ready(self, model, **kwargs): """Checks whether a model's status is FINISHED. 
@@ -151,17 +151,24 @@ def update_model(self, model, changes): """ check_resource_type(model, MODEL_PATH, message="A model id is needed.") - model_id = get_model_id(model) - if model_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, model_id), body) + return self.update_resource(model, changes) - def delete_model(self, model): + def delete_model(self, model, query_string=''): """Deletes a model. """ check_resource_type(model, MODEL_PATH, message="A model id is needed.") - model_id = get_model_id(model) - if model_id: - return self._delete("%s%s" % (self.url, model_id)) + return self.delete_resource(model, query_string=query_string) + + def clone_model(self, model, + args=None, wait_time=3, retries=10): + """Creates a cloned model from an existing `model` + + """ + create_args = self._set_clone_from_args( + model, "model", args=args, wait_time=wait_time, + retries=retries) + + body = json.dumps(create_args) + return self._create(self.model_url, body) diff --git a/bigml/optimlhandler.py b/bigml/api_handlers/optimlhandler.py similarity index 78% rename from bigml/optimlhandler.py rename to bigml/api_handlers/optimlhandler.py index 859d17e5..cd5853d5 100644 --- a/bigml/optimlhandler.py +++ b/bigml/api_handlers/optimlhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2018-2019 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,13 +27,13 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, resource_is_ready, - get_optiml_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + resource_is_ready from bigml.constants import OPTIML_PATH -class OptimlHandler(ResourceHandler): +class OptimlHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -77,12 +77,10 @@ def get_optiml(self, optiml, query_string='', """ check_resource_type(optiml, OPTIML_PATH, message="An optiml id is needed.") - optiml_id = get_optiml_id(optiml) - if optiml_id: - return self._get("%s%s" % (self.url, optiml_id), - query_string=query_string, - shared_username=shared_username, - shared_api_key=shared_api_key) + return self.get_resource(optiml, + query_string=query_string, + shared_username=shared_username, + shared_api_key=shared_api_key) def optiml_is_ready(self, optiml, **kwargs): """Checks whether an optiml's status is FINISHED. @@ -105,18 +103,12 @@ def update_optiml(self, optiml, changes): """ check_resource_type(optiml, OPTIML_PATH, message="An optiml id is needed.") - optiml_id = get_optiml_id(optiml) - if optiml_id: - body = json.dumps(changes) - return self._update( - "%s%s" % (self.url, optiml_id), body) + return self.update_resource(optiml, changes) - def delete_optiml(self, optiml): + def delete_optiml(self, optiml, query_string=''): """Deletes an optiml. 
""" check_resource_type(optiml, OPTIML_PATH, message="An optiml id is needed.") - optiml_id = get_optiml_id(optiml) - if optiml_id: - return self._delete("%s%s" % (self.url, optiml_id)) + return self.delete_resource(optiml, query_string=query_string) diff --git a/bigml/pcahandler.py b/bigml/api_handlers/pcahandler.py similarity index 75% rename from bigml/pcahandler.py rename to bigml/api_handlers/pcahandler.py index e848a85c..933d73da 100644 --- a/bigml/pcahandler.py +++ b/bigml/api_handlers/pcahandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2018-2019 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,13 +27,13 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, resource_is_ready, - get_pca_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + resource_is_ready from bigml.constants import PCA_PATH -class PCAHandler(ResourceHandler): +class PCAHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -75,12 +75,10 @@ def get_pca(self, pca, query_string='', """ check_resource_type(pca, PCA_PATH, message="A PCA id is needed.") - pca_id = get_pca_id(pca) - if pca_id: - return self._get("%s%s" % (self.url, pca_id), - query_string=query_string, - shared_username=shared_username, - shared_api_key=shared_api_key) + return self.get_resource(pca, + query_string=query_string, + shared_username=shared_username, + shared_api_key=shared_api_key) def pca_is_ready(self, pca, **kwargs): """Checks whether a pca's status is FINISHED. @@ -103,18 +101,24 @@ def update_pca(self, pca, changes): """ check_resource_type(pca, PCA_PATH, message="A PCA id is needed.") - pca_id = get_pca_id(pca) - if pca_id: - body = json.dumps(changes) - return self._update( - "%s%s" % (self.url, pca_id), body) + return self.update_resource(pca, changes) - def delete_pca(self, pca): + def delete_pca(self, pca, query_string=''): """Deletes a PCA. """ check_resource_type(pca, PCA_PATH, message="A PCA id is needed.") - pca_id = get_pca_id(pca) - if pca_id: - return self._delete("%s%s" % (self.url, pca_id)) + return self.delete_resource(pca, query_string=query_string) + + def clone_pca(self, pca, + args=None, wait_time=3, retries=10): + """Creates a cloned PCA from an existing `PCA` + + """ + create_args = self._set_clone_from_args( + pca, "pca", args=args, wait_time=wait_time, + retries=retries) + + body = json.dumps(create_args) + return self._create(self.pca_url, body) diff --git a/bigml/predictionhandler.py b/bigml/api_handlers/predictionhandler.py similarity index 66% rename from bigml/predictionhandler.py rename to bigml/api_handlers/predictionhandler.py index cb9bfdd9..c2c160b2 100644 --- a/bigml/predictionhandler.py +++ b/bigml/api_handlers/predictionhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -27,14 +27,14 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, get_prediction_id, - check_resource, get_resource_id, - get_resource_type) -from bigml.constants import SUPERVISED_PATHS, TINY_RESOURCE, PREDICTION_PATH +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + check_resource, get_resource_id, get_resource_type +from bigml.constants import SUPERVISED_PATHS, IMAGE_FIELDS_FILTER, \ + PREDICTION_PATH, SPECIFIC_EXCLUDES -class PredictionHandler(ResourceHandler): +class PredictionHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -71,11 +71,22 @@ def create_prediction(self, model, input_data=None, resource_type) model_id = get_resource_id(model) - if model_id is not None: - check_resource(model_id, - query_string=TINY_RESOURCE, - wait_time=wait_time, retries=retries, - raise_on_error=True, api=self) + if model_id is None: + raise Exception("Failed to detect a correct model structure" + " in %s." % model) + + if isinstance(model, dict) and model.get("resource") is not None: + # retrieving fields info from model structure + model_info = model + else: + image_fields_filter = IMAGE_FIELDS_FILTER + "," + \ + ",".join(SPECIFIC_EXCLUDES[resource_type]) + model_info = check_resource(model_id, + query_string=image_fields_filter, + wait_time=wait_time, + retries=retries, + raise_on_error=True, + api=self) if input_data is None: input_data = {} @@ -83,14 +94,14 @@ def create_prediction(self, model, input_data=None, if args is not None: create_args.update(args) create_args.update({ - "input_data": input_data}) + "input_data": self.prepare_image_fields(model_info, input_data)}) if model_id is not None: create_args.update({ "model": model_id}) body = json.dumps(create_args) return self._create(self.prediction_url, body, - verify=self.verify_prediction) + verify=self.domain.verify_prediction) def get_prediction(self, prediction, query_string=''): """Retrieves a prediction. @@ -98,10 +109,7 @@ def get_prediction(self, prediction, query_string=''): """ check_resource_type(prediction, PREDICTION_PATH, message="A prediction id is needed.") - prediction_id = get_prediction_id(prediction) - if prediction_id: - return self._get("%s%s" % (self.url, prediction_id), - query_string=query_string) + return self.get_resource(prediction, query_string=query_string) def list_predictions(self, query_string=''): """Lists all your predictions. @@ -115,17 +123,12 @@ def update_prediction(self, prediction, changes): """ check_resource_type(prediction, PREDICTION_PATH, message="A prediction id is needed.") - prediction_id = get_prediction_id(prediction) - if prediction_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, prediction_id), body) + return self.update_resource(prediction, changes) - def delete_prediction(self, prediction): + def delete_prediction(self, prediction, query_string=''): """Deletes a prediction. 
""" check_resource_type(prediction, PREDICTION_PATH, message="A prediction id is needed.") - prediction_id = get_prediction_id(prediction) - if prediction_id: - return self._delete("%s%s" % (self.url, prediction_id)) + return self.delete_resource(prediction, query_string=query_string) diff --git a/bigml/projecthandler.py b/bigml/api_handlers/projecthandler.py similarity index 75% rename from bigml/projecthandler.py rename to bigml/api_handlers/projecthandler.py index f3e3e991..3c3b7a51 100644 --- a/bigml/projecthandler.py +++ b/bigml/api_handlers/projecthandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,12 +27,12 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import check_resource_type, get_project_id +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type from bigml.constants import PROJECT_PATH -class ProjectHandler(ResourceHandler): +class ProjectHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -68,10 +68,8 @@ def get_project(self, project, query_string=''): """ check_resource_type(project, PROJECT_PATH, message="A project id is needed.") - project_id = get_project_id(project) - if project_id: - return self._get("%s%s" % (self.url, project_id), - query_string=query_string, organization=True) + return self.get_resource(project, query_string=query_string, + organization=True) def list_projects(self, query_string=''): """Lists all your projects. @@ -85,19 +83,13 @@ def update_project(self, project, changes): """ check_resource_type(project, PROJECT_PATH, message="A project id is needed.") - project_id = get_project_id(project) - if project_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, project_id), body, - organization=True) + return self.update_resource(project, changes, organization=True) - def delete_project(self, project): + def delete_project(self, project, query_string=''): """Deletes a project. """ check_resource_type(project, PROJECT_PATH, message="A project id is needed.") - project_id = get_project_id(project) - if project_id: - return self._delete("%s%s" % (self.url, project_id), - organization=True) + return self.delete_resource(project, query_string=query_string, + organization=True) diff --git a/bigml/projectionhandler.py b/bigml/api_handlers/projectionhandler.py similarity index 64% rename from bigml/projectionhandler.py rename to bigml/api_handlers/projectionhandler.py index c11710de..d463fca8 100644 --- a/bigml/projectionhandler.py +++ b/bigml/api_handlers/projectionhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2018-2019 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -27,14 +27,14 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, get_projection_id, - check_resource, get_resource_id, - get_resource_type) -from bigml.constants import TINY_RESOURCE, PROJECTION_PATH, PCA_PATH +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + check_resource, get_resource_id, get_resource_type +from bigml.constants import PROJECTION_PATH, PCA_PATH, \ + IMAGE_FIELDS_FILTER, SPECIFIC_EXCLUDES -class ProjectionHandler(ResourceHandler): +class ProjectionHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -64,11 +64,22 @@ def create_projection(self, pca, input_data=None, resource_type) pca_id = get_resource_id(pca) - if pca_id is not None: - check_resource(pca_id, - query_string=TINY_RESOURCE, - wait_time=wait_time, retries=retries, - raise_on_error=True, api=self) + if pca_id is None: + raise Exception("Failed to detect a correct pca structure" + " in %s." % pca) + + if isinstance(pca, dict) and pca.get("resource") is not None: + # retrieving fields info from model structure + model_info = pca + else: + image_fields_filter = IMAGE_FIELDS_FILTER + "," + \ + ",".join(SPECIFIC_EXCLUDES[resource_type]) + model_info = check_resource(pca_id, + query_string=image_fields_filter, + wait_time=wait_time, + retries=retries, + raise_on_error=True, + api=self) if input_data is None: input_data = {} @@ -76,14 +87,14 @@ def create_projection(self, pca, input_data=None, if args is not None: create_args.update(args) create_args.update({ - "input_data": input_data}) + "input_data": self.prepare_image_fields(model_info, input_data)}) if pca_id is not None: create_args.update({ "pca": pca_id}) body = json.dumps(create_args) return self._create(self.projection_url, body, - verify=self.verify) + verify=self.domain.verify_prediction) def get_projection(self, projection, query_string=''): """Retrieves a projection. @@ -91,10 +102,7 @@ def get_projection(self, projection, query_string=''): """ check_resource_type(projection, PROJECTION_PATH, message="A projection id is needed.") - projection_id = get_projection_id(projection) - if projection_id: - return self._get("%s%s" % (self.url, projection_id), - query_string=query_string) + return self.get_resource(projection, query_string=query_string) def list_projections(self, query_string=''): """Lists all your projections. @@ -108,17 +116,12 @@ def update_projection(self, projection, changes): """ check_resource_type(projection, PROJECTION_PATH, message="A projection id is needed.") - projection_id = get_projection_id(projection) - if projection_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, projection_id), body) + return self.update_resource(projection, changes) - def delete_projection(self, projection): + def delete_projection(self, projection, query_string=''): """Deletes a projection. 
""" check_resource_type(projection, PROJECTION_PATH, message="A projection id is needed.") - projection_id = get_projection_id(projection) - if projection_id: - return self._delete("%s%s" % (self.url, projection_id)) + return self.delete_resource(projection, query_string=query_string) diff --git a/bigml/resourcehandler.py b/bigml/api_handlers/resourcehandler.py similarity index 58% rename from bigml/resourcehandler.py rename to bigml/api_handlers/resourcehandler.py index c6a29575..524f53ef 100644 --- a/bigml/resourcehandler.py +++ b/bigml/api_handlers/resourcehandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method,unused-import # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -11,7 +11,7 @@ # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# WARRANTIES OR CONDITIONS OF ANY KIn545D, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. @@ -22,6 +22,9 @@ import time import os import datetime +import json +import re +import abc from xml.dom import minidom @@ -30,22 +33,11 @@ from bigml.util import get_exponential_wait, get_status, is_status_final, \ save, save_json from bigml.util import DFT_STORAGE -from bigml.bigmlconnection import HTTP_OK, HTTP_ACCEPTED, HTTP_CREATED, LOGGER -from bigml.bigmlconnection import BigMLConnection - - -# Resource status codes -WAITING = 0 -QUEUED = 1 -STARTED = 2 -IN_PROGRESS = 3 -SUMMARIZED = 4 -FINISHED = 5 -UPLOADING = 6 -FAULTY = -1 -UNKNOWN = -2 -RUNNABLE = -3 - +from bigml.bigmlconnection import HTTP_OK, HTTP_ACCEPTED, HTTP_CREATED, \ + LOGGER, DOWNLOAD_DIR, HTTP_INTERNAL_SERVER_ERROR +from bigml.constants import WAITING, QUEUED, STARTED, IN_PROGRESS, \ + SUMMARIZED, FINISHED, UPLOADING, FAULTY, UNKNOWN, RUNNABLE +from bigml.exceptions import FaultyResourceError # Minimum query string to get model fields TINY_RESOURCE = "full=false" @@ -53,7 +45,7 @@ # Resource types that are composed by other resources COMPOSED_RESOURCES = ["ensemble", "fusion"] -LIST_LAST = "limit=1;full=yes;tags=%s" +LIST_LAST = "limit=1&full=yes&tags=%s" PMML_QS = "pmml=yes" @@ -64,9 +56,9 @@ def get_resource_type(resource): """ if isinstance(resource, dict) and 'resource' in resource: resource = resource['resource'] - if not isinstance(resource, basestring): + if not isinstance(resource, str): raise ValueError("Failed to parse a resource string or structure.") - for resource_type, resource_re in c.RESOURCE_RE.items(): + for resource_type, resource_re in list(c.RESOURCE_RE.items()): if resource_re.match(resource): return resource_type return None @@ -78,7 +70,7 @@ def get_resource(resource_type, resource): """ if isinstance(resource, dict) and 'resource' in resource: resource = resource['resource'] - if isinstance(resource, basestring): + if isinstance(resource, str): if c.RESOURCE_RE[resource_type].match(resource): return resource found_type = get_resource_type(resource) @@ -91,23 +83,64 @@ def get_resource(resource_type, resource): raise ValueError("%s is not a valid resource ID." % resource) +def get_id(pure_id): + """Returns last part or a resource ID. 
+ + """ + if isinstance(pure_id, str): + pure_id = re.sub(r'^[^/]*/(%s)' % c.ID_PATTERN, r'\1', pure_id) + if c.ID_RE.match(pure_id): + return pure_id + raise ValueError("%s is not a valid ID." % pure_id) + + +def get_fields(resource): + """Returns the field information in a resource dictionary structure + + """ + try: + resource_type = get_resource_type(resource) + except ValueError: + raise ValueError("Unknown resource structure. Failed to find" + " a valid resource dictionary as argument.") + + if resource_type in c.RESOURCES_WITH_FIELDS: + resource = resource.get('object', resource) + # fields structure + if resource_type in list(c.FIELDS_PARENT.keys()) and \ + c.FIELDS_PARENT[resource_type] is not None: + fields = resource[c.FIELDS_PARENT[resource_type]].get('fields', {}) + else: + fields = resource.get('fields', {}) + + if resource_type == c.SAMPLE_PATH: + fields = {field['id']: field for field in fields} + return fields + + def resource_is_ready(resource): """Checks a fully fledged resource structure and returns True if finished. """ - if not isinstance(resource, dict) or 'error' not in resource: + if not isinstance(resource, dict): raise Exception("No valid resource structure found") - if resource['error'] is not None: - raise Exception(resource['error']['status']['message']) - return (resource['code'] in [HTTP_OK, HTTP_ACCEPTED] and - get_status(resource)['code'] == c.FINISHED) + # full resources + if 'object' in resource: + if 'error' not in resource: + raise Exception("No valid resource structure found") + if resource['error'] is not None: + raise Exception(resource['error']['status']['message']) + return (resource['code'] in [HTTP_OK, HTTP_ACCEPTED] and + get_status(resource)['code'] == c.FINISHED) + # only API response contents + return get_status(resource)['code'] == c.FINISHED def check_resource_type(resource, expected_resource, message=None): """Checks the resource type. """ - if isinstance(expected_resource, basestring): + if isinstance(expected_resource, str): expected_resources = [expected_resource] else: expected_resources = expected_resource @@ -371,30 +404,55 @@ def get_library_id(library): return get_resource(c.LIBRARY_PATH, library) +def get_external_connector_id(library): + """Returns a externalconnector/id. + + """ + return get_resource(c.EXTERNAL_CONNECTOR_PATH, library) + + def get_resource_id(resource): """Returns the resource id if it falls in one of the registered types """ if isinstance(resource, dict) and 'resource' in resource: return resource['resource'] - elif isinstance(resource, basestring) and any( + if isinstance(resource, str) and any( resource_re.match(resource) for _, resource_re - in c.RESOURCE_RE.items()): + in list(c.RESOURCE_RE.items())): return resource - else: - return + return None -def exception_on_error(resource): - """Raises exception if resource has error +def exception_on_error(resource, logger=None): + """Raises exception if the resource has an error. The error can be + due to a problem in the API call to retrieve it or because the + resource is FAULTY. 
""" - if resource['error'] is not None: - raise Exception(resource['error']['status']['message']) + if resource.get('error') is not None: + # http error calling the API + message = "API connection problem - %s" % resource.get('error', \ + {}).get('status', {}).get('message') + if logger is not None: + logger.error(message) + raise Exception(message) + if resource.get('object', resource).get('status', {}).get('error') \ + is not None: + # Faulty resource problem + status = resource.get('object', resource).get( \ + 'status', {}) + message = "Faulty resource %s - %s" % (resource["resource"], + status.get('cause', status).get('message')) + if logger is not None: + logger.error(message) + raise FaultyResourceError(message) def check_resource(resource, get_method=None, query_string='', wait_time=1, - retries=None, raise_on_error=False, api=None): + retries=None, raise_on_error=False, + max_elapsed_estimate=float('inf'), api=None, debug=False, + progress_cb=None): """Waits until a resource is finished. Given a resource and its corresponding get_method (if absent, the @@ -406,44 +464,110 @@ def check_resource(resource, get_method=None, query_string='', wait_time=1, parameter. """ - if isinstance(resource, basestring): - resource_id = resource - else: - resource_id = get_resource_id(resource) + resource_id = get_resource_id(resource) + # ephemeral predictions + if isinstance(resource, dict) and resource.get("resource") is None: + return resource if resource_id is None: raise ValueError("Failed to extract a valid resource id to check.") + if wait_time <= 0: + raise ValueError("The time to wait needs to be positive.") + debug = debug or (api is not None and (api.debug or api.short_debug)) + if debug: + print("Checking resource: %s" % resource_id) kwargs = {'query_string': query_string} - - if get_method is None and hasattr(api, 'get_resource'): + if hasattr(api, 'shared_ref') or (get_method is None and + hasattr(api, 'get_resource')): get_method = api.get_resource elif get_method is None: raise ValueError("You must supply either the get_method or the api" " connection info to retrieve the resource") - if isinstance(resource, basestring): - resource = get_method(resource, **kwargs) + + if not isinstance(resource, dict) or not http_ok(resource) or \ + resource.get("object") is None: + resource = resource_id + + if isinstance(resource, str): + if debug: + print("Getting resource %s" % resource_id) + resource = get_method(resource_id, **kwargs) + if not http_ok(resource): + if raise_on_error: + raise Exception("API connection problem: %s" % + json.dumps(resource)) + return resource + counter = 0 + elapsed = 0 while retries is None or counter < retries: + counter += 1 status = get_status(resource) code = status['code'] + if debug: + print("The resource has status code: %s" % code) if code == c.FINISHED: if counter > 1: + if debug: + print("Getting resource %s with args %s" % (resource_id, + kwargs)) # final get call to retrieve complete resource resource = get_method(resource, **kwargs) if raise_on_error: exception_on_error(resource) return resource - elif code == c.FAULTY: - raise ValueError(status) - time.sleep(get_exponential_wait(wait_time, counter)) + if code == c.FAULTY: + if raise_on_error: + exception_on_error(resource) + return resource + # resource is ok + progress = 0 + #pylint: disable=locally-disabled, bare-except + if status is not None: + progress = status.get("progress", 0) + if debug: + print("Progress: %s" % progress) + try: + if progress_cb is not None: + progress_cb(progress, resource) 
+ except: + print("WARNING: Progress callback raised exception. Please, " + "double check your function.") + progress = progress if progress > 0.8 \ + else 0 # dumping when almost finished + progress_dumping = (1 - progress) + _wait_time = get_exponential_wait(wait_time, + max(int(counter * progress_dumping), 1)) + _max_wait = max_elapsed_estimate - _wait_time + _wait_time = min(_max_wait, _wait_time) + if _wait_time <= 0: + # when the max_elapsed_estimate time is met, we still wait for + # the resource to be finished but we restart all counters and + # the exponentially growing time is initialized + _wait_time = wait_time + counter = 0 + elapsed = 0 + if debug: + print("Sleeping %s" % _wait_time) + time.sleep(_wait_time) + elapsed += _wait_time # retries for the finished status use a query string that gets the # minimal available resource if kwargs.get('query_string') is not None: tiny_kwargs = {'query_string': c.TINY_RESOURCE} else: tiny_kwargs = {} + if debug: + print("Getting only status for resource %s" % resource_id) resource = get_method(resource, **tiny_kwargs) + if not http_ok(resource): + resource["resource"] = resource_id + if raise_on_error: + raise Exception("API connection problem: %s" % + json.dumps(resource)) + return resource + if raise_on_error: exception_on_error(resource) return resource @@ -455,24 +579,22 @@ def http_ok(resource): """ if 'code' in resource: return resource['code'] in [HTTP_OK, HTTP_CREATED, HTTP_ACCEPTED] + return False - -class ResourceHandler(BigMLConnection): +class ResourceHandlerMixin(metaclass=abc.ABCMeta): """This class is used by the BigML class as a mixin that provides the get method for all kind of resources and auxiliar utilities to check their status. It should not be instantiated independently. """ - - def __init__(self): - """Initializes the ResourceHandler. This class is intended to be - used purely as a mixin on BigMLConnection and must not be - instantiated independently. + @abc.abstractmethod + def prepare_image_fields(self, model_info, input_data): + """This is an abstract method that should be implemented in the API + final class to create sources for the image fields used in the model """ - pass def get_resource(self, resource, **kwargs): """Retrieves a remote resource. @@ -490,27 +612,125 @@ raise ValueError("A resource id or structure is needed.") resource_id = get_resource_id(resource) + # adding the shared_ref if the API connection object has one + if hasattr(self, "shared_ref"): + kwargs.update({"shared_ref": self.shared_ref}) + if resource_id: - return self._get("%s%s" % (self.url, resource_id), - **kwargs) + kwargs.update({"resource_id": resource_id}) + return self._get("%s%s" % (self.url, resource_id), **kwargs) + return None + + def update_resource(self, resource, changes, **kwargs): + """Updates a remote resource. + + The resource parameter should be a string containing the + resource id or the dict returned by the corresponding create method. + """ + resource_id, error = self.final_resource(resource) + if error or resource_id is None: + raise Exception("Failed to update %s. Only correctly finished " + "resources can be updated. Please, check " + "the resource status."
% resource_id) + kwargs.update({"resource_id": resource_id}) + body = json.dumps(changes) + return self._update("%s%s" % (self.url, resource_id), body, **kwargs) + + def delete_resource(self, resource, **kwargs): + """Delete a remote resource + + """ + resource_id = get_resource_id(resource) + if resource_id: + return self._delete("%s%s" % (self.url, resource_id), **kwargs) + return None + + def _download_resource(self, resource, filename, retries=10): + """Download CSV information from downloadable resources + + """ + resource_id, error = self.final_resource(resource, retries=retries) + if error or resource_id is None: + raise Exception("Failed to download %s. Only correctly finished " + "resources can be downloaded. Please, check " + "the resource status. %s" % (resource_id, error)) + return self._download("%s%s%s" % (self.url, resource_id, + DOWNLOAD_DIR), + filename=filename, + retries=retries) + + #pylint: disable=locally-disabled,invalid-name def ok(self, resource, query_string='', wait_time=1, - retries=None, raise_on_error=False): + max_requests=None, raise_on_error=False, retries=None, + error_retries=None, max_elapsed_estimate=float('inf'), debug=False, + progress_cb=None): """Waits until the resource is finished or faulty, updates it and - returns True on success + returns True when a finished resource is correctly retrieved + and False if the retrieval fails or the resource is faulty. + + resource: (map) Resource structure + query_string: (string) Filters used on the resource attributes + wait_time: (number) Time to sleep between get requests + max_requests: (integer) Maximum number of get requests + raise_on_error: (boolean) Whether to raise errors or log them + retries: (integer) Now `max_requests` (deprecated) + error_retries: (integer) Retries for transient HTTP errors + max_elapsed_estimate: (integer) Elapsed number of seconds that we + expect the resource to be finished in. + This is not a hard limit for the method + to end, but an estimation of time to wait. 
+ debug: (boolean) Whether to print traces for every get call + progress_cb: (function) Callback function to log progress """ - if http_ok(resource): - resource.update(check_resource(resource, - query_string=query_string, - wait_time=wait_time, - retries=retries, - raise_on_error=raise_on_error, - api=self)) + def maybe_retrying(resource, error_retries, new_resource=None): + """Retrying retrieval if it's due to a transient error """ + if new_resource is None: + new_resource = resource + else: + new_resource.update({"object": resource["object"]}) + if new_resource.get('error', {}).get( + 'status', {}).get('type') == c.TRANSIENT \ + and error_retries is not None and error_retries > 0: + time.sleep(wait_time) + return self.ok(resource, query_string, wait_time, + max_requests, raise_on_error, retries, + error_retries - 1, max_elapsed_estimate, + debug) + resource.update(new_resource) + if raise_on_error: + exception_on_error(resource, logger=LOGGER) + return False + + new_resource = check_resource( \ + resource, + query_string=query_string, + wait_time=wait_time, + retries=max_requests, + max_elapsed_estimate=max_elapsed_estimate, + raise_on_error=False, # we don't raise on error to update always + api=self, + debug=debug, + progress_cb=progress_cb) + + if http_ok(new_resource): + resource.update(new_resource) + # try to recover from transient errors + if resource["error"] is not None: + return maybe_retrying(resource, error_retries) + + #pylint: disable=locally-disabled,bare-except + if raise_on_error: + exception_on_error(resource, logger=LOGGER) + else: + try: + exception_on_error(resource) + except: + return False return True - else: - LOGGER.error("The resource couldn't be created: %s", - resource['error']) + return maybe_retrying(resource, error_retries, + new_resource=new_resource) def _set_create_from_datasets_args(self, datasets, args=None, wait_time=3, retries=10, key=None): @@ -525,7 +745,7 @@ def _set_create_from_datasets_args(self, datasets, args=None, if args is not None: create_args.update(args) - if isinstance(datasets, basestring) and datasets.startswith('shared/'): + if isinstance(datasets, str) and datasets.startswith('shared/'): origin = datasets.replace('shared/', "") if get_resource_type(origin) != "dataset": create_args.update({"shared_hash": origin.split("/")[1]}) @@ -565,7 +785,7 @@ def _set_create_from_datasets_args(self, datasets, args=None, return create_args def _set_create_from_models_args(self, models, types, args=None, - wait_time=3, retries=10, key=None): + wait_time=3, retries=10): """Builds args dictionary for the create call from a list of models. The first argument needs to be a list of: - the model IDs @@ -605,6 +825,40 @@ def _set_create_from_models_args(self, models, types, args=None, return create_args + def _set_clone_from_args(self, origin, resource_type, args=None, + wait_time=3, retries=10): + """Builds args dictionary for the create call to clone resources. + The first argument needs to be a resource or resource ID that + has one of the types in resource_type + + """ + if isinstance(origin, dict) and origin.get("id"): + origin = origin.get("id") + + origin_id = get_resource_id(origin) + + if origin_id is not None: + check_resource_type(origin, resource_type, + message=("Failed to find a %s as the resource" + " to clone." 
% resource_type)) + origin = check_resource(origin, + query_string=c.TINY_RESOURCE, + wait_time=wait_time, retries=retries, + raise_on_error=True, api=self) + + create_args = {} + if args is not None: + create_args.update(args) + + if isinstance(origin, dict) and origin["object"].get("shared_hash"): + attr = "shared_hash" + origin_id = origin["object"][attr] + else: + attr = "origin" + create_args.update({attr: origin_id}) + + return create_args + def check_origins(self, dataset, model, args, model_types=None, wait_time=3, retries=10): """Returns True if the dataset and model needed to build @@ -681,7 +935,6 @@ " can be exported to PMML.") resource_id = get_resource_id(resource) - if resource_id: if pmml: # only models with no text fields can be exported @@ -695,7 +948,7 @@ "text and items fields cannot be " "exported to PMML.") if kwargs.get("query_string"): - kwargs["query_string"] += ";%s" % PMML_QS + kwargs["query_string"] += "&%s" % PMML_QS else: kwargs["query_string"] = PMML_QS @@ -713,15 +966,24 @@ filename = os.path.join( \ file_dir, resource_id.replace("/", "_")) if resource_type in COMPOSED_RESOURCES: + # inner models in composed resources need the shared reference + # to be downloaded + if resource.startswith("shared"): + kwargs.update( + {"shared_ref": resource_id.replace("shared/", "")}) + elif "shared_ref" in kwargs and not resource.startswith("shared"): + kwargs["shared_ref"] = "%s,%s" % (kwargs["shared_ref"], + resource_id) for component_id in resource_info["object"]["models"]: - # for weighted fusions we need to retrieve the component - # ID + # for weighted fusions we need to retrieve the component ID if isinstance(component_id, dict): component_id = component_id['id'] + component_filename = os.path.join( + os.path.dirname(filename), + component_id.replace("/", "_")) self.export( \ component_id, - filename=os.path.join(os.path.dirname(filename), - component_id.replace("/", "_")), + filename=component_filename, pmml=pmml, **kwargs) if kwargs.get("query_string") and \ @@ -735,9 +997,8 @@ resource_info).toprettyxml() return save(resource_info, filename) return save_json(resource_info, filename) - else: - raise ValueError("First agument is expected to be a valid" - " resource ID or structure.") + raise ValueError("First argument is expected to be a valid" + " resource ID or structure.") def export_last(self, tags, filename=None, resource_type="model", project=None, @@ -757,9 +1018,9 @@ if tags is not None and tags != '': query_string = LIST_LAST % tags if project is not None: - query_string += ";project=%s" % project + query_string += "&project=%s" % project - kwargs.update({'query_string': "%s;%s" % \ + kwargs.update({'query_string': "%s&%s" % \ (query_string, kwargs.get('query_string', ''))}) response = self._list("%s%s" % (self.url, resource_type), @@ -782,9 +1043,28 @@ os.path.dirname(filename), component_id.replace("/", "_"))) return save_json(resource_info, filename) - else: - raise ValueError("No %s found with tags %s." % (resource_type, - tags)) - else: - raise ValueError("First agument is expected to be a non-empty" - " tag.") + raise ValueError("No %s found with tags %s."
% (resource_type, + tags)) + raise ValueError("First argument is expected to be a non-empty" + " tag.") + + def final_resource(self, resource, retries=10): + """Waits for a resource to finish or fail and returns + its ID and the error information + + """ + resource = check_resource( \ + resource, + query_string=c.TINY_RESOURCE, + retries=retries, + api=self) + error = resource.get("error") + try: + if resource.get("object", resource)["status"]["code"] == c.FAULTY: + error = "%s (%s)" % (resource.get("error"), + resource.get("object", resource)[ \ + "status"]["message"]) + except KeyError: + error = "Could not get resource status info for %s" % \ + resource.get("resource", resource) + return get_resource_id(resource), error diff --git a/bigml/samplehandler.py b/bigml/api_handlers/samplehandler.py similarity index 79% rename from bigml/samplehandler.py rename to bigml/api_handlers/samplehandler.py index 9ccb7af2..d50baf0b 100644 --- a/bigml/samplehandler.py +++ b/bigml/api_handlers/samplehandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,15 +27,14 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, - get_sample_id, get_resource_type, - get_dataset_id, check_resource) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + get_resource_type, get_dataset_id, check_resource from bigml.constants import (SAMPLE_PATH, DATASET_PATH, TINY_RESOURCE) -class SampleHandler(ResourceHandler): +class SampleHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the samples' REST calls. It should not be instantiated independently. @@ -87,10 +86,7 @@ def get_sample(self, sample, query_string=''): """ check_resource_type(sample, SAMPLE_PATH, message="A sample id is needed.") - sample_id = get_sample_id(sample) - if sample_id: - return self._get("%s%s" % (self.url, sample_id), - query_string=query_string) + return self.get_resource(sample, query_string=query_string) def list_samples(self, query_string=''): """Lists all your samples. @@ -104,17 +100,12 @@ def update_sample(self, sample, changes): """ check_resource_type(sample, SAMPLE_PATH, message="A sample id is needed.") - sample_id = get_sample_id(sample) - if sample_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, sample_id), body) + return self.update_resource(sample, changes) - def delete_sample(self, sample): + def delete_sample(self, sample, query_string=''): """Deletes a sample.
""" check_resource_type(sample, SAMPLE_PATH, message="A sample id is needed.") - sample_id = get_sample_id(sample) - if sample_id: - return self._delete("%s%s" % (self.url, sample_id)) + return self.delete_resource(sample, query_string=query_string) diff --git a/bigml/scripthandler.py b/bigml/api_handlers/scripthandler.py similarity index 59% rename from bigml/scripthandler.py rename to bigml/api_handlers/scripthandler.py index be479e31..d03ed771 100644 --- a/bigml/scripthandler.py +++ b/bigml/api_handlers/scripthandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2015 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -22,20 +22,56 @@ """ import os +import re + +from urllib.parse import urljoin + +import requests + try: import simplejson as json except ImportError: import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, - get_script_id, get_resource_type, - check_resource) + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + get_script_id, get_resource_type, check_resource from bigml.constants import SCRIPT_PATH, TINY_RESOURCE +from bigml.util import is_url +from bigml.bigmlconnection import HTTP_OK -class ScriptHandler(ResourceHandler): +def retrieve_script_args(gist_url): + """Retrieves the information to create a script from a public + gist url + + """ + + response = requests.get(gist_url) + response.encoding = "utf8" + if response.status_code == HTTP_OK: + pattern = r"\"[^\"]*?\/raw\/[^\"]*" + urls = re.findall(pattern, response.text) + script_args = {} + + for url in urls: + url = urljoin(gist_url, url.replace("\"", "")) + if url.endswith(".whizzml"): + response = requests.get(url) + if response.status_code == HTTP_OK: + script_args["source_code"] = response.text + if url.endswith(".json"): + response = requests.get(url, \ + headers={"content-type": "application/json"}) + if response.status_code == HTTP_OK: + script_args["json"] = response.text + return script_args + raise ValueError("The url did not contain the expected structure.") + + +class ScriptHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the whizzml script' REST calls. It should not be instantiated independently. @@ -76,14 +112,19 @@ def create_script(self, source_code=None, args=None, raise_on_error=True, api=self) create_args.update({ "origin": script_id}) - elif isinstance(source_code, basestring): - try: - if os.path.exists(source_code): - with open(source_code) as code_file: - source_code = code_file.read() - except IOError: - raise IOError("Could not open the source code file %s." % - source_code) + elif isinstance(source_code, str): + if is_url(source_code): + script_args = retrieve_script_args(source_code) + source_code = script_args.get("source_code") + create_args.update(json.loads(script_args.get("json"))) + else: + try: + if os.path.exists(source_code): + with open(source_code) as code_file: + source_code = code_file.read() + except IOError: + raise IOError("Could not open the source code file %s." 
% + source_code) create_args.update({ "source_code": source_code}) else: @@ -107,10 +148,7 @@ def get_script(self, script, query_string=''): """ check_resource_type(script, SCRIPT_PATH, message="A script id is needed.") - script_id = get_script_id(script) - if script_id: - return self._get("%s%s" % (self.url, script_id), - query_string=query_string) + return self.get_resource(script, query_string=query_string) def list_scripts(self, query_string=''): """Lists all your scripts. @@ -124,17 +162,24 @@ def update_script(self, script, changes): """ check_resource_type(script, SCRIPT_PATH, message="A script id is needed.") - script_id = get_script_id(script) - if script_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, script_id), body) + return self.update_resource(script, changes) + + def clone_script(self, script, + args=None, wait_time=3, retries=10): + """Creates a cloned script from an existing `script` + + """ + create_args = self._set_clone_from_args( + script, "script", args=args, wait_time=wait_time, + retries=retries) + + body = json.dumps(create_args) + return self._create(self.script_url, body) - def delete_script(self, script): + def delete_script(self, script, query_string=''): """Deletes a script. """ check_resource_type(script, SCRIPT_PATH, message="A script id is needed.") - script_id = get_script_id(script) - if script_id: - return self._delete("%s%s" % (self.url, script_id)) + return self.delete_resource(script, query_string=query_string) diff --git a/bigml/api_handlers/sourcehandler.py b/bigml/api_handlers/sourcehandler.py new file mode 100644 index 00000000..bd4b6e6b --- /dev/null +++ b/bigml/api_handlers/sourcehandler.py @@ -0,0 +1,637 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
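The gist-based creation path added above can be exercised directly from the API client. A minimal usage sketch, assuming a connection configured through the usual BIGML_USERNAME/BIGML_API_KEY environment variables and a hypothetical public gist URL exposing a .whizzml file and a .json metadata file:

from bigml.api import BigML

api = BigML()  # credentials are read from the environment
# create_script detects URLs via is_url() and delegates to
# retrieve_script_args, which downloads the gist's raw .whizzml source
# and .json metadata; the gist URL below is a placeholder
script = api.create_script("https://gist.github.com/user/script_gist",
                           args={"name": "gist script"})
api.ok(script)
# clone_script issues a create call whose origin is the existing script
cloned = api.clone_script(script, args={"name": "gist script clone"})
api.ok(cloned)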
+ +"""Base class for sources' REST calls + + https://bigml.com/api/sources + +""" + +import sys +import os +import numbers +import time +import logging + +from urllib import parse + +try: + #added to allow GAE to work + from google.appengine.api import urlfetch + GAE_ENABLED = True +except ImportError: + GAE_ENABLED = False + +try: + import simplejson as json +except ImportError: + import json + +try: + from pandas import DataFrame + from io import StringIO + PANDAS_READY = True +except ImportError: + PANDAS_READY = False + +from zipfile import ZipFile + +import mimetypes +import requests + +from requests_toolbelt import MultipartEncoder + +from bigml.util import is_url, maybe_save, filter_by_extension, \ + infer_field_type +from bigml.bigmlconnection import ( + HTTP_CREATED, HTTP_BAD_REQUEST, + HTTP_UNAUTHORIZED, HTTP_PAYMENT_REQUIRED, HTTP_NOT_FOUND, + HTTP_TOO_MANY_REQUESTS, + HTTP_INTERNAL_SERVER_ERROR, GAE_ENABLED, SEND_JSON, LOGGER) +from bigml.bigmlconnection import json_load +from bigml.api_handlers.resourcehandler import check_resource_type, \ + resource_is_ready, get_source_id, get_id +from bigml.constants import SOURCE_PATH, IMAGE_EXTENSIONS +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.fields import Fields + + +MAX_CHANGES = 5 +MAX_RETRIES = 5 + +def compact_regions(regions): + """Returns the list of regions in the compact value used for updates """ + + out_regions = [] + for region in regions: + new_region = [] + new_region.append(region.get("label")) + new_region.append(region.get("xmin")) + new_region.append(region.get("ymin")) + new_region.append(region.get("xmax")) + new_region.append(region.get("ymax")) + out_regions.append(new_region) + return out_regions + + +class SourceHandlerMixin(ResourceHandlerMixin): + + """This class is used by the BigML class as + a mixin that provides the REST calls to sources. It should not + be instantiated independently. + + """ + + def __init__(self): + """Initializes the SourceHandler. This class is intended to be + used as a mixin on ResourceHandler, that inherits its + attributes and basic method from BigMLConnection, and must not be + instantiated independently. + + """ + self.source_url = self.url + SOURCE_PATH + + def _create_remote_source(self, url, args=None): + """Creates a new source using a URL + + """ + create_args = {} + if args is not None: + create_args.update(args) + create_args.update({"remote": url}) + create_args = self._add_project(create_args) + body = json.dumps(create_args) + return self._create(self.source_url, body) + + def _create_connector_source(self, connector, args=None): + """Creates a new source using an external connector + + """ + create_args = {} + if args is not None: + create_args.update(args) + create_args.update({"external_data": connector}) + create_args = self._add_project(create_args) + body = json.dumps(create_args) + return self._create(self.source_url, body) + + def _create_inline_source(self, src_obj, args=None): + """Create source from inline data + + The src_obj data should be a list of rows stored as dict or + list objects. 
+ """ + create_args = {} + if args is not None: + create_args.update(args) + create_args = self._add_project(create_args) + + # some basic validation + if (not isinstance(src_obj, list) or ( + not all(isinstance(row, dict) for row in src_obj) and + not all(isinstance(row, list) for row in src_obj))): + raise TypeError( + 'ERROR: inline source must be a list of dicts or a ' + 'list of lists') + + create_args.update({"data": json.dumps(src_obj)}) + body = json.dumps(create_args) + return self._create(self.source_url, body) + + def _create_local_source(self, file_name, args=None): + """Creates a new source using a local file. + + + """ + create_args = {} + if args is not None: + create_args.update(args) + + for key, value in list(create_args.items()): + if value is not None and isinstance(value, (list, dict)): + create_args[key] = json.dumps(value) + elif value is not None and isinstance(value, numbers.Number): + # the multipart encoder only accepts strings and files + create_args[key] = str(value) + + + code = HTTP_INTERNAL_SERVER_ERROR + resource_id = None + location = None + resource = None + error = { + "status": { + "code": code, + "message": "The resource couldn't be created"}} + + #pylint: disable=locally-disabled,consider-using-with + try: + if isinstance(file_name, str): + name = os.path.basename(file_name) + file_handler = open(file_name, "rb") + else: + name = 'Stdin input' + file_handler = file_name + except IOError: + sys.exit("ERROR: cannot read training set") + qs_params = self._add_credentials({}) + qs_str = "?%s" % parse.urlencode(qs_params) if qs_params else "" + create_args = self._add_project(create_args, True) + if GAE_ENABLED: + try: + req_options = { + 'url': self.source_url + qs_str, + 'method': urlfetch.POST, + 'headers': SEND_JSON, + 'data': create_args, + 'files': {name: file_handler}, + 'validate_certificate': self.domain.verify + } + response = urlfetch.fetch(**req_options) + except urlfetch.Error as exception: + LOGGER.error("HTTP request error: %s", + str(exception)) + return maybe_save(resource_id, self.storage, code, + location, resource, error) + else: + try: + files = {"file": (name, + file_handler, + mimetypes.guess_type(name)[0])} + files.update(create_args) + multipart = MultipartEncoder(fields=files) + response = requests.post( \ + self.source_url, + params=qs_params, + headers={'Content-Type': multipart.content_type}, + data=multipart, verify=self.domain.verify) + except (requests.ConnectionError, + requests.Timeout, + requests.RequestException) as exc: + LOGGER.error("HTTP request error: %s", str(exc)) + code = HTTP_INTERNAL_SERVER_ERROR + return maybe_save(resource_id, self.storage, code, + location, resource, error) + try: + code = response.status_code + if code == HTTP_CREATED: + location = response.headers['location'] + resource = json_load(response.content) + resource_id = resource['resource'] + error = None + elif code in [HTTP_BAD_REQUEST, + HTTP_UNAUTHORIZED, + HTTP_PAYMENT_REQUIRED, + HTTP_NOT_FOUND, + HTTP_TOO_MANY_REQUESTS]: + error = json_load(response.content) + else: + LOGGER.error("Unexpected error (%s)", code) + code = HTTP_INTERNAL_SERVER_ERROR + + except ValueError: + LOGGER.error("Malformed response") + + return maybe_save(resource_id, self.storage, code, + location, resource, error) + + def clone_source(self, source, + args=None, wait_time=3, retries=10): + """Creates a cloned source from an existing `source` + + """ + create_args = self._set_clone_from_args( + source, "source", args=args, wait_time=wait_time, retries=retries) + + 
body = json.dumps(create_args) + return self._create(self.source_url, body) + + def _create_composite(self, sources, args=None): + """Creates a composite source from an existing `source` or list of + sources + + """ + create_args = {} + if args is not None: + create_args.update(args) + + if not isinstance(sources, list): + sources = [sources] + + source_ids = [] + for source in sources: + # we accept full resource IDs or pure IDs and produce pure IDs + try: + source_id = get_source_id(source) + except ValueError: + source_id = None + + if source_id is None: + pure_id = get_id(source) + source_id = "source/%s" % pure_id + else: + pure_id = source_id.replace("source/", "") + + if pure_id is not None: + source_ids.append(pure_id) + else: + raise Exception("A source or list of source ids" + " are needed to create a" + " source.") + create_args.update({"sources": source_ids}) + + body = json.dumps(create_args) + return self._create(self.source_url, body) + + def create_source(self, path=None, args=None): + """Creates a new source. + + The source can be a local file path or a URL. + We also accept a pandas DataFrame as first argument + TODO: add async load and progress bar in Python 3 + + """ + + if path is None: + raise Exception('A local path or a valid URL must be provided.') + + if PANDAS_READY and isinstance(path, DataFrame): + buffer = StringIO(path.to_csv(index=False)) + return self._create_local_source(file_name=buffer, args=args) + if is_url(path): + return self._create_remote_source(path, args=args) + if isinstance(path, list): + try: + if all(get_id(item) is not None \ + for item in path): + # list of sources + return self._create_composite(path, args=args) + except ValueError: + pass + return self._create_inline_source(path, args=args) + if isinstance(path, dict): + return self._create_connector_source(path, args=args) + try: + if get_source_id(path) is not None: + # cloning source + return self.clone_source(path, args=args) + except ValueError: + pass + return self._create_local_source(file_name=path, args=args) + + def create_annotated_source(self, annotations_file, args=None): + """Creates a composite source for annotated images. + + Images are usually associated to other information, like labels or + numeric fields, which can be regarded as additional attributes + related to that image. The associated information can be described + as annotations for each of the images. These annotations can be + provided as a JSON file that contains the properties associated to + each image and the name of the image file, that is used as foreign key. + The meta information needed to create the structure of the composite + source, such as the fields to be associated and their types, + should also be included in the annotations file. + This is an example of the expected structure of the annotations file: + + {"description": "Fruit images to test colour distributions", + "images_file": "./fruits_hist.zip", + "new_fields": [{"name": "new_label", "optype": "categorical"}], + "source_id": null, + "annotations": [ + {"file": "f1/fruits1f.png", "new_label": "True"}, + {"file": "f1/fruits1.png", "new_label": "False"}, + {"file": "f2/fruits2e.png", "new_label": "False"}]} + + The "images_file" attribute should contain the path to zip-compressed + images file and the "annotations" attribute the corresponding + annotations. The "new_fields" attribute should be a list of the fields + used as annotations for the images. 
+ + Also, if you prefer to keep your annotations in a separate file, you + can point to that file in the "annotations" attribute: + + {"description": "Fruit images to test colour distributions", + "images_file": "./fruits_hist.zip", + "new_fields": [{"name": "new_label", "optype": "categorical"}], + "source_id": null, + "annotations": "./annotations_detail.json"} + + The created source will contain the fields associated to the + uploaded images, plus an additional field named "new_label" with the + values defined in this file. + + If a source has already been created from this collection of images, + you can provide the ID of this source in the "source_id" attribute. + Thus, the existing source will be updated to add the new annotations + (if still open for editing) or will be cloned (if the source is + closed for editing) and the new source will be updated. In both cases, + images won't be uploaded when "source_id" is used. + + """ + + if not os.path.exists(annotations_file): + raise ValueError("A local path to a JSON file must be provided.") + + with open(annotations_file) as annotations_handler: + annotations_info = json.load(annotations_handler) + + if annotations_info.get("images_file") is None: + raise ValueError("Failed to find the `images_file` attribute " + "in the annotations file %s" % annotations_file) + base_directory = os.path.dirname(annotations_file) + zip_path = os.path.join(base_directory, + annotations_info.get("images_file")) + if isinstance(annotations_info.get("annotations"), str): + annotations = os.path.join(base_directory, + annotations_info.get("annotations")) + else: + annotations = annotations_info.get("annotations") + # check metadata file attributes + if annotations_info.get("source_id") is None: + # upload the compressed images + source = self.create_source(zip_path, args=args) + if not self.ok(source): + raise IOError("A source could not be created for %s" % + zip_path) + source_id = source["resource"] + else: + source_id = annotations_info.get("source_id") + return self.update_composite_annotations( + source_id, zip_path, annotations, + new_fields=annotations_info.get("new_fields")) + + def update_composite_annotations(self, source, images_file, + annotations, new_fields=None, + source_changes=None): + """Updates a composite source to add a list of annotations. + The annotations argument should contain annotations in a BigML-COCO + syntax: + + [{"file": "image1.jpg", + "label": "label1"}, + {"file": "image2.jpg", + "label": "label1"}, + {"file": "image3.jpg", + "label": "label2"}] + + or point to a JSON file that contains that information, + and the images_file argument should point to a zip file that + contains the referred images, sorted as uploaded to build the source. + + If the attributes in the annotations file ("file" excluded) are not + already defined in the composite source, the `new_fields` argument + can be set to contain a list of the fields and types to be added + + [{"name": "label", "optype": "categorical"}] + """ + if source_changes is None: + source_changes = {} + + source_id = get_source_id(source) + if source_id: + source = self.get_source(source_id) + if source.get("object", {}).get("closed"): + source = self.clone_source(source_id) + self.ok(source) + # corresponding source IDs + try: + sources = source["object"]["sources"] + except KeyError: + raise ValueError("Failed to find the list of sources in the " + "created composite: %s."
% source["resource"]) + try: + with ZipFile(images_file) as zip_handler: + file_list = zip_handler.namelist() + file_list = filter_by_extension(file_list, IMAGE_EXTENSIONS) + except IOError: + raise ValueError("Failed to find the list of images in zip %s" % + images_file) + + file_to_source = dict(zip(file_list, sources)) + + fields = Fields(source) + + # adding the annotation values + if annotations: + if isinstance(annotations, str): + # path to external annotations file + try: + with open(annotations) as \ + annotations_handler: + annotations = json.load(annotations_handler) + except IOError as exc: + raise ValueError("Failed to find annotations in %s" % + exc) + elif not isinstance(annotations, list): + raise ValueError("The annotations attribute needs to contain" + " a list of annotations or the path to " + " a file with such a list.") + if new_fields is None: + new_fields = {} + for annotation in annotations: + for field, value in annotation.items(): + if field != "file" and field not in new_fields: + new_fields[field] = infer_field_type(field, value) + new_fields = list(new_fields.values()) + + # creating new annotation fields, if absent + if new_fields: + field_names = [field["name"] for _, field in fields.fields.items()] + changes = [] + for field_info in new_fields: + if field_info.get("name") not in field_names: + changes.append(field_info) + if changes: + source_changes.update({"new_fields": changes}) + if source_changes: + source = self.update_source(source["resource"], source_changes) + self.ok(source) + + fields = Fields(source) + + changes = [] + changes_dict = {} + for annotation in annotations: + filename = annotation.get("file") + try: + _ = file_list.index(filename) + except ValueError: + LOGGER.error("WARNING: Could not find annotated file (%s)" + " in the composite's sources list", filename) + continue + for key in annotation.keys(): + if key == "file": + continue + if key not in changes_dict: + changes_dict[key] = [] + value = annotation.get(key) + changes_dict[key].append((value, file_to_source[filename])) + + #pylint: disable=locally-disabled,broad-except + for field, values in changes_dict.items(): + try: + optype = fields.fields[fields.field_id(field)]["optype"] + if optype == "categorical": + sorted_values = sorted(values, key=lambda x: x[0]) + old_value = None + source_ids = [] + for value, source_id in sorted_values: + if value != old_value and old_value is not None: + changes.append({"field": field, "value": old_value, + "components": source_ids}) + source_ids = [source_id] + old_value = value + else: + source_ids.append(source_id) + if old_value is None: + old_value = value + changes.append({"field": field, "value": value, + "components": source_ids}) + elif optype == "regions": + for value, source_id in values: + if isinstance(value, list): + # dictionary should contain the bigml-coco format + value = compact_regions(value) + changes.append( + {"field": field, + "value": value, + "components": [source_id]}) + else: + for value, source_id in values: + changes.append( + {"field": field, + "value": value, + "components": [source_id]}) + except Exception: + LOGGER.error("WARNING: Problem adding annotation to %s (%s)", + field, values) + pass + + # we need to limit the amount of changes per update + batches_number = int(len(changes) / MAX_CHANGES) + for offset in range(0, batches_number + 1): + new_batch = changes[ + offset * MAX_CHANGES: (offset + 1) * MAX_CHANGES] + if new_batch: + source = self.update_source(source, + {"row_values": new_batch}) + counter = 0 
+ while source["error"] is not None and counter < MAX_RETRIES: + # retrying in case update is temporarily unavailable + counter += 1 + time.sleep(counter) + source = self.get_source(source) + self.ok(source) + source = self.update_source(source, + {"row_values": new_batch}) + if source["error"] is not None: + err_str = json.dumps(source["error"]) + v_str = json.dumps(new_batch) + LOGGER.error("WARNING: Some annotations were not updated " + f" (error: {err_str}, values: {v_str})") + if not self.ok(source): + raise Exception( + f"Failed to update {len(new_batch)} annotations.") + time.sleep(0.1) + + return source + + def get_source(self, source, query_string=''): + """Retrieves a remote source. + The source parameter should be a string containing the + source id or the dict returned by create_source. + As source is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, thet function will + return a dict that encloses the source values and state info + available at the time it is called. + """ + check_resource_type(source, SOURCE_PATH, + message="A source id is needed.") + return self.get_resource(source, query_string=query_string) + + def source_is_ready(self, source): + """Checks whether a source' status is FINISHED. + + """ + check_resource_type(source, SOURCE_PATH, + message="A source id is needed.") + source = self.get_source(source) + return resource_is_ready(source) + + def list_sources(self, query_string=''): + """Lists all your remote sources. + + """ + return self._list(self.source_url, query_string) + + def update_source(self, source, changes): + """Updates a source. + + Updates remote `source` with `changes'. + + """ + check_resource_type(source, SOURCE_PATH, + message="A source id is needed.") + return self.update_resource(source, changes) + + def delete_source(self, source, query_string=''): + """Deletes a remote source permanently. + + """ + check_resource_type(source, SOURCE_PATH, + message="A source id is needed.") + return self.delete_resource(source, query_string=query_string) diff --git a/bigml/statisticaltesthandler.py b/bigml/api_handlers/statisticaltesthandler.py similarity index 78% rename from bigml/statisticaltesthandler.py rename to bigml/api_handlers/statisticaltesthandler.py index 39bd59ce..eca91255 100644 --- a/bigml/statisticaltesthandler.py +++ b/bigml/api_handlers/statisticaltesthandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,15 +27,14 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, - get_statistical_test_id, get_resource_type, - get_dataset_id, check_resource) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + get_resource_type, get_dataset_id, check_resource from bigml.constants import (STATISTICAL_TEST_PATH, DATASET_PATH, TINY_RESOURCE) -class StatisticalTestHandler(ResourceHandler): +class StatisticalTestHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the statistical tests' REST calls. It should not be instantiated independently. 
@@ -87,10 +86,7 @@ def get_statistical_test(self, statistical_test, query_string=''): """ check_resource_type(statistical_test, STATISTICAL_TEST_PATH, message="A statistical test id is needed.") - statistical_test_id = get_statistical_test_id(statistical_test) - if statistical_test_id: - return self._get("%s%s" % (self.url, statistical_test_id), - query_string=query_string) + return self.get_resource(statistical_test, query_string=query_string) def list_statistical_tests(self, query_string=''): """Lists all your statistical tests. @@ -104,17 +100,13 @@ def update_statistical_test(self, statistical_test, changes): """ check_resource_type(statistical_test, STATISTICAL_TEST_PATH, message="A statistical test id is needed.") - statistical_test_id = get_statistical_test_id(statistical_test) - if statistical_test_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, statistical_test_id), body) + return self.update_resource(statistical_test, changes) - def delete_statistical_test(self, statistical_test): + def delete_statistical_test(self, statistical_test, query_string=''): """Deletes a statistical test. """ check_resource_type(statistical_test, STATISTICAL_TEST_PATH, message="A statistical test id is needed.") - statistical_test_id = get_statistical_test_id(statistical_test) - if statistical_test_id: - return self._delete("%s%s" % (self.url, statistical_test_id)) + return self.delete_resource(statistical_test, + query_string=query_string) diff --git a/bigml/timeserieshandler.py b/bigml/api_handlers/timeserieshandler.py similarity index 75% rename from bigml/timeserieshandler.py rename to bigml/api_handlers/timeserieshandler.py index 6d637f16..2d57a08c 100644 --- a/bigml/timeserieshandler.py +++ b/bigml/api_handlers/timeserieshandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2017-2019 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,13 +27,13 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, resource_is_ready, - get_time_series_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + resource_is_ready from bigml.constants import TIME_SERIES_PATH -class TimeSeriesHandler(ResourceHandler): +class TimeSeriesHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -77,13 +77,10 @@ def get_time_series(self, time_series, query_string='', """ check_resource_type(time_series, TIME_SERIES_PATH, message="A time series id is needed.") - time_series_id = get_time_series_id( - time_series) - if time_series_id: - return self._get("%s%s" % (self.url, time_series_id), - query_string=query_string, - shared_username=shared_username, - shared_api_key=shared_api_key) + return self.get_resource(time_series, + query_string=query_string, + shared_username=shared_username, + shared_api_key=shared_api_key) def time_series_is_ready(self, time_series, **kwargs): """Checks whether a time series's status is FINISHED. 
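Note: the `*_is_ready` helpers delegate to `resource_is_ready` after a fresh GET, so they can be combined with `api.ok` polling. A hedged sketch for time series (the dataset id is a placeholder):

    from bigml.api import BigML

    api = BigML()
    time_series = api.create_time_series("dataset/5026965515526876630001b2")
    api.ok(time_series)  # waits until the resource is FINISHED or FAULTY
    if api.time_series_is_ready(time_series):
        time_series = api.get_time_series(time_series)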
@@ -106,19 +103,24 @@ def update_time_series(self, time_series, changes): """ check_resource_type(time_series, TIME_SERIES_PATH, message="A time series id is needed.") - time_series_id = get_time_series_id( - time_series) - if time_series_id: - body = json.dumps(changes) - return self._update( - "%s%s" % (self.url, time_series_id), body) - - def delete_time_series(self, time_series): + return self.update_resource(time_series, changes) + + def delete_time_series(self, time_series, query_string=''): """Deletes a time series. """ check_resource_type(time_series, TIME_SERIES_PATH, message="A time series id is needed.") - time_series_id = get_time_series_id(time_series) - if time_series_id: - return self._delete("%s%s" % (self.url, time_series_id)) + return self.delete_resource(time_series, query_string=query_string) + + def clone_time_series(self, time_series, + args=None, wait_time=3, retries=10): + """Creates a cloned time series from an existing `time series` + + """ + create_args = self._set_clone_from_args( + time_series, "timeseries", args=args, wait_time=wait_time, + retries=retries) + + body = json.dumps(create_args) + return self._create(self.time_series_url, body) diff --git a/bigml/topicdistributionhandler.py b/bigml/api_handlers/topicdistributionhandler.py similarity index 57% rename from bigml/topicdistributionhandler.py rename to bigml/api_handlers/topicdistributionhandler.py index 962b0970..117cefd2 100644 --- a/bigml/topicdistributionhandler.py +++ b/bigml/api_handlers/topicdistributionhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2016-2019 BigML +# Copyright 2016-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -27,15 +27,14 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, - get_resource_type, - get_topic_distribution_id, - check_resource, get_topic_model_id) -from bigml.constants import (TOPIC_DISTRIBUTION_PATH, TINY_RESOURCE) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + get_resource_type, check_resource, get_topic_model_id +from bigml.constants import TOPIC_MODEL_PATH, TOPIC_DISTRIBUTION_PATH, \ + IMAGE_FIELDS_FILTER, SPECIFIC_EXCLUDES -class TopicDistributionHandler(ResourceHandler): +class TopicDistributionHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -55,16 +54,31 @@ def create_topic_distribution(self, topic_model, input_data=None, """Creates a new topic distribution. """ + + resource_type = get_resource_type(topic_model) + if resource_type != TOPIC_MODEL_PATH: + raise Exception("A topic model resource id is needed" + " to create a topic distribution. %s found." % + resource_type) + topic_model_id = get_topic_model_id(topic_model) - if topic_model_id is not None: - check_resource(topic_model_id, - query_string=TINY_RESOURCE, - wait_time=wait_time, retries=retries, - raise_on_error=True, api=self) + if topic_model_id is None: + raise Exception("Failed to detect a correct topic model structure" + " in %s."
% topic_model) + + if isinstance(topic_model, dict) and \ + topic_model.get("resource") is not None: + # retrieving fields info from model structure + model_info = topic_model else: - resource_type = get_resource_type(topic_model) - raise Exception("A topic model id is needed to create a" - " topic distribution. %s found." % resource_type) + image_fields_filter = IMAGE_FIELDS_FILTER + "," + \ + ",".join(SPECIFIC_EXCLUDES[resource_type]) + model_info = check_resource(topic_model_id, + query_string=image_fields_filter, + wait_time=wait_time, + retries=retries, + raise_on_error=True, + api=self) if input_data is None: input_data = {} @@ -72,12 +86,12 @@ def create_topic_distribution(self, topic_model, input_data=None, if args is not None: create_args.update(args) create_args.update({ - "input_data": input_data, + "input_data": self.prepare_image_fields(model_info, input_data), "topicmodel": topic_model_id}) body = json.dumps(create_args) return self._create(self.topic_distribution_url, body, - verify=self.verify_prediction) + verify=self.domain.verify_prediction) def get_topic_distribution(self, topic_distribution, query_string=''): """Retrieves a topic distribution. @@ -85,10 +99,7 @@ def get_topic_distribution(self, topic_distribution, query_string=''): """ check_resource_type(topic_distribution, TOPIC_DISTRIBUTION_PATH, message="A topic distribution id is needed.") - topic_distribution_id = get_topic_distribution_id(topic_distribution) - if topic_distribution_id: - return self._get("%s%s" % (self.url, topic_distribution_id), - query_string=query_string) + return self.get_resource(topic_distribution, query_string=query_string) def list_topic_distributions(self, query_string=''): """Lists all your topic distributions. @@ -102,18 +113,13 @@ def update_topic_distribution(self, topic_distribution, changes): """ check_resource_type(topic_distribution, TOPIC_DISTRIBUTION_PATH, message="A topic distribution id is needed.") - topic_distribution_id = get_topic_distribution_id(topic_distribution) - if topic_distribution_id: - body = json.dumps(changes) - return self._update("%s%s" % \ - (self.url, topic_distribution_id), body) + return self.update_resource(topic_distribution, changes) - def delete_topic_distribution(self, topic_distribution): + def delete_topic_distribution(self, topic_distribution, query_string=''): """Deletes a topic distribution. """ check_resource_type(topic_distribution, TOPIC_DISTRIBUTION_PATH, message="A topic distribution id is needed.") - topic_distribution_id = get_topic_distribution_id(topic_distribution) - if topic_distribution_id: - return self._delete("%s%s" % (self.url, topic_distribution_id)) + return self.delete_resource(topic_distribution, + query_string=query_string) diff --git a/bigml/topicmodelhandler.py b/bigml/api_handlers/topicmodelhandler.py similarity index 75% rename from bigml/topicmodelhandler.py rename to bigml/api_handlers/topicmodelhandler.py index 3c959ae2..a34b904b 100644 --- a/bigml/topicmodelhandler.py +++ b/bigml/api_handlers/topicmodelhandler.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=abstract-method # -# Copyright 2016-2019 BigML +# Copyright 2016-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -27,13 +27,13 @@ import json -from bigml.resourcehandler import ResourceHandler -from bigml.resourcehandler import (check_resource_type, resource_is_ready, - get_topic_model_id) +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + resource_is_ready from bigml.constants import TOPIC_MODEL_PATH -class TopicModelHandler(ResourceHandler): +class TopicModelHandlerMixin(ResourceHandlerMixin): """This class is used by the BigML class as a mixin that provides the REST calls models. It should not be instantiated independently. @@ -74,12 +74,11 @@ def get_topic_model(self, topic_model, query_string='', """ check_resource_type(topic_model, TOPIC_MODEL_PATH, message="A Topic Model id is needed.") - topic_model_id = get_topic_model_id(topic_model) - if topic_model_id: - return self._get("%s%s" % (self.url, topic_model_id), - query_string=query_string, - shared_username=shared_username, - shared_api_key=shared_api_key) + return self.get_resource(topic_model, + query_string=query_string, + shared_username=shared_username, + shared_api_key=shared_api_key) + def topic_model_is_ready(self, topic_model, **kwargs): """Checks whether a topic model's status is FINISHED. @@ -102,17 +101,24 @@ def update_topic_model(self, topic_model, changes): """ check_resource_type(topic_model, TOPIC_MODEL_PATH, message="A topic model id is needed.") - topic_model_id = get_topic_model_id(topic_model) - if topic_model_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, topic_model_id), body) + return self.update_resource(topic_model, changes) - def delete_topic_model(self, topic_model): + def delete_topic_model(self, topic_model, query_string=''): """Deletes a Topic Model. """ check_resource_type(topic_model, TOPIC_MODEL_PATH, message="A topic model id is needed.") - topic_model_id = get_topic_model_id(topic_model) - if topic_model_id: - return self._delete("%s%s" % (self.url, topic_model_id)) + return self.delete_resource(topic_model, query_string=query_string) + + def clone_topic_model(self, topic_model, + args=None, wait_time=3, retries=10): + """Creates a cloned topic model from an existing `topic model` + + """ + create_args = self._set_clone_from_args( + topic_model, "topicmodel", args=args, wait_time=wait_time, + retries=retries) + + body = json.dumps(create_args) + return self._create(self.topic_model_url, body) diff --git a/bigml/association.py b/bigml/association.py index 60dca50d..a3b65d76 100644 --- a/bigml/association.py +++ b/bigml/association.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -31,7 +30,7 @@ api = BigML() association = Association('association/5026966515526876630001b2') -association.rules() +association.association_set() """ @@ -42,13 +41,13 @@ from bigml.api import FINISHED -from bigml.api import get_status, get_api_connection +from bigml.api import get_status, get_api_connection, get_association_id from bigml.basemodel import get_resource_dict from bigml.modelfields import ModelFields from bigml.associationrule import AssociationRule from bigml.item import Item from bigml.io import UnicodeWriter - +from bigml.util import use_cache, load, dump, dumps, get_data_transformations LOGGER = logging.getLogger('BigML') @@ -106,12 +105,26 @@ class Association(ModelFields): that can be used to extract associations information. """ + #pylint: disable=locally-disabled,access-member-before-definition + def __init__(self, association, api=None, cache_get=None): + - def __init__(self, association, api=None): + if use_cache(cache_get): + # using a cache to store the association attributes + self.__dict__ = load(get_association_id(association), cache_get) + for index, item in enumerate(self.items): + self.items[index] = Item(item["index"], item, self.fields) + for index, rule in enumerate(self.rules): + self.rules[index] = AssociationRule(rule) + return self.resource_id = None + self.name = None + self.description = None + self.parent_id = None self.complement = None self.discretization = {} + self.default_numeric_value = None self.field_discretizations = {} self.items = [] self.max_k = None @@ -123,19 +136,27 @@ def __init__(self, association, api=None): self.search_strategy = DEFAULT_SEARCH_STRATEGY self.rules = [] self.significance_level = None - self.api = get_api_connection(api) + api = get_api_connection(api) self.resource_id, association = get_resource_dict( \ - association, "association", api=self.api) - + association, "association", api=api) if 'object' in association and isinstance(association['object'], dict): association = association['object'] + try: + self.parent_id = association.get('dataset') + self.name = association.get("name") + self.description = association.get("description") + except AttributeError: + raise ValueError("Failed to find the expected " + "JSON structure. 
Check your arguments.") if 'associations' in association and \ isinstance(association['associations'], dict): status = get_status(association) if 'code' in status and status['code'] == FINISHED: self.input_fields = association['input_fields'] + self.default_numeric_value = association.get( \ + 'default_numeric_value') associations = association['associations'] fields = associations['fields'] ModelFields.__init__( \ @@ -194,10 +215,10 @@ def association_set(self, input_data, if score_by and score_by not in SCORES: raise ValueError("The available values of score_by are: %s" % ", ".join(SCORES)) - input_data = self.filter_input_data(input_data) + norm_input_data = self.filter_input_data(input_data) # retrieving the items in input_data items_indexes = [item.index for item in - self.get_items(input_map=input_data)] + self.get_items(input_map=norm_input_data)] if score_by is None: score_by = self.search_strategy @@ -208,7 +229,7 @@ def association_set(self, input_data, # if the rhs corresponds to a non-itemized field and this field # is already in input_data, don't add rhs if field_type in NO_ITEMS and self.items[rule.rhs[0]].field_id in \ - input_data: + norm_input_data: continue # if an itemized content is in input_data, don't add it to the # prediction @@ -229,8 +250,8 @@ def association_set(self, input_data, predictions[rhs]["rules"] = [] predictions[rhs]["rules"].append(rule.rule_id) # choose the best k predictions - k = len(predictions.keys()) if k is None else k - predictions = sorted(predictions.items(), + k = len(predictions) if k is None else k + predictions = sorted(list(predictions.items()), key=lambda x: x[1]["score"], reverse=True)[:k] final_predictions = [] for rhs, prediction in predictions: @@ -364,7 +385,7 @@ def item_list_set(rule): return True if isinstance(item_list[0], Item): items = [item.index for item in item_list] - elif isinstance(item_list[0], basestring): + elif isinstance(item_list[0], str): items = [item.index for item in self.get_items(names=item_list)] @@ -398,7 +419,7 @@ def rules_csv(self, file_name, **kwargs): with UnicodeWriter(file_name, quoting=csv.QUOTE_NONNUMERIC) as writer: writer.writerow(RULE_HEADERS) for rule in rules: - writer.writerow([item if not isinstance(item, basestring) + writer.writerow([item if not isinstance(item, str) else item.encode("utf-8") for item in rule]) @@ -415,7 +436,8 @@ def describe(self, rule_row): item = self.items[item_index] # if there's just one field, we don't use the item description # to avoid repeating the field name constantly. - item_description = item.name if len(self.fields.keys()) == 1 \ + item_description = item.name if \ + len(list(self.fields.keys())) == 1 \ and not item.complement else item.describe() description.append(item_description) description_str = " & ".join(description) @@ -432,6 +454,7 @@ def summarize(self, out=sys.stdout, limit=10, **kwargs): for metric in ASSOCIATION_METRICS: out.write("\n\nTop %s by %s:\n\n" % ( limit, METRIC_LITERALS[metric])) + #pylint: disable=locally-disabled,cell-var-from-loop top_rules = sorted(rules, key=lambda x: getattr(x, metric), reverse=True)[0: limit * 2] out_rules = [] @@ -466,3 +489,43 @@ def summarize(self, out=sys.stdout, limit=10, **kwargs): break out.write("\n".join(out_rules)) out.write("\n") + + def predict(self, input_data, k=DEFAULT_K, score_by=None, full=False): + """Method to homogeneize the local models interface for all BigML + models. It returns the association_set method result. If full is set + to True, then the result is returned as a dictionary. 
+ """ + rules = self.association_set(input_data, k=k, score_by=score_by) + if full: + return {"rules": rules} + return rules + + def data_transformations(self): + """Returns the pipeline transformations previous to the modeling + step as a pipeline, so that they can be used in local predictions. + Avoiding to set it in a Mixin to maintain the current dump function. + """ + return get_data_transformations(self.resource_id, self.parent_id) + + def dump(self, output=None, cache_set=None): + """Uses msgpack to serialize the resource object + If cache_set is filled with a cache set method, the method is called + + """ + self_vars = vars(self).copy() + for index, elem in enumerate(self_vars["items"]): + self_vars["items"][index] = vars(elem) + for index, elem in enumerate(self_vars["rules"]): + self_vars["rules"][index] = vars(elem) + dump(self_vars, output=output, cache_set=cache_set) + + def dumps(self): + """Uses msgpack to serialize the resource object to a string + + """ + self_vars = vars(self).copy() + for index, elem in enumerate(self_vars["items"]): + self_vars["items"][index] = vars(elem) + for index, elem in enumerate(self_vars["rules"]): + self_vars["rules"][index] = vars(elem) + dumps(self_vars) diff --git a/bigml/associationrule.py b/bigml/associationrule.py index be65c432..63944342 100644 --- a/bigml/associationrule.py +++ b/bigml/associationrule.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -22,7 +21,7 @@ SUPPORTED_LANGUAGES = ["JSON", "CSV"] -class AssociationRule(object): +class AssociationRule(): """ Object encapsulating an association rule as described in https://bigml.com/developers/associations diff --git a/bigml/basemodel.py b/bigml/basemodel.py index 183ae014..0c22dc54 100644 --- a/bigml/basemodel.py +++ b/bigml/basemodel.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2013-2019 BigML +# Copyright 2013-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -24,17 +23,17 @@ """ import logging import sys -import locale -import os import json +import os from bigml.api import FINISHED -from bigml.api import get_status, BigML, get_model_id, ID_GETTERS, \ - check_resource, get_resource_type, get_api_connection +from bigml.api import get_status, get_model_id, ID_GETTERS, \ + get_api_connection from bigml.util import utf8 from bigml.util import DEFAULT_LOCALE -from bigml.modelfields import (ModelFields, check_model_structure, - check_model_fields) +from bigml.modelfields import ModelFields, check_resource_structure, \ + check_resource_fields +from bigml.api_handlers.resourcehandler import resource_is_ready LOGGER = logging.getLogger('BigML') @@ -46,39 +45,22 @@ # remove them when we use only_model=true so we will set it to # false until the problem in apian is fixed -ONLY_MODEL = 'only_model=false;limit=-1;' -EXCLUDE_FIELDS = 'exclude=fields;' +ONLY_MODEL = 'only_model=false&limit=-1&' +EXCLUDE_FIELDS = 'exclude=fields&' def retrieve_resource(api, resource_id, query_string=ONLY_MODEL, - no_check_fields=False): + no_check_fields=False, retries=None): """ Retrieves resource info either from a local repo or from the remote server """ - if api.storage is not None: - try: - stored_resource = "%s%s%s" % (api.storage, os.sep, - resource_id.replace("/", "_")) - with open(stored_resource) as resource_file: - resource = json.loads(resource_file.read()) - # we check that the stored resource has enough fields information - # for local predictions to work. Otherwise we should retrieve it. - if no_check_fields or check_model_fields(resource): - return resource - except ValueError: - raise ValueError("The file %s contains no JSON") - except IOError: - pass - if api.auth == '?username=;api_key=;': - raise ValueError("The credentials information is missing. This" - " information is needed to download resource %s" - " for the first time and store it locally for further" - " use. Please export BIGML_USERNAME" - " and BIGML_API_KEY." % resource_id) - api_getter = api.getters[get_resource_type(resource_id)] - resource = check_resource(resource_id, api_getter, query_string) - return resource + + check_local_fn = check_local_but_fields if no_check_fields \ + else check_local_info + return api.retrieve_resource(resource_id, query_string=query_string, + check_local_fn=check_local_fn, + retries=retries) def extract_objective(objective_field): @@ -97,14 +79,38 @@ def print_importance(instance, out=sys.stdout): count = 1 field_importance, fields = instance.field_importance_data() for [field, importance] in field_importance: - out.write(utf8(u" %s. %s: %.2f%%\n" % ( + out.write(utf8(" %s. 
%s: %.2f%%\n" % ( count, fields[field]['name'], round(importance, 4) * 100))) count += 1 +#pylint: disable=locally-disabled,broad-except +def check_local_but_fields(resource): + """Whether the information in `resource` is enough to use it locally + except for the fields section + + """ + try: + return resource_is_ready(resource) and \ + check_resource_structure(resource) + except Exception: + return False + +#pylint: disable=locally-disabled,broad-except +def check_local_info(resource): + """Whether the information in `model` is enough to use it locally + + """ + try: + return check_local_but_fields(resource) and \ + check_resource_fields(resource) + except Exception: + return False -def get_resource_dict(resource, resource_type, api=None): + +def get_resource_dict(resource, resource_type, api=None, + no_check_fields=False): """Extracting the resource JSON info as a dict from the first argument of the local object constructors, that can be: @@ -117,8 +123,9 @@ def get_resource_dict(resource, resource_type, api=None): get_id = ID_GETTERS[resource_type] resource_id = None # the string can be a path to a JSON file - if isinstance(resource, basestring): + if isinstance(resource, str): try: + resource_path = resource with open(resource) as resource_file: resource = json.load(resource_file) resource_id = get_id(resource) @@ -126,6 +133,10 @@ def get_resource_dict(resource, resource_type, api=None): raise ValueError("The JSON file does not seem" " to contain a valid BigML %s" " representation." % resource_type) + # keeping the path to the main file as storage folder for + # related files + storage = os.path.dirname(resource_path) + api.storage = storage except IOError: # if it is not a path, it can be a model id resource_id = get_id(resource) @@ -135,27 +146,33 @@ def get_resource_dict(resource, resource_type, api=None): api.error_message(resource, resource_type=resource_type, method="get")) - else: - raise IOError("Failed to open the expected JSON file" - " at %s." % resource) + raise IOError("Failed to open the expected JSON file" + " at %s." % resource) except ValueError: raise ValueError("Failed to interpret %s." " JSON file expected." % resource) + # dict resource or file path argument: # checks whether the information needed for local predictions is in # the first argument - if isinstance(resource, dict) and \ - not check_model_fields(resource): - # if the fields used by the model are not + check_fn = check_local_but_fields if no_check_fields else \ + check_local_info + + if isinstance(resource, dict) and not check_fn( + resource): + # if the fields used by the resource are not # available, use only ID to retrieve it again resource = get_id(resource) resource_id = resource + # resource ID or failed resource info: + # trying to read the resource from storage or from the API if not (isinstance(resource, dict) and 'resource' in resource and resource['resource'] is not None): query_string = ONLY_MODEL resource = retrieve_resource(api, resource_id, - query_string=query_string) + query_string=query_string, + no_check_fields=no_check_fields) else: resource_id = get_id(resource) @@ -166,7 +183,7 @@ def datetime_fields(fields): """Returns datetime fields from a dict of fields """ - return {k: v for k, v in fields.items() \ + return {k: v for k, v in list(fields.items()) \ if v.get("optype", False) == "datetime"} @@ -175,12 +192,21 @@ class BaseModel(ModelFields): Uses a BigML remote model to build a local version that contains the main features of a model, except its tree structure. 
+ model: the model dict or ID + api: connection to the API + fields: fields dict (used in ensembles where fields info can be shared) + checked: boolean that avoids rechecking the model structure when it + has already been checked previously in a derived class + operation_settings: operation thresholds for the classification model """ - def __init__(self, model, api=None, fields=None): + def __init__(self, model, api=None, fields=None, checked=True, + operation_settings=None): - if check_model_structure(model): + check_fn = check_local_but_fields if fields is not None else \ + check_local_info + if isinstance(model, dict) and (checked or check_fn(model)): self.resource_id = model['resource'] else: # If only the model id is provided, the short version of the model @@ -195,13 +221,9 @@ def __init__(self, model, api=None, fields=None): query_string = EXCLUDE_FIELDS else: query_string = ONLY_MODEL - model = retrieve_resource(self.api, self.resource_id, - query_string=query_string) - # Stored copies of the model structure might lack some necessary - # keys - if not check_model_structure(model): - model = self.api.get_model(self.resource_id, - query_string=query_string) + model = retrieve_resource(api, self.resource_id, + query_string=query_string, + no_check_fields=fields is not None) if 'object' in model and isinstance(model['object'], dict): model = model['object'] @@ -209,33 +231,37 @@ def __init__(self, model, api=None, fields=None): if 'model' in model and isinstance(model['model'], dict): status = get_status(model) if 'code' in status and status['code'] == FINISHED: + model_fields = None if (fields is None and ('model_fields' in model['model'] or 'fields' in model['model'])): - fields = model['model'].get('model_fields', - model['model'].get('fields', - [])) - # model_fields doesn't contain the datetime fields - fields.update(datetime_fields(model['model'].get('fields', - {}))) + # models might use fewer fields than provided + model_fields = model['model'].get('model_fields') + fields = model['model'].get('fields', {}) # pagination or exclusion might cause a field not to # be in available fields dict - if not all(key in model['model']['fields'] - for key in fields.keys()): - raise Exception("Some fields are missing" - " to generate a local model." - " Please, provide a model with" - " the complete list of fields.") - for field in fields: - field_info = model['model']['fields'][field] - if 'summary' in field_info: - fields[field]['summary'] = field_info['summary'] - fields[field]['name'] = field_info['name'] + if model_fields: + if not all(key in fields + for key in list(model_fields.keys())): + raise Exception("Some fields are missing" + " to generate a local model."
+ " Please, provide a model with" + " the complete list of fields.") + for field in model_fields: + field_info = fields[field] + if 'summary' in field_info: + model_fields[field]['summary'] = field_info[ + 'summary'] + model_fields[field]['name'] = field_info[ + 'name'] objective_field = model['objective_fields'] missing_tokens = model['model'].get('missing_tokens') ModelFields.__init__( - self, fields, objective_id=extract_objective(objective_field), - missing_tokens=missing_tokens) + self, fields, objective_id=extract_objective( + objective_field), + missing_tokens=missing_tokens, + operation_settings=operation_settings, + model_fields=model_fields) self.description = model['description'] self.field_importance = model['model'].get('importance', None) @@ -244,7 +270,6 @@ def __init__(self, model, api=None, fields=None): in self.field_importance if element[0] in fields] self.locale = model.get('locale', DEFAULT_LOCALE) - else: raise Exception("The model isn't finished yet") else: diff --git a/bigml/bigmlconnection.py b/bigml/bigmlconnection.py index c9a98528..1e680915 100644 --- a/bigml/bigmlconnection.py +++ b/bigml/bigmlconnection.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -22,9 +21,11 @@ import os import time import locale -import StringIO +import io import logging +from urllib import parse + try: import simplejson as json except ImportError: @@ -39,22 +40,23 @@ import requests -from bigml.util import (check_dir, - maybe_save, get_exponential_wait) -from bigml.util import DEFAULT_LOCALE, PY3 +import bigml.constants as c + +from bigml.util import check_dir, maybe_save, get_exponential_wait +from bigml.util import DEFAULT_LOCALE from bigml.domain import Domain -from bigml.domain import DEFAULT_DOMAIN, BIGML_PROTOCOL +from bigml.domain import DEFAULT_DOMAIN LOG_FORMAT = '%(asctime)-15s: %(message)s' LOGGER = logging.getLogger('BigML') -CONSOLE = logging.StreamHandler() +CONSOLE = logging.StreamHandler(sys.stdout) CONSOLE.setLevel(logging.WARNING) LOGGER.addHandler(CONSOLE) # Base URL -BIGML_URL = '%s://%s/andromeda/' +BIGML_URL = '%s://%s/%s' DOWNLOAD_DIR = '/download' @@ -104,7 +106,7 @@ def assign_dir(path): Returns either the path or None. """ - if not isinstance(path, basestring): + if not isinstance(path, str): return None try: return check_dir(path) @@ -118,8 +120,6 @@ def json_load(content): """ args = [content.decode('utf-8')] - if not PY3: - args.append('utf-8') return json.loads(*args) @@ -128,7 +128,8 @@ def json_load(content): # Patch for requests # ############################################################################## -def patch_requests(): +#pylint: disable=locally-disabled,broad-except,used-before-assignment +def patch_requests(short_debug): """ Monkey patches requests to get debug output. 
""" @@ -137,14 +138,23 @@ def debug_request(method, url, **kwargs): """ response = original_request(method, url, **kwargs) - logging.debug("Data: %s", response.request.body) - logging.debug("Response: %s", response.content) + LOGGER.debug("Data: %s", response.request.body) + try: + response_content = "Download status is %s" % response.status_code \ + if "download" in url else \ + json.dumps(json.loads(response.content), indent=4) + except Exception: + response_content = response.content + response_content = response_content[0: 256] if short_debug else \ + response_content + LOGGER.debug("Response: %s\n", response_content) return response + original_request = requests.api.request requests.api.request = debug_request -class BigMLConnection(object): +class BigMLConnection(): """Low level point to create, retrieve, list, update, and delete sources, datasets, models and predictions. @@ -157,16 +167,16 @@ class BigMLConnection(object): error: An error code and message """ - def __init__(self, username=None, api_key=None, dev_mode=False, + def __init__(self, username=None, api_key=None, debug=False, set_locale=False, storage=None, domain=None, - project=None, organization=None): + project=None, organization=None, short_debug=False): """Initializes the BigML API. If left unspecified, `username` and `api_key` will default to the values of the `BIGML_USERNAME` and `BIGML_API_KEY` environment variables respectively. - dev_mode` has been deprecated. Now all resources coexist in the + `dev_mode` has been deprecated. Now all resources coexist in the same production environment. Existing resources generated in development mode have been archived under a special project and are now accessible in production mode. @@ -193,51 +203,50 @@ def __init__(self, username=None, api_key=None, dev_mode=False, """ - if dev_mode: - LOGGER.warning("Development mode is deprecated and the dev_mode" - " flag will be removed.") logging_level = logging.ERROR - if debug: + if debug or short_debug: try: logging_level = logging.DEBUG - patch_requests() + patch_requests(short_debug) except Exception: # when using GAE will fail pass - logging.basicConfig(format=LOG_FORMAT, - level=logging_level, - stream=sys.stdout) + LOGGER.forma = LOG_FORMAT, + LOGGER.level = logging_level if username is None: try: username = os.environ['BIGML_USERNAME'] except KeyError: - raise AttributeError("Cannot find BIGML_USERNAME in" - " your environment") + if storage is None: + raise AttributeError("Cannot find BIGML_USERNAME in" + " your environment") if api_key is None: try: api_key = os.environ['BIGML_API_KEY'] except KeyError: - raise AttributeError("Cannot find BIGML_API_KEY in" - " your environment") - - self.auth = "?username=%s;api_key=%s;" % (username, api_key) + if storage is None: + raise AttributeError("Cannot find BIGML_API_KEY in" + " your environment") + + self.username = username + self.api_key = api_key + self.qs_params = {"username": self.username, "api_key": self.api_key} + self.auth = "?" 
+ parse.urlencode(self.qs_params) self.project = None self.organization = None if project is not None: self.project = project + self.qs_params.update({"project": self.project}) if organization is not None: self.organization = organization + self.debug = debug - self.general_domain = None - self.general_protocol = None - self.prediction_domain = None - self.prediction_protocol = None - self.verify = None - self.verify_prediction = None + self.short_debug = short_debug + self.domain = None self.url = None self.prediction_base_url = None @@ -245,61 +254,60 @@ def __init__(self, username=None, api_key=None, dev_mode=False, # if verify is not set, we capture warnings to avoid `requests` library # warnings: InsecurePlatformWarning - logging.captureWarnings(not self.verify) + logging.captureWarnings(not self.domain.verify) if set_locale: locale.setlocale(locale.LC_ALL, DEFAULT_LOCALE) self.storage = assign_dir(storage) - def _set_api_urls(self, dev_mode=False, domain=None): + def _set_api_urls(self, domain=None): """Sets the urls that point to the REST api methods for each resource - dev_mode` has been deprecated. Now all resources coexist in the - same production environment. Existing resources generated in - development mode have been archived under a special project and - are now accessible in production mode. - """ - if dev_mode: - LOGGER.warning("Development mode is deprecated and the dev_mode" - " flag will be removed soon.") if domain is None: domain = Domain() - elif isinstance(domain, basestring): + elif isinstance(domain, str): domain = Domain(domain=domain) elif not isinstance(domain, Domain): raise ValueError("The domain must be set using a Domain object.") # Setting the general and prediction domain options - self.general_domain = domain.general_domain - self.general_protocol = domain.general_protocol - self.prediction_domain = domain.prediction_domain - self.prediction_protocol = domain.prediction_protocol - self.verify = domain.verify - self.verify_prediction = domain.verify_prediction - self.url = BIGML_URL % (BIGML_PROTOCOL, self.general_domain) + self.domain = domain + api_version = "%s/" % self.domain.api_version if \ + self.domain.api_version != "" else "" + self.url = BIGML_URL % (self.domain.general_protocol, + self.domain.general_domain, + api_version) self.prediction_base_url = BIGML_URL % ( - self.prediction_protocol, self.prediction_domain) - + self.domain.prediction_protocol, self.domain.prediction_domain, "") - def _add_credentials(self, url, organization=False, shared_auth=None): + def _add_credentials(self, qs_params, + organization=False, shared_auth=None): """Adding the credentials and project or organization information for authentication The organization argument is a boolean that controls authentication profiles in organizations. When set to true, - the organization ID is used to access the projects in an + the organization ID is used to access the projects and tasks in an organization. If false, a particular project ID must be used. - The shared_auth string provides the alternative credentials for + The shared_auth dictionary provides the alternative credentials for shared resources. """ - auth = self.auth if shared_auth is None else shared_auth - auth = auth if "?" 
not in url else ";%s" % auth[1:] - return "%s%s%s" % (url, auth, - "organization=%s;" % self.organization if - organization and self.organization - else "project=%s;" % self.project if self.project - else "") + if qs_params is None: + qs_params = {} + params = {} + params.update(qs_params) + if shared_auth is None: + params.update(self.qs_params) + else: + params.update(shared_auth) + if organization and self.organization: + try: + del params["project"] + except KeyError: + pass + params.update({"organization": self.organization}) + return params def _add_project(self, payload, include=True): """Adding project id as attribute when it has been set in the @@ -309,7 +317,7 @@ def _add_project(self, payload, include=True): to_string = False if self.project and include: # Adding project ID to args if it's not set - if isinstance(payload, basestring): + if isinstance(payload, str): payload = json.loads(payload) to_string = True if payload.get("project") is None: @@ -345,36 +353,39 @@ def _create(self, url, body, verify=None, organization=None): # downloaded. code = HTTP_ACCEPTED if verify is None: - verify = self.verify - - url = self._add_credentials(url, organization=organization) + verify = self.domain.verify + qs_params = self._add_credentials({}, organization=organization) + qs_str = "?%s" % parse.urlencode(qs_params) if qs_params else "" body = self._add_project(body, not organization) while code == HTTP_ACCEPTED: if GAE_ENABLED: try: req_options = { - 'url': url, + 'url': url + qs_str, 'method': urlfetch.POST, 'headers': SEND_JSON, 'payload': body, 'validate_certificate': verify } response = urlfetch.fetch(**req_options) - except urlfetch.Error, exception: + except urlfetch.Error as exception: LOGGER.error("HTTP request error: %s", str(exception)) + error["status"]["type"] = c.TRANSIENT return maybe_save(resource_id, self.storage, code, location, resource, error) else: try: response = requests.post(url, + params=qs_params, headers=SEND_JSON, data=body, verify=verify) except (requests.ConnectionError, requests.Timeout, - requests.RequestException), exc: + requests.RequestException) as exc: LOGGER.error("HTTP request error: %s", str(exc)) code = HTTP_INTERNAL_SERVER_ERROR + error["status"]["type"] = c.TRANSIENT return maybe_save(resource_id, self.storage, code, location, resource, error) try: @@ -383,7 +394,7 @@ def _create(self, url, body, verify=None, organization=None): if 'location' in response.headers: location = response.headers['location'] resource = json_load(response.content) - resource_id = resource['resource'] + resource_id = resource.get('resource') error = None elif code in [HTTP_BAD_REQUEST, HTTP_UNAUTHORIZED, @@ -394,9 +405,9 @@ def _create(self, url, body, verify=None, organization=None): error = json_load(response.content) LOGGER.error(self.error_message(error, method='create')) elif code != HTTP_ACCEPTED: - LOGGER.error("Unexpected error (%s)", code) + LOGGER.error("CREATE Unexpected error (%s)", code) code = HTTP_INTERNAL_SERVER_ERROR - except ValueError, exc: + except ValueError as exc: LOGGER.error("Malformed response: %s", str(exc)) code = HTTP_INTERNAL_SERVER_ERROR @@ -404,7 +415,8 @@ def _create(self, url, body, verify=None, organization=None): location, resource, error) def _get(self, url, query_string='', - shared_username=None, shared_api_key=None, organization=None): + shared_username=None, shared_api_key=None, organization=None, + shared_ref=None, resource_id=None): """Retrieves a remote resource. Uses HTTP GET to retrieve a BigML `url`.
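Note: credentials and query-string arguments are now carried in a params dict and urlencoded by `requests`/`urlfetch`, instead of being concatenated with `;` separators. The round-trip can be verified with the standard library alone (the values below are placeholders):

    from urllib import parse

    qs_params = {"username": "alfred", "api_key": "00000000", "limit": "-1"}
    qs_str = "?%s" % parse.urlencode(qs_params)
    # qs_str == "?username=alfred&api_key=00000000&limit=-1"
    assert dict(parse.parse_qsl(qs_str[1:])) == qs_params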
@@ -418,44 +430,48 @@ def _get(self, url, query_string='', """ code = HTTP_INTERNAL_SERVER_ERROR - resource_id = None location = url resource = None error = { "status": { "code": HTTP_INTERNAL_SERVER_ERROR, "message": "The resource couldn't be retrieved"}} - auth = (self.auth if shared_username is None - else "?username=%s;api_key=%s" % ( - shared_username, shared_api_key)) kwargs = {"organization": organization} if shared_username is not None and shared_api_key is not None: - kwargs.update({"shared_auth": auth}) - - url = self._add_credentials(url, **kwargs) + query_string + kwargs.update({"shared_auth": {"username": shared_username, + "api_key": shared_api_key}}) + + qs_params = self._add_credentials({}, **kwargs) + if shared_ref is not None: + qs_params.update({"shared_ref": shared_ref}) + qs_params.update(dict(parse.parse_qsl(query_string))) + qs_str = "?%s" % parse.urlencode(qs_params) if qs_params else "" if GAE_ENABLED: try: req_options = { - 'url': url, + 'url': url + qs_str, 'method': urlfetch.GET, 'headers': ACCEPT_JSON, - 'validate_certificate': self.verify + 'validate_certificate': self.domain.verify } response = urlfetch.fetch(**req_options) - except urlfetch.Error, exception: + except urlfetch.Error as exception: LOGGER.error("HTTP request error: %s", str(exception)) + error["status"]["type"] = c.TRANSIENT return maybe_save(resource_id, self.storage, code, location, resource, error) else: try: - response = requests.get(url, headers=ACCEPT_JSON, - verify=self.verify) + response = requests.get(url, params = qs_params, + headers=ACCEPT_JSON, + verify=self.domain.verify) except (requests.ConnectionError, requests.Timeout, - requests.RequestException), exc: + requests.RequestException) as exc: LOGGER.error("HTTP request error: %s", str(exc)) + error["status"]["type"] = c.TRANSIENT return maybe_save(resource_id, self.storage, code, location, resource, error) try: @@ -469,17 +485,17 @@ def _get(self, url, query_string='', HTTP_NOT_FOUND, HTTP_TOO_MANY_REQUESTS]: error = json_load(response.content) - LOGGER.error(self.error_message(error, method='get')) + LOGGER.error(self.error_message(error, method='get', + resource_id=resource_id)) else: - LOGGER.error("Unexpected error (%s)", code) + LOGGER.error("GET Unexpected error (%s)", code) code = HTTP_INTERNAL_SERVER_ERROR - except ValueError, exc: + except ValueError as exc: if "output_format" in query_string: # output can be an xml file that is returned without storing return response.content - else: - LOGGER.error("Malformed response: %s" % str(exc)) + LOGGER.error("Malformed response: %s", str(exc)) return maybe_save(resource_id, self.storage, code, location, resource, error) @@ -515,20 +531,22 @@ def _list(self, url, query_string='', organization=None): "code": code, "message": "The resource couldn't be listed"}} - url = self._add_credentials(url, organization=organization) + \ - query_string + qs_params = self._add_credentials({}, organization=organization) + qs_params.update(dict(parse.parse_qsl(query_string))) + qs_str = "?%s" % parse.urlencode(qs_params) if qs_params else "" if GAE_ENABLED: try: req_options = { - 'url': url, + 'url': url + qs_str, 'method': urlfetch.GET, 'headers': ACCEPT_JSON, - 'validate_certificate': self.verify + 'validate_certificate': self.domain.verify } response = urlfetch.fetch(**req_options) - except urlfetch.Error, exception: + except urlfetch.Error as exception: LOGGER.error("HTTP request error: %s", str(exception)) + error["status"]["type"] = c.TRANSIENT return { 'code': code, 'meta': meta, @@ 
-536,12 +554,14 @@ def _list(self, url, query_string='', organization=None): 'error': error} else: try: - response = requests.get(url, headers=ACCEPT_JSON, - verify=self.verify) + response = requests.get(url, params=qs_params, + headers=ACCEPT_JSON, + verify=self.domain.verify) except (requests.ConnectionError, requests.Timeout, - requests.RequestException), exc: + requests.RequestException) as exc: LOGGER.error("HTTP request error: %s", str(exc)) + error["status"]["type"] = c.TRANSIENT return { 'code': code, 'meta': meta, @@ -561,9 +581,9 @@ def _list(self, url, query_string='', organization=None): HTTP_TOO_MANY_REQUESTS]: error = json_load(response.content) else: - LOGGER.error("Unexpected error (%s)", code) + LOGGER.error("LIST Unexpected error (%s)", code) code = HTTP_INTERNAL_SERVER_ERROR - except ValueError, exc: + except ValueError as exc: LOGGER.error("Malformed response: %s", str(exc)) return { @@ -572,7 +592,7 @@ def _list(self, url, query_string='', organization=None): 'objects': resources, 'error': error} - def _update(self, url, body, organization=None): + def _update(self, url, body, organization=None, resource_id=None): """Updates a remote resource. Uses PUT to update a BigML resource. Only the new fields that @@ -588,7 +608,6 @@ def _update(self, url, body, organization=None): """ code = HTTP_INTERNAL_SERVER_ERROR - resource_id = None location = url resource = None error = { @@ -596,37 +615,40 @@ def _update(self, url, body, organization=None): "code": code, "message": "The resource couldn't be updated"}} - url = self._add_credentials(url, organization=organization) + qs_params = self._add_credentials({}, organization=organization) + qs_str = "?%s" % parse.urlencode(qs_params) if qs_params else "" body = self._add_project(body, not organization) if GAE_ENABLED: try: req_options = { - 'url': url, + 'url': url + qs_str, 'method': urlfetch.PUT, 'headers': SEND_JSON, 'payload': body, - 'validate_certificate': self.verify + 'validate_certificate': self.domain.verify } response = urlfetch.fetch(**req_options) - except urlfetch.Error, exception: + except urlfetch.Error as exception: LOGGER.error("HTTP request error: %s", str(exception)) + error["status"]["type"] = c.TRANSIENT return maybe_save(resource_id, self.storage, code, location, resource, error) else: try: response = requests.put(url, + params=qs_params, headers=SEND_JSON, - data=body, verify=self.verify) + data=body, verify=self.domain.verify) except (requests.ConnectionError, requests.Timeout, - requests.RequestException), exc: + requests.RequestException) as exc: LOGGER.error("HTTP request error: %s", str(exc)) + error["status"]["type"] = c.TRANSIENT return maybe_save(resource_id, self.storage, code, location, resource, error) try: code = response.status_code - if code == HTTP_ACCEPTED: resource = json_load(response.content) resource_id = resource['resource'] @@ -636,9 +658,10 @@ def _update(self, url, body, organization=None): HTTP_METHOD_NOT_ALLOWED, HTTP_TOO_MANY_REQUESTS]: error = json_load(response.content) - LOGGER.error(self.error_message(error, method='update')) + LOGGER.error(self.error_message(error, method='update', + resource_id=resource_id)) else: - LOGGER.error("Unexpected error (%s)", code) + LOGGER.error("UPDATE Unexpected error (%s)", code) code = HTTP_INTERNAL_SERVER_ERROR except ValueError: LOGGER.error("Malformed response") @@ -646,7 +669,8 @@ def _update(self, url, body, organization=None): return maybe_save(resource_id, self.storage, code, location, resource, error) - def _delete(self, url, 
query_string='', organization=None): + def _delete(self, url, query_string='', organization=None, + resource_id=None): """Permanently deletes a remote resource. If the request is successful the status `code` will be HTTP_NO_CONTENT @@ -659,32 +683,37 @@ def _delete(self, url, query_string='', organization=None): "status": { "code": code, "message": "The resource couldn't be deleted"}} - - url = self._add_credentials(url, organization=organization) + \ - query_string + qs_params = self._add_credentials({}, organization=organization) + qs_params.update(dict(parse.parse_qsl(query_string))) + qs_str = "?%s" % parse.urlencode(qs_params) if qs_params else "" if GAE_ENABLED: try: req_options = { - 'url': url, + 'url': url + qs_str, 'method': urlfetch.DELETE, - 'validate_certificate': self.verify + 'validate_certificate': self.domain.verify } response = urlfetch.fetch(**req_options) - except urlfetch.Error, exception: + except urlfetch.Error as exception: LOGGER.error("HTTP request error: %s", str(exception)) + error["status"]["type"] = c.TRANSIENT return { 'code': code, + 'resource': resource_id, 'error': error} else: try: - response = requests.delete(url, verify=self.verify) + response = requests.delete(url, params=qs_params, + verify=self.domain.verify) except (requests.ConnectionError, requests.Timeout, - requests.RequestException), exc: + requests.RequestException) as exc: LOGGER.error("HTTP request error: %s", str(exc)) + error["status"]["type"] = c.TRANSIENT return { 'code': code, + 'resource': resource_id, 'error': error} try: code = response.status_code @@ -706,6 +735,7 @@ def _delete(self, url, query_string='', organization=None): return { 'code': code, + 'resource': resource_id, 'error': error} def _download(self, url, filename=None, wait_time=10, retries=10, @@ -722,26 +752,28 @@ def _download(self, url, filename=None, wait_time=10, retries=10, if counter > 2 * retries: LOGGER.error("Retries exhausted trying to download the file.") return file_object - + qs_params = self._add_credentials({}) + qs_str = "?%s" % parse.urlencode(qs_params) if qs_params else "" if GAE_ENABLED: try: req_options = { - 'url': self._add_credentials(url), + 'url': url + qs_str, 'method': urlfetch.GET, - 'validate_certificate': self.verify + 'validate_certificate': self.domain.verify } response = urlfetch.fetch(**req_options) - except urlfetch.Error, exception: + except urlfetch.Error as exception: LOGGER.error("HTTP request error: %s", str(exception)) return file_object else: try: - response = requests.get(self._add_credentials(url), - verify=self.verify, stream=True) + response = requests.get(url, params=qs_params, + verify=self.domain.verify, + stream=True) except (requests.ConnectionError, requests.Timeout, - requests.RequestException), exc: + requests.RequestException) as exc: LOGGER.error("HTTP request error: %s", str(exc)) return file_object try: @@ -763,12 +795,11 @@ def _download(self, url, filename=None, wait_time=10, retries=10, wait_time=wait_time, retries=retries, counter=counter) - else: - return self._download(url, - filename=filename, - wait_time=wait_time, - retries=retries, - counter=retries + 1) + return self._download(url, + filename=filename, + wait_time=wait_time, + retries=retries, + counter=retries + 1) elif counter == retries: LOGGER.error("The maximum number of retries " " for the download has been " @@ -787,7 +818,7 @@ def _download(self, url, filename=None, wait_time=10, retries=10, filename = None if filename is None: if GAE_ENABLED: - file_object = 
StringIO.StringIO(response.content) + file_object = io.BytesIO(response.content) else: file_object = response.raw else: @@ -827,30 +858,31 @@ def _download(self, url, filename=None, wait_time=10, retries=10, return file_object - def _status(self, url, query_string=''): + def _status(self, url, query_string='', organization=None): """Returns the status of the account. """ code = HTTP_INTERNAL_SERVER_ERROR - meta = None resources = None error = { "status": { "code": code, "message": "Failed to obtain the account status info"}} + qs_params = self._add_credentials({}, organization=organization) + qs_params.update(dict(parse.parse_qsl(query_string))) + qs_str = "?%s" % parse.urlencode(qs_params) if qs_params else "" - url = self._add_credentials(url) + query_string if GAE_ENABLED: try: req_options = { - 'url': url, + 'url': url + qs_str, 'method': urlfetch.GET, 'headers': ACCEPT_JSON, - 'validate_certificate': self.verify + 'validate_certificate': self.domain.verify } response = urlfetch.fetch(**req_options) - except urlfetch.Error, exception: + except urlfetch.Error as exception: LOGGER.error("HTTP request error: %s", str(exception)) return { @@ -859,12 +891,14 @@ def _status(self, url, query_string=''): 'error': error} else: try: - response = requests.get(url, headers=ACCEPT_JSON, - verify=self.verify) + response = requests.get(url, params=qs_params, + headers=ACCEPT_JSON, + verify=self.domain.verify) except (requests.ConnectionError, requests.Timeout, - requests.RequestException), exc: + requests.RequestException) as exc: LOGGER.error("HTTP request error: %s", str(exc)) + error["status"]["type"] = c.TRANSIENT return { 'code': code, 'object': resources, @@ -884,7 +918,7 @@ def _status(self, url, query_string=''): else: LOGGER.error("Unexpected error (%s)", code) code = HTTP_INTERNAL_SERVER_ERROR - except ValueError, exc: + except ValueError as exc: LOGGER.error("Malformed response: %s", str(exc)) return { @@ -892,7 +926,8 @@ def _status(self, url, query_string=''): 'object': resources, 'error': error} - def error_message(self, resource, resource_type='resource', method=None): + def error_message(self, resource, resource_type='resource', method=None, + resource_id=None): """Error message for each type of resource """ @@ -904,6 +939,9 @@ def error_message(self, resource, resource_type='resource', method=None): elif ('code' in resource and 'status' in resource): error_info = resource + resource_id = resource_id or resource.get("resource") + else: + resource_id = resource_id or resource if error_info is not None and 'code' in error_info: code = error_info['code'] if ('status' in error_info and @@ -914,37 +952,39 @@ error += ": %s" % extra if code == HTTP_NOT_FOUND and method == 'get': alternate_message = '' - if self.general_domain != DEFAULT_DOMAIN: + if self.domain.general_domain != DEFAULT_DOMAIN: alternate_message = ( - u'- The %s was not created in %s.\n' % ( - resource_type, self.general_domain)) + '- The %s was not created in %s.\n' % ( + resource_type, self.domain.general_domain)) error += ( - u'\nCouldn\'t find a %s matching the given' - u' id in %s. The most probable causes are:\n\n%s' - u'- A typo in the %s\'s id.\n' - u'- The %s id cannot be accessed with your credentials' - u' or was not created in %s.\n' - u'\nDouble-check your %s and' - u' credentials info and retry.' % ( - resource_type, self.general_domain, + '\nCouldn\'t find a %s matching the given' + ' id (%s) in %s.
The most probable causes are:\n\n%s' + '- A typo in the %s\'s id.\n' + '- The %s id cannot be accessed with your credentials' + ' or was not created in %s.\n' + '\nDouble-check your %s and' + ' credentials info and retry.' % ( + resource_type, resource_id, self.domain.general_domain, alternate_message, resource_type, - resource_type, self.general_domain, resource_type)) + resource_type, self.domain.general_domain, + resource_type)) return error if code == HTTP_UNAUTHORIZED: - error += (u'\nDouble-check your credentials and the general' - u' domain your account is registered with (currently' - u' using %s), please.' % self.general_domain) + error += ('\nDouble-check your credentials and the general' + ' domain your account is registered with (currently' + ' using %s), please.' % self.domain.general_domain) return error if code == HTTP_BAD_REQUEST: - error += u'\nDouble-check the arguments for the call, please.' + error += '\nDouble-check the arguments for the call, please.' return error if code == HTTP_TOO_MANY_REQUESTS: - error += (u'\nToo many requests. Please stop ' - u' requests for a while before resuming.') + error += ('\nToo many requests. Please stop ' + ' requests for a while before resuming.') return error - elif code == HTTP_PAYMENT_REQUIRED: - error += (u'\nYou\'ll need to buy some more credits to perform' - u' the chosen action') + if code == HTTP_PAYMENT_REQUIRED: + error += ('\nThis operation exceeds your subscription limits.' + ' Please, upgrade your subscription, reduce the ' + 'dataset size or wait for a running task to finish.') return error return "Invalid %s structure:\n\n%s" % (resource_type, resource) diff --git a/bigml/boostedtree.py b/bigml/boostedtree.py deleted file mode 100644 index 0067081b..00000000 --- a/bigml/boostedtree.py +++ /dev/null @@ -1,177 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2017-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -"""Tree structure for the BigML local boosted Model - -This module defines an auxiliary Tree structure that is used in the local -boosted Ensemble to predict locally or embedded into your application -without needing to send requests to BigML.io. - -""" -from bigml.predicate import Predicate -from bigml.prediction import Prediction -from bigml.util import sort_fields, utf8, split -from bigml.tree import LAST_PREDICTION, PROPORTIONAL -from bigml.tree import one_branch - - -class BoostedTree(object): - """A boosted tree-like predictive model. 
- - """ - def __init__(self, tree, fields, objective_field=None): - - self.fields = fields - self.objective_id = objective_field - self.output = tree['output'] - - if tree['predicate'] is True: - self.predicate = True - else: - self.predicate = Predicate( - tree['predicate']['operator'], - tree['predicate']['field'], - tree['predicate']['value'], - tree['predicate'].get('term', None)) - - self.id = tree.get('id') - children = [] - if 'children' in tree: - for child in tree['children']: - children.append(self.__class__( \ - child, - self.fields, - objective_field=objective_field)) - self.children = children - self.count = tree['count'] - self.g_sum = tree.get('g_sum') - self.h_sum = tree.get('h_sum') - - def list_fields(self, out): - """Lists a description of the model's fields. - - """ - - for field in [(val['name'], val['optype']) for _, val in - sort_fields(self.fields)]: - out.write(utf8(u'[%-32s : %s]\n' % (field[0], field[1]))) - out.flush() - return self.fields - - def predict(self, input_data, path=None, missing_strategy=LAST_PREDICTION): - """Makes a prediction based on a number of field values. - - The input fields must be keyed by Id. There are two possible - strategies to predict when the value for the splitting field - is missing: - 0 - LAST_PREDICTION: the last issued prediction is returned. - 1 - PROPORTIONAL: we consider all possible outcomes and create - an average prediction. - """ - - if path is None: - path = [] - if missing_strategy == PROPORTIONAL: - return self.predict_proportional(input_data, path=path) - else: - if self.children: - for child in self.children: - if child.predicate.apply(input_data, self.fields): - path.append(child.predicate.to_rule(self.fields)) - return child.predict(input_data, path=path) - - return Prediction( - self.output, - path, - None, - distribution=None, - count=self.count, - median=None, - distribution_unit=None, - children=self.children, - d_min=None, - d_max=None) - - def predict_proportional(self, input_data, path=None, - missing_found=False): - """Makes a prediction based on a number of field values considering all - the predictions of the leaves that fall in a subtree. - - Each time a splitting field has no value assigned, we consider - both branches of the split to be true, merging their - predictions. The function returns the merged distribution and the - last node reached by a unique path. - - """ - - if path is None: - path = [] - - if not self.children: - return (self.g_sum, self.h_sum, self.count, path) - if one_branch(self.children, input_data) or \ - self.fields[split(self.children)]["optype"] in \ - ["text", "items"]: - for child in self.children: - if child.predicate.apply(input_data, self.fields): - new_rule = child.predicate.to_rule(self.fields) - if new_rule not in path and not missing_found: - path.append(new_rule) - return child.predict_proportional(input_data, path, - missing_found) - else: - # missing value found, the unique path stops - missing_found = True - g_sums = 0.0 - h_sums = 0.0 - population = 0 - for child in self.children: - g_sum, h_sum, count, _ = \ - child.predict_proportional(input_data, path, - missing_found) - g_sums += g_sum - h_sums += h_sum - population += count - return (g_sums, h_sums, population, path) - - - def get_leaves(self, path=None, filter_function=None): - """Returns a list that includes all the leaves of the tree. 
- - """ - leaves = [] - if path is None: - path = [] - if not isinstance(self.predicate, bool): - path.append(self.predicate.to_lisp_rule(self.fields)) - - if self.children: - for child in self.children: - leaves += child.get_leaves(path=path[:], - filter_function=filter_function) - else: - leaf = { - 'id': self.id, - 'count': self.count, - 'g_sum': self.g_sum, - 'h_sum': self.h_sum, - 'output': self.output, - 'path': path} - if (not hasattr(filter_function, '__call__') - or filter_function(leaf)): - leaves += [leaf] - return leaves diff --git a/bigml/centroid.py b/bigml/centroid.py index 81e5d57e..534cb562 100644 --- a/bigml/centroid.py +++ b/bigml/centroid.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -52,14 +51,15 @@ def cosine_distance2(terms, centroid_terms, scale): return similarity_distance ** 2 -class Centroid(object): +class Centroid(): """A Centroid. """ def __init__(self, centroid_info): self.center = centroid_info.get('center', {}) self.count = centroid_info.get('count', 0) - self.centroid_id = centroid_info.get('id', None) + self.centroid_id = centroid_info.get( + 'id', centroid_info.get("centroid_id", None)) self.name = centroid_info.get('name', None) self.distance = centroid_info.get('distance', {}) @@ -68,20 +68,24 @@ def distance2(self, input_data, term_sets, scales, stop_distance2=None): """ distance2 = 0.0 - for field_id, value in self.center.items(): - if isinstance(value, list): - # text field - terms = ([] if field_id not in term_sets else - term_sets[field_id]) - distance2 += cosine_distance2(terms, value, scales[field_id]) - elif isinstance(value, basestring): - if field_id not in input_data or input_data[field_id] != value: - distance2 += 1 * scales[field_id] ** 2 - else: - distance2 += ((input_data[field_id] - value) * - scales[field_id]) ** 2 - if stop_distance2 is not None and distance2 >= stop_distance2: - return None + for field_id, value in list(self.center.items()): + try: + if isinstance(value, list): + # text field + terms = ([] if field_id not in term_sets else + term_sets[field_id]) + distance2 += cosine_distance2(terms, value, scales[field_id]) + elif isinstance(value, str): + if field_id not in input_data or input_data[field_id] != value: + distance2 += 1 * scales[field_id] ** 2 + else: + distance2 += ((input_data[field_id] - value) * + scales[field_id]) ** 2 + if stop_distance2 is not None and distance2 >= stop_distance2: + return None + except: + raise ValueError("Error computing field id %s input %s value %s" % + (field_id, input_data[field_id], value)) return distance2 def print_statistics(self, out=sys.stdout): @@ -89,8 +93,8 @@ def print_statistics(self, out=sys.stdout): centroid """ - out.write(u"%s%s:\n" % (INDENT, self.name)) - literal = u"%s%s: %s\n" + out.write("%s%s:\n" % (INDENT, self.name)) + literal = "%s%s: %s\n" for measure_title in STATISTIC_MEASURES: measure = measure_title.lower().replace(" ", "_") out.write(literal % (INDENT * 2, measure_title, diff --git a/bigml/cluster.py b/bigml/cluster.py index 25f02538..5739554b 100644 --- a/bigml/cluster.py +++ b/bigml/cluster.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in 
compliance with the License. You may obtain @@ -21,7 +20,7 @@ embedded into your application without needing to send requests to BigML.io. -This module cannot only save you a few credits, but also enormously +This module can help you enormously to reduce the latency for each prediction and let you use your clusters offline. @@ -44,19 +43,20 @@ import math import re import csv +import codecs + from bigml.api import FINISHED -from bigml.api import get_status, BigML, get_api_connection -from bigml.util import cast, utf8, PY3, NUMERIC +from bigml.api import get_status, get_api_connection, get_cluster_id +from bigml.util import cast, utf8, NUMERIC, use_cache, load, dump, dumps, \ + get_data_format, get_formatted_data, format_data, get_data_transformations from bigml.centroid import Centroid from bigml.basemodel import get_resource_dict -from bigml.model import print_distribution +from bigml.generators.model import print_distribution from bigml.predicate import TM_TOKENS, TM_FULL_TERM from bigml.modelfields import ModelFields from bigml.io import UnicodeWriter - -if PY3: - import codecs +from bigml.constants import OUT_NEW_FIELDS, OUT_NEW_HEADERS, INTERNAL LOGGER = logging.getLogger('BigML') @@ -65,10 +65,11 @@ 'sum', 'sum_squares', 'variance'] INDENT = " " * 4 INTERCENTROID_MEASURES = [('Minimum', min), - ('Mean', lambda(x): sum(x)/float(len(x))), + ('Mean', lambda x: sum(x)/float(len(x))), ('Maximum', max)] GLOBAL_CLUSTER_LABEL = 'Global' -NUMERIC_DEFAULTS = ["mean", "median", "minimum", "maximum", "zero"] + +DFT_OUTPUTS = ["centroid_name", "distance"] def parse_terms(text, case_sensitive=True): @@ -77,7 +78,7 @@ def parse_terms(text, case_sensitive=True): """ if text is None: return [] - expression = ur'(\b|_)([^\b_\s]+?)(\b|_)' + expression = r'(\b|_)([^\b_\s]+?)(\b|_)' pattern = re.compile(expression) return [match[1] if case_sensitive else match[1].lower() for match in re.findall(pattern, text)] @@ -99,8 +100,7 @@ def get_unique_terms(terms, term_forms, tag_cloud): """ extend_forms = {} - tag_cloud = tag_cloud.keys() - for term, forms in term_forms.items(): + for term, forms in list(term_forms.items()): for form in forms: extend_forms[form] = term extend_forms[term] = term @@ -113,6 +113,31 @@ def get_unique_terms(terms, term_forms, tag_cloud): return list(terms_set) +def cluster_global_distance(): + """Used to populate the intercentroid distances columns in the CSV + report. For now we don't want to compute real distance and just + display "N/A" + """ + intercentroid_distance = [] + for measure, _ in INTERCENTROID_MEASURES: + intercentroid_distance.append([measure, 'N/A']) + return intercentroid_distance + + +def centroid_features(centroid, field_ids, encode=True): + """Returns features defining the centroid according to the list + of common field ids that define the centroids. + + """ + features = [] + for field_id in field_ids: + value = centroid.center[field_id] + if isinstance(value, str) and encode: + value = utf8(value) + features.append(value) + return features + + class Cluster(ModelFields): """ A lightweight wrapper around a cluster model. 
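For illustration, the new module-level helper can be reused outside the class; a minimal sketch, assuming a placeholder cluster id and API credentials already set in the environment:

from bigml.cluster import Cluster, centroid_features

local_cluster = Cluster("cluster/5126965515526876630001b2")
field_ids = list(local_cluster.centroids[0].center.keys())
# one row of center values per centroid, in a stable field order
rows = [centroid_features(centroid, field_ids)
        for centroid in local_cluster.centroids]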
@@ -121,10 +146,23 @@ class Cluster(ModelFields): """ - def __init__(self, cluster, api=None): + def __init__(self, cluster, api=None, cache_get=None): - self.resource_id = None + self.api = get_api_connection(api) self.centroids = None + if use_cache(cache_get): + # using a cache to store the cluster attributes + self.__dict__ = load(get_cluster_id(cluster), cache_get) + + for index, centroid in enumerate(self.centroids): + self.centroids[index] = Centroid(centroid) + self.cluster_global = Centroid(self.cluster_global) + return + + self.resource_id = None + self.name = None + self.description = None + self.parent_id = None self.cluster_global = None self.total_ss = None self.within_ss = None @@ -132,6 +170,7 @@ def __init__(self, cluster, api=None): self.ratio_ss = None self.critical_value = None self.input_fields = [] + self.default_numeric_value = None self.summary_fields = [] self.default_numeric_value = None self.k = None @@ -143,13 +182,19 @@ def __init__(self, cluster, api=None): self.item_analysis = {} self.items = {} self.datasets = {} - self.api = get_api_connection(api) self.resource_id, cluster = get_resource_dict( \ cluster, "cluster", api=self.api) if 'object' in cluster and isinstance(cluster['object'], dict): cluster = cluster['object'] + try: + self.parent_id = cluster.get('dataset') + self.name = cluster.get("name") + self.description = cluster.get("description") + except AttributeError: + raise ValueError("Failed to find the expected " + "JSON structure. Check your arguments.") if 'clusters' in cluster and isinstance(cluster['clusters'], dict): status = get_status(cluster) @@ -192,30 +237,11 @@ def __init__(self, cluster, api=None): # clusters retrieved from API will only contain # model fields pass - for field_id, field in fields.items(): - if field['optype'] == 'text': - self.term_forms[field_id] = {} - self.term_forms[field_id].update(field[ - 'summary']['term_forms']) - self.tag_clouds[field_id] = {} - self.tag_clouds[field_id].update(field[ - 'summary']['tag_cloud']) - self.term_analysis[field_id] = {} - self.term_analysis[field_id].update( - field['term_analysis']) - if field['optype'] == 'items': - self.items[field_id] = {} - self.items[field_id].update( - dict(field['summary']['items'])) - self.item_analysis[field_id] = {} - self.item_analysis[field_id].update( - field['item_analysis']) - missing_tokens = cluster['clusters'].get('missing_tokens') ModelFields.__init__(self, fields, missing_tokens=missing_tokens) - if not all([field_id in self.fields for - field_id in self.scales]): + if not all(field_id in self.fields for + field_id in self.scales): raise Exception("Some fields are missing" " to generate a local cluster." 
" Please, provide a cluster with" @@ -253,21 +279,22 @@ def is_g_means(self): """ return self.critical_value is not None - def fill_numeric_defaults(self, input_data, average="mean"): + def fill_numeric_defaults(self, input_data): """Checks whether input data is missing a numeric field and - fills it with the average quantity provided in the - ``average`` parameter + fills it with the average quantity set in default_numeric_value + """ for field_id, field in self.fields.items(): if (field_id not in self.summary_fields and \ field['optype'] == NUMERIC and field_id not in input_data): - if average not in NUMERIC_DEFAULTS: - raise ValueError("The available defaults are: %s" % \ - ", ".join(NUMERIC_DEFAULTS)) - default_value = 0 if average == "zero" \ - else field['summary'].get(average) + if self.default_numeric_value is None: + raise Exception("Missing values in input data. Input" + " data must contain values for all " + "numeric fields to compute a distance.") + default_value = 0 if self.default_numeric_value == "zero" \ + else field['summary'].get(self.default_numeric_value) input_data[field_id] = default_value return input_data @@ -280,7 +307,7 @@ def get_unique_terms(self, input_data): for field_id in self.term_forms: if field_id in input_data: input_data_field = input_data.get(field_id, '') - if isinstance(input_data_field, basestring): + if isinstance(input_data_field, str): case_sensitive = self.term_analysis[field_id].get( 'case_sensitive', True) token_mode = self.term_analysis[field_id].get( @@ -295,23 +322,24 @@ def get_unique_terms(self, input_data): input_data_field if case_sensitive else input_data_field.lower()) unique_terms[field_id] = get_unique_terms( - terms, self.term_forms[field_id], + terms, self.fields[field_id]["summary"]["term_forms"], self.tag_clouds.get(field_id, [])) else: unique_terms[field_id] = input_data_field del input_data[field_id] # the same for items fields + #pylint: disable=locally-disabled,consider-using-dict-items for field_id in self.item_analysis: if field_id in input_data: input_data_field = input_data.get(field_id, '') - if isinstance(input_data_field, basestring): + if isinstance(input_data_field, str): # parsing the items in input_data separator = self.item_analysis[field_id].get( 'separator', ' ') regexp = self.item_analysis[field_id].get( 'separator_regexp') if regexp is None: - regexp = ur'%s' % re.escape(separator) + regexp = r'%s' % re.escape(separator) terms = parse_items(input_data_field, regexp) unique_terms[field_id] = get_unique_terms( terms, {}, @@ -342,38 +370,19 @@ def centroids_distance(self, to_centroid): intercentroid_distance.append([measure, result]) return intercentroid_distance - def cluster_global_distance(self): - """Used to populate the intercentroid distances columns in the CSV - report. 
For now we don't want to compute real distance and jsut - display "N/A" - """ - intercentroid_distance = [] - for measure, _ in INTERCENTROID_MEASURES: - intercentroid_distance.append([measure, 'N/A']) - return intercentroid_distance - def _prepare_for_distance(self, input_data): """Prepares the fields to be able to compute the distance2 """ # Checks and cleans input_data leaving the fields used in the model - clean_input_data = self.filter_input_data(input_data) - - # Checks that all numeric fields are present in input data and - # fills them with the default average (if given) when otherwise - try: - self.fill_numeric_defaults(clean_input_data, - self.default_numeric_value) - except ValueError: - raise Exception("Missing values in input data. Input" - " data must contain values for all " - "numeric fields to compute a distance.") + # and adding default numeric values if set + norm_input_data = self.filter_input_data(input_data) # Strips affixes for numeric values and casts to the final field type - cast(clean_input_data, self.fields) + cast(norm_input_data, self.fields) - unique_terms = self.get_unique_terms(clean_input_data) + unique_terms = self.get_unique_terms(norm_input_data) - return clean_input_data, unique_terms + return norm_input_data, unique_terms def distances2_to_point(self, reference_point, list_of_points): @@ -419,16 +428,17 @@ def points_in_cluster(self, centroid_id): if centroid_dataset in [None, ""]: centroid_dataset = self.api.create_dataset( \ self.resource_id, {"centroid": centroid_id}) - self.api.ok(centroid_dataset) + self.datasets[centroid_id] = centroid_dataset[ \ + "resource"].replace("dataset/", "") + self.api.ok(centroid_dataset, raise_on_error=True) else: centroid_dataset = self.api.check_resource( \ "dataset/%s" % centroid_dataset) # download dataset to compute local predictions downloaded_data = self.api.download_dataset( \ centroid_dataset["resource"]) - if PY3: - text_reader = codecs.getreader("utf-8") - downloaded_data = text_reader(downloaded_data) + text_reader = codecs.getreader("utf-8") + downloaded_data = text_reader(downloaded_data) reader = csv.DictReader(downloaded_data) points = [] for row in reader: @@ -482,19 +492,6 @@ def sorted_centroids(self, reference_point): "centroids": sorted(close_centroids, key=lambda x: x["distance"])} - def centroid_features(self, centroid, field_ids, encode=True): - """Returns features defining the centroid according to the list - of common field ids that define the centroids. 
- - """ - features = [] - for field_id in field_ids: - value = centroid.center[field_id] - if isinstance(value, basestring) and encode: - value = value.encode('utf-8') - features.append(value) - return features - def get_data_distribution(self): """Returns training data distribution @@ -508,9 +505,9 @@ def print_global_distribution(self, out=sys.stdout): """Prints the line Global: 100% ( instances) """ - output = u"" + output = "" if self.cluster_global: - output += (u" %s: 100%% (%d instances)\n" % ( + output += (" %s: 100%% (%d instances)\n" % ( self.cluster_global.name, self.cluster_global.count)) out.write(output) @@ -525,11 +522,11 @@ def print_ss_metrics(self, out=sys.stdout): "of squares)", self.within_ss), ("between_ss (Between sum of squares)", self.between_ss), ("ratio_ss (Ratio of sum of squares)", self.ratio_ss)] - output = u"" + output = "" for metric in ss_metrics: if metric[1]: - output += (u"%s%s: %5f\n" % (INDENT, metric[0], metric[1])) + output += ("%s%s: %5f\n" % (INDENT, metric[0], metric[1])) out.write(output) out.flush() @@ -541,10 +538,10 @@ def statistics_csv(self, file_name=None): rows = [] writer = None field_ids = self.centroids[0].center.keys() - headers = [u"Centroid_name"] - headers.extend([u"%s" % self.fields[field_id]["name"] + headers = ["Centroid_name"] + headers.extend(["%s" % self.fields[field_id]["name"] for field_id in field_ids]) - headers.extend([u"Instances"]) + headers.extend(["Instances"]) intercentroids = False header_complete = False @@ -552,20 +549,20 @@ def statistics_csv(self, file_name=None): centroids_list = sorted(self.centroids, key=lambda x: x.name) for centroid in centroids_list: row = [centroid.name] - row.extend(self.centroid_features(centroid, field_ids, - encode=False)) + row.extend(centroid_features(centroid, field_ids, + encode=False)) row.append(centroid.count) if len(self.centroids) > 1: for measure, result in self.centroids_distance(centroid): if not intercentroids: - headers.append(u"%s intercentroid distance" % \ + headers.append("%s intercentroid distance" % \ measure.title()) row.append(result) intercentroids = True for measure, result in centroid.distance.items(): if measure in CSV_STATISTICS: if not header_complete: - headers.append(u"Distance %s" % + headers.append("Distance %s" % measure.lower().replace("_", " ")) row.append(result) if not header_complete: @@ -574,12 +571,12 @@ def statistics_csv(self, file_name=None): rows.append(row) if self.cluster_global: - row = [u"%s" % self.cluster_global.name] - row.extend(self.centroid_features(self.cluster_global, field_ids, - encode=False)) + row = ["%s" % self.cluster_global.name] + row.extend(centroid_features(self.cluster_global, field_ids, + encode=False)) row.append(self.cluster_global.count) if len(self.centroids) > 1: - for measure, result in self.cluster_global_distance(): + for measure, result in cluster_global_distance(): row.append(result) for measure, result in self.cluster_global.distance.items(): if measure in CSV_STATISTICS: @@ -591,6 +588,7 @@ def statistics_csv(self, file_name=None): return rows with UnicodeWriter(file_name) as writer: writer.writerows(rows) + return file_name def summarize(self, out=sys.stdout): """Prints a summary of the cluster info @@ -599,51 +597,128 @@ def summarize(self, out=sys.stdout): report_header = '' if self.is_g_means: report_header = \ - u'G-means Cluster (critical_value=%d)' % self.critical_value + 'G-means Cluster (critical_value=%d)' % self.critical_value else: - report_header = u'K-means Cluster (k=%d)' % self.k + 
report_header = 'K-means Cluster (k=%d)' % self.k out.write(report_header + ' with %d centroids\n\n' % len(self.centroids)) - out.write(u"Data distribution:\n") + out.write("Data distribution:\n") # "Global" is set as first entry self.print_global_distribution(out=out) print_distribution(self.get_data_distribution(), out=out) - out.write(u"\n") + out.write("\n") centroids_list = [self.cluster_global] if self.cluster_global else [] centroids_list.extend(sorted(self.centroids, key=lambda x: x.name)) - out.write(u"Cluster metrics:\n") + out.write("Cluster metrics:\n") self.print_ss_metrics(out=out) - out.write(u"\n") + out.write("\n") - out.write(u"Centroids:\n") + out.write("Centroids:\n") for centroid in centroids_list: - out.write(utf8(u"\n%s%s: " % (INDENT, centroid.name))) + out.write(utf8("\n%s%s: " % (INDENT, centroid.name))) connector = "" for field_id, value in centroid.center.items(): - if isinstance(value, basestring): - value = u"\"%s\"" % value - out.write(utf8(u"%s%s: %s" % (connector, - self.fields[field_id]['name'], - value))) + if isinstance(value, str): + value = "\"%s\"" % value + out.write(utf8("%s%s: %s" % (connector, + self.fields[field_id]['name'], + value))) connector = ", " - out.write(u"\n\n") + out.write("\n\n") - out.write(u"Distance distribution:\n\n") + out.write("Distance distribution:\n\n") for centroid in centroids_list: centroid.print_statistics(out=out) - out.write(u"\n") + out.write("\n") if len(self.centroids) > 1: - out.write(u"Intercentroid distance:\n\n") + out.write("Intercentroid distance:\n\n") centroids_list = (centroids_list[1:] if self.cluster_global else centroids_list) for centroid in centroids_list: - out.write(utf8(u"%sTo centroid: %s\n" % (INDENT, - centroid.name))) + out.write(utf8("%sTo centroid: %s\n" % (INDENT, + centroid.name))) for measure, result in self.centroids_distance(centroid): - out.write(u"%s%s: %s\n" % (INDENT * 2, measure, result)) - out.write(u"\n") + out.write("%s%s: %s\n" % (INDENT * 2, measure, result)) + out.write("\n") + + def predict(self, input_data, full=False): + """Method to homogenize the local models interface for all BigML + models. It returns the centroid method result. + """ + centroid = self.centroid(input_data) + if not full: + return {"centroid_name": centroid["name"]} + return centroid + + def batch_predict(self, input_data_list, outputs=None, **kwargs): + """Creates a batch centroid for a list of inputs using the local + cluster model. Allows defining some output settings to + decide the fields to be added to the input_data (centroid_name, + distance, etc.) and the name that we want to assign to these new + fields. The outputs argument accepts a dictionary with keys + "output_fields", to contain a list of the prediction properties to add + (["centroid_name", "distance"] by default) and "output_headers", to + contain a list of the headers to be used when adding them (identical + to "output_fields" list, by default). 
+ + :param input_data_list: List of input data to be predicted + :type input_data_list: list or Pandas dataframe + :param dict outputs: properties that define the headers and fields to + be added to the input data + :return: the list of input data plus the predicted values + :rtype: list or Pandas dataframe depending on the input type in + input_data_list + + """ + if outputs is None: + outputs = {} + new_fields = outputs.get(OUT_NEW_FIELDS, DFT_OUTPUTS) + new_headers = outputs.get(OUT_NEW_HEADERS, new_fields) + if len(new_fields) > len(new_headers): + new_headers.extend(new_fields[len(new_headers):]) + else: + new_headers = new_headers[0: len(new_fields)] + data_format = get_data_format(input_data_list) + inner_data_list = get_formatted_data(input_data_list, INTERNAL) + for input_data in inner_data_list: + prediction = self.centroid(input_data, **kwargs) + for index, key in enumerate(new_fields): + input_data[new_headers[index]] = prediction[key] + if data_format != INTERNAL: + return format_data(inner_data_list, out_format=data_format) + return inner_data_list + + def data_transformations(self): + """Returns the pipeline transformations previous to the modeling + step as a pipeline, so that they can be used in local predictions. + Avoiding to set it in a Mixin to maintain the current dump function. + """ + return get_data_transformations(self.resource_id, self.parent_id) + + def dump(self, output=None, cache_set=None): + """Uses msgpack to serialize the resource object + If cache_set is filled with a cache set method, the method is called + + """ + # copy the attributes so that serializing does not mutate self + self_vars = vars(self).copy() + self_vars["centroids"] = [vars(centroid) + for centroid in self.centroids] + self_vars["cluster_global"] = vars(self.cluster_global) + del self_vars["api"] + dump(self_vars, output=output, cache_set=cache_set) + + def dumps(self): + """Uses msgpack to serialize the resource object to a string + + """ + # copy the attributes so that serializing does not mutate self + self_vars = vars(self).copy() + self_vars["centroids"] = [vars(centroid) + for centroid in self.centroids] + self_vars["cluster_global"] = vars(self.cluster_global) + del self_vars["api"] + return dumps(self_vars) diff --git a/bigml/constants.py b/bigml/constants.py index 618b5ea7..5171d557 100644 --- a/bigml/constants.py +++ b/bigml/constants.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -21,6 +20,7 @@ import re + # Basic resources SOURCE_PATH = 'source' DATASET_PATH = 'dataset' @@ -59,6 +59,8 @@ EXECUTION_PATH = 'execution' LIBRARY_PATH = 'library' STATUS_PATH = 'status' +EXTERNAL_CONNECTOR_PATH = 'externalconnector' + SUPERVISED_PATHS = [ MODEL_PATH, ENSEMBLE_PATH, @@ -78,9 +80,12 @@ ASSOCIATION_PATH, TOPIC_MODEL_PATH, TIME_SERIES_PATH, - FUSION_PATH + FUSION_PATH, + PCA_PATH ] +CLONABLE_PATHS = [SOURCE_PATH, DATASET_PATH, SCRIPT_PATH] +CLONABLE_PATHS.extend(MODELS_PATHS) PMML_MODELS = [ MODEL_PATH, @@ -92,14 +97,17 @@ # Resource Ids patterns ID_PATTERN = '[a-f0-9]{24}' SHARED_PATTERN = '[a-zA-Z0-9]{24,30}' -SOURCE_RE = re.compile(r'^%s/%s$' % (SOURCE_PATH, ID_PATTERN)) +ID_RE = re.compile(r'^%s$' % ID_PATTERN) +SOURCE_RE = re.compile(r'^%s/%s|^shared/%s/%s$' % (SOURCE_PATH, ID_PATTERN, + SOURCE_PATH, SHARED_PATTERN)) DATASET_RE = re.compile(r'^(public/)?%s/%s$|^shared/%s/%s$' % ( DATASET_PATH, ID_PATTERN, DATASET_PATH, SHARED_PATTERN)) MODEL_RE = re.compile(r'^(public/)?%s/%s$|^shared/%s/%s$' % ( MODEL_PATH, ID_PATTERN, MODEL_PATH, SHARED_PATTERN)) PREDICTION_RE = re.compile(r'^%s/%s$' % (PREDICTION_PATH, ID_PATTERN)) EVALUATION_RE = re.compile(r'^%s/%s$' % (EVALUATION_PATH, ID_PATTERN)) -ENSEMBLE_RE = re.compile(r'^%s/%s$' % (ENSEMBLE_PATH, ID_PATTERN)) +ENSEMBLE_RE = re.compile(r'^%s/%s|^shared/%s/%s$' % ( + ENSEMBLE_PATH, ID_PATTERN, ENSEMBLE_PATH, SHARED_PATTERN)) BATCH_PREDICTION_RE = re.compile(r'^%s/%s$' % (BATCH_PREDICTION_PATH, ID_PATTERN)) CLUSTER_RE = re.compile(r'^(public/)?%s/%s$|^shared/%s/%s$' % ( @@ -142,8 +150,8 @@ (FORECAST_PATH, ID_PATTERN)) DEEPNET_RE = re.compile(r'^%s/%s|^shared/%s/%s$' % \ (DEEPNET_PATH, ID_PATTERN, DEEPNET_PATH, SHARED_PATTERN)) -OPTIML_RE = re.compile(r'^%s/%s|^shared/%s/%s$' % \ - (OPTIML_PATH, ID_PATTERN, OPTIML_PATH, SHARED_PATTERN)) +OPTIML_RE = re.compile(r'^%s/%s$' % \ + (OPTIML_PATH, ID_PATTERN)) FUSION_RE = re.compile(r'^%s/%s|^shared/%s/%s$' % \ (FUSION_PATH, ID_PATTERN, FUSION_PATH, SHARED_PATTERN)) PCA_RE = re.compile(r'^%s/%s|^shared/%s/%s$' % \ @@ -154,12 +162,15 @@ LINEAR_REGRESSION_RE = re.compile(r'^%s/%s|^shared/%s/%s$' % \ (LINEAR_REGRESSION_PATH, ID_PATTERN, LINEAR_REGRESSION_PATH, SHARED_PATTERN)) -SCRIPT_RE = re.compile(r'^%s/%s|^shared/%s/%s$' % \ +SCRIPT_RE = re.compile(r'^(public/)?%s/%s$|^shared/%s/%s$' % \ (SCRIPT_PATH, ID_PATTERN, SCRIPT_PATH, SHARED_PATTERN)) EXECUTION_RE = re.compile(r'^%s/%s|^shared/%s/%s$' % \ (EXECUTION_PATH, ID_PATTERN, EXECUTION_PATH, SHARED_PATTERN)) LIBRARY_RE = re.compile(r'^%s/%s|^shared/%s/%s$' % \ (LIBRARY_PATH, ID_PATTERN, LIBRARY_PATH, SHARED_PATTERN)) +EXTERNAL_CONNECTOR_RE = re.compile(r'^%s/%s$' % \ + (EXTERNAL_CONNECTOR_PATH, ID_PATTERN)) + RESOURCE_RE = { SOURCE_PATH: SOURCE_RE, @@ -197,7 +208,8 @@ LINEAR_REGRESSION_PATH: LINEAR_REGRESSION_RE, SCRIPT_PATH: SCRIPT_RE, EXECUTION_PATH: EXECUTION_RE, - LIBRARY_PATH: LIBRARY_RE} + LIBRARY_PATH: LIBRARY_RE, + EXTERNAL_CONNECTOR_PATH: EXTERNAL_CONNECTOR_RE} RENAMED_RESOURCES = { @@ -213,7 +225,8 @@ TOPIC_DISTRIBUTION_PATH: 'topic_distribution', BATCH_TOPIC_DISTRIBUTION_PATH: 'batch_topic_distribution', TIME_SERIES_PATH: 'time_series', - BATCH_PROJECTION_PATH: 'batch_projection' + BATCH_PROJECTION_PATH: 'batch_projection', + EXTERNAL_CONNECTOR_PATH: 'external_connector' } IRREGULAR_PLURALS = { @@ -230,7 +243,8 @@ TOPIC_DISTRIBUTION_PATH: 'topic_distributions', TIME_SERIES_PATH: 'time_series', LIBRARY_PATH: 'libraries', - BATCH_PROJECTION_PATH: 'batch_projections' + BATCH_PROJECTION_PATH: 
'batch_projections', + EXTERNAL_CONNECTOR_PATH: 'external_connectors' } # Resource status codes @@ -245,8 +259,102 @@ UNKNOWN = -2 RUNNABLE = -3 -# Minimum query string to get model fields +# Minimum query string to get model status TINY_RESOURCE = "full=false" +# Filtering only tasks status info +TASKS_QS = "include=subscription,tasks" + +# Minimum query string to get model image fields and status +IMAGE_FIELDS_FILTER = ("optype=image&exclude=summary,objective_summary," + "input_fields,importance,model_fields") + # Default storage folder STORAGE = "./storage" + +# label for transient HTTP errors +TRANSIENT = "transient" + +# fields related attributes +RESOURCES_WITH_FIELDS = [SOURCE_PATH, DATASET_PATH, MODEL_PATH, + PREDICTION_PATH, CLUSTER_PATH, ANOMALY_PATH, + SAMPLE_PATH, CORRELATION_PATH, STATISTICAL_TEST_PATH, + LOGISTIC_REGRESSION_PATH, ASSOCIATION_PATH, + TOPIC_MODEL_PATH, ENSEMBLE_PATH, PCA_PATH, + FUSION_PATH, + DEEPNET_PATH, LINEAR_REGRESSION_PATH] +DEFAULT_MISSING_TOKENS = ["", "N/A", "n/a", "NULL", "null", "-", "#DIV/0", + "#REF!", "#NAME?", "NIL", "nil", "NA", "na", + "#VALUE!", "#NULL!", "NaN", "#N/A", "#NUM!", "?"] +FIELDS_PARENT = { \ + "model": "model", + "anomaly": "model", + "cluster": "clusters", + "logisticregression": "logistic_regression", + "linearregression": "linear_regression", + "ensemble": "ensemble", + "deepnet": "deepnet", + "topicmodel": "topic_model", + "association": "associations", + "correlation": "correlations", + "sample": "sample", + "pca": "pca", + "fusion": "fusion", + "timeseries": "timeseries", + "statisticaltest": "statistical_tests", + "dataset": None} +ALL_FIELDS = "limit=-1" +SPECIFIC_EXCLUDES = { \ + "model": ["root"], + "anomaly": ["trees"], + "cluster": ["clusters"], + "logisticregression": ["coefficients"], + "linearregression": ["coefficients"], + "ensemble": ["models"], + "deepnet": ["network"], + "topicmodel": ["topics"], + "association": ["rules", "rules_summary"], + "fusion": ["models"], + "pca": ["pca"], + "timeseries": ["ets_models"]} + +EXTERNAL_CONNECTION_ATTRS = { \ + "BIGML_EXTERNAL_CONN_HOST": "host", + "BIGML_EXTERNAL_CONN_PORT": "port", + "BIGML_EXTERNAL_CONN_USER": "user", + "BIGML_EXTERNAL_CONN_PWD": "password", + "BIGML_EXTERNAL_CONN_DB": "database", + "BIGML_EXTERNAL_CONN_SOURCE": "source"} + + +# missing strategies +LAST_PREDICTION = 0 +PROPORTIONAL = 1 + +# output options in batch predictions +OUT_NEW_FIELDS = "output_fields" +OUT_NEW_HEADERS = "output_headers" + +# input data allowed formats in batch predictions +NUMPY = "numpy" +DATAFRAME = "dataframe" +INTERNAL = "list_of_dicts" + +CATEGORICAL = "categorical" + +IMAGE_EXTENSIONS = ['png', 'jpg', 'jpeg', 'gif', 'tiff', 'tif', 'bmp', + 'webp', 'cur', 'ico', 'pcx', 'psd', 'psb'] + +REGIONS = "regions" +REGION_SCORE_ALIAS = "region_score_threshold" +REGION_SCORE_THRESHOLD = "bounding_box_threshold" +REGIONS_OPERATION_SETTINGS = [ + REGION_SCORE_ALIAS, "iou_threshold", "max_objects"] +DEFAULT_OPERATION_SETTINGS = ["operating_point", "operating_kind"] +DECIMALS = 5 + +IMAGE = "image" +DATETIME = "datetime" +IOU_REMOTE_SETTINGS = {"iou_threshold": 0.2} +TEMP_DIR = "/tmp" +TOP_IMAGE_SIZE = 512 diff --git a/bigml/dataset.py b/bigml/dataset.py new file mode 100644 index 00000000..5c548e61 --- /dev/null +++ b/bigml/dataset.py @@ -0,0 +1,255 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2022-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +""" +Class to store Dataset transformations based on the Dataset API response + +""" +import os +import subprocess + +from bigml.fields import Fields, sorted_headers, get_new_fields +from bigml.api import get_api_connection, get_dataset_id, get_status +from bigml.basemodel import get_resource_dict +from bigml.util import DEFAULT_LOCALE, use_cache, cast, load, dump, dumps, \ + sensenet_logging +from bigml.constants import FINISHED +from bigml.flatline import Flatline +from bigml.featurizer import Featurizer + +process = subprocess.Popen(['node -v'], stdout=subprocess.PIPE, shell=True) +out = process.stdout.read() +FLATLINE_READY = out.startswith(b"v") +if FLATLINE_READY: + from bigml.flatline import Flatline + + +#pylint: disable=locally-disabled,bare-except,ungrouped-imports +try: + # bigml-sensenet should be installed for image processing + sensenet_logging() + import sensenet + from bigml.images.featurizers import ImageFeaturizer as Featurizer +except: + pass + + +class Dataset: + """Local representation of a BigML Dataset. It can store a sample of + data whose fields are a subset of the ones defined in the fields + attribute. + """ + + def __init__(self, dataset, api=None, cache_get=None): + if use_cache(cache_get): + #pylint: disable=locally-disabled,access-member-before-definition + self.__dict__ = load(get_dataset_id(dataset), cache_get) + if self.origin_dataset is not None: + self.origin_dataset = Dataset(self.origin_dataset, + api=api, cache_get=cache_get) + self.featurizer = Featurizer(self.in_fields, + self.input_fields, preferred_only=False) + return + + self.resource_id = None + self.name = None + self.description = None + self.rows = None + self.origin_dataset = None + self.parent_id = None + self.in_fields = None + self.out_fields = None + self.description = None + self.locale = None + self.input_fields = None + self.missing_tokens = None + self.fields_obj = None + self.api = get_api_connection(api) + self.cache_get = cache_get + self.featurizer = None + self.transformations = None + + # retrieving dataset information from + self.resource_id, dataset = get_resource_dict( \ + dataset, "dataset", api=self.api, no_check_fields=False) + + if 'object' in dataset and isinstance(dataset['object'], dict): + dataset = dataset['object'] + self.name = dataset.get('name') + self.description = dataset.get('description') + if 'fields' in dataset and isinstance(dataset['fields'], dict): + status = get_status(dataset) + if 'code' in status and status['code'] == FINISHED: + out_fields_obj = Fields(dataset) + self.out_fields = out_fields_obj.fields + self.out_header_names, _ = sorted_headers(out_fields_obj) + self.out_fields = out_fields_obj.fields + self.description = dataset["description"] + self.locale = dataset.get('locale', DEFAULT_LOCALE) + self.missing_tokens = dataset.get('missing_tokens') + self.input_fields = dataset.get('input_fields') + self.rows = dataset.get("rows", 0) + # we extract the generators and names from the "output_fields" + if dataset.get("new_fields"): + new_fields = get_new_fields(dataset.get( + "output_fields", [])) + else: + new_fields = None + 
origin_dataset = dataset.get("origin_dataset") + if origin_dataset: + self.parent_id = origin_dataset + self.add_transformations(origin_dataset, new_fields) + elif dataset.get("source"): + self.parent_id = dataset.get("source") + self.in_fields = out_fields_obj.fields + self.featurizer = Featurizer(self.in_fields, + self.input_fields, + self.in_fields, + preferred_only=False) + self.fields_obj = Fields(self.in_fields) + self.in_header_names, self.in_header_ids = sorted_headers( + Fields(self.in_fields)) + + def add_transformations(self, origin_dataset, new_fields): + """Adds a new transformation where the new fields provided are + defined + """ + _, origin_dataset = get_resource_dict( + origin_dataset, "dataset", api=self.api) + self.origin_dataset = Dataset(origin_dataset, api=self.api, + cache_get=self.cache_get) + self.in_fields = self.origin_dataset.out_fields + if new_fields: + self.transformations = new_fields + + def get_sample(self, rows_number=32): + """Gets a sample of data representing the dataset """ + sample = self.api.create_sample(self.resource_id) + if self.api.ok(sample): + sample = self.api.get_sample( + sample["resource"], "rows=%s" % rows_number) + return sample.get("object", {}).get("sample", {}).get("rows") + return [] + + def get_inputs_sample(self, rows_number=32): + """Gets a sample of data representing the origin dataset """ + if self.origin_dataset is None: + return [] + return self.origin_dataset.get_sample(rows_number=rows_number) + + def _input_array(self, input_data): + """Transform the dict-like input data into a row """ + + # new_input_data = self.filter_input_data(input_data) + new_input_data = {} + for key, value in input_data.items(): + if key not in self.in_fields: + key = self.fields_obj.fields_by_name.get(key, key) + new_input_data.update({key: value}) + if self.featurizer is not None: + new_input_data = self.featurizer.extend_input(new_input_data) + cast(new_input_data, self.in_fields) + row = [] + for f_id in self.in_header_ids: + row.append(None if not f_id in new_input_data else + new_input_data[f_id]) + return row + + def _transform(self, input_arrays): + """Given a list of inputs that match the origin dataset structure, + apply the Flatline transformations used in the dataset + + """ + new_input_arrays = [] + out_headers = [] + fields = {"fields": self.in_fields} + out_arrays = [] + for transformation in self.transformations: + expr = transformation.get("field") + names = transformation.get("names", []) + out_headers.extend(names) + # evaluating first to raise an alert if the expression is failing + check = Flatline.check_lisp(expr, fields) + if "error" in check: + raise ValueError(check["error"]) + if expr == '(all)': + new_input_arrays = input_arrays.copy() + continue + new_input = Flatline.apply_lisp(expr, input_arrays, self) + for index, _ in enumerate(new_input): + try: + new_input_arrays[index] + except IndexError: + new_input_arrays.append([]) + new_input_arrays[index].extend(new_input[index]) + for index, input_array in enumerate(new_input_arrays): + try: + out_arrays[index] + except IndexError: + out_arrays.append([]) + out_arrays[index].extend(input_array) + return [out_headers, out_arrays] + + + def transform(self, input_data_list): + """Applies the transformations to the given input data and returns + the result. Usually, the input_data_list will contain a single + dictionary, but it can contain a list of them if needed for window + functions. 
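+ Illustrative example (assumed names, not part of the original
+ dataset: a Flatline-generated field "glucose half" defined as
+ (/ (field "plasma glucose") 2)):
+ dataset.transform([{"plasma glucose": 120}])
+ # -> rows extended with {"glucose half": 60}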
+ """ + if self.transformations is None and self.featurizer is None: + return input_data_list + rows = [self._input_array(input_data) for input_data in + input_data_list] + if self.transformations: + if not FLATLINE_READY: + raise ValueError("Nodejs should be installed to handle this" + " dataset's transformations. Please, check" + " the bindings documentation for details.") + out_headers, out_arrays = self._transform(rows) + rows = [dict(zip(out_headers, row)) for row + in out_arrays] + for index, result in enumerate(rows): + rows[index] = {key: value for key, value in result.items() + if value is not None} + else: + rows = [dict(zip(self.out_header_names, row)) for row in rows] + return rows + + def dump(self, output=None, cache_set=None): + """Uses msgpack to serialize the resource object + If cache_set is filled with a cache set method, the method is called + + """ + self_vars = vars(self).copy() + del self_vars["api"] + del self_vars["cache_get"] + self_vars["origin_dataset"] = self_vars["origin_dataset"].resource_id + del self_vars["featurizer"] + del self_vars["fields_obj"] + dump(self_vars, output=output, cache_set=cache_set) + + def dumps(self): + """Uses msgpack to serialize the resource object to a string + + """ + self_vars = vars(self).copy() + del self_vars["api"] + del self_vars["cache_get"] + self_vars["origin_dataset"] = self_vars["origin_dataset"].resource_id + del self_vars["featurizer"] + del self_vars["fields_obj"] + return dumps(self_vars) diff --git a/bigml/deepnet.py b/bigml/deepnet.py index 18f1b05a..dbb45dc9 100644 --- a/bigml/deepnet.py +++ b/bigml/deepnet.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=wrong-import-position,ungrouped-imports # -# Copyright 2017-2019 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -21,7 +21,7 @@ embedded into your application without needing to send requests to BigML.io. -This module cannot only save you a few credits, but also enormously +This module can help you enormously to reduce the latency for each prediction and let you use your models offline. 
@@ -40,34 +40,43 @@ deepnet.predict({"petal length": 3, "petal width": 1}) """ -import logging +import os +import warnings from functools import cmp_to_key from bigml.api import FINISHED -from bigml.api import get_status, get_api_connection -from bigml.util import cast, PRECISION +from bigml.api import get_status, get_api_connection, get_deepnet_id +from bigml.util import cast, use_cache, load, get_data_transformations, \ + PRECISION, sensenet_logging from bigml.basemodel import get_resource_dict, extract_objective from bigml.modelfields import ModelFields from bigml.laminar.constants import NUMERIC from bigml.model import parse_operating_point, sort_categories +from bigml.constants import REGIONS, REGIONS_OPERATION_SETTINGS, \ + DEFAULT_OPERATION_SETTINGS, REGION_SCORE_ALIAS, REGION_SCORE_THRESHOLD, \ + IMAGE, DECIMALS, IOU_REMOTE_SETTINGS -try: - import numpy - import scipy - import bigml.laminar.numpy_ops as net - import bigml.laminar.preprocess_np as pp -except ImportError: - import bigml.laminar.math_ops as net - import bigml.laminar.preprocess as pp +import bigml.laminar.numpy_ops as net +import bigml.laminar.preprocess_np as pp +try: + sensenet_logging() + from sensenet.models.wrappers import create_model + from bigml.images.utils import to_relative_coordinates + LAMINAR_VERSION = False +except Exception: + LAMINAR_VERSION = True -LOGGER = logging.getLogger('BigML') MEAN = "mean" STANDARD_DEVIATION = "stdev" + def moments(amap): + """Extracts mean and stdev + + """ return amap[MEAN], amap[STANDARD_DEVIATION] @@ -90,14 +99,38 @@ class Deepnet(ModelFields): """ - def __init__(self, deepnet, api=None): + def __init__(self, deepnet, api=None, cache_get=None, + operation_settings=None): """The Deepnet constructor can be given as first argument: - a deepnet structure - a deepnet id - a path to a JSON file containing a deepnet structure + :param deepnet: The deepnet info or reference + :param api: Connection object that will be used to download the deepnet + info if not locally available + :param cache_get: Get function that handles memory-cached objects + :param operation_settings: Dict object that contains operating options + + The operation_settings will depend on the type of ML problem: + - regressions: no operation_settings allowed + - classifications: operating_point, operating_kind + - regions: bounding_box_threshold, iou_threshold and max_objects """ + + self.using_laminar = LAMINAR_VERSION + + if use_cache(cache_get): + # using a cache to store the model attributes + self.__dict__ = load(get_deepnet_id(deepnet), cache_get) + self.operation_settings = self._add_operation_settings( + operation_settings) + return + self.resource_id = None + self.name = None + self.description = None + self.parent_id = None self.regression = False self.network = None self.networks = None @@ -105,45 +138,95 @@ def __init__(self, deepnet, api=None): self.class_names = [] self.preprocess = [] self.optimizer = None + self.default_numeric_value = None self.missing_numerics = False - self.api = get_api_connection(api) + api = get_api_connection(api) self.resource_id, deepnet = get_resource_dict( \ - deepnet, "deepnet", api=self.api) + deepnet, "deepnet", api=api) if 'object' in deepnet and isinstance(deepnet['object'], dict): deepnet = deepnet['object'] - self.input_fields = deepnet['input_fields'] + try: + self.parent_id = deepnet.get('dataset') + self.name = deepnet.get('name') + self.description = deepnet.get('description') + self.input_fields = deepnet['input_fields'] + self.default_numeric_value = 
deepnet.get('default_numeric_value') + except (AttributeError, KeyError): + raise ValueError("Failed to find the expected " + "JSON structure. Check your arguments.") if 'deepnet' in deepnet and isinstance(deepnet['deepnet'], dict): status = get_status(deepnet) objective_field = deepnet['objective_fields'] - deepnet = deepnet['deepnet'] + deepnet_info = deepnet['deepnet'] if 'code' in status and status['code'] == FINISHED: - self.fields = deepnet['fields'] - missing_tokens = deepnet.get('missing_tokens') + self.fields = deepnet_info['fields'] + missing_tokens = deepnet_info.get('missing_tokens') ModelFields.__init__( self, self.fields, objective_id=extract_objective(objective_field), - terms=True, categories=True, missing_tokens=missing_tokens) + categories=True, missing_tokens=missing_tokens) self.regression = \ self.fields[self.objective_id]['optype'] == NUMERIC - if not self.regression: - self.class_names = [category for category, _ in \ - self.fields[self.objective_id][ \ - 'summary']['categories']] - self.class_names.sort() + self.regions = \ + self.fields[self.objective_id]['optype'] == REGIONS + if not self.regression and not self.regions: # order matters - self.objective_categories = [category for \ - category, _ in self.fields[self.objective_id][ \ - "summary"]["categories"]] - - self.missing_numerics = deepnet.get('missing_numerics', False) - if 'network' in deepnet: - network = deepnet['network'] + self.objective_categories = self.categories[ + self.objective_id] + self.class_names = sorted(self.objective_categories) + + self.missing_numerics = deepnet_info.get('missing_numerics', + False) + self.operation_settings = self._add_operation_settings( + operation_settings) + if 'network' in deepnet_info: + network = deepnet_info['network'] self.network = network self.networks = network.get('networks', []) + # old deepnets might use the latter option + if self.networks: + self.output_exposition = self.networks[0].get( + "output_exposition") + else: + self.output_exposition = None + self.output_exposition = self.network.get( + "output_exposition", self.output_exposition) self.preprocess = network.get('preprocess') self.optimizer = network.get('optimizer', {}) + + if self.regions: + settings = self.operation_settings or {} + settings.update(IOU_REMOTE_SETTINGS) + else: + settings = None + + #pylint: disable=locally-disabled,broad-except + if not self.using_laminar: + try: + self.deepnet = create_model(deepnet, + settings=settings) + except Exception: + # Windows systems can fail to have some libraries + # required to predict complex deepnets with inner + # tree layers. In this case, we revert to the old + # library version iff possible. + self.using_laminar = True + + if self.using_laminar: + if self.regions: + raise ValueError("Failed to find the extra libraries" + " that are compulsory for predicting " + "regions. 
Please, install them by " + "running \n" + "pip install bigml[images]") + for _, field in self.fields.items(): + if field["optype"] == IMAGE: + raise ValueError("This deepnet cannot be predicted" + " as some required libraries are " + "not available for this OS.") + self.deepnet = None else: raise Exception("The deepnet isn't finished yet") else: @@ -151,6 +234,25 @@ def __init__(self, deepnet, api=None): " find the 'deepnet' key in the resource:\n\n%s" % deepnet) + def _add_operation_settings(self, operation_settings): + """Checks and adds the user-given operation settings """ + if operation_settings is None: + return None + if self.regression: + raise ValueError("No operating settings are allowed" + " for regressions") + allowed_settings = REGIONS_OPERATION_SETTINGS if \ + self.regions else DEFAULT_OPERATION_SETTINGS + settings = {setting: operation_settings[setting] for + setting in operation_settings.keys() if setting in + allowed_settings + } + if REGION_SCORE_ALIAS in settings: + settings[REGION_SCORE_THRESHOLD] = settings[ + REGION_SCORE_ALIAS] + del settings[REGION_SCORE_ALIAS] + return settings + def fill_array(self, input_data, unique_terms): """ Filling the input array for the network with the data in the input_data dictionary. Numeric missings are added as a new field @@ -174,7 +276,10 @@ def fill_array(self, input_data, unique_terms): category = unique_terms.get(field_id) if category is not None: category = category[0][0] - columns.append([category]) + if self.using_laminar: + columns.append([category]) + else: + columns.append(category) else: # when missing_numerics is True and the field had missings # in the training data, then we add a new "is missing?" element @@ -182,14 +287,16 @@ def fill_array(self, input_data, unique_terms): # missing or not in the input data if self.missing_numerics \ and self.fields[field_id][\ - "summary"]["missing_count"] > 0: + "summary"].get("missing_count", 0) > 0: if field_id in input_data: columns.extend([input_data[field_id], 0.0]) else: columns.extend([0.0, 1.0]) else: columns.append(input_data.get(field_id)) - return pp.preprocess(columns, self.preprocess) + if self.using_laminar: + return pp.preprocess(columns, self.preprocess) + return columns def predict(self, input_data, operating_point=None, operating_kind=None, full=False): @@ -223,45 +330,64 @@ def predict(self, input_data, operating_point=None, operating_kind=None, # Checks and cleans input_data leaving the fields used in the model unused_fields = [] - new_data = self.filter_input_data( \ + + if self.regions: + # Only a single image file is allowed as input. 
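+ # (illustrative call, assuming a local image path:
+ # deepnet.predict("path/to/image.jpg"))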
+ # Sensenet predictions are using absolute coordinates, so we need + # to change it to relative and set the decimal precision + prediction = to_relative_coordinates(input_data, + self.deepnet(input_data)) + return {"prediction": prediction} + + norm_input_data = self.filter_input_data( \ input_data, add_unused_fields=full) if full: - input_data, unused_fields = new_data - else: - input_data = new_data + norm_input_data, unused_fields = norm_input_data # Strips affixes for numeric values and casts to the final field type - cast(input_data, self.fields) + cast(norm_input_data, self.fields) # When operating_point is used, we need the probabilities # of all possible classes to decide, so se use # the `predict_probability` method + if operating_point is None and self.operation_settings is not None: + operating_point = self.operation_settings.get("operating_point") + if operating_kind is None and self.operation_settings is not None: + operating_kind = self.operation_settings.get("operating_kind") + if operating_point: if self.regression: raise ValueError("The operating_point argument can only be" " used in classifications.") return self.predict_operating( \ - input_data, operating_point=operating_point) + norm_input_data, operating_point=operating_point) if operating_kind: if self.regression: raise ValueError("The operating_point argument can only be" " used in classifications.") return self.predict_operating_kind( \ - input_data, operating_kind=operating_kind) + norm_input_data, operating_kind=operating_kind) # Computes text and categorical field expansion - unique_terms = self.get_unique_terms(input_data) - - input_array = self.fill_array(input_data, unique_terms) - - if self.networks: - prediction = self.predict_list(input_array) + unique_terms = self.get_unique_terms(norm_input_data) + input_array = self.fill_array(norm_input_data, unique_terms) + if self.deepnet is not None: + prediction = list(self.deepnet(input_array)[0]) + # prediction is now a numpy array of probabilities for classification + # and a numpy array with the value for regressions + prediction = self.to_prediction(prediction) else: - prediction = self.predict_single(input_array) + # no tensorflow + if self.networks: + prediction = self.predict_list(input_array) + else: + prediction = self.predict_single(input_array) if full: if not isinstance(prediction, dict): - prediction = {"prediction": prediction} + prediction = {"prediction": round(prediction, DECIMALS)} prediction.update({"unused_fields": unused_fields}) + if "probability" in prediction: + prediction["confidence"] = prediction.get("probability") else: if isinstance(prediction, dict): prediction = prediction["prediction"] @@ -270,7 +396,6 @@ def predict(self, input_data, operating_point=None, operating_kind=None, def predict_single(self, input_array): """Makes a prediction with a single network - """ if self.network['trees'] is not None: input_array = pp.tree_transform(input_array, self.network['trees']) @@ -279,6 +404,8 @@ def predict_single(self, input_array): self.network)) def predict_list(self, input_array): + """Makes predictions with a list of networks + """ if self.network['trees'] is not None: input_array_trees = pp.tree_transform(input_array, self.network['trees']) @@ -296,11 +423,10 @@ def model_predict(self, input_array, model): """Prediction with one model """ - layers = net.init_layers(model['layers']) y_out = net.propagate(input_array, layers) if self.regression: - y_mean, y_stdev = moments(model['output_exposition']) + y_mean, y_stdev = 
moments(self.output_exposition) y_out = net.destandardize(y_out, y_mean, y_stdev) return y_out[0][0] @@ -311,12 +437,16 @@ def to_prediction(self, y_out): """ if self.regression: + if not self.using_laminar: + y_out = y_out[0] return float(y_out) - prediction = sorted(enumerate(y_out[0]), key=lambda x: -x[1])[0] + if self.using_laminar: + y_out = y_out[0] + prediction = sorted(enumerate(y_out), key=lambda x: -x[1])[0] prediction = {"prediction": self.class_names[prediction[0]], "probability": round(prediction[1], PRECISION), "distribution": [{"category": category, - "probability": round(y_out[0][i], + "probability": round(y_out[i], PRECISION)} \ for i, category in enumerate(self.class_names)]} @@ -325,7 +455,8 @@ def to_prediction(self, y_out): def predict_probability(self, input_data, compact=False): """Predicts a probability for each possible output class, based on input values. The input fields must be a dictionary - keyed by field name or field ID. + keyed by field name or field ID. This method is not available for + regions objectives :param input_data: Input data to be predicted :param compact: If False, prediction is returned as a list of maps, one @@ -334,21 +465,32 @@ def predict_probability(self, input_data, compact=False): respectively. If True, returns a list of probabilities ordered by the sorted order of the class names. """ + if self.regions: + raise ValueError("The .predict_probability method cannot be used" + " to predict regions.") if self.regression: prediction = self.predict(input_data, full=not compact) if compact: return [prediction] - else: - return prediction - else: - distribution = self.predict(input_data, full=True)['distribution'] - distribution.sort(key=lambda x: x['category']) + return prediction + distribution = self.predict(input_data, full=True)['distribution'] + distribution.sort(key=lambda x: x['category']) - if compact: - return [category['probability'] for category in distribution] - else: - return distribution + if compact: + return [category['probability'] for category in distribution] + return distribution + def predict_confidence(self, input_data, compact=False): + """Uses probability as a confidence + """ + if compact or self.regression: + return self.predict_probability(input_data, compact=compact) + return [{"category": pred["category"], + "confidence": pred["probability"]} + for pred in self.predict_probability(input_data, + compact=compact)] + + #pylint: disable=locally-disabled,invalid-name def _sort_predictions(self, a, b, criteria): """Sorts the categories in the predicted node according to the given criteria @@ -375,6 +517,8 @@ def predict_operating_kind(self, input_data, operating_kind=None): prediction = predictions[0] prediction["prediction"] = prediction["category"] del prediction["category"] + if "probability" in prediction: + prediction["confidence"] = prediction.get("probability") return prediction def predict_operating(self, input_data, operating_point=None): @@ -383,7 +527,8 @@ def predict_operating(self, input_data, operating_point=None): """ kind, threshold, positive_class = parse_operating_point( \ - operating_point, ["probability"], self.class_names) + operating_point, ["probability"], self.class_names, + self.operation_settings) predictions = self.predict_probability(input_data, False) position = self.class_names.index(positive_class) if predictions[position][kind] > threshold: @@ -401,4 +546,13 @@ def predict_operating(self, input_data, operating_point=None): prediction = prediction[0] prediction["prediction"] = 
prediction["category"] del prediction["category"] + if "probability" in prediction: + prediction["confidence"] = prediction.get("probability") return prediction + + def data_transformations(self): + """Returns the pipeline transformations prior to the modeling + step, as a pipeline, so that they can be used in local predictions. + We avoid setting it in a Mixin to keep the current dump function + working. + """ + return get_data_transformations(self.resource_id, self.parent_id) diff --git a/bigml/domain.py b/bigml/domain.py index 865122e8..81a26ebc 100644 --- a/bigml/domain.py +++ b/bigml/domain.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -23,10 +22,14 @@ # Default domain and protocol DEFAULT_DOMAIN = 'bigml.io' DEFAULT_PROTOCOL = 'https' +DEFAULT_API_VERSION = 'andromeda' # Base Domain BIGML_DOMAIN = os.environ.get('BIGML_DOMAIN', DEFAULT_DOMAIN) +# Default API version +BIGML_API_VERSION = os.environ.get('BIGML_API_VERSION', DEFAULT_API_VERSION) + # Protocol for main server BIGML_PROTOCOL = os.environ.get('BIGML_PROTOCOL', DEFAULT_PROTOCOL) @@ -46,7 +49,7 @@ BIGML_PREDICTION_SSL_VERIFY = os.environ.get('BIGML_PREDICTION_SSL_VERIFY') -class Domain(object): +class Domain(): """A Domain object to store the remote domain information for the API The domain that serves the remote resources can be set globally for @@ -74,7 +77,7 @@ class Domain(object): def __init__(self, domain=None, prediction_domain=None, prediction_protocol=None, protocol=None, verify=None, - prediction_verify=None): + prediction_verify=None, api_version=None): """Domain object constructor.
@param: domain string Domain name @@ -88,16 +91,20 @@ def __init__(self, domain=None, prediction_domain=None, @param: prediction_verify boolean Sets on/off the SSL verification for the prediction server (when different from the general SSL verification) - + @param: api_version string Name of the API version """ # Base domain for remote resources - self.general_domain = domain or BIGML_DOMAIN - self.general_protocol = protocol or BIGML_PROTOCOL + self.general_domain = domain if domain is not None else BIGML_DOMAIN + self.general_protocol = protocol if protocol is not None else \ + BIGML_PROTOCOL + self.api_version = api_version if api_version is not None else \ + BIGML_API_VERSION # Usually, predictions are served from the same domain if prediction_domain is None: if domain is not None: self.prediction_domain = domain - self.prediction_protocol = protocol or BIGML_PROTOCOL + self.prediction_protocol = protocol if protocol is not None \ + else BIGML_PROTOCOL else: self.prediction_domain = BIGML_PREDICTION_DOMAIN self.prediction_protocol = BIGML_PREDICTION_PROTOCOL @@ -105,7 +112,8 @@ def __init__(self, domain=None, prediction_domain=None, # for instance in high-availability prediction servers else: self.prediction_domain = prediction_domain - self.prediction_protocol = prediction_protocol or \ + self.prediction_protocol = prediction_protocol if \ + prediction_protocol is not None else \ BIGML_PREDICTION_PROTOCOL # Check SSL when comming from `bigml.io` subdomains or when forced @@ -123,13 +131,15 @@ def __init__(self, domain=None, prediction_domain=None, if self.verify is None: self.verify = self.general_domain.lower().endswith(DEFAULT_DOMAIN) if self.prediction_protocol == BIGML_PROTOCOL and \ - (prediction_verify or BIGML_PREDICTION_SSL_VERIFY is not None): + (prediction_verify is not None or \ + BIGML_PREDICTION_SSL_VERIFY is not None): try: self.verify_prediction = prediction_verify \ if prediction_verify is not None else \ bool(int(BIGML_PREDICTION_SSL_VERIFY)) except ValueError: pass + if self.verify_prediction is None: self.verify_prediction = ( (self.prediction_domain.lower().endswith(DEFAULT_DOMAIN) and diff --git a/bigml/ensemble.py b/bigml/ensemble.py index 760cce71..94c96a77 100644 --- a/bigml/ensemble.py +++ b/bigml/ensemble.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2012-2019 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -20,7 +19,7 @@ This module defines an Ensemble to make predictions locally using its associated models. -This module can not only save you a few credits, but also enormously +This module can help you enormously to reduce the latency for each prediction and let you use your models offline. 
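As a usage reference, here is a minimal sketch of the local prediction workflow this module enables, assuming a previously created ensemble; the ensemble ID and the input values are placeholders, and credentials are read from the BIGML_USERNAME and BIGML_API_KEY environment variables:

from bigml.api import BigML
from bigml.ensemble import Ensemble

# Building the local ensemble retrieves its component models once;
# later predictions are computed offline, without further API calls
api = BigML()
local_ensemble = Ensemble("ensemble/5143a51a37203f2cf7000972", api=api)

# full=True returns a dict that includes the prediction, its confidence
# and the input fields that were not used by the models
result = local_ensemble.predict(
    {"petal length": 3, "petal width": 1}, full=True)
print(result["prediction"], result.get("confidence"))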
@@ -45,34 +44,30 @@ import os from functools import cmp_to_key +from copy import deepcopy -from bigml.api import BigML, get_ensemble_id, get_model_id, get_api_connection -from bigml.model import Model, print_distribution, \ - parse_operating_point, sort_categories +from bigml.exceptions import NoRootDecisionTree +from bigml.api import get_ensemble_id, get_model_id, get_api_connection +from bigml.model import Model, parse_operating_point, sort_categories +from bigml.generators.model import print_distribution from bigml.basemodel import retrieve_resource, ONLY_MODEL, EXCLUDE_FIELDS from bigml.model import LAST_PREDICTION from bigml.multivote import MultiVote from bigml.multivote import PLURALITY_CODE, PROBABILITY_CODE, CONFIDENCE_CODE from bigml.multimodel import MultiModel -from bigml.basemodel import BaseModel, print_importance -from bigml.modelfields import ModelFields, lacks_info +from bigml.basemodel import BaseModel, print_importance, check_local_but_fields +from bigml.modelfields import ModelFields, NUMERIC from bigml.multivotelist import MultiVoteList -from bigml.util import cast - +from bigml.tree_utils import add_distribution +from bigml.util import cast, use_cache, load, dump, dumps, \ + get_data_transformations +from bigml.constants import DECIMALS BOOSTING = 1 LOGGER = logging.getLogger('BigML') OPERATING_POINT_KINDS = ["probability", "confidence", "votes"] -def use_cache(cache_get): - """Checks whether the user has provided a cache get function to retrieve - local models. - - """ - return cache_get is not None and hasattr(cache_get, '__call__') - - def boosted_list_error(boosting): """The local ensemble cannot be built from a list of boosted models @@ -90,71 +85,145 @@ class Ensemble(ModelFields): that can be used to generate predictions locally. The expected arguments are: - ensemble: ensemble object or id, list of model objects or - ids or list of local model objects (see Model) - api: connection object. If None, a new connection object is - instantiated. - max_models: integer that limits the number of models instantiated and - held in memory at the same time while predicting. If None, - no limit is set and all the ensemble models are - instantiated and held in memory permanently. - cache_get: user-provided function that should return the JSON - information describing the model or the corresponding - Model object. Can be used to read these objects from a - cache storage. """ - def __init__(self, ensemble, - api=None, - max_models=None, - cache_get=None): + #pylint: disable=locally-disabled,broad-except,access-member-before-definition + def __init__(self, ensemble, api=None, max_models=None, cache_get=None, + operation_settings=None): + """ + :param ensemble: ensemble object or id, list of ensemble model + objects or ids or list of ensemble obj and local model + objects (see Model) + :param api: connection object. If None, a new connection object is + instantiated. + :param max_models: integer that limits the number of models instantiated + and held in memory at the same time while predicting. + If None, no limit is set and all the ensemble models + are instantiated and held in memory permanently. + :param cache_get: user-provided function that should return the JSON + information describing the model or the corresponding + Ensemble object. Can be used to read these objects + from a cache storage. 
+ :param operation_settings: Dict object that contains operating options + """ + self.model_splits = [] + self.multi_model = None + self.api = get_api_connection(api) + self.fields = None + self.class_names = None + self.default_numeric_value = None + if use_cache(cache_get): + # using a cache to store the model attributes + self.__dict__ = load(get_ensemble_id(ensemble), cache_get) + self.api = get_api_connection(api) + self.operation_settings = self._add_operation_settings( + operation_settings) + if len(self.models_splits) == 1: + # retrieve the models from a cache get function + try: + models = [Model(model_id, cache_get=cache_get, + operation_settings=operation_settings) + for model_id + in self.models_splits[0]] + except Exception as exc: + raise Exception('Error while calling the user-given' + ' function %s: %s' % + (cache_get.__name__, str(exc))) + self.multi_model = MultiModel( + models, + self.api, + fields=self.fields, + class_names=self.class_names, + cache_get=cache_get, + operation_settings=operation_settings) + return self.resource_id = None + self.name = None + self.description = None + self.parent_id = None self.objective_id = None self.distributions = None self.distribution = None - self.models_splits = [] - self.multi_model = None self.boosting = None self.boosting_offsets = None self.cache_get = None self.regression = False - self.fields = None - self.class_names = None self.importance = {} query_string = ONLY_MODEL no_check_fields = False - self.api = get_api_connection(api) self.input_fields = [] + child_api = self.api + + + models = [] if isinstance(ensemble, list): - if all([isinstance(model, Model) for model in ensemble]): + try: + if isinstance(ensemble[0], dict) and \ + get_ensemble_id(ensemble[0]): + number_of_models = len(ensemble) - 1 + model_list = ensemble + ensemble = model_list[0] + if len(ensemble["object"]["models"]) == number_of_models: + model_list = model_list[1:] + else: + raise ValueError("The provided list of models does not" + " match the ensemble list of models.") + try: + models = [Model( + model, operation_settings=operation_settings) + for model in model_list] + except Exception: + models = model_list + else: + # only list of models (old ensembles) + models = ensemble + ensemble=None + except ValueError: + # only list of models (old ensembles) models = ensemble - self.model_ids = [local_model.resource_id for local_model in - models] - else: - try: - models = [get_model_id(model) for model in ensemble] - self.model_ids = models - except ValueError, exc: - raise ValueError('Failed to verify the list of models.' - ' Check your model id values: %s' % - str(exc)) - - else: + ensemble = None + if models: + if all(isinstance(model, Model) for model in models): + self.model_ids = [local_model.resource_id for local_model in + models] + else: + try: + models = [get_model_id(model) for model in models] + self.model_ids = models + except ValueError as exc: + raise ValueError('Failed to verify the list of models.' 
+ ' Check your model id values: %s' % + str(exc)) + if ensemble: ensemble = self.get_ensemble_resource(ensemble) self.resource_id = get_ensemble_id(ensemble) - - if lacks_info(ensemble, inner_key="ensemble"): + shared_ref = self.resource_id.replace("shared/", "") if \ + self.resource_id.startswith("shared/") else None + if shared_ref is not None: + child_api = deepcopy(self.api) + child_api.shared_ref = shared_ref + elif hasattr(self.api, "shared_ref") and \ + self.api.shared_ref is not None: + child_api = deepcopy(self.api) + # adding the resource ID to the sharing chain + child_api.shared_ref += ",%s" % self.resource_id + + if not check_local_but_fields(ensemble): # avoid checking fields because of old ensembles ensemble = retrieve_resource(self.api, self.resource_id, no_check_fields=True) + self.parent_id = ensemble.get('object', {}).get('dataset') + self.name = ensemble.get('object', {}).get('name') + self.description = ensemble.get('object', {}).get('description') if ensemble['object'].get('type') == BOOSTING: self.boosting = ensemble['object'].get('boosting') - models = ensemble['object']['models'] self.distributions = ensemble['object'].get('distributions', []) self.importance = ensemble['object'].get('importance', []) - self.model_ids = models + self.model_ids = ensemble['object']['models'] + if not models: + models = self.model_ids # new ensembles have the fields structure if ensemble['object'].get('ensemble'): self.fields = ensemble['object'].get( \ @@ -163,6 +232,7 @@ def __init__(self, ensemble, query_string = EXCLUDE_FIELDS no_check_fields = True self.input_fields = ensemble['object'].get('input_fields') + self.default_numeric_value = ensemble.get('default_numeric_value') number_of_models = len(models) if max_models is None: @@ -175,16 +245,18 @@ def __init__(self, ensemble, if use_cache(cache_get): # retrieve the models from a cache get function try: - models = [cache_get(model_id) for model_id + models = [Model(model_id, cache_get=cache_get, + operation_settings=operation_settings) + for model_id in self.models_splits[0]] self.cache_get = cache_get - except Exception, exc: + except Exception as exc: raise Exception('Error while calling the user-given' ' function %s: %s' % (cache_get.__name__, str(exc))) else: models = [retrieve_resource( \ - self.api, + child_api, model_id, query_string=query_string, no_check_fields=no_check_fields) @@ -198,15 +270,17 @@ def __init__(self, ensemble, if use_cache(cache_get): # retrieve the models from a cache get function try: - model = cache_get(self.models_splits[0][0]) + model = Model(self.models_splits[0][0], + cache_get=cache_get, + operation_settings=operation_settings) self.cache_get = cache_get - except Exception, exc: + except Exception as exc: raise Exception('Error while calling the user-given' ' function %s: %s' % (cache_get.__name__, str(exc))) else: model = retrieve_resource( \ - self.api, + child_api, self.models_splits[0][0], query_string=query_string, no_check_fields=no_check_fields) @@ -218,7 +292,7 @@ def __init__(self, ensemble, self.distributions = [] for model in models: self.distributions.append({ - 'training': {'categories': model.tree.distribution} + 'training': model.root_distribution }) except AttributeError: self.distributions = [model['object']['model']['distribution'] @@ -232,19 +306,9 @@ def __init__(self, ensemble, max_models=max_models) if self.fields: - summary = self.fields[self.objective_id]['summary'] - if 'bins' in summary: - distribution = summary['bins'] - elif 'counts' in summary: - distribution = 
summary['counts'] - elif 'categories' in summary: - distribution = summary['categories'] - else: - distribution = [] - self.distribution = distribution - + add_distribution(self) self.regression = \ - self.fields[self.objective_id].get('optype') == 'numeric' + self.fields[self.objective_id].get('optype') == NUMERIC if self.boosting: self.boosting_offsets = ensemble['object'].get('initial_offset', 0) \ @@ -269,11 +333,16 @@ def __init__(self, ensemble, ModelFields.__init__( \ self, self.fields, objective_id=self.objective_id) + if len(self.models_splits) == 1: - self.multi_model = MultiModel(models, - self.api, - fields=self.fields, - class_names=self.class_names) + self.multi_model = MultiModel( + models, + self.api, + fields=self.fields, + class_names=self.class_names, + operation_settings=operation_settings) + for index, model in enumerate(self.multi_model.models): + self.multi_model.models[index].term_forms = self.term_forms def _add_models_attrs(self, model, max_models=None): """ Adds the boosting and fields info when the ensemble is built from @@ -300,7 +369,7 @@ def get_ensemble_resource(self, ensemble): - an ensemble id """ # the string can be a path to a JSON file - if isinstance(ensemble, basestring): + if isinstance(ensemble, str): try: path = os.path.dirname(os.path.abspath(ensemble)) with open(ensemble) as ensemble_file: @@ -310,8 +379,7 @@ def get_ensemble_resource(self, ensemble): raise ValueError("The JSON file does not seem" " to contain a valid BigML ensemble" " representation.") - else: - self.api = BigML(storage=path) + self.api.storage = path except IOError: # if it is not a path, it can be an ensemble id self.resource_id = get_ensemble_id(ensemble) @@ -321,9 +389,8 @@ def get_ensemble_resource(self, ensemble): self.api.error_message(ensemble, resource_type='ensemble', method='get')) - else: - raise IOError("Failed to open the expected JSON file" - " at %s" % ensemble) + raise IOError("Failed to open the expected JSON file" + " at %s" % ensemble) except ValueError: raise ValueError("Failed to interpret %s." 
" JSON file expected.") @@ -384,7 +451,7 @@ def predict_probability(self, input_data, missing_strategy) if not compact: - names_probabilities = zip(self.class_names, output) + names_probabilities = list(zip(self.class_names, output)) output = [{'category': class_name, 'probability': probability} for class_name, probability in names_probabilities] @@ -432,7 +499,7 @@ def predict_confidence(self, input_data, missing_strategy, method=CONFIDENCE_CODE) if not compact: - names_confidences = zip(self.class_names, output) + names_confidences = list(zip(self.class_names, output)) output = [{'category': class_name, 'confidence': confidence} for class_name, confidence in names_confidences] @@ -478,7 +545,7 @@ def predict_votes(self, input_data, missing_strategy, method=PLURALITY_CODE) if not compact: - names_votes = zip(self.class_names, output) + names_votes = list(zip(self.class_names, output)) output = [{'category': class_name, 'votes': k} for class_name, k in names_votes] @@ -504,6 +571,8 @@ def _combine_distributions(self, input_data, missing_strategy, api=self.api, fields=self.fields, class_names=self.class_names) + for index, _ in enumerate(multi_model.models): + multi_model.models[index].term_forms = self.term_forms votes_split = multi_model.generate_votes_distribution( \ input_data, @@ -527,7 +596,7 @@ def _get_models(self, models_split): try: models = [self.cache_get(model_id) for model_id in models_split] - except Exception, exc: + except Exception as exc: raise Exception('Error while calling the ' 'user-given' ' function %s: %s' % @@ -540,6 +609,7 @@ def _get_models(self, models_split): return models + #pylint: disable=locally-disabled,invalid-name def _sort_predictions(self, a, b, criteria): """Sorts the categories in the predicted node according to the given criteria @@ -556,7 +626,8 @@ def predict_operating(self, input_data, """ kind, threshold, positive_class = parse_operating_point( \ - operating_point, OPERATING_POINT_KINDS, self.class_names) + operating_point, OPERATING_POINT_KINDS, + self.class_names, self.operation_settings) try: predict_method = None @@ -627,6 +698,7 @@ def predict_operating_kind(self, input_data, del prediction["category"] return prediction + #pylint: disable=locally-disabled,protected-access def predict(self, input_data, method=None, options=None, missing_strategy=LAST_PREDICTION, operating_point=None, operating_kind=None, median=False, @@ -689,21 +761,23 @@ def predict(self, input_data, method=None, """ # Checks and cleans input_data leaving the fields used in the model - new_data = self.filter_input_data( \ + norm_input_data = self.filter_input_data( \ input_data, add_unused_fields=full) unused_fields = None if full: - input_data, unused_fields = new_data - else: - input_data = new_data + norm_input_data, unused_fields = norm_input_data # Strips affixes for numeric values and casts to the final field type - cast(input_data, self.fields) + cast(norm_input_data, self.fields) if median and method is None: # predictions with median are only available with old combiners method = PLURALITY_CODE + if operating_point is None and self.operation_settings is not None: + operating_point = self.operation_settings.get("operating_point") + if operating_kind is None and self.operation_settings is not None: + operating_kind = self.operation_settings.get("operating_kind") if method is None and operating_point is None and \ operating_kind is None and not median: @@ -716,13 +790,12 @@ def predict(self, input_data, method=None, raise ValueError("The operating_point argument can 
only be" " used in classifications.") prediction = self.predict_operating( \ - input_data, + norm_input_data, missing_strategy=missing_strategy, operating_point=operating_point) if full: return prediction - else: - return prediction["prediction"] + return prediction["prediction"] if operating_kind: if self.regression: @@ -730,15 +803,14 @@ def predict(self, input_data, method=None, # combiners method = 1 if operating_kind == "confidence" else 0 return self.predict( \ - input_data, method=method, + norm_input_data, method=method, options=options, missing_strategy=missing_strategy, operating_point=None, operating_kind=None, full=full) - else: - prediction = self.predict_operating_kind( \ - input_data, - missing_strategy=missing_strategy, - operating_kind=operating_kind) - return prediction + prediction = self.predict_operating_kind( \ + norm_input_data, + missing_strategy=missing_strategy, + operating_kind=operating_kind) + return prediction if len(self.models_splits) > 1: # If there's more than one chunk of models, they must be @@ -750,9 +822,11 @@ def predict(self, input_data, method=None, multi_model = MultiModel(models, api=self.api, fields=self.fields) + for index, _ in enumerate(multi_model.models): + multi_model.models[index].term_forms = self.term_forms votes_split = multi_model._generate_votes( - input_data, + norm_input_data, missing_strategy=missing_strategy, unused_fields=unused_fields) if median: @@ -763,7 +837,7 @@ def predict(self, input_data, method=None, # When only one group of models is found you use the # corresponding multimodel to predict votes_split = self.multi_model._generate_votes( - input_data, missing_strategy=missing_strategy, + norm_input_data, missing_strategy=missing_strategy, unused_fields=unused_fields) votes = MultiVote(votes_split.predictions, @@ -771,6 +845,7 @@ def predict(self, input_data, method=None, if median: for prediction in votes.predictions: prediction['prediction'] = prediction['median'] + if self.boosting is not None and not self.regression: categories = [ \ d[0] for d in @@ -778,12 +853,14 @@ def predict(self, input_data, method=None, options = {"categories": categories} result = votes.combine(method=method, options=options, full=full) if full: - unused_fields = set(input_data.keys()) + unused_fields = set(norm_input_data.keys()) for prediction in votes.predictions: unused_fields = unused_fields.intersection( \ set(prediction.get("unused_fields", []))) if not isinstance(result, dict): - result = {"prediction": result} + result = {"prediction": round(result, DECIMALS)} + if "probability" in result and "confidence" not in result: + result["confidence"] = result["probability"] result['unused_fields'] = list(unused_fields) return result @@ -798,9 +875,9 @@ def field_importance_data(self): if self.importance: field_importance = self.importance field_names = {field_id: {'name': self.fields[field_id]["name"]} \ - for field_id in field_importance.keys()} + for field_id in list(field_importance.keys())} return [list(importance) for importance in \ - sorted(field_importance.items(), key=lambda x: x[1], + sorted(list(field_importance.items()), key=lambda x: x[1], reverse=True)], field_names if (self.distributions is not None and @@ -809,8 +886,7 @@ def field_importance_data(self): # Extracts importance from ensemble information importances = [model_info['importance'] for model_info in self.distributions] - for index in range(0, len(importances)): - model_info = importances[index] + for model_info in importances: for field_info in model_info: field_id = 
field_info[0] if field_id not in field_importance: @@ -834,7 +910,7 @@ def field_importance_data(self): for field_id in field_importance: field_importance[field_id] /= number_of_models return [list(importance) for importance in \ - sorted(field_importance.items(), key=lambda x: x[1], + sorted(list(field_importance.items()), key=lambda x: x[1], reverse=True)], field_names def print_importance(self, out=sys.stdout): @@ -882,19 +958,19 @@ def summarize(self, out=sys.stdout): distribution = self.get_data_distribution("training") if distribution: - out.write(u"Data distribution:\n") + out.write("Data distribution:\n") print_distribution(distribution, out=out) - out.write(u"\n\n") + out.write("\n\n") if not self.boosting: predictions = self.get_data_distribution("predictions") if predictions: - out.write(u"Predicted distribution:\n") + out.write("Predicted distribution:\n") print_distribution(predictions, out=out) - out.write(u"\n\n") + out.write("\n\n") - out.write(u"Field importance:\n") + out.write("Field importance:\n") self.print_importance(out=out) out.flush() @@ -913,24 +989,58 @@ def all_model_fields(self, max_models=None): models.extend(split) else: models = self.model_ids + for index, model_id in enumerate(models): - if isinstance(model_id, Model): - local_model = model_id - elif self.cache_get is not None: - local_model = self.cache_get(model_id) - else: - local_model = Model(model_id, self.api) - if (max_models is not None and index > 0 and - index % max_models == 0): - gc.collect() - fields.update(local_model.fields) - if (objective_id is not None and - objective_id != local_model.objective_id): - # the models' objective field have different ids, no global id - no_objective_id = True - else: - objective_id = local_model.objective_id + try: + if isinstance(model_id, Model): + local_model = model_id + elif self.cache_get is not None: + local_model = self.cache_get(model_id) + else: + local_model = Model(model_id, self.api) + if (max_models is not None and index > 0 and + index % max_models == 0): + gc.collect() + fields.update(local_model.fields) + if (objective_id is not None and + objective_id != local_model.objective_id): + # the models' objective fields have different ids, no global id + no_objective_id = True + else: + objective_id = local_model.objective_id + except NoRootDecisionTree: + pass if no_objective_id: objective_id = None gc.collect() return fields, objective_id + + def data_transformations(self): + """Returns the pipeline transformations prior to the modeling + step, as a pipeline, so that they can be used in local predictions. + We avoid setting it in a Mixin to keep the current dump function working.
+ """ + return get_data_transformations(self.resource_id, self.parent_id) + + def dump(self, output=None, cache_set=None): + """Uses msgpack to serialize the resource object. + If cache_set is set to a cache storing function, it is called to + store the serialization. + + """ + self_vars = vars(self).copy() + del self_vars["api"] + if "multi_model" in self_vars: + for model in self_vars["multi_model"].models: + model.dump(output=output, cache_set=cache_set) + del self_vars["multi_model"] + dump(self_vars, output=output, cache_set=cache_set) + + def dumps(self): + """Uses msgpack to serialize the resource object to a string + + """ + self_vars = vars(self).copy() + del self_vars["api"] + if "multi_model" in self_vars: + del self_vars["multi_model"] + return dumps(self_vars) diff --git a/bigml/ensemblepredictor.py b/bigml/ensemblepredictor.py index f4db2711..cab2fbdd 100644 --- a/bigml/ensemblepredictor.py +++ b/bigml/ensemblepredictor.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2017-2019 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -33,22 +32,24 @@ import logging import json -from bigml.api import BigML, get_ensemble_id -from bigml.model import print_distribution +from bigml.api import get_ensemble_id, get_api_connection +from bigml.generators.model import print_distribution from bigml.constants import STORAGE from bigml.multivote import MultiVote from bigml.multivote import PLURALITY_CODE -from bigml.basemodel import BaseModel, print_importance, retrieve_resource -from bigml.modelfields import lacks_info -from bigml.out_model.pythonmodel import PythonModel +from bigml.basemodel import BaseModel, print_importance, retrieve_resource, \ + check_local_info +from bigml.model import Model from bigml.flattree import FlatTree +from bigml.util import NUMERIC +from bigml.tree_utils import add_distribution BOOSTING = 1 LOGGER = logging.getLogger('BigML') -class EnsemblePredictor(object): +class EnsemblePredictor(): """A local predictive Ensemble.
Uses a number of BigML models to build an ensemble local version @@ -68,10 +69,6 @@ class EnsemblePredictor(object): def __init__(self, ensemble, model_fns_dir, api=None): - if api is None: - self.api = BigML(storage=STORAGE) - else: - self.api = api self.resource_id = None # to be deprecated self.ensemble_id = None @@ -87,12 +84,13 @@ def __init__(self, ensemble, model_fns_dir, api=None): self.class_names = None self.importance = {} self.predict_functions = [] + self.api = get_api_connection(api) ensemble = self.get_ensemble_resource(ensemble) self.resource_id = get_ensemble_id(ensemble) self.ensemble_id = self.resource_id - if lacks_info(ensemble, inner_key="ensemble"): + if not check_local_info(ensemble): # avoid checking fields because of old ensembles ensemble = retrieve_resource(self.api, self.resource_id, no_check_fields=True) @@ -119,19 +117,10 @@ def __init__(self, ensemble, model_fns_dir, api=None): " command.") if self.fields: - summary = self.fields[self.objective_id]['summary'] - if 'bins' in summary: - distribution = summary['bins'] - elif 'counts' in summary: - distribution = summary['counts'] - elif 'categories' in summary: - distribution = summary['categories'] - else: - distribution = [] - self.distribution = distribution + add_distribution(self) self.regression = \ - self.fields[self.objective_id].get('optype') == 'numeric' + self.fields[self.objective_id].get('optype') == NUMERIC if self.boosting: self.boosting_offsets = ensemble['object'].get('initial_offset', 0) \ @@ -181,15 +170,17 @@ def get_ensemble_resource(self, ensemble): - an ensemble id """ # the string can be a path to a JSON file - if isinstance(ensemble, basestring): + if isinstance(ensemble, str): try: with open(ensemble) as ensemble_file: + path = os.path.dirname(ensemble) ensemble = json.load(ensemble_file) self.resource_id = get_ensemble_id(ensemble) if self.resource_id is None: raise ValueError("The JSON file does not seem" " to contain a valid BigML ensemble" " representation.") + self.api.storage = path except IOError: # if it is not a path, it can be an ensemble id self.resource_id = get_ensemble_id(ensemble) @@ -199,9 +190,8 @@ def get_ensemble_resource(self, ensemble): self.api.error_message(ensemble, resource_type='ensemble', method='get')) - else: - raise IOError("Failed to open the expected JSON file" - " at %s" % ensemble) + raise IOError("Failed to open the expected JSON file" + " at %s" % ensemble) except ValueError: raise ValueError("Failed to interpret %s." 
" JSON file expected.") @@ -261,9 +251,9 @@ def field_importance_data(self): if self.importance: field_importance = self.importance field_names = {field_id: {'name': self.fields[field_id]["name"]} \ - for field_id in field_importance.keys()} + for field_id in list(field_importance.keys())} return [list(importance) for importance in \ - sorted(field_importance.items(), key=lambda x: x[1], + sorted(list(field_importance.items()), key=lambda x: x[1], reverse=True)], field_names if (self.distributions is not None and @@ -272,8 +262,7 @@ def field_importance_data(self): # Extracts importance from ensemble information importances = [model_info['importance'] for model_info in self.distributions] - for index in range(0, len(importances)): - model_info = importances[index] + for model_info in importances: for field_info in model_info: field_id = field_info[0] if field_id not in field_importance: @@ -297,7 +286,7 @@ def field_importance_data(self): for field_id in field_importance: field_importance[field_id] /= number_of_models return [list(importance) for importance in \ - sorted(field_importance.items(), key=lambda x: x[1], + sorted(list(field_importance.items()), key=lambda x: x[1], reverse=True)], field_names def print_importance(self, out=sys.stdout): @@ -345,35 +334,39 @@ def summarize(self, out=sys.stdout): distribution = self.get_data_distribution("training") if distribution: - out.write(u"Data distribution:\n") + out.write("Data distribution:\n") print_distribution(distribution, out=out) - out.write(u"\n\n") + out.write("\n\n") if not self.boosting: predictions = self.get_data_distribution("predictions") if predictions: - out.write(u"Predicted distribution:\n") + out.write("Predicted distribution:\n") print_distribution(predictions, out=out) - out.write(u"\n\n") + out.write("\n\n") - out.write(u"Field importance:\n") + out.write("Field importance:\n") self.print_importance(out=out) out.flush() - def generate_models(self, directory='./storage'): + def generate_models(self, directory=STORAGE): """Generates the functions for the models in the ensemble """ if not os.path.isfile(directory) and not os.path.exists(directory): os.makedirs(directory) - open(os.path.join(directory, "__init__.py"), "w").close() + with open(os.path.join(directory, "__init__.py"), mode='w'): + pass for model_id in self.model_ids: - local_model = PythonModel(model_id, api=self.api, - fields=self.fields) - local_flat_tree = FlatTree(local_model.tree, local_model.boosting) + local_model = Model(model_id, api=self.api, + fields=self.fields) + local_flat_tree = FlatTree(local_model.tree, local_model.offsets, + local_model.fields, + local_model.objective_id, + local_model.boosting) with open(os.path.join(directory, "%s.py" % model_id.replace("/", "_")), "w") \ as handler: local_flat_tree.python(out=handler, - docstring="Model %s" % model_id ) + docstring="Model %s" % model_id) diff --git a/bigml/evaluation.py b/bigml/evaluation.py new file mode 100644 index 00000000..76726589 --- /dev/null +++ b/bigml/evaluation.py @@ -0,0 +1,123 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2023-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A local Evaluation object. + +This module defines a local class to handle the results of an evaluation + +""" +import json + + +from bigml.api import get_api_connection, ID_GETTERS +from bigml.basemodel import retrieve_resource, get_resource_dict + +CLASSIFICATION_METRICS = [ + "accuracy", "precision", "recall", "phi", "phi_coefficient", + "f_measure", "confusion_matrix", "per_class_statistics"] + +REGRESSION_METRICS = ["mean_absolute_error", "mean_squared_error", "r_squared"] + + +class ClassificationEval(): + """A class to store the classification metrics """ + def __init__(self, name, per_class_statistics): + + self.name = name + for statistics in per_class_statistics: + if statistics["class_name"] == name: + break + for metric in CLASSIFICATION_METRICS: + if metric in statistics.keys(): + setattr(self, metric, statistics.get(metric)) + + +class Evaluation(): + """A class to deal with the information in an evaluation result + + """ + def __init__(self, evaluation, api=None): + + self.resource_id = None + self.model_id = None + self.test_dataset_id = None + self.regression = None + self.full = None + self.random = None + self.error = None + self.error_message = None + self.api = get_api_connection(api) + + try: + self.resource_id, evaluation = get_resource_dict( \ + evaluation, "evaluation", self.api, no_check_fields=True) + except ValueError as resource: + try: + evaluation = json.loads(str(resource)) + self.resource_id = evaluation["resource"] + except ValueError: + raise ValueError("The evaluation resource was faulty: \n%s" % \ + resource) + + if 'object' in evaluation and isinstance(evaluation['object'], dict): + evaluation = evaluation['object'] + self.status = evaluation["status"] + self.error = self.status.get("error") + if self.error is not None: + self.error_message = self.status.get("message") + else: + self.model_id = evaluation["model"] + self.test_dataset_id = evaluation["dataset"] + + if 'result' in evaluation and \ + isinstance(evaluation['result'], dict): + self.full = evaluation.get("result", {}).get("model") + self.random = evaluation.get("result", {}).get("random") + self.regression = not self.full.get("confusion_matrix") + if self.regression: + self.add_metrics(self.full, REGRESSION_METRICS) + self.mean = evaluation.get("result", {}).get("mean") + else: + self.add_metrics(self.full, CLASSIFICATION_METRICS) + self.mode = evaluation.get("result", {}).get("mode") + self.classes = evaluation.get("result", {}).get( + "class_names") + else: + raise ValueError("Failed to find the correct evaluation" + " structure.") + if not self.regression: + self.positive_class = ClassificationEval(self.classes[-1], + self.per_class_statistics) + + def add_metrics(self, metrics_info, metrics_list, obj=None): + """Adding the metrics in the `metrics_info` dictionary as attributes + in the object passed as argument. If None is given, the metrics will + be added to the self object. 
+ """ + if obj is None: + obj = self + + for metric in metrics_list: + setattr(obj, metric, metrics_info.get(metric, + metrics_info.get("average_%s" % metric))) + + def set_positive_class(self, positive_class): + """Changing the positive class """ + if positive_class is None or positive_class not in self.classes: + raise ValueError("The possible classes are: %s" % + ", ".join(self.classes)) + self.positive_class = ClassificationEval(positive_class, + self.per_class_statistics) diff --git a/bigml/tests/read_anomaly_steps.py b/bigml/exceptions.py similarity index 59% rename from bigml/tests/read_anomaly_steps.py rename to bigml/exceptions.py index cdece01c..71e965f6 100644 --- a/bigml/tests/read_anomaly_steps.py +++ b/bigml/exceptions.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2014-2019 BigML +# Copyright 2021-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,15 +14,20 @@ # License for the specific language governing permissions and limitations # under the License. -import os -from world import world -from nose.tools import eq_ +"""Declared exceptions. -from bigml.api import HTTP_OK + -#@step(r'I get the anomaly detector "(.*)"') -def i_get_the_anomaly(step, anomaly): - resource = world.api.get_anomaly(anomaly) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.anomaly = resource['object'] +class ResourceException(Exception): + """Base class for any exception that arises from a badly structured + resource + + """ + + +class NoRootDecisionTree(ResourceException): + """The decision tree structure has no "root" attribute """ + + + +class FaultyResourceError(Exception): + """Exception to be raised when retrieving a faulty resource """ diff --git a/bigml/execution.py b/bigml/execution.py new file mode 100644 index 00000000..626cd06e --- /dev/null +++ b/bigml/execution.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A local Execution object. + +This module defines a local class to handle the results of an execution + +""" +import json + + +from bigml.api import get_api_connection, ID_GETTERS +from bigml.basemodel import retrieve_resource + + + +def get_resource_dict(resource, resource_type, api=None): + """Extracting the resource JSON info as a dict from the first argument of + the local object constructors, which can be: + + - the path to a file that contains the JSON + - the ID of the resource + - the resource dict itself + + """ + + get_id = ID_GETTERS[resource_type] + resource_id = None + # the string can be a path to a JSON file + if isinstance(resource, str): + try: + with open(resource) as resource_file: + resource = json.load(resource_file) + resource_id = get_id(resource) + if resource_id is None: + raise ValueError("The JSON file does not seem" + " to contain a valid BigML %s" + " representation."
% resource_type) + except IOError: + # if it is not a path, it can be a model id + resource_id = get_id(resource) + if resource_id is None: + if resource.find("%s/" % resource_type) > -1: + raise Exception( + api.error_message(resource, + resource_type=resource_type, + method="get")) + raise IOError("Failed to open the expected JSON file" + " at %s." % resource) + except ValueError: + raise ValueError("Failed to interpret %s." + " JSON file expected." % resource) + + if not (isinstance(resource, dict) and 'resource' in resource and + resource['resource'] is not None): + resource = retrieve_resource(api, resource_id, retries=0) + else: + resource_id = get_id(resource) + + return resource_id, resource + + +class Execution(): + """A class to deal with the information in an execution result + + """ + def __init__(self, execution, api=None): + + self.resource_id = None + self.outputs = None + self.output_types = None + self.output_resources = None + self.result = None + self.status = None + self.source_location = None + self.error = None + self.error_message = None + self.error_location = None + self.call_stack = None + self.api = get_api_connection(api) + + try: + self.resource_id, execution = get_resource_dict( \ + execution, "execution", self.api) + except ValueError as resource: + try: + execution = json.loads(str(resource)) + self.resource_id = execution["resource"] + except ValueError: + raise ValueError("The execution resource was faulty: \n%s" % \ + resource) + + if 'object' in execution and isinstance(execution['object'], dict): + execution = execution['object'] + self.status = execution["status"] + self.error = self.status.get("error") + if self.error is not None: + self.error_message = self.status.get("message") + self.error_location = self.status.get("source_location") + self.call_stack = self.status.get("call_stack") + else: + self.source_location = self.status.get("source_location") + if 'execution' in execution and \ + isinstance(execution['execution'], dict): + execution = execution.get('execution') + self.result = execution.get("result") + self.outputs = dict((output[0], output[1]) \ + for output in execution.get("outputs")) + self.output_types = dict((output[0], output[2]) \ + for output in execution.get("outputs")) + self.output_resources = dict((res["variable"], res["id"]) \ + for res in execution.get("output_resources")) + self.execution = execution diff --git a/bigml/featurizer.py b/bigml/featurizer.py new file mode 100644 index 00000000..0a6d9e33 --- /dev/null +++ b/bigml/featurizer.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2022-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A Featurizer to generate features for composed fields. + +This module defines a Featurizer class to hold the information associated +to the subfields derived from datetime fields. +It is used for local predictions. 
+ +""" +from bigml_chronos import chronos +from bigml.constants import DATETIME + + +DATE_FNS = { + "day-of-month": lambda x: x.day, + "day-of-week": lambda x: x.weekday() + 1, + "millisecond": lambda x: x.microsecond / 1000} + +IMAGE_PROVENANCE = ["dimensions", "average_pixels", "level_histogram", + "histogram_of_gradients", "pretrained_cnn", "wavelet_subbands"] + +def expand_date(res_object, parent_id, date): + """ Retrieves all the values of the subfields generated from + a parent datetime field + + """ + expanded = {} + timeformats = res_object.fields[parent_id].get('time_formats', {}) + try: + parsed_date = chronos.parse(date, format_names=timeformats) + except ValueError: + return {} + for fid, ftype in list(res_object.subfields[parent_id].items()): + date_fn = DATE_FNS.get(ftype) + if date_fn is not None: + expanded.update({fid: date_fn(parsed_date)}) + else: + expanded.update({fid: getattr(parsed_date, ftype)}) + return expanded + + +class Featurizer: + """A class to generate the components derived from a composed field """ + + def __init__(self, fields, input_fields, selected_fields=None, + preferred_only=True): + self.fields = fields + self.input_fields = input_fields + self.subfields = {} + self.generators = {} + self.preferred_only = preferred_only + self.selected_fields = self.add_subfields( + selected_fields, preferred_only=preferred_only) + + def add_subfields(self, selected_fields=None, preferred_only=True): + """Adding the subfields information in the fields structure and the + generating functions for the subfields values. + """ + # filling preferred fields with preferred input fields + fields = selected_fields or self.fields + + if selected_fields is None: + selected_fields = {} + selected_fields.update({field_id: field for field_id, field \ + in fields.items() if field_id in self.input_fields \ + and (not preferred_only or self.fields[field_id].get( + "preferred", True))}) + + # computing the subfields generated from parsing datetimes + for fid, finfo in list(selected_fields.items()): + + # datetime subfields + if finfo.get('parent_optype', False) == DATETIME: + parent_id = finfo["parent_ids"][0] + subfield = {fid: finfo["datatype"]} + if parent_id in list(self.subfields.keys()): + self.subfields[parent_id].update(subfield) + else: + selected_fields[parent_id] = self.fields[parent_id] + self.subfields[parent_id] = subfield + self.generators.update({parent_id: expand_date}) + elif finfo.get('provenance', False) in IMAGE_PROVENANCE: + raise ValueError("This model uses image-derived fields. " + "Please, use the pip install bigml[images] " + "option to install the libraries required " + "for local predictions in this case.") + + return selected_fields + + def extend_input(self, input_data): + """Computing the values for the generated subfields and adding them + to the original input data. Parent fields will be removed if the + `preferred_only` option is set, as they are not used in models. + However, the `preferred_only` option set to False will keep them, + allowing to be used as generators in other transformations. 
+ """ + extended = {} + for f_id, value in list(input_data.items()): + if f_id in self.subfields: + if not self.preferred_only: + extended[f_id] = value + extended.update(self.generators[f_id](self, f_id, value)) + else: + extended[f_id] = value + return extended diff --git a/bigml/fields.py b/bigml/fields.py index e420b913..41246b62 100644 --- a/bigml/fields.py +++ b/bigml/fields.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=unbalanced-tuple-unpacking # -# Copyright 2012-2019 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -44,29 +44,26 @@ import sys import json import csv +import random +import numpy as np + +try: + from pandas import DataFrame + PANDAS_READY = True +except ImportError: + PANDAS_READY = False from bigml.util import invert_dictionary, python_map_type, find_locale from bigml.util import DEFAULT_LOCALE -from bigml.api import get_resource_type +from bigml.api_handlers.resourcehandler import get_resource_type, get_fields from bigml.constants import ( - SOURCE_PATH, DATASET_PATH, PREDICTION_PATH, MODEL_PATH, CLUSTER_PATH, - ANOMALY_PATH, SAMPLE_PATH, CORRELATION_PATH, STATISTICAL_TEST_PATH, - LOGISTIC_REGRESSION_PATH, ASSOCIATION_PATH, TOPIC_MODEL_PATH, - ENSEMBLE_PATH, PCA_PATH, LINEAR_REGRESSION_PATH) + SOURCE_PATH, DATASET_PATH, SUPERVISED_PATHS, FUSION_PATH, + RESOURCES_WITH_FIELDS, DEFAULT_MISSING_TOKENS, REGIONS, CATEGORICAL) from bigml.io import UnicodeReader, UnicodeWriter -RESOURCES_WITH_FIELDS = [SOURCE_PATH, DATASET_PATH, MODEL_PATH, - PREDICTION_PATH, CLUSTER_PATH, ANOMALY_PATH, - SAMPLE_PATH, CORRELATION_PATH, STATISTICAL_TEST_PATH, - LOGISTIC_REGRESSION_PATH, ASSOCIATION_PATH, - TOPIC_MODEL_PATH, ENSEMBLE_PATH, PCA_PATH, - LINEAR_REGRESSION_PATH] -DEFAULT_MISSING_TOKENS = ["", "N/A", "n/a", "NULL", "null", "-", "#DIV/0", - "#REF!", "#NAME?", "NIL", "nil", "NA", "na", - "#VALUE!", "#NULL!", "NaN", "#N/A", "#NUM!", "?"] - LIST_LIMIT = 10 +REGIONS_ATTR = "labels" SUMMARY_HEADERS = ["field column", "field ID", "field name", "field label", "field description", "field type", "preferred", "missing count", "errors", "contents summary", @@ -78,24 +75,7 @@ "field type": "optype", "preferred": "preferred"} -ITEM_SINGULAR = {u"categories": u"category"} - -FIELDS_PARENT = { \ - "model": "model", - "anomaly": "model", - "cluster": "clusters", - "logisticregression": "logistic_regression", - "linearregression": "linear_regression", - "ensemble": "ensemble", - "deepnet": "deepnet", - "topicmodel": "topic_model", - "association": "associations", - "fusion": "fusion", - "correlation": "correlations", - "sample": "sample", - "pca": "pca", - "timeseries": "timeseries", - "statisticaltest": "statistical_tests"} +ITEM_SINGULAR = {"categories": "category"} def get_fields_structure(resource, errors=False): @@ -108,26 +88,19 @@ def get_fields_structure(resource, errors=False): except ValueError: raise ValueError("Unknown resource structure") field_errors = None - if resource_type in RESOURCES_WITH_FIELDS: - resource = resource.get('object', resource) - # locale and missing tokens - if resource_type == SOURCE_PATH: - resource_locale = resource['source_parser']['locale'] - missing_tokens = resource[ - 'source_parser']['missing_tokens'] - else: - resource_locale = resource.get('locale', DEFAULT_LOCALE) - missing_tokens = resource.get('missing_tokens', - DEFAULT_MISSING_TOKENS) - # fields structure - if resource_type 
in FIELDS_PARENT.keys(): - fields = resource[FIELDS_PARENT[resource_type]].get('fields', {}) - else: - fields = resource.get('fields', {}) + resource = resource.get('object', resource) + # locale and missing tokens + if resource_type == SOURCE_PATH: + resource_locale = resource['source_parser']['locale'] + missing_tokens = resource[ + 'source_parser']['missing_tokens'] + else: + resource_locale = resource.get('locale', DEFAULT_LOCALE) + missing_tokens = resource.get('missing_tokens', + DEFAULT_MISSING_TOKENS) - if resource_type == SAMPLE_PATH: - fields = dict([(field['id'], field) for field in - fields]) + fields = get_fields(resource) + if resource_type in RESOURCES_WITH_FIELDS: # Check whether there's an objective id objective_column = None if resource_type == DATASET_PATH: @@ -135,7 +108,8 @@ def get_fields_structure(resource, errors=False): 'objective_field', {}).get('id') if errors: field_errors = resource.get("status", {}).get("field_errors") - elif resource_type in [MODEL_PATH, LOGISTIC_REGRESSION_PATH]: + elif resource_type in SUPERVISED_PATHS and \ + resource_type != FUSION_PATH: objective_id = resource.get( \ 'objective_fields', [None])[0] objective_column = fields.get( \ @@ -144,9 +118,8 @@ def get_fields_structure(resource, errors=False): if errors: result = result + (field_errors,) return result - else: - return (None, None, None, None, None) if errors else \ - (None, None, None, None) + return (None, None, None, None, None) if errors else \ + (None, None, None, None) def attribute_summary(attribute_value, item_type, limit=None): @@ -155,14 +128,24 @@ def attribute_summary(attribute_value, item_type, limit=None): """ if attribute_value is None: return None - items = [u"%s (%s)" % (item, instances) for - item, instances in attribute_value] + if item_type != REGIONS_ATTR: + items = ["%s (%s)" % (item, instances) for + item, instances in attribute_value] + items_length = len(items) + if limit is None or limit > items_length: + limit = items_length + return "%s %s: %s" % (items_length, type_singular(item_type, + items_length == 1), + ", ".join(items[0: limit])) + items = ["%s (%s)" % (attr.get("label"), attr.get("count")) for + attr in attribute_value] items_length = len(items) if limit is None or limit > items_length: limit = items_length - return u"%s %s: %s" % (items_length, type_singular(item_type, - items_length == 1), - u", ".join(items[0: limit])) + return "%s %s: %s" % (items_length, type_singular(item_type, + items_length == 1), + ", ".join(items[0: limit])) + def type_singular(item_type, singular=False): """Singularizes item types if needed @@ -173,7 +156,77 @@ def type_singular(item_type, singular=False): return item_type -class Fields(object): +def numeric_example(numeric_summary): + """Generates a random numeric example in the gaussian defined by + mean and sigma in the numeric_summary + + """ + try: + mean = numeric_summary.get("mean") + sigma = numeric_summary.get("standard_deviation") + minimum = numeric_summary.get("minimum") + maximum = numeric_summary.get("maximum") + value = -1 + while value < minimum or value > maximum: + value = random.gauss(mean, sigma) + return value + except TypeError: + return None + + +def sorted_headers(fields): + """Listing the names of the fields as ordered in the original dataset. + The `fields` parameter is a Fields object. 
+ """ + header_names = [] + header_ids = [] + for column in fields.fields_columns: + header_names.append(fields.fields[ + fields.fields_by_column_number[column]]["name"]) + header_ids.append(fields.fields_by_column_number[column]) + + return header_names, header_ids + + +def get_new_fields(output_fields): + """Extracts the sexpr and names of the output fields in a dataset + generated from a new_fields transformation. + """ + new_fields = [] + for output_field in output_fields: + sexp = output_field.get("generator") + names = output_field.get("names") + new_fields.append({"field": sexp, "names": names}) + return new_fields + + +def one_hot_code(value, field, decode=False): + """Translating into codes categorical values. The codes are the index + of the value in the list of categories read from the fields summary. + Decode set to True will cause the code to be translated to the value""" + + try: + categories = [cat[0] for cat in field["summary"]["categories"]] + except KeyError: + raise KeyError("Failed to find the categories list. Check the field" + " information.") + + if decode: + try: + result = categories[int(value)] + except KeyError: + raise KeyError("Code not found in the categories list. %s" % + categories) + else: + try: + result = categories.index(value) + except ValueError: + raise ValueError("The '%s' value is not found in the categories " + "list: %s" % (value, categories)) + return result + + +class Fields(): """A class to deal with BigML auto-generated ids. """ @@ -186,6 +239,7 @@ def __init__(self, resource_or_fields, missing_tokens=None, # structure. The structure is checked and fields structure is returned # if a resource type is matched. try: + self.resource_type = get_resource_type(resource_or_fields) resource_info = get_fields_structure(resource_or_fields, True) (self.fields, resource_locale, @@ -217,7 +271,7 @@ def __init__(self, resource_or_fields, missing_tokens=None, self.missing_tokens = missing_tokens self.fields_columns = sorted(self.fields_by_column_number.keys()) # Ids of the fields to be included - self.filtered_fields = (self.fields.keys() if include is None + self.filtered_fields = (list(self.fields.keys()) if include is None else include) # To be updated in update_objective_field self.row_ids = None @@ -231,7 +285,10 @@ def __init__(self, resource_or_fields, missing_tokens=None, if objective_field is None and objective_column is not None: objective_field = objective_column objective_field_present = True - self.update_objective_field(objective_field, objective_field_present) + if self.fields: + # empty composite sources will not have an objective field + self.update_objective_field(objective_field, + objective_field_present) def update_objective_field(self, objective_field, objective_field_present, headers=None): @@ -243,7 +300,7 @@ def update_objective_field(self, objective_field, objective_field_present, # If no objective field, select the last column, else store its column if objective_field is None: self.objective_field = self.fields_columns[-1] - elif isinstance(objective_field, basestring): + elif isinstance(objective_field, str): try: self.objective_field = self.field_column_number( \ objective_field) @@ -263,7 +320,7 @@ def update_objective_field(self, objective_field, objective_field_present, if headers is None: # The row is supposed to contain the fields sorted by column number self.row_ids = [item[0] for item in - sorted(self.fields.items(), + sorted(list(self.fields.items()), key=lambda x: x[1]['column_number']) if objective_field_present or 
item[1]['column_number'] != self.objective_field]
@@ -287,37 +344,39 @@ def field_id(self, key):

         """
-        if isinstance(key, basestring):
+        if isinstance(key, str):
             try:
-                id = self.fields_by_name[key]
+                f_id = self.fields_by_name[key]
             except KeyError:
                 raise ValueError("Error: field name '%s' does not exist" % key)
-            return id
-        elif isinstance(key, int):
+            return f_id
+        if isinstance(key, int):
             try:
-                id = self.fields_by_column_number[key]
+                f_id = self.fields_by_column_number[key]
             except KeyError:
                 raise ValueError("Error: field column number '%s' does not"
                                  " exist" % key)
-            return id
+            return f_id
+        return None

     def field_name(self, key):
         """Returns a field name.

         """
-        if isinstance(key, basestring):
+        if isinstance(key, str):
             try:
                 name = self.fields[key]['name']
             except KeyError:
                 raise ValueError("Error: field id '%s' does not exist" % key)
             return name
-        elif isinstance(key, int):
+        if isinstance(key, int):
             try:
                 name = self.fields[self.fields_by_column_number[key]]['name']
             except KeyError:
                 raise ValueError("Error: field column number '%s' does not"
                                  " exist" % key)
             return name
+        return None

     def field_column_number(self, key):
         """Returns a field column number.
@@ -351,7 +410,7 @@ def pair(self, row, headers=None,
         else:
             objective_field = self.objective_field
         # If the objective field is a name or an id, retrieve its column number
-        if isinstance(objective_field, basestring):
+        if isinstance(objective_field, str):
             objective_field = self.field_column_number(objective_field)

         # Try to guess if objective field is in the data by using headers or
@@ -378,7 +437,7 @@ def list_fields(self, out=sys.stdout):

         """
         for field in [(val['name'], val['optype'], val['column_number'])
-                      for _, val in sorted(self.fields.items(),
+                      for _, val in sorted(list(self.fields.items()),
                                            key=lambda k: k[1]['column_number'])]:
             out.write('[%-32s: %-16s: %-8s]\n' % (field[0],
@@ -390,7 +449,7 @@ def preferred_fields(self):
           it isn't set at all.

         """
-        return {key: field for key, field in self.fields.iteritems()
+        return {key: field for key, field in self.fields.items()
                 if ('preferred' not in field) or field['preferred']}

     def validate_input_data(self, input_data, out=sys.stdout):
@@ -419,8 +478,8 @@ def normalize(self, value):
         """Transforms to unicode and cleans missing tokens

         """
-        if not isinstance(value, unicode):
-            value = unicode(value, "utf-8")
+        if not isinstance(value, str):
+            value = str(value, "utf-8")
         return None if value in self.missing_tokens else value

     def to_input_data(self, row):
@@ -437,7 +496,7 @@ def missing_counts(self):

         """
         summaries = [(field_id, field.get('summary', {}))
-                     for field_id, field in self.fields.items()]
+                     for field_id, field in list(self.fields.items())]
         if len(summaries) == 0:
             raise ValueError("The structure does not have enough information "
                              "to extract the fields containing missing values."
@@ -445,9 +504,9 @@ def missing_counts(self):
                              "You could retry the get remote call "
                              "with 'limit=-1' as query string.")

-        return dict([(field_id, summary.get('missing_count', 0))
-                     for field_id, summary in summaries
-                     if summary.get('missing_count', 0) > 0])
+        return {field_id: summary.get('missing_count', 0)
+                for field_id, summary in summaries
+                if summary.get('missing_count', 0) > 0}

     def stats(self, field_name):
         """Returns the summary information for the field
@@ -457,6 +516,77 @@ def stats(self, field_name):
         summary = self.fields[field_id].get('summary', {})
         return summary

+    def objective_field_info(self):
+        """Returns the fields structure for the objective field"""
+        if self.objective_field is None:
+            return None
+        objective_id = self.field_id(self.objective_field)
+        return {objective_id: self.fields[objective_id]}
+
+    def sorted_field_ids(self, objective=False):
+        """List of field IDs ordered by column number. If objective is
+        set to False, the objective field will be excluded.
+        """
+        fields = {}
+        fields.update(self.fields_by_column_number)
+        if not objective and self.objective_field is not None:
+            del fields[self.objective_field]
+        field_ids = [fields[column] for column in sorted(fields.keys())]
+        return field_ids
+
+    def to_numpy(self, input_data_list, objective=False):
+        """Transforms input data to numpy syntax. Fields are sorted
+        in the dataset order and categorical fields are one-hot encoded.
+        If objective is set to False, the objective field will not be
+        included."""
+        if PANDAS_READY and isinstance(input_data_list, DataFrame):
+            inner_data_list = input_data_list.to_dict('records')
+        else:
+            inner_data_list = input_data_list
+        field_ids = self.sorted_field_ids(objective=objective)
+        np_input_list = np.empty(shape=(len(input_data_list),
+                                        len(field_ids)))
+        for index, input_data in enumerate(inner_data_list):
+            np_input = np.array([])
+            for field_id in field_ids:
+                field_input = input_data.get(field_id,
+                    input_data.get(self.field_name(field_id)))
+                field = self.fields[field_id]
+                if field["optype"] == CATEGORICAL:
+                    field_input = one_hot_code(field_input, field)
+                np_input = np.append(np_input, field_input)
+            np_input_list[index] = np_input
+        return np_input_list
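+
+    # Illustrative round trip (assumes `dataset` is a retrieved dataset
+    # resource; the input keys are hypothetical field names):
+    #
+    #     fields = Fields(dataset)
+    #     np_rows = fields.to_numpy([{"petal length": 4.2,
+    #                                 "petal width": 1.3}])
+    #     fields.from_numpy(np_rows)  # back to input data dictionaries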
+
+    def from_numpy(self, np_data_list, objective=False, by_name=True):
+        """Transforms input data from numpy syntax. Fields are sorted
+        in the dataset order and categorical codes are decoded back into
+        their original values."""
+        input_data_list = []
+        field_ids = self.sorted_field_ids(objective=objective)
+        for np_data in np_data_list:
+            if len(np_data) != len(field_ids):
+                raise ValueError("Wrong number of features in data: %s"
+                    " found, %s expected" % (len(np_data), len(field_ids)))
+            input_data = {}
+            for index, field_id in enumerate(field_ids):
+                field_input = None if np.isnan(np_data[index]) else \
+                    np_data[index]
+                field = self.fields[field_id]
+                if field["optype"] == CATEGORICAL:
+                    field_input = one_hot_code(field_input, field, decode=True)
+                if by_name:
+                    field_id = self.fields[field_id]["name"]
+                input_data.update({field_id: field_input})
+            input_data_list.append(input_data)
+        return input_data_list
+
+    def one_hot_codes(self, field_name):
+        """Returns the codes used for every category in a categorical field"""
+        field = self.fields[self.field_id(field_name)]
+        if field["optype"] != CATEGORICAL:
+            raise ValueError("Only categorical fields are encoded")
+        categories = [cat[0] for cat in field["summary"]["categories"]]
+        return dict(zip(categories, range(0, len(categories))))
+
     def summary_csv(self, filename=None):
         """Summary of the contents of the fields
@@ -492,7 +622,7 @@ def summary_csv(self, filename=None):
                 else:
                     field_summary.append(json.dumps(field.get('preferred')))
                 field_summary.append(field_summary_value.get("missing_count"))
-                if self.field_errors and field_id in self.field_errors.keys():
+                if self.field_errors and field_id in list(self.field_errors.keys()):
                     errors = self.field_errors.get(field_id)
                     field_summary.append(errors.get("total"))
                 else:
@@ -505,22 +635,27 @@ def summary_csv(self, filename=None):
                 elif field['optype'] == 'categorical':
                     categories = field_summary_value.get("categories")
                     field_summary.append( \
-                        attribute_summary(categories, u"categorìes",
+                        attribute_summary(categories, "categories",
+                                          limit=LIST_LIMIT))
+                elif field['optype'] == REGIONS:
+                    labels_info = field_summary_value.get("labels")
+                    field_summary.append( \
+                        attribute_summary(labels_info, "labels",
                                           limit=LIST_LIMIT))
                 elif field['optype'] == "text":
                     terms = field_summary_value.get("tag_cloud")
                     field_summary.append( \
-                        attribute_summary(terms, u"terms",
+                        attribute_summary(terms, "terms",
                                           limit=LIST_LIMIT))
                 elif field['optype'] == "items":
                     items = field_summary_value.get("items")
                     field_summary.append( \
-                        attribute_summary(items, u"items", limit=LIST_LIMIT))
+                        attribute_summary(items, "items", limit=LIST_LIMIT))
                 else:
                     field_summary.append("")
-                if self.field_errors and field_id in self.field_errors.keys():
+                if self.field_errors and field_id in list(self.field_errors.keys()):
                     field_summary.append( \
-                        attribute_summary(errors.get("sample"), u"errors",
+                        attribute_summary(errors.get("sample"), "errors",
                                           limit=None))
                 else:
                     field_summary.append("")
@@ -530,8 +665,8 @@ def summary_csv(self, filename=None):
             summary.append(field_summary)
         if writer is None:
             return summary
-        else:
-            writer.close_writer()
+        writer.close_writer()
+        return filename

     def new_fields_structure(self, csv_attributes_file=None,
                              attributes=None, out_file=None):
@@ -549,15 +684,16 @@ def new_fields_structure(self, csv_attributes_file=None,
         """
         if csv_attributes_file is not None:
             reader = UnicodeReader(csv_attributes_file).open_reader()
-            attributes = [row for row in reader]
+            attributes = list(reader)
         new_fields_structure = {}
         if "field ID" in attributes[0] or "field column" in attributes[0]:
             # headers are used
             for index in range(1, len(attributes)):
-                new_attributes = dict(zip(attributes[0], attributes[index]))
+                new_attributes = dict(list(zip(attributes[0],
+                                               attributes[index])))
                 if new_attributes.get("field ID"):
                     field_id = new_attributes.get("field ID")
-                    if not field_id in self.fields.keys():
+                    if not field_id in list(self.fields.keys()):
                         raise ValueError("Field ID %s not found"
                                          " in this resource" % field_id)
                     del new_attributes["field ID"]
@@ -565,16 +701,18 @@ def new_fields_structure(self, csv_attributes_file=None,
                     try:
                         field_column = int(new_attributes.get("field column"))
                     except TypeError:
-                        raise ValueError("Field column %s not found"
-                                         " in this resource" % field_column)
+                        raise ValueError(
+                            "Field column %s not found"
+                            " in this resource" % new_attributes.get(
+                                "field column"))
                     if not field_column in self.fields_columns:
                         raise ValueError("Field column %s not found"
                                          " in this resource" % field_column)
                     field_id = self.field_id(field_column)
                     del new_attributes["field column"]
-                new_attributes_headers = new_attributes.keys()
+                new_attributes_headers = list(new_attributes.keys())
                 for attribute in new_attributes_headers:
-                    if not attribute in UPDATABLE_HEADERS.keys():
+                    if not attribute in list(UPDATABLE_HEADERS.keys()):
                         del new_attributes[attribute]
                     else:
                         new_attributes[UPDATABLE_HEADERS[attribute]] = \
@@ -607,40 +745,98 @@ def new_fields_structure(self, csv_attributes_file=None,
                     field_id = field_attributes[0] if first_column_is_id else \
                         self.field_id(int(field_attributes[0]))
                     new_fields_structure[field_id] = \
-                        dict(zip(headers, field_attributes[1: 6]))
+                        dict(list(zip(headers, field_attributes[1: 6])))
                 except ValueError:
                     raise ValueError("The first column should contain either the"
                                      " column or ID of the fields. Failed to find"
                                      " %s as either of them." % field_id)

+        new_fields_structure = {"fields": new_fields_structure}
         if out_file is None:
-            return {"fields": new_fields_structure}
-        else:
-            try:
-                with open(out_file, "w") as out:
-                    json.dump({"fields": new_fields_structure}, out)
-            except IOError:
-                raise IOError("Failed writing the fields structure file in"
-                              " %s- Please, check your arguments." %
-                              out_file)
-
-    def training_data_example(self):
+            return new_fields_structure
+        try:
+            with open(out_file, "w") as out:
+                json.dump(new_fields_structure, out)
+        except IOError:
+            raise IOError("Failed writing the fields structure file in"
+                          " %s. Please, check your arguments." %
+                          out_file)
+        return out_file
+
+    def training_data_example(self, missings=False):
         """Generates an example of training data based on the contents
         of the summaries of every field

+        If missings is set to True, missing values are allowed.
+
         """
         training_data = {}
-        for field_id, field in self.fields.items():
+        for _, field in list(self.fields.items()):
            if field.get("summary") is not None:
                 value = None
                 optype = field.get("optype")
                 if optype == "numeric":
-                    value = field["summary"]["mean"]
+                    if missings and random.randint(0, 5) > 3:
+                        value = None
+                    else:
+                        value = numeric_example(field["summary"])
                 if optype == "categorical":
-                    value = field["summary"]["categories"][0][0]
+                    if missings and random.randint(0, 5) > 3:
+                        value = None
+                    else:
+                        categories = [cat[0] for cat in
+                                      field["summary"]["categories"]]
+                        weights = [cat[1] for cat in
+                                   field["summary"]["categories"]]
+                        value = random.choices(categories, weights)[0]
                 if optype == "text":
-                    value = field["summary"]["tag_cloud"][0][0]
+                    if missings and random.randint(0, 5) > 3:
+                        value = None
+                    else:
+                        text_number = len(field["summary"]["tag_cloud"])
+                        index = random.randint(0, text_number - 1)
+                        value = field["summary"]["tag_cloud"][index][0]
                 if optype == "items":
-                    value = field["summary"]["items"][0][0]
+                    if missings and random.randint(0, 5) > 3:
+                        value = None
+                    else:
+                        items_number = len(field["summary"]["items"])
+                        index = random.randint(0, items_number - 1)
+                        value = field["summary"]["items"][index][0]
+                if optype == REGIONS:
+                    if missings and random.randint(0, 5) > 3:
+                        value = None
+                    else:
+                        labels_number = len(field["summary"]["labels"])
+                        index = random.randint(0, labels_number - 1)
+                        field_summary = field["summary"]["labels"][index]
+                        label = field_summary["label"]
+                        xmin = numeric_example(field_summary["xmin"])
+                        xmax = numeric_example(field_summary["xmax"])
+                        ymin = numeric_example(field_summary["ymin"])
+                        ymax = numeric_example(field_summary["ymax"])
+                        #pylint: disable=locally-disabled,too-many-boolean-expressions
+                        if None in [xmin, xmax, ymin, ymax] or xmax < xmin or \
+                                ymax < ymin or xmin < 0 or xmax < 0 or \
+                                ymin < 0 or ymax < 0:
+                            value = []
+                        else:
+                            value = [[label, xmin, xmax, ymin, ymax]]
+
                 if value is not None:
                     training_data.update({field["name"]: value})
         return training_data
+
+    def filter_fields_update(self, update_body):
+        """Filters the updatable attributes according to the type of resource
+
+        """
+        fields_info = update_body.get("fields")
+        if self.resource_type and fields_info is not None:
+            if self.resource_type == "dataset":
+                for _, field in list(fields_info.items()):
+                    if field.get("optype") is not None:
+                        del field["optype"]
+            elif self.resource_type == "source":
+                for _, field in list(fields_info.items()):
+                    if field.get("preferred") is not None:
+                        del field["preferred"]
+            update_body["fields"] = fields_info
+        return update_body
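+
+    # Illustrative sketch (hypothetical field id and update body): for a
+    # source, the non-updatable "preferred" attribute would be dropped
+    # while "optype" is kept:
+    #
+    #     fields.filter_fields_update(
+    #         {"fields": {"000000": {"preferred": False,
+    #                                "optype": "categorical"}}})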
diff --git a/bigml/flatline.py b/bigml/flatline.py
new file mode 100644
index 00000000..ee18536a
--- /dev/null
+++ b/bigml/flatline.py
@@ -0,0 +1,171 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2022-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+"""
+Flatline: Class that encapsulates the Flatline expressions interpreter
+"""
+
+from javascript import require
+
+
+class Flatline:
+    """A bridge to an underlying nodejs Flatline interpreter.
+
+    This class uses JSPyBridge to launch a Nodejs interpreter that loads
+    Flatline's javascript implementation and allows interaction via
+    Python constructs.
+
+    Example:
+
+        Flatline.check_lisp('(+ 1 2)')
+        Flatline.check_json(["f", 0], dataset=dataset)
+
+    """
+
+    __FLATLINEJS = require('./flatline/flatline-node.js')
+    interpreter = __FLATLINEJS.bigml.dixie.flatline
+
+    #pylint: disable=locally-disabled,invalid-name
+    @staticmethod
+    def infer_fields(row, prefix=None, offset=None):
+        """Utility function generating a mock list of fields.
+
+        Usually, checks and applications of Flatline expressions run
+        in the context of a given dataset's field descriptors, but
+        during testing it's sometimes useful to provide a mock set of
+        them, based on the types of the values of the test input rows.
+
+        Example:
+
+            In[1]: Flatline.infer_fields([0, 'a label'])
+            Out[2]: [{'column_number': 0,
+                      'datatype': 'int64',
+                      'id': '000000',
+                      'optype': 'numeric'},
+                     {'column_number': 1,
+                      'datatype': 'string',
+                      'id': '000001',
+                      'optype': 'categorical'}]
+
+        """
+        result = []
+        id_ = 0
+        for v in row:
+            t = type(v)
+            optype = 'categorical'
+            datatype = 'string'
+            if (t is int or t is float):
+                optype = 'numeric'
+                if t is float:
+                    datatype = 'float64'
+                else:
+                    datatype = 'int64'
+            id_str = '%06x' % id_
+            if prefix:
+                length = len(prefix)
+                id_str = prefix + id_str[length:]
+            column = id_
+            if offset:
+                column = offset + id_
+            result.append({'id': id_str,
+                           'optype': optype,
+                           'datatype': datatype,
+                           'column_number': column})
+            id_ = id_ + 1
+        return result
+
+    @staticmethod
+    def _dataset(dataset, rows):
+        """The dataset argument should be a Dataset that contains the
+        in_fields information
+        """
+        try:
+            return {"fields": dataset.in_fields}
+        except AttributeError:
+            if len(rows) > 0:
+                return {'fields': Flatline.infer_fields(rows[0])}
+            return None
+
+    @staticmethod
+    def defined_functions():
+        """A list of the names of all defined Flatline functions"""
+        return Flatline.interpreter.defined_primitives
+
+    @staticmethod
+    def check_lisp(sexp, fields=None):
+        """Checks whether the given lisp s-expression is valid.
+
+        Any operations referring to a dataset's fields will use the
+        information found in the fields structure.
+
+        """
+        r = Flatline.interpreter.evaluate_sexp(sexp, fields, True).valueOf()
+        return r
+
+    @staticmethod
+    def check_json(json_sexp, fields=None):
+        """Checks whether the given JSON s-expression is valid.
+
+        Works like `check_lisp` (which see), but taking a JSON
+        expression represented as a native Python list instead of a
+        Lisp sexp string.
+
+        """
+        r = Flatline.interpreter.evaluate_js(json_sexp, fields).valueOf()
+        return r
+
+    @staticmethod
+    def lisp_to_json(sexp):
+        """Auxiliary function transforming the Lisp to the Python
+        representation."""
+        return Flatline.interpreter.sexp_to_js(sexp)
+
+    @staticmethod
+    def json_to_lisp(json_sexp):
+        """Auxiliary function transforming the Python to the Lisp
+        representation."""
+        return Flatline.interpreter.js_to_sexp(json_sexp)
+
+    @staticmethod
+    def apply_lisp(sexp, rows, dataset=None):
+        """Applies the given Lisp sexp to a set of input rows.
+
+        Input rows are represented as a list of lists of native Python
+        values.
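+
+        Example (illustrative; the expression and the rows are made up):
+
+            Flatline.apply_lisp('(+ (f 0) 1)', [[1], [2]])
+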
The dataset info should be provided as a Dataset object. + If no dataset is provided, the field characteristics + of the input rows are guessed using `infer_fields`. + + """ + return Flatline.interpreter.eval_and_apply_sexp( + sexp, + Flatline._dataset(dataset, rows), + rows) + + @staticmethod + def apply_json(json_sexp, rows, dataset=None): + """Applies the given JSON sexp to a set of input rows. + + As usual, JSON sexps are represented as Python lists, + e.g. ["+", 1, 2]. + + Input rows are represented as a list of lists of native Python + values. The dataset info should be provided as a Dataset object. + If no dataset is provided, the field characteristics + of the input rows are guessed using `infer_fields`. + + """ + return Flatline.interpreter.eval_and_apply_js( + json_sexp, + Flatline._dataset(dataset, rows), + rows) diff --git a/bigml/flatline/flatline-node.js b/bigml/flatline/flatline-node.js new file mode 100644 index 00000000..9e6ab6b5 --- /dev/null +++ b/bigml/flatline/flatline-node.js @@ -0,0 +1,4898 @@ +if(typeof Math.imul == "undefined" || (Math.imul(0xffffffff,5) == 0)) { + Math.imul = function (a, b) { + var ah = (a >>> 16) & 0xffff; + var al = a & 0xffff; + var bh = (b >>> 16) & 0xffff; + var bl = b & 0xffff; + // the shift by 0 fixes the sign on the high part + // the final |0 converts the unsigned value into a signed value + return ((al * bl) + (((ah * bl + al * bh) << 16) >>> 0)|0); + } +} + + + ;var COMPILED=!0,goog=goog||{};goog.global=this||self;goog.isDef=function(a){return void 0!==a};goog.isString=function(a){return"string"==typeof a};goog.isBoolean=function(a){return"boolean"==typeof a};goog.isNumber=function(a){return"number"==typeof a}; +goog.exportPath_=function(a,b,c){a=a.split(".");c=c||goog.global;a[0]in c||"undefined"==typeof c.execScript||c.execScript("var "+a[0]);for(var d;a.length&&(d=a.shift());)a.length||void 0===b?c=c[d]&&c[d]!==Object.prototype[d]?c[d]:c[d]={}:c[d]=b}; +goog.define=function(a,b){if(!COMPILED){var c=goog.global.CLOSURE_UNCOMPILED_DEFINES,d=goog.global.CLOSURE_DEFINES;c&&void 0===c.nodeType&&Object.prototype.hasOwnProperty.call(c,a)?b=c[a]:d&&void 0===d.nodeType&&Object.prototype.hasOwnProperty.call(d,a)&&(b=d[a])}return b};goog.FEATURESET_YEAR=2012;goog.DEBUG=!0;goog.LOCALE="en";goog.TRUSTED_SITE=!0;goog.STRICT_MODE_COMPATIBLE=!1;goog.DISALLOW_TEST_ONLY_CODE=COMPILED&&!goog.DEBUG;goog.ENABLE_CHROME_APP_SAFE_SCRIPT_LOADING=!1; +goog.provide=function(a){if(goog.isInModuleLoader_())throw Error("goog.provide cannot be used within a module.");if(!COMPILED&&goog.isProvided_(a))throw Error('Namespace "'+a+'" already declared.');goog.constructNamespace_(a)};goog.constructNamespace_=function(a,b){if(!COMPILED){delete goog.implicitNamespaces_[a];for(var c=a;(c=c.substring(0,c.lastIndexOf(".")))&&!goog.getObjectByName(c);)goog.implicitNamespaces_[c]=!0}goog.exportPath_(a,b)}; +goog.getScriptNonce=function(a){if(a&&a!=goog.global)return goog.getScriptNonce_(a.document);null===goog.cspNonce_&&(goog.cspNonce_=goog.getScriptNonce_(goog.global.document));return goog.cspNonce_};goog.NONCE_PATTERN_=/^[\w+/_-]+[=]{0,2}$/;goog.cspNonce_=null;goog.getScriptNonce_=function(a){return(a=a.querySelector&&a.querySelector("script[nonce]"))&&(a=a.nonce||a.getAttribute("nonce"))&&goog.NONCE_PATTERN_.test(a)?a:""};goog.VALID_MODULE_RE_=/^[a-zA-Z_$][a-zA-Z0-9._$]*$/; +goog.module=function(a){if("string"!==typeof a||!a||-1==a.search(goog.VALID_MODULE_RE_))throw Error("Invalid module identifier");if(!goog.isInGoogModuleLoader_())throw Error("Module "+a+" has been 
loaded incorrectly. Note, modules cannot be loaded as normal scripts. They require some kind of pre-processing step. You're likely trying to load a module via a script tag or as a part of a concatenated bundle without rewriting the module. For more info see: https://github.com/google/closure-library/wiki/goog.module:-an-ES6-module-like-alternative-to-goog.provide."); +if(goog.moduleLoaderState_.moduleName)throw Error("goog.module may only be called once per module.");goog.moduleLoaderState_.moduleName=a;if(!COMPILED){if(goog.isProvided_(a))throw Error('Namespace "'+a+'" already declared.');delete goog.implicitNamespaces_[a]}};goog.module.get=function(a){return goog.module.getInternal_(a)}; +goog.module.getInternal_=function(a){if(!COMPILED){if(a in goog.loadedModules_)return goog.loadedModules_[a].exports;if(!goog.implicitNamespaces_[a])return a=goog.getObjectByName(a),null!=a?a:null}return null};goog.ModuleType={ES6:"es6",GOOG:"goog"};goog.moduleLoaderState_=null;goog.isInModuleLoader_=function(){return goog.isInGoogModuleLoader_()||goog.isInEs6ModuleLoader_()};goog.isInGoogModuleLoader_=function(){return!!goog.moduleLoaderState_&&goog.moduleLoaderState_.type==goog.ModuleType.GOOG}; +goog.isInEs6ModuleLoader_=function(){if(goog.moduleLoaderState_&&goog.moduleLoaderState_.type==goog.ModuleType.ES6)return!0;var a=goog.global.$jscomp;return a?"function"!=typeof a.getCurrentModulePath?!1:!!a.getCurrentModulePath():!1}; +goog.module.declareLegacyNamespace=function(){if(!COMPILED&&!goog.isInGoogModuleLoader_())throw Error("goog.module.declareLegacyNamespace must be called from within a goog.module");if(!COMPILED&&!goog.moduleLoaderState_.moduleName)throw Error("goog.module must be called prior to goog.module.declareLegacyNamespace.");goog.moduleLoaderState_.declareLegacyNamespace=!0}; +goog.declareModuleId=function(a){if(!COMPILED){if(!goog.isInEs6ModuleLoader_())throw Error("goog.declareModuleId may only be called from within an ES6 module");if(goog.moduleLoaderState_&&goog.moduleLoaderState_.moduleName)throw Error("goog.declareModuleId may only be called once per module.");if(a in goog.loadedModules_)throw Error('Module with namespace "'+a+'" already exists.');}if(goog.moduleLoaderState_)goog.moduleLoaderState_.moduleName=a;else{var b=goog.global.$jscomp;if(!b||"function"!=typeof b.getCurrentModulePath)throw Error('Module with namespace "'+ +a+'" has been loaded incorrectly.');b=b.require(b.getCurrentModulePath());goog.loadedModules_[a]={exports:b,type:goog.ModuleType.ES6,moduleId:a}}};goog.setTestOnly=function(a){if(goog.DISALLOW_TEST_ONLY_CODE)throw a=a||"",Error("Importing test-only code into non-debug environment"+(a?": "+a:"."));};goog.forwardDeclare=function(a){};COMPILED||(goog.isProvided_=function(a){return a in goog.loadedModules_||!goog.implicitNamespaces_[a]&&null!=goog.getObjectByName(a)},goog.implicitNamespaces_={"goog.module":!0}); +goog.getObjectByName=function(a,b){a=a.split(".");b=b||goog.global;for(var c=0;c>>0);goog.uidCounter_=0;goog.getHashCode=goog.getUid; +goog.removeHashCode=goog.removeUid;goog.cloneObject=function(a){var b=goog.typeOf(a);if("object"==b||"array"==b){if("function"===typeof a.clone)return a.clone();b="array"==b?[]:{};for(var c in a)b[c]=goog.cloneObject(a[c]);return b}return a};goog.bindNative_=function(a,b,c){return a.call.apply(a.bind,arguments)}; +goog.bindJs_=function(a,b,c){if(!a)throw Error();if(2c?Math.max(0,a.length+c):c;if("string"===typeof a)return"string"!==typeof 
b||1!=b.length?-1:a.indexOf(b,c);for(;cc&&(c=Math.max(0,a.length+c));if("string"===typeof a)return"string"!==typeof b||1!=b.length?-1:a.lastIndexOf(b,c);for(;0<=c;c--)if(c in a&&a[c]===b)return c;return-1}; +goog.array.forEach=goog.NATIVE_ARRAY_PROTOTYPES&&(goog.array.ASSUME_NATIVE_FUNCTIONS||Array.prototype.forEach)?function(a,b,c){goog.asserts.assert(null!=a.length);Array.prototype.forEach.call(a,b,c)}:function(a,b,c){for(var d=a.length,e="string"===typeof a?a.split(""):a,f=0;fb?null:"string"===typeof a?a.charAt(b):a[b]};goog.array.findIndex=function(a,b,c){for(var d=a.length,e="string"===typeof a?a.split(""):a,f=0;fb?null:"string"===typeof a?a.charAt(b):a[b]}; +goog.array.findIndexRight=function(a,b,c){var d=a.length,e="string"===typeof a?a.split(""):a;for(--d;0<=d;d--)if(d in e&&b.call(c,e[d],d,a))return d;return-1};goog.array.contains=function(a,b){return 0<=goog.array.indexOf(a,b)};goog.array.isEmpty=function(a){return 0==a.length};goog.array.clear=function(a){if(!goog.isArray(a))for(var b=a.length-1;0<=b;b--)delete a[b];a.length=0};goog.array.insert=function(a,b){goog.array.contains(a,b)||a.push(b)}; +goog.array.insertAt=function(a,b,c){goog.array.splice(a,c,0,b)};goog.array.insertArrayAt=function(a,b,c){goog.partial(goog.array.splice,a,c,0).apply(null,b)};goog.array.insertBefore=function(a,b,c){var d;2==arguments.length||0>(d=goog.array.indexOf(a,c))?a.push(b):goog.array.insertAt(a,b,d)};goog.array.remove=function(a,b){b=goog.array.indexOf(a,b);var c;(c=0<=b)&&goog.array.removeAt(a,b);return c}; +goog.array.removeLast=function(a,b){b=goog.array.lastIndexOf(a,b);return 0<=b?(goog.array.removeAt(a,b),!0):!1};goog.array.removeAt=function(a,b){goog.asserts.assert(null!=a.length);return 1==Array.prototype.splice.call(a,b,1).length};goog.array.removeIf=function(a,b,c){b=goog.array.findIndex(a,b,c);return 0<=b?(goog.array.removeAt(a,b),!0):!1};goog.array.removeAllIf=function(a,b,c){var d=0;goog.array.forEachRight(a,function(e,f){b.call(c,e,f,a)&&goog.array.removeAt(a,f)&&d++});return d}; +goog.array.concat=function(a){return Array.prototype.concat.apply([],arguments)};goog.array.join=function(a){return Array.prototype.concat.apply([],arguments)};goog.array.toArray=function(a){var b=a.length;if(0=arguments.length?Array.prototype.slice.call(a,b):Array.prototype.slice.call(a,b,c)}; +goog.array.removeDuplicates=function(a,b,c){b=b||a;var d=function(a){return goog.isObject(a)?"o"+goog.getUid(a):(typeof a).charAt(0)+a};c=c||d;d={};for(var e=0,f=0;f>>1);var l=c?b.call(e,a[k],k,a):b(d,a[k]);0b?1:ac?(goog.array.insertAt(a,b,-(c+1)),!0):!1};goog.array.binaryRemove=function(a,b,c){b=goog.array.binarySearch(a,b,c);return 0<=b?goog.array.removeAt(a,b):!1}; +goog.array.bucket=function(a,b,c){for(var d={},e=0;ec*(f-e))return[];if(0f;a+=c)d.push(a);return d};goog.array.repeat=function(a,b){for(var c=[],d=0;db&&Array.prototype.push.apply(a,a.splice(0,-b)));return a}; +goog.array.moveItem=function(a,b,c){goog.asserts.assert(0<=b&&ba?goog.i18n.bidi.Dir.RTL:b?null:goog.i18n.bidi.Dir.NEUTRAL:null==a?null:a?goog.i18n.bidi.Dir.RTL:goog.i18n.bidi.Dir.LTR};goog.i18n.bidi.ltrChars_="A-Za-zÀ-ÖØ-öø-ʸ̀-֐ऀ-῿‎Ⰰ-\ud801\ud804-\ud839\ud83c-\udbff豈-﬜︀-﹯﻽-￿";goog.i18n.bidi.rtlChars_="֑-ۯۺ-ࣿ‏\ud802-\ud803\ud83a-\ud83bיִ-﷿ﹰ-ﻼ";goog.i18n.bidi.htmlSkipReg_=/<[^>]*>|&[^;]+;/g; +goog.i18n.bidi.stripHtmlIfNeeded_=function(a,b){return b?a.replace(goog.i18n.bidi.htmlSkipReg_,""):a};goog.i18n.bidi.rtlCharReg_=new RegExp("["+goog.i18n.bidi.rtlChars_+"]");goog.i18n.bidi.ltrCharReg_=new 
RegExp("["+goog.i18n.bidi.ltrChars_+"]");goog.i18n.bidi.hasAnyRtl=function(a,b){return goog.i18n.bidi.rtlCharReg_.test(goog.i18n.bidi.stripHtmlIfNeeded_(a,b))};goog.i18n.bidi.hasRtlChar=goog.i18n.bidi.hasAnyRtl; +goog.i18n.bidi.hasAnyLtr=function(a,b){return goog.i18n.bidi.ltrCharReg_.test(goog.i18n.bidi.stripHtmlIfNeeded_(a,b))};goog.i18n.bidi.ltrRe_=new RegExp("^["+goog.i18n.bidi.ltrChars_+"]");goog.i18n.bidi.rtlRe_=new RegExp("^["+goog.i18n.bidi.rtlChars_+"]");goog.i18n.bidi.isRtlChar=function(a){return goog.i18n.bidi.rtlRe_.test(a)};goog.i18n.bidi.isLtrChar=function(a){return goog.i18n.bidi.ltrRe_.test(a)};goog.i18n.bidi.isNeutralChar=function(a){return!goog.i18n.bidi.isLtrChar(a)&&!goog.i18n.bidi.isRtlChar(a)}; +goog.i18n.bidi.ltrDirCheckRe_=new RegExp("^[^"+goog.i18n.bidi.rtlChars_+"]*["+goog.i18n.bidi.ltrChars_+"]");goog.i18n.bidi.rtlDirCheckRe_=new RegExp("^[^"+goog.i18n.bidi.ltrChars_+"]*["+goog.i18n.bidi.rtlChars_+"]");goog.i18n.bidi.startsWithRtl=function(a,b){return goog.i18n.bidi.rtlDirCheckRe_.test(goog.i18n.bidi.stripHtmlIfNeeded_(a,b))};goog.i18n.bidi.isRtlText=goog.i18n.bidi.startsWithRtl; +goog.i18n.bidi.startsWithLtr=function(a,b){return goog.i18n.bidi.ltrDirCheckRe_.test(goog.i18n.bidi.stripHtmlIfNeeded_(a,b))};goog.i18n.bidi.isLtrText=goog.i18n.bidi.startsWithLtr;goog.i18n.bidi.isRequiredLtrRe_=/^http:\/\/.*/;goog.i18n.bidi.isNeutralText=function(a,b){a=goog.i18n.bidi.stripHtmlIfNeeded_(a,b);return goog.i18n.bidi.isRequiredLtrRe_.test(a)||!goog.i18n.bidi.hasAnyLtr(a)&&!goog.i18n.bidi.hasAnyRtl(a)}; +goog.i18n.bidi.ltrExitDirCheckRe_=new RegExp("["+goog.i18n.bidi.ltrChars_+"][^"+goog.i18n.bidi.rtlChars_+"]*$");goog.i18n.bidi.rtlExitDirCheckRe_=new RegExp("["+goog.i18n.bidi.rtlChars_+"][^"+goog.i18n.bidi.ltrChars_+"]*$");goog.i18n.bidi.endsWithLtr=function(a,b){return goog.i18n.bidi.ltrExitDirCheckRe_.test(goog.i18n.bidi.stripHtmlIfNeeded_(a,b))};goog.i18n.bidi.isLtrExitText=goog.i18n.bidi.endsWithLtr; +goog.i18n.bidi.endsWithRtl=function(a,b){return goog.i18n.bidi.rtlExitDirCheckRe_.test(goog.i18n.bidi.stripHtmlIfNeeded_(a,b))};goog.i18n.bidi.isRtlExitText=goog.i18n.bidi.endsWithRtl;goog.i18n.bidi.rtlLocalesRe_=/^(ar|ckb|dv|he|iw|fa|nqo|ps|sd|ug|ur|yi|.*[-_](Adlm|Arab|Hebr|Nkoo|Rohg|Thaa))(?!.*[-_](Latn|Cyrl)($|-|_))($|-|_)/i;goog.i18n.bidi.isRtlLanguage=function(a){return goog.i18n.bidi.rtlLocalesRe_.test(a)};goog.i18n.bidi.bracketGuardTextRe_=/(\(.*?\)+)|(\[.*?\]+)|(\{.*?\}+)|(<.*?>+)/g; +goog.i18n.bidi.guardBracketInText=function(a,b){b=(void 0===b?goog.i18n.bidi.hasAnyRtl(a):b)?goog.i18n.bidi.Format.RLM:goog.i18n.bidi.Format.LRM;return a.replace(goog.i18n.bidi.bracketGuardTextRe_,b+"$\x26"+b)};goog.i18n.bidi.enforceRtlInHtml=function(a){return"\x3c"==a.charAt(0)?a.replace(/<\w+/,"$\x26 dir\x3drtl"):"\n\x3cspan dir\x3drtl\x3e"+a+"\x3c/span\x3e"};goog.i18n.bidi.enforceRtlInText=function(a){return goog.i18n.bidi.Format.RLE+a+goog.i18n.bidi.Format.PDF}; +goog.i18n.bidi.enforceLtrInHtml=function(a){return"\x3c"==a.charAt(0)?a.replace(/<\w+/,"$\x26 dir\x3dltr"):"\n\x3cspan dir\x3dltr\x3e"+a+"\x3c/span\x3e"};goog.i18n.bidi.enforceLtrInText=function(a){return goog.i18n.bidi.Format.LRE+a+goog.i18n.bidi.Format.PDF};goog.i18n.bidi.dimensionsRe_=/:\s*([.\d][.\w]*)\s+([.\d][.\w]*)\s+([.\d][.\w]*)\s+([.\d][.\w]*)/g;goog.i18n.bidi.leftRe_=/left/gi;goog.i18n.bidi.rightRe_=/right/gi;goog.i18n.bidi.tempRe_=/%%%%/g; +goog.i18n.bidi.mirrorCSS=function(a){return a.replace(goog.i18n.bidi.dimensionsRe_,":$1 $4 $3 
$2").replace(goog.i18n.bidi.leftRe_,"%%%%").replace(goog.i18n.bidi.rightRe_,goog.i18n.bidi.LEFT).replace(goog.i18n.bidi.tempRe_,goog.i18n.bidi.RIGHT)};goog.i18n.bidi.doubleQuoteSubstituteRe_=/([\u0591-\u05f2])"/g;goog.i18n.bidi.singleQuoteSubstituteRe_=/([\u0591-\u05f2])'/g; +goog.i18n.bidi.normalizeHebrewQuote=function(a){return a.replace(goog.i18n.bidi.doubleQuoteSubstituteRe_,"$1״").replace(goog.i18n.bidi.singleQuoteSubstituteRe_,"$1׳")};goog.i18n.bidi.wordSeparatorRe_=/\s+/;goog.i18n.bidi.hasNumeralsRe_=/[\d\u06f0-\u06f9]/;goog.i18n.bidi.rtlDetectionThreshold_=.4; +goog.i18n.bidi.estimateDirection=function(a,b){let c=0,d=0,e=!1;a=goog.i18n.bidi.stripHtmlIfNeeded_(a,b).split(goog.i18n.bidi.wordSeparatorRe_);for(b=0;bgoog.i18n.bidi.rtlDetectionThreshold_?goog.i18n.bidi.Dir.RTL:goog.i18n.bidi.Dir.LTR}; +goog.i18n.bidi.detectRtlDirectionality=function(a,b){return goog.i18n.bidi.estimateDirection(a,b)==goog.i18n.bidi.Dir.RTL};goog.i18n.bidi.setElementDirAndAlign=function(a,b){a&&(b=goog.i18n.bidi.toDir(b))&&(a.style.textAlign=b==goog.i18n.bidi.Dir.RTL?goog.i18n.bidi.RIGHT:goog.i18n.bidi.LEFT,a.dir=b==goog.i18n.bidi.Dir.RTL?"rtl":"ltr")}; +goog.i18n.bidi.setElementDirByTextDirectionality=function(a,b){switch(goog.i18n.bidi.estimateDirection(b)){case goog.i18n.bidi.Dir.LTR:a.dir="ltr";break;case goog.i18n.bidi.Dir.RTL:a.dir="rtl";break;default:a.removeAttribute("dir")}};goog.i18n.bidi.DirectionalString=function(){};goog.html.TrustedResourceUrl=function(a,b){this.privateDoNotAccessOrElseTrustedResourceUrlWrappedValue_=a===goog.html.TrustedResourceUrl.CONSTRUCTOR_TOKEN_PRIVATE_&&b||"";this.TRUSTED_RESOURCE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_=goog.html.TrustedResourceUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_};goog.html.TrustedResourceUrl.prototype.implementsGoogStringTypedString=!0;goog.html.TrustedResourceUrl.prototype.getTypedStringValue=function(){return this.privateDoNotAccessOrElseTrustedResourceUrlWrappedValue_.toString()}; +goog.html.TrustedResourceUrl.prototype.implementsGoogI18nBidiDirectionalString=!0;goog.html.TrustedResourceUrl.prototype.getDirection=function(){return goog.i18n.bidi.Dir.LTR}; +goog.html.TrustedResourceUrl.prototype.cloneWithParams=function(a,b){var c=goog.html.TrustedResourceUrl.unwrap(this);c=goog.html.TrustedResourceUrl.URL_PARAM_PARSER_.exec(c);var d=c[3]||"";return goog.html.TrustedResourceUrl.createTrustedResourceUrlSecurityPrivateDoNotAccessOrElse(c[1]+goog.html.TrustedResourceUrl.stringifyParams_("?",c[2]||"",a)+goog.html.TrustedResourceUrl.stringifyParams_("#",d,b))}; +goog.DEBUG&&(goog.html.TrustedResourceUrl.prototype.toString=function(){return"TrustedResourceUrl{"+this.privateDoNotAccessOrElseTrustedResourceUrlWrappedValue_+"}"});goog.html.TrustedResourceUrl.unwrap=function(a){return goog.html.TrustedResourceUrl.unwrapTrustedScriptURL(a).toString()}; +goog.html.TrustedResourceUrl.unwrapTrustedScriptURL=function(a){if(a instanceof goog.html.TrustedResourceUrl&&a.constructor===goog.html.TrustedResourceUrl&&a.TRUSTED_RESOURCE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_===goog.html.TrustedResourceUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_)return a.privateDoNotAccessOrElseTrustedResourceUrlWrappedValue_;goog.asserts.fail("expected object of type TrustedResourceUrl, got '"+a+"' of type "+goog.typeOf(a));return"type_error:TrustedResourceUrl"}; +goog.html.TrustedResourceUrl.format=function(a,b){var c=goog.string.Const.unwrap(a);if(!goog.html.TrustedResourceUrl.BASE_URL_.test(c))throw Error("Invalid TrustedResourceUrl format: 
"+c);a=c.replace(goog.html.TrustedResourceUrl.FORMAT_MARKER_,function(a,e){if(!Object.prototype.hasOwnProperty.call(b,e))throw Error('Found marker, "'+e+'", in format string, "'+c+'", but no valid label mapping found in args: '+JSON.stringify(b));a=b[e];return a instanceof goog.string.Const?goog.string.Const.unwrap(a): +encodeURIComponent(String(a))});return goog.html.TrustedResourceUrl.createTrustedResourceUrlSecurityPrivateDoNotAccessOrElse(a)};goog.html.TrustedResourceUrl.FORMAT_MARKER_=/%{(\w+)}/g;goog.html.TrustedResourceUrl.BASE_URL_=/^((https:)?\/\/[0-9a-z.:[\]-]+\/|\/[^/\\]|[^:/\\%]+\/|[^:/\\%]*[?#]|about:blank#)/i;goog.html.TrustedResourceUrl.URL_PARAM_PARSER_=/^([^?#]*)(\?[^#]*)?(#[\s\S]*)?/; +goog.html.TrustedResourceUrl.formatWithParams=function(a,b,c,d){return goog.html.TrustedResourceUrl.format(a,b).cloneWithParams(c,d)};goog.html.TrustedResourceUrl.fromConstant=function(a){return goog.html.TrustedResourceUrl.createTrustedResourceUrlSecurityPrivateDoNotAccessOrElse(goog.string.Const.unwrap(a))};goog.html.TrustedResourceUrl.fromConstants=function(a){for(var b="",c=0;ca.length?"\x26":"")+encodeURIComponent(d)+"\x3d"+encodeURIComponent(String(g)))}}return b};goog.html.TrustedResourceUrl.CONSTRUCTOR_TOKEN_PRIVATE_={};goog.string.internal={};goog.string.internal.startsWith=function(a,b){return 0==a.lastIndexOf(b,0)};goog.string.internal.endsWith=function(a,b){const c=a.length-b.length;return 0<=c&&a.indexOf(b,c)==c};goog.string.internal.caseInsensitiveStartsWith=function(a,b){return 0==goog.string.internal.caseInsensitiveCompare(b,a.substr(0,b.length))};goog.string.internal.caseInsensitiveEndsWith=function(a,b){return 0==goog.string.internal.caseInsensitiveCompare(b,a.substr(a.length-b.length,b.length))}; +goog.string.internal.caseInsensitiveEquals=function(a,b){return a.toLowerCase()==b.toLowerCase()};goog.string.internal.isEmptyOrWhitespace=function(a){return/^[\s\xa0]*$/.test(a)};goog.string.internal.trim=goog.TRUSTED_SITE&&String.prototype.trim?function(a){return a.trim()}:function(a){return/^[\s\xa0]*([\s\S]*?)[\s\xa0]*$/.exec(a)[1]};goog.string.internal.caseInsensitiveCompare=function(a,b){a=String(a).toLowerCase();b=String(b).toLowerCase();return a/g; +goog.string.internal.QUOT_RE_=/"/g;goog.string.internal.SINGLE_QUOTE_RE_=/'/g;goog.string.internal.NULL_RE_=/\x00/g;goog.string.internal.ALL_RE_=/[\x00&<>"']/;goog.string.internal.whitespaceEscape=function(a,b){return goog.string.internal.newLineToBr(a.replace(/ /g," \x26#160;"),b)};goog.string.internal.contains=function(a,b){return-1!=a.indexOf(b)};goog.string.internal.caseInsensitiveContains=function(a,b){return goog.string.internal.contains(a.toLowerCase(),b.toLowerCase())}; +goog.string.internal.compareVersions=function(a,b){var c=0;a=goog.string.internal.trim(String(a)).split(".");b=goog.string.internal.trim(String(b)).split(".");const d=Math.max(a.length,b.length);for(let g=0;0==c&&gb?1:0};goog.html.SafeUrl=function(a,b){this.privateDoNotAccessOrElseSafeUrlWrappedValue_=a===goog.html.SafeUrl.CONSTRUCTOR_TOKEN_PRIVATE_&&b||"";this.SAFE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_=goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_};goog.html.SafeUrl.INNOCUOUS_STRING="about:invalid#zClosurez";goog.html.SafeUrl.prototype.implementsGoogStringTypedString=!0;goog.html.SafeUrl.prototype.getTypedStringValue=function(){return this.privateDoNotAccessOrElseSafeUrlWrappedValue_.toString()}; +goog.html.SafeUrl.prototype.implementsGoogI18nBidiDirectionalString=!0;goog.html.SafeUrl.prototype.getDirection=function(){return 
goog.i18n.bidi.Dir.LTR};goog.DEBUG&&(goog.html.SafeUrl.prototype.toString=function(){return"SafeUrl{"+this.privateDoNotAccessOrElseSafeUrlWrappedValue_+"}"}); +goog.html.SafeUrl.unwrap=function(a){if(a instanceof goog.html.SafeUrl&&a.constructor===goog.html.SafeUrl&&a.SAFE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_===goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_)return a.privateDoNotAccessOrElseSafeUrlWrappedValue_;goog.asserts.fail("expected object of type SafeUrl, got '"+a+"' of type "+goog.typeOf(a));return"type_error:SafeUrl"};goog.html.SafeUrl.fromConstant=function(a){return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(goog.string.Const.unwrap(a))}; +goog.html.SAFE_MIME_TYPE_PATTERN_=/^(?:audio\/(?:3gpp2|3gpp|aac|L16|midi|mp3|mp4|mpeg|oga|ogg|opus|x-m4a|x-wav|wav|webm)|image\/(?:bmp|gif|jpeg|jpg|png|tiff|webp|x-icon)|text\/csv|video\/(?:mpeg|mp4|ogg|webm|quicktime))(?:;\w+=(?:\w+|"[\w;=]+"))*$/i;goog.html.SafeUrl.isSafeMimeType=function(a){return goog.html.SAFE_MIME_TYPE_PATTERN_.test(a)};goog.html.SafeUrl.fromBlob=function(a){a=goog.html.SAFE_MIME_TYPE_PATTERN_.test(a.type)?goog.fs.url.createObjectUrl(a):goog.html.SafeUrl.INNOCUOUS_STRING;return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(a)}; +goog.html.DATA_URL_PATTERN_=/^data:([^,]*);base64,[a-z0-9+\/]+=*$/i;goog.html.SafeUrl.fromDataUrl=function(a){a=a.replace(/(%0A|%0D)/g,"");var b=a.match(goog.html.DATA_URL_PATTERN_);b=b&&goog.html.SAFE_MIME_TYPE_PATTERN_.test(b[1]);return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(b?a:goog.html.SafeUrl.INNOCUOUS_STRING)};goog.html.SafeUrl.fromTelUrl=function(a){goog.string.internal.caseInsensitiveStartsWith(a,"tel:")||(a=goog.html.SafeUrl.INNOCUOUS_STRING);return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(a)}; +goog.html.SIP_URL_PATTERN_=/^sip[s]?:[+a-z0-9_.!$%&'*\/=^`{|}~-]+@([a-z0-9-]+\.)+[a-z0-9]{2,63}$/i;goog.html.SafeUrl.fromSipUrl=function(a){goog.html.SIP_URL_PATTERN_.test(decodeURIComponent(a))||(a=goog.html.SafeUrl.INNOCUOUS_STRING);return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(a)};goog.html.SafeUrl.fromFacebookMessengerUrl=function(a){goog.string.internal.caseInsensitiveStartsWith(a,"fb-messenger://share")||(a=goog.html.SafeUrl.INNOCUOUS_STRING);return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(a)}; +goog.html.SafeUrl.fromWhatsAppUrl=function(a){goog.string.internal.caseInsensitiveStartsWith(a,"whatsapp://send")||(a=goog.html.SafeUrl.INNOCUOUS_STRING);return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(a)};goog.html.SafeUrl.fromSmsUrl=function(a){goog.string.internal.caseInsensitiveStartsWith(a,"sms:")&&goog.html.SafeUrl.isSmsUrlBodyValid_(a)||(a=goog.html.SafeUrl.INNOCUOUS_STRING);return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(a)}; +goog.html.SafeUrl.isSmsUrlBodyValid_=function(a){var b=a.indexOf("#");0+~[\]()=^$|]+$/.test(c))throw Error("Selector allows only [-_a-zA-Z0-9#.:* ,\x3e+~[\\]()\x3d^$|] and strings, got: "+a);if(!goog.html.SafeStyleSheet.hasBalancedBrackets_(c))throw Error("() and [] in selector must be balanced, got: "+a);b instanceof goog.html.SafeStyle|| +(b=goog.html.SafeStyle.create(b));a=a+"{"+goog.html.SafeStyle.unwrap(b).replace(/=a||"€"<=a&&"�">=a}; +goog.string.stripNewlines=function(a){return a.replace(/(\r\n|\r|\n)+/g," ")};goog.string.canonicalizeNewlines=function(a){return a.replace(/(\r\n|\r|\n)/g,"\n")};goog.string.normalizeWhitespace=function(a){return a.replace(/\xa0|\s/g," 
")};goog.string.normalizeSpaces=function(a){return a.replace(/\xa0|[ \t]+/g," ")};goog.string.collapseBreakingSpaces=function(a){return a.replace(/[\t\r\n ]+/g," ").replace(/^[\t\r\n ]+|[\t\r\n ]+$/g,"")};goog.string.trim=goog.string.internal.trim; +goog.string.trimLeft=function(a){return a.replace(/^[\s\xa0]+/,"")};goog.string.trimRight=function(a){return a.replace(/[\s\xa0]+$/,"")};goog.string.caseInsensitiveCompare=goog.string.internal.caseInsensitiveCompare; +goog.string.numberAwareCompare_=function(a,b,c){if(a==b)return 0;if(!a)return-1;if(!b)return 1;for(var d=a.toLowerCase().match(c),e=b.toLowerCase().match(c),f=Math.min(d.length,e.length),g=0;gb&&(a=a.substring(0,b-3)+"...");c&&(a=goog.string.htmlEscape(a));return a};goog.string.truncateMiddle=function(a,b,c,d){c&&(a=goog.string.unescapeEntities(a));if(d&&a.length>b){d>b&&(d=b);var e=a.length-d;a=a.substring(0,b-d)+"..."+a.substring(e)}else a.length>b&&(d=Math.floor(b/2),e=a.length-d,a=a.substring(0,d+b%2)+"..."+a.substring(e));c&&(a=goog.string.htmlEscape(a));return a}; +goog.string.specialEscapeChars_={"\x00":"\\0","\b":"\\b","\f":"\\f","\n":"\\n","\r":"\\r","\t":"\\t","\x0B":"\\x0B",'"':'\\"',"\\":"\\\\","\x3c":"\\u003C"};goog.string.jsEscapeCache_={"'":"\\'"};goog.string.quote=function(a){a=String(a);for(var b=['"'],c=0;ce?d:goog.string.escapeChar(d))}b.push('"');return b.join("")}; +goog.string.escapeString=function(a){for(var b=[],c=0;cb)var c=a;else{if(256>b){if(c="\\x",16>b||256b&&(c+="0");c+=b.toString(16).toUpperCase()}return goog.string.jsEscapeCache_[a]=c};goog.string.contains=goog.string.internal.contains;goog.string.caseInsensitiveContains=goog.string.internal.caseInsensitiveContains; +goog.string.countOf=function(a,b){return a&&b?a.split(b).length-1:0};goog.string.removeAt=function(a,b,c){var d=a;0<=b&&b>>0;return b};goog.string.uniqueStringCounter_=2147483648*Math.random()|0; +goog.string.createUniqueString=function(){return"goog_"+goog.string.uniqueStringCounter_++};goog.string.toNumber=function(a){var b=Number(a);return 0==b&&goog.string.isEmptyOrWhitespace(a)?NaN:b};goog.string.isLowerCamelCase=function(a){return/^[a-z]+([A-Z][a-z]*)*$/.test(a)};goog.string.isUpperCamelCase=function(a){return/^([A-Z][a-z]*)+$/.test(a)};goog.string.toCamelCase=function(a){return String(a).replace(/\-([a-z])/g,function(a,c){return c.toUpperCase()})}; +goog.string.toSelectorCase=function(a){return String(a).replace(/([A-Z])/g,"-$1").toLowerCase()};goog.string.toTitleCase=function(a,b){b="string"===typeof b?goog.string.regExpEscape(b):"\\s";return a.replace(new RegExp("(^"+(b?"|["+b+"]+":"")+")([a-z])","g"),function(a,b,e){return b+e.toUpperCase()})};goog.string.capitalize=function(a){return String(a.charAt(0)).toUpperCase()+String(a.substr(1)).toLowerCase()}; +goog.string.parseInt=function(a){isFinite(a)&&(a=String(a));return"string"===typeof a?/^\s*-?0x/i.test(a)?parseInt(a,16):parseInt(a,10):NaN};goog.string.splitLimit=function(a,b,c){a=a.split(b);for(var d=[];0c&&(c=e)}return-1==c?a:a.slice(c+1)}; +goog.string.editDistance=function(a,b){var c=[],d=[];if(a==b)return 0;if(!a.length||!b.length)return Math.max(a.length,b.length);for(var e=0;ea*b?a+b:a};goog.math.lerp=function(a,b,c){return a+c*(b-a)};goog.math.nearlyEquals=function(a,b,c){return Math.abs(a-b)<=(c||1E-6)};goog.math.standardAngle=function(a){return goog.math.modulo(a,360)}; +goog.math.standardAngleInRadians=function(a){return goog.math.modulo(a,2*Math.PI)};goog.math.toRadians=function(a){return a*Math.PI/180};goog.math.toDegrees=function(a){return 
180*a/Math.PI};goog.math.angleDx=function(a,b){return b*Math.cos(goog.math.toRadians(a))};goog.math.angleDy=function(a,b){return b*Math.sin(goog.math.toRadians(a))};goog.math.angle=function(a,b,c,d){return goog.math.standardAngle(goog.math.toDegrees(Math.atan2(d-b,c-a)))}; +goog.math.angleDifference=function(a,b){a=goog.math.standardAngle(b)-goog.math.standardAngle(a);180=a&&(a=360+a);return a};goog.math.sign=function(a){return 0a?-1:a}; +goog.math.longestCommonSubsequence=function(a,b,c,d){c=c||function(a,b){return a==b};d=d||function(b,c){return a[b]};for(var e=a.length,f=b.length,g=[],h=0;hg[h][k-1]?h--:k--;return l}; +goog.math.sum=function(a){return goog.array.reduce(arguments,function(a,c){return a+c},0)};goog.math.average=function(a){return goog.math.sum.apply(null,arguments)/arguments.length};goog.math.sampleVariance=function(a){var b=arguments.length;if(2>b)return 0;var c=goog.math.average.apply(null,arguments);return goog.math.sum.apply(null,goog.array.map(arguments,function(a){return Math.pow(a-c,2)}))/(b-1)};goog.math.standardDeviation=function(a){return Math.sqrt(goog.math.sampleVariance.apply(null,arguments))}; +goog.math.isInt=function(a){return isFinite(a)&&0==a%1};goog.math.isFiniteNumber=function(a){return isFinite(a)};goog.math.isNegativeZero=function(a){return 0==a&&0>1/a};goog.math.log10Floor=function(a){if(0a?1:0)}return 0==a?-Infinity:NaN};goog.math.safeFloor=function(a,b){goog.asserts.assert(void 0===b||0=a.length)throw goog.iter.StopIteration;if(b in a)return a[b++];b++}};return c}throw Error("Not implemented");}; +goog.iter.forEach=function(a,b,c){if(goog.isArrayLike(a))try{goog.array.forEach(a,b,c)}catch(d){if(d!==goog.iter.StopIteration)throw d;}else{a=goog.iter.toIterator(a);try{for(;;)b.call(c,a.next(),void 0,a)}catch(d){if(d!==goog.iter.StopIteration)throw d;}}};goog.iter.filter=function(a,b,c){var d=goog.iter.toIterator(a);a=new goog.iter.Iterator;a.next=function(){for(;;){var a=d.next();if(b.call(c,a,void 0,d))return a}};return a}; +goog.iter.filterFalse=function(a,b,c){return goog.iter.filter(a,goog.functions.not(b),c)};goog.iter.range=function(a,b,c){var d=0,e=a,f=c||1;1=e||0>f&&d<=e)throw goog.iter.StopIteration;var a=d;d+=f;return a};return g};goog.iter.join=function(a,b){return goog.iter.toArray(a).join(b)}; +goog.iter.map=function(a,b,c){var d=goog.iter.toIterator(a);a=new goog.iter.Iterator;a.next=function(){var a=d.next();return b.call(c,a,void 0,d)};return a};goog.iter.reduce=function(a,b,c,d){var e=c;goog.iter.forEach(a,function(a){e=b.call(d,e,a)});return e};goog.iter.some=function(a,b,c){a=goog.iter.toIterator(a);try{for(;;)if(b.call(c,a.next(),void 0,a))return!0}catch(d){if(d!==goog.iter.StopIteration)throw d;}return!1}; +goog.iter.every=function(a,b,c){a=goog.iter.toIterator(a);try{for(;;)if(!b.call(c,a.next(),void 0,a))return!1}catch(d){if(d!==goog.iter.StopIteration)throw d;}return!0};goog.iter.chain=function(a){return goog.iter.chainFromIterable(arguments)}; +goog.iter.chainFromIterable=function(a){var b=goog.iter.toIterator(a);a=new goog.iter.Iterator;var c=null;a.next=function(){for(;;){if(null==c){var a=b.next();c=goog.iter.toIterator(a)}try{return c.next()}catch(e){if(e!==goog.iter.StopIteration)throw e;c=null}}};return a};goog.iter.dropWhile=function(a,b,c){var d=goog.iter.toIterator(a);a=new goog.iter.Iterator;var e=!0;a.next=function(){for(;;){var a=d.next();if(!e||!b.call(c,a,void 0,d))return e=!1,a}};return a}; +goog.iter.takeWhile=function(a,b,c){var d=goog.iter.toIterator(a);a=new goog.iter.Iterator;a.next=function(){var 
a=d.next();if(b.call(c,a,void 0,d))return a;throw goog.iter.StopIteration;};return a};goog.iter.toArray=function(a){if(goog.isArrayLike(a))return goog.array.toArray(a);a=goog.iter.toIterator(a);var b=[];goog.iter.forEach(a,function(a){b.push(a)});return b}; +goog.iter.equals=function(a,b,c){a=goog.iter.zipLongest({},a,b);var d=c||goog.array.defaultCompareEquality;return goog.iter.every(a,function(a){return d(a[0],a[1])})};goog.iter.nextOrValue=function(a,b){try{return goog.iter.toIterator(a).next()}catch(c){if(c!=goog.iter.StopIteration)throw c;return b}}; +goog.iter.product=function(a){if(goog.array.some(arguments,function(a){return!a.length})||!arguments.length)return new goog.iter.Iterator;var b=new goog.iter.Iterator,c=arguments,d=goog.array.repeat(0,c.length);b.next=function(){if(d){for(var a=goog.array.map(d,function(a,b){return c[b][a]}),b=d.length-1;0<=b;b--){goog.asserts.assert(d);if(d[b]=b),a=goog.iter.limit(a,c-b));return a};goog.iter.hasDuplicates_=function(a){var b=[];goog.array.removeDuplicates(a,b);return a.length!=b.length};goog.iter.permutations=function(a,b){a=goog.iter.toArray(a);b=goog.array.repeat(a,"number"===typeof b?b:a.length);b=goog.iter.product.apply(void 0,b);return goog.iter.filter(b,function(a){return!goog.iter.hasDuplicates_(a)})}; +goog.iter.combinations=function(a,b){function c(a){return d[a]}var d=goog.iter.toArray(a);a=goog.iter.range(d.length);b=goog.iter.permutations(a,b);var e=goog.iter.filter(b,function(a){return goog.array.isSorted(a)});b=new goog.iter.Iterator;b.next=function(){return goog.array.map(e.next(),c)};return b}; +goog.iter.combinationsWithReplacement=function(a,b){function c(a){return d[a]}var d=goog.iter.toArray(a);a=goog.array.range(d.length);b=goog.array.repeat(a,b);b=goog.iter.product.apply(void 0,b);var e=goog.iter.filter(b,function(a){return goog.array.isSorted(a)});b=new goog.iter.Iterator;b.next=function(){return goog.array.map(e.next(),c)};return b};goog.structs.Map=function(a,b){this.map_={};this.keys_=[];this.version_=this.count_=0;var c=arguments.length;if(12*this.count_&&this.cleanupKeysArray_(),!0):!1}; +goog.structs.Map.prototype.cleanupKeysArray_=function(){if(this.count_!=this.keys_.length){for(var a=0,b=0;a=d.keys_.length)throw goog.iter.StopIteration;var e=d.keys_[b++];return a?e:d.map_[e]};return e};goog.structs.Map.hasKey_=function(a,b){return Object.prototype.hasOwnProperty.call(a,b)};goog.uri={};goog.uri.utils={};goog.uri.utils.CharCode_={AMPERSAND:38,EQUAL:61,HASH:35,QUESTION:63};goog.uri.utils.buildFromEncodedParts=function(a,b,c,d,e,f,g){var h="";a&&(h+=a+":");c&&(h+="//",b&&(h+=b+"@"),h+=c,d&&(h+=":"+d));e&&(h+=e);f&&(h+="?"+f);g&&(h+="#"+g);return h};goog.uri.utils.splitRe_=/^(?:([^:/?#.]+):)?(?:\/\/(?:([^/?#]*)@)?([^/#?]*?)(?::([0-9]+))?(?=[/#?]|$))?([^?#]+)?(?:\?([^#]*))?(?:#([\s\S]*))?$/; +goog.uri.utils.ComponentIndex={SCHEME:1,USER_INFO:2,DOMAIN:3,PORT:4,PATH:5,QUERY_DATA:6,FRAGMENT:7};goog.uri.utils.split=function(a){return a.match(goog.uri.utils.splitRe_)};goog.uri.utils.decodeIfPossible_=function(a,b){return a?b?decodeURI(a):decodeURIComponent(a):a};goog.uri.utils.getComponentByIndex_=function(a,b){return goog.uri.utils.split(b)[a]||null};goog.uri.utils.getScheme=function(a){return goog.uri.utils.getComponentByIndex_(goog.uri.utils.ComponentIndex.SCHEME,a)}; +goog.uri.utils.getEffectiveScheme=function(a){a=goog.uri.utils.getScheme(a);!a&&goog.global.self&&goog.global.self.location&&(a=goog.global.self.location.protocol,a=a.substr(0,a.length-1));return 
a?a.toLowerCase():""};goog.uri.utils.getUserInfoEncoded=function(a){return goog.uri.utils.getComponentByIndex_(goog.uri.utils.ComponentIndex.USER_INFO,a)};goog.uri.utils.getUserInfo=function(a){return goog.uri.utils.decodeIfPossible_(goog.uri.utils.getUserInfoEncoded(a))}; +goog.uri.utils.getDomainEncoded=function(a){return goog.uri.utils.getComponentByIndex_(goog.uri.utils.ComponentIndex.DOMAIN,a)};goog.uri.utils.getDomain=function(a){return goog.uri.utils.decodeIfPossible_(goog.uri.utils.getDomainEncoded(a),!0)};goog.uri.utils.getPort=function(a){return Number(goog.uri.utils.getComponentByIndex_(goog.uri.utils.ComponentIndex.PORT,a))||null};goog.uri.utils.getPathEncoded=function(a){return goog.uri.utils.getComponentByIndex_(goog.uri.utils.ComponentIndex.PATH,a)}; +goog.uri.utils.getPath=function(a){return goog.uri.utils.decodeIfPossible_(goog.uri.utils.getPathEncoded(a),!0)};goog.uri.utils.getQueryData=function(a){return goog.uri.utils.getComponentByIndex_(goog.uri.utils.ComponentIndex.QUERY_DATA,a)};goog.uri.utils.getFragmentEncoded=function(a){var b=a.indexOf("#");return 0>b?null:a.substr(b+1)};goog.uri.utils.setFragmentEncoded=function(a,b){return goog.uri.utils.removeFragment(a)+(b?"#"+b:"")};goog.uri.utils.getFragment=function(a){return goog.uri.utils.decodeIfPossible_(goog.uri.utils.getFragmentEncoded(a))}; +goog.uri.utils.getHost=function(a){a=goog.uri.utils.split(a);return goog.uri.utils.buildFromEncodedParts(a[goog.uri.utils.ComponentIndex.SCHEME],a[goog.uri.utils.ComponentIndex.USER_INFO],a[goog.uri.utils.ComponentIndex.DOMAIN],a[goog.uri.utils.ComponentIndex.PORT])};goog.uri.utils.getOrigin=function(a){a=goog.uri.utils.split(a);return goog.uri.utils.buildFromEncodedParts(a[goog.uri.utils.ComponentIndex.SCHEME],null,a[goog.uri.utils.ComponentIndex.DOMAIN],a[goog.uri.utils.ComponentIndex.PORT])}; +goog.uri.utils.getPathAndAfter=function(a){a=goog.uri.utils.split(a);return goog.uri.utils.buildFromEncodedParts(null,null,null,null,a[goog.uri.utils.ComponentIndex.PATH],a[goog.uri.utils.ComponentIndex.QUERY_DATA],a[goog.uri.utils.ComponentIndex.FRAGMENT])};goog.uri.utils.removeFragment=function(a){var b=a.indexOf("#");return 0>b?a:a.substr(0,b)}; +goog.uri.utils.haveSameDomain=function(a,b){a=goog.uri.utils.split(a);b=goog.uri.utils.split(b);return a[goog.uri.utils.ComponentIndex.DOMAIN]==b[goog.uri.utils.ComponentIndex.DOMAIN]&&a[goog.uri.utils.ComponentIndex.SCHEME]==b[goog.uri.utils.ComponentIndex.SCHEME]&&a[goog.uri.utils.ComponentIndex.PORT]==b[goog.uri.utils.ComponentIndex.PORT]}; +goog.uri.utils.assertNoFragmentsOrQueries_=function(a){goog.asserts.assert(0>a.indexOf("#")&&0>a.indexOf("?"),"goog.uri.utils: Fragment or query identifiers are not supported: [%s]",a)};goog.uri.utils.parseQueryData=function(a,b){if(a){a=a.split("\x26");for(var c=0;cb&&(b=a.length);var c=a.indexOf("?");if(0>c||c>b){c=b;var d=""}else d=a.substring(c+1,b);return[a.substr(0,c),d,a.substr(b)]};goog.uri.utils.joinQueryData_=function(a){return a[0]+(a[1]?"?"+a[1]:"")+a[2]};goog.uri.utils.appendQueryData_=function(a,b){return b?a?a+"\x26"+b:b:a};goog.uri.utils.appendQueryDataToUri_=function(a,b){if(!b)return a;a=goog.uri.utils.splitQueryData_(a);a[1]=goog.uri.utils.appendQueryData_(a[1],b);return goog.uri.utils.joinQueryData_(a)}; +goog.uri.utils.appendKeyValuePairs_=function(a,b,c){goog.asserts.assertString(a);if(goog.isArray(b)){goog.asserts.assertArray(b);for(var d=0;dd)return null;var e=a.indexOf("\x26",d);if(0>e||e>c)e=c;d+=b.length+1;return 
goog.string.urlDecode(a.substr(d,e-d))};goog.uri.utils.getParamValues=function(a,b){for(var c=a.search(goog.uri.utils.hashOrEndRe_),d=0,e,f=[];0<=(e=goog.uri.utils.findParam_(a,d,b,c));){d=a.indexOf("\x26",e);if(0>d||d>c)d=c;e+=b.length+1;f.push(goog.string.urlDecode(a.substr(e,d-e)))}return f}; +goog.uri.utils.trailingQueryPunctuationRe_=/[?&]($|#)/;goog.uri.utils.removeParam=function(a,b){for(var c=a.search(goog.uri.utils.hashOrEndRe_),d=0,e,f=[];0<=(e=goog.uri.utils.findParam_(a,d,b,c));)f.push(a.substring(d,e)),d=Math.min(a.indexOf("\x26",e)+1||c,c);f.push(a.substr(d));return f.join("").replace(goog.uri.utils.trailingQueryPunctuationRe_,"$1")};goog.uri.utils.setParam=function(a,b,c){return goog.uri.utils.appendParam(goog.uri.utils.removeParam(a,b),b,c)}; +goog.uri.utils.setParamsFromMap=function(a,b){a=goog.uri.utils.splitQueryData_(a);var c=a[1],d=[];c&&goog.array.forEach(c.split("\x26"),function(a){var c=a.indexOf("\x3d");c=0<=c?a.substr(0,c):a;b.hasOwnProperty(c)||d.push(a)});a[1]=goog.uri.utils.appendQueryData_(d.join("\x26"),goog.uri.utils.buildQueryDataFromMap(b));return goog.uri.utils.joinQueryData_(a)}; +goog.uri.utils.appendPath=function(a,b){goog.uri.utils.assertNoFragmentsOrQueries_(a);goog.string.endsWith(a,"/")&&(a=a.substr(0,a.length-1));goog.string.startsWith(b,"/")&&(b=b.substr(1));return goog.string.buildString(a,"/",b)}; +goog.uri.utils.setPath=function(a,b){goog.string.startsWith(b,"/")||(b="/"+b);a=goog.uri.utils.split(a);return goog.uri.utils.buildFromEncodedParts(a[goog.uri.utils.ComponentIndex.SCHEME],a[goog.uri.utils.ComponentIndex.USER_INFO],a[goog.uri.utils.ComponentIndex.DOMAIN],a[goog.uri.utils.ComponentIndex.PORT],b,a[goog.uri.utils.ComponentIndex.QUERY_DATA],a[goog.uri.utils.ComponentIndex.FRAGMENT])};goog.uri.utils.StandardQueryParam={RANDOM:"zx"}; +goog.uri.utils.makeUnique=function(a){return goog.uri.utils.setParam(a,goog.uri.utils.StandardQueryParam.RANDOM,goog.string.getRandomString())};goog.Uri=function(a,b){this.domain_=this.userInfo_=this.scheme_="";this.port_=null;this.fragment_=this.path_="";this.ignoreCase_=this.isReadOnly_=!1;var c;a instanceof goog.Uri?(this.ignoreCase_=void 0!==b?b:a.getIgnoreCase(),this.setScheme(a.getScheme()),this.setUserInfo(a.getUserInfo()),this.setDomain(a.getDomain()),this.setPort(a.getPort()),this.setPath(a.getPath()),this.setQueryData(a.getQueryData().clone()),this.setFragment(a.getFragment())):a&&(c=goog.uri.utils.split(String(a)))?(this.ignoreCase_= +!!b,this.setScheme(c[goog.uri.utils.ComponentIndex.SCHEME]||"",!0),this.setUserInfo(c[goog.uri.utils.ComponentIndex.USER_INFO]||"",!0),this.setDomain(c[goog.uri.utils.ComponentIndex.DOMAIN]||"",!0),this.setPort(c[goog.uri.utils.ComponentIndex.PORT]),this.setPath(c[goog.uri.utils.ComponentIndex.PATH]||"",!0),this.setQueryData(c[goog.uri.utils.ComponentIndex.QUERY_DATA]||"",!0),this.setFragment(c[goog.uri.utils.ComponentIndex.FRAGMENT]||"",!0)):(this.ignoreCase_=!!b,this.queryData_=new goog.Uri.QueryData(null, +null,this.ignoreCase_))};goog.Uri.RANDOM_PARAM=goog.uri.utils.StandardQueryParam.RANDOM; +goog.Uri.prototype.toString=function(){var a=[],b=this.getScheme();b&&a.push(goog.Uri.encodeSpecialChars_(b,goog.Uri.reDisallowedInSchemeOrUserInfo_,!0),":");var 
c=this.getDomain();if(c||"file"==b)a.push("//"),(b=this.getUserInfo())&&a.push(goog.Uri.encodeSpecialChars_(b,goog.Uri.reDisallowedInSchemeOrUserInfo_,!0),"@"),a.push(goog.Uri.removeDoubleEncoding_(goog.string.urlEncode(c))),c=this.getPort(),null!=c&&a.push(":",String(c));if(c=this.getPath())this.hasDomain()&&"/"!=c.charAt(0)&&a.push("/"), +a.push(goog.Uri.encodeSpecialChars_(c,"/"==c.charAt(0)?goog.Uri.reDisallowedInAbsolutePath_:goog.Uri.reDisallowedInRelativePath_,!0));(c=this.getEncodedQuery())&&a.push("?",c);(c=this.getFragment())&&a.push("#",goog.Uri.encodeSpecialChars_(c,goog.Uri.reDisallowedInFragment_));return a.join("")}; +goog.Uri.prototype.resolve=function(a){var b=this.clone(),c=a.hasScheme();c?b.setScheme(a.getScheme()):c=a.hasUserInfo();c?b.setUserInfo(a.getUserInfo()):c=a.hasDomain();c?b.setDomain(a.getDomain()):c=a.hasPort();var d=a.getPath();if(c)b.setPort(a.getPort());else if(c=a.hasPath()){if("/"!=d.charAt(0))if(this.hasDomain()&&!this.hasPath())d="/"+d;else{var e=b.getPath().lastIndexOf("/");-1!=e&&(d=b.getPath().substr(0,e+1)+d)}d=goog.Uri.removeDotSegments(d)}c?b.setPath(d):c=a.hasQuery();c?b.setQueryData(a.getQueryData().clone()): +c=a.hasFragment();c&&b.setFragment(a.getFragment());return b};goog.Uri.prototype.clone=function(){return new goog.Uri(this)};goog.Uri.prototype.getScheme=function(){return this.scheme_};goog.Uri.prototype.setScheme=function(a,b){this.enforceReadOnly();if(this.scheme_=b?goog.Uri.decodeOrEmpty_(a,!0):a)this.scheme_=this.scheme_.replace(/:$/,"");return this};goog.Uri.prototype.hasScheme=function(){return!!this.scheme_};goog.Uri.prototype.getUserInfo=function(){return this.userInfo_}; +goog.Uri.prototype.setUserInfo=function(a,b){this.enforceReadOnly();this.userInfo_=b?goog.Uri.decodeOrEmpty_(a):a;return this};goog.Uri.prototype.hasUserInfo=function(){return!!this.userInfo_};goog.Uri.prototype.getDomain=function(){return this.domain_};goog.Uri.prototype.setDomain=function(a,b){this.enforceReadOnly();this.domain_=b?goog.Uri.decodeOrEmpty_(a,!0):a;return this};goog.Uri.prototype.hasDomain=function(){return!!this.domain_};goog.Uri.prototype.getPort=function(){return this.port_}; +goog.Uri.prototype.setPort=function(a){this.enforceReadOnly();if(a){a=Number(a);if(isNaN(a)||0>a)throw Error("Bad port number "+a);this.port_=a}else this.port_=null;return this};goog.Uri.prototype.hasPort=function(){return null!=this.port_};goog.Uri.prototype.getPath=function(){return this.path_};goog.Uri.prototype.setPath=function(a,b){this.enforceReadOnly();this.path_=b?goog.Uri.decodeOrEmpty_(a,!0):a;return this};goog.Uri.prototype.hasPath=function(){return!!this.path_}; +goog.Uri.prototype.hasQuery=function(){return""!==this.queryData_.toString()};goog.Uri.prototype.setQueryData=function(a,b){this.enforceReadOnly();a instanceof goog.Uri.QueryData?(this.queryData_=a,this.queryData_.setIgnoreCase(this.ignoreCase_)):(b||(a=goog.Uri.encodeSpecialChars_(a,goog.Uri.reDisallowedInQuery_)),this.queryData_=new goog.Uri.QueryData(a,null,this.ignoreCase_));return this};goog.Uri.prototype.setQuery=function(a,b){return this.setQueryData(a,b)}; +goog.Uri.prototype.getEncodedQuery=function(){return this.queryData_.toString()};goog.Uri.prototype.getDecodedQuery=function(){return this.queryData_.toDecodedString()};goog.Uri.prototype.getQueryData=function(){return this.queryData_};goog.Uri.prototype.getQuery=function(){return this.getEncodedQuery()};goog.Uri.prototype.setParameterValue=function(a,b){this.enforceReadOnly();this.queryData_.set(a,b);return this}; 
+goog.Uri.prototype.setParameterValues=function(a,b){this.enforceReadOnly();goog.isArray(b)||(b=[String(b)]);this.queryData_.setValues(a,b);return this};goog.Uri.prototype.getParameterValues=function(a){return this.queryData_.getValues(a)};goog.Uri.prototype.getParameterValue=function(a){return this.queryData_.get(a)};goog.Uri.prototype.getFragment=function(){return this.fragment_};goog.Uri.prototype.setFragment=function(a,b){this.enforceReadOnly();this.fragment_=b?goog.Uri.decodeOrEmpty_(a):a;return this}; +goog.Uri.prototype.hasFragment=function(){return!!this.fragment_};goog.Uri.prototype.hasSameDomainAs=function(a){return(!this.hasDomain()&&!a.hasDomain()||this.getDomain()==a.getDomain())&&(!this.hasPort()&&!a.hasPort()||this.getPort()==a.getPort())};goog.Uri.prototype.makeUnique=function(){this.enforceReadOnly();this.setParameterValue(goog.Uri.RANDOM_PARAM,goog.string.getRandomString());return this};goog.Uri.prototype.removeParameter=function(a){this.enforceReadOnly();this.queryData_.remove(a);return this}; +goog.Uri.prototype.setReadOnly=function(a){this.isReadOnly_=a;return this};goog.Uri.prototype.isReadOnly=function(){return this.isReadOnly_};goog.Uri.prototype.enforceReadOnly=function(){if(this.isReadOnly_)throw Error("Tried to modify a read-only Uri");};goog.Uri.prototype.setIgnoreCase=function(a){this.ignoreCase_=a;this.queryData_&&this.queryData_.setIgnoreCase(a);return this};goog.Uri.prototype.getIgnoreCase=function(){return this.ignoreCase_}; +goog.Uri.parse=function(a,b){return a instanceof goog.Uri?a.clone():new goog.Uri(a,b)};goog.Uri.create=function(a,b,c,d,e,f,g,h){h=new goog.Uri(null,h);a&&h.setScheme(a);b&&h.setUserInfo(b);c&&h.setDomain(c);d&&h.setPort(d);e&&h.setPath(e);f&&h.setQueryData(f);g&&h.setFragment(g);return h};goog.Uri.resolve=function(a,b){a instanceof goog.Uri||(a=goog.Uri.parse(a));b instanceof goog.Uri||(b=goog.Uri.parse(b));return a.resolve(b)}; +goog.Uri.removeDotSegments=function(a){if(".."==a||"."==a)return"";if(goog.string.contains(a,"./")||goog.string.contains(a,"/.")){var b=goog.string.startsWith(a,"/");a=a.split("/");for(var c=[],d=0;d>4&15).toString(16)+(a&15).toString(16)};goog.Uri.removeDoubleEncoding_=function(a){return a.replace(/%25([0-9a-fA-F]{2})/g,"%$1")};goog.Uri.reDisallowedInSchemeOrUserInfo_=/[#\/\?@]/g;goog.Uri.reDisallowedInRelativePath_=/[#\?:]/g; +goog.Uri.reDisallowedInAbsolutePath_=/[#\?]/g;goog.Uri.reDisallowedInQuery_=/[#\?@]/g;goog.Uri.reDisallowedInFragment_=/#/g;goog.Uri.haveSameDomain=function(a,b){a=goog.uri.utils.split(a);b=goog.uri.utils.split(b);return a[goog.uri.utils.ComponentIndex.DOMAIN]==b[goog.uri.utils.ComponentIndex.DOMAIN]&&a[goog.uri.utils.ComponentIndex.PORT]==b[goog.uri.utils.ComponentIndex.PORT]};goog.Uri.QueryData=function(a,b,c){this.count_=this.keyMap_=null;this.encodedQuery_=a||null;this.ignoreCase_=!!c}; +goog.Uri.QueryData.prototype.ensureKeyMapInitialized_=function(){if(!this.keyMap_&&(this.keyMap_=new goog.structs.Map,this.count_=0,this.encodedQuery_)){var a=this;goog.uri.utils.parseQueryData(this.encodedQuery_,function(b,c){a.add(goog.string.urlDecode(b),c)})}}; +goog.Uri.QueryData.createFromMap=function(a,b,c){b=goog.structs.getKeys(a);if("undefined"==typeof b)throw Error("Keys are undefined");c=new goog.Uri.QueryData(null,null,c);a=goog.structs.getValues(a);for(var d=0;da?goog.reflect.cache(goog.math.Integer.IntCache_,a,function(a){return new goog.math.Integer([a|0],0>a?-1:0)}):new goog.math.Integer([a|0],0>a?-1:0)}; +goog.math.Integer.fromNumber=function(a){if(isNaN(a)||!isFinite(a))return 
goog.math.Integer.ZERO;if(0>a)return goog.math.Integer.fromNumber(-a).negate();for(var b=[],c=1,d=0;a>=c;d++)b[d]=a/c|0,c*=goog.math.Integer.TWO_PWR_32_DBL_;return new goog.math.Integer(b,0)};goog.math.Integer.fromBits=function(a){return new goog.math.Integer(a,a[a.length-1]&-2147483648?-1:0)}; +goog.math.Integer.fromString=function(a,b){if(0==a.length)throw Error("number format error: empty string");b=b||10;if(2>b||36f?(f=goog.math.Integer.fromNumber(Math.pow(b, +f)),d=d.multiply(f).add(goog.math.Integer.fromNumber(g))):(d=d.multiply(c),d=d.add(goog.math.Integer.fromNumber(g)))}return d};goog.math.Integer.TWO_PWR_32_DBL_=4294967296;goog.math.Integer.ZERO=goog.math.Integer.fromInt(0);goog.math.Integer.ONE=goog.math.Integer.fromInt(1);goog.math.Integer.TWO_PWR_24_=goog.math.Integer.fromInt(16777216);goog.math.Integer.prototype.toInt=function(){return 0a||36>>0).toString(a);c=e;if(c.isZero())return f+d;for(;6>f.length;)f="0"+f;d=""+f+d}};goog.math.Integer.prototype.getBits=function(a){return 0>a?0:athis.compare(a)};goog.math.Integer.prototype.lessThanOrEqual=function(a){return 0>=this.compare(a)};goog.math.Integer.prototype.compare=function(a){a=this.subtract(a);return a.isNegative()?-1:a.isZero()?0:1}; +goog.math.Integer.prototype.shorten=function(a){var b=a-1>>5;a=(a-1)%32;for(var c=[],d=0;d>>16,g=this.getBits(e)&65535,h=a.getBits(e)>>>16,k=a.getBits(e)&65535;g=d+g+k;f=(g>>>16)+f+h;d=f>>>16;g&=65535;f&=65535;c[e]=f<<16|g}return goog.math.Integer.fromBits(c)};goog.math.Integer.prototype.subtract=function(a){return this.add(a.negate())}; +goog.math.Integer.prototype.multiply=function(a){if(this.isZero()||a.isZero())return goog.math.Integer.ZERO;if(this.isNegative())return a.isNegative()?this.negate().multiply(a.negate()):this.negate().multiply(a).negate();if(a.isNegative())return this.multiply(a.negate()).negate();if(this.lessThan(goog.math.Integer.TWO_PWR_24_)&&a.lessThan(goog.math.Integer.TWO_PWR_24_))return goog.math.Integer.fromNumber(this.toNumber()*a.toNumber());for(var b=this.bits_.length+a.bits_.length,c=[],d=0;d<2*b;d++)c[d]= +0;for(d=0;d>>16,g=this.getBits(d)&65535,h=a.getBits(e)>>>16,k=a.getBits(e)&65535;c[2*d+2*e]+=g*k;goog.math.Integer.carry16_(c,2*d+2*e);c[2*d+2*e+1]+=f*k;goog.math.Integer.carry16_(c,2*d+2*e+1);c[2*d+2*e+1]+=g*h;goog.math.Integer.carry16_(c,2*d+2*e+1);c[2*d+2*e+2]+=f*h;goog.math.Integer.carry16_(c,2*d+2*e+2)}for(d=0;d>>16,a[b]&=65535,b++}; +goog.math.Integer.prototype.slowDivide_=function(a){if(this.isNegative()||a.isNegative())throw Error("slowDivide_ only works with positive integers.");for(var b=goog.math.Integer.ONE,c=a;c.lessThanOrEqual(this);)b=b.shiftLeft(1),c=c.shiftLeft(1);var d=b.shiftRight(1),e=c.shiftRight(1);c=c.shiftRight(2);for(b=b.shiftRight(2);!c.isZero();){var f=e.add(c);f.lessThanOrEqual(this)&&(d=d.add(b),e=f);c=c.shiftRight(1);b=b.shiftRight(1)}a=this.subtract(d.multiply(a));return new goog.math.Integer.DivisionResult(d, +a)};goog.math.Integer.prototype.divide=function(a){return this.divideAndRemainder(a).quotient};goog.math.Integer.DivisionResult=function(a,b){this.quotient=a;this.remainder=b}; +goog.math.Integer.prototype.divideAndRemainder=function(a){if(a.isZero())throw Error("division by zero");if(this.isZero())return new goog.math.Integer.DivisionResult(goog.math.Integer.ZERO,goog.math.Integer.ZERO);if(this.isNegative())return a=this.negate().divideAndRemainder(a),new goog.math.Integer.DivisionResult(a.quotient.negate(),a.remainder.negate());if(a.isNegative())return a=this.divideAndRemainder(a.negate()),new 
goog.math.Integer.DivisionResult(a.quotient.negate(),a.remainder);if(30=e?1:Math.pow(2,e-48);for(var f=goog.math.Integer.fromNumber(d),g=f.multiply(a);g.isNegative()||g.greaterThan(c);)d-=e,f=goog.math.Integer.fromNumber(d),g=f.multiply(a);f.isZero()&&(f=goog.math.Integer.ONE);b=b.add(f);c=c.subtract(g)}return new goog.math.Integer.DivisionResult(b,c)};goog.math.Integer.prototype.modulo=function(a){return this.divideAndRemainder(a).remainder}; +goog.math.Integer.prototype.not=function(){for(var a=this.bits_.length,b=[],c=0;c>5;a%=32;for(var c=this.bits_.length+b+(0>>32-a:this.getBits(e-b);return new goog.math.Integer(d,this.sign_)};goog.math.Integer.prototype.shiftRight=function(a){var b=a>>5;a%=32;for(var c=this.bits_.length-b,d=[],e=0;e>>a|this.getBits(e+b+1)<<32-a:this.getBits(e+b);return new goog.math.Integer(d,this.sign_)};goog.string.StringBuffer=function(a,b){null!=a&&this.append.apply(this,arguments)};goog.string.StringBuffer.prototype.buffer_="";goog.string.StringBuffer.prototype.set=function(a){this.buffer_=""+a};goog.string.StringBuffer.prototype.append=function(a,b,c){this.buffer_+=String(a);if(null!=b)for(let a=1;a>21;return 0==a||-1==a&&!(0==this.low_&&-2097152==this.high_)}toString(a){a=a||10;if(2>a||36>2);var c=Math.pow(a,b),d=module$contents$goog$math$Long_Long.fromBits(c, +c/module$contents$goog$math$Long_TWO_PWR_32_DBL_);c=this.div(d);d=Math.abs(this.subtract(c.multiply(d)).toNumber());var e=10==a?""+d:d.toString(a);e.length>>0}getNumBitsAbs(){if(this.isNegative())return this.equals(module$contents$goog$math$Long_Long.getMinValue())?64:this.negate().getNumBitsAbs();for(var a= +0!=this.high_?this.high_:this.low_,b=31;0this.high_}isOdd(){return 1==(this.low_&1)}equals(a){return this.low_==a.low_&&this.high_==a.high_}notEquals(a){return!this.equals(a)}lessThan(a){return 0>this.compare(a)}lessThanOrEqual(a){return 0>=this.compare(a)}greaterThan(a){return 0a.getLowBitsUnsigned()?1:-1:this.high_>a.high_?1:-1}negate(){var a=~this.low_+1|0;return module$contents$goog$math$Long_Long.fromBits(a,~this.high_+!a|0)}add(a){var b=this.high_>>>16,c=this.high_&65535,d=this.low_>>>16,e=a.high_>>>16,f=a.high_&65535,g=a.low_>>>16;a=(this.low_&65535)+(a.low_&65535);g=(a>>>16)+(d+g);d=g>>>16;d+=c+f;b=(d>>>16)+(b+e)&65535;return module$contents$goog$math$Long_Long.fromBits((g&65535)<<16|a&65535,b<<16|d&65535)}subtract(a){return this.add(a.negate())}multiply(a){if(this.isZero())return this; +if(a.isZero())return a;var b=this.high_>>>16,c=this.high_&65535,d=this.low_>>>16,e=this.low_&65535,f=a.high_>>>16,g=a.high_&65535,h=a.low_>>>16;a=a.low_&65535;var k=e*a;var l=(k>>>16)+d*a;var m=l>>>16;l=(l&65535)+e*h;m+=l>>>16;m+=c*a;var n=m>>>16;m=(m&65535)+d*h;n+=m>>>16;m=(m&65535)+e*g;n=n+(m>>>16)+(b*a+c*h+d*g+e*f)&65535;return module$contents$goog$math$Long_Long.fromBits((l&65535)<<16|k&65535,n<<16|m&65535)}div(a){if(a.isZero())throw Error("division by zero");if(this.isNegative()){if(this.equals(module$contents$goog$math$Long_Long.getMinValue())){if(a.equals(module$contents$goog$math$Long_Long.getOne())|| +a.equals(module$contents$goog$math$Long_Long.getNegOne()))return module$contents$goog$math$Long_Long.getMinValue();if(a.equals(module$contents$goog$math$Long_Long.getMinValue()))return module$contents$goog$math$Long_Long.getOne();var b=this.shiftRight(1).div(a).shiftLeft(1);if(b.equals(module$contents$goog$math$Long_Long.getZero()))return a.isNegative()?module$contents$goog$math$Long_Long.getOne():module$contents$goog$math$Long_Long.getNegOne();var c=this.subtract(a.multiply(b));return 
b.add(c.div(a))}return a.isNegative()? +this.negate().div(a.negate()):this.negate().div(a).negate()}if(this.isZero())return module$contents$goog$math$Long_Long.getZero();if(a.isNegative())return a.equals(module$contents$goog$math$Long_Long.getMinValue())?module$contents$goog$math$Long_Long.getZero():this.div(a.negate()).negate();var d=module$contents$goog$math$Long_Long.getZero();for(c=this;c.greaterThanOrEqual(a);){b=Math.max(1,Math.floor(c.toNumber()/a.toNumber()));var e=Math.ceil(Math.log(b)/Math.LN2);e=48>=e?1:Math.pow(2,e-48);for(var f= +module$contents$goog$math$Long_Long.fromNumber(b),g=f.multiply(a);g.isNegative()||g.greaterThan(c);)b-=e,f=module$contents$goog$math$Long_Long.fromNumber(b),g=f.multiply(a);f.isZero()&&(f=module$contents$goog$math$Long_Long.getOne());d=d.add(f);c=c.subtract(g)}return d}modulo(a){return this.subtract(this.div(a).multiply(a))}not(){return module$contents$goog$math$Long_Long.fromBits(~this.low_,~this.high_)}and(a){return module$contents$goog$math$Long_Long.fromBits(this.low_&a.low_,this.high_&a.high_)}or(a){return module$contents$goog$math$Long_Long.fromBits(this.low_| +a.low_,this.high_|a.high_)}xor(a){return module$contents$goog$math$Long_Long.fromBits(this.low_^a.low_,this.high_^a.high_)}shiftLeft(a){a&=63;if(0==a)return this;var b=this.low_;return 32>a?module$contents$goog$math$Long_Long.fromBits(b<>>32-a):module$contents$goog$math$Long_Long.fromBits(0,b<a?module$contents$goog$math$Long_Long.fromBits(this.low_>>>a|b<<32-a,b>>a):module$contents$goog$math$Long_Long.fromBits(b>> +a-32,0<=b?0:-1)}shiftRightUnsigned(a){a&=63;if(0==a)return this;var b=this.high_;return 32>a?module$contents$goog$math$Long_Long.fromBits(this.low_>>>a|b<<32-a,b>>>a):32==a?module$contents$goog$math$Long_Long.fromBits(b,0):module$contents$goog$math$Long_Long.fromBits(b>>>a-32,0)}static fromInt(a){var b=a|0;goog.asserts.assert(a===b,"value should be a 32-bit integer");return-128<=b&&128>b?module$contents$goog$math$Long_getCachedIntValue_(b):new module$contents$goog$math$Long_Long(b,0>b?-1:0)}static fromNumber(a){return 0< +a?a>=module$contents$goog$math$Long_TWO_PWR_63_DBL_?module$contents$goog$math$Long_Long.getMaxValue():new module$contents$goog$math$Long_Long(a,a/module$contents$goog$math$Long_TWO_PWR_32_DBL_):0>a?a<=-module$contents$goog$math$Long_TWO_PWR_63_DBL_?module$contents$goog$math$Long_Long.getMinValue():(new module$contents$goog$math$Long_Long(-a,-a/module$contents$goog$math$Long_TWO_PWR_32_DBL_)).negate():module$contents$goog$math$Long_Long.getZero()}static fromBits(a,b){return new module$contents$goog$math$Long_Long(a, +b)}static fromString(a,b){if("-"==a.charAt(0))return module$contents$goog$math$Long_Long.fromString(a.substring(1),b).negate();var c=parseInt(a,b||10);if(c<=module$contents$goog$math$Long_MAX_SAFE_INTEGER_)return new module$contents$goog$math$Long_Long(c%module$contents$goog$math$Long_TWO_PWR_32_DBL_|0,c/module$contents$goog$math$Long_TWO_PWR_32_DBL_|0);if(0==a.length)throw Error("number format error: empty string");if(0<=a.indexOf("-"))throw Error('number format error: interior "-" character: '+ +a);b=b||10;if(2>b||36f?(f=module$contents$goog$math$Long_Long.fromNumber(Math.pow(b,f)),d=d.multiply(f).add(module$contents$goog$math$Long_Long.fromNumber(g))):(d=d.multiply(c),d=d.add(module$contents$goog$math$Long_Long.fromNumber(g)))}return d}static isStringInRange(a, +b){b=b||10;if(2>b||36a?-1:0)})} +const module$contents$goog$math$Long_MAX_VALUE_FOR_RADIX_=" 111111111111111111111111111111111111111111111111111111111111111 
2021110011022210012102010021220101220221 13333333333333333333333333333333 1104332401304422434310311212 1540241003031030222122211 22341010611245052052300 777777777777777777777 67404283172107811827 9223372036854775807 1728002635214590697 41a792678515120367 10b269549075433c37 4340724c6c71dc7a7 160e2ad3246366807 7fffffffffffffff 33d3d8307b214008 16agh595df825fa7 ba643dci0ffeehh 5cbfjia3fh26ja7 2heiciiie82dh97 1adaibb21dckfa7 i6k448cf4192c2 acd772jnc9l0l7 64ie1focnn5g77 3igoecjbmca687 27c48l5b37oaop 1bk39f3ah3dmq7 q1se8f0m04isb hajppbc1fc207 bm03i95hia437 7vvvvvvvvvvvv 5hg4ck9jd4u37 3tdtk1v8j6tpp 2pijmikexrxp7 1y2p0ij32e8e7".split(" "),module$contents$goog$math$Long_MIN_VALUE_FOR_RADIX_= +" -1000000000000000000000000000000000000000000000000000000000000000 -2021110011022210012102010021220101220222 -20000000000000000000000000000000 -1104332401304422434310311213 -1540241003031030222122212 -22341010611245052052301 -1000000000000000000000 -67404283172107811828 -9223372036854775808 -1728002635214590698 -41a792678515120368 -10b269549075433c38 -4340724c6c71dc7a8 -160e2ad3246366808 -8000000000000000 -33d3d8307b214009 -16agh595df825fa8 -ba643dci0ffeehi -5cbfjia3fh26ja8 -2heiciiie82dh98 -1adaibb21dckfa8 -i6k448cf4192c3 -acd772jnc9l0l8 -64ie1focnn5g78 -3igoecjbmca688 -27c48l5b37oaoq -1bk39f3ah3dmq8 -q1se8f0m04isc -hajppbc1fc208 -bm03i95hia438 -8000000000000 -5hg4ck9jd4u38 -3tdtk1v8j6tpq -2pijmikexrxp8 -1y2p0ij32e8e8".split(" "), +module$contents$goog$math$Long_MAX_SAFE_INTEGER_=9007199254740991,module$contents$goog$math$Long_TWO_PWR_32_DBL_=4294967296,module$contents$goog$math$Long_TWO_PWR_63_DBL_=0x7fffffffffffffff,module$contents$goog$math$Long_ZERO_=module$contents$goog$math$Long_Long.fromBits(0,0),module$contents$goog$math$Long_ONE_=module$contents$goog$math$Long_Long.fromBits(1,0),module$contents$goog$math$Long_NEG_ONE_=module$contents$goog$math$Long_Long.fromBits(-1,-1),module$contents$goog$math$Long_MAX_VALUE_=module$contents$goog$math$Long_Long.fromBits(4294967295, +2147483647),module$contents$goog$math$Long_MIN_VALUE_=module$contents$goog$math$Long_Long.fromBits(0,2147483648),module$contents$goog$math$Long_TWO_PWR_24_=module$contents$goog$math$Long_Long.fromBits(16777216,0);var cljs={core:{}};cljs.core._STAR_clojurescript_version_STAR_="1.10.773";cljs.core._STAR_unchecked_if_STAR_=!1;cljs.core._STAR_unchecked_arrays_STAR_=!1;cljs.core._STAR_warn_on_infer_STAR_=!1;if("undefined"===typeof cljs||"undefined"===typeof cljs.core||"undefined"===typeof cljs.core.PROTOCOL_SENTINEL)cljs.core.PROTOCOL_SENTINEL={};cljs.core.MODULE_URIS=null;cljs.core.MODULE_INFOS=null;cljs.core._STAR_target_STAR_="default";cljs.core._STAR_global_STAR_="default";cljs.core._STAR_ns_STAR_=null; +cljs.core._STAR_out_STAR_=null;cljs.core._STAR_assert_STAR_=!0;if("undefined"===typeof cljs||"undefined"===typeof cljs.core||"undefined"===typeof cljs.core._STAR_print_fn_STAR_)cljs.core._STAR_print_fn_STAR_=null;cljs.core._STAR_exec_tap_fn_STAR_=function(a){return"undefined"!==typeof setTimeout?(a=setTimeout(a,0),cljs.core.truth_(a)?!0:a):!1};if("undefined"===typeof cljs||"undefined"===typeof cljs.core||"undefined"===typeof cljs.core._STAR_print_err_fn_STAR_)cljs.core._STAR_print_err_fn_STAR_=null; +cljs.core.set_print_fn_BANG_=function(a){return cljs.core._STAR_print_fn_STAR_=a};cljs.core.set_print_err_fn_BANG_=function(a){return 
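Although the goog.math.Long code above is mangled, its arithmetic scheme is still legible in the fragments: a 64-bit value is a pair of signed 32-bit words, and addition splits each word into 16-bit limbs so every partial sum stays exact in a JavaScript double while carries ripple upward. A minimal sketch of that scheme follows; Long64 and its method names are hypothetical, not the Closure API.

// Sketch of the low/high representation used by goog.math.Long.
// Hypothetical names; only the arithmetic scheme is taken from the bundle.
class Long64 {
  constructor(low, high) {
    this.low = low | 0;   // least-significant 32 bits (signed)
    this.high = high | 0; // most-significant 32 bits (signed)
  }

  add(other) {
    // Work in 16-bit limbs: each partial sum fits exactly in a double,
    // and the top bits of each limb carry into the next limb up.
    const a48 = this.high >>> 16, a32 = this.high & 0xffff;
    const a16 = this.low >>> 16,  a00 = this.low & 0xffff;
    const b48 = other.high >>> 16, b32 = other.high & 0xffff;
    const b16 = other.low >>> 16,  b00 = other.low & 0xffff;

    const c00 = a00 + b00;
    const c16 = (c00 >>> 16) + a16 + b16;
    const c32 = (c16 >>> 16) + a32 + b32;
    const c48 = ((c32 >>> 16) + a48 + b48) & 0xffff;

    return new Long64(((c16 & 0xffff) << 16) | (c00 & 0xffff),
                      (c48 << 16) | (c32 & 0xffff));
  }

  negate() {
    // Two's complement: ~x + 1, carrying from the low word into the high word.
    const low = (~this.low + 1) | 0;
    return new Long64(low, (~this.high + (low === 0 ? 1 : 0)) | 0);
  }

  subtract(other) { return this.add(other.negate()); }
}

For example, new Long64(0xffffffff | 0, 0).add(new Long64(1, 0)) carries out of the low word and yields low 0, high 1, i.e. 2^32.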
cljs.core._STAR_print_err_fn_STAR_=a};cljs.core._STAR_flush_on_newline_STAR_=!0;cljs.core._STAR_print_newline_STAR_=!0;cljs.core._STAR_print_readably_STAR_=!0;cljs.core._STAR_print_meta_STAR_=!1;cljs.core._STAR_print_dup_STAR_=!1;cljs.core._STAR_print_namespace_maps_STAR_=!1;cljs.core._STAR_print_length_STAR_=null;cljs.core._STAR_print_level_STAR_=null; +cljs.core._STAR_print_fn_bodies_STAR_=!1;if("undefined"===typeof cljs||"undefined"===typeof cljs.core||"undefined"===typeof cljs.core._STAR_loaded_libs_STAR_)cljs.core._STAR_loaded_libs_STAR_=null; +cljs.core.pr_opts=function(){return new cljs.core.PersistentArrayMap(null,5,[new cljs.core.Keyword(null,"flush-on-newline","flush-on-newline",-151457939),cljs.core._STAR_flush_on_newline_STAR_,new cljs.core.Keyword(null,"readably","readably",1129599760),cljs.core._STAR_print_readably_STAR_,new cljs.core.Keyword(null,"meta","meta",1499536964),cljs.core._STAR_print_meta_STAR_,new cljs.core.Keyword(null,"dup","dup",556298533),cljs.core._STAR_print_dup_STAR_,new cljs.core.Keyword(null,"print-length", +"print-length",1931866356),cljs.core._STAR_print_length_STAR_],null)};cljs.core.enable_console_print_BANG_=function(){cljs.core._STAR_print_newline_STAR_=!1;cljs.core.set_print_fn_BANG_(function(){return console.log.apply(console,goog.array.clone(arguments))});cljs.core.set_print_err_fn_BANG_(function(){return console.error.apply(console,goog.array.clone(arguments))});return null};cljs.core.truth_=function(a){return null!=a&&!1!==a};cljs.core.not_native=null; +cljs.core.identical_QMARK_=function(a,b){return a===b};cljs.core.nil_QMARK_=function(a){return null==a};cljs.core.array_QMARK_=function(a){return"nodejs"===cljs.core._STAR_target_STAR_?Array.isArray(a):a instanceof Array};cljs.core.number_QMARK_=function(a){return"number"===typeof a};cljs.core.not=function(a){return null==a?!0:!1===a?!0:!1};cljs.core.some_QMARK_=function(a){return null!=a};cljs.core.object_QMARK_=function(a){return null!=a?a.constructor===Object:!1};cljs.core.string_QMARK_=function(a){return goog.isString(a)}; +cljs.core.char_QMARK_=function(a){return"string"===typeof a&&1===a.length};cljs.core.any_QMARK_=function(a){return!0};cljs.core.native_satisfies_QMARK_=function(a,b){return a[goog.typeOf(null==b?null:b)]?!0:a._?!0:!1};cljs.core.is_proto_=function(a){return a.constructor.prototype===a};cljs.core._STAR_main_cli_fn_STAR_=null;cljs.core._STAR_command_line_args_STAR_=null;cljs.core.type=function(a){return null==a?null:a.constructor}; +cljs.core.missing_protocol=function(a,b){var c=cljs.core.type(b);c=cljs.core.truth_(cljs.core.truth_(c)?c.cljs$lang$type:c)?c.cljs$lang$ctorStr:goog.typeOf(b);return Error(["No protocol method ",a," defined for type ",c,": ",b].join(""))};cljs.core.type__GT_str=function(a){var b=a.cljs$lang$ctorStr;return cljs.core.truth_(b)?b:cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)};cljs.core.load_file=function(a){return cljs.core.truth_(COMPILED)?null:goog.nodeGlobalRequire(a)}; +"undefined"!==typeof Symbol&&"function"===goog.typeOf(Symbol)?cljs.core.ITER_SYMBOL=Symbol.iterator:cljs.core.ITER_SYMBOL="@@iterator";cljs.core.CHAR_MAP={"]":"_RBRACK_","'":"_SINGLEQUOTE_","\x3d":"_EQ_",'"':"_DOUBLEQUOTE_","!":"_BANG_","*":"_STAR_","%":"_PERCENT_","|":"_BAR_","~":"_TILDE_","/":"_SLASH_","\\":"_BSLASH_","-":"_","?":"_QMARK_","\x26":"_AMPERSAND_",":":"_COLON_","\x3c":"_LT_","{":"_LBRACE_","}":"_RBRACE_","[":"_LBRACK_","#":"_SHARP_","^":"_CARET_","+":"_PLUS_","@":"_CIRCA_","\x3e":"_GT_"}; 
+cljs.core.DEMUNGE_MAP={_RBRACE_:"}",_COLON_:":",_BANG_:"!",_QMARK_:"?",_BSLASH_:"\\\\",_SLASH_:"/",_PERCENT_:"%",_PLUS_:"+",_SHARP_:"#",_LBRACE_:"{",_BAR_:"|",_LBRACK_:"[",_EQ_:"\x3d",_:"-",_TILDE_:"~",_RBRACK_:"]",_GT_:"\x3e",_SINGLEQUOTE_:"'",_CIRCA_:"@",_AMPERSAND_:"\x26",_DOUBLEQUOTE_:'\\"',_CARET_:"^",_LT_:"\x3c",_STAR_:"*"};cljs.core.DEMUNGE_PATTERN=null; +cljs.core.system_time=function(){if("undefined"!==typeof performance&&null!=performance.now)return performance.now();if("undefined"!==typeof process&&null!=process.hrtime){var a=process.hrtime();return(1E9*a[0]+a[1])/1E6}return(new Date).getTime()}; +cljs.core.make_array=function(a){switch(arguments.length){case 1:return cljs.core.make_array.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.make_array.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(db)throw Error("Assert failed: (not (neg? idx))");if(!(bb)throw Error("Assert failed: (not (neg? idx))");if(!(bb)throw Error("Assert failed: (not (neg? idx))");if(!(bb)throw Error("Assert failed: (not (neg? idx))");if(!(b>>-b};"undefined"!==typeof Math&&"undefined"!==typeof Math.imul&&0!==Math.imul(4294967295,5)?cljs.core.imul=function(a,b){return Math.imul(a,b)}:cljs.core.imul=function(a,b){var c=a&65535,d=b&65535;return c*d+((a>>>16&65535)*d+c*(b>>>16&65535)<<16>>>0)|0};cljs.core.m3_seed=0;cljs.core.m3_C1=-862048943;cljs.core.m3_C2=461845907; +cljs.core.m3_mix_K1=function(a){return cljs.core.imul(cljs.core.int_rotate_left(cljs.core.imul(a|0,cljs.core.m3_C1),15),cljs.core.m3_C2)};cljs.core.m3_mix_H1=function(a,b){return cljs.core.imul(cljs.core.int_rotate_left((a|0)^(b|0),13),5)+-430675100|0};cljs.core.m3_fmix=function(a,b){a=(a|0)^b;a=cljs.core.imul(a^a>>>16,-2048144789);a=cljs.core.imul(a^a>>>13,-1028477387);return a^a>>>16}; +cljs.core.m3_hash_int=function(a){if(0===a)return a;a=cljs.core.m3_mix_K1(a);a=cljs.core.m3_mix_H1(cljs.core.m3_seed,a);return cljs.core.m3_fmix(a,4)};cljs.core.m3_hash_unencoded_chars=function(a){a:{var b=1;for(var c=cljs.core.m3_seed;;)if(b>2)};cljs.core.instance_QMARK_=function(a,b){return b instanceof a};cljs.core.symbol_QMARK_=function(a){return a instanceof cljs.core.Symbol};cljs.core.hash_symbol=function(a){return cljs.core.hash_combine(cljs.core.m3_hash_unencoded_chars(a.name),cljs.core.hash_string(a.ns))}; +cljs.core.compare_symbols=function(a,b){if(a.str===b.str)return 0;if(cljs.core.truth_(cljs.core.not(a.ns)?b.ns:!1))return-1;if(cljs.core.truth_(a.ns)){if(cljs.core.not(b.ns))return 1;var c=goog.array.defaultCompare(a.ns,b.ns);return 0===c?goog.array.defaultCompare(a.name,b.name):c}return goog.array.defaultCompare(a.name,b.name)}; +cljs.core.Symbol=function(a,b,c,d,e){this.ns=a;this.name=b;this.str=c;this._hash=d;this._meta=e;this.cljs$lang$protocol_mask$partition0$=2154168321;this.cljs$lang$protocol_mask$partition1$=4096};cljs.core.Symbol.prototype.toString=function(){return this.str};cljs.core.Symbol.prototype.equiv=function(a){return this.cljs$core$IEquiv$_equiv$arity$2(null,a)};cljs.core.Symbol.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return b instanceof cljs.core.Symbol?this.str===b.str:!1}; +cljs.core.Symbol.prototype.call=function(){var a=null,b=function(a,b){return cljs.core.get.cljs$core$IFn$_invoke$arity$2(b,this)},c=function(a,b,c){return cljs.core.get.cljs$core$IFn$_invoke$arity$3(b,this,c)};a=function(a,e,f){switch(arguments.length){case 2:return b.call(this,a,e);case 3:return c.call(this,a,e,f)}throw Error("Invalid arity: 
"+(arguments.length-1));};a.cljs$core$IFn$_invoke$arity$2=b;a.cljs$core$IFn$_invoke$arity$3=c;return a}(); +cljs.core.Symbol.prototype.apply=function(a,b){return this.call.apply(this,[this].concat(cljs.core.aclone(b)))};cljs.core.Symbol.prototype.cljs$core$IFn$_invoke$arity$1=function(a){return cljs.core.get.cljs$core$IFn$_invoke$arity$2(a,this)};cljs.core.Symbol.prototype.cljs$core$IFn$_invoke$arity$2=function(a,b){return cljs.core.get.cljs$core$IFn$_invoke$arity$3(a,this,b)};cljs.core.Symbol.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return this._meta}; +cljs.core.Symbol.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return new cljs.core.Symbol(this.ns,this.name,this.str,this._hash,b)};cljs.core.Symbol.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this._hash;return null!=a?a:this._hash=a=cljs.core.hash_symbol(this)};cljs.core.Symbol.prototype.cljs$core$INamed$_name$arity$1=function(a){return this.name};cljs.core.Symbol.prototype.cljs$core$INamed$_namespace$arity$1=function(a){return this.ns}; +cljs.core.Symbol.prototype.cljs$core$IPrintWithWriter$_pr_writer$arity$3=function(a,b,c){return cljs.core._write(b,this.str)}; +cljs.core.Symbol.getBasis=function(){return new cljs.core.PersistentVector(null,5,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"ns","ns",2082130287,null),new cljs.core.Symbol(null,"name","name",-810760592,null),new cljs.core.Symbol(null,"str","str",-1564826950,null),cljs.core.with_meta(new cljs.core.Symbol(null,"_hash","_hash",-2130838312,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),new cljs.core.Symbol(null, +"_meta","_meta",-1716892533,null)],null)};cljs.core.Symbol.cljs$lang$type=!0;cljs.core.Symbol.cljs$lang$ctorStr="cljs.core/Symbol";cljs.core.Symbol.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/Symbol")};cljs.core.__GT_Symbol=function(a,b,c,d,e){return new cljs.core.Symbol(a,b,c,d,e)};cljs.core.var_QMARK_=function(a){return a instanceof cljs.core.Var}; +cljs.core.symbol=function(a){switch(arguments.length){case 1:return cljs.core.symbol.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.symbol.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.core.symbol.cljs$core$IFn$_invoke$arity$1=function(a){for(;;){if(a instanceof cljs.core.Symbol)return a;if("string"===typeof a){var b=a.indexOf("/");return 1>b?cljs.core.symbol.cljs$core$IFn$_invoke$arity$2(null,a):cljs.core.symbol.cljs$core$IFn$_invoke$arity$2(a.substring(0,b),a.substring(b+1,a.length))}if(cljs.core.var_QMARK_(a))return a.sym;if(a instanceof cljs.core.Keyword)a=a.fqn;else throw Error("no conversion to symbol");}}; +cljs.core.symbol.cljs$core$IFn$_invoke$arity$2=function(a,b){var c=null!=a?[cljs.core.str.cljs$core$IFn$_invoke$arity$1(a),"/",cljs.core.str.cljs$core$IFn$_invoke$arity$1(b)].join(""):b;return new cljs.core.Symbol(a,b,c,null,null)};cljs.core.symbol.cljs$lang$maxFixedArity=2;cljs.core.Var=function(a,b,c){this.val=a;this.sym=b;this._meta=c;this.cljs$lang$protocol_mask$partition0$=6717441;this.cljs$lang$protocol_mask$partition1$=0}; 
+cljs.core.Var.prototype.isMacro=function(){return(this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null)).cljs$lang$macro};cljs.core.Var.prototype.toString=function(){return["#'",cljs.core.str.cljs$core$IFn$_invoke$arity$1(this.sym)].join("")};cljs.core.Var.prototype.cljs$core$IDeref$_deref$arity$1=function(a){return this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null)}; +cljs.core.Var.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return this._meta};cljs.core.Var.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return new cljs.core.Var(this.val,this.sym,b)};cljs.core.Var.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return b instanceof cljs.core.Var?cljs.core._EQ_.cljs$core$IFn$_invoke$arity$2(this.sym,b.sym):!1};cljs.core.Var.prototype.cljs$core$IHash$_hash$arity$1=function(a){return cljs.core.hash_symbol(this.sym)}; +cljs.core.Var.prototype.cljs$core$Fn$=cljs.core.PROTOCOL_SENTINEL; +cljs.core.Var.prototype.call=function(){var a=null,b=function(a){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$0?a.cljs$core$IFn$_invoke$arity$0():a.call(null)},c=function(a,b){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$1?a.cljs$core$IFn$_invoke$arity$1(b):a.call(null,b)},d=function(a,b,c){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0? +a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$2?a.cljs$core$IFn$_invoke$arity$2(b,c):a.call(null,b,c)},e=function(a,b,c,d){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$3?a.cljs$core$IFn$_invoke$arity$3(b,c,d):a.call(null,b,c,d)},f=function(a,b,c,d,e){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$4? 
+a.cljs$core$IFn$_invoke$arity$4(b,c,d,e):a.call(null,b,c,d,e)},g=function(a,b,c,d,e,f){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$5?a.cljs$core$IFn$_invoke$arity$5(b,c,d,e,f):a.call(null,b,c,d,e,f)},h=function(a,b,c,d,e,f,g){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$6?a.cljs$core$IFn$_invoke$arity$6(b,c,d,e,f,g): +a.call(null,b,c,d,e,f,g)},k=function(a,b,c,d,e,f,g,h){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$7?a.cljs$core$IFn$_invoke$arity$7(b,c,d,e,f,g,h):a.call(null,b,c,d,e,f,g,h)},l=function(a,b,c,d,e,f,g,h,k){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$8?a.cljs$core$IFn$_invoke$arity$8(b,c,d,e,f,g,h,k):a.call(null,b,c, +d,e,f,g,h,k)},m=function(a,b,c,d,e,f,g,h,k,l){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$9?a.cljs$core$IFn$_invoke$arity$9(b,c,d,e,f,g,h,k,l):a.call(null,b,c,d,e,f,g,h,k,l)},n=function(a,b,c,d,e,f,g,h,k,l,m){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$10?a.cljs$core$IFn$_invoke$arity$10(b,c,d,e,f,g,h,k,l,m):a.call(null, +b,c,d,e,f,g,h,k,l,m)},p=function(a,b,c,d,e,f,g,h,k,l,m,n){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$11?a.cljs$core$IFn$_invoke$arity$11(b,c,d,e,f,g,h,k,l,m,n):a.call(null,b,c,d,e,f,g,h,k,l,m,n)},q=function(a,b,c,d,e,f,g,h,k,l,m,n,p){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$12?a.cljs$core$IFn$_invoke$arity$12(b, +c,d,e,f,g,h,k,l,m,n,p):a.call(null,b,c,d,e,f,g,h,k,l,m,n,p)},r=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$13?a.cljs$core$IFn$_invoke$arity$13(b,c,d,e,f,g,h,k,l,m,n,p,q):a.call(null,b,c,d,e,f,g,h,k,l,m,n,p,q)},t=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$14? 
+a.cljs$core$IFn$_invoke$arity$14(b,c,d,e,f,g,h,k,l,m,n,p,q,r):a.call(null,b,c,d,e,f,g,h,k,l,m,n,p,q,r)},u=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$15?a.cljs$core$IFn$_invoke$arity$15(b,c,d,e,f,g,h,k,l,m,n,p,q,r,t):a.call(null,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t)},v=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0(): +a.val.call(null);return a.cljs$core$IFn$_invoke$arity$16?a.cljs$core$IFn$_invoke$arity$16(b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u):a.call(null,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u)},w=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$17?a.cljs$core$IFn$_invoke$arity$17(b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v):a.call(null,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v)},x=function(a,b,c,d,e,f,g,h,k, +l,m,n,p,q,r,t,u,v,w){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$18?a.cljs$core$IFn$_invoke$arity$18(b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w):a.call(null,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w)},A=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$19?a.cljs$core$IFn$_invoke$arity$19(b, +c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x):a.call(null,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x)},K=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x,A){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$20?a.cljs$core$IFn$_invoke$arity$20(b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x,A):a.call(null,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x,A)},S=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x,A,K){a=this;return cljs.core.apply.cljs$core$IFn$_invoke$arity$variadic(a.val.cljs$core$IFn$_invoke$arity$0? 
+a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null),b,c,d,e,cljs.core.prim_seq.cljs$core$IFn$_invoke$arity$2([f,g,h,k,l,m,n,p,q,r,t,u,v,w,x,A,K],0))};a=function(a,z,B,C,D,E,F,G,H,I,J,L,M,N,O,P,Q,R,T,U,V,W){switch(arguments.length){case 1:return b.call(this,a);case 2:return c.call(this,a,z);case 3:return d.call(this,a,z,B);case 4:return e.call(this,a,z,B,C);case 5:return f.call(this,a,z,B,C,D);case 6:return g.call(this,a,z,B,C,D,E);case 7:return h.call(this,a,z,B,C,D,E,F);case 8:return k.call(this, +a,z,B,C,D,E,F,G);case 9:return l.call(this,a,z,B,C,D,E,F,G,H);case 10:return m.call(this,a,z,B,C,D,E,F,G,H,I);case 11:return n.call(this,a,z,B,C,D,E,F,G,H,I,J);case 12:return p.call(this,a,z,B,C,D,E,F,G,H,I,J,L);case 13:return q.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M);case 14:return r.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M,N);case 15:return t.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M,N,O);case 16:return u.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M,N,O,P);case 17:return v.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M,N,O,P,Q); +case 18:return w.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M,N,O,P,Q,R);case 19:return x.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M,N,O,P,Q,R,T);case 20:return A.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M,N,O,P,Q,R,T,U);case 21:return K.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M,N,O,P,Q,R,T,U,V);case 22:return S.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M,N,O,P,Q,R,T,U,V,W)}throw Error("Invalid arity: "+(arguments.length-1));};a.cljs$core$IFn$_invoke$arity$1=b;a.cljs$core$IFn$_invoke$arity$2=c;a.cljs$core$IFn$_invoke$arity$3=d;a.cljs$core$IFn$_invoke$arity$4= +e;a.cljs$core$IFn$_invoke$arity$5=f;a.cljs$core$IFn$_invoke$arity$6=g;a.cljs$core$IFn$_invoke$arity$7=h;a.cljs$core$IFn$_invoke$arity$8=k;a.cljs$core$IFn$_invoke$arity$9=l;a.cljs$core$IFn$_invoke$arity$10=m;a.cljs$core$IFn$_invoke$arity$11=n;a.cljs$core$IFn$_invoke$arity$12=p;a.cljs$core$IFn$_invoke$arity$13=q;a.cljs$core$IFn$_invoke$arity$14=r;a.cljs$core$IFn$_invoke$arity$15=t;a.cljs$core$IFn$_invoke$arity$16=u;a.cljs$core$IFn$_invoke$arity$17=v;a.cljs$core$IFn$_invoke$arity$18=w;a.cljs$core$IFn$_invoke$arity$19= +x;a.cljs$core$IFn$_invoke$arity$20=A;a.cljs$core$IFn$_invoke$arity$21=K;a.cljs$core$IFn$_invoke$arity$22=S;return a}();cljs.core.Var.prototype.apply=function(a,b){return this.call.apply(this,[this].concat(cljs.core.aclone(b)))};cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$0=function(){var a=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return a.cljs$core$IFn$_invoke$arity$0?a.cljs$core$IFn$_invoke$arity$0():a.call(null)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$1=function(a){var b=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return b.cljs$core$IFn$_invoke$arity$1?b.cljs$core$IFn$_invoke$arity$1(a):b.call(null,a)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$2=function(a,b){var c=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return c.cljs$core$IFn$_invoke$arity$2?c.cljs$core$IFn$_invoke$arity$2(a,b):c.call(null,a,b)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$3=function(a,b,c){var d=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return d.cljs$core$IFn$_invoke$arity$3?d.cljs$core$IFn$_invoke$arity$3(a,b,c):d.call(null,a,b,c)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$4=function(a,b,c,d){var e=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return 
e.cljs$core$IFn$_invoke$arity$4?e.cljs$core$IFn$_invoke$arity$4(a,b,c,d):e.call(null,a,b,c,d)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$5=function(a,b,c,d,e){var f=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return f.cljs$core$IFn$_invoke$arity$5?f.cljs$core$IFn$_invoke$arity$5(a,b,c,d,e):f.call(null,a,b,c,d,e)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$6=function(a,b,c,d,e,f){var g=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return g.cljs$core$IFn$_invoke$arity$6?g.cljs$core$IFn$_invoke$arity$6(a,b,c,d,e,f):g.call(null,a,b,c,d,e,f)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$7=function(a,b,c,d,e,f,g){var h=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return h.cljs$core$IFn$_invoke$arity$7?h.cljs$core$IFn$_invoke$arity$7(a,b,c,d,e,f,g):h.call(null,a,b,c,d,e,f,g)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$8=function(a,b,c,d,e,f,g,h){var k=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return k.cljs$core$IFn$_invoke$arity$8?k.cljs$core$IFn$_invoke$arity$8(a,b,c,d,e,f,g,h):k.call(null,a,b,c,d,e,f,g,h)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$9=function(a,b,c,d,e,f,g,h,k){var l=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return l.cljs$core$IFn$_invoke$arity$9?l.cljs$core$IFn$_invoke$arity$9(a,b,c,d,e,f,g,h,k):l.call(null,a,b,c,d,e,f,g,h,k)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$10=function(a,b,c,d,e,f,g,h,k,l){var m=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return m.cljs$core$IFn$_invoke$arity$10?m.cljs$core$IFn$_invoke$arity$10(a,b,c,d,e,f,g,h,k,l):m.call(null,a,b,c,d,e,f,g,h,k,l)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$11=function(a,b,c,d,e,f,g,h,k,l,m){var n=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return n.cljs$core$IFn$_invoke$arity$11?n.cljs$core$IFn$_invoke$arity$11(a,b,c,d,e,f,g,h,k,l,m):n.call(null,a,b,c,d,e,f,g,h,k,l,m)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$12=function(a,b,c,d,e,f,g,h,k,l,m,n){var p=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return p.cljs$core$IFn$_invoke$arity$12?p.cljs$core$IFn$_invoke$arity$12(a,b,c,d,e,f,g,h,k,l,m,n):p.call(null,a,b,c,d,e,f,g,h,k,l,m,n)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$13=function(a,b,c,d,e,f,g,h,k,l,m,n,p){var q=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return q.cljs$core$IFn$_invoke$arity$13?q.cljs$core$IFn$_invoke$arity$13(a,b,c,d,e,f,g,h,k,l,m,n,p):q.call(null,a,b,c,d,e,f,g,h,k,l,m,n,p)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$14=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q){var r=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return r.cljs$core$IFn$_invoke$arity$14?r.cljs$core$IFn$_invoke$arity$14(a,b,c,d,e,f,g,h,k,l,m,n,p,q):r.call(null,a,b,c,d,e,f,g,h,k,l,m,n,p,q)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$15=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r){var t=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return 
t.cljs$core$IFn$_invoke$arity$15?t.cljs$core$IFn$_invoke$arity$15(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r):t.call(null,a,b,c,d,e,f,g,h,k,l,m,n,p,q,r)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$16=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t){var u=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return u.cljs$core$IFn$_invoke$arity$16?u.cljs$core$IFn$_invoke$arity$16(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t):u.call(null,a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$17=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u){var v=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return v.cljs$core$IFn$_invoke$arity$17?v.cljs$core$IFn$_invoke$arity$17(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u):v.call(null,a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$18=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v){var w=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return w.cljs$core$IFn$_invoke$arity$18?w.cljs$core$IFn$_invoke$arity$18(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v):w.call(null,a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$19=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w){var x=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return x.cljs$core$IFn$_invoke$arity$19?x.cljs$core$IFn$_invoke$arity$19(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w):x.call(null,a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$20=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x){var A=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return A.cljs$core$IFn$_invoke$arity$20?A.cljs$core$IFn$_invoke$arity$20(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x):A.call(null,a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$21=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x,A){return cljs.core.apply.cljs$core$IFn$_invoke$arity$variadic(this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null),a,b,c,d,cljs.core.prim_seq.cljs$core$IFn$_invoke$arity$2([e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x,A],0))}; +cljs.core.Var.getBasis=function(){return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"val","val",1769233139,null),new cljs.core.Symbol(null,"sym","sym",195671222,null),new cljs.core.Symbol(null,"_meta","_meta",-1716892533,null)],null)};cljs.core.Var.cljs$lang$type=!0;cljs.core.Var.cljs$lang$ctorStr="cljs.core/Var";cljs.core.Var.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/Var")}; +cljs.core.__GT_Var=function(a,b,c){return new cljs.core.Var(a,b,c)};cljs.core.iterable_QMARK_=function(a){return null!=a?a.cljs$lang$protocol_mask$partition1$&131072||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$IIterable$?!0:a.cljs$lang$protocol_mask$partition1$?!1:cljs.core.native_satisfies_QMARK_(cljs.core.IIterable,a):cljs.core.native_satisfies_QMARK_(cljs.core.IIterable,a)};cljs.core.js_iterable_QMARK_=function(a){return null!=a&&null!=a[cljs.core.ITER_SYMBOL]};cljs.core.clone=function(a){return cljs.core._clone(a)}; +cljs.core.cloneable_QMARK_=function(a){return 
null!=a?a.cljs$lang$protocol_mask$partition1$&8192||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$ICloneable$?!0:a.cljs$lang$protocol_mask$partition1$?!1:cljs.core.native_satisfies_QMARK_(cljs.core.ICloneable,a):cljs.core.native_satisfies_QMARK_(cljs.core.ICloneable,a)}; +cljs.core.seq=function(a){if(null==a)return null;if(null!=a&&(a.cljs$lang$protocol_mask$partition0$&8388608||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$ISeqable$))return a.cljs$core$ISeqable$_seq$arity$1(null);if(cljs.core.array_QMARK_(a)||"string"===typeof a)return 0===a.length?null:new cljs.core.IndexedSeq(a,0,null);if(cljs.core.js_iterable_QMARK_(a))return cljs.core.es6_iterator_seq(goog.object.get(a,cljs.core.ITER_SYMBOL).call(a));if(cljs.core.native_satisfies_QMARK_(cljs.core.ISeqable,a))return cljs.core._seq(a); +throw Error([cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)," is not ISeqable"].join(""));};cljs.core.first=function(a){if(null==a)return null;if(null!=a&&(a.cljs$lang$protocol_mask$partition0$&64||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$ISeq$))return a.cljs$core$ISeq$_first$arity$1(null);a=cljs.core.seq(a);return null==a?null:cljs.core._first(a)}; +cljs.core.rest=function(a){return null!=a?null!=a&&(a.cljs$lang$protocol_mask$partition0$&64||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$ISeq$)?a.cljs$core$ISeq$_rest$arity$1(null):(a=cljs.core.seq(a))?a.cljs$core$ISeq$_rest$arity$1(null):cljs.core.List.EMPTY:cljs.core.List.EMPTY};cljs.core.next=function(a){return null==a?null:null!=a&&(a.cljs$lang$protocol_mask$partition0$&128||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$INext$)?a.cljs$core$INext$_next$arity$1(null):cljs.core.seq(cljs.core.rest(a))}; +cljs.core._EQ_=function(a){switch(arguments.length){case 1:return cljs.core._EQ_.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core._EQ_.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(d=d)return-1;!(0c&&(c+=d,c=0>c?0:c);for(;;)if(cc?d+c:c;for(;;)if(0<=c){if(cljs.core._EQ_.cljs$core$IFn$_invoke$arity$2(cljs.core.nth.cljs$core$IFn$_invoke$arity$2(a,c),b))return c;--c}else return-1};cljs.core._lastIndexOf.cljs$lang$maxFixedArity=3; +cljs.core.IndexedSeqIterator=function(a,b){this.arr=a;this.i=b};cljs.core.IndexedSeqIterator.prototype.hasNext=function(){return this.ia?0:a};cljs.core.IndexedSeq.prototype.cljs$core$IReversible$_rseq$arity$1=function(a){a=this.cljs$core$ICounted$_count$arity$1(null);return 0b)throw Error("Index out of bounds");return cljs.core.linear_traversal_nth.cljs$core$IFn$_invoke$arity$2(a,b)}if(cljs.core.native_satisfies_QMARK_(cljs.core.IIndexed,a))return cljs.core._nth.cljs$core$IFn$_invoke$arity$2(a,b);throw Error(["nth not supported on this type ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(cljs.core.type__GT_str(cljs.core.type(a)))].join("")); +}; +cljs.core.nth.cljs$core$IFn$_invoke$arity$3=function(a,b,c){if("number"!==typeof b)throw Error("Index argument to nth must be a number.");if(null==a)return c;if(null!=a&&(a.cljs$lang$protocol_mask$partition0$&16||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$IIndexed$))return a.cljs$core$IIndexed$_nth$arity$3(null,b,c);if(cljs.core.array_QMARK_(a))return-1b?c:cljs.core.linear_traversal_nth.cljs$core$IFn$_invoke$arity$3(a,b,c);if(cljs.core.native_satisfies_QMARK_(cljs.core.IIndexed,a))return cljs.core._nth.cljs$core$IFn$_invoke$arity$3(a,b,c);throw Error(["nth not supported on this type ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(cljs.core.type__GT_str(cljs.core.type(a)))].join(""));};cljs.core.nth.cljs$lang$maxFixedArity=3; 
+cljs.core.nthrest=function(a,b){for(var c=a;;)if(a=0a:a instanceof goog.math.Integer?a.isNegative():a instanceof module$contents$goog$math$Long_Long?a.isNegative():!1};cljs.core.nat_int_QMARK_=function(a){return cljs.core.integer_QMARK_(a)?!(0>a):a instanceof goog.math.Integer?cljs.core.not(a.isNegative()):a instanceof module$contents$goog$math$Long_Long?cljs.core.not(a.isNegative()):!1};cljs.core.float_QMARK_=function(a){return"number"===typeof a}; +cljs.core.double_QMARK_=function(a){return"number"===typeof a};cljs.core.infinite_QMARK_=function(a){return a===Number.POSITIVE_INFINITY||a===Number.NEGATIVE_INFINITY};cljs.core.contains_QMARK_=function(a,b){return cljs.core.get.cljs$core$IFn$_invoke$arity$3(a,b,cljs.core.lookup_sentinel)===cljs.core.lookup_sentinel?!1:!0}; +cljs.core.find=function(a,b){return cljs.core.ifind_QMARK_(a)?cljs.core._find(a,b):null!=a&&cljs.core.associative_QMARK_(a)&&cljs.core.contains_QMARK_(a,b)?new cljs.core.MapEntry(b,cljs.core.get.cljs$core$IFn$_invoke$arity$2(a,b),null):null}; +cljs.core.distinct_QMARK_=function(a){switch(arguments.length){case 1:return cljs.core.distinct_QMARK_.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.distinct_QMARK_.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(dd?1:0===c?0:cljs.core.compare_indexed.cljs$core$IFn$_invoke$arity$4(a,b,c,0)};cljs.core.compare_indexed.cljs$core$IFn$_invoke$arity$4=function(a,b,c,d){for(;;){var e=cljs.core.compare(cljs.core.nth.cljs$core$IFn$_invoke$arity$2(a,d),cljs.core.nth.cljs$core$IFn$_invoke$arity$2(b,d));if(0===e&&d+1b};cljs.core._GT_.cljs$core$IFn$_invoke$arity$variadic=function(a,b,c){for(;;)if(a>b)if(cljs.core.next(c))a=b,b=cljs.core.first(c),c=cljs.core.next(c);else return b>cljs.core.first(c);else return!1};cljs.core._GT_.cljs$lang$applyTo=function(a){var b=cljs.core.first(a),c=cljs.core.next(a);a=cljs.core.first(c);c=cljs.core.next(c);return this.cljs$core$IFn$_invoke$arity$variadic(b,a,c)};cljs.core._GT_.cljs$lang$maxFixedArity=2; +cljs.core._GT__EQ_=function(a){switch(arguments.length){case 1:return cljs.core._GT__EQ_.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core._GT__EQ_.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(d=b};cljs.core._GT__EQ_.cljs$core$IFn$_invoke$arity$variadic=function(a,b,c){for(;;)if(a>=b)if(cljs.core.next(c))a=b,b=cljs.core.first(c),c=cljs.core.next(c);else return b>=cljs.core.first(c);else return!1}; +cljs.core._GT__EQ_.cljs$lang$applyTo=function(a){var b=cljs.core.first(a),c=cljs.core.next(a);a=cljs.core.first(c);c=cljs.core.next(c);return this.cljs$core$IFn$_invoke$arity$variadic(b,a,c)};cljs.core._GT__EQ_.cljs$lang$maxFixedArity=2;cljs.core.dec=function(a){return a-1}; +cljs.core.max=function(a){switch(arguments.length){case 1:return cljs.core.max.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.max.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(db?a:b};cljs.core.max.cljs$core$IFn$_invoke$arity$variadic=function(a,b,c){return cljs.core.reduce.cljs$core$IFn$_invoke$arity$3(cljs.core.max,a>b?a:b,c)};cljs.core.max.cljs$lang$applyTo=function(a){var b=cljs.core.first(a),c=cljs.core.next(a);a=cljs.core.first(c);c=cljs.core.next(c);return this.cljs$core$IFn$_invoke$arity$variadic(b,a,c)};cljs.core.max.cljs$lang$maxFixedArity=2; +cljs.core.min=function(a){switch(arguments.length){case 1:return 
cljs.core.min.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.min.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(d>b};cljs.core.bit_shift_right_zero_fill=function(a,b){return a>>>b};cljs.core.unsigned_bit_shift_right=function(a,b){return a>>>b};cljs.core.bit_count=function(a){a-=a>>1&1431655765;a=(a&858993459)+(a>>2&858993459);return 16843009*(a+(a>>4)&252645135)>>24}; +cljs.core._EQ__EQ_=function(a){switch(arguments.length){case 1:return cljs.core._EQ__EQ_.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core._EQ__EQ_.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(da};cljs.core.nthnext=function(a,b){for(a=cljs.core.seq(a);;)if(a&&0c:b}()))b[c]=a.next(),c+=1;else return cljs.core.chunk_cons(cljs.core.array_chunk.cljs$core$IFn$_invoke$arity$3(b,0,c),cljs.core.chunkIteratorSeq.cljs$core$IFn$_invoke$arity$1?cljs.core.chunkIteratorSeq.cljs$core$IFn$_invoke$arity$1(a):cljs.core.chunkIteratorSeq.call(null,a));else return null},null,null)}; +cljs.core.TransformerIterator=function(a,b,c,d,e,f){this.buffer=a;this._next=b;this.completed=c;this.xf=d;this.sourceIter=e;this.multi=f}; +cljs.core.TransformerIterator.prototype.step=function(){if(this._next!==cljs.core.NONE)return!0;for(;;)if(this._next===cljs.core.NONE)if(this.buffer.isEmpty()){if(this.completed)return!1;if(this.sourceIter.hasNext()){if(this.multi)var a=cljs.core.apply.cljs$core$IFn$_invoke$arity$2(this.xf,cljs.core.cons(null,this.sourceIter.next()));else a=this.sourceIter.next(),a=this.xf.cljs$core$IFn$_invoke$arity$2?this.xf.cljs$core$IFn$_invoke$arity$2(null,a):this.xf.call(null,null,a);cljs.core.reduced_QMARK_(a)&& +(this.xf.cljs$core$IFn$_invoke$arity$1?this.xf.cljs$core$IFn$_invoke$arity$1(null):this.xf.call(null,null),this.completed=!0)}else this.xf.cljs$core$IFn$_invoke$arity$1?this.xf.cljs$core$IFn$_invoke$arity$1(null):this.xf.call(null,null),this.completed=!0}else this._next=this.buffer.remove();else return!0};cljs.core.TransformerIterator.prototype.hasNext=function(){return this.step()}; +cljs.core.TransformerIterator.prototype.next=function(){if(this.hasNext()){var a=this._next;this._next=cljs.core.NONE;return a}throw Error("No such element");};cljs.core.TransformerIterator.prototype.remove=function(){return Error("Unsupported operation")}; +cljs.core.TransformerIterator.getBasis=function(){return new cljs.core.PersistentVector(null,6,5,cljs.core.PersistentVector.EMPTY_NODE,[cljs.core.with_meta(new cljs.core.Symbol(null,"buffer","buffer",-2037140571,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),cljs.core.with_meta(new cljs.core.Symbol(null,"_next","_next",101877036,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266), +!0],null)),cljs.core.with_meta(new cljs.core.Symbol(null,"completed","completed",1154475024,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),cljs.core.with_meta(new cljs.core.Symbol(null,"xf","xf",2042434515,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),new cljs.core.Symbol(null,"sourceIter","sourceIter",1068220306,null),new cljs.core.Symbol(null,"multi","multi", 
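The Murmur3 constants and mixing steps summarized above survive intact in the minified text, so they can be restated readably. The following is an illustrative re-expression with hypothetical helper names (not the cljs.core API); the signed-integer constants are copied from the compiled output.

// Sketch of the Murmur3 steps visible in the bundle (m3_mix_K1, m3_mix_H1,
// m3_fmix): a 32-bit key mix, a state mix, and the final avalanche.
const rotl32 = (x, n) => (x << n) | (x >>> (32 - n));

const C1 = 0xcc9e2d51 | 0; // appears as -862048943 in the minified source
const C2 = 0x1b873593 | 0; // appears as  461845907

const mixK1 = (k1) => Math.imul(rotl32(Math.imul(k1 | 0, C1), 15), C2);
const mixH1 = (h1, k1) =>
  (Math.imul(rotl32((h1 | 0) ^ (k1 | 0), 13), 5) + (0xe6546b64 | 0)) | 0;

// Final avalanche: xor-shift/multiply rounds that diffuse every input bit.
const fmix = (h1, len) => {
  let h = (h1 | 0) ^ len;
  h = Math.imul(h ^ (h >>> 16), 0x85ebca6b | 0);
  h = Math.imul(h ^ (h >>> 13), 0xc2b2ae35 | 0);
  return h ^ (h >>> 16);
};

// As in cljs.core.m3_hash_int: seed 0, byte length 4, zero short-circuits.
const hashInt = (i) => (i === 0 ? 0 : fmix(mixH1(0, mixK1(i)), 4));

The runtime's integer and string hashes are built from these three steps, which is why the exact signed constants are pinned in the compiled output.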
+1450238522,null)],null)};cljs.core.TransformerIterator.cljs$lang$type=!0;cljs.core.TransformerIterator.cljs$lang$ctorStr="cljs.core/TransformerIterator";cljs.core.TransformerIterator.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/TransformerIterator")};cljs.core.__GT_TransformerIterator=function(a,b,c,d,e,f){return new cljs.core.TransformerIterator(a,b,c,d,e,f)};goog.object.set(cljs.core.TransformerIterator.prototype,cljs.core.ITER_SYMBOL,function(){return cljs.core.es6_iterator(this)}); +cljs.core.transformer_iterator=function(a,b,c){var d=new cljs.core.TransformerIterator(cljs.core.EMPTY,cljs.core.NONE,!1,null,b,c);d.xf=function(){var b=function(){var a=null,b=function(a,b){d.buffer=d.buffer.add(b);return a};a=function(a,c){switch(arguments.length){case 0:return null;case 1:return a;case 2:return b.call(this,a,c)}throw Error("Invalid arity: "+arguments.length);};a.cljs$core$IFn$_invoke$arity$0=function(){return null};a.cljs$core$IFn$_invoke$arity$1=function(a){return a};a.cljs$core$IFn$_invoke$arity$2= +b;return a}();return a.cljs$core$IFn$_invoke$arity$1?a.cljs$core$IFn$_invoke$arity$1(b):a.call(null,b)}();return d};cljs.core.TransformerIterator.create=function(a,b){return cljs.core.transformer_iterator(a,b,!1)};cljs.core.TransformerIterator.createMulti=function(a,b){return cljs.core.transformer_iterator(a,new cljs.core.MultiIterator(cljs.core.to_array(b)),!0)}; +cljs.core.sequence=function(a){switch(arguments.length){case 1:return cljs.core.sequence.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.sequence.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(da?0:a-1>>>5<<5};cljs.core.new_path=function(a,b,c){for(;;){if(0===b)return c;var d=cljs.core.pv_fresh_node(a);cljs.core.pv_aset(d,0,c);c=d;b-=5}}; +cljs.core.push_tail=function(a,b,c,d){var e=cljs.core.pv_clone_node(c),f=a.cnt-1>>>b&31;5===b?cljs.core.pv_aset(e,f,d):(c=cljs.core.pv_aget(c,f),null!=c?(b-=5,a=cljs.core.push_tail.cljs$core$IFn$_invoke$arity$4?cljs.core.push_tail.cljs$core$IFn$_invoke$arity$4(a,b,c,d):cljs.core.push_tail.call(null,a,b,c,d)):a=cljs.core.new_path(null,b-5,d),cljs.core.pv_aset(e,f,a));return e}; +cljs.core.vector_index_out_of_bounds=function(a,b){throw Error(["No item ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)," in vector of length ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(b)].join(""));};cljs.core.first_array_for_longvec=function(a){var b=a.root;for(a=a.shift;;)if(0=cljs.core.tail_off(a))return a.tail;var c=a.root;for(a=a.shift;;)if(0>>a&31),a-=5;else return c.arr}; +cljs.core.array_for=function(a,b){return 0<=b&&b>>b&31;cljs.core.pv_aset(f,g,function(){var f=b-5,k=cljs.core.pv_aget(c,g);return cljs.core.do_assoc.cljs$core$IFn$_invoke$arity$5?cljs.core.do_assoc.cljs$core$IFn$_invoke$arity$5(a,f,k,d,e):cljs.core.do_assoc.call(null,a,f,k,d,e)}())}return f}; +cljs.core.pop_tail=function(a,b,c){var d=a.cnt-2>>>b&31;if(5=this.cnt?new cljs.core.IndexedSeq(this.tail,0,null):cljs.core.chunked_seq.cljs$core$IFn$_invoke$arity$4(this,cljs.core.first_array_for_longvec(this),0,0)};cljs.core.PersistentVector.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return b===this.meta?this:new cljs.core.PersistentVector(b,this.cnt,this.shift,this.root,this.tail,this.__hash)}; +cljs.core.PersistentVector.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){if(32>this.cnt-cljs.core.tail_off(this)){a=this.tail.length;for(var c=Array(a+1),d=0;;)if(d>>5>1<c)return new 
cljs.core.PersistentVector(null,c,5,cljs.core.PersistentVector.EMPTY_NODE,a,null);var d=a.slice(0,32);b=32;for(var e=(new cljs.core.PersistentVector(null,32,5,cljs.core.PersistentVector.EMPTY_NODE,d,null)).cljs$core$IEditableCollection$_as_transient$arity$1(null);;)if(bb)return null;a=this.start+b;return ab||this.end<=this.start+b?cljs.core.vector_index_out_of_bounds(b,this.end-this.start):cljs.core._nth.cljs$core$IFn$_invoke$arity$2(this.v,this.start+b)};cljs.core.Subvec.prototype.cljs$core$IIndexed$_nth$arity$3=function(a,b,c){return 0>b||this.end<=this.start+b?c:cljs.core._nth.cljs$core$IFn$_invoke$arity$3(this.v,this.start+b,c)}; +cljs.core.Subvec.prototype.cljs$core$IVector$_assoc_n$arity$3=function(a,b,c){var d=this,e=d.start+b;if(0>b||d.end+1<=e)throw Error(["Index ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(b)," out of bounds [0,",cljs.core.str.cljs$core$IFn$_invoke$arity$1(this.cljs$core$ICounted$_count$arity$1(null)),"]"].join(""));return cljs.core.build_subvec(d.meta,cljs.core.assoc.cljs$core$IFn$_invoke$arity$3(d.v,e,c),d.start,function(){var a=d.end,b=e+1;return a>b?a:b}(),null)}; +cljs.core.Subvec.prototype.cljs$core$IIterable$_iterator$arity$1=function(a){return null!=this.v&&cljs.core.PROTOCOL_SENTINEL===this.v.cljs$core$APersistentVector$?cljs.core.ranged_iterator(this.v,this.start,this.end):cljs.core.seq_iter(this)};cljs.core.Subvec.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return this.meta};cljs.core.Subvec.prototype.cljs$core$ICloneable$_clone$arity$1=function(a){return new cljs.core.Subvec(this.meta,this.v,this.start,this.end,this.__hash)}; +cljs.core.Subvec.prototype.cljs$core$ICounted$_count$arity$1=function(a){return this.end-this.start};cljs.core.Subvec.prototype.cljs$core$IStack$_peek$arity$1=function(a){return this.start===this.end?null:cljs.core._nth.cljs$core$IFn$_invoke$arity$2(this.v,this.end-1)};cljs.core.Subvec.prototype.cljs$core$IStack$_pop$arity$1=function(a){if(this.start===this.end)throw Error("Can't pop empty vector");return cljs.core.build_subvec(this.meta,this.v,this.start,this.end-1,null)}; +cljs.core.Subvec.prototype.cljs$core$IReversible$_rseq$arity$1=function(a){return this.start!==this.end?new cljs.core.RSeq(this,this.end-this.start-1,null):null};cljs.core.Subvec.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this.__hash;return null!=a?a:this.__hash=a=cljs.core.hash_ordered_coll(this)};cljs.core.Subvec.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return cljs.core.equiv_sequential(this,b)}; +cljs.core.Subvec.prototype.cljs$core$IEmptyableCollection$_empty$arity$1=function(a){return cljs.core._with_meta(cljs.core.PersistentVector.EMPTY,this.meta)};cljs.core.Subvec.prototype.cljs$core$IReduce$_reduce$arity$2=function(a,b){return null!=this.v&&cljs.core.PROTOCOL_SENTINEL===this.v.cljs$core$APersistentVector$?cljs.core.pv_reduce.cljs$core$IFn$_invoke$arity$4(this.v,b,this.start,this.end):cljs.core.ci_reduce.cljs$core$IFn$_invoke$arity$2(this,b)}; +cljs.core.Subvec.prototype.cljs$core$IReduce$_reduce$arity$3=function(a,b,c){return null!=this.v&&cljs.core.PROTOCOL_SENTINEL===this.v.cljs$core$APersistentVector$?cljs.core.pv_reduce.cljs$core$IFn$_invoke$arity$5(this.v,b,c,this.start,this.end):cljs.core.ci_reduce.cljs$core$IFn$_invoke$arity$3(this,b,c)}; +cljs.core.Subvec.prototype.cljs$core$IAssociative$_assoc$arity$3=function(a,b,c){if("number"===typeof b)return this.cljs$core$IVector$_assoc_n$arity$3(null,b,c);throw Error("Subvec's key for assoc must be a 
number.");};cljs.core.Subvec.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){var b=this;return function e(a){return a===b.end?null:cljs.core.cons(cljs.core._nth.cljs$core$IFn$_invoke$arity$2(b.v,a),new cljs.core.LazySeq(null,function(){return e(a+1)},null,null))}(b.start)}; +cljs.core.Subvec.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return b===this.meta?this:cljs.core.build_subvec(b,this.v,this.start,this.end,this.__hash)};cljs.core.Subvec.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){return cljs.core.build_subvec(this.meta,cljs.core._assoc_n(this.v,this.end,b),this.start,this.end+1,null)}; +cljs.core.Subvec.prototype.call=function(){var a=null;a=function(a,c,d){switch(arguments.length){case 2:return this.cljs$core$IIndexed$_nth$arity$2(null,c);case 3:return this.cljs$core$IIndexed$_nth$arity$3(null,c,d)}throw Error("Invalid arity: "+(arguments.length-1));};a.cljs$core$IFn$_invoke$arity$2=function(a,c){return this.cljs$core$IIndexed$_nth$arity$2(null,c)};a.cljs$core$IFn$_invoke$arity$3=function(a,c,d){return this.cljs$core$IIndexed$_nth$arity$3(null,c,d)};return a}(); +cljs.core.Subvec.prototype.apply=function(a,b){return this.call.apply(this,[this].concat(cljs.core.aclone(b)))};cljs.core.Subvec.prototype.cljs$core$IFn$_invoke$arity$1=function(a){return this.cljs$core$IIndexed$_nth$arity$2(null,a)};cljs.core.Subvec.prototype.cljs$core$IFn$_invoke$arity$2=function(a,b){return this.cljs$core$IIndexed$_nth$arity$3(null,a,b)}; +cljs.core.Subvec.getBasis=function(){return new cljs.core.PersistentVector(null,5,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"meta","meta",-1154898805,null),new cljs.core.Symbol(null,"v","v",1661996586,null),new cljs.core.Symbol(null,"start","start",1285322546,null),new cljs.core.Symbol(null,"end","end",1372345569,null),cljs.core.with_meta(new cljs.core.Symbol(null,"__hash","__hash",-1328796629,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable", +"mutable",875778266),!0],null))],null)};cljs.core.Subvec.cljs$lang$type=!0;cljs.core.Subvec.cljs$lang$ctorStr="cljs.core/Subvec";cljs.core.Subvec.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/Subvec")};cljs.core.__GT_Subvec=function(a,b,c,d,e){return new cljs.core.Subvec(a,b,c,d,e)};goog.object.set(cljs.core.Subvec.prototype,cljs.core.ITER_SYMBOL,function(){return cljs.core.es6_iterator(this)}); +cljs.core.build_subvec=function(a,b,c,d,e){for(;;)if(b instanceof cljs.core.Subvec)c=b.start+c,d=b.start+d,b=b.v;else{if(!cljs.core.vector_QMARK_(b))throw Error("v must satisfy IVector");if(0>c||dcljs.core.count(b))throw Error("Index out of bounds");return new cljs.core.Subvec(a,b,c,d,e)}}; +cljs.core.subvec=function(a){switch(arguments.length){case 2:return cljs.core.subvec.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return cljs.core.subvec.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs.core.subvec.cljs$core$IFn$_invoke$arity$2=function(a,b){return cljs.core.subvec.cljs$core$IFn$_invoke$arity$3(a,b,cljs.core.count(a))}; +cljs.core.subvec.cljs$core$IFn$_invoke$arity$3=function(a,b,c){if(null==b||null==c)throw Error("Assert failed: (and (not (nil? start)) (not (nil? 
end)))");return cljs.core.build_subvec(null,a,b|0,c|0,null)};cljs.core.subvec.cljs$lang$maxFixedArity=3;cljs.core.tv_ensure_editable=function(a,b){return a===b.edit?b:new cljs.core.VectorNode(a,cljs.core.aclone(b.arr))};cljs.core.tv_editable_root=function(a){return new cljs.core.VectorNode({},cljs.core.aclone(a.arr))}; +cljs.core.tv_editable_tail=function(a){var b=[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null];cljs.core.array_copy(a,0,b,0,a.length);return b}; +cljs.core.tv_push_tail=function(a,b,c,d){var e=cljs.core.tv_ensure_editable(a.root.edit,c),f=a.cnt-1>>>b&31;cljs.core.pv_aset(e,f,5===b?d:function(){var c=cljs.core.pv_aget(e,f);if(null!=c){var h=b-5;return cljs.core.tv_push_tail.cljs$core$IFn$_invoke$arity$4?cljs.core.tv_push_tail.cljs$core$IFn$_invoke$arity$4(a,h,c,d):cljs.core.tv_push_tail.call(null,a,h,c,d)}return cljs.core.new_path(a.root.edit,b-5,d)}());return e}; +cljs.core.tv_pop_tail=function(a,b,c){c=cljs.core.tv_ensure_editable(a.root.edit,c);var d=a.cnt-2>>>b&31;if(5=cljs.core.tail_off(a))return a.tail;var c=a.root,d=c;for(a=a.shift;;)if(0>>a&31)),a-=5;else return d.arr};cljs.core.TransientVector=function(a,b,c,d){this.cnt=a;this.shift=b;this.root=c;this.tail=d;this.cljs$lang$protocol_mask$partition1$=88;this.cljs$lang$protocol_mask$partition0$=275}; +cljs.core.TransientVector.prototype.cljs$core$ITransientCollection$_conj_BANG_$arity$2=function(a,b){if(this.root.edit){if(32>this.cnt-cljs.core.tail_off(this))this.tail[this.cnt&31]=b;else{a=new cljs.core.VectorNode(this.root.edit,this.tail);var c=[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null];c[0]=b;this.tail=c;this.cnt>>>5>1<>>a&31;cljs.core.pv_aset(g,f,h(a-5,cljs.core.pv_aget(g,f)))}return g}(d.shift,d.root)}(),d.root=a),this;if(b===d.cnt)return this.cljs$core$ITransientCollection$_conj_BANG_$arity$2(null, +c);throw Error(["Index ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(b)," out of bounds for TransientVector of length",cljs.core.str.cljs$core$IFn$_invoke$arity$1(d.cnt)].join(""));}throw Error("assoc! 
after persistent!");}; +cljs.core.TransientVector.prototype.cljs$core$ITransientVector$_pop_BANG_$arity$1=function(a){if(this.root.edit){if(0===this.cnt)throw Error("Can't pop empty vector");if(1===this.cnt)this.cnt=0;else if(0<(this.cnt-1&31))--this.cnt;else{a=cljs.core.unchecked_editable_array_for(this,this.cnt-2);var b=cljs.core.tv_pop_tail(this,this.shift,this.root);b=null!=b?b:new cljs.core.VectorNode(this.root.edit,[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null, +null,null,null,null,null,null,null,null,null,null,null,null]);5b?1:0}; +cljs.core.obj_map__GT_hash_map=function(a,b,c){var d=a.keys,e=d.length,f=a.strobj;a=cljs.core.meta(a);for(var g=0,h=cljs.core.transient$(cljs.core.PersistentHashMap.EMPTY);;)if(gcljs.core.ObjMap.HASHMAP_THRESHOLD||this.keys.length>=cljs.core.ObjMap.HASHMAP_THRESHOLD)return cljs.core.obj_map__GT_hash_map(this,b,c);if(null!=cljs.core.scan_array(1,b,this.keys))return a=cljs.core.obj_clone(this.strobj,this.keys),goog.object.set(a,b,c),new cljs.core.ObjMap(this.meta,this.keys,a,this.update_count+1,null);a=cljs.core.obj_clone(this.strobj,this.keys);var d=cljs.core.aclone(this.keys); +goog.object.set(a,b,c);d.push(b);return new cljs.core.ObjMap(this.meta,d,a,this.update_count+1,null)}return cljs.core.obj_map__GT_hash_map(this,b,c)};cljs.core.ObjMap.prototype.cljs$core$IAssociative$_contains_key_QMARK_$arity$2=function(a,b){return goog.isString(b)&&null!=cljs.core.scan_array(1,b,this.keys)?!0:!1}; +cljs.core.ObjMap.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){var b=this;return 0=a)return new cljs.core.PersistentArrayMap(this.meta,this.cnt-1,c,null);cljs.core._EQ_.cljs$core$IFn$_invoke$arity$2(b,this.arr[d])?d+=2:(c[e]=this.arr[d],c[e+1]=this.arr[d+1],e+=2,d+=2)}}else return this}; +cljs.core.PersistentArrayMap.prototype.cljs$core$IAssociative$_assoc$arity$3=function(a,b,c){a=cljs.core.array_map_index_of(this,b);if(-1===a)return this.cnt>>b&31}; +cljs.core.clone_and_set=function(a){switch(arguments.length){case 3:return cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);case 5:return cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$5(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3=function(a,b,c){a=cljs.core.aclone(a);a[b]=c;return a};cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$5=function(a,b,c,d,e){a=cljs.core.aclone(a);a[b]=c;a[d]=e;return a};cljs.core.clone_and_set.cljs$lang$maxFixedArity=5;cljs.core.remove_pair=function(a,b){var c=Array(a.length-2);cljs.core.array_copy(a,0,c,0,2*b);cljs.core.array_copy(a,2*(b+1),c,2*b,c.length-2*b);return c}; +cljs.core.bitmap_indexed_node_index=function(a,b){return cljs.core.bit_count(a&b-1)};cljs.core.bitpos=function(a,b){return 1<<(a>>>b&31)}; +cljs.core.edit_and_set=function(a){switch(arguments.length){case 4:return cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4(arguments[0],arguments[1],arguments[2],arguments[3]);case 6:return cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$6(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4],arguments[5]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4=function(a,b,c,d){a=a.ensure_editable(b);a.arr[c]=d;return 
a};cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$6=function(a,b,c,d,e,f){a=a.ensure_editable(b);a.arr[c]=d;a.arr[e]=f;return a};cljs.core.edit_and_set.cljs$lang$maxFixedArity=6; +cljs.core.inode_kv_reduce=function(a,b,c){for(var d=a.length,e=0,f=c;;)if(eb?4:2*(b+1));cljs.core.array_copy(this.arr,0,c,0,2*b);return new cljs.core.BitmapIndexedNode(a,this.bitmap,c)}; +cljs.core.BitmapIndexedNode.prototype.inode_without_BANG_=function(a,b,c,d,e){var f=1<<(c>>>b&31);if(0===(this.bitmap&f))return this;var g=cljs.core.bitmap_indexed_node_index(this.bitmap,f),h=this.arr[2*g],k=this.arr[2*g+1];return null==h?(b=k.inode_without_BANG_(a,b+5,c,d,e),b===k?this:null!=b?cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4(this,a,2*g+1,b):this.bitmap===f?null:this.edit_and_remove_pair(a,f,g)):cljs.core.key_test(d,h)?(e.val=!0,this.edit_and_remove_pair(a,f,g)):this}; +cljs.core.BitmapIndexedNode.prototype.edit_and_remove_pair=function(a,b,c){if(this.bitmap===b)return null;a=this.ensure_editable(a);var d=a.arr,e=d.length;a.bitmap^=b;cljs.core.array_copy(d,2*(c+1),d,2*c,e-2*(c+1));d[e-2]=null;d[e-1]=null;return a};cljs.core.BitmapIndexedNode.prototype.inode_seq=function(){return cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$1(this.arr)};cljs.core.BitmapIndexedNode.prototype.kv_reduce=function(a,b){return cljs.core.inode_kv_reduce(this.arr,a,b)}; +cljs.core.BitmapIndexedNode.prototype.inode_lookup=function(a,b,c,d){var e=1<<(b>>>a&31);if(0===(this.bitmap&e))return d;var f=cljs.core.bitmap_indexed_node_index(this.bitmap,e);e=this.arr[2*f];f=this.arr[2*f+1];return null==e?f.inode_lookup(a+5,b,c,d):cljs.core.key_test(c,e)?f:d}; +cljs.core.BitmapIndexedNode.prototype.inode_assoc_BANG_=function(a,b,c,d,e,f){var g=1<<(c>>>b&31),h=cljs.core.bitmap_indexed_node_index(this.bitmap,g);if(0===(this.bitmap&g)){var k=cljs.core.bit_count(this.bitmap);if(2*k>>b&31]=cljs.core.BitmapIndexedNode.EMPTY.inode_assoc_BANG_(a,b+5,c,d,e,f);for(e=d=0;;)if(32>d)0===(this.bitmap>>>d&1)?d+=1:(h[d]=null!=this.arr[e]?cljs.core.BitmapIndexedNode.EMPTY.inode_assoc_BANG_(a,b+5,cljs.core.hash(this.arr[e]),this.arr[e],this.arr[e+1],f):this.arr[e+1],e+=2,d+=1);else break;return new cljs.core.ArrayNode(a,k+1,h)}b=Array(2*(k+4));cljs.core.array_copy(this.arr,0,b,0,2*h);b[2*h]=d;b[2*h+1]=e;cljs.core.array_copy(this.arr,2*h, +b,2*(h+1),2*(k-h));f.val=!0;a=this.ensure_editable(a);a.arr=b;a.bitmap|=g;return a}k=this.arr[2*h];g=this.arr[2*h+1];if(null==k)return k=g.inode_assoc_BANG_(a,b+5,c,d,e,f),k===g?this:cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4(this,a,2*h+1,k);if(cljs.core.key_test(d,k))return e===g?this:cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4(this,a,2*h+1,e);f.val=!0;return cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$6(this,a,2*h,null,2*h+1,cljs.core.create_node.cljs$core$IFn$_invoke$arity$7(a, +b+5,k,g,c,d,e))}; +cljs.core.BitmapIndexedNode.prototype.inode_assoc=function(a,b,c,d,e){var f=1<<(b>>>a&31),g=cljs.core.bitmap_indexed_node_index(this.bitmap,f);if(0===(this.bitmap&f)){var h=cljs.core.bit_count(this.bitmap);if(16<=h){g=[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null];g[b>>>a&31]=cljs.core.BitmapIndexedNode.EMPTY.inode_assoc(a+5,b,c,d,e);for(d=c=0;;)if(32>c)0===(this.bitmap>>>c&1)?c+=1:(g[c]= +null!=this.arr[d]?cljs.core.BitmapIndexedNode.EMPTY.inode_assoc(a+5,cljs.core.hash(this.arr[d]),this.arr[d],this.arr[d+1],e):this.arr[d+1],d+=2,c+=1);else break;return new 
cljs.core.ArrayNode(null,h+1,g)}a=Array(2*(h+1));cljs.core.array_copy(this.arr,0,a,0,2*g);a[2*g]=c;a[2*g+1]=d;cljs.core.array_copy(this.arr,2*g,a,2*(g+1),2*(h-g));e.val=!0;return new cljs.core.BitmapIndexedNode(null,this.bitmap|f,a)}h=this.arr[2*g];f=this.arr[2*g+1];if(null==h)return h=f.inode_assoc(a+5,b,c,d,e),h===f?this:new cljs.core.BitmapIndexedNode(null, +this.bitmap,cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3(this.arr,2*g+1,h));if(cljs.core.key_test(c,h))return d===f?this:new cljs.core.BitmapIndexedNode(null,this.bitmap,cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3(this.arr,2*g+1,d));e.val=!0;return new cljs.core.BitmapIndexedNode(null,this.bitmap,cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$5(this.arr,2*g,null,2*g+1,cljs.core.create_node.cljs$core$IFn$_invoke$arity$6(a+5,h,f,b,c,d)))}; +cljs.core.BitmapIndexedNode.prototype.inode_find=function(a,b,c,d){var e=1<<(b>>>a&31);if(0===(this.bitmap&e))return d;var f=cljs.core.bitmap_indexed_node_index(this.bitmap,e);e=this.arr[2*f];f=this.arr[2*f+1];return null==e?f.inode_find(a+5,b,c,d):cljs.core.key_test(c,e)?new cljs.core.MapEntry(e,f,null):d}; +cljs.core.BitmapIndexedNode.prototype.inode_without=function(a,b,c){var d=1<<(b>>>a&31);if(0===(this.bitmap&d))return this;var e=cljs.core.bitmap_indexed_node_index(this.bitmap,d),f=this.arr[2*e],g=this.arr[2*e+1];return null==f?(a=g.inode_without(a+5,b,c),a===g?this:null!=a?new cljs.core.BitmapIndexedNode(null,this.bitmap,cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3(this.arr,2*e+1,a)):this.bitmap===d?null:new cljs.core.BitmapIndexedNode(null,this.bitmap^d,cljs.core.remove_pair(this.arr, +e))):cljs.core.key_test(c,f)?new cljs.core.BitmapIndexedNode(null,this.bitmap^d,cljs.core.remove_pair(this.arr,e)):this};cljs.core.BitmapIndexedNode.prototype.cljs$core$IIterable$_iterator$arity$1=function(a){return new cljs.core.NodeIterator(this.arr,0,null,null)}; +cljs.core.BitmapIndexedNode.getBasis=function(){return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"edit","edit",-1302639,null),cljs.core.with_meta(new cljs.core.Symbol(null,"bitmap","bitmap",501334601,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),cljs.core.with_meta(new cljs.core.Symbol(null,"arr","arr",2115492975,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null, +"mutable","mutable",875778266),!0],null))],null)};cljs.core.BitmapIndexedNode.cljs$lang$type=!0;cljs.core.BitmapIndexedNode.cljs$lang$ctorStr="cljs.core/BitmapIndexedNode";cljs.core.BitmapIndexedNode.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/BitmapIndexedNode")};cljs.core.__GT_BitmapIndexedNode=function(a,b,c){return new cljs.core.BitmapIndexedNode(a,b,c)};cljs.core.BitmapIndexedNode.EMPTY=new cljs.core.BitmapIndexedNode(null,0,[]); +cljs.core.pack_array_node=function(a,b,c){var d=a.arr,e=d.length;a=Array(2*(a.cnt-1));for(var f=0,g=1,h=0;;)if(f>>b&31,g=this.arr[f];if(null==g)return this;b=g.inode_without_BANG_(a,b+5,c,d,e);if(b===g)return this;if(null==b){if(8>=this.cnt)return cljs.core.pack_array_node(this,a,f);a=cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4(this,a,f,b);--a.cnt;return a}return cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4(this,a,f,b)};cljs.core.ArrayNode.prototype.inode_seq=function(){return cljs.core.create_array_node_seq.cljs$core$IFn$_invoke$arity$1(this.arr)}; +cljs.core.ArrayNode.prototype.kv_reduce=function(a,b){for(var 
c=this.arr.length,d=0;;)if(d>>a&31];return null!=e?e.inode_lookup(a+5,b,c,d):d}; +cljs.core.ArrayNode.prototype.inode_assoc_BANG_=function(a,b,c,d,e,f){var g=c>>>b&31,h=this.arr[g];if(null==h)return a=cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4(this,a,g,cljs.core.BitmapIndexedNode.EMPTY.inode_assoc_BANG_(a,b+5,c,d,e,f)),a.cnt+=1,a;b=h.inode_assoc_BANG_(a,b+5,c,d,e,f);return b===h?this:cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4(this,a,g,b)}; +cljs.core.ArrayNode.prototype.inode_assoc=function(a,b,c,d,e){var f=b>>>a&31,g=this.arr[f];if(null==g)return new cljs.core.ArrayNode(null,this.cnt+1,cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3(this.arr,f,cljs.core.BitmapIndexedNode.EMPTY.inode_assoc(a+5,b,c,d,e)));a=g.inode_assoc(a+5,b,c,d,e);return a===g?this:new cljs.core.ArrayNode(null,this.cnt,cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3(this.arr,f,a))}; +cljs.core.ArrayNode.prototype.inode_find=function(a,b,c,d){var e=this.arr[b>>>a&31];return null!=e?e.inode_find(a+5,b,c,d):d}; +cljs.core.ArrayNode.prototype.inode_without=function(a,b,c){var d=b>>>a&31,e=this.arr[d];return null!=e?(a=e.inode_without(a+5,b,c),a===e?this:null==a?8>=this.cnt?cljs.core.pack_array_node(this,null,d):new cljs.core.ArrayNode(null,this.cnt-1,cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3(this.arr,d,a)):new cljs.core.ArrayNode(null,this.cnt,cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3(this.arr,d,a))):this}; +cljs.core.ArrayNode.prototype.cljs$core$IIterable$_iterator$arity$1=function(a){return new cljs.core.ArrayNodeIterator(this.arr,0,null)}; +cljs.core.ArrayNode.getBasis=function(){return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"edit","edit",-1302639,null),cljs.core.with_meta(new cljs.core.Symbol(null,"cnt","cnt",1924510325,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),cljs.core.with_meta(new cljs.core.Symbol(null,"arr","arr",2115492975,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null, +"mutable","mutable",875778266),!0],null))],null)};cljs.core.ArrayNode.cljs$lang$type=!0;cljs.core.ArrayNode.cljs$lang$ctorStr="cljs.core/ArrayNode";cljs.core.ArrayNode.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/ArrayNode")};cljs.core.__GT_ArrayNode=function(a,b,c){return new cljs.core.ArrayNode(a,b,c)};cljs.core.hash_collision_node_find_index=function(a,b,c){b*=2;for(var d=0;;)if(da?d:cljs.core.key_test(c,this.arr[a])?this.arr[a+1]:d}; +cljs.core.HashCollisionNode.prototype.inode_assoc_BANG_=function(a,b,c,d,e,f){if(c===this.collision_hash){b=cljs.core.hash_collision_node_find_index(this.arr,this.cnt,d);if(-1===b){if(this.arr.length>2*this.cnt)return a=cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$6(this,a,2*this.cnt,d,2*this.cnt+1,e),f.val=!0,a.cnt+=1,a;b=this.arr.length;c=Array(b+2);cljs.core.array_copy(this.arr,0,c,0,b);c[b]=d;c[b+1]=e;f.val=!0;return this.ensure_editable_array(a,this.cnt+1,c)}return this.arr[b+1]===e?this: +cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4(this,a,b+1,e)}return(new cljs.core.BitmapIndexedNode(a,1<<(this.collision_hash>>>b&31),[null,this,null,null])).inode_assoc_BANG_(a,b,c,d,e,f)}; +cljs.core.HashCollisionNode.prototype.inode_assoc=function(a,b,c,d,e){return b===this.collision_hash?(a=cljs.core.hash_collision_node_find_index(this.arr,this.cnt,c),-1===a?(a=2*this.cnt,b=Array(a+2),cljs.core.array_copy(this.arr,0,b,0,a),b[a]=c,b[a+1]=d,e.val=!0,new 
cljs.core.HashCollisionNode(null,this.collision_hash,this.cnt+1,b)):cljs.core._EQ_.cljs$core$IFn$_invoke$arity$2(this.arr[a+1],d)?this:new cljs.core.HashCollisionNode(null,this.collision_hash,this.cnt,cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3(this.arr, +a+1,d))):(new cljs.core.BitmapIndexedNode(null,1<<(this.collision_hash>>>a&31),[null,this])).inode_assoc(a,b,c,d,e)};cljs.core.HashCollisionNode.prototype.ensure_editable_array=function(a,b,c){return a===this.edit?(this.arr=c,this.cnt=b,this):new cljs.core.HashCollisionNode(this.edit,this.collision_hash,b,c)}; +cljs.core.HashCollisionNode.prototype.inode_find=function(a,b,c,d){a=cljs.core.hash_collision_node_find_index(this.arr,this.cnt,c);return 0>a?d:cljs.core.key_test(c,this.arr[a])?new cljs.core.MapEntry(this.arr[a],this.arr[a+1],null):d}; +cljs.core.HashCollisionNode.prototype.inode_without=function(a,b,c){a=cljs.core.hash_collision_node_find_index(this.arr,this.cnt,c);return-1===a?this:1===this.cnt?null:new cljs.core.HashCollisionNode(null,this.collision_hash,this.cnt-1,cljs.core.remove_pair(this.arr,cljs.core.quot(a,2)))};cljs.core.HashCollisionNode.prototype.cljs$core$IIterable$_iterator$arity$1=function(a){return new cljs.core.NodeIterator(this.arr,0,null,null)}; +cljs.core.HashCollisionNode.getBasis=function(){return new cljs.core.PersistentVector(null,4,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"edit","edit",-1302639,null),cljs.core.with_meta(new cljs.core.Symbol(null,"collision-hash","collision-hash",-35831342,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),cljs.core.with_meta(new cljs.core.Symbol(null,"cnt","cnt",1924510325,null),new cljs.core.PersistentArrayMap(null, +1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),cljs.core.with_meta(new cljs.core.Symbol(null,"arr","arr",2115492975,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null))],null)};cljs.core.HashCollisionNode.cljs$lang$type=!0;cljs.core.HashCollisionNode.cljs$lang$ctorStr="cljs.core/HashCollisionNode";cljs.core.HashCollisionNode.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/HashCollisionNode")}; +cljs.core.__GT_HashCollisionNode=function(a,b,c,d){return new cljs.core.HashCollisionNode(a,b,c,d)}; +cljs.core.create_node=function(a){switch(arguments.length){case 6:return cljs.core.create_node.cljs$core$IFn$_invoke$arity$6(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4],arguments[5]);case 7:return cljs.core.create_node.cljs$core$IFn$_invoke$arity$7(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4],arguments[5],arguments[6]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.core.create_node.cljs$core$IFn$_invoke$arity$6=function(a,b,c,d,e,f){var g=cljs.core.hash(b);if(g===d)return new cljs.core.HashCollisionNode(null,g,2,[b,c,e,f]);var h=new cljs.core.Box(!1);return cljs.core.BitmapIndexedNode.EMPTY.inode_assoc(a,g,b,c,h).inode_assoc(a,d,e,f,h)}; +cljs.core.create_node.cljs$core$IFn$_invoke$arity$7=function(a,b,c,d,e,f,g){var h=cljs.core.hash(c);if(h===e)return new cljs.core.HashCollisionNode(null,h,2,[c,d,f,g]);var k=new cljs.core.Box(!1);return cljs.core.BitmapIndexedNode.EMPTY.inode_assoc_BANG_(a,b,h,c,d,k).inode_assoc_BANG_(a,b,e,f,g,k)};cljs.core.create_node.cljs$lang$maxFixedArity=7; 
+cljs.core.NodeSeq=function(a,b,c,d,e){this.meta=a;this.nodes=b;this.i=c;this.s=d;this.__hash=e;this.cljs$lang$protocol_mask$partition0$=32374988;this.cljs$lang$protocol_mask$partition1$=0};cljs.core.NodeSeq.prototype.toString=function(){return cljs.core.pr_str_STAR_(this)};cljs.core.NodeSeq.prototype.equiv=function(a){return this.cljs$core$IEquiv$_equiv$arity$2(null,a)}; +cljs.core.NodeSeq.prototype.indexOf=function(){var a=null,b=function(a){return cljs.core._indexOf.cljs$core$IFn$_invoke$arity$3(this,a,0)},c=function(a,b){return cljs.core._indexOf.cljs$core$IFn$_invoke$arity$3(this,a,b)};a=function(a,e){switch(arguments.length){case 1:return b.call(this,a);case 2:return c.call(this,a,e)}throw Error("Invalid arity: "+arguments.length);};a.cljs$core$IFn$_invoke$arity$1=b;a.cljs$core$IFn$_invoke$arity$2=c;return a}(); +cljs.core.NodeSeq.prototype.lastIndexOf=function(){var a=null,b=function(a){return cljs.core._lastIndexOf.cljs$core$IFn$_invoke$arity$3(this,a,cljs.core.count(this))},c=function(a,b){return cljs.core._lastIndexOf.cljs$core$IFn$_invoke$arity$3(this,a,b)};a=function(a,e){switch(arguments.length){case 1:return b.call(this,a);case 2:return c.call(this,a,e)}throw Error("Invalid arity: "+arguments.length);};a.cljs$core$IFn$_invoke$arity$1=b;a.cljs$core$IFn$_invoke$arity$2=c;return a}(); +cljs.core.NodeSeq.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return this.meta};cljs.core.NodeSeq.prototype.cljs$core$INext$_next$arity$1=function(a){return null==this.s?cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$3(this.nodes,this.i+2,null):cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$3(this.nodes,this.i,cljs.core.next(this.s))};cljs.core.NodeSeq.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this.__hash;return null!=a?a:this.__hash=a=cljs.core.hash_ordered_coll(this)}; +cljs.core.NodeSeq.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return cljs.core.equiv_sequential(this,b)};cljs.core.NodeSeq.prototype.cljs$core$IEmptyableCollection$_empty$arity$1=function(a){return cljs.core.List.EMPTY};cljs.core.NodeSeq.prototype.cljs$core$IReduce$_reduce$arity$2=function(a,b){return cljs.core.seq_reduce.cljs$core$IFn$_invoke$arity$2(b,this)}; +cljs.core.NodeSeq.prototype.cljs$core$IReduce$_reduce$arity$3=function(a,b,c){return cljs.core.seq_reduce.cljs$core$IFn$_invoke$arity$3(b,c,this)};cljs.core.NodeSeq.prototype.cljs$core$ISeq$_first$arity$1=function(a){return null==this.s?new cljs.core.MapEntry(this.nodes[this.i],this.nodes[this.i+1],null):cljs.core.first(this.s)}; +cljs.core.NodeSeq.prototype.cljs$core$ISeq$_rest$arity$1=function(a){a=null==this.s?cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$3(this.nodes,this.i+2,null):cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$3(this.nodes,this.i,cljs.core.next(this.s));return null!=a?a:cljs.core.List.EMPTY};cljs.core.NodeSeq.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){return this}; +cljs.core.NodeSeq.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return b===this.meta?this:new cljs.core.NodeSeq(b,this.nodes,this.i,this.s,this.__hash)};cljs.core.NodeSeq.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){return cljs.core.cons(b,this)}; +cljs.core.NodeSeq.getBasis=function(){return new cljs.core.PersistentVector(null,5,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"meta","meta",-1154898805,null),new cljs.core.Symbol(null,"nodes","nodes",-459054278,null),new cljs.core.Symbol(null,"i","i",253690212,null),new 
cljs.core.Symbol(null,"s","s",-948495851,null),cljs.core.with_meta(new cljs.core.Symbol(null,"__hash","__hash",-1328796629,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable", +"mutable",875778266),!0],null))],null)};cljs.core.NodeSeq.cljs$lang$type=!0;cljs.core.NodeSeq.cljs$lang$ctorStr="cljs.core/NodeSeq";cljs.core.NodeSeq.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/NodeSeq")};cljs.core.__GT_NodeSeq=function(a,b,c,d,e){return new cljs.core.NodeSeq(a,b,c,d,e)};goog.object.set(cljs.core.NodeSeq.prototype,cljs.core.ITER_SYMBOL,function(){return cljs.core.es6_iterator(this)}); +cljs.core.create_inode_seq=function(a){switch(arguments.length){case 1:return cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 3:return cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$1=function(a){return cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$3(a,0,null)}; +cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$3=function(a,b,c){if(null==c)for(c=a.length;;)if(bthis.cnt?cljs.core.count(cljs.core.next(this))+1:this.cnt};cljs.core.PersistentTreeMapSeq.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this.__hash;return null!=a?a:this.__hash=a=cljs.core.hash_ordered_coll(this)};cljs.core.PersistentTreeMapSeq.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return cljs.core.equiv_sequential(this,b)}; +cljs.core.PersistentTreeMapSeq.prototype.cljs$core$IEmptyableCollection$_empty$arity$1=function(a){return cljs.core.List.EMPTY};cljs.core.PersistentTreeMapSeq.prototype.cljs$core$IReduce$_reduce$arity$2=function(a,b){return cljs.core.seq_reduce.cljs$core$IFn$_invoke$arity$2(b,this)};cljs.core.PersistentTreeMapSeq.prototype.cljs$core$IReduce$_reduce$arity$3=function(a,b,c){return cljs.core.seq_reduce.cljs$core$IFn$_invoke$arity$3(b,c,this)}; +cljs.core.PersistentTreeMapSeq.prototype.cljs$core$ISeq$_first$arity$1=function(a){return cljs.core.peek(this.stack)};cljs.core.PersistentTreeMapSeq.prototype.cljs$core$ISeq$_rest$arity$1=function(a){a=cljs.core.first(this.stack);a=cljs.core.tree_map_seq_push(this.ascending_QMARK_?a.right:a.left,cljs.core.next(this.stack),this.ascending_QMARK_);return null!=a?new cljs.core.PersistentTreeMapSeq(null,a,this.ascending_QMARK_,this.cnt-1,null):cljs.core.List.EMPTY}; +cljs.core.PersistentTreeMapSeq.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){return this};cljs.core.PersistentTreeMapSeq.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return b===this.meta?this:new cljs.core.PersistentTreeMapSeq(b,this.stack,this.ascending_QMARK_,this.cnt,this.__hash)};cljs.core.PersistentTreeMapSeq.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){return cljs.core.cons(b,this)}; +cljs.core.PersistentTreeMapSeq.getBasis=function(){return new cljs.core.PersistentVector(null,5,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"meta","meta",-1154898805,null),new cljs.core.Symbol(null,"stack","stack",847125597,null),cljs.core.with_meta(new cljs.core.Symbol(null,"ascending?","ascending?",-1938452653,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"tag","tag",-1290361223),new cljs.core.Symbol(null,"boolean","boolean",-278886877,null)],null)), +new 
cljs.core.Symbol(null,"cnt","cnt",1924510325,null),cljs.core.with_meta(new cljs.core.Symbol(null,"__hash","__hash",-1328796629,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null))],null)};cljs.core.PersistentTreeMapSeq.cljs$lang$type=!0;cljs.core.PersistentTreeMapSeq.cljs$lang$ctorStr="cljs.core/PersistentTreeMapSeq";cljs.core.PersistentTreeMapSeq.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/PersistentTreeMapSeq")}; +cljs.core.__GT_PersistentTreeMapSeq=function(a,b,c,d,e){return new cljs.core.PersistentTreeMapSeq(a,b,c,d,e)};goog.object.set(cljs.core.PersistentTreeMapSeq.prototype,cljs.core.ITER_SYMBOL,function(){return cljs.core.es6_iterator(this)});cljs.core.create_tree_map_seq=function(a,b,c){return new cljs.core.PersistentTreeMapSeq(null,cljs.core.tree_map_seq_push(a,null,b),b,c,null)}; +cljs.core.balance_left=function(a,b,c,d){return c instanceof cljs.core.RedNode?c.left instanceof cljs.core.RedNode?new cljs.core.RedNode(c.key,c.val,c.left.blacken(),new cljs.core.BlackNode(a,b,c.right,d,null),null):c.right instanceof cljs.core.RedNode?new cljs.core.RedNode(c.right.key,c.right.val,new cljs.core.BlackNode(c.key,c.val,c.left,c.right.left,null),new cljs.core.BlackNode(a,b,c.right.right,d,null),null):new cljs.core.BlackNode(a,b,c,d,null):new cljs.core.BlackNode(a,b,c,d,null)}; +cljs.core.balance_right=function(a,b,c,d){return d instanceof cljs.core.RedNode?d.right instanceof cljs.core.RedNode?new cljs.core.RedNode(d.key,d.val,new cljs.core.BlackNode(a,b,c,d.left,null),d.right.blacken(),null):d.left instanceof cljs.core.RedNode?new cljs.core.RedNode(d.left.key,d.left.val,new cljs.core.BlackNode(a,b,c,d.left.left,null),new cljs.core.BlackNode(d.key,d.val,d.left.right,d.right,null),null):new cljs.core.BlackNode(a,b,c,d,null):new cljs.core.BlackNode(a,b,c,d,null)}; +cljs.core.balance_left_del=function(a,b,c,d){if(c instanceof cljs.core.RedNode)return new cljs.core.RedNode(a,b,c.blacken(),d,null);if(d instanceof cljs.core.BlackNode)return cljs.core.balance_right(a,b,c,d.redden());if(d instanceof cljs.core.RedNode&&d.left instanceof cljs.core.BlackNode)return new cljs.core.RedNode(d.left.key,d.left.val,new cljs.core.BlackNode(a,b,c,d.left.left,null),cljs.core.balance_right(d.key,d.val,d.left.right,d.right.redden()),null);throw Error("red-black tree invariant violation"); +}; +cljs.core.balance_right_del=function(a,b,c,d){if(d instanceof cljs.core.RedNode)return new cljs.core.RedNode(a,b,c,d.blacken(),null);if(c instanceof cljs.core.BlackNode)return cljs.core.balance_left(a,b,c.redden(),d);if(c instanceof cljs.core.RedNode&&c.right instanceof cljs.core.BlackNode)return new cljs.core.RedNode(c.right.key,c.right.val,cljs.core.balance_left(c.key,c.val,c.left.redden(),c.right.left),new cljs.core.BlackNode(a,b,c.right.right,d,null),null);throw Error("red-black tree invariant violation");}; +cljs.core.tree_map_kv_reduce=function(a,b,c){var d=null!=a.left?function(){var d=a.left;return cljs.core.tree_map_kv_reduce.cljs$core$IFn$_invoke$arity$3?cljs.core.tree_map_kv_reduce.cljs$core$IFn$_invoke$arity$3(d,b,c):cljs.core.tree_map_kv_reduce.call(null,d,b,c)}():c;if(cljs.core.reduced_QMARK_(d))return d;var e=function(){var c=a.key,e=a.val;return b.cljs$core$IFn$_invoke$arity$3?b.cljs$core$IFn$_invoke$arity$3(d,c,e):b.call(null,d,c,e)}();if(cljs.core.reduced_QMARK_(e))return e;if(null!=a.right){var f= +a.right;return 
cljs.core.tree_map_kv_reduce.cljs$core$IFn$_invoke$arity$3?cljs.core.tree_map_kv_reduce.cljs$core$IFn$_invoke$arity$3(f,b,e):cljs.core.tree_map_kv_reduce.call(null,f,b,e)}return e};cljs.core.BlackNode=function(a,b,c,d,e){this.key=a;this.val=b;this.left=c;this.right=d;this.__hash=e;this.cljs$lang$protocol_mask$partition0$=166619935;this.cljs$lang$protocol_mask$partition1$=0};cljs.core.BlackNode.prototype.cljs$core$IFind$=cljs.core.PROTOCOL_SENTINEL; +cljs.core.BlackNode.prototype.cljs$core$IFind$_find$arity$2=function(a,b){switch(b){case 0:return new cljs.core.MapEntry(0,this.key,null);case 1:return new cljs.core.MapEntry(1,this.val,null);default:return null}}; +cljs.core.BlackNode.prototype.lastIndexOf=function(){var a=null,b=function(a){return cljs.core._lastIndexOf.cljs$core$IFn$_invoke$arity$3(this,a,cljs.core.count(this))},c=function(a,b){return cljs.core._lastIndexOf.cljs$core$IFn$_invoke$arity$3(this,a,b)};a=function(a,e){switch(arguments.length){case 1:return b.call(this,a);case 2:return c.call(this,a,e)}throw Error("Invalid arity: "+arguments.length);};a.cljs$core$IFn$_invoke$arity$1=b;a.cljs$core$IFn$_invoke$arity$2=c;return a}(); +cljs.core.BlackNode.prototype.indexOf=function(){var a=null,b=function(a){return cljs.core._indexOf.cljs$core$IFn$_invoke$arity$3(this,a,0)},c=function(a,b){return cljs.core._indexOf.cljs$core$IFn$_invoke$arity$3(this,a,b)};a=function(a,e){switch(arguments.length){case 1:return b.call(this,a);case 2:return c.call(this,a,e)}throw Error("Invalid arity: "+arguments.length);};a.cljs$core$IFn$_invoke$arity$1=b;a.cljs$core$IFn$_invoke$arity$2=c;return a}();cljs.core.BlackNode.prototype.add_right=function(a){return a.balance_right(this)}; +cljs.core.BlackNode.prototype.redden=function(){return new cljs.core.RedNode(this.key,this.val,this.left,this.right,null)};cljs.core.BlackNode.prototype.blacken=function(){return this};cljs.core.BlackNode.prototype.add_left=function(a){return a.balance_left(this)};cljs.core.BlackNode.prototype.replace=function(a,b,c,d){return new cljs.core.BlackNode(a,b,c,d,null)};cljs.core.BlackNode.prototype.balance_left=function(a){return new cljs.core.BlackNode(a.key,a.val,this,a.right,null)}; +cljs.core.BlackNode.prototype.balance_right=function(a){return new cljs.core.BlackNode(a.key,a.val,a.left,this,null)};cljs.core.BlackNode.prototype.remove_left=function(a){return cljs.core.balance_left_del(this.key,this.val,a,this.right)};cljs.core.BlackNode.prototype.kv_reduce=function(a,b){return cljs.core.tree_map_kv_reduce(this,a,b)};cljs.core.BlackNode.prototype.remove_right=function(a){return cljs.core.balance_right_del(this.key,this.val,this.left,a)}; +cljs.core.BlackNode.prototype.cljs$core$ILookup$_lookup$arity$2=function(a,b){return this.cljs$core$IIndexed$_nth$arity$3(null,b,null)};cljs.core.BlackNode.prototype.cljs$core$ILookup$_lookup$arity$3=function(a,b,c){return this.cljs$core$IIndexed$_nth$arity$3(null,b,c)};cljs.core.BlackNode.prototype.cljs$core$IIndexed$_nth$arity$2=function(a,b){if(0===b)return this.key;if(1===b)return this.val;throw Error("Index out of bounds");}; +cljs.core.BlackNode.prototype.cljs$core$IIndexed$_nth$arity$3=function(a,b,c){return 0===b?this.key:1===b?this.val:c};cljs.core.BlackNode.prototype.cljs$core$IVector$_assoc_n$arity$3=function(a,b,c){return(new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key,this.val],null)).cljs$core$IVector$_assoc_n$arity$3(null,b,c)};cljs.core.BlackNode.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return null}; 
+cljs.core.BlackNode.prototype.cljs$core$ICounted$_count$arity$1=function(a){return 2};cljs.core.BlackNode.prototype.cljs$core$IMapEntry$_key$arity$1=function(a){return this.key};cljs.core.BlackNode.prototype.cljs$core$IMapEntry$_val$arity$1=function(a){return this.val};cljs.core.BlackNode.prototype.cljs$core$IStack$_peek$arity$1=function(a){return this.val}; +cljs.core.BlackNode.prototype.cljs$core$IStack$_pop$arity$1=function(a){return new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key],null)};cljs.core.BlackNode.prototype.cljs$core$IReversible$_rseq$arity$1=function(a){return new cljs.core.IndexedSeq([this.val,this.key],0,null)};cljs.core.BlackNode.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this.__hash;return null!=a?a:this.__hash=a=cljs.core.hash_ordered_coll(this)}; +cljs.core.BlackNode.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return cljs.core.equiv_sequential(this,b)};cljs.core.BlackNode.prototype.cljs$core$IEmptyableCollection$_empty$arity$1=function(a){return null};cljs.core.BlackNode.prototype.cljs$core$IReduce$_reduce$arity$2=function(a,b){return cljs.core.ci_reduce.cljs$core$IFn$_invoke$arity$2(this,b)}; +cljs.core.BlackNode.prototype.cljs$core$IReduce$_reduce$arity$3=function(a,b,c){return cljs.core.ci_reduce.cljs$core$IFn$_invoke$arity$3(this,b,c)};cljs.core.BlackNode.prototype.cljs$core$IAssociative$_assoc$arity$3=function(a,b,c){return cljs.core.assoc.cljs$core$IFn$_invoke$arity$3(new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key,this.val],null),b,c)}; +cljs.core.BlackNode.prototype.cljs$core$IAssociative$_contains_key_QMARK_$arity$2=function(a,b){return 0===b||1===b};cljs.core.BlackNode.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){return new cljs.core.IndexedSeq([this.key,this.val],0,null)};cljs.core.BlackNode.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return cljs.core._with_meta(new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key,this.val],null),b)}; +cljs.core.BlackNode.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key,this.val,b],null)}; +cljs.core.BlackNode.prototype.call=function(){var a=null;a=function(a,c,d){switch(arguments.length){case 2:return this.cljs$core$IIndexed$_nth$arity$2(null,c);case 3:return this.cljs$core$IIndexed$_nth$arity$3(null,c,d)}throw Error("Invalid arity: "+(arguments.length-1));};a.cljs$core$IFn$_invoke$arity$2=function(a,c){return this.cljs$core$IIndexed$_nth$arity$2(null,c)};a.cljs$core$IFn$_invoke$arity$3=function(a,c,d){return this.cljs$core$IIndexed$_nth$arity$3(null,c,d)};return a}(); +cljs.core.BlackNode.prototype.apply=function(a,b){return this.call.apply(this,[this].concat(cljs.core.aclone(b)))};cljs.core.BlackNode.prototype.cljs$core$IFn$_invoke$arity$1=function(a){return this.cljs$core$IIndexed$_nth$arity$2(null,a)};cljs.core.BlackNode.prototype.cljs$core$IFn$_invoke$arity$2=function(a,b){return this.cljs$core$IIndexed$_nth$arity$3(null,a,b)}; +cljs.core.BlackNode.getBasis=function(){return new cljs.core.PersistentVector(null,5,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"key","key",124488940,null),new cljs.core.Symbol(null,"val","val",1769233139,null),new cljs.core.Symbol(null,"left","left",1241415590,null),new cljs.core.Symbol(null,"right","right",1187949694,null),cljs.core.with_meta(new 
cljs.core.Symbol(null,"__hash","__hash",-1328796629,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null, +"mutable","mutable",875778266),!0],null))],null)};cljs.core.BlackNode.cljs$lang$type=!0;cljs.core.BlackNode.cljs$lang$ctorStr="cljs.core/BlackNode";cljs.core.BlackNode.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/BlackNode")};cljs.core.__GT_BlackNode=function(a,b,c,d,e){return new cljs.core.BlackNode(a,b,c,d,e)};goog.object.set(cljs.core.BlackNode.prototype,cljs.core.ITER_SYMBOL,function(){return cljs.core.es6_iterator(this)}); +cljs.core.RedNode=function(a,b,c,d,e){this.key=a;this.val=b;this.left=c;this.right=d;this.__hash=e;this.cljs$lang$protocol_mask$partition0$=166619935;this.cljs$lang$protocol_mask$partition1$=0};cljs.core.RedNode.prototype.cljs$core$IFind$=cljs.core.PROTOCOL_SENTINEL;cljs.core.RedNode.prototype.cljs$core$IFind$_find$arity$2=function(a,b){switch(b){case 0:return new cljs.core.MapEntry(0,this.key,null);case 1:return new cljs.core.MapEntry(1,this.val,null);default:return null}}; +cljs.core.RedNode.prototype.lastIndexOf=function(){var a=null,b=function(a){return cljs.core._lastIndexOf.cljs$core$IFn$_invoke$arity$3(this,a,cljs.core.count(this))},c=function(a,b){return cljs.core._lastIndexOf.cljs$core$IFn$_invoke$arity$3(this,a,b)};a=function(a,e){switch(arguments.length){case 1:return b.call(this,a);case 2:return c.call(this,a,e)}throw Error("Invalid arity: "+arguments.length);};a.cljs$core$IFn$_invoke$arity$1=b;a.cljs$core$IFn$_invoke$arity$2=c;return a}(); +cljs.core.RedNode.prototype.indexOf=function(){var a=null,b=function(a){return cljs.core._indexOf.cljs$core$IFn$_invoke$arity$3(this,a,0)},c=function(a,b){return cljs.core._indexOf.cljs$core$IFn$_invoke$arity$3(this,a,b)};a=function(a,e){switch(arguments.length){case 1:return b.call(this,a);case 2:return c.call(this,a,e)}throw Error("Invalid arity: "+arguments.length);};a.cljs$core$IFn$_invoke$arity$1=b;a.cljs$core$IFn$_invoke$arity$2=c;return a}(); +cljs.core.RedNode.prototype.add_right=function(a){return new cljs.core.RedNode(this.key,this.val,this.left,a,null)};cljs.core.RedNode.prototype.redden=function(){throw Error("red-black tree invariant violation");};cljs.core.RedNode.prototype.blacken=function(){return new cljs.core.BlackNode(this.key,this.val,this.left,this.right,null)};cljs.core.RedNode.prototype.add_left=function(a){return new cljs.core.RedNode(this.key,this.val,a,this.right,null)}; +cljs.core.RedNode.prototype.replace=function(a,b,c,d){return new cljs.core.RedNode(a,b,c,d,null)}; +cljs.core.RedNode.prototype.balance_left=function(a){return this.left instanceof cljs.core.RedNode?new cljs.core.RedNode(this.key,this.val,this.left.blacken(),new cljs.core.BlackNode(a.key,a.val,this.right,a.right,null),null):this.right instanceof cljs.core.RedNode?new cljs.core.RedNode(this.right.key,this.right.val,new cljs.core.BlackNode(this.key,this.val,this.left,this.right.left,null),new cljs.core.BlackNode(a.key,a.val,this.right.right,a.right,null),null):new cljs.core.BlackNode(a.key,a.val, +this,a.right,null)}; +cljs.core.RedNode.prototype.balance_right=function(a){return this.right instanceof cljs.core.RedNode?new cljs.core.RedNode(this.key,this.val,new cljs.core.BlackNode(a.key,a.val,a.left,this.left,null),this.right.blacken(),null):this.left instanceof cljs.core.RedNode?new cljs.core.RedNode(this.left.key,this.left.val,new cljs.core.BlackNode(a.key,a.val,a.left,this.left.left,null),new 
cljs.core.BlackNode(this.key,this.val,this.left.right,this.right,null),null):new cljs.core.BlackNode(a.key,a.val,a.left, +this,null)};cljs.core.RedNode.prototype.remove_left=function(a){return new cljs.core.RedNode(this.key,this.val,a,this.right,null)};cljs.core.RedNode.prototype.kv_reduce=function(a,b){return cljs.core.tree_map_kv_reduce(this,a,b)};cljs.core.RedNode.prototype.remove_right=function(a){return new cljs.core.RedNode(this.key,this.val,this.left,a,null)};cljs.core.RedNode.prototype.cljs$core$ILookup$_lookup$arity$2=function(a,b){return this.cljs$core$IIndexed$_nth$arity$3(null,b,null)}; +cljs.core.RedNode.prototype.cljs$core$ILookup$_lookup$arity$3=function(a,b,c){return this.cljs$core$IIndexed$_nth$arity$3(null,b,c)};cljs.core.RedNode.prototype.cljs$core$IIndexed$_nth$arity$2=function(a,b){if(0===b)return this.key;if(1===b)return this.val;throw Error("Index out of bounds");};cljs.core.RedNode.prototype.cljs$core$IIndexed$_nth$arity$3=function(a,b,c){return 0===b?this.key:1===b?this.val:c}; +cljs.core.RedNode.prototype.cljs$core$IVector$_assoc_n$arity$3=function(a,b,c){return(new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key,this.val],null)).cljs$core$IVector$_assoc_n$arity$3(null,b,c)};cljs.core.RedNode.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return null};cljs.core.RedNode.prototype.cljs$core$ICounted$_count$arity$1=function(a){return 2};cljs.core.RedNode.prototype.cljs$core$IMapEntry$_key$arity$1=function(a){return this.key}; +cljs.core.RedNode.prototype.cljs$core$IMapEntry$_val$arity$1=function(a){return this.val};cljs.core.RedNode.prototype.cljs$core$IStack$_peek$arity$1=function(a){return this.val};cljs.core.RedNode.prototype.cljs$core$IStack$_pop$arity$1=function(a){return new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key],null)};cljs.core.RedNode.prototype.cljs$core$IReversible$_rseq$arity$1=function(a){return new cljs.core.IndexedSeq([this.val,this.key],0,null)}; +cljs.core.RedNode.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this.__hash;return null!=a?a:this.__hash=a=cljs.core.hash_ordered_coll(this)};cljs.core.RedNode.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return cljs.core.equiv_sequential(this,b)};cljs.core.RedNode.prototype.cljs$core$IEmptyableCollection$_empty$arity$1=function(a){return null}; +cljs.core.RedNode.prototype.cljs$core$IReduce$_reduce$arity$2=function(a,b){return cljs.core.ci_reduce.cljs$core$IFn$_invoke$arity$2(this,b)};cljs.core.RedNode.prototype.cljs$core$IReduce$_reduce$arity$3=function(a,b,c){return cljs.core.ci_reduce.cljs$core$IFn$_invoke$arity$3(this,b,c)}; +cljs.core.RedNode.prototype.cljs$core$IAssociative$_assoc$arity$3=function(a,b,c){return cljs.core.assoc.cljs$core$IFn$_invoke$arity$3(new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key,this.val],null),b,c)};cljs.core.RedNode.prototype.cljs$core$IAssociative$_contains_key_QMARK_$arity$2=function(a,b){return 0===b||1===b};cljs.core.RedNode.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){return new cljs.core.IndexedSeq([this.key,this.val],0,null)}; +cljs.core.RedNode.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return cljs.core._with_meta(new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key,this.val],null),b)};cljs.core.RedNode.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){return new 
cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key,this.val,b],null)}; +cljs.core.RedNode.prototype.call=function(){var a=null;a=function(a,c,d){switch(arguments.length){case 2:return this.cljs$core$IIndexed$_nth$arity$2(null,c);case 3:return this.cljs$core$IIndexed$_nth$arity$3(null,c,d)}throw Error("Invalid arity: "+(arguments.length-1));};a.cljs$core$IFn$_invoke$arity$2=function(a,c){return this.cljs$core$IIndexed$_nth$arity$2(null,c)};a.cljs$core$IFn$_invoke$arity$3=function(a,c,d){return this.cljs$core$IIndexed$_nth$arity$3(null,c,d)};return a}(); +cljs.core.RedNode.prototype.apply=function(a,b){return this.call.apply(this,[this].concat(cljs.core.aclone(b)))};cljs.core.RedNode.prototype.cljs$core$IFn$_invoke$arity$1=function(a){return this.cljs$core$IIndexed$_nth$arity$2(null,a)};cljs.core.RedNode.prototype.cljs$core$IFn$_invoke$arity$2=function(a,b){return this.cljs$core$IIndexed$_nth$arity$3(null,a,b)}; +cljs.core.RedNode.getBasis=function(){return new cljs.core.PersistentVector(null,5,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"key","key",124488940,null),new cljs.core.Symbol(null,"val","val",1769233139,null),new cljs.core.Symbol(null,"left","left",1241415590,null),new cljs.core.Symbol(null,"right","right",1187949694,null),cljs.core.with_meta(new cljs.core.Symbol(null,"__hash","__hash",-1328796629,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable", +"mutable",875778266),!0],null))],null)};cljs.core.RedNode.cljs$lang$type=!0;cljs.core.RedNode.cljs$lang$ctorStr="cljs.core/RedNode";cljs.core.RedNode.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/RedNode")};cljs.core.__GT_RedNode=function(a,b,c,d,e){return new cljs.core.RedNode(a,b,c,d,e)};goog.object.set(cljs.core.RedNode.prototype,cljs.core.ITER_SYMBOL,function(){return cljs.core.es6_iterator(this)}); +cljs.core.tree_map_add=function(a,b,c,d,e){if(null==b)return new cljs.core.RedNode(c,d,null,null,null);var f=function(){var d=b.key;return a.cljs$core$IFn$_invoke$arity$2?a.cljs$core$IFn$_invoke$arity$2(c,d):a.call(null,c,d)}();if(0===f)return e[0]=b,null;if(0>f)return f=function(){var f=b.left;return cljs.core.tree_map_add.cljs$core$IFn$_invoke$arity$5?cljs.core.tree_map_add.cljs$core$IFn$_invoke$arity$5(a,f,c,d,e):cljs.core.tree_map_add.call(null,a,f,c,d,e)}(),null!=f?b.add_left(f):null;f=function(){var f= +b.right;return cljs.core.tree_map_add.cljs$core$IFn$_invoke$arity$5?cljs.core.tree_map_add.cljs$core$IFn$_invoke$arity$5(a,f,c,d,e):cljs.core.tree_map_add.call(null,a,f,c,d,e)}();return null!=f?b.add_right(f):null}; +cljs.core.tree_map_append=function(a,b){if(null==a)return b;if(null==b)return a;if(a instanceof cljs.core.RedNode){if(b instanceof cljs.core.RedNode){var c=function(){var c=a.right,e=b.left;return cljs.core.tree_map_append.cljs$core$IFn$_invoke$arity$2?cljs.core.tree_map_append.cljs$core$IFn$_invoke$arity$2(c,e):cljs.core.tree_map_append.call(null,c,e)}();return c instanceof cljs.core.RedNode?new cljs.core.RedNode(c.key,c.val,new cljs.core.RedNode(a.key,a.val,a.left,c.left,null),new cljs.core.RedNode(b.key, +b.val,c.right,b.right,null),null):new cljs.core.RedNode(a.key,a.val,a.left,new cljs.core.RedNode(b.key,b.val,c,b.right,null),null)}return new cljs.core.RedNode(a.key,a.val,a.left,function(){var c=a.right;return cljs.core.tree_map_append.cljs$core$IFn$_invoke$arity$2?cljs.core.tree_map_append.cljs$core$IFn$_invoke$arity$2(c,b):cljs.core.tree_map_append.call(null,c,b)}(),null)}if(b instanceof 
cljs.core.RedNode)return new cljs.core.RedNode(b.key,b.val,function(){var c=b.left;return cljs.core.tree_map_append.cljs$core$IFn$_invoke$arity$2? +cljs.core.tree_map_append.cljs$core$IFn$_invoke$arity$2(a,c):cljs.core.tree_map_append.call(null,a,c)}(),b.right,null);c=function(){var c=a.right,e=b.left;return cljs.core.tree_map_append.cljs$core$IFn$_invoke$arity$2?cljs.core.tree_map_append.cljs$core$IFn$_invoke$arity$2(c,e):cljs.core.tree_map_append.call(null,c,e)}();return c instanceof cljs.core.RedNode?new cljs.core.RedNode(c.key,c.val,new cljs.core.BlackNode(a.key,a.val,a.left,c.left,null),new cljs.core.BlackNode(b.key,b.val,c.right,b.right, +null),null):cljs.core.balance_left_del(a.key,a.val,a.left,new cljs.core.BlackNode(b.key,b.val,c,b.right,null))}; +cljs.core.tree_map_remove=function(a,b,c,d){if(null!=b){var e=function(){var d=b.key;return a.cljs$core$IFn$_invoke$arity$2?a.cljs$core$IFn$_invoke$arity$2(c,d):a.call(null,c,d)}();if(0===e)return d[0]=b,cljs.core.tree_map_append(b.left,b.right);if(0>e)return e=function(){var e=b.left;return cljs.core.tree_map_remove.cljs$core$IFn$_invoke$arity$4?cljs.core.tree_map_remove.cljs$core$IFn$_invoke$arity$4(a,e,c,d):cljs.core.tree_map_remove.call(null,a,e,c,d)}(),null!=e||null!=d[0]?b.left instanceof cljs.core.BlackNode? +cljs.core.balance_left_del(b.key,b.val,e,b.right):new cljs.core.RedNode(b.key,b.val,e,b.right,null):null;e=function(){var e=b.right;return cljs.core.tree_map_remove.cljs$core$IFn$_invoke$arity$4?cljs.core.tree_map_remove.cljs$core$IFn$_invoke$arity$4(a,e,c,d):cljs.core.tree_map_remove.call(null,a,e,c,d)}();return null!=e||null!=d[0]?b.right instanceof cljs.core.BlackNode?cljs.core.balance_right_del(b.key,b.val,b.left,e):new cljs.core.RedNode(b.key,b.val,b.left,e,null):null}return null}; +cljs.core.tree_map_replace=function(a,b,c,d){var e=b.key,f=a.cljs$core$IFn$_invoke$arity$2?a.cljs$core$IFn$_invoke$arity$2(c,e):a.call(null,c,e);return 0===f?b.replace(e,d,b.left,b.right):0>f?b.replace(e,b.val,function(){var e=b.left;return cljs.core.tree_map_replace.cljs$core$IFn$_invoke$arity$4?cljs.core.tree_map_replace.cljs$core$IFn$_invoke$arity$4(a,e,c,d):cljs.core.tree_map_replace.call(null,a,e,c,d)}(),b.right):b.replace(e,b.val,b.left,function(){var e=b.right;return cljs.core.tree_map_replace.cljs$core$IFn$_invoke$arity$4? 
+cljs.core.tree_map_replace.cljs$core$IFn$_invoke$arity$4(a,e,c,d):cljs.core.tree_map_replace.call(null,a,e,c,d)}())};cljs.core.PersistentTreeMap=function(a,b,c,d,e){this.comp=a;this.tree=b;this.cnt=c;this.meta=d;this.__hash=e;this.cljs$lang$protocol_mask$partition0$=418776847;this.cljs$lang$protocol_mask$partition1$=8192};cljs.core.PersistentTreeMap.prototype.cljs$core$IFind$=cljs.core.PROTOCOL_SENTINEL;cljs.core.PersistentTreeMap.prototype.cljs$core$IFind$_find$arity$2=function(a,b){return this.entry_at(b)}; +cljs.core.PersistentTreeMap.prototype.forEach=function(a){for(var b=cljs.core.seq(this),c=null,d=0,e=0;;)if(ec?b.left:b.right}else return null};cljs.core.PersistentTreeMap.prototype.has=function(a){return cljs.core.contains_QMARK_(this,a)}; +cljs.core.PersistentTreeMap.prototype.cljs$core$ILookup$_lookup$arity$2=function(a,b){return this.cljs$core$ILookup$_lookup$arity$3(null,b,null)};cljs.core.PersistentTreeMap.prototype.cljs$core$ILookup$_lookup$arity$3=function(a,b,c){a=this.entry_at(b);return null!=a?a.val:c};cljs.core.PersistentTreeMap.prototype.cljs$core$IKVReduce$_kv_reduce$arity$3=function(a,b,c){return null!=this.tree?cljs.core.unreduced(cljs.core.tree_map_kv_reduce(this.tree,b,c)):c}; +cljs.core.PersistentTreeMap.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return this.meta};cljs.core.PersistentTreeMap.prototype.cljs$core$ICloneable$_clone$arity$1=function(a){return new cljs.core.PersistentTreeMap(this.comp,this.tree,this.cnt,this.meta,this.__hash)};cljs.core.PersistentTreeMap.prototype.cljs$core$ICounted$_count$arity$1=function(a){return this.cnt}; +cljs.core.PersistentTreeMap.prototype.cljs$core$IReversible$_rseq$arity$1=function(a){return 0e?(a=cljs.core.conj.cljs$core$IFn$_invoke$arity$2(a,d),d=d.left):d=d.right:0< +e?(a=cljs.core.conj.cljs$core$IFn$_invoke$arity$2(a,d),d=d.right):d=d.left}else return null==a?null:new cljs.core.PersistentTreeMapSeq(null,a,c,-1,null)}else return null};cljs.core.PersistentTreeMap.prototype.cljs$core$ISorted$_entry_key$arity$2=function(a,b){return cljs.core.key(b)};cljs.core.PersistentTreeMap.prototype.cljs$core$ISorted$_comparator$arity$1=function(a){return this.comp}; +cljs.core.PersistentTreeMap.getBasis=function(){return new cljs.core.PersistentVector(null,5,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"comp","comp",-1462482139,null),new cljs.core.Symbol(null,"tree","tree",1444219499,null),new cljs.core.Symbol(null,"cnt","cnt",1924510325,null),new cljs.core.Symbol(null,"meta","meta",-1154898805,null),cljs.core.with_meta(new cljs.core.Symbol(null,"__hash","__hash",-1328796629,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null, +"mutable","mutable",875778266),!0],null))],null)};cljs.core.PersistentTreeMap.cljs$lang$type=!0;cljs.core.PersistentTreeMap.cljs$lang$ctorStr="cljs.core/PersistentTreeMap";cljs.core.PersistentTreeMap.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/PersistentTreeMap")};cljs.core.__GT_PersistentTreeMap=function(a,b,c,d,e){return new cljs.core.PersistentTreeMap(a,b,c,d,e)};cljs.core.PersistentTreeMap.EMPTY=new cljs.core.PersistentTreeMap(cljs.core.compare,null,0,null,cljs.core.empty_unordered_hash); +goog.object.set(cljs.core.PersistentTreeMap.prototype,cljs.core.ITER_SYMBOL,function(){return cljs.core.es6_iterator(this)});cljs.core.hash_map=function(a){for(var b=[],c=arguments.length,d=0;;)if(d(a.cljs$core$IFn$_invoke$arity$1?a.cljs$core$IFn$_invoke$arity$1(c):a.call(null,c))?b:c}; 
+cljs.core.max_key.cljs$core$IFn$_invoke$arity$variadic=function(a,b,c,d){return cljs.core.reduce.cljs$core$IFn$_invoke$arity$3(function(b,c){return cljs.core.max_key.cljs$core$IFn$_invoke$arity$3(a,b,c)},cljs.core.max_key.cljs$core$IFn$_invoke$arity$3(a,b,c),d)};cljs.core.max_key.cljs$lang$applyTo=function(a){var b=cljs.core.first(a),c=cljs.core.next(a);a=cljs.core.first(c);var d=cljs.core.next(c);c=cljs.core.first(d);d=cljs.core.next(d);return this.cljs$core$IFn$_invoke$arity$variadic(b,a,c,d)}; +cljs.core.max_key.cljs$lang$maxFixedArity=3; +cljs.core.min_key=function(a){switch(arguments.length){case 2:return cljs.core.min_key.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return cljs.core.min_key.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:for(var b=[],c=arguments.length,d=0;;)if(d=this.count)throw Error("-drop-first of empty chunk");return new cljs.core.RangeChunk(this.start+this.step,this.step,this.count-1)}; +cljs.core.RangeChunk.getBasis=function(){return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"start","start",1285322546,null),new cljs.core.Symbol(null,"step","step",-1365547645,null),new cljs.core.Symbol(null,"count","count",-514511684,null)],null)};cljs.core.RangeChunk.cljs$lang$type=!0;cljs.core.RangeChunk.cljs$lang$ctorStr="cljs.core/RangeChunk";cljs.core.RangeChunk.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/RangeChunk")}; +cljs.core.__GT_RangeChunk=function(a,b,c){return new cljs.core.RangeChunk(a,b,c)};cljs.core.RangeIterator=function(a,b,c){this.i=a;this.end=b;this.step=c};cljs.core.RangeIterator.prototype.hasNext=function(){return 0<this.step?this.i<this.end:this.i>this.end};cljs.core.RangeIterator.prototype.next=function(){var a=this.i;this.i+=this.step;return a}; +cljs.core.RangeIterator.getBasis=function(){return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[cljs.core.with_meta(new cljs.core.Symbol(null,"i","i",253690212,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),new cljs.core.Symbol(null,"end","end",1372345569,null),new cljs.core.Symbol(null,"step","step",-1365547645,null)],null)};cljs.core.RangeIterator.cljs$lang$type=!0; +cljs.core.RangeIterator.cljs$lang$ctorStr="cljs.core/RangeIterator";cljs.core.RangeIterator.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/RangeIterator")};cljs.core.__GT_RangeIterator=function(a,b,c){return new cljs.core.RangeIterator(a,b,c)};cljs.core.Range=function(a,b,c,d,e,f,g){this.meta=a;this.start=b;this.end=c;this.step=d;this.chunk=e;this.chunk_next=f;this.__hash=g;this.cljs$lang$protocol_mask$partition0$=32375006;this.cljs$lang$protocol_mask$partition1$=140800}; +cljs.core.Range.prototype.toString=function(){return cljs.core.pr_str_STAR_(this)};cljs.core.Range.prototype.equiv=function(a){return this.cljs$core$IEquiv$_equiv$arity$2(null,a)}; +cljs.core.Range.prototype.indexOf=function(){var a=null,b=function(a){return cljs.core._indexOf.cljs$core$IFn$_invoke$arity$3(this,a,0)},c=function(a,b){return cljs.core._indexOf.cljs$core$IFn$_invoke$arity$3(this,a,b)};a=function(a,e){switch(arguments.length){case 1:return b.call(this,a);case 2:return c.call(this,a,e)}throw Error("Invalid arity: "+arguments.length);};a.cljs$core$IFn$_invoke$arity$1=b;a.cljs$core$IFn$_invoke$arity$2=c;return a}(); +cljs.core.Range.prototype.lastIndexOf=function(){var a=null,b=function(a){return
cljs.core._lastIndexOf.cljs$core$IFn$_invoke$arity$3(this,a,cljs.core.count(this))},c=function(a,b){return cljs.core._lastIndexOf.cljs$core$IFn$_invoke$arity$3(this,a,b)};a=function(a,e){switch(arguments.length){case 1:return b.call(this,a);case 2:return c.call(this,a,e)}throw Error("Invalid arity: "+arguments.length);};a.cljs$core$IFn$_invoke$arity$1=b;a.cljs$core$IFn$_invoke$arity$2=c;return a}(); +cljs.core.Range.prototype.forceChunk=function(){if(null==this.chunk){var a=this.cljs$core$ICounted$_count$arity$1(null);return 32<a?(this.chunk_next=new cljs.core.Range(null,this.start+32*this.step,this.end,this.step,null,null,null),this.chunk=new cljs.core.RangeChunk(this.start,this.step,32)):this.chunk=new cljs.core.RangeChunk(this.start,this.step,a)}};cljs.core.Range.prototype.cljs$core$IIndexed$_nth$arity$2=function(a,b){if(0<=b&&b<this.cljs$core$ICounted$_count$arity$1(null))return this.start+b*this.step;if(0<=b&&this.start>this.end&&0===this.step)return this.start;throw Error("Index out of bounds");};cljs.core.Range.prototype.cljs$core$IIndexed$_nth$arity$3=function(a,b,c){return 0<=b&&b<this.cljs$core$ICounted$_count$arity$1(null)?this.start+b*this.step:0<=b&&this.start>this.end&&0===this.step?this.start:c}; +cljs.core.Range.prototype.cljs$core$IIterable$_iterator$arity$1=function(a){return new cljs.core.RangeIterator(this.start,this.end,this.step)};cljs.core.Range.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return this.meta};cljs.core.Range.prototype.cljs$core$ICloneable$_clone$arity$1=function(a){return new cljs.core.Range(this.meta,this.start,this.end,this.step,this.chunk,this.chunk_next,this.__hash)}; +cljs.core.Range.prototype.cljs$core$INext$_next$arity$1=function(a){return 0<this.step?this.start+this.step<this.end?new cljs.core.Range(null,this.start+this.step,this.end,this.step,null,null,null):null:this.start+this.step>this.end?new cljs.core.Range(null,this.start+this.step,this.end,this.step,null,null,null):null};cljs.core.Range.prototype.cljs$core$ICounted$_count$arity$1=function(a){return Math.ceil((this.end-this.start)/this.step)}; +cljs.core.Range.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this.__hash;return null!=a?a:this.__hash=a=cljs.core.hash_ordered_coll(this)};cljs.core.Range.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return cljs.core.equiv_sequential(this,b)};cljs.core.Range.prototype.cljs$core$IEmptyableCollection$_empty$arity$1=function(a){return cljs.core.List.EMPTY}; +cljs.core.Range.prototype.cljs$core$IReduce$_reduce$arity$2=function(a,b){return cljs.core.ci_reduce.cljs$core$IFn$_invoke$arity$2(this,b)};cljs.core.Range.prototype.cljs$core$IReduce$_reduce$arity$3=function(a,b,c){for(a=this.start;;)if(0<this.step?a<this.end:a>this.end){c=b.cljs$core$IFn$_invoke$arity$2?b.cljs$core$IFn$_invoke$arity$2(c,a):b.call(null,c,a);if(cljs.core.reduced_QMARK_(c))return cljs.core.deref(c);a+=this.step}else return c}; +cljs.core.Range.prototype.cljs$core$ISeq$_first$arity$1=function(a){return this.start};cljs.core.Range.prototype.cljs$core$ISeq$_rest$arity$1=function(a){a=this.cljs$core$INext$_next$arity$1(null);return null==a?cljs.core.List.EMPTY:a};cljs.core.Range.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){return this};cljs.core.Range.prototype.cljs$core$IChunkedSeq$_chunked_first$arity$1=function(a){this.forceChunk();return this.chunk}; +cljs.core.Range.prototype.cljs$core$IChunkedSeq$_chunked_rest$arity$1=function(a){this.forceChunk();return null==this.chunk_next?cljs.core.List.EMPTY:this.chunk_next};cljs.core.Range.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return b===this.meta?this:new cljs.core.Range(b,this.start,this.end,this.step,this.chunk,this.chunk_next,this.__hash)};cljs.core.Range.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){return cljs.core.cons(b,this)}; +cljs.core.Range.prototype.cljs$core$IChunkedNext$_chunked_next$arity$1=function(a){return cljs.core.seq(this.cljs$core$IChunkedSeq$_chunked_rest$arity$1(null))}; +cljs.core.Range.getBasis=function(){return new cljs.core.PersistentVector(null,7,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"meta","meta",-1154898805,null),new
cljs.core.Symbol(null,"start","start",1285322546,null),new cljs.core.Symbol(null,"end","end",1372345569,null),new cljs.core.Symbol(null,"step","step",-1365547645,null),cljs.core.with_meta(new cljs.core.Symbol(null,"chunk","chunk",449371907,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable", +"mutable",875778266),!0],null)),cljs.core.with_meta(new cljs.core.Symbol(null,"chunk-next","chunk-next",-547810434,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),cljs.core.with_meta(new cljs.core.Symbol(null,"__hash","__hash",-1328796629,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null))],null)};cljs.core.Range.cljs$lang$type=!0;cljs.core.Range.cljs$lang$ctorStr="cljs.core/Range"; +cljs.core.Range.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/Range")};cljs.core.__GT_Range=function(a,b,c,d,e,f,g){return new cljs.core.Range(a,b,c,d,e,f,g)};goog.object.set(cljs.core.Range.prototype,cljs.core.ITER_SYMBOL,function(){return cljs.core.es6_iterator(this)}); +cljs.core.range=function(a){switch(arguments.length){case 0:return cljs.core.range.cljs$core$IFn$_invoke$arity$0();case 1:return cljs.core.range.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.range.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return cljs.core.range.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.core.range.cljs$core$IFn$_invoke$arity$0=function(){return cljs.core.range.cljs$core$IFn$_invoke$arity$3(0,Number.MAX_VALUE,1)};cljs.core.range.cljs$core$IFn$_invoke$arity$1=function(a){return cljs.core.range.cljs$core$IFn$_invoke$arity$3(0,a,1)};cljs.core.range.cljs$core$IFn$_invoke$arity$2=function(a,b){return cljs.core.range.cljs$core$IFn$_invoke$arity$3(a,b,1)}; +cljs.core.range.cljs$core$IFn$_invoke$arity$3=function(a,b,c){return 0c?b>=a?cljs.core.List.EMPTY:new cljs.core.Range(null,a,b,c,null,null,null):b===a?cljs.core.List.EMPTY:cljs.core.repeat.cljs$core$IFn$_invoke$arity$1(a)};cljs.core.range.cljs$lang$maxFixedArity=3; +cljs.core.take_nth=function(a){switch(arguments.length){case 1:return cljs.core.take_nth.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.take_nth.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.core.take_nth.cljs$core$IFn$_invoke$arity$1=function(a){if("number"!==typeof a)throw Error("Assert failed: (number? 
n)");return function(b){var c=cljs.core.volatile_BANG_(-1);return function(){var d=null,e=function(){return b.cljs$core$IFn$_invoke$arity$0?b.cljs$core$IFn$_invoke$arity$0():b.call(null)},f=function(a){return b.cljs$core$IFn$_invoke$arity$1?b.cljs$core$IFn$_invoke$arity$1(a):b.call(null,a)},g=function(d,e){var f=c.cljs$core$IVolatile$_vreset_BANG_$arity$2(null,c.cljs$core$IDeref$_deref$arity$1(null)+ +1);return 0===cljs.core.rem(f,a)?b.cljs$core$IFn$_invoke$arity$2?b.cljs$core$IFn$_invoke$arity$2(d,e):b.call(null,d,e):d};d=function(a,b){switch(arguments.length){case 0:return e.call(this);case 1:return f.call(this,a);case 2:return g.call(this,a,b)}throw Error("Invalid arity: "+arguments.length);};d.cljs$core$IFn$_invoke$arity$0=e;d.cljs$core$IFn$_invoke$arity$1=f;d.cljs$core$IFn$_invoke$arity$2=g;return d}()}}; +cljs.core.take_nth.cljs$core$IFn$_invoke$arity$2=function(a,b){if("number"!==typeof a)throw Error("Assert failed: (number? n)");return new cljs.core.LazySeq(null,function(){var c=cljs.core.seq(b);return c?cljs.core.cons(cljs.core.first(c),cljs.core.take_nth.cljs$core$IFn$_invoke$arity$2(a,cljs.core.drop.cljs$core$IFn$_invoke$arity$2(a,c))):null},null,null)};cljs.core.take_nth.cljs$lang$maxFixedArity=2; +cljs.core.split_with=function(a,b){return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[cljs.core.take_while.cljs$core$IFn$_invoke$arity$2(a,b),cljs.core.drop_while.cljs$core$IFn$_invoke$arity$2(a,b)],null)}; +cljs.core.partition_by=function(a){switch(arguments.length){case 1:return cljs.core.partition_by.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.partition_by.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.core.partition_by.cljs$core$IFn$_invoke$arity$1=function(a){return function(b){var c=cljs.core.array_list(),d=cljs.core.volatile_BANG_(new cljs.core.Keyword("cljs.core","none","cljs.core/none",926646439));return function(){var e=null,f=function(){return b.cljs$core$IFn$_invoke$arity$0?b.cljs$core$IFn$_invoke$arity$0():b.call(null)},g=function(a){if(!cljs.core.truth_(c.isEmpty())){var d=cljs.core.vec(c.toArray());c.clear();a=cljs.core.unreduced(b.cljs$core$IFn$_invoke$arity$2?b.cljs$core$IFn$_invoke$arity$2(a, +d):b.call(null,a,d))}return b.cljs$core$IFn$_invoke$arity$1?b.cljs$core$IFn$_invoke$arity$1(a):b.call(null,a)},h=function(e,f){var g=cljs.core.deref(d),h=a.cljs$core$IFn$_invoke$arity$1?a.cljs$core$IFn$_invoke$arity$1(f):a.call(null,f);cljs.core.vreset_BANG_(d,h);if(cljs.core.keyword_identical_QMARK_(g,new cljs.core.Keyword("cljs.core","none","cljs.core/none",926646439))||cljs.core._EQ_.cljs$core$IFn$_invoke$arity$2(h,g))return c.add(f),e;g=cljs.core.vec(c.toArray());c.clear();e=b.cljs$core$IFn$_invoke$arity$2? 
+b.cljs$core$IFn$_invoke$arity$2(e,g):b.call(null,e,g);cljs.core.reduced_QMARK_(e)||c.add(f);return e};e=function(a,b){switch(arguments.length){case 0:return f.call(this);case 1:return g.call(this,a);case 2:return h.call(this,a,b)}throw Error("Invalid arity: "+arguments.length);};e.cljs$core$IFn$_invoke$arity$0=f;e.cljs$core$IFn$_invoke$arity$1=g;e.cljs$core$IFn$_invoke$arity$2=h;return e}()}}; +cljs.core.partition_by.cljs$core$IFn$_invoke$arity$2=function(a,b){return new cljs.core.LazySeq(null,function(){var c=cljs.core.seq(b);if(c){var d=cljs.core.first(c),e=a.cljs$core$IFn$_invoke$arity$1?a.cljs$core$IFn$_invoke$arity$1(d):a.call(null,d),f=cljs.core.cons(d,cljs.core.take_while.cljs$core$IFn$_invoke$arity$2(function(b){return cljs.core._EQ_.cljs$core$IFn$_invoke$arity$2(e,a.cljs$core$IFn$_invoke$arity$1?a.cljs$core$IFn$_invoke$arity$1(b):a.call(null,b))},cljs.core.next(c)));return cljs.core.cons(f, +cljs.core.partition_by.cljs$core$IFn$_invoke$arity$2(a,new cljs.core.LazySeq(null,function(){return cljs.core.drop.cljs$core$IFn$_invoke$arity$2(cljs.core.count(f),c)},null,null)))}return null},null,null)};cljs.core.partition_by.cljs$lang$maxFixedArity=2; +cljs.core.frequencies=function(a){return cljs.core.persistent_BANG_(cljs.core.reduce.cljs$core$IFn$_invoke$arity$3(function(a,c){return cljs.core.assoc_BANG_.cljs$core$IFn$_invoke$arity$3(a,c,cljs.core.get.cljs$core$IFn$_invoke$arity$3(a,c,0)+1)},cljs.core.transient$(cljs.core.PersistentArrayMap.EMPTY),a))}; +cljs.core.reductions=function(a){switch(arguments.length){case 2:return cljs.core.reductions.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return cljs.core.reductions.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.core.reductions.cljs$core$IFn$_invoke$arity$2=function(a,b){return new cljs.core.LazySeq(null,function(){var c=cljs.core.seq(b);return c?cljs.core.reductions.cljs$core$IFn$_invoke$arity$3(a,cljs.core.first(c),cljs.core.rest(c)):new cljs.core.List(null,a.cljs$core$IFn$_invoke$arity$0?a.cljs$core$IFn$_invoke$arity$0():a.call(null),null,1,null)},null,null)}; +cljs.core.reductions.cljs$core$IFn$_invoke$arity$3=function(a,b,c){return cljs.core.reduced_QMARK_(b)?new cljs.core.List(null,cljs.core.deref(b),null,1,null):cljs.core.cons(b,new cljs.core.LazySeq(null,function(){var d=cljs.core.seq(c);return d?cljs.core.reductions.cljs$core$IFn$_invoke$arity$3(a,function(){var c=cljs.core.first(d);return a.cljs$core$IFn$_invoke$arity$2?a.cljs$core$IFn$_invoke$arity$2(b,c):a.call(null,b,c)}(),cljs.core.rest(d)):null},null,null))}; +cljs.core.reductions.cljs$lang$maxFixedArity=3; +cljs.core.juxt=function(a){switch(arguments.length){case 1:return cljs.core.juxt.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.juxt.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return cljs.core.juxt.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:for(var b=[],c=arguments.length,d=0;;)if(de?1:e);return e<=b.length?(e=cljs.core.subs.cljs$core$IFn$_invoke$arity$2(b,e),cljs.core.re_seq_STAR_.cljs$core$IFn$_invoke$arity$2?cljs.core.re_seq_STAR_.cljs$core$IFn$_invoke$arity$2(a,e):cljs.core.re_seq_STAR_.call(null,a,e)):null},null,null))}; +cljs.core.re_seq=function(a,b){if("string"===typeof b)return cljs.core.re_seq_STAR_(a,b);throw new TypeError("re-seq must match against a string.");};cljs.core.re_pattern=function(a){if(a instanceof 
RegExp)return a;var b=cljs.core.re_find(/^\(\?([idmsux]*)\)/,a),c=cljs.core.nth.cljs$core$IFn$_invoke$arity$3(b,0,null);b=cljs.core.nth.cljs$core$IFn$_invoke$arity$3(b,1,null);a=cljs.core.subs.cljs$core$IFn$_invoke$arity$2(a,null==c?0:c.length);c=RegExp;b=cljs.core.truth_(b)?b:"";return new c(a,b)}; +cljs.core.pr_sequential_writer=function(a,b,c,d,e,f,g){var h=cljs.core._STAR_print_level_STAR_;cljs.core._STAR_print_level_STAR_=null==cljs.core._STAR_print_level_STAR_?null:cljs.core._STAR_print_level_STAR_-1;try{if(null!=cljs.core._STAR_print_level_STAR_&&0>cljs.core._STAR_print_level_STAR_)return cljs.core._write(a,"#");cljs.core._write(a,c);if(0===(new cljs.core.Keyword(null,"print-length","print-length",1931866356)).cljs$core$IFn$_invoke$arity$1(f))cljs.core.seq(g)&&cljs.core._write(a,function(){var a= +(new cljs.core.Keyword(null,"more-marker","more-marker",-14717935)).cljs$core$IFn$_invoke$arity$1(f);return cljs.core.truth_(a)?a:"..."}());else{if(cljs.core.seq(g)){var k=cljs.core.first(g);b.cljs$core$IFn$_invoke$arity$3?b.cljs$core$IFn$_invoke$arity$3(k,a,f):b.call(null,k,a,f)}for(var l=cljs.core.next(g),m=(new cljs.core.Keyword(null,"print-length","print-length",1931866356)).cljs$core$IFn$_invoke$arity$1(f)-1;;)if(!l||null!=m&&0===m){cljs.core.seq(l)&&0===m&&(cljs.core._write(a,d),cljs.core._write(a, +function(){var a=(new cljs.core.Keyword(null,"more-marker","more-marker",-14717935)).cljs$core$IFn$_invoke$arity$1(f);return cljs.core.truth_(a)?a:"..."}()));break}else{cljs.core._write(a,d);var n=cljs.core.first(l);c=a;g=f;b.cljs$core$IFn$_invoke$arity$3?b.cljs$core$IFn$_invoke$arity$3(n,c,g):b.call(null,n,c,g);var p=cljs.core.next(l);c=m-1;l=p;m=c}}return cljs.core._write(a,e)}finally{cljs.core._STAR_print_level_STAR_=h}}; +cljs.core.write_all=function(a){for(var b=[],c=arguments.length,d=0;;)if(d=Number(c)?a:a=-1Number(a)?"-":0<=b.indexOf("+")?"+":0<=b.indexOf(" ")?" 
":"";0<=Number(a)&&(d=f+d);if(isNaN(c)||d.length>=Number(c))return d;d=isNaN(e)?Math.abs(Number(a)).toString():Math.abs(Number(a)).toFixed(e);a=Number(c)-d.length-f.length;0<=b.indexOf("-",0)?d=f+d+goog.string.repeat(" ",a):(b=0<=b.indexOf("0",0)?"0":" ",d=f+goog.string.repeat(b,a)+d);return d}; +goog.string.format.demuxes_.d=function(a,b,c,d,e,f,g,h){return goog.string.format.demuxes_.f(parseInt(a,10),b,c,d,0,f,g,h)};goog.string.format.demuxes_.i=goog.string.format.demuxes_.d;goog.string.format.demuxes_.u=goog.string.format.demuxes_.d;var clojure={string:{}};clojure.string.seq_reverse=function(a){return cljs.core.reduce.call(null,cljs.core.conj,cljs.core.List.EMPTY,a)};clojure.string.re_surrogate_pair=/([\uD800-\uDBFF])([\uDC00-\uDFFF])/g;clojure.string.reverse=function(a){return a.replace(clojure.string.re_surrogate_pair,"$2$1").split("").reverse().join("")}; +clojure.string.replace_all=function(a,b,c){var d=RegExp,e=b.source;var f=cljs.core.truth_(b.ignoreCase)?"gi":"g";f=cljs.core.truth_(b.multiline)?[f,"m"].join(""):f;b=cljs.core.truth_(b.unicode)?[f,"u"].join(""):f;d=new d(e,b);return a.replace(d,c)}; +clojure.string.replace_with=function(a){return function(){var b=function(b){b=cljs.core.drop_last.call(null,2,b);return cljs.core._EQ_.call(null,cljs.core.count.call(null,b),1)?a.call(null,cljs.core.first.call(null,b)):a.call(null,cljs.core.vec.call(null,b))},c=function(a){var c=null;if(0=b||b>=2+cljs.core.count.call(null,a))return cljs.core.conj.call(null,cljs.core.vec.call(null,cljs.core.cons.call(null,"",cljs.core.map.call(null,cljs.core.str,cljs.core.seq.call(null,a)))),"");var c=cljs.core._EQ__EQ_;if(cljs.core.truth_(c.call(null,1,b)))return new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[a],null);if(cljs.core.truth_(c.call(null,2,b)))return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE, +["",a],null);b-=2;return cljs.core.conj.call(null,cljs.core.vec.call(null,cljs.core.cons.call(null,"",cljs.core.subvec.call(null,cljs.core.vec.call(null,cljs.core.map.call(null,cljs.core.str,cljs.core.seq.call(null,a))),0,b))),cljs.core.subs.call(null,a,b))}; +clojure.string.split=function(a){switch(arguments.length){case 2:return clojure.string.split.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return clojure.string.split.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};clojure.string.split.cljs$core$IFn$_invoke$arity$2=function(a,b){return clojure.string.split.call(null,a,b,0)}; +clojure.string.split.cljs$core$IFn$_invoke$arity$3=function(a,b,c){return clojure.string.discard_trailing_if_needed.call(null,c,"/(?:)/"===cljs.core.str.cljs$core$IFn$_invoke$arity$1(b)?clojure.string.split_with_empty_regex.call(null,a,c):1>c?cljs.core.vec.call(null,cljs.core.str.cljs$core$IFn$_invoke$arity$1(a).split(b)):function(){for(var d=a,e=c,f=cljs.core.PersistentVector.EMPTY;;){if(1===e)return cljs.core.conj.call(null,f,d);var g=cljs.core.re_find.call(null,b,d);if(null!=g){var h=d.indexOf(g); +g=d.substring(h+cljs.core.count.call(null,g));--e;f=cljs.core.conj.call(null,f,d.substring(0,h));d=g}else return cljs.core.conj.call(null,f,d)}}())};clojure.string.split.cljs$lang$maxFixedArity=3;clojure.string.split_lines=function(a){return clojure.string.split.call(null,a,/\n|\r\n/)};clojure.string.trim=function(a){return goog.string.trim(a)};clojure.string.triml=function(a){return 
goog.string.trimLeft(a)};clojure.string.trimr=function(a){return goog.string.trimRight(a)}; +clojure.string.trim_newline=function(a){for(var b=a.length;;){if(0===b)return"";var c=cljs.core.get.call(null,a,b-1);if("\n"===c||"\r"===c)--b;else return a.substring(0,b)}};clojure.string.blank_QMARK_=function(a){return goog.string.isEmptyOrWhitespace(goog.string.makeSafe(a))}; +clojure.string.escape=function(a,b){for(var c=new goog.string.StringBuffer,d=a.length,e=0;;){if(d===e)return c.toString();var f=a.charAt(e),g=cljs.core.get.call(null,b,f);null!=g?c.append(cljs.core.str.cljs$core$IFn$_invoke$arity$1(g)):c.append(f);e+=1}}; +clojure.string.index_of=function(a){switch(arguments.length){case 2:return clojure.string.index_of.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return clojure.string.index_of.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};clojure.string.index_of.cljs$core$IFn$_invoke$arity$2=function(a,b){a=a.indexOf(b);return 0>a?null:a}; +clojure.string.index_of.cljs$core$IFn$_invoke$arity$3=function(a,b,c){a=a.indexOf(b,c);return 0>a?null:a};clojure.string.index_of.cljs$lang$maxFixedArity=3; +clojure.string.last_index_of=function(a){switch(arguments.length){case 2:return clojure.string.last_index_of.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return clojure.string.last_index_of.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +clojure.string.last_index_of.cljs$core$IFn$_invoke$arity$2=function(a,b){a=a.lastIndexOf(b);return 0>a?null:a};clojure.string.last_index_of.cljs$core$IFn$_invoke$arity$3=function(a,b,c){a=a.lastIndexOf(b,c);return 0>a?null:a};clojure.string.last_index_of.cljs$lang$maxFixedArity=3;clojure.string.starts_with_QMARK_=function(a,b){return goog.string.startsWith(a,b)};clojure.string.ends_with_QMARK_=function(a,b){return goog.string.endsWith(a,b)}; +clojure.string.includes_QMARK_=function(a,b){return goog.string.contains(a,b)};var bigml={hideo:{}};bigml.hideo.util={};bigml.hideo.util.version={};bigml.hideo.util.version.version_string="0.8.1";bigml.hideo.util.version.version_major=0;bigml.hideo.util.version.version_minor=8;bigml.hideo.util.version.version_micro=1;bigml.hideo.util.version.project_name="dixie"; +bigml.hideo.util.version.version=new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[bigml.hideo.util.version.version_major,bigml.hideo.util.version.version_minor,bigml.hideo.util.version.version_micro],null); +bigml.hideo.util.version.version_EQ_=function(a,b){return cljs.core._EQ_.call(null,cljs.core.truth_(a)?a:new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[0,0,0],null),cljs.core.truth_(b)?b:new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[0,0,0],null))}; +bigml.hideo.util.version.version_LT_=function(a,b){return 0>cljs.core.compare.call(null,cljs.core.truth_(a)?a:new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[0,0,0],null),cljs.core.truth_(b)?b:new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[0,0,0],null))}; +bigml.hideo.util.version.version_LT__EQ_=function(a,b){return!(0<cljs.core.compare.call(null,cljs.core.truth_(a)?a:new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[0,0,0],null),cljs.core.truth_(b)?b:new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[0,0,0],null)))}; +bigml.hideo.util.version.version_GT_=function(a,b){return 0<cljs.core.compare.call(null,cljs.core.truth_(a)?a:new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[0,0,0],null),cljs.core.truth_(b)?b:new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[0,0,0],null))}; +bigml.hideo.util.version.version_GT__EQ_=function(a,b){return!(0>cljs.core.compare.call(null,cljs.core.truth_(a)?a:new
cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[0,0,0],null),cljs.core.truth_(b)?b:new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[0,0,0],null)))};bigml.dixie={};bigml.dixie.flatline={};bigml.dixie.flatline.utils={};bigml.dixie.flatline.utils.version_string=bigml.hideo.util.version.version_string;bigml.dixie.flatline.utils.version=bigml.hideo.util.version.version;bigml.dixie.flatline.utils.registry=cljs.core.atom.call(null,cljs.core.PersistentArrayMap.EMPTY); +bigml.dixie.flatline.utils.deferror_BANG_=function(a){for(var b=[],c=arguments.length,d=0;;)if(d=Math.abs((e-g)/e))return 1-e*Math.exp(b*Math.log(a)-a-bigml.dixie.flatline.utils.log_gamma.call(null,b));h+=1;c=e+(h-b)*c;g=f+(h-b)*d;d=a*g+h*f;var k=g/d;f=(a*c+h*e)/d;g=e;c/=d;d=k;e=f;f=1}};bigml.dixie.flatline.utils.gser=function(a,b){for(var c=1/b,d=1/b,e=1;;){if(c<=1E-4*d)return d*Math.exp(b*Math.log(a)-a-bigml.dixie.flatline.utils.log_gamma.call(null,b));c=c*a/(b+e);d+=c;e+=1}}; +bigml.dixie.flatline.utils.gammacdf=function(a,b){return 0b?cljs.core.str.cljs$core$IFn$_invoke$arity$1(b):"":null].join(""):null}; +bigml.dixie.flatline.types.fn_names=function(a){a=bigml.dixie.flatline.types.fn_desc.call(null,a);return cljs.core.truth_(a)?cljs.core.sequential_QMARK_.call(null,a)?cljs.core.map.call(null,function(a){return bigml.dixie.flatline.types.format_fname.call(null,(new cljs.core.Keyword(null,"name","name",1843675177)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null,"shift","shift",997140064)).cljs$core$IFn$_invoke$arity$1(a))},a):bigml.dixie.flatline.types.format_fname.call(null,(new cljs.core.Keyword(null, +"name","name",1843675177)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null,"shift","shift",997140064)).cljs$core$IFn$_invoke$arity$1(a)):null};bigml.dixie.flatline.types.fn_shifts=function(a){a=bigml.dixie.flatline.types.fn_desc.call(null,a);return cljs.core.truth_(a)?cljs.core.sequential_QMARK_.call(null,a)?cljs.core.map.call(null,new cljs.core.Keyword(null,"shift","shift",997140064),a):(new cljs.core.Keyword(null,"shift","shift",997140064)).cljs$core$IFn$_invoke$arity$1(a):null}; +bigml.dixie.flatline.types.bool__GT_str=function(a){return!0===a||!1===a?cljs.core.str.cljs$core$IFn$_invoke$arity$1(a):a}; +bigml.dixie.flatline.types.stringify_booleans=function(a){return cljs.core.with_meta.call(null,cljs.core._EQ_.call(null,new cljs.core.Keyword(null,"boolean","boolean",-1919418404),bigml.dixie.flatline.types.fn_type.call(null,a))?function(b,c){return bigml.dixie.flatline.types.bool__GT_str.call(null,a.call(null,b,c))}:cljs.core.truth_(cljs.core.some.call(null,new cljs.core.PersistentHashSet(null,new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"boolean","boolean",-1919418404),null], +null),null),bigml.dixie.flatline.types.fn_types.call(null,a)))?function(b,c){return cljs.core.mapv.call(null,bigml.dixie.flatline.types.bool__GT_str,a.call(null,b,c))}:a,cljs.core.meta.call(null,a))}; +bigml.dixie.flatline.types.trim_strings=function(a){return cljs.core.with_meta.call(null,cljs.core._EQ_.call(null,new cljs.core.Keyword(null,"string","string",-1989541586),bigml.dixie.flatline.types.fn_type.call(null,a))?function(b,c){return bigml.dixie.flatline.utils.trim.call(null,a.call(null,b,c))}:cljs.core.truth_(cljs.core.some.call(null,new cljs.core.PersistentHashSet(null,new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"string","string",-1989541586),null],null),null),bigml.dixie.flatline.types.fn_types.call(null, 
+a)))?function(b,c){return cljs.core.mapv.call(null,bigml.dixie.flatline.utils.trim,a.call(null,b,c))}:a,cljs.core.meta.call(null,a))};bigml.dixie.flatline.types.check_numbers=function(a,b,c){0>a||bigml.dixie.flatline.errors.check_arity.call(null,b,a);return cljs.core.doall.call(null,cljs.core.map.call(null,function(a,c){return bigml.dixie.flatline.types.check_type.call(null,b,c+1,a,new cljs.core.Keyword(null,"numeric","numeric",-1495594714))},c,cljs.core.range.call(null)))};bigml.dixie.flatline.eval={};bigml.dixie.flatline.eval.max_window_width=100;bigml.dixie.flatline.eval.op_designator=function(a){return"string"===typeof a||a instanceof cljs.core.Symbol?cljs.core.keyword.call(null,a):a instanceof cljs.core.Keyword?a:null}; +if("undefined"===typeof bigml||"undefined"===typeof bigml.dixie||"undefined"===typeof bigml.dixie.flatline||"undefined"===typeof bigml.dixie.flatline.eval||"undefined"===typeof bigml.dixie.flatline.eval.primop)bigml.dixie.flatline.eval.primop=function(){var a=cljs.core.atom.call(null,cljs.core.PersistentArrayMap.EMPTY),b=cljs.core.atom.call(null,cljs.core.PersistentArrayMap.EMPTY),c=cljs.core.atom.call(null,cljs.core.PersistentArrayMap.EMPTY),d=cljs.core.atom.call(null,cljs.core.PersistentArrayMap.EMPTY), +e=cljs.core.get.call(null,cljs.core.PersistentArrayMap.EMPTY,new cljs.core.Keyword(null,"hierarchy","hierarchy",-1053470341),cljs.core.get_global_hierarchy.call(null));return new cljs.core.MultiFn(cljs.core.symbol.call(null,"bigml.dixie.flatline.eval","primop"),function(){var a=function(a,b){return bigml.dixie.flatline.eval.op_designator.call(null,a)},b=function(b,c){var d=null;if(1b%28}; +goog.date.getNumberOfDaysInMonth=function(a,b){switch(b){case goog.date.month.FEB:return goog.date.isLeapYear(a)?29:28;case goog.date.month.JUN:case goog.date.month.SEP:case goog.date.month.NOV:case goog.date.month.APR:return 30}return 31};goog.date.isSameDay=function(a,b){b=b||new Date(goog.now());return a.getDate()==b.getDate()&&goog.date.isSameMonth(a,b)};goog.date.isSameMonth=function(a,b){b=b||new Date(goog.now());return a.getMonth()==b.getMonth()&&goog.date.isSameYear(a,b)}; +goog.date.isSameYear=function(a,b){b=b||new Date(goog.now());return a.getFullYear()==b.getFullYear()};goog.date.getCutOffSameWeek_=function(a,b,c,d,e){a=new Date(a,b,c);d=void 0!==d?d:goog.date.weekDay.THU;e=e||goog.date.weekDay.MON;b=(a.getDay()+6)%7;return a.valueOf()+((d-e+7)%7-(b-e+7)%7)*goog.date.MS_PER_DAY}; +goog.date.getWeekNumber=function(a,b,c,d,e){a=goog.date.getCutOffSameWeek_(a,b,c,d,e);b=(new Date((new Date(a)).getFullYear(),0,1)).valueOf();return Math.floor(Math.round((a-b)/goog.date.MS_PER_DAY)/7)+1};goog.date.getYearOfWeek=function(a,b,c,d,e){a=goog.date.getCutOffSameWeek_(a,b,c,d,e);return(new Date(a)).getFullYear()};goog.date.min=function(a,b){return ab?a:b}; +goog.date.setIso8601DateTime=function(a,b){b=goog.string.trim(b);var c=-1==b.indexOf("T")?" 
":"T";b=b.split(c);return goog.date.setIso8601DateOnly_(a,b[0])&&(2>b.length||goog.date.setIso8601TimeOnly_(a,b[1]))}; +goog.date.setIso8601DateOnly_=function(a,b){b=b.match(goog.date.splitDateStringRegex_);if(!b)return!1;var c=Number(b[2]),d=Number(b[3]),e=Number(b[4]),f=Number(b[5]),g=Number(b[6])||1;a.setFullYear(Number(b[1]));e?(a.setDate(1),a.setMonth(0),a.add(new goog.date.Interval(goog.date.Interval.DAYS,e-1))):f?goog.date.setDateFromIso8601Week_(a,f,g):(c&&(a.setDate(1),a.setMonth(c-1)),d&&a.setDate(d));return!0}; +goog.date.setDateFromIso8601Week_=function(a,b,c){a.setMonth(0);a.setDate(1);var d=a.getDay()||7;b=new goog.date.Interval(goog.date.Interval.DAYS,(4>=d?1-d:8-d)+(Number(c)+7*(Number(b)-1))-1);a.add(b)}; +goog.date.setIso8601TimeOnly_=function(a,b){var c=b.match(goog.date.splitTimezoneStringRegex_);if(c)if(b=b.substring(0,b.length-c[0].length),"Z"===c[0])var d=0;else d=60*Number(c[2])+Number(c[3]),d*="-"==c[1]?1:-1;b=b.match(goog.date.splitTimeStringRegex_);if(!b)return!1;if(c){goog.asserts.assertNumber(d);c=a.getYear();var e=a.getMonth(),f=a.getDate();b=Date.UTC(c,e,f,Number(b[1]),Number(b[2])||0,Number(b[3])||0,b[4]?1E3*Number(b[4]):0);a.setTime(b+6E4*d)}else a.setHours(Number(b[1])),a.setMinutes(Number(b[2])|| +0),a.setSeconds(Number(b[3])||0),a.setMilliseconds(b[4]?1E3*Number(b[4]):0);return!0}; +goog.date.Interval=function(a,b,c,d,e,f){"string"===typeof a?(this.years=a==goog.date.Interval.YEARS?b:0,this.months=a==goog.date.Interval.MONTHS?b:0,this.days=a==goog.date.Interval.DAYS?b:0,this.hours=a==goog.date.Interval.HOURS?b:0,this.minutes=a==goog.date.Interval.MINUTES?b:0,this.seconds=a==goog.date.Interval.SECONDS?b:0):(this.years=a||0,this.months=b||0,this.days=c||0,this.hours=d||0,this.minutes=e||0,this.seconds=f||0)}; +goog.date.Interval.fromIsoString=function(a){a=a.match(goog.date.splitDurationRegex_);if(!a)return null;var b=!(a[6]||a[7]||a[8]);if(b&&!(a[2]||a[3]||a[4])||b&&a[5])return null;b=a[1];var c=parseInt(a[2],10)||0,d=parseInt(a[3],10)||0,e=parseInt(a[4],10)||0,f=parseInt(a[6],10)||0,g=parseInt(a[7],10)||0;a=parseFloat(a[8])||0;return b?new goog.date.Interval(-c,-d,-e,-f,-g,-a):new goog.date.Interval(c,d,e,f,g,a)}; +goog.date.Interval.prototype.toIsoString=function(a){var b=Math.min(this.years,this.months,this.days,this.hours,this.minutes,this.seconds),c=Math.max(this.years,this.months,this.days,this.hours,this.minutes,this.seconds);if(0>b&&0b&&c.push("-");c.push("P");(this.years||a)&&c.push(Math.abs(this.years)+"Y");(this.months||a)&&c.push(Math.abs(this.months)+"M");(this.days||a)&&c.push(Math.abs(this.days)+"D");if(this.hours||this.minutes||this.seconds|| +a)c.push("T"),(this.hours||a)&&c.push(Math.abs(this.hours)+"H"),(this.minutes||a)&&c.push(Math.abs(this.minutes)+"M"),(this.seconds||a)&&c.push(Math.abs(this.seconds)+"S");return c.join("")};goog.date.Interval.prototype.equals=function(a){return a.years==this.years&&a.months==this.months&&a.days==this.days&&a.hours==this.hours&&a.minutes==this.minutes&&a.seconds==this.seconds}; +goog.date.Interval.prototype.clone=function(){return new goog.date.Interval(this.years,this.months,this.days,this.hours,this.minutes,this.seconds)};goog.date.Interval.YEARS="y";goog.date.Interval.MONTHS="m";goog.date.Interval.DAYS="d";goog.date.Interval.HOURS="h";goog.date.Interval.MINUTES="n";goog.date.Interval.SECONDS="s";goog.date.Interval.prototype.isZero=function(){return 0==this.years&&0==this.months&&0==this.days&&0==this.hours&&0==this.minutes&&0==this.seconds}; 
+goog.date.Interval.prototype.getInverse=function(){return this.times(-1)};goog.date.Interval.prototype.times=function(a){return new goog.date.Interval(this.years*a,this.months*a,this.days*a,this.hours*a,this.minutes*a,this.seconds*a)};goog.date.Interval.prototype.getTotalSeconds=function(){goog.asserts.assert(0==this.years&&0==this.months);return 60*(60*(24*this.days+this.hours)+this.minutes)+this.seconds}; +goog.date.Interval.prototype.add=function(a){this.years+=a.years;this.months+=a.months;this.days+=a.days;this.hours+=a.hours;this.minutes+=a.minutes;this.seconds+=a.seconds}; +goog.date.Date=function(a,b,c){"number"===typeof a?(this.date=this.buildDate_(a,b||0,c||1),this.maybeFixDst_(c||1)):goog.isObject(a)?(this.date=this.buildDate_(a.getFullYear(),a.getMonth(),a.getDate()),this.maybeFixDst_(a.getDate())):(this.date=new Date(goog.now()),a=this.date.getDate(),this.date.setHours(0),this.date.setMinutes(0),this.date.setSeconds(0),this.date.setMilliseconds(0),this.maybeFixDst_(a))}; +goog.date.Date.prototype.buildDate_=function(a,b,c){b=new Date(a,b,c);0<=a&&100>a&&b.setFullYear(b.getFullYear()-1900);return b};goog.date.Date.prototype.firstDayOfWeek_=goog.i18n.DateTimeSymbols.FIRSTDAYOFWEEK;goog.date.Date.prototype.firstWeekCutOffDay_=goog.i18n.DateTimeSymbols.FIRSTWEEKCUTOFFDAY;goog.date.Date.prototype.clone=function(){var a=new goog.date.Date(this.date);a.firstDayOfWeek_=this.firstDayOfWeek_;a.firstWeekCutOffDay_=this.firstWeekCutOffDay_;return a}; +goog.date.Date.prototype.getFullYear=function(){return this.date.getFullYear()};goog.date.Date.prototype.getYear=function(){return this.getFullYear()};goog.date.Date.prototype.getMonth=function(){return this.date.getMonth()};goog.date.Date.prototype.getDate=function(){return this.date.getDate()};goog.date.Date.prototype.getTime=function(){return this.date.getTime()};goog.date.Date.prototype.getDay=function(){return this.date.getDay()}; +goog.date.Date.prototype.getIsoWeekday=function(){return(this.getDay()+6)%7};goog.date.Date.prototype.getWeekday=function(){return(this.getIsoWeekday()-this.firstDayOfWeek_+7)%7};goog.date.Date.prototype.getUTCFullYear=function(){return this.date.getUTCFullYear()};goog.date.Date.prototype.getUTCMonth=function(){return this.date.getUTCMonth()};goog.date.Date.prototype.getUTCDate=function(){return this.date.getUTCDate()};goog.date.Date.prototype.getUTCDay=function(){return this.date.getDay()}; +goog.date.Date.prototype.getUTCHours=function(){return this.date.getUTCHours()};goog.date.Date.prototype.getUTCMinutes=function(){return this.date.getUTCMinutes()};goog.date.Date.prototype.getUTCIsoWeekday=function(){return(this.date.getUTCDay()+6)%7};goog.date.Date.prototype.getUTCWeekday=function(){return(this.getUTCIsoWeekday()-this.firstDayOfWeek_+7)%7};goog.date.Date.prototype.getFirstDayOfWeek=function(){return this.firstDayOfWeek_};goog.date.Date.prototype.getFirstWeekCutOffDay=function(){return this.firstWeekCutOffDay_}; +goog.date.Date.prototype.getNumberOfDaysInMonth=function(){return goog.date.getNumberOfDaysInMonth(this.getFullYear(),this.getMonth())};goog.date.Date.prototype.getWeekNumber=function(){return goog.date.getWeekNumber(this.getFullYear(),this.getMonth(),this.getDate(),this.firstWeekCutOffDay_,this.firstDayOfWeek_)};goog.date.Date.prototype.getYearOfWeek=function(){return goog.date.getYearOfWeek(this.getFullYear(),this.getMonth(),this.getDate(),this.firstWeekCutOffDay_,this.firstDayOfWeek_)}; +goog.date.Date.prototype.getDayOfYear=function(){for(var 
a=this.getDate(),b=this.getFullYear(),c=this.getMonth()-1;0<=c;c--)a+=goog.date.getNumberOfDaysInMonth(b,c);return a};goog.date.Date.prototype.getTimezoneOffset=function(){return this.date.getTimezoneOffset()};goog.date.Date.prototype.getTimezoneOffsetString=function(){var a=this.getTimezoneOffset();if(0==a)a="Z";else{var b=Math.abs(a)/60,c=Math.floor(b);b=60*(b-c);a=(0<a?"-":"+")+goog.string.padNumber(c,2)+":"+goog.string.padNumber(b,2)}return a}; +goog.date.Date.prototype.set=function(a){this.date=new Date(a.getFullYear(),a.getMonth(),a.getDate())};goog.date.Date.prototype.setFullYear=function(a){this.date.setFullYear(a)};goog.date.Date.prototype.setYear=function(a){this.setFullYear(a)};goog.date.Date.prototype.setMonth=function(a){this.date.setMonth(a)};goog.date.Date.prototype.setDate=function(a){this.date.setDate(a)};goog.date.Date.prototype.setTime=function(a){this.date.setTime(a)}; +goog.date.Date.prototype.add=function(a){if(a.years||a.months){var b=this.getMonth()+a.months+12*a.years,c=this.getYear()+Math.floor(b/12);b%=12;0>b&&(b+=12);var d=goog.date.getNumberOfDaysInMonth(c,b);d=Math.min(d,this.getDate());this.setDate(1);this.setFullYear(c);this.setMonth(b);this.setDate(d)}a.days&&(b=new Date(this.getYear(),this.getMonth(),this.getDate(),12),a=new Date(b.getTime()+864E5*a.days),this.setDate(1),this.setFullYear(a.getFullYear()),this.setMonth(a.getMonth()),this.setDate(a.getDate()), +this.maybeFixDst_(a.getDate()))};goog.date.Date.prototype.toIsoString=function(a,b){return[this.getFullYear(),goog.string.padNumber(this.getMonth()+1,2),goog.string.padNumber(this.getDate(),2)].join(a?"-":"")+(b?this.getTimezoneOffsetString():"")};goog.date.Date.prototype.toUTCIsoString=function(a,b){return[this.getUTCFullYear(),goog.string.padNumber(this.getUTCMonth()+1,2),goog.string.padNumber(this.getUTCDate(),2)].join(a?"-":"")+(b?"Z":"")}; +goog.date.Date.prototype.equals=function(a){return!(!a||this.getYear()!=a.getYear()||this.getMonth()!=a.getMonth()||this.getDate()!=a.getDate())};goog.date.Date.prototype.toString=function(){return this.toIsoString()};goog.date.Date.prototype.maybeFixDst_=function(a){this.getDate()!=a&&(a=this.getDate()=e:null,p=cljs.core.truth_(m)?cljs.core.truth_(e)?cljs.core.truth_(n)?cljs_time.internal.core.year_corrected_dim.call(null,m,e):n:e:cljs.core.truth_(e)?cljs.core.truth_(n)?cljs_time.internal.core.corrected_dim.call(null,e):n:e,q=cljs.core.truth_(f)?cljs.core.truth_(p)?1<=f&&f<=p:1<=f&&31>=f:null,r=cljs.core.truth_(l)?0<=l&&23>=l:null,t=cljs.core.truth_(b)?0<=b&&59>=b:null,u=cljs.core.truth_(h)? +0<=h&&60>=h:null,v=cljs.core.truth_(c)?0<=c&&999>=c:null,w=cljs.core.truth_(k)?1<=k&&53>=k:null,x=cljs.core.truth_(d)?1<=d&&7>=d:null;if(cljs.core.every_QMARK_.call(null,cljs.core.true_QMARK_,cljs.core.remove.call(null,cljs.core.nil_QMARK_,new cljs.core.PersistentVector(null,8,5,cljs.core.PersistentVector.EMPTY_NODE,[n,q,r,t,u,v,w,x],null)))){if(cljs.core.not.call(null,function(){var a=cljs.core.truth_(m)?m:cljs.core.truth_(e)?e:f;return cljs.core.truth_(a)?cljs.core.truth_(g)?g:cljs.core.truth_(k)?
+k:d:a}()))return a;throw cljs.core.ex_info.call(null,"Mixing year, month, day and week-year week-number fields",new cljs.core.PersistentArrayMap(null,3,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"invalid-date","invalid-date",2030506573),new cljs.core.Keyword(null,"date","date",-1463434462),a,new cljs.core.Keyword(null,"errors","errors",-908790718),cljs.core.PersistentArrayMap.EMPTY],null));}throw cljs.core.ex_info.call(null,"Date is not valid",new cljs.core.PersistentArrayMap(null, +3,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"invalid-date","invalid-date",2030506573),new cljs.core.Keyword(null,"date","date",-1463434462),a,new cljs.core.Keyword(null,"errors","errors",-908790718),function(){var a=cljs.core.PersistentArrayMap.EMPTY;a=!1===n?cljs.core.assoc.call(null,a,new cljs.core.Keyword(null,"months","months",-45571637),e):a;a=!1===q?cljs.core.assoc.call(null,a,new cljs.core.Keyword(null,"days","days",-1394072564),f):a;a=!1===r?cljs.core.assoc.call(null, +a,new cljs.core.Keyword(null,"hours","hours",58380855),l):a;a=!1===t?cljs.core.assoc.call(null,a,new cljs.core.Keyword(null,"minutes","minutes",1319166394),b):a;a=!1===u?cljs.core.assoc.call(null,a,new cljs.core.Keyword(null,"seconds","seconds",-445266194),h):a;a=!1===v?cljs.core.assoc.call(null,a,new cljs.core.Keyword(null,"millis","millis",-1338288387),c):a;a=!1===w?cljs.core.assoc.call(null,a,new cljs.core.Keyword(null,"weekyear-week","weekyear-week",795291571),k):a;return!1===x?cljs.core.assoc.call(null, +a,new cljs.core.Keyword(null,"day-of-week","day-of-week",1639326729),d):a}()],null));};cljs_time.internal.core.index_of=function(a,b){return cljs.core.first.call(null,cljs.core.keep_indexed.call(null,function(a,d){return cljs.core.truth_(cljs_time.internal.core._EQ_.call(null,d,b))?a:null},a))}; +cljs_time.internal.core.format=function(a){for(var b=[],c=arguments.length,d=0;;)if(d=a?["0",cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)].join(""):cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)};cljs_time.internal.core.zero_pad.cljs$core$IFn$_invoke$arity$2=function(a,b){return 1>b?cljs.core.str.cljs$core$IFn$_invoke$arity$1(a):[clojure.string.join.call(null,cljs.core.take.call(null,b-cljs.core.str.cljs$core$IFn$_invoke$arity$1(a).length,cljs.core.repeat.call(null,"0"))),cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)].join("")}; +cljs_time.internal.core.zero_pad.cljs$lang$maxFixedArity=2; +cljs_time.internal.core.multiplied_by=function(a,b){var c=function(a){return cljs.core.truth_(a)?a*b:null};return cljs.core.update_in.call(null,cljs.core.update_in.call(null,cljs.core.update_in.call(null,cljs.core.update_in.call(null,cljs.core.update_in.call(null,cljs.core.update_in.call(null,cljs.core.update_in.call(null,cljs.core.update_in.call(null,a,new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"millis","millis",-1338288387)],null),c), +new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"seconds","seconds",-445266194)],null),c),new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"minutes","minutes",1319166394)],null),c),new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"hours","hours",58380855)],null),c),new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE, +[new cljs.core.Keyword(null,"days","days",-1394072564)],null),c),new 
cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"weeks","weeks",1844596125)],null),c),new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"months","months",-45571637)],null),c),new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"years","years",-1298579689)],null),c)}; +cljs_time.internal.core.get_week_year=function(a,b,c){var d=cljs_time.internal.core._EQ_.call(null,b,0),e=cljs_time.internal.core._EQ_.call(null,b,11);b=goog.date.getWeekNumber(a,b,c);return cljs.core.truth_(cljs.core.truth_(d)?52<=b:d)?a-1:cljs.core.truth_(cljs.core.truth_(e)?cljs_time.internal.core._EQ_.call(null,b,1):e)?a+1:a};goog.i18n.ordinalRules={};goog.i18n.ordinalRules.Keyword={ZERO:"zero",ONE:"one",TWO:"two",FEW:"few",MANY:"many",OTHER:"other"};goog.i18n.ordinalRules.defaultSelect_=function(a,b){return goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.decimals_=function(a){a+="";const b=a.indexOf(".");return-1==b?0:a.length-b-1};goog.i18n.ordinalRules.get_vf_=function(a,b){b=void 0===b?Math.min(goog.i18n.ordinalRules.decimals_(a),3):b;const c=Math.pow(10,b);return{v:b,f:(a*c|0)%c}}; +goog.i18n.ordinalRules.get_wt_=function(a,b){if(0===b)return{w:0,t:0};for(;0===b%10;)b/=10,a--;return{w:a,t:b}};goog.i18n.ordinalRules.cySelect_=function(a,b){return 0==a||7==a||8==a||9==a?goog.i18n.ordinalRules.Keyword.ZERO:1==a?goog.i18n.ordinalRules.Keyword.ONE:2==a?goog.i18n.ordinalRules.Keyword.TWO:3==a||4==a?goog.i18n.ordinalRules.Keyword.FEW:5==a||6==a?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.enSelect_=function(a,b){return 1==a%10&&11!=a%100?goog.i18n.ordinalRules.Keyword.ONE:2==a%10&&12!=a%100?goog.i18n.ordinalRules.Keyword.TWO:3==a%10&&13!=a%100?goog.i18n.ordinalRules.Keyword.FEW:goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.ukSelect_=function(a,b){return 3==a%10&&13!=a%100?goog.i18n.ordinalRules.Keyword.FEW:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.itSelect_=function(a,b){return 11==a||8==a||80==a||800==a?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.neSelect_=function(a,b){return 1<=a&&4>=a?goog.i18n.ordinalRules.Keyword.ONE:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.orSelect_=function(a,b){return 1==a||5==a||7<=a&&9>=a?goog.i18n.ordinalRules.Keyword.ONE:2==a||3==a?goog.i18n.ordinalRules.Keyword.TWO:4==a?goog.i18n.ordinalRules.Keyword.FEW:6==a?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.beSelect_=function(a,b){return 2!=a%10&&3!=a%10||12==a%100||13==a%100?goog.i18n.ordinalRules.Keyword.OTHER:goog.i18n.ordinalRules.Keyword.FEW}; +goog.i18n.ordinalRules.azSelect_=function(a,b){a|=0;return 1==a%10||2==a%10||5==a%10||7==a%10||8==a%10||20==a%100||50==a%100||70==a%100||80==a%100?goog.i18n.ordinalRules.Keyword.ONE:3==a%10||4==a%10||100==a%1E3||200==a%1E3||300==a%1E3||400==a%1E3||500==a%1E3||600==a%1E3||700==a%1E3||800==a%1E3||900==a%1E3?goog.i18n.ordinalRules.Keyword.FEW:0==a||6==a%10||40==a%100||60==a%100||90==a%100?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.kaSelect_=function(a,b){a|=0;return 
1==a?goog.i18n.ordinalRules.Keyword.ONE:0==a||2<=a%100&&20>=a%100||40==a%100||60==a%100||80==a%100?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.mrSelect_=function(a,b){return 1==a?goog.i18n.ordinalRules.Keyword.ONE:2==a||3==a?goog.i18n.ordinalRules.Keyword.TWO:4==a?goog.i18n.ordinalRules.Keyword.FEW:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.svSelect_=function(a,b){return 1!=a%10&&2!=a%10||11==a%100||12==a%100?goog.i18n.ordinalRules.Keyword.OTHER:goog.i18n.ordinalRules.Keyword.ONE};goog.i18n.ordinalRules.kkSelect_=function(a,b){return 6==a%10||9==a%10||0==a%10&&0!=a?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.mkSelect_=function(a,b){a|=0;return 1==a%10&&11!=a%100?goog.i18n.ordinalRules.Keyword.ONE:2==a%10&&12!=a%100?goog.i18n.ordinalRules.Keyword.TWO:7!=a%10&&8!=a%10||17==a%100||18==a%100?goog.i18n.ordinalRules.Keyword.OTHER:goog.i18n.ordinalRules.Keyword.MANY};goog.i18n.ordinalRules.huSelect_=function(a,b){return 1==a||5==a?goog.i18n.ordinalRules.Keyword.ONE:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.frSelect_=function(a,b){return 1==a?goog.i18n.ordinalRules.Keyword.ONE:goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.sqSelect_=function(a,b){return 1==a?goog.i18n.ordinalRules.Keyword.ONE:4==a%10&&14!=a%100?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.caSelect_=function(a,b){return 1==a||3==a?goog.i18n.ordinalRules.Keyword.ONE:2==a?goog.i18n.ordinalRules.Keyword.TWO:4==a?goog.i18n.ordinalRules.Keyword.FEW:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.guSelect_=function(a,b){return 1==a?goog.i18n.ordinalRules.Keyword.ONE:2==a||3==a?goog.i18n.ordinalRules.Keyword.TWO:4==a?goog.i18n.ordinalRules.Keyword.FEW:6==a?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.tkSelect_=function(a,b){return 6==a%10||9==a%10||10==a?goog.i18n.ordinalRules.Keyword.FEW:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.gdSelect_=function(a,b){return 1==a||11==a?goog.i18n.ordinalRules.Keyword.ONE:2==a||12==a?goog.i18n.ordinalRules.Keyword.TWO:3==a||13==a?goog.i18n.ordinalRules.Keyword.FEW:goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.kwSelect_=function(a,b){return 1<=a&&4>=a||1<=a%100&&4>=a%100||21<=a%100&&24>=a%100||41<=a%100&&44>=a%100||61<=a%100&&64>=a%100||81<=a%100&&84>=a%100?goog.i18n.ordinalRules.Keyword.ONE:5==a||5==a%100?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.asSelect_=function(a,b){return 1==a||5==a||7==a||8==a||9==a||10==a?goog.i18n.ordinalRules.Keyword.ONE:2==a||3==a?goog.i18n.ordinalRules.Keyword.TWO:4==a?goog.i18n.ordinalRules.Keyword.FEW:6==a?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_;"af"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"am"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); 
+"ar"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);if("ar_DZ"==goog.LOCALE||"ar-DZ"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;if("ar_EG"==goog.LOCALE||"ar-EG"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;"az"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.azSelect_);"be"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.beSelect_); +"bg"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"bn"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.asSelect_);"br"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"bs"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"ca"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.caSelect_);"chr"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +"cs"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"cy"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.cySelect_);"da"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"de"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);if("de_AT"==goog.LOCALE||"de-AT"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_; +if("de_CH"==goog.LOCALE||"de-CH"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;"el"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"en"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_);if("en_AU"==goog.LOCALE||"en-AU"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_;if("en_CA"==goog.LOCALE||"en-CA"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_; +if("en_GB"==goog.LOCALE||"en-GB"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_;if("en_IE"==goog.LOCALE||"en-IE"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_;if("en_IN"==goog.LOCALE||"en-IN"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_;if("en_SG"==goog.LOCALE||"en-SG"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_; +if("en_US"==goog.LOCALE||"en-US"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_;if("en_ZA"==goog.LOCALE||"en-ZA"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_;"es"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);if("es_419"==goog.LOCALE||"es-419"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;if("es_ES"==goog.LOCALE||"es-ES"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_; +if("es_MX"==goog.LOCALE||"es-MX"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;if("es_US"==goog.LOCALE||"es-US"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;"et"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"eu"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"fa"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); 
+"fi"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"fil"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_);"fr"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_);if("fr_CA"==goog.LOCALE||"fr-CA"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_;"ga"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_); +"gl"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"gsw"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"gu"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.guSelect_);"haw"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"he"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"hi"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.guSelect_); +"hr"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"hu"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.huSelect_);"hy"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_);"id"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"in"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"is"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +"it"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.itSelect_);"iw"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"ja"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"ka"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.kaSelect_);"kk"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.kkSelect_);"km"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +"kn"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"ko"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"ky"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"ln"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"lo"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_);"lt"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +"lv"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"mk"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.mkSelect_);"ml"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"mn"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"mo"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_);"mr"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.mrSelect_); 
+"ms"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_);"mt"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"my"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"nb"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"ne"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.neSelect_);"nl"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +"no"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);if("no_NO"==goog.LOCALE||"no-NO"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;"or"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.orSelect_);"pa"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"pl"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +"pt"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);if("pt_BR"==goog.LOCALE||"pt-BR"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;if("pt_PT"==goog.LOCALE||"pt-PT"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;"ro"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_);"ru"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +"sh"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"si"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"sk"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"sl"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"sq"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.sqSelect_);"sr"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +if("sr_Latn"==goog.LOCALE||"sr-Latn"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;"sv"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.svSelect_);"sw"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"ta"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"te"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +"th"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"tl"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_);"tr"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"uk"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.ukSelect_);"ur"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"uz"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +"vi"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_);"zh"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);if("zh_CN"==goog.LOCALE||"zh-CN"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;if("zh_HK"==goog.LOCALE||"zh-HK"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;if("zh_TW"==goog.LOCALE||"zh-TW"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_; 
+"zu"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);cljs.tools={};cljs.tools.reader={};cljs.tools.reader.impl={};cljs.tools.reader.impl.utils={};cljs.tools.reader.impl.utils.char$=function(a){return null==a?null:cljs.core.char$.call(null,a)};cljs.tools.reader.impl.utils.ex_info_QMARK_=function(a){return a instanceof cljs.core.ExceptionInfo}; +cljs.tools.reader.impl.utils.ReaderConditional=function(a,b,c,d,e){this.splicing_QMARK_=a;this.form=b;this.__meta=c;this.__extmap=d;this.__hash=e;this.cljs$lang$protocol_mask$partition0$=2230716170;this.cljs$lang$protocol_mask$partition1$=139264};cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$ILookup$_lookup$arity$2=function(a,b){return this.cljs$core$ILookup$_lookup$arity$3(null,b,null)}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$ILookup$_lookup$arity$3=function(a,b,c){switch(b instanceof cljs.core.Keyword?b.fqn:null){case "splicing?":return this.splicing_QMARK_;case "form":return this.form;default:return cljs.core.get.call(null,this.__extmap,b,c)}}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IKVReduce$_kv_reduce$arity$3=function(a,b,c){return cljs.core.reduce.call(null,function(a,c){var d=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return b.call(null,a,d,c)},c,this)}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IPrintWithWriter$_pr_writer$arity$3=function(a,b,c){return cljs.core.pr_sequential_writer.call(null,b,function(a){return cljs.core.pr_sequential_writer.call(null,b,cljs.core.pr_writer,""," ","",c,a)},"#cljs.tools.reader.impl.utils.ReaderConditional{",", ","}",c,cljs.core.concat.call(null,new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE, +[new cljs.core.Keyword(null,"splicing?","splicing?",-428596366),this.splicing_QMARK_],null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"form","form",-1624062471),this.form],null)],null),this.__extmap))}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IIterable$_iterator$arity$1=function(a){return new cljs.core.RecordIter(0,this,2,new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"splicing?","splicing?",-428596366),new cljs.core.Keyword(null,"form","form",-1624062471)],null),cljs.core.truth_(this.__extmap)?cljs.core._iterator.call(null,this.__extmap):cljs.core.nil_iter.call(null))}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return this.__meta};cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$ICloneable$_clone$arity$1=function(a){return new cljs.tools.reader.impl.utils.ReaderConditional(this.splicing_QMARK_,this.form,this.__meta,this.__extmap,this.__hash)};cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$ICounted$_count$arity$1=function(a){return 2+cljs.core.count.call(null,this.__extmap)}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this.__hash;return null!=a?a:this.__hash=a=-209062840^cljs.core.hash_unordered_coll.call(null,this)}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return 
null!=b&&this.constructor===b.constructor&&cljs.core._EQ_.call(null,this.splicing_QMARK_,b.splicing_QMARK_)&&cljs.core._EQ_.call(null,this.form,b.form)&&cljs.core._EQ_.call(null,this.__extmap,b.__extmap)}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IMap$_dissoc$arity$2=function(a,b){return cljs.core.contains_QMARK_.call(null,new cljs.core.PersistentHashSet(null,new cljs.core.PersistentArrayMap(null,2,[new cljs.core.Keyword(null,"splicing?","splicing?",-428596366),null,new cljs.core.Keyword(null,"form","form",-1624062471),null],null),null),b)?cljs.core.dissoc.call(null,cljs.core._with_meta.call(null,cljs.core.into.call(null,cljs.core.PersistentArrayMap.EMPTY,this),this.__meta), +b):new cljs.tools.reader.impl.utils.ReaderConditional(this.splicing_QMARK_,this.form,this.__meta,cljs.core.not_empty.call(null,cljs.core.dissoc.call(null,this.__extmap,b)),null)}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IAssociative$_assoc$arity$3=function(a,b,c){a=cljs.core.keyword_identical_QMARK_;return cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"splicing?","splicing?",-428596366),b))?new cljs.tools.reader.impl.utils.ReaderConditional(c,this.form,this.__meta,this.__extmap,null):cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"form","form",-1624062471),b))?new cljs.tools.reader.impl.utils.ReaderConditional(this.splicing_QMARK_, +c,this.__meta,this.__extmap,null):new cljs.tools.reader.impl.utils.ReaderConditional(this.splicing_QMARK_,this.form,this.__meta,cljs.core.assoc.call(null,this.__extmap,b,c),null)}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){return cljs.core.seq.call(null,cljs.core.concat.call(null,new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.MapEntry(new cljs.core.Keyword(null,"splicing?","splicing?",-428596366),this.splicing_QMARK_,null),new cljs.core.MapEntry(new cljs.core.Keyword(null,"form","form",-1624062471),this.form,null)],null),this.__extmap))}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return new cljs.tools.reader.impl.utils.ReaderConditional(this.splicing_QMARK_,this.form,b,this.__extmap,this.__hash)}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){return cljs.core.vector_QMARK_.call(null,b)?this.cljs$core$IAssociative$_assoc$arity$3(null,cljs.core._nth.call(null,b,0),cljs.core._nth.call(null,b,1)):cljs.core.reduce.call(null,cljs.core._conj,this,b)}; +cljs.tools.reader.impl.utils.ReaderConditional.getBasis=function(){return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"splicing?","splicing?",1211935161,null),new cljs.core.Symbol(null,"form","form",16469056,null)],null)};cljs.tools.reader.impl.utils.ReaderConditional.cljs$lang$type=!0; +cljs.tools.reader.impl.utils.ReaderConditional.cljs$lang$ctorPrSeq=function(a){return new cljs.core.List(null,"cljs.tools.reader.impl.utils/ReaderConditional",null,1,null)};cljs.tools.reader.impl.utils.ReaderConditional.cljs$lang$ctorPrWriter=function(a,b){return cljs.core._write.call(null,b,"cljs.tools.reader.impl.utils/ReaderConditional")};cljs.tools.reader.impl.utils.__GT_ReaderConditional=function(a,b){return new cljs.tools.reader.impl.utils.ReaderConditional(a,b,null,null,null)}; +cljs.tools.reader.impl.utils.map__GT_ReaderConditional=function(a){var b=cljs.core.dissoc.call(null,a,new 
cljs.core.Keyword(null,"splicing?","splicing?",-428596366),new cljs.core.Keyword(null,"form","form",-1624062471));b=cljs.core.record_QMARK_.call(null,a)?cljs.core.into.call(null,cljs.core.PersistentArrayMap.EMPTY,b):b;return new cljs.tools.reader.impl.utils.ReaderConditional((new cljs.core.Keyword(null,"splicing?","splicing?",-428596366)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null, +"form","form",-1624062471)).cljs$core$IFn$_invoke$arity$1(a),null,cljs.core.not_empty.call(null,b),null)};cljs.tools.reader.impl.utils.reader_conditional_QMARK_=function(a){return a instanceof cljs.tools.reader.impl.utils.ReaderConditional};cljs.tools.reader.impl.utils.reader_conditional=function(a,b){return new cljs.tools.reader.impl.utils.ReaderConditional(b,a,null,null,null)};cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IPrintWithWriter$=cljs.core.PROTOCOL_SENTINEL; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IPrintWithWriter$_pr_writer$arity$3=function(a,b,c){cljs.core._write.call(null,b,["#?",cljs.core.truth_(this.splicing_QMARK_)?"@":null].join(""));return cljs.core.pr_writer.call(null,this.form,b,c)};cljs.tools.reader.impl.utils.ws_rx=/[\s]/;cljs.tools.reader.impl.utils.whitespace_QMARK_=function(a){return null==a?null:","===a?!0:cljs.tools.reader.impl.utils.ws_rx.test(a)}; +cljs.tools.reader.impl.utils.numeric_QMARK_=function(a){return null==a?null:goog.string.isNumeric(a)};cljs.tools.reader.impl.utils.newline_QMARK_=function(a){return"\n"===a||"\n"===a||null==a}; +cljs.tools.reader.impl.utils.desugar_meta=function(a){return a instanceof cljs.core.Keyword?cljs.core.PersistentArrayMap.createAsIfByAssoc([a,!0]):a instanceof cljs.core.Symbol?new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"tag","tag",-1290361223),a],null):"string"===typeof a?new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"tag","tag",-1290361223),a],null):a};cljs.tools.reader.impl.utils.last_id=cljs.core.atom.call(null,0); +cljs.tools.reader.impl.utils.next_id=function(){return cljs.core.swap_BANG_.call(null,cljs.tools.reader.impl.utils.last_id,cljs.core.inc)}; +cljs.tools.reader.impl.utils.namespace_keys=function(a,b){return function e(b){return new cljs.core.LazySeq(null,function(){for(;;){var d=cljs.core.seq.call(null,b);if(d){if(cljs.core.chunked_seq_QMARK_.call(null,d)){var g=cljs.core.chunk_first.call(null,d),h=cljs.core.count.call(null,g),k=cljs.core.chunk_buffer.call(null,h);return function(){for(var b=0;;)if(bthis.s_pos?(a=this.s.charAt(this.s_pos),this.s_pos+=1,a):null};cljs.tools.reader.reader_types.StringReader.prototype.cljs$tools$reader$reader_types$Reader$peek_char$arity$1=function(a){return this.s_len>this.s_pos?this.s.charAt(this.s_pos):null}; +cljs.tools.reader.reader_types.StringReader.getBasis=function(){return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"s","s",-948495851,null),new cljs.core.Symbol(null,"s-len","s-len",1869978331,null),cljs.core.with_meta(new cljs.core.Symbol(null,"s-pos","s-pos",-540562492,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null))],null)}; +cljs.tools.reader.reader_types.StringReader.cljs$lang$type=!0;cljs.tools.reader.reader_types.StringReader.cljs$lang$ctorStr="cljs.tools.reader.reader-types/StringReader";cljs.tools.reader.reader_types.StringReader.cljs$lang$ctorPrWriter=function(a,b,c){return 
cljs.core._write.call(null,b,"cljs.tools.reader.reader-types/StringReader")};cljs.tools.reader.reader_types.__GT_StringReader=function(a,b,c){return new cljs.tools.reader.reader_types.StringReader(a,b,c)}; +cljs.tools.reader.reader_types.NodeReadableReader=function(a,b){this.readable=a;this.buf=b};cljs.tools.reader.reader_types.NodeReadableReader.prototype.cljs$tools$reader$reader_types$Reader$=cljs.core.PROTOCOL_SENTINEL; +cljs.tools.reader.reader_types.NodeReadableReader.prototype.cljs$tools$reader$reader_types$Reader$read_char$arity$1=function(a){if(cljs.core.truth_(this.buf))return a=this.buf[0],this.buf=null,cljs.tools.reader.impl.utils.char$.call(null,a);a=cljs.core.str.cljs$core$IFn$_invoke$arity$1(this.readable.read(1));return cljs.core.truth_(a)?cljs.tools.reader.impl.utils.char$.call(null,a):null}; +cljs.tools.reader.reader_types.NodeReadableReader.prototype.cljs$tools$reader$reader_types$Reader$peek_char$arity$1=function(a){cljs.core.truth_(this.buf)||(this.buf=cljs.core.str.cljs$core$IFn$_invoke$arity$1(this.readable.read(1)));return cljs.core.truth_(this.buf)?cljs.tools.reader.impl.utils.char$.call(null,this.buf[0]):null}; +cljs.tools.reader.reader_types.NodeReadableReader.getBasis=function(){return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"readable","readable",2113054478,null),cljs.core.with_meta(new cljs.core.Symbol(null,"buf","buf",1426618187,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null))],null)};cljs.tools.reader.reader_types.NodeReadableReader.cljs$lang$type=!0; +cljs.tools.reader.reader_types.NodeReadableReader.cljs$lang$ctorStr="cljs.tools.reader.reader-types/NodeReadableReader";cljs.tools.reader.reader_types.NodeReadableReader.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write.call(null,b,"cljs.tools.reader.reader-types/NodeReadableReader")};cljs.tools.reader.reader_types.__GT_NodeReadableReader=function(a,b){return new cljs.tools.reader.reader_types.NodeReadableReader(a,b)}; +cljs.tools.reader.reader_types.PushbackReader=function(a,b,c,d){this.rdr=a;this.buf=b;this.buf_len=c;this.buf_pos=d};cljs.tools.reader.reader_types.PushbackReader.prototype.cljs$tools$reader$reader_types$Reader$=cljs.core.PROTOCOL_SENTINEL; +cljs.tools.reader.reader_types.PushbackReader.prototype.cljs$tools$reader$reader_types$Reader$read_char$arity$1=function(a){a=this.buf_posc?'..."':'"';return['"',cljs.core.str.cljs$core$IFn$_invoke$arity$1(b.substring(0,function(){var a=b.length;return cd?"...}":"}")}); +cljs.core._add_method.call(null,cljs.tools.reader.impl.inspect.inspect_STAR_,new cljs.core.Keyword(null,"set","set",304602554),function(a,b){return cljs.tools.reader.impl.inspect.inspect_STAR__col.call(null,a,b,"#{","}")});cljs.core._add_method.call(null,cljs.tools.reader.impl.inspect.inspect_STAR_,new cljs.core.Keyword(null,"vector","vector",1902966158),function(a,b){return cljs.tools.reader.impl.inspect.inspect_STAR__col.call(null,a,b,"[","]")}); +cljs.core._add_method.call(null,cljs.tools.reader.impl.inspect.inspect_STAR_,new cljs.core.Keyword(null,"default","default",-1987822328),function(a,b){return cljs.core.pr_str.call(null,cljs.core.type.call(null,b))}); +cljs.tools.reader.impl.inspect.inspect=function(a){switch(arguments.length){case 1:return cljs.tools.reader.impl.inspect.inspect.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return 
cljs.tools.reader.impl.inspect.inspect.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.tools.reader.impl.inspect.inspect.cljs$core$IFn$_invoke$arity$1=function(a){return cljs.tools.reader.impl.inspect.inspect_STAR_.call(null,!1,a)};cljs.tools.reader.impl.inspect.inspect.cljs$core$IFn$_invoke$arity$2=function(a,b){return cljs.tools.reader.impl.inspect.inspect_STAR_.call(null,a,b)};cljs.tools.reader.impl.inspect.inspect.cljs$lang$maxFixedArity=2;cljs.tools.reader.impl.errors={}; +cljs.tools.reader.impl.errors.ex_details=function(a,b){b=new cljs.core.PersistentArrayMap(null,2,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"reader-exception","reader-exception",-1938323098),new cljs.core.Keyword(null,"ex-kind","ex-kind",1581199296),b],null);return cljs.tools.reader.reader_types.indexing_reader_QMARK_.call(null,a)?cljs.core.assoc.call(null,b,new cljs.core.Keyword(null,"file","file",-1269645878),cljs.tools.reader.reader_types.get_file_name.call(null,a), +new cljs.core.Keyword(null,"line","line",212345235),cljs.tools.reader.reader_types.get_line_number.call(null,a),new cljs.core.Keyword(null,"col","col",-1959363084),cljs.tools.reader.reader_types.get_column_number.call(null,a)):b}; +cljs.tools.reader.impl.errors.throw_ex=function(a){for(var b=[],c=arguments.length,d=0;;)if(d=a?"+":"-");a=Math.abs(a);b.push(goog.string.padNumber(Math.floor(a/60)%100,2),":",goog.string.padNumber(a%60,2));return b.join("")};goog.i18n.TimeZone.composePosixTimeZoneID_=function(a){if(0==a)return"Etc/GMT";var b=["Etc/GMT",0>a?"-":"+"];a=Math.abs(a);b.push(Math.floor(a/60)%100);a%=60;0!=a&&b.push(":",goog.string.padNumber(a,2));return b.join("")}; +goog.i18n.TimeZone.composeUTCString_=function(a){if(0==a)return"UTC";var b=["UTC",0>a?"+":"-"];a=Math.abs(a);b.push(Math.floor(a/60)%100);a%=60;0!=a&&b.push(":",a);return b.join("")};goog.i18n.TimeZone.prototype.getTimeZoneData=function(){return{id:this.timeZoneId_,std_offset:-this.standardOffset_,names:goog.array.clone(this.tzNames_),names_ext:goog.object.clone(this.tzNamesExt_),transitions:goog.array.clone(this.transitions_)}}; +goog.i18n.TimeZone.prototype.getDaylightAdjustment=function(a){a=Date.UTC(a.getUTCFullYear(),a.getUTCMonth(),a.getUTCDate(),a.getUTCHours(),a.getUTCMinutes())/goog.i18n.TimeZone.MILLISECONDS_PER_HOUR_;for(var b=0;b=this.transitions_[b];)b+=2;return 0==b?0:this.transitions_[b-1]};goog.i18n.TimeZone.prototype.getGMTString=function(a){return goog.i18n.TimeZone.composeGMTString_(this.getOffset(a))};goog.i18n.TimeZone.prototype.getUTCString=function(a){return goog.i18n.TimeZone.composeUTCString_(this.getOffset(a))}; +goog.i18n.TimeZone.prototype.getLongName=function(a){return this.tzNames_[this.isDaylightTime(a)?goog.i18n.TimeZone.NameType.DLT_LONG_NAME:goog.i18n.TimeZone.NameType.STD_LONG_NAME]};goog.i18n.TimeZone.prototype.getOffset=function(a){return this.standardOffset_-this.getDaylightAdjustment(a)};goog.i18n.TimeZone.prototype.getRFCTimeZoneString=function(a){a=-this.getOffset(a);var b=[0>a?"-":"+"];a=Math.abs(a);b.push(goog.string.padNumber(Math.floor(a/60)%100,2),goog.string.padNumber(a%60,2));return b.join("")}; +goog.i18n.TimeZone.prototype.getShortName=function(a){return this.tzNames_[this.isDaylightTime(a)?goog.i18n.TimeZone.NameType.DLT_SHORT_NAME:goog.i18n.TimeZone.NameType.STD_SHORT_NAME]};goog.i18n.TimeZone.prototype.getTimeZoneId=function(){return 
this.timeZoneId_};goog.i18n.TimeZone.prototype.isDaylightTime=function(a){return 0=cljs.core.count.call(null,b)&&cljs.core.every_QMARK_.call(null,function(b){return cljs.core.contains_QMARK_.call(null,a,b)},b)};bigml.dixie.fields={};bigml.dixie.fields.core={};bigml.dixie.fields.core.summary=new cljs.core.Keyword(null,"summary","summary",380847952);bigml.dixie.fields.core.with_summary=function(a,b){return cljs.core.truth_(b)?cljs.core.assoc.call(null,a,new cljs.core.Keyword(null,"summary","summary",380847952),b):a};bigml.dixie.fields.core.categories=cljs.core.comp.call(null,new cljs.core.Keyword(null,"categories","categories",178386610),bigml.dixie.fields.core.summary); +bigml.dixie.fields.core.with_categories=function(a,b){return cljs.core.assoc_in.call(null,a,new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[bigml.dixie.fields.core.summary,new cljs.core.Keyword(null,"categories","categories",178386610)],null),b)};bigml.dixie.fields.core.missing_count=cljs.core.comp.call(null,new cljs.core.Keyword(null,"missing_count","missing_count",-7853302),bigml.dixie.fields.core.summary); +bigml.dixie.fields.core.population=cljs.core.comp.call(null,new cljs.core.Keyword(null,"population","population",-1209901867),bigml.dixie.fields.core.summary);bigml.dixie.fields.core.variance=cljs.core.comp.call(null,new cljs.core.Keyword(null,"variance","variance",1132010827),bigml.dixie.fields.core.summary); +bigml.dixie.fields.core.distribution=function(a){return cljs.core.some.call(null,(new cljs.core.Keyword(null,"summary","summary",380847952)).cljs$core$IFn$_invoke$arity$1(a),new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"categories","categories",178386610),new cljs.core.Keyword(null,"bins","bins",1670395210),new cljs.core.Keyword(null,"counts","counts",234305892)],null))}; +bigml.dixie.fields.core.make_descriptor=function(a){for(var b=[],c=arguments.length,d=0;;)if(d=a)return cljs.core.conj.call(null,b,bigml.dixie.fields.core.digits.call(null,a));var c=cljs.core.quot.call(null,a,16);b=cljs.core.conj.call(null,b,bigml.dixie.fields.core.digits.call(null,cljs.core.mod.call(null,a,16)));a=c}}; +bigml.dixie.fields.core.num__GT_id=function(a){a=bigml.dixie.fields.core.hex_digits.call(null,a,cljs.core.List.EMPTY);return cljs.core.apply.call(null,cljs.core.str,cljs.core.concat.call(null,cljs.core.repeat.call(null,6-cljs.core.count.call(null,a),0),a))};bigml.dixie.fields.core.parent_ids=new cljs.core.Keyword(null,"parent_ids","parent_ids",-1815054092);bigml.dixie.fields.core.parent=cljs.core.comp.call(null,cljs.core.first,bigml.dixie.fields.core.parent_ids); +bigml.dixie.fields.core.with_parent_ids=function(a,b){return cljs.core.empty_QMARK_.call(null,b)?cljs.core.dissoc.call(null,a,new cljs.core.Keyword(null,"parent_ids","parent_ids",-1815054092)):cljs.core.assoc.call(null,a,new cljs.core.Keyword(null,"parent_ids","parent_ids",-1815054092),b)};bigml.dixie.fields.core.child_ids=new cljs.core.Keyword(null,"child_ids","child_ids",1297307435); +bigml.dixie.fields.core.with_child_ids=function(a,b){return cljs.core.empty_QMARK_.call(null,b)?cljs.core.dissoc.call(null,a,new cljs.core.Keyword(null,"child_ids","child_ids",1297307435)):cljs.core.assoc.call(null,a,new cljs.core.Keyword(null,"child_ids","child_ids",1297307435),b)}; +bigml.dixie.fields.core.make_child=function(a){for(var b=[],c=arguments.length,d=0;;)if(de?function(){var a=-e-1;return function(b, +c){return cljs.core.nth.call(null,c,a,null)}}():function(a,b){return 
cljs.core.nth.call(null,a,e,null)};return cljs.core.truth_((new cljs.core.Keyword(null,"txt_","txt_",-1749668228)).cljs$core$IFn$_invoke$arity$1(a))?function(a,b){return c.call(null,cljs.core.nth.call(null,f.call(null,a,b),d,null))}:bigml.dixie.fields.core.regions_QMARK_.call(null,a)?function(a,b){a=cljs.core.nth.call(null,f.call(null,a,b),d,null);return cljs.core.truth_(a)?cljs.core.seq.call(null,"string"===typeof a?c.call(null, +a):a):null}:function(a,b){a=cljs.core.nth.call(null,f.call(null,a,b),d,null);return cljs.core.truth_(a)?"string"===typeof a?c.call(null,a):a:null}};bigml.dixie.flatline.fields.make_getter.cljs$lang$maxFixedArity=3; +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"field","field",-1302436500),function(a,b,c,d){bigml.dixie.flatline.errors.check_arity.call(null,b,1,3);d=bigml.dixie.flatline.fields.find_desc.call(null,b,d,cljs.core.second.call(null,b),!0);a=function(){var a=cljs.core.second.call(null,c);if(cljs.core.truth_(a)){var d=bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,a);a=cljs.core.truth_(d)?a.call(null):d;a=cljs.core.integer_QMARK_.call(null,a)?a: +!1;return cljs.core.truth_(a)?a:bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-type-error","flatline-type-error",845929452),"Field shift must be an int in %s",cljs.core.pr_str.call(null,b))}return null}();var e=cljs.core.assoc.call(null,d,new cljs.core.Keyword(null,"shift","shift",997140064),a),f=bigml.dixie.flatline.fields.make_getter.call(null,e,a);d=function(){var a=cljs.core.second.call(null,cljs.core.next.call(null,c));if(cljs.core.truth_(a)){if(cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null, +a))){var b=a.call(null);return function(a,c){a=f.call(null,a,c);return cljs.core.truth_(a)?a:b}}return function(b,c){var d=f.call(null,b,c);return cljs.core.truth_(d)?d:a.call(null,b,c)}}return f}();var g=function(){var a=bigml.dixie.flatline.types.optype__GT_type.call(null,cljs.core.name.call(null,function(){var a=(new cljs.core.Keyword(null,"optype","optype",-1789210098)).cljs$core$IFn$_invoke$arity$1(e);return cljs.core.truth_(a)?a:""}()));return cljs.core.truth_(a)?a:bigml.dixie.flatline.utils.raise.call(null, +new cljs.core.Keyword(null,"flatline-field-not-found","flatline-field-not-found",-1248006758),"No type information for field %s",cljs.core.pr_str.call(null,cljs.core.second.call(null,b)))}();return bigml.dixie.flatline.types.with_type.call(null,d,g,null,e,a)}); +bigml.dixie.flatline.expand.define_syntax_STAR_.call(null,new cljs.core.Symbol(null,"f","f",43394975,null),cljs.core.list(new cljs.core.Symbol(null,"id","id",252129435,null),new cljs.core.Symbol(null,"...","...",-1926939749,null)),cljs.core.list(new cljs.core.Symbol(null,"field","field",338095027,null),new cljs.core.Symbol(null,"id","id",252129435,null),new cljs.core.Symbol(null,"...","...",-1926939749,null))); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"missing","missing",362507769),function(a,b,c,d){bigml.dixie.flatline.errors.check_arity.call(null,b,1,2);var e=bigml.dixie.flatline.eval.primop.call(null,new cljs.core.Keyword(null,"field","field",-1302436500),b,c,d);a=cljs.core.dissoc.call(null,cljs.core.meta.call(null,e),new cljs.core.Keyword(null,"fd","fd",-1524403E3));var f=cljs.core.conj.call(null,cljs.core.set.call(null,(new cljs.core.Keyword(null,"missing_tokens", 
+"missing_tokens",-1726684288)).cljs$core$IFn$_invoke$arity$1(bigml.dixie.flatline.types.fn_desc.call(null,e))),null,"");return bigml.dixie.flatline.types.as_bool.call(null,cljs.core.with_meta.call(null,function(a,b){return cljs.core.contains_QMARK_.call(null,f,e.call(null,a,b))},a))}); +bigml.dixie.flatline.fields.row_types=function(a){a=bigml.dixie.fields.core.linearize_field_descriptors.call(null,a);return cljs.core.map.call(null,cljs.core.comp.call(null,bigml.dixie.flatline.types.optype__GT_type,cljs.core.name,bigml.dixie.fields.core.optype),a)}; +bigml.dixie.flatline.fields.row_getter=function(a){a=bigml.dixie.fields.core.linearize_field_descriptors.call(null,a);return bigml.dixie.flatline.types.with_type.call(null,function(a,c){return cljs.core.first.call(null,a)},bigml.dixie.flatline.fields.row_types.call(null,a),null,a)}; +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"all","all",892129742),function(a,b,c,d){bigml.dixie.flatline.errors.check_arity.call(null,b,0);return bigml.dixie.flatline.fields.row_getter.call(null,d)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"all-but","all-but",683089552),function(a,b,c,d){cljs.core.every_QMARK_.call(null,bigml.dixie.flatline.types.constant_fn_QMARK_,c)||bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-type-error","flatline-type-error",845929452),"all-but's arguments must be constants in %s",b);var e=cljs.core.set.call(null,cljs.core.keep.call(null,function(a){return cljs.core.first.call(null,bigml.dixie.flatline.fields.find_desc_STAR_.call(null, +b,d,a.call(null),!0))},c));a=cljs.core.filter.call(null,function(a){return cljs.core.not.call(null,e.call(null,(new cljs.core.Keyword(null,"id","id",-1388402092)).cljs$core$IFn$_invoke$arity$1(a)))},bigml.dixie.fields.core.linearize_field_descriptors.call(null,d));c=bigml.dixie.fields.core.col_positions.call(null,cljs.core.map.call(null,new cljs.core.Keyword(null,"id","id",-1388402092),a),d);var f=cljs.core.apply.call(null,cljs.core.juxt,cljs.core.map.call(null,function(a){return function(b){return cljs.core.nth.call(null, +b,a,null)}},c));c=cljs.core.map.call(null,function(a){return bigml.dixie.flatline.types.optype__GT_type.call(null,cljs.core.name.call(null,(new cljs.core.Keyword(null,"optype","optype",-1789210098)).cljs$core$IFn$_invoke$arity$1(a)))},a);return bigml.dixie.flatline.types.with_type.call(null,function(a,b){return f.call(null,cljs.core.first.call(null,a))},c,null,a)}); +bigml.dixie.flatline.fields.check_kv=function(a,b,c){var d=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);d=cljs.core.first.call(null,bigml.dixie.flatline.fields.find_desc_STAR_.call(null,a,b,d,!0));b=b.call(null,d);b=cljs.core.truth_(b)?bigml.dixie.flatline.utils.field_value.call(null,b,c):null;cljs.core.truth_(b)||bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-type-error","flatline-type-error",845929452),"Invalid value %s for field %s in %s", +cljs.core.pr_str.call(null,c),d,a);return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[d,b],null)}; +bigml.dixie.flatline.fields.idvs__GT_posvs=function(a,b,c){var d=bigml.dixie.fields.core.mappify_field_descriptors.call(null,b);c=cljs.core.keep.call(null,function(b){return bigml.dixie.flatline.fields.check_kv.call(null,a,d,b)},c);b=bigml.dixie.fields.core.col_positions.call(null,cljs.core.map.call(null,cljs.core.first,c),b);return 
cljs.core.map.call(null,function(a,b){return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[a,cljs.core.second.call(null,b)],null)},b, +c)};bigml.dixie.flatline.fields.not_there=cljs.core.gensym.call(null); +bigml.dixie.flatline.fields.follow_keys=function(a,b){for(;;){if(cljs.core.empty_QMARK_.call(null,b))return a;if(!cljs.core.coll_QMARK_.call(null,a)||cljs.core.empty_QMARK_.call(null,a))return null;var c=cljs.core.first.call(null,b),d=cljs.core.get.call(null,a,cljs.core.integer_QMARK_.call(null,c)?c:cljs.core.keyword.call(null,c),bigml.dixie.flatline.fields.not_there);a=cljs.core._EQ_.call(null,bigml.dixie.flatline.fields.not_there,d)?cljs.core.sequential_QMARK_.call(null,a)&&!cljs.core.integer_QMARK_.call(null, +c)?cljs.core.some.call(null,function(a,b,c,d){return function(a){return cljs.core._EQ_.call(null,cljs.core.first.call(null,a),c)?cljs.core.second.call(null,a):null}}(a,b,c,d),a):null:d;b=cljs.core.rest.call(null,b)}}; +bigml.dixie.flatline.fields.find_prop=function(a,b,c,d,e){b=bigml.dixie.flatline.fields.find_desc.call(null,a,b,c,e);b=bigml.dixie.flatline.fields.follow_keys.call(null,b,d);return"number"===typeof b?new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[b,new cljs.core.Keyword(null,"numeric","numeric",-1495594714)],null):"string"===typeof b?new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[b,new cljs.core.Keyword(null,"string","string",-1989541586)], +null):!0===b||!1===b?new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[b,new cljs.core.Keyword(null,"boolean","boolean",-1919418404)],null):cljs.core.truth_(e)?bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-malformed-expression","flatline-malformed-expression",1136890174),"Invalid field path %s in %s",cljs.core.pr_str.call(null,cljs.core.cons.call(null,c,d)),a):null}; +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"all-with-defaults","all-with-defaults",1393857544),function(){var a=function(a,b,e,f,g){cljs.core.every_QMARK_.call(null,bigml.dixie.flatline.types.constant_fn_QMARK_,e)||bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-type-error","flatline-type-error",845929452),"all-with-default's arguments must be constant in %s",b);cljs.core.odd_QMARK_.call(null,cljs.core.count.call(null, +e))&&bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-invalid-arguments","flatline-invalid-arguments",1649316504),"all-with-default's takes an even number of arguments in %s",b);a=bigml.dixie.fields.core.linearize_field_descriptors.call(null,f);e=cljs.core.partition.call(null,2,cljs.core.map.call(null,function(a){return a.call(null)},e));var c=cljs.core.into.call(null,cljs.core.PersistentArrayMap.EMPTY,bigml.dixie.flatline.fields.idvs__GT_posvs.call(null,b,a,e)),d=cljs.core.range.call(null, +0,cljs.core.count.call(null,a));b=cljs.core.map.call(null,function(a){return bigml.dixie.flatline.types.optype__GT_type.call(null,cljs.core.name.call(null,(new cljs.core.Keyword(null,"optype","optype",-1789210098)).cljs$core$IFn$_invoke$arity$1(a)))},a);return bigml.dixie.flatline.types.with_type.call(null,function(a,b){a=cljs.core.nth.call(null,a,0,null);return cljs.core.mapv.call(null,function(a,b){return null==a?c.call(null,b):a},a,d)},b,null,a)},b=function(b,d,e,f,g){var c=null;if(4a?null:Math.sqrt(a)}.call(null,e.call(null)),new 
cljs.core.Keyword(null,"numeric","numeric",-1495594714)):bigml.dixie.flatline.types.with_type.call(null, +function(a,b){a=e.call(null,a,b);return cljs.core.truth_(a)?(a=0>a?null:Math.sqrt(a),a):null},new cljs.core.Keyword(null,"numeric","numeric",-1495594714))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"ln","ln",1974894440),function(a,b,c,d){bigml.dixie.flatline.types.check_numbers.call(null,1,b,c);var e=cljs.core.first.call(null,c);return cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,e))?bigml.dixie.flatline.types.constant_fn.call(null,function(a){return 0=a?Math.acos(a):null}.call(null,e.call(null)),new cljs.core.Keyword(null,"numeric","numeric",-1495594714)):bigml.dixie.flatline.types.with_type.call(null, +function(a,b){a=e.call(null,a,b);return cljs.core.truth_(a)?(a=-1<=a&&1>=a?Math.acos(a):null,a):null},new cljs.core.Keyword(null,"numeric","numeric",-1495594714))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"asin","asin",1750305199),function(a,b,c,d){bigml.dixie.flatline.types.check_numbers.call(null,1,b,c);var e=cljs.core.first.call(null,c);return cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,e))?bigml.dixie.flatline.types.constant_fn.call(null,function(a){return-1<=a&&1>=a?Math.asin(a):null}.call(null,e.call(null)),new cljs.core.Keyword(null,"numeric","numeric",-1495594714)):bigml.dixie.flatline.types.with_type.call(null, +function(a,b){a=e.call(null,a,b);return cljs.core.truth_(a)?(a=-1<=a&&1>=a?Math.asin(a):null,a):null},new cljs.core.Keyword(null,"numeric","numeric",-1495594714))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"atan","atan",1627885634),function(a,b,c,d){bigml.dixie.flatline.types.check_numbers.call(null,1,b,c);var e=cljs.core.first.call(null,c);return cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,e))?bigml.dixie.flatline.types.constant_fn.call(null,Math.atan(e.call(null)),new cljs.core.Keyword(null,"numeric","numeric",-1495594714)):bigml.dixie.flatline.types.with_type.call(null,function(a, +b){a=e.call(null,a,b);return cljs.core.truth_(a)?Math.atan(a):null},new cljs.core.Keyword(null,"numeric","numeric",-1495594714))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"to-radians","to-radians",1089873499),function(a,b,c,d){bigml.dixie.flatline.types.check_numbers.call(null,1,b,c);var e=cljs.core.first.call(null,c);return cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,e))?bigml.dixie.flatline.types.constant_fn.call(null,function(a){return Math.PI/180*a}.call(null,e.call(null)),new cljs.core.Keyword(null,"numeric","numeric",-1495594714)):bigml.dixie.flatline.types.with_type.call(null, +function(a,b){a=e.call(null,a,b);return cljs.core.truth_(a)?Math.PI/180*a:null},new cljs.core.Keyword(null,"numeric","numeric",-1495594714))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"to-degrees","to-degrees",-325601763),function(a,b,c,d){bigml.dixie.flatline.types.check_numbers.call(null,1,b,c);var e=cljs.core.first.call(null,c);return cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,e))?bigml.dixie.flatline.types.constant_fn.call(null,function(a){return 180/Math.PI*a}.call(null,e.call(null)),new cljs.core.Keyword(null,"numeric","numeric",-1495594714)):bigml.dixie.flatline.types.with_type.call(null, 
+function(a,b){a=e.call(null,a,b);return cljs.core.truth_(a)?180/Math.PI*a:null},new cljs.core.Keyword(null,"numeric","numeric",-1495594714))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"cosh","cosh",691647627),function(a,b,c,d){bigml.dixie.flatline.types.check_numbers.call(null,1,b,c);var e=cljs.core.first.call(null,c);return cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,e))?bigml.dixie.flatline.types.constant_fn.call(null,Math.cosh(e.call(null)),new cljs.core.Keyword(null,"numeric","numeric",-1495594714)):bigml.dixie.flatline.types.with_type.call(null,function(a, +b){a=e.call(null,a,b);return cljs.core.truth_(a)?Math.cosh(a):null},new cljs.core.Keyword(null,"numeric","numeric",-1495594714))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"sinh","sinh",1664012467),function(a,b,c,d){bigml.dixie.flatline.types.check_numbers.call(null,1,b,c);var e=cljs.core.first.call(null,c);return cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,e))?bigml.dixie.flatline.types.constant_fn.call(null,Math.sinh(e.call(null)),new cljs.core.Keyword(null,"numeric","numeric",-1495594714)):bigml.dixie.flatline.types.with_type.call(null,function(a, +b){a=e.call(null,a,b);return cljs.core.truth_(a)?Math.sinh(a):null},new cljs.core.Keyword(null,"numeric","numeric",-1495594714))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tanh","tanh",-1160049730),function(a,b,c,d){bigml.dixie.flatline.types.check_numbers.call(null,1,b,c);var e=cljs.core.first.call(null,c);return cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,e))?bigml.dixie.flatline.types.constant_fn.call(null,Math.tanh(e.call(null)),new cljs.core.Keyword(null,"numeric","numeric",-1495594714)):bigml.dixie.flatline.types.with_type.call(null,function(a, +b){a=e.call(null,a,b);return cljs.core.truth_(a)?Math.tanh(a):null},new cljs.core.Keyword(null,"numeric","numeric",-1495594714))});bigml.dixie.flatline.expand.define_syntax_STAR_.call(null,new cljs.core.Symbol(null,"square","square",-1842001092,null),cljs.core.list(new cljs.core.Symbol(null,"x","x",-555367584,null)),cljs.core.list(new cljs.core.Symbol(null,"*","*",345799209,null),new cljs.core.Symbol(null,"x","x",-555367584,null),new cljs.core.Symbol(null,"x","x",-555367584,null))); +bigml.dixie.flatline.expand.define_syntax_STAR_.call(null,new cljs.core.Symbol(null,"even?","even?",-1827825394,null),cljs.core.list(new cljs.core.Symbol(null,"x","x",-555367584,null)),cljs.core.list(new cljs.core.Symbol(null,"\x3d","\x3d",-1501502141,null),0,cljs.core.list(new cljs.core.Symbol(null,"mod","mod",1510044207,null),new cljs.core.Symbol(null,"x","x",-555367584,null),2))); +bigml.dixie.flatline.expand.define_syntax_STAR_.call(null,new cljs.core.Symbol(null,"odd?","odd?",-1458588199,null),cljs.core.list(new cljs.core.Symbol(null,"x","x",-555367584,null)),cljs.core.list(new cljs.core.Symbol(null,"\x3d","\x3d",-1501502141,null),1,cljs.core.list(new cljs.core.Symbol(null,"mod","mod",1510044207,null),new cljs.core.Symbol(null,"x","x",-555367584,null),2))); +bigml.dixie.flatline.expand.define_syntax_STAR_.call(null,new cljs.core.Symbol(null,"zero?","zero?",325758897,null),cljs.core.list(new cljs.core.Symbol(null,"x","x",-555367584,null)),cljs.core.list(new cljs.core.Symbol(null,"\x3d","\x3d",-1501502141,null),0,new cljs.core.Symbol(null,"x","x",-555367584,null))); 
+cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"rand","rand",908504774),function(a,b,c,d){bigml.dixie.flatline.types.check_numbers.call(null,0,b,null);return bigml.dixie.flatline.types.with_type.call(null,function(){var a=function(a){return cljs.core.rand.call(null)},b=function(b){var c=null;if(0=c?(c=Math.sqrt(c),c=-1<=c&&1>=c?Math.asin(c):null,cljs.core.truth_(c)?2*c:null):null};bigml.dixie.flatline.math.spherical_distance_deg=function(a){for(var b=[],c=arguments.length,d=0;;)if(d=cljs.core.first.call(null,b)?a:a+cljs.core.second.call(null,b)},0,b)/(new cljs.core.Keyword(null,"population","population",-1209901867)).cljs$core$IFn$_invoke$arity$2(a,1)}; +bigml.dixie.flatline.fuzzy_logic.check_field_types=function(a,b){return cljs.core.every_QMARK_.call(null,function(a){return bigml.dixie.flatline.types.numeric_fn_QMARK_.call(null,a)||bigml.dixie.flatline.types.string_fn_QMARK_.call(null,a)},new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[a,b],null))?null:bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-type-error","flatline-type-error",845929452),"Both arguments expected to have type numeric or string")}; +bigml.dixie.flatline.fuzzy_logic.check_logic_values=function(a,b){return cljs.core.truth_(cljs.core.truth_(a)?cljs.core.truth_(b)?0<=a&&1>=a&&0<=b&&1>=b:b:a)?new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[a,b],null):null}; +bigml.dixie.flatline.fuzzy_logic.check_norm_parameter=function(a){for(var b=[],c=arguments.length,d=0;;)if(dbigml.dixie.flatline.fuzzy_logic.out_of_range.call(null, +b)?bigml.dixie.flatline.fields.make_getter.call(null,a,0):bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-invalid-arguments","flatline-invalid-arguments",1649316504),"More than 20%% of the input field values are not between 0 and 1, try to normalize them")):bigml.dixie.flatline.types.numeric_fn_QMARK_.call(null,c)?c:null}; +bigml.dixie.flatline.fuzzy_logic.apply_norm=function(a){for(var b=[],c=arguments.length,d=0;;)if(da?0:a};bigml.dixie.flatline.fuzzy_logic.tnorm_drastic=function(a,b){return 1===a?b:1===b?a:0};bigml.dixie.flatline.fuzzy_logic.tnorm_nilpotent_min=function(a,b){return 1b?a:b}; +bigml.dixie.flatline.fuzzy_logic.tconorm_probabilistic_sum=function(a,b){return a+b-a*b};bigml.dixie.flatline.fuzzy_logic.tconorm_bounded_sum=function(a,b){a+=b;return 1>a?a:1};bigml.dixie.flatline.fuzzy_logic.tconorm_drastic=function(a,b){return 0===a?b:0===b?a:1};bigml.dixie.flatline.fuzzy_logic.tconorm_nilpotent_max=function(a){for(var b=[],c=arguments.length,d=0;;)if(db+a?b>a?b:a:1};bigml.dixie.flatline.fuzzy_logic.tconorm_nilpotent_max.cljs$lang$maxFixedArity=0;bigml.dixie.flatline.fuzzy_logic.tconorm_nilpotent_max.cljs$lang$applyTo=function(a){return this.cljs$core$IFn$_invoke$arity$variadic(cljs.core.seq.call(null,a))}; +bigml.dixie.flatline.fuzzy_logic.tconorm_einstein_sum=function(a,b){return(a+b)/(1+a*b)}; +bigml.dixie.flatline.fuzzy_logic.tnorm_schweizer_sklar=function(a){bigml.dixie.flatline.fuzzy_logic.check_norm_parameter.call(null,a,"schweizer-sklar");return 0>a?function(b,c){return 
bigml.dixie.flatline.utils.pow.call(null,bigml.dixie.flatline.utils.pow.call(null,b,a)+bigml.dixie.flatline.utils.pow.call(null,c,a)+-1,1/a)}:0===a?bigml.dixie.flatline.fuzzy_logic.tnorm_product:0d?0:d}(),1/a)}:null};bigml.dixie.flatline.fuzzy_logic.tnorm_hamacher=function(a){bigml.dixie.flatline.fuzzy_logic.check_norm_parameter.call(null,a,"hamacher",0);var b=function(b,d){return b*d/(a+(1-a)*(b+(d-b*d)))};return 0===a?function(a,d){return a===d&&0===d?0:b.call(null,a,d)}:function(a,d){return b.call(null,a,d)}}; +bigml.dixie.flatline.fuzzy_logic.tnorm_frank=function(a){bigml.dixie.flatline.fuzzy_logic.check_norm_parameter.call(null,a,"frank",0);return 0===a?bigml.dixie.flatline.fuzzy_logic.tnorm_min:1===a?bigml.dixie.flatline.fuzzy_logic.tnorm_product:function(b,c){return bigml.dixie.flatline.utils.log.call(null,a,1+(bigml.dixie.flatline.utils.pow.call(null,a,b)-1)*(bigml.dixie.flatline.utils.pow.call(null,a,c)-1)/(a-1))}}; +bigml.dixie.flatline.fuzzy_logic.tnorm_yager=function(a){bigml.dixie.flatline.fuzzy_logic.check_norm_parameter.call(null,a,"yager",0);return 0===a?bigml.dixie.flatline.fuzzy_logic.tnorm_drastic:function(b,c){b=1-bigml.dixie.flatline.utils.pow.call(null,bigml.dixie.flatline.utils.pow.call(null,1-b,a)+bigml.dixie.flatline.utils.pow.call(null,1-c,a),1/a);return 0>b?0:b}}; +bigml.dixie.flatline.fuzzy_logic.tnorm_aczel_alsina=function(a){bigml.dixie.flatline.fuzzy_logic.check_norm_parameter.call(null,a,"aczel-alsina",0);return 0===a?bigml.dixie.flatline.fuzzy_logic.tnorm_drastic:function(b,c){return bigml.dixie.flatline.utils.exp.call(null,-1*bigml.dixie.flatline.utils.pow.call(null,bigml.dixie.flatline.utils.pow.call(null,bigml.dixie.flatline.utils.abs.call(null,bigml.dixie.flatline.utils.log.call(null,10,b)),a)+bigml.dixie.flatline.utils.pow.call(null,bigml.dixie.flatline.utils.abs.call(null, +bigml.dixie.flatline.utils.log.call(null,10,c)),a),1/a))}};bigml.dixie.flatline.fuzzy_logic.tnorm_dombi=function(a){bigml.dixie.flatline.fuzzy_logic.check_norm_parameter.call(null,a,"dombi",0);return 0===a?bigml.dixie.flatline.fuzzy_logic.tnorm_drastic:function(b,c){return 0===b||0===c?0:1/(1+bigml.dixie.flatline.utils.pow.call(null,bigml.dixie.flatline.utils.pow.call(null,(1-b)/b,a)+bigml.dixie.flatline.utils.pow.call(null,(1-c)/c,a),1/a))}}; +bigml.dixie.flatline.fuzzy_logic.tnorm_sugeno_weber=function(a){bigml.dixie.flatline.fuzzy_logic.check_norm_parameter.call(null,a,"sugeno-weber",-1);return cljs.core._EQ_.call(null,a,-1)?bigml.dixie.flatline.fuzzy_logic.tnorm_drastic:function(b,c){b=(b+c+-1+a*b*c)/(1+a);return 0>b?0:b}}; +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-min","tnorm-min",-908787833),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_min)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-product","tnorm-product",-1276477234),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_product)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-lukasiewicz","tnorm-lukasiewicz",-625721216),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return 
bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_lukasiewicz)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-drastic","tnorm-drastic",1507892347),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_drastic)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-nilpotent-min","tnorm-nilpotent-min",-195877317),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_nilpotent_min)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tconorm-max","tconorm-max",-730874243),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tconorm_max)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tconorm-probabilistic-sum","tconorm-probabilistic-sum",-878905728),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tconorm_probabilistic_sum)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tconorm-bounded-sum","tconorm-bounded-sum",-1654068829),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tconorm_bounded_sum)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tconorm-drastic","tconorm-drastic",-453939904),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tconorm_drastic)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tconorm-nilpotent-max","tconorm-nilpotent-max",383516123),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tconorm_nilpotent_max)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tconorm-einstein-sum","tconorm-einstein-sum",1566574325),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tconorm_einstein_sum)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-schweizer-sklar","tnorm-schweizer-sklar",1972745550),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);var e=cljs.core.nth.call(null,c,1,null);c=cljs.core.nth.call(null,c,2,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,e,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_schweizer_sklar.call(null,a.call(null)),new cljs.core.Keyword(null,"parametric","parametric",-273569731))}); 
+cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-hamacher","tnorm-hamacher",-908773099),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);var e=cljs.core.nth.call(null,c,1,null);c=cljs.core.nth.call(null,c,2,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,e,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_hamacher.call(null,a.call(null)),new cljs.core.Keyword(null,"parametric","parametric",-273569731))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-frank","tnorm-frank",1093115450),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);var e=cljs.core.nth.call(null,c,1,null);c=cljs.core.nth.call(null,c,2,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,e,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_frank.call(null,a.call(null)),new cljs.core.Keyword(null,"parametric","parametric",-273569731))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-yager","tnorm-yager",949011974),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);var e=cljs.core.nth.call(null,c,1,null);c=cljs.core.nth.call(null,c,2,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,e,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_yager.call(null,a.call(null)),new cljs.core.Keyword(null,"parametric","parametric",-273569731))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-aczel-alsina","tnorm-aczel-alsina",354898993),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);var e=cljs.core.nth.call(null,c,1,null);c=cljs.core.nth.call(null,c,2,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,e,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_aczel_alsina.call(null,a.call(null)),new cljs.core.Keyword(null,"parametric","parametric",-273569731))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-dombi","tnorm-dombi",-310984956),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);var e=cljs.core.nth.call(null,c,1,null);c=cljs.core.nth.call(null,c,2,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,e,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_dombi.call(null,a.call(null)),new cljs.core.Keyword(null,"parametric","parametric",-273569731))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-sugeno-weber","tnorm-sugeno-weber",587108434),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);var e=cljs.core.nth.call(null,c,1,null);c=cljs.core.nth.call(null,c,2,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,e,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_sugeno_weber.call(null,a.call(null)),new cljs.core.Keyword(null,"parametric","parametric",-273569731))});goog.i18n.DateTimeFormat=function(a,b){goog.asserts.assert(void 0!==a,"Pattern must be defined");goog.asserts.assert(void 0!==b||void 0!==goog.i18n.DateTimeSymbols,"goog.i18n.DateTimeSymbols or explicit symbols must be defined");this.patternParts_=[];this.dateTimeSymbols_=b||goog.i18n.DateTimeSymbols;"number"==typeof a?this.applyStandardPattern_(a):this.applyPattern_(a)}; 
+goog.i18n.DateTimeFormat.Format={FULL_DATE:0,LONG_DATE:1,MEDIUM_DATE:2,SHORT_DATE:3,FULL_TIME:4,LONG_TIME:5,MEDIUM_TIME:6,SHORT_TIME:7,FULL_DATETIME:8,LONG_DATETIME:9,MEDIUM_DATETIME:10,SHORT_DATETIME:11};goog.i18n.DateTimeFormat.TOKENS_=[/^'(?:[^']|'')*('|$)/,/^(?:G+|y+|Y+|M+|k+|S+|E+|a+|h+|K+|H+|c+|L+|Q+|d+|m+|s+|v+|V+|w+|z+|Z+)/,/^[^'GyYMkSEahKHcLQdmsvVwzZ]+/];goog.i18n.DateTimeFormat.PartTypes_={QUOTED_STRING:0,FIELD:1,LITERAL:2}; +goog.i18n.DateTimeFormat.getHours_=function(a){return a.getHours?a.getHours():0}; +goog.i18n.DateTimeFormat.prototype.applyPattern_=function(a){for(goog.i18n.DateTimeFormat.removeRlmInPatterns_&&(a=a.replace(/\u200f/g,""));a;){for(var b=a,c=0;ca)var b=this.dateTimeSymbols_.DATEFORMATS[a];else if(8>a)b=this.dateTimeSymbols_.TIMEFORMATS[a-4];else if(12>a)b=this.dateTimeSymbols_.DATETIMEFORMATS[a-8],b=b.replace("{1}",this.dateTimeSymbols_.DATEFORMATS[a-8]),b=b.replace("{0}",this.dateTimeSymbols_.TIMEFORMATS[a-8]);else{this.applyStandardPattern_(goog.i18n.DateTimeFormat.Format.MEDIUM_DATETIME);return}this.applyPattern_(b)}; +goog.i18n.DateTimeFormat.prototype.localizeNumbers_=function(a){return goog.i18n.DateTimeFormat.localizeNumbers(a,this.dateTimeSymbols_)};goog.i18n.DateTimeFormat.enforceAsciiDigits_=!1;goog.i18n.DateTimeFormat.removeRlmInPatterns_=!1;goog.i18n.DateTimeFormat.setEnforceAsciiDigits=function(a){goog.i18n.DateTimeFormat.enforceAsciiDigits_=a;goog.i18n.DateTimeFormat.removeRlmInPatterns_=a};goog.i18n.DateTimeFormat.isEnforceAsciiDigits=function(){return goog.i18n.DateTimeFormat.enforceAsciiDigits_}; +goog.i18n.DateTimeFormat.localizeNumbers=function(a,b){a=String(a);b=b||goog.i18n.DateTimeSymbols;if(void 0===b.ZERODIGIT||goog.i18n.DateTimeFormat.enforceAsciiDigits_)return a;for(var c=[],d=0;d<a.length;d++){var e=a.charCodeAt(d);c.push(48<=e&&57>=e?String.fromCharCode(b.ZERODIGIT+e-48):a.charAt(d))}return c.join("")};goog.i18n.DateTimeFormat.prototype.formatEra_=function(a,b){b=0<b.getFullYear()?1:0;return 4<=a?this.dateTimeSymbols_.ERANAMES[b]:this.dateTimeSymbols_.ERAS[b]};goog.i18n.DateTimeFormat.prototype.formatYear_=function(a,b){b=b.getFullYear();0>b&&(b=-b);2==a&&(b%=100);return this.localizeNumbers_(goog.string.padNumber(b,a))};goog.i18n.DateTimeFormat.prototype.formatYearOfWeek_=function(a,b){b=goog.date.getYearOfWeek(b.getFullYear(),b.getMonth(),b.getDate(),this.dateTimeSymbols_.FIRSTWEEKCUTOFFDAY,this.dateTimeSymbols_.FIRSTDAYOFWEEK);0>b&&(b=-b);2==a&&(b%=100);return this.localizeNumbers_(goog.string.padNumber(b,a))}; +goog.i18n.DateTimeFormat.prototype.formatMonth_=function(a,b){b=b.getMonth();switch(a){case 5:return this.dateTimeSymbols_.NARROWMONTHS[b];case 4:return this.dateTimeSymbols_.MONTHS[b];case 3:return this.dateTimeSymbols_.SHORTMONTHS[b];default:return this.localizeNumbers_(goog.string.padNumber(b+1,a))}}; +goog.i18n.DateTimeFormat.validateDateHasTime_=function(a){if(!(a.getHours&&a.getSeconds&&a.getMinutes))throw Error("The date to format has no time (probably a goog.date.Date).
Use Date or goog.date.DateTime, or use a pattern without time fields.");};goog.i18n.DateTimeFormat.prototype.format24Hours_=function(a,b){goog.i18n.DateTimeFormat.validateDateHasTime_(b);b=goog.i18n.DateTimeFormat.getHours_(b)||24;return this.localizeNumbers_(goog.string.padNumber(b,a))}; +goog.i18n.DateTimeFormat.prototype.formatFractionalSeconds_=function(a,b){b=b.getMilliseconds()/1E3;return this.localizeNumbers_(b.toFixed(Math.min(3,a)).substr(2)+(3<a?goog.string.padNumber(0,a-3):""))};goog.i18n.DateTimeFormat.prototype.formatDayOfWeek_=function(a,b){b=b.getDay();return 4<=a?this.dateTimeSymbols_.WEEKDAYS[b]:this.dateTimeSymbols_.SHORTWEEKDAYS[b]};goog.i18n.DateTimeFormat.prototype.formatAmPm_=function(a,b){goog.i18n.DateTimeFormat.validateDateHasTime_(b);a=goog.i18n.DateTimeFormat.getHours_(b);return this.dateTimeSymbols_.AMPMS[12<=a&&24>a?1:0]};goog.i18n.DateTimeFormat.prototype.format1To12Hours_=function(a,b){goog.i18n.DateTimeFormat.validateDateHasTime_(b);b=goog.i18n.DateTimeFormat.getHours_(b)%12||12;return this.localizeNumbers_(goog.string.padNumber(b,a))}; +goog.i18n.DateTimeFormat.prototype.format0To11Hours_=function(a,b){goog.i18n.DateTimeFormat.validateDateHasTime_(b);b=goog.i18n.DateTimeFormat.getHours_(b)%12;return this.localizeNumbers_(goog.string.padNumber(b,a))};goog.i18n.DateTimeFormat.prototype.format0To23Hours_=function(a,b){goog.i18n.DateTimeFormat.validateDateHasTime_(b);b=goog.i18n.DateTimeFormat.getHours_(b);return this.localizeNumbers_(goog.string.padNumber(b,a))}; +goog.i18n.DateTimeFormat.prototype.formatStandaloneDay_=function(a,b){b=b.getDay();switch(a){case 5:return this.dateTimeSymbols_.STANDALONENARROWWEEKDAYS[b];case 4:return this.dateTimeSymbols_.STANDALONEWEEKDAYS[b];case 3:return this.dateTimeSymbols_.STANDALONESHORTWEEKDAYS[b];default:return this.localizeNumbers_(goog.string.padNumber(b,1))}}; +goog.i18n.DateTimeFormat.prototype.formatStandaloneMonth_=function(a,b){b=b.getMonth();switch(a){case 5:return this.dateTimeSymbols_.STANDALONENARROWMONTHS[b];case 4:return this.dateTimeSymbols_.STANDALONEMONTHS[b];case 3:return this.dateTimeSymbols_.STANDALONESHORTMONTHS[b];default:return this.localizeNumbers_(goog.string.padNumber(b+1,a))}};goog.i18n.DateTimeFormat.prototype.formatQuarter_=function(a,b){b=Math.floor(b.getMonth()/3);return 4>a?this.dateTimeSymbols_.SHORTQUARTERS[b]:this.dateTimeSymbols_.QUARTERS[b]}; +goog.i18n.DateTimeFormat.prototype.formatDate_=function(a,b){return this.localizeNumbers_(goog.string.padNumber(b.getDate(),a))};goog.i18n.DateTimeFormat.prototype.formatMinutes_=function(a,b){goog.i18n.DateTimeFormat.validateDateHasTime_(b);return this.localizeNumbers_(goog.string.padNumber(b.getMinutes(),a))};goog.i18n.DateTimeFormat.prototype.formatSeconds_=function(a,b){goog.i18n.DateTimeFormat.validateDateHasTime_(b);return this.localizeNumbers_(goog.string.padNumber(b.getSeconds(),a))}; +goog.i18n.DateTimeFormat.prototype.formatWeekOfYear_=function(a,b){b=goog.date.getWeekNumber(b.getFullYear(),b.getMonth(),b.getDate(),this.dateTimeSymbols_.FIRSTWEEKCUTOFFDAY,this.dateTimeSymbols_.FIRSTDAYOFWEEK);return this.localizeNumbers_(goog.string.padNumber(b,a))};goog.i18n.DateTimeFormat.prototype.formatTimeZoneRFC_=function(a,b,c){c=c||goog.i18n.TimeZone.createTimeZone(b.getTimezoneOffset());return 4>a?c.getRFCTimeZoneString(b):this.localizeNumbers_(c.getGMTString(b))}; +goog.i18n.DateTimeFormat.prototype.formatTimeZone_=function(a,b,c){c=c||goog.i18n.TimeZone.createTimeZone(b.getTimezoneOffset());return 4>a?c.getShortName(b):c.getLongName(b)};goog.i18n.DateTimeFormat.prototype.formatTimeZoneId_=function(a,b){b=b||goog.i18n.TimeZone.createTimeZone(a.getTimezoneOffset());return b.getTimeZoneId()};goog.i18n.DateTimeFormat.prototype.formatTimeZoneLocationId_=function(a,b,c){c=c||goog.i18n.TimeZone.createTimeZone(b.getTimezoneOffset());return 2>=a?c.getTimeZoneId():c.getGenericLocation(b)};
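These `format*_` methods implement the per-field rendering for CLDR-style date patterns; the dispatcher that maps pattern letters (`G`, `y`, `M`, ...) to them follows below. A usage sketch of the class's public entry points, assuming the bundle is loaded so the `goog.i18n` namespaces are available (output naturally depends on the loaded locale symbols and the local time zone):

    // Usage sketch: a pattern string is parsed by applyPattern_, while a
    // number selects one of the standard patterns via applyStandardPattern_.
    var fmt = new goog.i18n.DateTimeFormat("yyyy-MM-dd HH:mm");
    console.log(fmt.format(new Date(2024, 0, 15, 9, 30)));   // e.g. "2024-01-15 09:30"
    var medium = new goog.i18n.DateTimeFormat(
        goog.i18n.DateTimeFormat.Format.MEDIUM_DATE);        // standard pattern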
+goog.i18n.DateTimeFormat.prototype.formatField_=function(a,b,c,d,e){var f=a.length;switch(a.charAt(0)){case "G":return this.formatEra_(f,c);case "y":return this.formatYear_(f,c);case "Y":return this.formatYearOfWeek_(f,c);case "M":return this.formatMonth_(f,c);case "k":return this.format24Hours_(f,d);case "S":return this.formatFractionalSeconds_(f,d);case "E":return this.formatDayOfWeek_(f,c);case "a":return this.formatAmPm_(f,d);case "h":return this.format1To12Hours_(f,d);case "K":return this.format0To11Hours_(f, +d);case "H":return this.format0To23Hours_(f,d);case "c":return this.formatStandaloneDay_(f,c);case "L":return this.formatStandaloneMonth_(f,c);case "Q":return this.formatQuarter_(f,c);case "d":return this.formatDate_(f,c);case "m":return this.formatMinutes_(f,d);case "s":return this.formatSeconds_(f,d);case "v":return this.formatTimeZoneId_(b,e);case "V":return this.formatTimeZoneLocationId_(f,b,e);case "w":return this.formatWeekOfYear_(f,d);case "z":return this.formatTimeZone_(f,b,e);case "Z":return this.formatTimeZoneRFC_(f, +b,e);default:return""}};goog.date.UtcDateTime=function(a,b,c,d,e,f,g){a="number"===typeof a?Date.UTC(a,b||0,c||1,d||0,e||0,f||0,g||0):a?a.getTime():goog.now();this.date=new Date(a)};goog.inherits(goog.date.UtcDateTime,goog.date.DateTime);goog.date.UtcDateTime.fromTimestamp=function(a){var b=new goog.date.UtcDateTime;b.setTime(a);return b};goog.date.UtcDateTime.fromIsoString=function(a){var b=new goog.date.UtcDateTime(2E3);return goog.date.setIso8601DateTime(b,a)?b:null}; +goog.date.UtcDateTime.prototype.clone=function(){var a=new goog.date.UtcDateTime(this.date);a.setFirstDayOfWeek(this.getFirstDayOfWeek());a.setFirstWeekCutOffDay(this.getFirstWeekCutOffDay());return a};goog.date.UtcDateTime.prototype.add=function(a){if(a.years||a.months){var b=new goog.date.Interval(a.years,a.months);goog.date.Date.prototype.add.call(this,b)}a=1E3*(a.seconds+60*(a.minutes+60*(a.hours+24*a.days)));this.date=new Date(this.date.getTime()+a)}; +goog.date.UtcDateTime.prototype.getTimezoneOffset=function(){return 0};goog.date.UtcDateTime.prototype.getFullYear=goog.date.DateTime.prototype.getUTCFullYear;goog.date.UtcDateTime.prototype.getMonth=goog.date.DateTime.prototype.getUTCMonth;goog.date.UtcDateTime.prototype.getDate=goog.date.DateTime.prototype.getUTCDate;goog.date.UtcDateTime.prototype.getHours=goog.date.DateTime.prototype.getUTCHours;goog.date.UtcDateTime.prototype.getMinutes=goog.date.DateTime.prototype.getUTCMinutes; +goog.date.UtcDateTime.prototype.getSeconds=goog.date.DateTime.prototype.getUTCSeconds;goog.date.UtcDateTime.prototype.getMilliseconds=goog.date.DateTime.prototype.getUTCMilliseconds;goog.date.UtcDateTime.prototype.getDay=goog.date.DateTime.prototype.getUTCDay;goog.date.UtcDateTime.prototype.setFullYear=goog.date.DateTime.prototype.setUTCFullYear;goog.date.UtcDateTime.prototype.setMonth=goog.date.DateTime.prototype.setUTCMonth;goog.date.UtcDateTime.prototype.setDate=goog.date.DateTime.prototype.setUTCDate; +goog.date.UtcDateTime.prototype.setHours=goog.date.DateTime.prototype.setUTCHours;goog.date.UtcDateTime.prototype.setMinutes=goog.date.DateTime.prototype.setUTCMinutes;goog.date.UtcDateTime.prototype.setSeconds=goog.date.DateTime.prototype.setUTCSeconds;goog.date.UtcDateTime.prototype.setMilliseconds=goog.date.DateTime.prototype.setUTCMilliseconds;cljs_time.core={};cljs_time.core.deprecated=function(a){return cljs.core.println.call(null,"DEPRECATION WARNING: 
",a)};cljs_time.core._EQ_=cljs_time.internal.core._EQ_;cljs_time.core.DateTimeProtocol=function(){};var cljs_time$core$DateTimeProtocol$year$dyn_29437=function(a){var b=cljs_time.core.year[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.year._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.year",a);}; +cljs_time.core.year=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$year$arity$1?a.cljs_time$core$DateTimeProtocol$year$arity$1(a):cljs_time$core$DateTimeProtocol$year$dyn_29437.call(null,a)};var cljs_time$core$DateTimeProtocol$month$dyn_29438=function(a){var b=cljs_time.core.month[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.month._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.month",a);}; +cljs_time.core.month=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$month$arity$1?a.cljs_time$core$DateTimeProtocol$month$arity$1(a):cljs_time$core$DateTimeProtocol$month$dyn_29438.call(null,a)};var cljs_time$core$DateTimeProtocol$day$dyn_29439=function(a){var b=cljs_time.core.day[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.day._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.day",a);}; +cljs_time.core.day=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$day$arity$1?a.cljs_time$core$DateTimeProtocol$day$arity$1(a):cljs_time$core$DateTimeProtocol$day$dyn_29439.call(null,a)}; +var cljs_time$core$DateTimeProtocol$day_of_week$dyn_29440=function(a){var b=cljs_time.core.day_of_week[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.day_of_week._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.day-of-week",a);}; +cljs_time.core.day_of_week=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$day_of_week$arity$1?a.cljs_time$core$DateTimeProtocol$day_of_week$arity$1(a):cljs_time$core$DateTimeProtocol$day_of_week$dyn_29440.call(null,a)}; +var cljs_time$core$DateTimeProtocol$hour$dyn_29441=function(a){var b=cljs_time.core.hour[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.hour._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.hour",a);};cljs_time.core.hour=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$hour$arity$1?a.cljs_time$core$DateTimeProtocol$hour$arity$1(a):cljs_time$core$DateTimeProtocol$hour$dyn_29441.call(null,a)}; +var cljs_time$core$DateTimeProtocol$minute$dyn_29442=function(a){var b=cljs_time.core.minute[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.minute._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.minute",a);}; +cljs_time.core.minute=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$minute$arity$1?a.cljs_time$core$DateTimeProtocol$minute$arity$1(a):cljs_time$core$DateTimeProtocol$minute$dyn_29442.call(null,a)};var cljs_time$core$DateTimeProtocol$sec$dyn_29443=function(a){var b=cljs_time.core.sec[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.sec._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.sec",a);}; +cljs_time.core.sec=function(a){return 
null!=a&&null!=a.cljs_time$core$DateTimeProtocol$sec$arity$1?a.cljs_time$core$DateTimeProtocol$sec$arity$1(a):cljs_time$core$DateTimeProtocol$sec$dyn_29443.call(null,a)};var cljs_time$core$DateTimeProtocol$second$dyn_29444=function(a){var b=cljs_time.core.second[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.second._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.second",a);}; +cljs_time.core.second=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$second$arity$1?a.cljs_time$core$DateTimeProtocol$second$arity$1(a):cljs_time$core$DateTimeProtocol$second$dyn_29444.call(null,a)}; +var cljs_time$core$DateTimeProtocol$milli$dyn_29445=function(a){var b=cljs_time.core.milli[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.milli._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.milli",a);};cljs_time.core.milli=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$milli$arity$1?a.cljs_time$core$DateTimeProtocol$milli$arity$1(a):cljs_time$core$DateTimeProtocol$milli$dyn_29445.call(null,a)}; +var cljs_time$core$DateTimeProtocol$equal_QMARK_$dyn_29446=function(a,b){var c=cljs_time.core.equal_QMARK_[goog.typeOf(null==a?null:a)];if(null!=c)return c.call(null,a,b);c=cljs_time.core.equal_QMARK_._;if(null!=c)return c.call(null,a,b);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.equal?",a);}; +cljs_time.core.equal_QMARK_=function(a,b){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$equal_QMARK_$arity$2?a.cljs_time$core$DateTimeProtocol$equal_QMARK_$arity$2(a,b):cljs_time$core$DateTimeProtocol$equal_QMARK_$dyn_29446.call(null,a,b)}; +var cljs_time$core$DateTimeProtocol$after_QMARK_$dyn_29447=function(a,b){var c=cljs_time.core.after_QMARK_[goog.typeOf(null==a?null:a)];if(null!=c)return c.call(null,a,b);c=cljs_time.core.after_QMARK_._;if(null!=c)return c.call(null,a,b);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.after?",a);}; +cljs_time.core.after_QMARK_=function(a,b){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$after_QMARK_$arity$2?a.cljs_time$core$DateTimeProtocol$after_QMARK_$arity$2(a,b):cljs_time$core$DateTimeProtocol$after_QMARK_$dyn_29447.call(null,a,b)}; +var cljs_time$core$DateTimeProtocol$before_QMARK_$dyn_29448=function(a,b){var c=cljs_time.core.before_QMARK_[goog.typeOf(null==a?null:a)];if(null!=c)return c.call(null,a,b);c=cljs_time.core.before_QMARK_._;if(null!=c)return c.call(null,a,b);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.before?",a);}; +cljs_time.core.before_QMARK_=function(a,b){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$before_QMARK_$arity$2?a.cljs_time$core$DateTimeProtocol$before_QMARK_$arity$2(a,b):cljs_time$core$DateTimeProtocol$before_QMARK_$dyn_29448.call(null,a,b)}; +var cljs_time$core$DateTimeProtocol$plus_$dyn_29449=function(a,b){var c=cljs_time.core.plus_[goog.typeOf(null==a?null:a)];if(null!=c)return c.call(null,a,b);c=cljs_time.core.plus_._;if(null!=c)return c.call(null,a,b);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.plus-",a);}; +cljs_time.core.plus_=function(a,b){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$plus_$arity$2?a.cljs_time$core$DateTimeProtocol$plus_$arity$2(a,b):cljs_time$core$DateTimeProtocol$plus_$dyn_29449.call(null,a,b)}; +var cljs_time$core$DateTimeProtocol$minus_$dyn_29450=function(a,b){var c=cljs_time.core.minus_[goog.typeOf(null==a?null:a)];if(null!=c)return 
c.call(null,a,b);c=cljs_time.core.minus_._;if(null!=c)return c.call(null,a,b);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.minus-",a);}; +cljs_time.core.minus_=function(a,b){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$minus_$arity$2?a.cljs_time$core$DateTimeProtocol$minus_$arity$2(a,b):cljs_time$core$DateTimeProtocol$minus_$dyn_29450.call(null,a,b)}; +var cljs_time$core$DateTimeProtocol$first_day_of_the_month_$dyn_29451=function(a){var b=cljs_time.core.first_day_of_the_month_[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.first_day_of_the_month_._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.first-day-of-the-month-",a);}; +cljs_time.core.first_day_of_the_month_=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$first_day_of_the_month_$arity$1?a.cljs_time$core$DateTimeProtocol$first_day_of_the_month_$arity$1(a):cljs_time$core$DateTimeProtocol$first_day_of_the_month_$dyn_29451.call(null,a)}; +var cljs_time$core$DateTimeProtocol$last_day_of_the_month_$dyn_29452=function(a){var b=cljs_time.core.last_day_of_the_month_[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.last_day_of_the_month_._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.last-day-of-the-month-",a);}; +cljs_time.core.last_day_of_the_month_=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$last_day_of_the_month_$arity$1?a.cljs_time$core$DateTimeProtocol$last_day_of_the_month_$arity$1(a):cljs_time$core$DateTimeProtocol$last_day_of_the_month_$dyn_29452.call(null,a)}; +var cljs_time$core$DateTimeProtocol$week_number_of_year$dyn_29453=function(a){var b=cljs_time.core.week_number_of_year[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.week_number_of_year._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.week-number-of-year",a);}; +cljs_time.core.week_number_of_year=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$week_number_of_year$arity$1?a.cljs_time$core$DateTimeProtocol$week_number_of_year$arity$1(a):cljs_time$core$DateTimeProtocol$week_number_of_year$dyn_29453.call(null,a)}; +var cljs_time$core$DateTimeProtocol$week_year$dyn_29454=function(a){var b=cljs_time.core.week_year[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.week_year._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.week-year",a);}; +cljs_time.core.week_year=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$week_year$arity$1?a.cljs_time$core$DateTimeProtocol$week_year$arity$1(a):cljs_time$core$DateTimeProtocol$week_year$dyn_29454.call(null,a)};cljs_time.core.InTimeUnitProtocol=function(){}; +var cljs_time$core$InTimeUnitProtocol$in_millis$dyn_29455=function(a){var b=cljs_time.core.in_millis[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.in_millis._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"InTimeUnitProtocol.in-millis",a);}; +cljs_time.core.in_millis=function(a){return null!=a&&null!=a.cljs_time$core$InTimeUnitProtocol$in_millis$arity$1?a.cljs_time$core$InTimeUnitProtocol$in_millis$arity$1(a):cljs_time$core$InTimeUnitProtocol$in_millis$dyn_29455.call(null,a)}; +var cljs_time$core$InTimeUnitProtocol$in_seconds$dyn_29456=function(a){var 
b=cljs_time.core.in_seconds[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.in_seconds._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"InTimeUnitProtocol.in-seconds",a);}; +cljs_time.core.in_seconds=function(a){return null!=a&&null!=a.cljs_time$core$InTimeUnitProtocol$in_seconds$arity$1?a.cljs_time$core$InTimeUnitProtocol$in_seconds$arity$1(a):cljs_time$core$InTimeUnitProtocol$in_seconds$dyn_29456.call(null,a)}; +var cljs_time$core$InTimeUnitProtocol$in_minutes$dyn_29457=function(a){var b=cljs_time.core.in_minutes[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.in_minutes._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"InTimeUnitProtocol.in-minutes",a);}; +cljs_time.core.in_minutes=function(a){return null!=a&&null!=a.cljs_time$core$InTimeUnitProtocol$in_minutes$arity$1?a.cljs_time$core$InTimeUnitProtocol$in_minutes$arity$1(a):cljs_time$core$InTimeUnitProtocol$in_minutes$dyn_29457.call(null,a)}; +var cljs_time$core$InTimeUnitProtocol$in_hours$dyn_29458=function(a){var b=cljs_time.core.in_hours[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.in_hours._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"InTimeUnitProtocol.in-hours",a);}; +cljs_time.core.in_hours=function(a){return null!=a&&null!=a.cljs_time$core$InTimeUnitProtocol$in_hours$arity$1?a.cljs_time$core$InTimeUnitProtocol$in_hours$arity$1(a):cljs_time$core$InTimeUnitProtocol$in_hours$dyn_29458.call(null,a)}; +var cljs_time$core$InTimeUnitProtocol$in_days$dyn_29459=function(a){var b=cljs_time.core.in_days[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.in_days._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"InTimeUnitProtocol.in-days",a);}; +cljs_time.core.in_days=function(a){return null!=a&&null!=a.cljs_time$core$InTimeUnitProtocol$in_days$arity$1?a.cljs_time$core$InTimeUnitProtocol$in_days$arity$1(a):cljs_time$core$InTimeUnitProtocol$in_days$dyn_29459.call(null,a)}; +var cljs_time$core$InTimeUnitProtocol$in_weeks$dyn_29460=function(a){var b=cljs_time.core.in_weeks[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.in_weeks._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"InTimeUnitProtocol.in-weeks",a);}; +cljs_time.core.in_weeks=function(a){return null!=a&&null!=a.cljs_time$core$InTimeUnitProtocol$in_weeks$arity$1?a.cljs_time$core$InTimeUnitProtocol$in_weeks$arity$1(a):cljs_time$core$InTimeUnitProtocol$in_weeks$dyn_29460.call(null,a)}; +var cljs_time$core$InTimeUnitProtocol$in_months$dyn_29461=function(a){var b=cljs_time.core.in_months[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.in_months._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"InTimeUnitProtocol.in-months",a);}; +cljs_time.core.in_months=function(a){return null!=a&&null!=a.cljs_time$core$InTimeUnitProtocol$in_months$arity$1?a.cljs_time$core$InTimeUnitProtocol$in_months$arity$1(a):cljs_time$core$InTimeUnitProtocol$in_months$dyn_29461.call(null,a)}; +var cljs_time$core$InTimeUnitProtocol$in_years$dyn_29462=function(a){var b=cljs_time.core.in_years[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.in_years._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"InTimeUnitProtocol.in-years",a);}; +cljs_time.core.in_years=function(a){return 
null!=a&&null!=a.cljs_time$core$InTimeUnitProtocol$in_years$arity$1?a.cljs_time$core$InTimeUnitProtocol$in_years$arity$1(a):cljs_time$core$InTimeUnitProtocol$in_years$dyn_29462.call(null,a)};cljs_time.core.Interval=function(a,b,c,d,e){this.start=a;this.end=b;this.__meta=c;this.__extmap=d;this.__hash=e;this.cljs$lang$protocol_mask$partition0$=2230716170;this.cljs$lang$protocol_mask$partition1$=139264}; +cljs_time.core.Interval.prototype.cljs$core$ILookup$_lookup$arity$2=function(a,b){return this.cljs$core$ILookup$_lookup$arity$3(null,b,null)};cljs_time.core.Interval.prototype.cljs$core$ILookup$_lookup$arity$3=function(a,b,c){switch(b instanceof cljs.core.Keyword?b.fqn:null){case "start":return this.start;case "end":return this.end;default:return cljs.core.get.call(null,this.__extmap,b,c)}}; +cljs_time.core.Interval.prototype.cljs$core$IKVReduce$_kv_reduce$arity$3=function(a,b,c){return cljs.core.reduce.call(null,function(a,c){var d=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return b.call(null,a,d,c)},c,this)}; +cljs_time.core.Interval.prototype.cljs$core$IPrintWithWriter$_pr_writer$arity$3=function(a,b,c){return cljs.core.pr_sequential_writer.call(null,b,function(a){return cljs.core.pr_sequential_writer.call(null,b,cljs.core.pr_writer,""," ","",c,a)},"#cljs-time.core.Interval{",", ","}",c,cljs.core.concat.call(null,new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"start", +"start",-355208981),this.start],null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"end","end",-268185958),this.end],null)],null),this.__extmap))}; +cljs_time.core.Interval.prototype.cljs$core$IIterable$_iterator$arity$1=function(a){return new cljs.core.RecordIter(0,this,2,new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"start","start",-355208981),new cljs.core.Keyword(null,"end","end",-268185958)],null),cljs.core.truth_(this.__extmap)?cljs.core._iterator.call(null,this.__extmap):cljs.core.nil_iter.call(null))};cljs_time.core.Interval.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return this.__meta}; +cljs_time.core.Interval.prototype.cljs$core$ICloneable$_clone$arity$1=function(a){return new cljs_time.core.Interval(this.start,this.end,this.__meta,this.__extmap,this.__hash)};cljs_time.core.Interval.prototype.cljs$core$ICounted$_count$arity$1=function(a){return 2+cljs.core.count.call(null,this.__extmap)};cljs_time.core.Interval.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this.__hash;return null!=a?a:this.__hash=a=534314193^cljs.core.hash_unordered_coll.call(null,this)}; +cljs_time.core.Interval.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return null!=b&&this.constructor===b.constructor&&cljs.core._EQ_.call(null,this.start,b.start)&&cljs.core._EQ_.call(null,this.end,b.end)&&cljs.core._EQ_.call(null,this.__extmap,b.__extmap)}; +cljs_time.core.Interval.prototype.cljs$core$IMap$_dissoc$arity$2=function(a,b){return cljs.core.contains_QMARK_.call(null,new cljs.core.PersistentHashSet(null,new cljs.core.PersistentArrayMap(null,2,[new cljs.core.Keyword(null,"start","start",-355208981),null,new cljs.core.Keyword(null,"end","end",-268185958),null],null),null),b)?cljs.core.dissoc.call(null,cljs.core._with_meta.call(null,cljs.core.into.call(null,cljs.core.PersistentArrayMap.EMPTY,this),this.__meta),b):new 
cljs_time.core.Interval(this.start, +this.end,this.__meta,cljs.core.not_empty.call(null,cljs.core.dissoc.call(null,this.__extmap,b)),null)}; +cljs_time.core.Interval.prototype.cljs$core$IAssociative$_assoc$arity$3=function(a,b,c){a=cljs.core.keyword_identical_QMARK_;return cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"start","start",-355208981),b))?new cljs_time.core.Interval(c,this.end,this.__meta,this.__extmap,null):cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"end","end",-268185958),b))?new cljs_time.core.Interval(this.start,c,this.__meta,this.__extmap,null):new cljs_time.core.Interval(this.start,this.end,this.__meta, +cljs.core.assoc.call(null,this.__extmap,b,c),null)};cljs_time.core.Interval.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){return cljs.core.seq.call(null,cljs.core.concat.call(null,new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.MapEntry(new cljs.core.Keyword(null,"start","start",-355208981),this.start,null),new cljs.core.MapEntry(new cljs.core.Keyword(null,"end","end",-268185958),this.end,null)],null),this.__extmap))}; +cljs_time.core.Interval.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return new cljs_time.core.Interval(this.start,this.end,b,this.__extmap,this.__hash)};cljs_time.core.Interval.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){return cljs.core.vector_QMARK_.call(null,b)?this.cljs$core$IAssociative$_assoc$arity$3(null,cljs.core._nth.call(null,b,0),cljs.core._nth.call(null,b,1)):cljs.core.reduce.call(null,cljs.core._conj,this,b)}; +cljs_time.core.Interval.getBasis=function(){return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"start","start",1285322546,null),new cljs.core.Symbol(null,"end","end",1372345569,null)],null)};cljs_time.core.Interval.cljs$lang$type=!0;cljs_time.core.Interval.cljs$lang$ctorPrSeq=function(a){return new cljs.core.List(null,"cljs-time.core/Interval",null,1,null)}; +cljs_time.core.Interval.cljs$lang$ctorPrWriter=function(a,b){return cljs.core._write.call(null,b,"cljs-time.core/Interval")};cljs_time.core.__GT_Interval=function(a,b){return new cljs_time.core.Interval(a,b,null,null,null)}; +cljs_time.core.map__GT_Interval=function(a){var b=cljs.core.dissoc.call(null,a,new cljs.core.Keyword(null,"start","start",-355208981),new cljs.core.Keyword(null,"end","end",-268185958));b=cljs.core.record_QMARK_.call(null,a)?cljs.core.into.call(null,cljs.core.PersistentArrayMap.EMPTY,b):b;return new cljs_time.core.Interval((new cljs.core.Keyword(null,"start","start",-355208981)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null,"end","end",-268185958)).cljs$core$IFn$_invoke$arity$1(a), +null,cljs.core.not_empty.call(null,b),null)};cljs_time.core.interval=function(a,b){if(!(a.getTime()<=b.getTime()))throw Error("Assert failed: (\x3c\x3d (.getTime start) (.getTime end))");return cljs_time.core.__GT_Interval.call(null,a,b)}; +cljs_time.core.Period=function(a,b,c,d,e,f,g,h,k,l,m){this.years=a;this.months=b;this.weeks=c;this.days=d;this.hours=e;this.minutes=f;this.seconds=g;this.millis=h;this.__meta=k;this.__extmap=l;this.__hash=m;this.cljs$lang$protocol_mask$partition0$=2230716170;this.cljs$lang$protocol_mask$partition1$=139264};cljs_time.core.Period.prototype.cljs$core$ILookup$_lookup$arity$2=function(a,b){return this.cljs$core$ILookup$_lookup$arity$3(null,b,null)}; +cljs_time.core.Period.prototype.cljs$core$ILookup$_lookup$arity$3=function(a,b,c){switch(b instanceof 
cljs.core.Keyword?b.fqn:null){case "years":return this.years;case "months":return this.months;case "weeks":return this.weeks;case "days":return this.days;case "hours":return this.hours;case "minutes":return this.minutes;case "seconds":return this.seconds;case "millis":return this.millis;default:return cljs.core.get.call(null,this.__extmap,b,c)}}; +cljs_time.core.Period.prototype.cljs$core$IKVReduce$_kv_reduce$arity$3=function(a,b,c){return cljs.core.reduce.call(null,function(a,c){var d=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return b.call(null,a,d,c)},c,this)}; +cljs_time.core.Period.prototype.cljs$core$IPrintWithWriter$_pr_writer$arity$3=function(a,b,c){return cljs.core.pr_sequential_writer.call(null,b,function(a){return cljs.core.pr_sequential_writer.call(null,b,cljs.core.pr_writer,""," ","",c,a)},"#cljs-time.core.Period{",", ","}",c,cljs.core.concat.call(null,new cljs.core.PersistentVector(null,8,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"years","years", +-1298579689),this.years],null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"months","months",-45571637),this.months],null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"weeks","weeks",1844596125),this.weeks],null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"days","days",-1394072564),this.days],null),new cljs.core.PersistentVector(null, +2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"hours","hours",58380855),this.hours],null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"minutes","minutes",1319166394),this.minutes],null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"seconds","seconds",-445266194),this.seconds],null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE, +[new cljs.core.Keyword(null,"millis","millis",-1338288387),this.millis],null)],null),this.__extmap))}; +cljs_time.core.Period.prototype.cljs$core$IIterable$_iterator$arity$1=function(a){return new cljs.core.RecordIter(0,this,8,new cljs.core.PersistentVector(null,8,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"years","years",-1298579689),new cljs.core.Keyword(null,"months","months",-45571637),new cljs.core.Keyword(null,"weeks","weeks",1844596125),new cljs.core.Keyword(null,"days","days",-1394072564),new cljs.core.Keyword(null,"hours","hours",58380855),new cljs.core.Keyword(null, +"minutes","minutes",1319166394),new cljs.core.Keyword(null,"seconds","seconds",-445266194),new cljs.core.Keyword(null,"millis","millis",-1338288387)],null),cljs.core.truth_(this.__extmap)?cljs.core._iterator.call(null,this.__extmap):cljs.core.nil_iter.call(null))};cljs_time.core.Period.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return this.__meta}; +cljs_time.core.Period.prototype.cljs$core$ICloneable$_clone$arity$1=function(a){return new cljs_time.core.Period(this.years,this.months,this.weeks,this.days,this.hours,this.minutes,this.seconds,this.millis,this.__meta,this.__extmap,this.__hash)};cljs_time.core.Period.prototype.cljs$core$ICounted$_count$arity$1=function(a){return 8+cljs.core.count.call(null,this.__extmap)}; 
+cljs_time.core.Period.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this.__hash;return null!=a?a:this.__hash=a=1393857022^cljs.core.hash_unordered_coll.call(null,this)}; +cljs_time.core.Period.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return null!=b&&this.constructor===b.constructor&&cljs.core._EQ_.call(null,this.years,b.years)&&cljs.core._EQ_.call(null,this.months,b.months)&&cljs.core._EQ_.call(null,this.weeks,b.weeks)&&cljs.core._EQ_.call(null,this.days,b.days)&&cljs.core._EQ_.call(null,this.hours,b.hours)&&cljs.core._EQ_.call(null,this.minutes,b.minutes)&&cljs.core._EQ_.call(null,this.seconds,b.seconds)&&cljs.core._EQ_.call(null,this.millis,b.millis)&& +cljs.core._EQ_.call(null,this.__extmap,b.__extmap)}; +cljs_time.core.Period.prototype.cljs$core$IMap$_dissoc$arity$2=function(a,b){return cljs.core.contains_QMARK_.call(null,new cljs.core.PersistentHashSet(null,new cljs.core.PersistentArrayMap(null,8,[new cljs.core.Keyword(null,"months","months",-45571637),null,new cljs.core.Keyword(null,"days","days",-1394072564),null,new cljs.core.Keyword(null,"seconds","seconds",-445266194),null,new cljs.core.Keyword(null,"hours","hours",58380855),null,new cljs.core.Keyword(null,"years","years",-1298579689),null, +new cljs.core.Keyword(null,"minutes","minutes",1319166394),null,new cljs.core.Keyword(null,"weeks","weeks",1844596125),null,new cljs.core.Keyword(null,"millis","millis",-1338288387),null],null),null),b)?cljs.core.dissoc.call(null,cljs.core._with_meta.call(null,cljs.core.into.call(null,cljs.core.PersistentArrayMap.EMPTY,this),this.__meta),b):new cljs_time.core.Period(this.years,this.months,this.weeks,this.days,this.hours,this.minutes,this.seconds,this.millis,this.__meta,cljs.core.not_empty.call(null, +cljs.core.dissoc.call(null,this.__extmap,b)),null)}; +cljs_time.core.Period.prototype.cljs$core$IAssociative$_assoc$arity$3=function(a,b,c){a=cljs.core.keyword_identical_QMARK_;return cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"years","years",-1298579689),b))?new cljs_time.core.Period(c,this.months,this.weeks,this.days,this.hours,this.minutes,this.seconds,this.millis,this.__meta,this.__extmap,null):cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"months","months",-45571637),b))?new cljs_time.core.Period(this.years,c,this.weeks,this.days, +this.hours,this.minutes,this.seconds,this.millis,this.__meta,this.__extmap,null):cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"weeks","weeks",1844596125),b))?new cljs_time.core.Period(this.years,this.months,c,this.days,this.hours,this.minutes,this.seconds,this.millis,this.__meta,this.__extmap,null):cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"days","days",-1394072564),b))?new cljs_time.core.Period(this.years,this.months,this.weeks,c,this.hours,this.minutes,this.seconds,this.millis, +this.__meta,this.__extmap,null):cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"hours","hours",58380855),b))?new cljs_time.core.Period(this.years,this.months,this.weeks,this.days,c,this.minutes,this.seconds,this.millis,this.__meta,this.__extmap,null):cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"minutes","minutes",1319166394),b))?new cljs_time.core.Period(this.years,this.months,this.weeks,this.days,this.hours,c,this.seconds,this.millis,this.__meta,this.__extmap,null):cljs.core.truth_(a.call(null, +new cljs.core.Keyword(null,"seconds","seconds",-445266194),b))?new 
cljs_time.core.Period(this.years,this.months,this.weeks,this.days,this.hours,this.minutes,c,this.millis,this.__meta,this.__extmap,null):cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"millis","millis",-1338288387),b))?new cljs_time.core.Period(this.years,this.months,this.weeks,this.days,this.hours,this.minutes,this.seconds,c,this.__meta,this.__extmap,null):new cljs_time.core.Period(this.years,this.months,this.weeks,this.days, +this.hours,this.minutes,this.seconds,this.millis,this.__meta,cljs.core.assoc.call(null,this.__extmap,b,c),null)}; +cljs_time.core.Period.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){return cljs.core.seq.call(null,cljs.core.concat.call(null,new cljs.core.PersistentVector(null,8,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.MapEntry(new cljs.core.Keyword(null,"years","years",-1298579689),this.years,null),new cljs.core.MapEntry(new cljs.core.Keyword(null,"months","months",-45571637),this.months,null),new cljs.core.MapEntry(new cljs.core.Keyword(null,"weeks","weeks",1844596125),this.weeks,null), +new cljs.core.MapEntry(new cljs.core.Keyword(null,"days","days",-1394072564),this.days,null),new cljs.core.MapEntry(new cljs.core.Keyword(null,"hours","hours",58380855),this.hours,null),new cljs.core.MapEntry(new cljs.core.Keyword(null,"minutes","minutes",1319166394),this.minutes,null),new cljs.core.MapEntry(new cljs.core.Keyword(null,"seconds","seconds",-445266194),this.seconds,null),new cljs.core.MapEntry(new cljs.core.Keyword(null,"millis","millis",-1338288387),this.millis,null)],null),this.__extmap))}; +cljs_time.core.Period.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return new cljs_time.core.Period(this.years,this.months,this.weeks,this.days,this.hours,this.minutes,this.seconds,this.millis,b,this.__extmap,this.__hash)}; +cljs_time.core.Period.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){return cljs.core.vector_QMARK_.call(null,b)?this.cljs$core$IAssociative$_assoc$arity$3(null,cljs.core._nth.call(null,b,0),cljs.core._nth.call(null,b,1)):cljs.core.reduce.call(null,cljs.core._conj,this,b)}; +cljs_time.core.Period.getBasis=function(){return new cljs.core.PersistentVector(null,8,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"years","years",341951838,null),new cljs.core.Symbol(null,"months","months",1594959890,null),new cljs.core.Symbol(null,"weeks","weeks",-809839644,null),new cljs.core.Symbol(null,"days","days",246458963,null),new cljs.core.Symbol(null,"hours","hours",1698912382,null),new cljs.core.Symbol(null,"minutes","minutes",-1335269375,null),new cljs.core.Symbol(null, +"seconds","seconds",1195265333,null),new cljs.core.Symbol(null,"millis","millis",302243140,null)],null)};cljs_time.core.Period.cljs$lang$type=!0;cljs_time.core.Period.cljs$lang$ctorPrSeq=function(a){return new cljs.core.List(null,"cljs-time.core/Period",null,1,null)};cljs_time.core.Period.cljs$lang$ctorPrWriter=function(a,b){return cljs.core._write.call(null,b,"cljs-time.core/Period")}; +cljs_time.core.__GT_Period=function(a,b,c,d,e,f,g,h){return new cljs_time.core.Period(a,b,c,d,e,f,g,h,null,null,null)}; +cljs_time.core.map__GT_Period=function(a){var b=cljs.core.dissoc.call(null,a,new cljs.core.Keyword(null,"years","years",-1298579689),new cljs.core.Keyword(null,"months","months",-45571637),new cljs.core.Keyword(null,"weeks","weeks",1844596125),new cljs.core.Keyword(null,"days","days",-1394072564),new cljs.core.Keyword(null,"hours","hours",58380855),new cljs.core.Keyword(null,"minutes","minutes",1319166394),new 
cljs.core.Keyword(null,"seconds","seconds",-445266194),new cljs.core.Keyword(null,"millis", +"millis",-1338288387));b=cljs.core.record_QMARK_.call(null,a)?cljs.core.into.call(null,cljs.core.PersistentArrayMap.EMPTY,b):b;return new cljs_time.core.Period((new cljs.core.Keyword(null,"years","years",-1298579689)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null,"months","months",-45571637)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null,"weeks","weeks",1844596125)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null,"days","days",-1394072564)).cljs$core$IFn$_invoke$arity$1(a), +(new cljs.core.Keyword(null,"hours","hours",58380855)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null,"minutes","minutes",1319166394)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null,"seconds","seconds",-445266194)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null,"millis","millis",-1338288387)).cljs$core$IFn$_invoke$arity$1(a),null,cljs.core.not_empty.call(null,b),null)}; +cljs_time.core.period=function(a){switch(arguments.length){case 2:return cljs_time.core.period.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(db.getTime()};goog.date.UtcDateTime.prototype.cljs_time$core$DateTimeProtocol$before_QMARK_$arity$2=function(a,b){return this.getTime()b.getTime()};goog.date.DateTime.prototype.cljs_time$core$DateTimeProtocol$before_QMARK_$arity$2=function(a,b){return this.getTime()cljs_time.core.compare_local_dates.call(null,this,b)};goog.date.Date.prototype.cljs_time$core$DateTimeProtocol$plus_$arity$2=function(a,b){return cljs_time.core.period_fn.call(null,b).call(null,cljs.core._PLUS_,this)};goog.date.Date.prototype.cljs_time$core$DateTimeProtocol$minus_$arity$2=function(a,b){return cljs_time.core.period_fn.call(null,b).call(null,cljs.core._,this)}; +goog.date.Date.prototype.cljs_time$core$DateTimeProtocol$first_day_of_the_month_$arity$1=function(a){return new goog.date.Date(this.getYear(),this.getMonth(),1)};goog.date.Date.prototype.cljs_time$core$DateTimeProtocol$last_day_of_the_month_$arity$1=function(a){return cljs_time.core.minus_.call(null,new goog.date.Date(this.getYear(),this.getMonth()+1,1),cljs_time.core.period.call(null,new cljs.core.Keyword(null,"days","days",-1394072564),1))}; +goog.date.Date.prototype.cljs_time$core$DateTimeProtocol$week_number_of_year$arity$1=function(a){return goog.date.getWeekNumber(this.getYear(),this.getMonth(),this.getDate())};goog.date.Date.prototype.cljs_time$core$DateTimeProtocol$week_year$arity$1=function(a){return cljs_time.internal.core.get_week_year.call(null,this.getYear(),this.getMonth(),this.getDate())}; +cljs_time.core.utc={id:"UTC",std_offset:0,names:new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,["UTC"],null),transitions:cljs.core.PersistentVector.EMPTY};cljs_time.core.default_ms_fn=function(){return function(){return(new goog.date.UtcDateTime).getTime()}};cljs_time.core.offset_ms_fn=function(a){return function(){return(new goog.date.UtcDateTime).getTime()+a}};cljs_time.core.static_ms_fn=function(a){return function(){return a}};cljs_time.core._STAR_ms_fn_STAR_=cljs_time.core.default_ms_fn.call(null); +cljs_time.core.now=function(){var a=new goog.date.UtcDateTime;a.setTime(cljs_time.core._STAR_ms_fn_STAR_.call(null));return a};cljs_time.core.time_now=function(){var a=new goog.date.DateTime;a.setTime(cljs_time.core._STAR_ms_fn_STAR_.call(null));return 
a};cljs_time.core.at_midnight=function(a){a=a.clone();a.setHours(0);a.setMinutes(0);a.setSeconds(0);a.setMilliseconds(0);return a};cljs_time.core.today_at_midnight=function(){return cljs_time.core.at_midnight.call(null,cljs_time.core.now.call(null))}; +cljs_time.core.epoch=function(){var a=new goog.date.UtcDateTime;a.setTime(0);return a}; +cljs_time.core.date_midnight=function(a){switch(arguments.length){case 1:return cljs_time.core.date_midnight.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.core.date_midnight.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return cljs_time.core.date_midnight.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.core.date_midnight.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.date_midnight.call(null,a,1,1)};cljs_time.core.date_midnight.cljs$core$IFn$_invoke$arity$2=function(a,b){return cljs_time.core.date_midnight.call(null,a,b,1)};cljs_time.core.date_midnight.cljs$core$IFn$_invoke$arity$3=function(a,b,c){return new goog.date.UtcDateTime(a,b-1,c)};cljs_time.core.date_midnight.cljs$lang$maxFixedArity=3; +cljs_time.core.date_time=function(a){switch(arguments.length){case 1:return cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);case 4:return cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$4(arguments[0],arguments[1],arguments[2],arguments[3]);case 5:return cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$5(arguments[0], +arguments[1],arguments[2],arguments[3],arguments[4]);case 6:return cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$6(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4],arguments[5]);case 7:return cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$7(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4],arguments[5],arguments[6]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.date_time.call(null,a,1,1,0,0,0,0)};cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$2=function(a,b){return cljs_time.core.date_time.call(null,a,b,1,0,0,0,0)};cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$3=function(a,b,c){return cljs_time.core.date_time.call(null,a,b,c,0,0,0,0)}; +cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$4=function(a,b,c,d){return cljs_time.core.date_time.call(null,a,b,c,d,0,0,0)};cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$5=function(a,b,c,d,e){return cljs_time.core.date_time.call(null,a,b,c,d,e,0,0)};cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$6=function(a,b,c,d,e,f){return cljs_time.core.date_time.call(null,a,b,c,d,e,f,0)}; +cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$7=function(a,b,c,d,e,f,g){return new goog.date.UtcDateTime(a,b-1,c,d,e,f,g)};cljs_time.core.date_time.cljs$lang$maxFixedArity=7; +cljs_time.core.local_date_time=function(a){switch(arguments.length){case 1:return cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return 
cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);case 4:return cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$4(arguments[0],arguments[1],arguments[2],arguments[3]); +case 5:return cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$5(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4]);case 6:return cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$6(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4],arguments[5]);case 7:return cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$7(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4],arguments[5],arguments[6]);default:throw Error(["Invalid arity: ", +cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.local_date_time.call(null,a,1,1,0,0,0,0)};cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$2=function(a,b){return cljs_time.core.local_date_time.call(null,a,b,1,0,0,0,0)};cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$3=function(a,b,c){return cljs_time.core.local_date_time.call(null,a,b,c,0,0,0,0)}; +cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$4=function(a,b,c,d){return cljs_time.core.local_date_time.call(null,a,b,c,d,0,0,0)};cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$5=function(a,b,c,d,e){return cljs_time.core.local_date_time.call(null,a,b,c,d,e,0,0)};cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$6=function(a,b,c,d,e,f){return cljs_time.core.local_date_time.call(null,a,b,c,d,e,f,0)}; +cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$7=function(a,b,c,d,e,f,g){return new goog.date.DateTime(a,b-1,c,d,e,f,g)};cljs_time.core.local_date_time.cljs$lang$maxFixedArity=7;cljs_time.core.local_date=function(a,b,c){return new goog.date.Date(a,b-1,c)};cljs_time.core.today=function(){return new goog.date.Date(new Date(cljs_time.core._STAR_ms_fn_STAR_.call(null)))}; +cljs_time.core.time_zone_for_offset=function(a){switch(arguments.length){case 1:return cljs_time.core.time_zone_for_offset.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.core.time_zone_for_offset.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.core.time_zone_for_offset.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.time_zone_for_offset.call(null,a,null)}; +cljs_time.core.time_zone_for_offset.cljs$core$IFn$_invoke$arity$2=function(a,b){var c=0>a?new cljs.core.Keyword(null,"-","-",-2112348439):new cljs.core.Keyword(null,"+","+",1913524883),d=["UTC%s%02d",cljs.core.truth_(b)?":%02d":null].join("");a=0>a?-1*a:a;d=cljs.core.truth_(b)?cljs_time.internal.core.format.call(null,d,cljs.core.name.call(null,c),a,b):cljs_time.internal.core.format.call(null,d,cljs.core.name.call(null,c),a);return cljs.core.with_meta.call(null,new cljs.core.PersistentArrayMap(null, +4,[new cljs.core.Keyword(null,"id","id",-1388402092),d,new cljs.core.Keyword(null,"offset","offset",296498311),new cljs.core.PersistentVector(null,4,5,cljs.core.PersistentVector.EMPTY_NODE,[c,a,cljs.core.truth_(b)?b:0,0],null),new cljs.core.Keyword(null,"rules","rules",1198912366),"-",new cljs.core.Keyword(null,"names","names",-1943074658),new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[d],null)],null),new 
cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null, +"type","type",1174270348),new cljs.core.Keyword("cljs-time.core","time-zone","cljs-time.core/time-zone",751963705)],null))};cljs_time.core.time_zone_for_offset.cljs$lang$maxFixedArity=2;cljs_time.core.default_time_zone=function(){var a=new goog.date.DateTime;a.setTime(cljs_time.core._STAR_ms_fn_STAR_.call(null));a=-1*a.getTimezoneOffset()/60;return cljs_time.core.time_zone_for_offset.call(null,a|0,cljs.core.mod.call(null,a,1))};cljs_time.core.to_default_time_zone=function(a){return new goog.date.DateTime(a)}; +cljs_time.core.to_utc_time_zone=function(a){return goog.date.UtcDateTime.fromTimestamp(a.getTime())};cljs_time.core.from_default_time_zone=function(a){return new goog.date.DateTime(a.getYear(),a.getMonth(),a.getDate(),a.getHours(),a.getMinutes(),a.getSeconds(),a.getMilliseconds())}; +cljs_time.core.from_utc_time_zone=function(a){var b=a.getYear(),c=a.getMonth(),d=a.getDate();return cljs.core.truth_(cljs_time.core._EQ_.call(null,goog.date.Date,cljs.core.type.call(null,a)))?new goog.date.UtcDateTime(b,c,d):new goog.date.UtcDateTime(b,c,d,a.getHours(),a.getMinutes(),a.getSeconds(),a.getMilliseconds())}; +cljs_time.core.years=function(a){switch(arguments.length){case 0:return cljs_time.core.years.cljs$core$IFn$_invoke$arity$0();case 1:return cljs_time.core.years.cljs$core$IFn$_invoke$arity$1(arguments[0]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs_time.core.years.cljs$core$IFn$_invoke$arity$0=function(){return cljs_time.core.years.call(null,null)}; +cljs_time.core.years.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.period.call(null,new cljs.core.Keyword(null,"years","years",-1298579689),a)};cljs_time.core.years.cljs$lang$maxFixedArity=1; +cljs_time.core.months=function(a){switch(arguments.length){case 0:return cljs_time.core.months.cljs$core$IFn$_invoke$arity$0();case 1:return cljs_time.core.months.cljs$core$IFn$_invoke$arity$1(arguments[0]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs_time.core.months.cljs$core$IFn$_invoke$arity$0=function(){return cljs_time.core.months.call(null,null)}; +cljs_time.core.months.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.period.call(null,new cljs.core.Keyword(null,"months","months",-45571637),a)};cljs_time.core.months.cljs$lang$maxFixedArity=1; +cljs_time.core.weeks=function(a){switch(arguments.length){case 0:return cljs_time.core.weeks.cljs$core$IFn$_invoke$arity$0();case 1:return cljs_time.core.weeks.cljs$core$IFn$_invoke$arity$1(arguments[0]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs_time.core.weeks.cljs$core$IFn$_invoke$arity$0=function(){return cljs_time.core.weeks.call(null,null)}; +cljs_time.core.weeks.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.period.call(null,new cljs.core.Keyword(null,"weeks","weeks",1844596125),a)};cljs_time.core.weeks.cljs$lang$maxFixedArity=1; +cljs_time.core.days=function(a){switch(arguments.length){case 0:return cljs_time.core.days.cljs$core$IFn$_invoke$arity$0();case 1:return cljs_time.core.days.cljs$core$IFn$_invoke$arity$1(arguments[0]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs_time.core.days.cljs$core$IFn$_invoke$arity$0=function(){return cljs_time.core.days.call(null,null)}; 
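`years` through `days` above (with `hours`, `minutes`, `seconds`, and `millis` following the same pattern below) are thin wrappers that build a `Period` with a single field set, and `date_time`/`local_date_time` take 1-based months and convert them to goog.date's 0-based convention. The arity dispatchers switch on `arguments.length`, so the compiled functions can be called directly from JavaScript:

    // Sketch: direct JS calls into the compiled cljs-time API shown above.
    var when = cljs_time.core.date_time(2024, 1, 15); // 1-based month -> Jan 15, UTC
    console.log(when.getFullYear(), when.getMonth()); // 2024 0 (goog.date is 0-based)
    var span = cljs_time.core.weeks(2);               // Period with :weeks set to 2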
+cljs_time.core.days.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.period.call(null,new cljs.core.Keyword(null,"days","days",-1394072564),a)};cljs_time.core.days.cljs$lang$maxFixedArity=1; +cljs_time.core.hours=function(a){switch(arguments.length){case 0:return cljs_time.core.hours.cljs$core$IFn$_invoke$arity$0();case 1:return cljs_time.core.hours.cljs$core$IFn$_invoke$arity$1(arguments[0]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs_time.core.hours.cljs$core$IFn$_invoke$arity$0=function(){return cljs_time.core.hours.call(null,null)}; +cljs_time.core.hours.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.period.call(null,new cljs.core.Keyword(null,"hours","hours",58380855),a)};cljs_time.core.hours.cljs$lang$maxFixedArity=1; +cljs_time.core.minutes=function(a){switch(arguments.length){case 0:return cljs_time.core.minutes.cljs$core$IFn$_invoke$arity$0();case 1:return cljs_time.core.minutes.cljs$core$IFn$_invoke$arity$1(arguments[0]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs_time.core.minutes.cljs$core$IFn$_invoke$arity$0=function(){return cljs_time.core.minutes.call(null,null)}; +cljs_time.core.minutes.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.period.call(null,new cljs.core.Keyword(null,"minutes","minutes",1319166394),a)};cljs_time.core.minutes.cljs$lang$maxFixedArity=1; +cljs_time.core.seconds=function(a){switch(arguments.length){case 0:return cljs_time.core.seconds.cljs$core$IFn$_invoke$arity$0();case 1:return cljs_time.core.seconds.cljs$core$IFn$_invoke$arity$1(arguments[0]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs_time.core.seconds.cljs$core$IFn$_invoke$arity$0=function(){return cljs_time.core.seconds.call(null,null)}; +cljs_time.core.seconds.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.period.call(null,new cljs.core.Keyword(null,"seconds","seconds",-445266194),a)};cljs_time.core.seconds.cljs$lang$maxFixedArity=1; +cljs_time.core.millis=function(a){switch(arguments.length){case 0:return cljs_time.core.millis.cljs$core$IFn$_invoke$arity$0();case 1:return cljs_time.core.millis.cljs$core$IFn$_invoke$arity$1(arguments[0]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs_time.core.millis.cljs$core$IFn$_invoke$arity$0=function(){return cljs_time.core.millis.call(null,null)}; +cljs_time.core.millis.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.period.call(null,new cljs.core.Keyword(null,"millis","millis",-1338288387),a)};cljs_time.core.millis.cljs$lang$maxFixedArity=1; +cljs_time.core.plus=function(a){switch(arguments.length){case 2:return cljs_time.core.plus.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(d=b?new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[parseInt(cljs.core.apply.call(null,cljs.core.str,cljs.core.take.call(null,c,a))),cljs.core.concat.call(null,cljs.core.drop.call(null, +c,a),d)],null):new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[parseInt(cljs.core.apply.call(null,cljs.core.str,a)),d],null)};cljs_time.internal.parse.parse_number.cljs$lang$maxFixedArity=3; +cljs_time.internal.parse.parse_period=function(a){switch(arguments.length){case 3:return 
cljs_time.internal.parse.parse_period.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);case 4:return cljs_time.internal.parse.parse_period.cljs$core$IFn$_invoke$arity$4(arguments[0],arguments[1],arguments[2],arguments[3]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_period.cljs$core$IFn$_invoke$arity$3=function(a,b,c){return cljs_time.internal.parse.parse_period.call(null,a,b,1,c)}; +cljs_time.internal.parse.parse_period.cljs$core$IFn$_invoke$arity$4=function(a,b,c,d){c=cljs_time.internal.parse.parse_number.call(null,a,c,d);a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[b,a],null),c],null)};cljs_time.internal.parse.parse_period.cljs$lang$maxFixedArity=4; +cljs_time.internal.parse.parse_year=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_year.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_year.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_year.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_year.call(null,1,a)};cljs_time.internal.parse.parse_year.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"years","years",-1298579689),a,b)}};cljs_time.internal.parse.parse_year.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_weekyear=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_weekyear.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_weekyear.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_weekyear.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_year.call(null,1,a)};cljs_time.internal.parse.parse_weekyear.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"weekyear","weekyear",-74064500),a,b)}};cljs_time.internal.parse.parse_weekyear.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_weekyear_week=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_weekyear_week.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_weekyear_week.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_weekyear_week.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_year.call(null,1,a)};cljs_time.internal.parse.parse_weekyear_week.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"weekyear-week","weekyear-week",795291571),a,b)}};cljs_time.internal.parse.parse_weekyear_week.cljs$lang$maxFixedArity=2; 
+cljs_time.internal.parse.parse_month=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_month.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_month.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_month.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_month.call(null,1,a)};cljs_time.internal.parse.parse_month.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"months","months",-45571637),a,b)}};cljs_time.internal.parse.parse_month.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_day=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_day.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_day.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_day.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_day.call(null,1,a)};cljs_time.internal.parse.parse_day.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"days","days",-1394072564),a,b)}};cljs_time.internal.parse.parse_day.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_day_of_week=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_day_of_week.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_day_of_week.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_day_of_week.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_day.call(null,1,a)};cljs_time.internal.parse.parse_day_of_week.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"day-of-week","day-of-week",1639326729),a,b)}};cljs_time.internal.parse.parse_day_of_week.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_hours=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_hours.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_hours.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_hours.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_hours.call(null,1,a)};cljs_time.internal.parse.parse_hours.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"hours","hours",58380855),a,b)}};cljs_time.internal.parse.parse_hours.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_HOURS=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_HOURS.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return 
cljs_time.internal.parse.parse_HOURS.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_HOURS.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_HOURS.call(null,1,a)};cljs_time.internal.parse.parse_HOURS.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"HOURS","HOURS",-1611068963),a,b)}};cljs_time.internal.parse.parse_HOURS.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_minutes=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_minutes.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_minutes.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_minutes.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_minutes.call(null,1,a)};cljs_time.internal.parse.parse_minutes.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"minutes","minutes",1319166394),a,b)}};cljs_time.internal.parse.parse_minutes.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_seconds=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_seconds.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_seconds.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_seconds.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_seconds.call(null,1,a)};cljs_time.internal.parse.parse_seconds.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"seconds","seconds",-445266194),a,b)}};cljs_time.internal.parse.parse_seconds.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_millis=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_millis.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_millis.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_millis.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_millis.call(null,1,a)};cljs_time.internal.parse.parse_millis.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"millis","millis",-1338288387),a,b)}};cljs_time.internal.parse.parse_millis.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.timezone_adj=function(a,b,c){b=parseInt(b,10);c=parseInt(c,10);c=60*b+c;a=cljs.core._EQ_.call(null,a,"+")?cljs.core._:cljs.core._PLUS_;return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"timezone","timezone",1831928099),new goog.date.Interval(goog.date.Interval.MINUTES,a.call(null,c))],null)}; 
+cljs_time.internal.parse.parse_timezone=function(a){return function(b){var c=cljs.core.seq.call(null,b),d=cljs.core.first.call(null,c),e=cljs.core.next.call(null,c);c=function(){return cljs.core.ex_info.call(null,["Invalid timezone format: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(b)].join(""),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"parse-error","parse-error",255902478)],null))};var f=function(a){var b=clojure.string.join.call(null, +cljs.core.take.call(null,4,e)),c=cljs.core.re_find.call(null,/^(\d{2})(\d{2})/,b);return cljs.core.truth_(c)?(cljs.core.nth.call(null,c,0,null),b=cljs.core.nth.call(null,c,1,null),c=cljs.core.nth.call(null,c,2,null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[cljs_time.internal.parse.timezone_adj.call(null,a,b,c),cljs.core.drop.call(null,4,e)],null)):null},g=function(a){var b=clojure.string.join.call(null,cljs.core.take.call(null,5,e)),c=cljs.core.re_find.call(null, +/^(\d{2}):(\d{2})/,b);return cljs.core.truth_(c)?(cljs.core.nth.call(null,c,0,null),b=cljs.core.nth.call(null,c,1,null),c=cljs.core.nth.call(null,c,2,null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[cljs_time.internal.parse.timezone_adj.call(null,a,b,c),cljs.core.drop.call(null,5,e)],null)):null};if(cljs.core.truth_((new cljs.core.PersistentHashSet(null,new cljs.core.PersistentArrayMap(null,2,["+",null,"-",null],null),null)).call(null,d))){var h=a instanceof cljs.core.Keyword? +a.fqn:null;switch(h){case "dddd":f=f.call(null,d);if(cljs.core.truth_(f))return f;d=g.call(null,d);if(cljs.core.truth_(d))return d;throw c.call(null);case "long":f=f.call(null,d);if(cljs.core.truth_(f))return f;d=g.call(null,d);if(cljs.core.truth_(d))return d;throw c.call(null);default:throw Error(["No matching clause: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(h)].join(""));}}else{if(cljs.core._EQ_.call(null,d,"Z"))return new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE, +[new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"timezone","timezone",1831928099),cljs_time.internal.parse.timezone_adj.call(null,cljs.core._PLUS_,"0","0")],null)],null);switch(a instanceof cljs.core.Keyword?a.fqn:null){case "abbr":d=cljs.core.take.call(null,3,b);d=cljs_time.internal.parse.read_while.call(null,function(a){return cljs.core.re_find.call(null,/[A-Z]/,a)},d);g=cljs.core.nth.call(null,d,0,null);cljs.core.nth.call(null,d,1,null); +if(cljs.core._EQ_.call(null,cljs.core.count.call(null,g),3))return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"timezone","timezone",1831928099),clojure.string.join.call(null,g)],null),cljs.core.drop.call(null,3,b)],null);throw c.call(null);case "full":throw cljs.core.ex_info.call(null,["Cannot parse long form timezone:",cljs.core.str.cljs$core$IFn$_invoke$arity$1(b)].join(""), +new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"parse-error","parse-error",255902478)],null));default:throw c.call(null);}}}}; +cljs_time.internal.parse.parse_meridiem=function(){return function(a){var b=cljs.core.split_at.call(null,2,a),c=cljs.core.nth.call(null,b,0,null);a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);b=cljs.core.nth.call(null,b,1,null);var 
d=[cljs.core.str.cljs$core$IFn$_invoke$arity$1(a),cljs.core.str.cljs$core$IFn$_invoke$arity$1(c)].join("");if(cljs.core.truth_((new cljs.core.PersistentHashSet(null,new cljs.core.PersistentArrayMap(null,4,["AM",null,"am",null,"pm",null,"PM",null], +null),null)).call(null,d)))a=new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[d,b],null);else if(cljs.core.truth_((new cljs.core.PersistentHashSet(null,new cljs.core.PersistentArrayMap(null,2,["a",null,"p",null],null),null)).call(null,a)))a=new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[(new cljs.core.PersistentArrayMap(null,2,["a","am","p","pm"],null)).call(null,a),cljs.core.cons.call(null,c,b)],null);else if(cljs.core.truth_((new cljs.core.PersistentHashSet(null, +new cljs.core.PersistentArrayMap(null,2,["A",null,"P",null],null),null)).call(null,a)))a=new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[(new cljs.core.PersistentArrayMap(null,2,["A","am","P","pm"],null)).call(null,a),cljs.core.cons.call(null,c,b)],null);else throw cljs.core.ex_info.call(null,["Invalid meridiem format: ",d].join(""),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"parse-error", +"parse-error",255902478)],null));c=a;a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"meridiem","meridiem",1668960617),cljs.core.keyword.call(null,a)],null),clojure.string.join.call(null,c)],null)}}; +cljs_time.internal.parse.parse_period_name=function(a,b,c,d){c=cljs.core.concat.call(null,c,cljs.core.map.call(null,function(a){return cljs.core.subs.call(null,a,0,3)},c));var e=cljs.core.first.call(null,cljs.core.remove.call(null,cljs.core.comp.call(null,cljs.core.partial.call(null,cljs.core._EQ_,a),cljs.core.second),cljs.core.map.call(null,function(b){return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[b,cljs_time.internal.parse.replace.call(null,a,cljs.core.re_pattern.call(null, +["^",cljs.core.str.cljs$core$IFn$_invoke$arity$1(b)].join("")),"")],null)},c)));d=cljs.core.nth.call(null,e,0,null);e=cljs.core.nth.call(null,e,1,null);if(cljs.core.truth_(d))return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[b,cljs.core.mod.call(null,cljs_time.internal.core.index_of.call(null,c,d),12)],null),e],null);throw cljs.core.ex_info.call(null,["Could not parse ",cljs.core.name.call(null, +b)," name"].join(""),new cljs.core.PersistentArrayMap(null,4,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"parse-error","parse-error",255902478),new cljs.core.Keyword(null,"sub-type","sub-type",-997954412),new cljs.core.Keyword(null,"period-match-erroro","period-match-erroro",1058444722),new cljs.core.Keyword(null,"period","period",-352129191),b,new cljs.core.Keyword(null,"in","in",-1531184865),e],null));}; +cljs_time.internal.parse.parse_month_name=function(a){return function(b){return cljs.core.update_in.call(null,cljs_time.internal.parse.parse_period_name.call(null,b,new cljs.core.Keyword(null,"months","months",-45571637),cljs_time.internal.core.months,a),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[0,1],null),cljs.core.inc)}}; 
+cljs_time.internal.parse.parse_day_name=function(a){return function(b){return cljs_time.internal.parse.parse_period_name.call(null,b,new cljs.core.Keyword(null,"days","days",-1394072564),cljs_time.internal.core.days,a)}}; +cljs_time.internal.parse.parse_quoted=function(a){var b=cljs.core.re_pattern.call(null,cljs.core.apply.call(null,cljs.core.str,"^",a));return function(c){c=clojure.string.join.call(null,c);var d=cljs_time.internal.parse.replace.call(null,c,b,"");if(cljs.core._EQ_.call(null,c,d))throw cljs.core.ex_info.call(null,"Quoted text not found",new cljs.core.PersistentArrayMap(null,2,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"parse-error","parse-error",255902478),new cljs.core.Keyword(null, +"where","where",-2044795965),new cljs.core.Keyword(null,"parse-quoted","parse-quoted",1180570118)],null));return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"quoted","quoted",2117344952),a],null),d],null)}}; +cljs_time.internal.parse.parse_ordinal_suffix=function(){return function(a){var b=cljs_time.internal.parse.parse_match.call(null,a,new cljs.core.Keyword(null,"ordinal-suffix","ordinal-suffix",-1311843199),"st");if(cljs.core.truth_(b))return b;b=cljs_time.internal.parse.parse_match.call(null,a,new cljs.core.Keyword(null,"ordinal-suffix","ordinal-suffix",-1311843199),"nd");if(cljs.core.truth_(b))return b;b=cljs_time.internal.parse.parse_match.call(null,a,new cljs.core.Keyword(null,"ordinal-suffix", +"ordinal-suffix",-1311843199),"rd");return cljs.core.truth_(b)?b:cljs_time.internal.parse.parse_match.call(null,a,new cljs.core.Keyword(null,"ordinal-suffix","ordinal-suffix",-1311843199),"th")}}; +cljs_time.internal.parse.lookup=function(a){var b=cljs.core.nth.call(null,a,0,null);a=cljs.core.nth.call(null,a,1,null);if(cljs.core._EQ_.call(null,b,new cljs.core.Keyword(null,"token","token",-1211463215)))switch(a){case "S":return cljs_time.internal.parse.parse_millis.call(null,1,2);case "SSS":return cljs_time.internal.parse.parse_millis.call(null,3,3);case "s":return cljs_time.internal.parse.parse_seconds.call(null,1,2);case "ss":return cljs_time.internal.parse.parse_seconds.call(null,2,2);case "m":return cljs_time.internal.parse.parse_minutes.call(null, +1,2);case "mm":return cljs_time.internal.parse.parse_minutes.call(null,2,2);case "h":return cljs_time.internal.parse.parse_hours.call(null,1,2);case "hh":return cljs_time.internal.parse.parse_hours.call(null,2,2);case "H":return cljs_time.internal.parse.parse_HOURS.call(null,1,2);case "HH":return cljs_time.internal.parse.parse_HOURS.call(null,2,2);case "d":return cljs_time.internal.parse.parse_day.call(null,1,2);case "dd":return cljs_time.internal.parse.parse_day.call(null,2,2);case "D":return cljs_time.internal.parse.parse_day.call(null, +1,3);case "DD":return cljs_time.internal.parse.parse_day.call(null,2,3);case "DDD":return cljs_time.internal.parse.parse_day.call(null,3,3);case "M":return cljs_time.internal.parse.parse_month.call(null,1,2);case "MM":return cljs_time.internal.parse.parse_month.call(null,1,2);case "MMM":return cljs_time.internal.parse.parse_month_name.call(null,!0);case "MMMM":return cljs_time.internal.parse.parse_month_name.call(null,!1);case "y":return cljs_time.internal.parse.parse_year.call(null,1,4);case "yy":return cljs_time.internal.parse.parse_year.call(null, +2,2);case "yyyy":return cljs_time.internal.parse.parse_year.call(null,4,4);case 
"Y":return cljs_time.internal.parse.parse_year.call(null,1,4);case "YY":return cljs_time.internal.parse.parse_year.call(null,2,2);case "YYYY":return cljs_time.internal.parse.parse_year.call(null,4,4);case "x":return cljs_time.internal.parse.parse_weekyear.call(null,1,4);case "xx":return cljs_time.internal.parse.parse_weekyear.call(null,2,2);case "xxxx":return cljs_time.internal.parse.parse_weekyear.call(null,4,4);case "w":return cljs_time.internal.parse.parse_weekyear_week.call(null, +1,2);case "ww":return cljs_time.internal.parse.parse_weekyear_week.call(null,2,2);case "E":return cljs_time.internal.parse.parse_day_name.call(null,!0);case "EEE":return cljs_time.internal.parse.parse_day_name.call(null,!0);case "EEEE":return cljs_time.internal.parse.parse_day_name.call(null,!1);case "e":return cljs_time.internal.parse.parse_day_of_week.call(null,1,2);case "a":return cljs_time.internal.parse.parse_meridiem.call(null);case "A":return cljs_time.internal.parse.parse_meridiem.call(null); +case "Z":return cljs_time.internal.parse.parse_timezone.call(null,new cljs.core.Keyword(null,"dddd","dddd",217016228));case "ZZ":return cljs_time.internal.parse.parse_timezone.call(null,new cljs.core.Keyword(null,"long","long",-171452093));case "ZZZ":return cljs_time.internal.parse.parse_timezone.call(null,new cljs.core.Keyword(null,"abbr","abbr",2088591884));case "ZZZZ":return cljs_time.internal.parse.parse_timezone.call(null,new cljs.core.Keyword(null,"abbr","abbr",2088591884));case "z":return cljs_time.internal.parse.parse_timezone.call(null, +new cljs.core.Keyword(null,"abbr","abbr",2088591884));case "zz":return cljs_time.internal.parse.parse_timezone.call(null,new cljs.core.Keyword(null,"abbr","abbr",2088591884));case "zzz":return cljs_time.internal.parse.parse_timezone.call(null,new cljs.core.Keyword(null,"abbr","abbr",2088591884));case "zzzz":return cljs_time.internal.parse.parse_timezone.call(null,new cljs.core.Keyword(null,"full","full",436801220));case "o":return cljs_time.internal.parse.parse_ordinal_suffix.call(null);default:throw cljs.core.ex_info.call(null, +["Illegal pattern component: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)].join(""),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"illegal-pattern","illegal-pattern",-1810990520)],null));}else return cljs_time.internal.parse.parse_quoted.call(null,a)}; +cljs_time.internal.parse.parse=function(a,b){a=cljs.core.map.call(null,cljs_time.internal.parse.lookup,cljs_time.internal.parse.read_pattern.call(null,a));for(var c=cljs.core.seq.call(null,a),d=cljs.core.first.call(null,c),e=cljs.core.next.call(null,c),f=cljs.core.PersistentVector.EMPTY,g=b,h=a,k=f;;){var l=g,m=h,n=cljs.core.seq.call(null,m),p=cljs.core.first.call(null,n),q=cljs.core.next.call(null,n),r=p,t=q,u=k;g=function(a,c,d,e,f,g,h,k,l,m,n,p,q,r,u,t,J,L,M,N){return function(){return cljs.core.ex_info.call(null, +["Invalid format: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(b)," is malformed at ",cljs.core.pr_str.call(null,e)].join(""),new cljs.core.PersistentArrayMap(null,2,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"parse-error","parse-error",255902478),new cljs.core.Keyword(null,"sub-type","sub-type",-997954412),new cljs.core.Keyword(null,"invalid-format","invalid-format",-72676108)],null))}}(g,h,k,l,m,n,p,q,r,t,u,b,a,a,c,d,e,d,e,f);if(cljs.core.seq.call(null,l)){if(null== +r)throw 
g.call(null);h=r.call(null,l);g=cljs.core.nth.call(null,h,0,null);h=cljs.core.nth.call(null,h,1,null);u=cljs.core.conj.call(null,u,g);g=h;h=t;k=u}else{if(cljs.core.truth_(r))throw g.call(null);return u}}};cljs_time.internal.parse.infer_years=function(a,b){var c=(new goog.date.Date).getYear(),d=c-30;c-=cljs.core.mod.call(null,c,100);a=cljs.core.truth_(a)?a:cljs.core.truth_(b)?b:0;return ad?"AM":"PM":12>d?"am":"pm";return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[[cljs.core.str.cljs$core$IFn$_invoke$arity$1(b),d].join(""),c],null)}}; +cljs_time.internal.unparse.unparse_timezone=function(){return function(a,b){return b instanceof goog.date.UtcDateTime?new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[[cljs.core.str.cljs$core$IFn$_invoke$arity$1(a),cljs.core.str.cljs$core$IFn$_invoke$arity$1(b.getTimezoneOffsetString())].join(""),b],null):new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[a,b],null)}}; +cljs_time.internal.unparse.unparse_ordinal_suffix=function(a){return function(b,c){var d=a.call(null,c),e=function(){switch(d){case 1:return"st";case 2:return"nd";case 3:return"rd";case 21:return"st";case 22:return"nd";case 23:return"rd";case 31:return"st";default:return"th"}}();return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[[cljs.core.str.cljs$core$IFn$_invoke$arity$1(b),cljs.core.str.cljs$core$IFn$_invoke$arity$1(e)].join(""),c],null)}}; +cljs_time.internal.unparse.lookup=function(a){var b=cljs.core.nth.call(null,a,0,null);a=cljs.core.nth.call(null,a,1,null);if(cljs.core._EQ_.call(null,b,new cljs.core.Keyword(null,"token","token",-1211463215)))switch(a){case "S":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"millis","millis",-1338288387),1,2],null);case "SSS":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null, +"millis","millis",-1338288387),3,3],null);case "s":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"seconds","seconds",-445266194),1,2],null);case "ss":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"seconds","seconds",-445266194),2,2],null);case "m":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"minutes", +"minutes",1319166394),1,2],null);case "mm":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"minutes","minutes",1319166394),2,2],null);case "h":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"hours","hours",58380855),1,2],null);case "hh":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"hours","hours",58380855), +2,2],null);case "H":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"HOURS","HOURS",-1611068963),1,2],null);case "HH":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"HOURS","HOURS",-1611068963),2,2],null);case "d":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"day","day",-274800446),1,2],null);case "dd":return new 
cljs.core.PersistentVector(null, +3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"day","day",-274800446),2,2],null);case "D":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"day","day",-274800446),1,3],null);case "DD":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"day","day",-274800446),2,3],null);case "DDD":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE, +[new cljs.core.Keyword(null,"day","day",-274800446),3,3],null);case "M":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"month","month",-1960248533),1,2],null);case "MM":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"month","month",-1960248533),2,2],null);case "MMM":return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null, +"month-name","month-name",-605509534),!0],null);case "MMMM":return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"month-name","month-name",-605509534),!1],null);case "y":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"year","year",335913393),1,4],null);case "yy":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null, +"year","year",335913393),2,2],null);case "yyyy":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"year","year",335913393),4,4],null);case "Y":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"year","year",335913393),1,4],null);case "YY":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"year","year",335913393), +2,2],null);case "YYYY":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"year","year",335913393),4,4],null);case "x":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"weekyear","weekyear",-74064500),1,4],null);case "xx":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"weekyear","weekyear",-74064500),2,2],null); +case "xxxx":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"weekyear","weekyear",-74064500),4,4],null);case "w":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"weekyear-week","weekyear-week",795291571),1,2],null);case "ww":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"weekyear-week","weekyear-week",795291571), +2,2],null);case "e":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"day-of-week","day-of-week",1639326729),1,1],null);case "E":return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"day-name","day-name",1806125744),!0],null);case "EEE":return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new 
cljs.core.Keyword(null,"day-name","day-name",1806125744), +!0],null);case "EEEE":return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"day-name","day-name",1806125744),!1],null);case "a":return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"meridiem","meridiem",1668960617),!1],null);case "A":return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"meridiem","meridiem",1668960617), +!0],null);case "Z":return new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"timezone","timezone",1831928099)],null);case "ZZ":return new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"timezone","timezone",1831928099)],null);case "o":return new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"ordinal-suffix","ordinal-suffix",-1311843199)], +null);default:throw Error(["No matching clause: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)].join(""));}else return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"quoted","quoted",2117344952),a],null)}; +cljs_time.internal.unparse.lookup_getter=function(a){a=a instanceof cljs.core.Keyword?a.fqn:null;switch(a){case "millis":return function(a){return a.getMilliseconds()};case "seconds":return function(a){return a.getSeconds()};case "minutes":return function(a){return a.getMinutes()};case "hours":return function(a){return a.getHours()};case "HOURS":return function(a){return a.getHours()};case "day":return function(a){return a.getDate()};case "month":return function(a){return a.getMonth()};case "year":return function(a){return a.getYear()}; +default:throw Error(["No matching clause: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)].join(""));}}; +cljs_time.internal.unparse.lookup_fn=function(a,b,c){var d=cljs.core.seq.call(null,c);c=cljs.core.first.call(null,d);d=cljs.core.next.call(null,d);c=c instanceof cljs.core.Keyword?c.fqn:null;switch(c){case "millis":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_millis,d);case "seconds":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_seconds,d);case "minutes":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_minutes,d);case "hours":return cljs.core.apply.call(null, +cljs_time.internal.unparse.unparse_hours,d);case "HOURS":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_HOURS,d);case "day":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_day,d);case "month":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_month,d);case "month-name":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_month_name,d);case "year":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_year,d);case "weekyear":return cljs.core.apply.call(null, +cljs_time.internal.unparse.unparse_weekyear,d);case "weekyear-week":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_weekyear_week,d);case "day-name":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_day_name,d);case "day-of-week":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_day_of_week,d);case "meridiem":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_meridiem,d);case "timezone":return 
cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_timezone, +d);case "ordinal-suffix":return a=a.call(null,b-1),a=cljs.core.nth.call(null,a,0,null),cljs_time.internal.unparse.unparse_ordinal_suffix.call(null,cljs_time.internal.unparse.lookup_getter.call(null,a));case "quoted":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_quoted,d);default:throw Error(["No matching clause: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(c)].join(""));}}; +cljs_time.internal.unparse.unparse=function(a,b){a=cljs.core.mapv.call(null,cljs_time.internal.unparse.lookup,cljs_time.internal.parse.read_pattern.call(null,a));var c=cljs.core.map_indexed.call(null,cljs.core.partial.call(null,cljs_time.internal.unparse.lookup_fn,a),a);a=cljs.core.seq.call(null,c);cljs.core.first.call(null,a);cljs.core.next.call(null,a);a=b;for(b="";;){var d=cljs.core.seq.call(null,c);c=cljs.core.first.call(null,d);var e=cljs.core.next.call(null,d);d=c;c=e;if(null==d)return b;a= +d.call(null,b,a);b=cljs.core.nth.call(null,a,0,null);a=cljs.core.nth.call(null,a,1,null)}};goog.i18n.currency={};goog.i18n.currency.PRECISION_MASK_=7;goog.i18n.currency.POSITION_FLAG_=16;goog.i18n.currency.SPACE_FLAG_=32;goog.i18n.currency.tier2Enabled_=!1;goog.i18n.currency.isAvailable=function(a){return a in goog.i18n.currency.CurrencyInfo};goog.i18n.currency.addTier2Support=function(){if(!goog.i18n.currency.tier2Enabled_){for(const a in goog.i18n.currency.CurrencyInfoTier2)goog.i18n.currency.CurrencyInfo[a]=goog.i18n.currency.CurrencyInfoTier2[a];goog.i18n.currency.tier2Enabled_=!0}}; +goog.i18n.currency.getGlobalCurrencyPattern=function(a){const b=goog.i18n.currency.CurrencyInfo[a],c=b[0];return a==b[1]?goog.i18n.currency.getCurrencyPattern_(c,b[1]):a+" "+goog.i18n.currency.getCurrencyPattern_(c,b[1])};goog.i18n.currency.getGlobalCurrencySign=function(a){const b=goog.i18n.currency.CurrencyInfo[a];return a==b[1]?a:a+" "+b[1]};goog.i18n.currency.getGlobalCurrencySignWithFallback=function(a){var b=goog.i18n.currency.CurrencyInfo[a];return b?a==b[1]?a:a+" "+b[1]:a}; +goog.i18n.currency.getLocalCurrencyPattern=function(a){a=goog.i18n.currency.CurrencyInfo[a];return goog.i18n.currency.getCurrencyPattern_(a[0],a[1])};goog.i18n.currency.getLocalCurrencySign=function(a){return goog.i18n.currency.CurrencyInfo[a][1]};goog.i18n.currency.getLocalCurrencySignWithFallback=function(a){return a in goog.i18n.currency.CurrencyInfo?goog.i18n.currency.CurrencyInfo[a][1]:a}; +goog.i18n.currency.getPortableCurrencyPattern=function(a){a=goog.i18n.currency.CurrencyInfo[a];return goog.i18n.currency.getCurrencyPattern_(a[0],a[2])};goog.i18n.currency.getPortableCurrencySign=function(a){return goog.i18n.currency.CurrencyInfo[a][2]};goog.i18n.currency.isValid=function(a){if(!a||3!==a.length)return!1;for(let b=0;3>b;b++){const c=a[b];if("A">c||"Z"c||"z"this.negativePrefix_.length?d=!1:this.positivePrefix_.length=p)m+=p,e=!0;else if(n==h.charAt(0)){if(c|| +d)break;m+=".";c=!0}else if(n==k.charAt(0)&&(" "!=k.charAt(0)||b[0]+1a||0==a&&0>1/a;b.push(d?this.negativePrefix_:this.positivePrefix_);isFinite(a)?(a=a*(d?-1:1)*this.multiplier_,this.useExponentialNotation_?this.subformatExponential_(a,b):this.subformatFixed_(a,this.minimumIntegerDigits_, +b)):b.push(this.getNumberFormatSymbols_().INFINITY);b.push(d?this.negativeSuffix_:this.positiveSuffix_);b.push(c.suffix);return b.join("")}; +goog.i18n.NumberFormat.prototype.roundNumber_=function(a){var b=goog.i18n.NumberFormat.decimalShift_,c=b(a,this.maximumFractionDigits_);0this.maximumFractionDigits_)throw Error("Min 
value must be less than max value");c||(c=[]);a=this.roundNumber_(a);var d=a.intValue,e=a.fracValue,f=0==d?0:this.intLog10_(d)+1,g=0b.length&&(b="1"+goog.string.repeat("0",this.maximumFractionDigits_-b.length)+b);for(e=b.length;"0"==b.charAt(e-1)&&e>a+1;)e--;for(d=1;da?(a=-a,b.push(this.getNumberFormatSymbols_().MINUS_SIGN)):this.useSignForPositiveExponent_&&b.push(this.getNumberFormatSymbols_().PLUS_SIGN);a=""+a;for(var c=this.getNumberFormatSymbols_().ZERO_DIGIT,d=a.length;dthis.minimumIntegerDigits_?(d=c%this.maximumIntegerDigits_,0>d&&(d=this.maximumIntegerDigits_+d),a=goog.i18n.NumberFormat.decimalShift_(a,d),c-=d,d=1):1>this.minimumIntegerDigits_? +(c++,a=goog.i18n.NumberFormat.decimalShift_(a,-1)):(c-=this.minimumIntegerDigits_-1,a=goog.i18n.NumberFormat.decimalShift_(a,this.minimumIntegerDigits_-1));this.subformatFixed_(a,d,b);this.addExponentPart_(c,b)}};goog.i18n.NumberFormat.prototype.getDigit_=function(a){a=a.charCodeAt(0);if(48<=a&&58>a)return a-48;var b=this.getNumberFormatSymbols_().ZERO_DIGIT.charCodeAt(0);return b<=a&&ac&&g++;break;case goog.i18n.NumberFormat.PATTERN_ZERO_DIGIT_:if(0c&&g++;break;case goog.i18n.NumberFormat.PATTERN_GROUPING_SEPARATOR_:0 +d+e||1>this.minExponentDigits_)throw Error('Malformed exponential pattern "'+a+'"');k=!1;break;default:b[0]--,k=!1}0==e&&0c&&0d+e)||0==g)throw Error('Malformed pattern "'+a+'"');a=d+e+f;this.maximumFractionDigits_=0<=c?a-c:0;0<=c&&(this.minimumFractionDigits_=d+e-c,0>this.minimumFractionDigits_&&(this.minimumFractionDigits_=0));this.minimumIntegerDigits_=(0<=c?c:a)-d;this.useExponentialNotation_&&(this.maximumIntegerDigits_=d+this.minimumIntegerDigits_, +0==this.maximumFractionDigits_&&0==this.minimumIntegerDigits_&&(this.minimumIntegerDigits_=1));this.groupingArray_.push(Math.max(0,g));this.decimalSeparatorAlwaysShown_=0==c||c==a};goog.i18n.NumberFormat.NULL_UNIT_={prefix:"",suffix:"",divisorBase:0}; +goog.i18n.NumberFormat.prototype.getUnitFor_=function(a,b){var c=this.compactStyle_==goog.i18n.NumberFormat.CompactStyle.SHORT?goog.i18n.CompactNumberFormatSymbols.COMPACT_DECIMAL_SHORT_PATTERN:goog.i18n.CompactNumberFormatSymbols.COMPACT_DECIMAL_LONG_PATTERN;null==c&&(c=goog.i18n.CompactNumberFormatSymbols.COMPACT_DECIMAL_SHORT_PATTERN);if(3>a)return goog.i18n.NumberFormat.NULL_UNIT_;var d=goog.i18n.NumberFormat.decimalShift_;a=Math.min(14,a);var e=c[d(1,a)];for(--a;!e&&3<=a;)e=c[d(1,a)],a--;if(!e)return goog.i18n.NumberFormat.NULL_UNIT_; +b=e[b];return b&&"0"!=b?(b=/([^0]*)(0+)(.*)/.exec(b))?{prefix:b[1],suffix:b[3],divisorBase:a+1-(b[2].length-1)}:goog.i18n.NumberFormat.NULL_UNIT_:goog.i18n.NumberFormat.NULL_UNIT_}; +goog.i18n.NumberFormat.prototype.getUnitAfterRounding_=function(a,b){if(this.compactStyle_==goog.i18n.NumberFormat.CompactStyle.NONE)return goog.i18n.NumberFormat.NULL_UNIT_;a=Math.abs(a);b=Math.abs(b);var c=this.pluralForm_(a),d=1>=a?0:this.intLog10_(a);c=this.getUnitFor_(d,c).divisorBase;b=goog.i18n.NumberFormat.decimalShift_(b,-c);b=this.roundNumber_(b);a=goog.i18n.NumberFormat.decimalShift_(a,-c);a=this.roundNumber_(a);b=this.pluralForm_(b.intValue+b.fracValue);return this.getUnitFor_(c+this.intLog10_(a.intValue), +b)};goog.i18n.NumberFormat.prototype.intLog10_=function(a){if(!isFinite(a))return 0=a%10||9==a%10)&&(10>a%100||19a%100||79a%100||99=c%10&&(12>c%100||14=a.f%10&&(12>a.f%100||14=a?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.csSelect_=function(a,b){const c=a|0;a=goog.i18n.pluralRules.get_vf_(a,b);return 
1==c&&0==a.v?goog.i18n.pluralRules.Keyword.ONE:2<=c&&4>=c&&0==a.v?goog.i18n.pluralRules.Keyword.FEW:0!=a.v?goog.i18n.pluralRules.Keyword.MANY:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.plSelect_=function(a,b){const c=a|0;a=goog.i18n.pluralRules.get_vf_(a,b);return 1==c&&0==a.v?goog.i18n.pluralRules.Keyword.ONE:0==a.v&&2<=c%10&&4>=c%10&&(12>c%100||14=c%10||0==a.v&&5<=c%10&&9>=c%10||0==a.v&&12<=c%100&&14>=c%100?goog.i18n.pluralRules.Keyword.MANY:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.shiSelect_=function(a,b){return 0==(a|0)||1==a?goog.i18n.pluralRules.Keyword.ONE:2<=a&&10>=a?goog.i18n.pluralRules.Keyword.FEW:goog.i18n.pluralRules.Keyword.OTHER};goog.i18n.pluralRules.lvSelect_=function(a,b){b=goog.i18n.pluralRules.get_vf_(a,b);return 0==a%10||11<=a%100&&19>=a%100||2==b.v&&11<=b.f%100&&19>=b.f%100?goog.i18n.pluralRules.Keyword.ZERO:1==a%10&&11!=a%100||2==b.v&&1==b.f%10&&11!=b.f%100||2!=b.v&&1==b.f%10?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.iuSelect_=function(a,b){return 1==a?goog.i18n.pluralRules.Keyword.ONE:2==a?goog.i18n.pluralRules.Keyword.TWO:goog.i18n.pluralRules.Keyword.OTHER};goog.i18n.pluralRules.heSelect_=function(a,b){const c=a|0;b=goog.i18n.pluralRules.get_vf_(a,b);return 1==c&&0==b.v?goog.i18n.pluralRules.Keyword.ONE:2==c&&0==b.v?goog.i18n.pluralRules.Keyword.TWO:0==b.v&&(0>a||10=a%100?goog.i18n.pluralRules.Keyword.FEW:11<=a%100&&19>=a%100?goog.i18n.pluralRules.Keyword.MANY:goog.i18n.pluralRules.Keyword.OTHER};goog.i18n.pluralRules.siSelect_=function(a,b){const c=a|0;b=goog.i18n.pluralRules.get_vf_(a,b);return 0==a||1==a||0==c&&1==b.f?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.cySelect_=function(a,b){return 0==a?goog.i18n.pluralRules.Keyword.ZERO:1==a?goog.i18n.pluralRules.Keyword.ONE:2==a?goog.i18n.pluralRules.Keyword.TWO:3==a?goog.i18n.pluralRules.Keyword.FEW:6==a?goog.i18n.pluralRules.Keyword.MANY:goog.i18n.pluralRules.Keyword.OTHER};goog.i18n.pluralRules.daSelect_=function(a,b){const c=a|0;b=goog.i18n.pluralRules.get_vf_(a,b);b=goog.i18n.pluralRules.get_wt_(b.v,b.f);return 1==a||0!=b.t&&(0==c||1==c)?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.ruSelect_=function(a,b){const c=a|0;a=goog.i18n.pluralRules.get_vf_(a,b);return 0==a.v&&1==c%10&&11!=c%100?goog.i18n.pluralRules.Keyword.ONE:0==a.v&&2<=c%10&&4>=c%10&&(12>c%100||14=c%10||0==a.v&&11<=c%100&&14>=c%100?goog.i18n.pluralRules.Keyword.MANY:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.gvSelect_=function(a,b){const c=a|0;a=goog.i18n.pluralRules.get_vf_(a,b);return 0==a.v&&1==c%10?goog.i18n.pluralRules.Keyword.ONE:0==a.v&&2==c%10?goog.i18n.pluralRules.Keyword.TWO:0!=a.v||0!=c%100&&20!=c%100&&40!=c%100&&60!=c%100&&80!=c%100?0!=a.v?goog.i18n.pluralRules.Keyword.MANY:goog.i18n.pluralRules.Keyword.OTHER:goog.i18n.pluralRules.Keyword.FEW}; +goog.i18n.pluralRules.beSelect_=function(a,b){return 1==a%10&&11!=a%100?goog.i18n.pluralRules.Keyword.ONE:2<=a%10&&4>=a%10&&(12>a%100||14=a%10||11<=a%100&&14>=a%100?goog.i18n.pluralRules.Keyword.MANY:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.gaSelect_=function(a,b){return 1==a?goog.i18n.pluralRules.Keyword.ONE:2==a?goog.i18n.pluralRules.Keyword.TWO:3<=a&&6>=a?goog.i18n.pluralRules.Keyword.FEW:7<=a&&10>=a?goog.i18n.pluralRules.Keyword.MANY:goog.i18n.pluralRules.Keyword.OTHER};goog.i18n.pluralRules.esSelect_=function(a,b){return 
1==a?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.dsbSelect_=function(a,b){const c=a|0;a=goog.i18n.pluralRules.get_vf_(a,b);return 0==a.v&&1==c%100||1==a.f%100?goog.i18n.pluralRules.Keyword.ONE:0==a.v&&2==c%100||2==a.f%100?goog.i18n.pluralRules.Keyword.TWO:0==a.v&&3<=c%100&&4>=c%100||3<=a.f%100&&4>=a.f%100?goog.i18n.pluralRules.Keyword.FEW:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.lagSelect_=function(a,b){b=a|0;return 0==a?goog.i18n.pluralRules.Keyword.ZERO:0!=b&&1!=b||0==a?goog.i18n.pluralRules.Keyword.OTHER:goog.i18n.pluralRules.Keyword.ONE};goog.i18n.pluralRules.mkSelect_=function(a,b){const c=a|0;a=goog.i18n.pluralRules.get_vf_(a,b);return 0==a.v&&1==c%10&&11!=c%100||1==a.f%10&&11!=a.f%100?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.isSelect_=function(a,b){const c=a|0;a=goog.i18n.pluralRules.get_vf_(a,b);a=goog.i18n.pluralRules.get_wt_(a.v,a.f);return 0==a.t&&1==c%10&&11!=c%100||0!=a.t?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER};goog.i18n.pluralRules.kshSelect_=function(a,b){return 0==a?goog.i18n.pluralRules.Keyword.ZERO:1==a?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.roSelect_=function(a,b){const c=a|0;b=goog.i18n.pluralRules.get_vf_(a,b);return 1==c&&0==b.v?goog.i18n.pluralRules.Keyword.ONE:0!=b.v||0==a||2<=a%100&&19>=a%100?goog.i18n.pluralRules.Keyword.FEW:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.arSelect_=function(a,b){return 0==a?goog.i18n.pluralRules.Keyword.ZERO:1==a?goog.i18n.pluralRules.Keyword.ONE:2==a?goog.i18n.pluralRules.Keyword.TWO:3<=a%100&&10>=a%100?goog.i18n.pluralRules.Keyword.FEW:11<=a%100&&99>=a%100?goog.i18n.pluralRules.Keyword.MANY:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.gdSelect_=function(a,b){return 1==a||11==a?goog.i18n.pluralRules.Keyword.ONE:2==a||12==a?goog.i18n.pluralRules.Keyword.TWO:3<=a&&10>=a||13<=a&&19>=a?goog.i18n.pluralRules.Keyword.FEW:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.slSelect_=function(a,b){const c=a|0;a=goog.i18n.pluralRules.get_vf_(a,b);return 0==a.v&&1==c%100?goog.i18n.pluralRules.Keyword.ONE:0==a.v&&2==c%100?goog.i18n.pluralRules.Keyword.TWO:0==a.v&&3<=c%100&&4>=c%100||0!=a.v?goog.i18n.pluralRules.Keyword.FEW:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.ltSelect_=function(a,b){b=goog.i18n.pluralRules.get_vf_(a,b);return 1==a%10&&(11>a%100||19=a%10&&(11>a%100||19=a||11<=a&&99>=a?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.enSelect_=function(a,b){const c=a|0;a=goog.i18n.pluralRules.get_vf_(a,b);return 1==c&&0==a.v?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.kwSelect_=function(a,b){return 0==a?goog.i18n.pluralRules.Keyword.ZERO:1==a?goog.i18n.pluralRules.Keyword.ONE:2==a%100||22==a%100||42==a%100||62==a%100||82==a%100||0==a%1E3&&(1E3<=a%1E5&&2E4>=a%1E5||4E4==a%1E5||6E4==a%1E5||8E4==a%1E5)||0!=a&&1E5==a%1E6?goog.i18n.pluralRules.Keyword.TWO:3==a%100||23==a%100||43==a%100||63==a%100||83==a%100?goog.i18n.pluralRules.Keyword.FEW:1==a||1!=a%100&&21!=a%100&&41!=a%100&&61!=a%100&&81!=a%100?goog.i18n.pluralRules.Keyword.OTHER:goog.i18n.pluralRules.Keyword.MANY}; +goog.i18n.pluralRules.akSelect_=function(a,b){return 
0<=a&&1>=a?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER};goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_;"af"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"am"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.hiSelect_);"ar"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.arSelect_); +if("ar_DZ"==goog.LOCALE||"ar-DZ"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.arSelect_;if("ar_EG"==goog.LOCALE||"ar-EG"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.arSelect_;"az"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"be"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.beSelect_);"bg"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_); +"bn"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.hiSelect_);"br"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.brSelect_);"bs"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.srSelect_);"ca"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);"chr"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"cs"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.csSelect_); +"cy"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.cySelect_);"da"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.daSelect_);"de"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);if("de_AT"==goog.LOCALE||"de-AT"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_;if("de_CH"==goog.LOCALE||"de-CH"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_; +"el"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"en"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);if("en_AU"==goog.LOCALE||"en-AU"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_;if("en_CA"==goog.LOCALE||"en-CA"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_;if("en_GB"==goog.LOCALE||"en-GB"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_; +if("en_IE"==goog.LOCALE||"en-IE"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_;if("en_IN"==goog.LOCALE||"en-IN"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_;if("en_SG"==goog.LOCALE||"en-SG"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_;if("en_US"==goog.LOCALE||"en-US"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_; +if("en_ZA"==goog.LOCALE||"en-ZA"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_;"es"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);if("es_419"==goog.LOCALE||"es-419"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_;if("es_ES"==goog.LOCALE||"es-ES"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_;if("es_MX"==goog.LOCALE||"es-MX"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_; 
+if("es_US"==goog.LOCALE||"es-US"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_;"et"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);"eu"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"fa"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.hiSelect_);"fi"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);"fil"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.filSelect_); +"fr"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.frSelect_);if("fr_CA"==goog.LOCALE||"fr-CA"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.frSelect_;"ga"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.gaSelect_);"gl"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);"gsw"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"gu"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.hiSelect_); +"haw"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"he"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.heSelect_);"hi"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.hiSelect_);"hr"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.srSelect_);"hu"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"hy"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.frSelect_); +"id"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_);"in"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_);"is"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.isSelect_);"it"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);"iw"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.heSelect_);"ja"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_); +"ka"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"kk"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"km"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_);"kn"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.hiSelect_);"ko"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_);"ky"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_); +"ln"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.akSelect_);"lo"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_);"lt"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.ltSelect_);"lv"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.lvSelect_);"mk"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.mkSelect_);"ml"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_); +"mn"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"mo"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.roSelect_);"mr"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"ms"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_);"mt"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.mtSelect_);"my"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_); 
+"nb"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"ne"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"nl"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);"no"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);if("no_NO"==goog.LOCALE||"no-NO"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_;"or"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_); +"pa"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.akSelect_);"pl"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.plSelect_);"pt"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.ptSelect_);if("pt_BR"==goog.LOCALE||"pt-BR"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.ptSelect_;if("pt_PT"==goog.LOCALE||"pt-PT"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_; +"ro"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.roSelect_);"ru"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.ruSelect_);"sh"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.srSelect_);"si"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.siSelect_);"sk"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.csSelect_);"sl"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.slSelect_); +"sq"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"sr"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.srSelect_);if("sr_Latn"==goog.LOCALE||"sr-Latn"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.srSelect_;"sv"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);"sw"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);"ta"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_); +"te"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"th"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_);"tl"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.filSelect_);"tr"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"uk"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.ruSelect_);"ur"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_); +"uz"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"vi"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_);"zh"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_);if("zh_CN"==goog.LOCALE||"zh-CN"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_;if("zh_HK"==goog.LOCALE||"zh-HK"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_; 
+if("zh_TW"==goog.LOCALE||"zh-TW"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_;"zu"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.hiSelect_);goog.i18n.MessageFormat=function(a){this.pattern_=a;this.parsedPattern_=this.literals_=this.initialLiterals_=null;this.numberFormatter_=goog.i18n.MessageFormat.getNumberFormatter_()};goog.i18n.MessageFormat.numberFormatterSymbols_=null;goog.i18n.MessageFormat.compactNumberFormatterSymbols_=null;goog.i18n.MessageFormat.numberFormatter_=null;goog.i18n.MessageFormat.LITERAL_PLACEHOLDER_="﷟_";goog.i18n.MessageFormat.Element_={STRING:0,BLOCK:1}; +goog.i18n.MessageFormat.BlockType_={PLURAL:0,ORDINAL:1,SELECT:2,SIMPLE:3,STRING:4,UNKNOWN:5};goog.i18n.MessageFormat.OTHER_="other";goog.i18n.MessageFormat.REGEX_LITERAL_=/'([{}#].*?)'/g;goog.i18n.MessageFormat.REGEX_DOUBLE_APOSTROPHE_=/''/g; +goog.i18n.MessageFormat.getNumberFormatter_=function(){var a=goog.i18n.NumberFormatSymbols,b=goog.i18n.CompactNumberFormatSymbols;if(goog.i18n.MessageFormat.numberFormatterSymbols_!==a||goog.i18n.MessageFormat.compactNumberFormatterSymbols_!==b)goog.i18n.MessageFormat.numberFormatterSymbols_=a,goog.i18n.MessageFormat.compactNumberFormatterSymbols_=b,goog.i18n.MessageFormat.numberFormatter_=new goog.i18n.NumberFormat(goog.i18n.NumberFormat.Format.DECIMAL);return goog.i18n.MessageFormat.numberFormatter_}; +goog.i18n.MessageFormat.prototype.format=function(a){return this.format_(a,!1)};goog.i18n.MessageFormat.prototype.formatIgnoringPound=function(a){return this.format_(a,!0)}; +goog.i18n.MessageFormat.prototype.format_=function(a,b){this.init_();if(!this.parsedPattern_||0==this.parsedPattern_.length)return"";this.literals_=goog.array.clone(this.initialLiterals_);var c=[];this.formatBlock_(this.parsedPattern_,a,b,c);a=c.join("");for(b||goog.asserts.assert(-1==a.search("#"),"Not all # were replaced.");0=a)return d;a=cljs.core.second.call(null,cljs.core.first.call(null,cljs.core.rsubseq.call(null,f,cljs.core._LT_,e*a)));return cljs.core.truth_(a)? 
+a:d}}; +bigml.dixie.flatline.population.cdf=function(a){var b=null!=a&&(a.cljs$lang$protocol_mask$partition0$&64||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$ISeq$)?cljs.core.apply.call(null,cljs.core.hash_map,a):a;a=cljs.core.get.call(null,b,new cljs.core.Keyword(null,"bins","bins",1670395210));b=cljs.core.get.call(null,b,new cljs.core.Keyword(null,"counts","counts",234305892));b=cljs.core.sort_by.call(null,cljs.core.first,cljs.core._LT_,cljs.core.empty_QMARK_.call(null,b)?a:b);a=cljs.core.map.call(null,cljs.core.first, +b);var c=cljs.core.last.call(null,a),d=cljs.core.first.call(null,a);b=cljs.core.map.call(null,cljs.core.second,b);var e=cljs.core.reduce.call(null,cljs.core._PLUS_,b);b=cljs.core.map.call(null,function(a){return a/e},b);b=cljs.core.reductions.call(null,cljs.core._PLUS_,b);var f=cljs.core.into.call(null,cljs.core.sorted_map.call(null),cljs.core.map.call(null,cljs.core.vector,a,b));return function(a){if(a>=c)return 1;if(af?d:c.call(null,b)}}; +bigml.dixie.flatline.args.choose_type=function(a){return cljs.core.juxt.call(null,bigml.dixie.flatline.types.fn_type,bigml.dixie.flatline.types.fn_desc).call(null,function(){var b=cljs.core.some.call(null,function(a){return bigml.dixie.flatline.types.var_fn_QMARK_.call(null,a)?a:null},a);return cljs.core.truth_(b)?b:cljs.core.first.call(null,a)}())}; +bigml.dixie.flatline.args.check_args=function(a){for(var b=[],c=arguments.length,d=0;;)if(d>16)+(b>>16)+(c>>16)<<16|c&65535}function c(a,b){return a>>>b|a<<32-b}a=function(a){a=a.replace(/\r\n/g,"\n");for(var b="",c=0;cd?b+=String.fromCharCode(d):(127d?b+=String.fromCharCode(d>>6|192):(b+=String.fromCharCode(d>>12|224),b+=String.fromCharCode(d>>6&63|128)),b+=String.fromCharCode(d&63|128))}return b}(a);return function(a){for(var b="",c=0;c<4*a.length;c++)b+= +"0123456789abcdef".charAt(a[c>>2]>>8*(3-c%4)+4&15)+"0123456789abcdef".charAt(a[c>>2]>>8*(3-c%4)&15);return b}(function(a,e){var d=[1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372, +1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298],g=[1779033703,3144134277,1013904242,2773480762,1359893119,2600822924,528734635,1541459225],h=Array(64),k,l;a[e>>5]|=128<<24-e%32;a[(e+64>>9<<4)+15]=e;for(k=0;kl;l++){if(16>l)h[l]=a[l+k];else{var v=l;var w=h[l-2];w=c(w,17)^c(w,19)^w>>>10;w=b(w,h[l-7]);var x=h[l-15];x=c(x,7)^c(x,18)^x>>>3;h[v]=b(b(w,x),h[l-16])}v=q;v=c(v,6)^c(v,11)^c(v,25);v=b(b(b(b(u,v),q&r^~q&t),d[l]),h[l]);u=e;u=c(u,2)^c(u,13)^c(u,22);w=b(u,e&m^e&n^m&n);u=t;t=r;r=q;q=b(p,v);p=n;n=m;m=e;e=b(v,w)}g[0]=b(e,g[0]);g[1]=b(m,g[1]);g[2]=b(n,g[2]);g[3]=b(p,g[3]);g[4]=b(q,g[4]);g[5]=b(r,g[5]);g[6]=b(t,g[6]);g[7]=b(u, +g[7])}return g}(function(a){for(var b=[],c=0;c<8*a.length;c+=8)b[c>>5]|=(a.charCodeAt(c/8)&255)<<24-c%32;return b}(a),8*a.length))};bigml.dixie.flatline.text={};bigml.dixie.flatline.text.term_stream=function(a){return"string"===typeof 
a?cljs.core.keep.call(null,cljs.core.not_empty,clojure.string.split.call(null,a,/[\W_]/)):null};bigml.dixie.flatline.text.term_stemmer=function(a){return cljs.core.identity};bigml.dixie.flatline.text.detect_language=cljs.core.constantly.call(null,new cljs.core.Keyword(null,"en","en",88457073)); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"occurrences","occurrences",295025356),function(){var a=function(a,b,e,f){a=cljs.core.nth.call(null,e,0,null);f=cljs.core.nth.call(null,e,1,null);var c=cljs.core.nth.call(null,e,2,null);e=cljs.core.nth.call(null,e,3,null);bigml.dixie.flatline.errors.check_arity.call(null,b,2,4);e=cljs.core.truth_(e)?e:bigml.dixie.flatline.types.constant_fn.call(null,"none","string");c=cljs.core.truth_(c)?c:bigml.dixie.flatline.types.constant_fn.call(null, +!1);bigml.dixie.flatline.types.check_types.call(null,b,new cljs.core.PersistentVector(null,4,5,cljs.core.PersistentVector.EMPTY_NODE,[a,f,c,e],null),new cljs.core.PersistentVector(null,4,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"string","string",-1989541586),new cljs.core.Keyword(null,"string","string",-1989541586),new cljs.core.Keyword(null,"boolean","boolean",-1919418404),new cljs.core.Keyword(null,"string","string",-1989541586)],null));cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null, +c))||bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-type-error","flatline-type-error",845929452),"Case flag must be a constant in %s",b);cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,e))||bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-type-error","flatline-type-error",845929452),"Language must be a constant in %s",b);var d=bigml.dixie.flatline.text.term_stemmer.call(null,e.call(null)),k=cljs.core.truth_(d)? +cljs.core.truth_(c.call(null))?cljs.core.comp.call(null,d,clojure.string.lower_case):d:null;cljs.core.truth_(k)||bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-invalid-arguments","flatline-invalid-arguments",1649316504),"Unknown language %s in %s",e.call(null),b);if(cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,f))){var l=k.call(null,f.call(null)),m=cljs.core.comp.call(null,cljs.core.PersistentHashSet.createAsIfByAssoc([l]),k);b=function(a){return cljs.core.truth_(a)? 
+(cljs.core._EQ_.call(null,l,k.call(null,a))?1:0)+cljs.core.count.call(null,cljs.core.filter.call(null,m,bigml.dixie.flatline.text.term_stream.call(null,a))):null};return bigml.dixie.flatline.eval.make_primop.call(null,b,new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[a],null),new cljs.core.Keyword(null,"numeric","numeric",-1495594714))}b=function(a,b){if(cljs.core.truth_(cljs.core.truth_(a)?b:a)){b=k.call(null,b);var c=cljs.core.comp.call(null,cljs.core.PersistentHashSet.createAsIfByAssoc([b]), +k);c=cljs.core.filter.call(null,c,bigml.dixie.flatline.text.term_stream.call(null,a));return(cljs.core._EQ_.call(null,b,a)?1:0)+cljs.core.count.call(null,c)}return null};return bigml.dixie.flatline.eval.make_primop.call(null,b,new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[a,f],null),new cljs.core.Keyword(null,"numeric","numeric",-1495594714))},b=function(b,d,e,f){var c=null;if(3b)&&1>=b}); +cljs.core._add_method.call(null,bigml.hideo.error.valid.validate_STAR_,new cljs.core.Keyword(null,"posint","posint",-2069727394),function(a,b){return cljs.core.integer_QMARK_.call(null,b)&&0b)});cljs.core._add_method.call(null,bigml.hideo.error.valid.validate_STAR_,new cljs.core.Keyword(null,"non-negnum","non-negnum",-1832609773),function(a,b){return"number"===typeof b&&!(0>b)}); +cljs.core._add_method.call(null,bigml.hideo.error.valid.validate_STAR_,new cljs.core.Keyword(null,"negint","negint",-1697065950),function(a,b){return cljs.core.integer_QMARK_.call(null,b)&&0>b});cljs.core._add_method.call(null,bigml.hideo.error.valid.validate_STAR_,new cljs.core.Keyword(null,"negnum","negnum",1224917298),function(a,b){return"number"===typeof b&&0>b}); +cljs.core._add_method.call(null,bigml.hideo.error.valid.validate_STAR_,new cljs.core.Keyword(null,"percentage","percentage",-1610213650),function(a,b){return"number"===typeof b&&!(0>b)&&100>=b});cljs.core._add_method.call(null,bigml.hideo.error.valid.validate_STAR_,new cljs.core.Keyword(null,"numseq","numseq",-2000947142),function(a,b){return bigml.hideo.error.valid.is_array_QMARK_.call(null,b)&&!cljs.core.empty_QMARK_.call(null,b)&&cljs.core.every_QMARK_.call(null,cljs.core.number_QMARK_,b)}); +cljs.core._add_method.call(null,bigml.hideo.error.valid.validate_STAR_,new cljs.core.Keyword(null,"posintseq","posintseq",417757074),function(a,b){return bigml.hideo.error.valid.is_array_QMARK_.call(null,b)?cljs.core.empty_QMARK_.call(null,b)?!1:cljs.core.apply.call(null,bigml.hideo.error.valid.valid_QMARK_,new cljs.core.Keyword(null,"posint","posint",-2069727394),b):!1}); +cljs.core._add_method.call(null,bigml.hideo.error.valid.validate_STAR_,new cljs.core.Keyword(null,"string","string",-1989541586),function(a,b){return"string"===typeof b});cljs.core._add_method.call(null,bigml.hideo.error.valid.validate_STAR_,new cljs.core.Keyword(null,"nestring","nestring",-1735528560),function(a,b){return"string"===typeof b&&0cljs.core.nth.call(null,a,1)&&cljs.core.nth.call(null,a,4)>cljs.core.nth.call(null,a,2)}; +bigml.dixie.fields.regions.bounding_box_QMARK_=cljs.core.comp.call(null,cljs.core.empty_QMARK_,cljs.core.first);bigml.dixie.fields.regions.make_bounding_box=function(a,b){return new cljs.core.PersistentVector(null,5,5,cljs.core.PersistentVector.EMPTY_NODE,["",0,0,a,b],null)};bigml.dixie.fields.regions.regions_QMARK_=function(a){return 
cljs.core.sequential_QMARK_.call(null,a)&&cljs.core.every_QMARK_.call(null,bigml.dixie.fields.regions.region_QMARK_,a)};bigml.dixie.fields.regions.regions__GT_str=cljs.core.pr_str;bigml.dixie.flatline.regions={}; +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"region?","region?",1275253817),function(){var a=function(a,b,e,f){a=cljs.core.nth.call(null,e,0,null);bigml.dixie.flatline.errors.check_arity.call(null,b,1);return bigml.dixie.flatline.eval.make_primop.call(null,bigml.dixie.fields.regions.region_QMARK_,new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[a],null),new cljs.core.Keyword(null,"boolean","boolean",-1919418404))},b=function(b, +d,e,f){var c=null;if(3>>32-g,c)}function d(a,c,d,e,f,g,h){a=b(a,b(b(c&e|d&~e,f),h));return b(a<>>32-g,c)}function e(a,c,d,e,f,g,h){a=b(a,b(b(c^d^e,f),h));return b(a<>>32-g,c)}function f(a,c,d,e,f,g,h){a=b(a, +b(b(d^(c|~e),f),h));return b(a<>>32-g,c)}function g(a){var b="",c;for(c=0;3>=c;c++){var d=a>>>8*c&255;d="0"+d.toString(16);b+=d.substr(d.length-2,2)}return b}var h=[];a=function(a){a=a.replace(/\r\n/g,"\n");for(var b="",c=0;cd?b+=String.fromCharCode(d):(127d?b+=String.fromCharCode(d>>6|192):(b+=String.fromCharCode(d>>12|224),b+=String.fromCharCode(d>>6&63|128)),b+=String.fromCharCode(d&63|128))}return b}(a);h=function(a){var b=a.length;var c= +b+8;for(var d=16*((c-c%64)/64+1),e=Array(d-1),f,g=0;g>>29;return e}(a);var k=1732584193;var l=4023233417;var m=2562383102;var n=271733878;for(a=0;a>>32-b}function c(a){var b="",c;for(c=7;0<=c;c--){var d=a>>>4*c&15;b+=d.toString(16)}return b}var d,e=Array(80),f=1732584193,g=4023233417,h=2562383102,k=271733878,l=3285377520;a=function(a){a=a.replace(/\r\n/g,"\n");for(var b="",c=0;cd?b+=String.fromCharCode(d):(127d?b+=String.fromCharCode(d>>6|192):(b+=String.fromCharCode(d>>12|224),b+=String.fromCharCode(d>>6&63|128)),b+=String.fromCharCode(d& +63|128))}return b}(a);var m=a.length;var n=[];for(d=0;d>>29);n.push(m<<3&4294967295);for(a=0;ad;d++)e[d]=n[a+d];for(d=16;79>=d;d++)e[d]=b(e[d-3]^e[d-8]^e[d-14]^e[d-16],1);p=f;m=g;var q=h;var r=k;var t=l;for(d=0;19>=d;d++){var u=b(p,5)+(m&q|~m&r)+t+e[d]+1518500249&4294967295;t=r;r=q;q=b(m,30);m=p;p=u}for(d=20;39>=d;d++)u=b(p,5)+(m^q^r)+t+e[d]+1859775393&4294967295,t=r,r=q,q=b(m,30),m=p,p=u;for(d=40;59>=d;d++)u=b(p,5)+(m&q|m&r|q&r)+t+e[d]+2400959708&4294967295,t=r,r=q,q=b(m,30),m=p,p=u;for(d=60;79>=d;d++)u=b(p,5)+(m^q^r)+t+e[d]+3395469782&4294967295,t=r,r=q,q=b(m,30),m=p,p=u;f=f+p&4294967295; +g=g+m&4294967295;h=h+q&4294967295;k=k+r&4294967295;l=l+t&4294967295}u=c(f)+c(g)+c(h)+c(k)+c(l);return u.toLowerCase()};bigml.dixie.flatline.strings={};bigml.dixie.flatline.strings.deep_merge_with=function(a){for(var b=[],c=arguments.length,d=0;;)if(d=parseInt(a,b)}; +cljs.tools.reader.read_char_STAR_=function(a,b,c,d){b=cljs.tools.reader.reader_types.read_char.call(null,a);if(null!=b){b=cljs.tools.reader.macro_terminating_QMARK_.call(null,b)||cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null,b)?cljs.core.str.cljs$core$IFn$_invoke$arity$1(b):cljs.tools.reader.read_token.call(null,a,new cljs.core.Keyword(null,"character","character",380652989),b);c=b.length;if(1===c)return b.charAt(0);if(cljs.core._EQ_.call(null,b,"newline"))return"\n";if(cljs.core._EQ_.call(null, +b,"space"))return" 
";if(cljs.core._EQ_.call(null,b,"tab"))return"\t";if(cljs.core._EQ_.call(null,b,"backspace"))return"\b";if(cljs.core._EQ_.call(null,b,"formfeed"))return"\f";if(cljs.core._EQ_.call(null,b,"return"))return"\r";if(goog.string.startsWith(b,"u"))return b=cljs.tools.reader.read_unicode_char.call(null,b,1,4,16),c=b.charCodeAt(0),c>cljs.tools.reader.upper_limit&&cc?c:!1):null};cljs.tools.reader.check_reserved_features=function(a,b){return cljs.core.truth_(cljs.core.get.call(null,cljs.tools.reader.RESERVED_FEATURES,b))?cljs.tools.reader.impl.errors.reader_error.call(null,a,"Feature name ",b," is reserved"):null}; +cljs.tools.reader.check_invalid_read_cond=function(a,b,c){return a===cljs.tools.reader.READ_FINISHED?0>c?cljs.tools.reader.impl.errors.reader_error.call(null,b,"read-cond requires an even number of forms"):cljs.tools.reader.impl.errors.reader_error.call(null,b,"read-cond starting on line ",c," requires an even number of forms"):null}; +cljs.tools.reader.read_suppress=function(a,b,c,d){var e=cljs.tools.reader._STAR_suppress_read_STAR_;cljs.tools.reader._STAR_suppress_read_STAR_=!0;try{var f=cljs.tools.reader.read_STAR_.call(null,b,!1,cljs.tools.reader.READ_EOF,")",c,d);cljs.tools.reader.check_eof_error.call(null,f,b,a);return f===cljs.tools.reader.READ_FINISHED?cljs.tools.reader.READ_FINISHED:null}finally{cljs.tools.reader._STAR_suppress_read_STAR_=e}}; +if("undefined"===typeof cljs||"undefined"===typeof cljs.tools||"undefined"===typeof cljs.tools.reader||"undefined"===typeof cljs.tools.reader.NO_MATCH)cljs.tools.reader.NO_MATCH={}; +cljs.tools.reader.match_feature=function(a,b,c,d){var e=cljs.tools.reader.read_STAR_.call(null,b,!1,cljs.tools.reader.READ_EOF,")",c,d);cljs.tools.reader.check_eof_error.call(null,e,b,a);if(cljs.core._EQ_.call(null,e,cljs.tools.reader.READ_FINISHED))return cljs.tools.reader.READ_FINISHED;cljs.tools.reader.check_reserved_features.call(null,b,e);if(cljs.tools.reader.has_feature_QMARK_.call(null,b,e,c))return c=cljs.tools.reader.read_STAR_.call(null,b,!1,cljs.tools.reader.READ_EOF,")",c,d),cljs.tools.reader.check_eof_error.call(null, +c,b,a),cljs.tools.reader.check_invalid_read_cond.call(null,c,b,a),c;a=cljs.tools.reader.read_suppress.call(null,a,b,c,d);return cljs.core.truth_(a)?a:cljs.tools.reader.NO_MATCH}; +cljs.tools.reader.read_cond_delimited=function(a,b,c,d){var e=cljs.tools.reader.reader_types.indexing_reader_QMARK_.call(null,a)?cljs.tools.reader.reader_types.get_line_number.call(null,a):-1;a:for(var f=cljs.tools.reader.NO_MATCH,g=null;;)if(f===cljs.tools.reader.NO_MATCH)if(f=cljs.tools.reader.match_feature.call(null,e,a,c,d),f===cljs.tools.reader.READ_FINISHED){c=cljs.tools.reader.READ_FINISHED;break a}else g=null;else if(g!==cljs.tools.reader.READ_FINISHED)g=cljs.tools.reader.read_suppress.call(null, +e,a,c,d);else{c=f;break a}return c===cljs.tools.reader.READ_FINISHED?a:cljs.core.truth_(b)?null!=c&&(c.cljs$lang$protocol_mask$partition0$&16777216||cljs.core.PROTOCOL_SENTINEL===c.cljs$core$ISequential$)?(goog.array.insertArrayAt(d,cljs.core.to_array.call(null,c),0),a):cljs.tools.reader.impl.errors.reader_error.call(null,a,"Spliced form list in read-cond-splicing must implement ISequential"):c}; +cljs.tools.reader.read_cond=function(a,b,c,d){if(cljs.core.not.call(null,cljs.core.truth_(c)?(new cljs.core.PersistentHashSet(null,new cljs.core.PersistentArrayMap(null,2,[new cljs.core.Keyword(null,"preserve","preserve",1276846509),null,new cljs.core.Keyword(null,"allow","allow",-1857325745),null],null),null)).call(null,(new 
cljs.core.Keyword(null,"read-cond","read-cond",1056899244)).cljs$core$IFn$_invoke$arity$1(c)):c))throw cljs.core.ex_info.call(null,"Conditional read not allowed",new cljs.core.PersistentArrayMap(null, +1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"runtime-exception","runtime-exception",-1495664514)],null));var e=cljs.tools.reader.reader_types.read_char.call(null,a);if(cljs.core.truth_(e))if(e=(b=cljs.core._EQ_.call(null,e,"@"))?cljs.tools.reader.reader_types.read_char.call(null,a):e,b&&(cljs.core.truth_(cljs.tools.reader._STAR_read_delim_STAR_)||cljs.tools.reader.impl.errors.reader_error.call(null,a,"cond-splice not in list")),e=cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null, +e)?cljs.tools.reader.impl.commons.read_past.call(null,cljs.tools.reader.impl.utils.whitespace_QMARK_,a):e,cljs.core.truth_(e)){if(cljs.core.not_EQ_.call(null,e,"("))throw cljs.core.ex_info.call(null,"read-cond body must be a list",new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"runtime-exception","runtime-exception",-1495664514)],null));var f=cljs.tools.reader._STAR_suppress_read_STAR_;var g=cljs.tools.reader._STAR_suppress_read_STAR_; +g=cljs.core.truth_(g)?g:cljs.core._EQ_.call(null,new cljs.core.Keyword(null,"preserve","preserve",1276846509),(new cljs.core.Keyword(null,"read-cond","read-cond",1056899244)).cljs$core$IFn$_invoke$arity$1(c));cljs.tools.reader._STAR_suppress_read_STAR_=g;try{return cljs.core.truth_(cljs.tools.reader._STAR_suppress_read_STAR_)?cljs.tools.reader.impl.utils.reader_conditional.call(null,cljs.tools.reader.read_list.call(null,a,e,c,d),b):cljs.tools.reader.read_cond_delimited.call(null,a,b,c,d)}finally{cljs.tools.reader._STAR_suppress_read_STAR_= +f}}else return cljs.tools.reader.impl.errors.throw_eof_in_character.call(null,a);else return cljs.tools.reader.impl.errors.throw_eof_in_character.call(null,a)};cljs.tools.reader.arg_env=null;cljs.tools.reader.garg=function(a){return cljs.core.symbol.call(null,[-1===a?"rest":["p",cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)].join(""),"__",cljs.core.str.cljs$core$IFn$_invoke$arity$1(cljs.tools.reader.impl.utils.next_id.call(null)),"#"].join(""))}; +cljs.tools.reader.read_fn=function(a,b,c,d){if(cljs.core.truth_(cljs.tools.reader.arg_env))throw cljs.core.ex_info.call(null,"Nested #()s are not allowed",new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"illegal-state","illegal-state",-1519851182)],null));b=cljs.tools.reader.arg_env;var e=cljs.core.sorted_map.call(null);cljs.tools.reader.arg_env=e;try{var f=cljs.tools.reader.read_STAR_.call(null,function(){cljs.tools.reader.reader_types.unread.call(null, +a,"(");return a}(),!0,null,c,d),g=cljs.core.rseq.call(null,cljs.tools.reader.arg_env),h=g?function(){var a=cljs.core.key.call(null,cljs.core.first.call(null,g)),b=function(){for(var b=1,c=cljs.core.transient$.call(null,cljs.core.PersistentVector.EMPTY);;){if(b>a)return cljs.core.persistent_BANG_.call(null,c);var d=b+1;c=cljs.core.conj_BANG_.call(null,c,function(){var a=cljs.core.get.call(null,cljs.tools.reader.arg_env,b);return cljs.core.truth_(a)?a:cljs.tools.reader.garg.call(null,b)}());b=d}}(); +return cljs.core.truth_(cljs.tools.reader.arg_env.call(null,-1))?cljs.core.conj.call(null,b,new cljs.core.Symbol(null,"\x26","\x26",-2144855648,null),cljs.tools.reader.arg_env.call(null,-1)):b}():cljs.core.PersistentVector.EMPTY;return new cljs.core.List(null,new 
cljs.core.Symbol(null,"fn*","fn*",-752876845,null),new cljs.core.List(null,h,new cljs.core.List(null,f,null,1,null),2,null),3,null)}finally{cljs.tools.reader.arg_env=b}}; +cljs.tools.reader.register_arg=function(a){if(cljs.core.truth_(cljs.tools.reader.arg_env)){var b=cljs.tools.reader.arg_env.call(null,a);if(cljs.core.truth_(b))return b;b=cljs.tools.reader.garg.call(null,a);cljs.tools.reader.arg_env=cljs.core.assoc.call(null,cljs.tools.reader.arg_env,a,b);return b}throw cljs.core.ex_info.call(null,"Arg literal not in #()",new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"illegal-state","illegal-state", +-1519851182)],null));}; +cljs.tools.reader.read_arg=function(a,b,c,d){if(null==cljs.tools.reader.arg_env)return cljs.tools.reader.read_symbol.call(null,a,b);b=cljs.tools.reader.reader_types.peek_char.call(null,a);if(cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null,b)||cljs.tools.reader.macro_terminating_QMARK_.call(null,b)||null==b)return cljs.tools.reader.register_arg.call(null,1);if(cljs.core._EQ_.call(null,b,"\x26"))return cljs.tools.reader.reader_types.read_char.call(null,a),cljs.tools.reader.register_arg.call(null,-1); +a=cljs.tools.reader.read_STAR_.call(null,a,!0,null,c,d);if(cljs.core.integer_QMARK_.call(null,a))return cljs.tools.reader.register_arg.call(null,a);throw cljs.core.ex_info.call(null,"Arg literal must be %, %\x26 or %integer",new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"illegal-state","illegal-state",-1519851182)],null));};cljs.tools.reader.gensym_env=null; +cljs.tools.reader.read_unquote=function(a,b,c,d){b=cljs.tools.reader.reader_types.peek_char.call(null,a);return cljs.core.truth_(b)?cljs.core._EQ_.call(null,"@",b)?cljs.tools.reader.wrapping_reader.call(null,new cljs.core.Symbol("clojure.core","unquote-splicing","clojure.core/unquote-splicing",-552003150,null)).call(null,function(){cljs.tools.reader.reader_types.read_char.call(null,a);return a}(),"@",c,d):cljs.tools.reader.wrapping_reader.call(null,new cljs.core.Symbol("clojure.core","unquote","clojure.core/unquote", +843087510,null)).call(null,a,"~",c,d):null};cljs.tools.reader.unquote_splicing_QMARK_=function(a){return cljs.core.seq_QMARK_.call(null,a)&&cljs.core._EQ_.call(null,cljs.core.first.call(null,a),new cljs.core.Symbol("clojure.core","unquote-splicing","clojure.core/unquote-splicing",-552003150,null))}; +cljs.tools.reader.unquote_QMARK_=function(a){return cljs.core.seq_QMARK_.call(null,a)&&cljs.core._EQ_.call(null,cljs.core.first.call(null,a),new cljs.core.Symbol("clojure.core","unquote","clojure.core/unquote",843087510,null))}; +cljs.tools.reader.expand_list=function(a){a=cljs.core.seq.call(null,a);for(var b=cljs.core.transient$.call(null,cljs.core.PersistentVector.EMPTY);;)if(a){var c=cljs.core.first.call(null,a);b=cljs.core.conj_BANG_.call(null,b,cljs.tools.reader.unquote_QMARK_.call(null,c)?new cljs.core.List(null,new cljs.core.Symbol("clojure.core","list","clojure.core/list",-1119203325,null),new cljs.core.List(null,cljs.core.second.call(null,c),null,1,null),2,null):cljs.tools.reader.unquote_splicing_QMARK_.call(null, +c)?cljs.core.second.call(null,c):new cljs.core.List(null,new cljs.core.Symbol("clojure.core","list","clojure.core/list",-1119203325,null),new cljs.core.List(null,cljs.tools.reader.syntax_quote_STAR_.call(null,c),null,1,null),2,null));a=cljs.core.next.call(null,a)}else return 
cljs.core.seq.call(null,cljs.core.persistent_BANG_.call(null,b))}; +cljs.tools.reader.flatten_map=function(a){a=cljs.core.seq.call(null,a);for(var b=cljs.core.transient$.call(null,cljs.core.PersistentVector.EMPTY);;)if(a){var c=cljs.core.first.call(null,a);a=cljs.core.next.call(null,a);b=cljs.core.conj_BANG_.call(null,cljs.core.conj_BANG_.call(null,b,cljs.core.key.call(null,c)),cljs.core.val.call(null,c))}else return cljs.core.seq.call(null,cljs.core.persistent_BANG_.call(null,b))}; +cljs.tools.reader.register_gensym=function(a){if(cljs.core.not.call(null,cljs.tools.reader.gensym_env))throw cljs.core.ex_info.call(null,"Gensym literal not in syntax-quote",new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"illegal-state","illegal-state",-1519851182)],null));var b=cljs.core.get.call(null,cljs.tools.reader.gensym_env,a);if(cljs.core.truth_(b))return b;b=cljs.core.symbol.call(null,[cljs.core.subs.call(null,cljs.core.name.call(null, +a),0,cljs.core.name.call(null,a).length-1),"__",cljs.core.str.cljs$core$IFn$_invoke$arity$1(cljs.tools.reader.impl.utils.next_id.call(null)),"__auto__"].join(""));cljs.tools.reader.gensym_env=cljs.core.assoc.call(null,cljs.tools.reader.gensym_env,a,b);return b}; +cljs.tools.reader.add_meta=function(a,b){return null!=a&&(a.cljs$lang$protocol_mask$partition0$&262144||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$IWithMeta$)&&cljs.core.seq.call(null,cljs.core.dissoc.call(null,cljs.core.meta.call(null,a),new cljs.core.Keyword(null,"line","line",212345235),new cljs.core.Keyword(null,"column","column",2078222095),new cljs.core.Keyword(null,"end-line","end-line",1837326455),new cljs.core.Keyword(null,"end-column","end-column",1425389514),new cljs.core.Keyword(null,"file", +"file",-1269645878),new cljs.core.Keyword(null,"source","source",-433931539)))?new cljs.core.List(null,new cljs.core.Symbol("cljs.core","with-meta","cljs.core/with-meta",749126446,null),new cljs.core.List(null,b,new cljs.core.List(null,cljs.tools.reader.syntax_quote_STAR_.call(null,cljs.core.meta.call(null,a)),null,1,null),2,null),3,null):b}; +cljs.tools.reader.syntax_quote_coll=function(a,b){b=new cljs.core.List(null,new cljs.core.Symbol("cljs.core","sequence","cljs.core/sequence",1908459032,null),new cljs.core.List(null,cljs.core.cons.call(null,new cljs.core.Symbol("cljs.core","concat","cljs.core/concat",-1133584918,null),cljs.tools.reader.expand_list.call(null,b)),null,1,null),2,null);return cljs.core.truth_(a)?new cljs.core.List(null,new cljs.core.Symbol("cljs.core","apply","cljs.core/apply",1757277831,null),new cljs.core.List(null, +a,new cljs.core.List(null,b,null,1,null),2,null),3,null):b};cljs.tools.reader.map_func=function(a){return 16<=cljs.core.count.call(null,a)?new cljs.core.Symbol("cljs.core","hash-map","cljs.core/hash-map",303385767,null):new cljs.core.Symbol("cljs.core","array-map","cljs.core/array-map",-1519210683,null)};cljs.tools.reader.bool_QMARK_=function(a){return a instanceof Boolean||!0===a||!1===a}; +cljs.tools.reader.resolve_symbol=function(a){throw cljs.core.ex_info.call(null,"resolve-symbol is not implemented",new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"sym","sym",-1444860305),a],null));}; +cljs.tools.reader.syntax_quote_STAR_=function(a){return cljs.tools.reader.add_meta.call(null,a,cljs.core.special_symbol_QMARK_.call(null,a)?new cljs.core.List(null,new cljs.core.Symbol(null,"quote","quote",1377916282,null),new cljs.core.List(null,a,null,1,null),2,null):a instanceof 
cljs.core.Symbol?new cljs.core.List(null,new cljs.core.Symbol(null,"quote","quote",1377916282,null),new cljs.core.List(null,cljs.core.not.call(null,cljs.core.namespace.call(null,a))&&goog.string.endsWith(cljs.core.name.call(null, +a),"#")?cljs.tools.reader.register_gensym.call(null,a):function(){var b=cljs.core.str.cljs$core$IFn$_invoke$arity$1(a);return goog.string.endsWith(b,".")?(b=cljs.core.symbol.call(null,cljs.core.subs.call(null,b,0,b.length-1)),cljs.core.symbol.call(null,[cljs.core.str.cljs$core$IFn$_invoke$arity$1(cljs.tools.reader.resolve_symbol.call(null,b)),"."].join(""))):cljs.tools.reader.resolve_symbol.call(null,a)}(),null,1,null),2,null):cljs.tools.reader.unquote_QMARK_.call(null,a)?cljs.core.second.call(null, +a):cljs.tools.reader.unquote_splicing_QMARK_.call(null,a)?function(){throw cljs.core.ex_info.call(null,"unquote-splice not in list",new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"illegal-state","illegal-state",-1519851182)],null));}():cljs.core.coll_QMARK_.call(null,a)?null!=a&&(a.cljs$lang$protocol_mask$partition0$&67108864||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$IRecord$)?a:cljs.core.map_QMARK_.call(null,a)?cljs.tools.reader.syntax_quote_coll.call(null, +cljs.tools.reader.map_func.call(null,a),cljs.tools.reader.flatten_map.call(null,a)):cljs.core.vector_QMARK_.call(null,a)?new cljs.core.List(null,new cljs.core.Symbol("cljs.core","vec","cljs.core/vec",307622519,null),new cljs.core.List(null,cljs.tools.reader.syntax_quote_coll.call(null,null,a),null,1,null),2,null):cljs.core.set_QMARK_.call(null,a)?cljs.tools.reader.syntax_quote_coll.call(null,new cljs.core.Symbol("cljs.core","hash-set","cljs.core/hash-set",1130426749,null),a):cljs.core.seq_QMARK_.call(null, +a)||cljs.core.list_QMARK_.call(null,a)?function(){var b=cljs.core.seq.call(null,a);return b?cljs.tools.reader.syntax_quote_coll.call(null,null,b):cljs.core.list(new cljs.core.Symbol("cljs.core","list","cljs.core/list",-1331406371,null))}():function(){throw cljs.core.ex_info.call(null,"Unknown Collection type",new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"unsupported-operation","unsupported-operation",1890540953)],null));}(): +a instanceof cljs.core.Keyword||"number"===typeof a||"string"===typeof a||null==a||cljs.tools.reader.bool_QMARK_.call(null,a)||a instanceof RegExp?a:new cljs.core.List(null,new cljs.core.Symbol(null,"quote","quote",1377916282,null),new cljs.core.List(null,a,null,1,null),2,null))}; +cljs.tools.reader.read_syntax_quote=function(a,b,c,d){b=cljs.tools.reader.gensym_env;cljs.tools.reader.gensym_env=cljs.core.PersistentArrayMap.EMPTY;try{return cljs.tools.reader.syntax_quote_STAR_.call(null,cljs.tools.reader.read_STAR_.call(null,a,!0,null,c,d))}finally{cljs.tools.reader.gensym_env=b}}; +cljs.tools.reader.read_namespaced_map=function(a,b,c,d){var e=cljs.tools.reader.read_token.call(null,a,new cljs.core.Keyword(null,"namespaced-map","namespaced-map",1235665380),cljs.tools.reader.reader_types.read_char.call(null,a));b=cljs.core._EQ_.call(null,e,":")?cljs.core.ns_name.call(null,cljs.core._STAR_ns_STAR_):cljs.core._EQ_.call(null,":",cljs.core.first.call(null,e))?function(){var a=null==e?null:cljs.core.subs.call(null,e,1);a=null==a?null:cljs.tools.reader.impl.commons.parse_symbol.call(null, +a);a=null==a?null:cljs.tools.reader.impl.utils.second_SINGLEQUOTE_.call(null,a);a=null==a?null:cljs.core.symbol.call(null,a);return 
null==a?null:cljs.tools.reader.resolve_ns.call(null,a)}():function(){var a=null==e?null:cljs.tools.reader.impl.commons.parse_symbol.call(null,e);return null==a?null:cljs.tools.reader.impl.utils.second_SINGLEQUOTE_.call(null,a)}();return cljs.core.truth_(b)?"{"===cljs.tools.reader.impl.commons.read_past.call(null,cljs.tools.reader.impl.utils.whitespace_QMARK_,a)?(c=cljs.tools.reader.read_delimited.call(null, +new cljs.core.Keyword(null,"namespaced-map","namespaced-map",1235665380),"}",a,c,d),cljs.core.odd_QMARK_.call(null,cljs.core.count.call(null,c))&&cljs.tools.reader.impl.errors.throw_odd_map.call(null,a,null,null,c),d=cljs.tools.reader.impl.utils.namespace_keys.call(null,cljs.core.str.cljs$core$IFn$_invoke$arity$1(b),cljs.core.take_nth.call(null,2,c)),c=cljs.core.take_nth.call(null,2,cljs.core.rest.call(null,c)),cljs.core._EQ_.call(null,cljs.core.count.call(null,cljs.core.set.call(null,d)),cljs.core.count.call(null, +d))||cljs.tools.reader.impl.errors.throw_dup_keys.call(null,a,new cljs.core.Keyword(null,"namespaced-map","namespaced-map",1235665380),d),cljs.core.zipmap.call(null,d,c)):cljs.tools.reader.impl.errors.throw_ns_map_no_map.call(null,a,e):cljs.tools.reader.impl.errors.throw_bad_ns.call(null,a,e)}; +cljs.tools.reader.macros=function(a){switch(a){case '"':return cljs.tools.reader.read_string_STAR_;case ":":return cljs.tools.reader.read_keyword;case ";":return cljs.tools.reader.impl.commons.read_comment;case "'":return cljs.tools.reader.wrapping_reader.call(null,new cljs.core.Symbol(null,"quote","quote",1377916282,null));case "@":return cljs.tools.reader.wrapping_reader.call(null,new cljs.core.Symbol("clojure.core","deref","clojure.core/deref",188719157,null));case "^":return cljs.tools.reader.read_meta; +case "`":return cljs.tools.reader.read_syntax_quote;case "~":return cljs.tools.reader.read_unquote;case "(":return cljs.tools.reader.read_list;case ")":return cljs.tools.reader.read_unmatched_delimiter;case "[":return cljs.tools.reader.read_vector;case "]":return cljs.tools.reader.read_unmatched_delimiter;case "{":return cljs.tools.reader.read_map;case "}":return cljs.tools.reader.read_unmatched_delimiter;case "\\":return cljs.tools.reader.read_char_STAR_;case "%":return cljs.tools.reader.read_arg; +case "#":return cljs.tools.reader.read_dispatch;default:return null}}; +cljs.tools.reader.dispatch_macros=function(a){switch(a){case "^":return cljs.tools.reader.read_meta;case "'":return cljs.tools.reader.wrapping_reader.call(null,new cljs.core.Symbol(null,"var","var",870848730,null));case "(":return cljs.tools.reader.read_fn;case "{":return cljs.tools.reader.read_set;case "\x3c":return cljs.tools.reader.impl.commons.throwing_reader.call(null,"Unreadable form");case "\x3d":return cljs.tools.reader.impl.commons.throwing_reader.call(null,"read-eval not supported");case '"':return cljs.tools.reader.read_regex; +case "!":return cljs.tools.reader.impl.commons.read_comment;case "_":return cljs.tools.reader.read_discard;case "?":return cljs.tools.reader.read_cond;case ":":return cljs.tools.reader.read_namespaced_map;case "#":return cljs.tools.reader.read_symbolic_value;default:return null}}; +cljs.tools.reader.read_tagged=function(a,b,c,d){b=cljs.tools.reader.read_STAR_.call(null,a,!0,null,c,d);b instanceof cljs.core.Symbol||cljs.tools.reader.impl.errors.throw_bad_reader_tag.call(null,a,b);if(cljs.core.truth_(cljs.tools.reader._STAR_suppress_read_STAR_))return cljs.core.tagged_literal.call(null,b,cljs.tools.reader.read_STAR_.call(null,a,!0,null,c,d));var 
e=cljs.tools.reader._STAR_data_readers_STAR_.call(null,b);e=cljs.core.truth_(e)?e:cljs.tools.reader.default_data_readers.call(null,b); +if(cljs.core.truth_(e))return e.call(null,cljs.tools.reader.read_STAR_.call(null,a,!0,null,c,d));e=cljs.tools.reader._STAR_default_data_reader_fn_STAR_;return cljs.core.truth_(e)?e.call(null,b,cljs.tools.reader.read_STAR_.call(null,a,!0,null,c,d)):cljs.tools.reader.impl.errors.throw_unknown_reader_tag.call(null,a,b)};cljs.tools.reader._STAR_data_readers_STAR_=cljs.core.PersistentArrayMap.EMPTY;cljs.tools.reader._STAR_default_data_reader_fn_STAR_=null;cljs.tools.reader._STAR_suppress_read_STAR_=!1; +cljs.tools.reader.default_data_readers=cljs.core.PersistentArrayMap.EMPTY; +cljs.tools.reader.read_STAR__internal=function(a,b,c,d,e,f){for(;;){if(cljs.tools.reader.reader_types.source_logging_reader_QMARK_.call(null,a)&&!cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null,cljs.tools.reader.reader_types.peek_char.call(null,a)))return cljs.tools.reader.reader_types.log_source_STAR_.call(null,a,function(){for(;;)if(goog.array.isEmpty(f)){var g=cljs.tools.reader.reader_types.read_char.call(null,a);if(!cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null,g)){if(null== +g)return b?cljs.tools.reader.impl.errors.throw_eof_error.call(null,a,null):c;if(g===d)return cljs.tools.reader.READ_FINISHED;if(cljs.tools.reader.impl.commons.number_literal_QMARK_.call(null,a,g))return cljs.tools.reader.read_number.call(null,a,g);var h=cljs.tools.reader.macros.call(null,g);if(null!=h){if(g=h.call(null,a,g,e,f),g!==a)return g}else return cljs.tools.reader.read_symbol.call(null,a,g)}}else return g=f[0],goog.array.removeAt(f,0),g});if(goog.array.isEmpty(f)){var g=cljs.tools.reader.reader_types.read_char.call(null, +a);if(!cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null,g)){if(null==g)return b?cljs.tools.reader.impl.errors.throw_eof_error.call(null,a,null):c;if(g===d)return cljs.tools.reader.READ_FINISHED;if(cljs.tools.reader.impl.commons.number_literal_QMARK_.call(null,a,g))return cljs.tools.reader.read_number.call(null,a,g);var h=cljs.tools.reader.macros.call(null,g);if(null!=h){if(g=h.call(null,a,g,e,f),g!==a)return g}else return cljs.tools.reader.read_symbol.call(null,a,g)}}else return g=f[0],goog.array.removeAt(f, +0),g}};cljs.tools.reader.read_STAR_=function(a){switch(arguments.length){case 5:return cljs.tools.reader.read_STAR_.cljs$core$IFn$_invoke$arity$5(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4]);case 6:return cljs.tools.reader.read_STAR_.cljs$core$IFn$_invoke$arity$6(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4],arguments[5]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.tools.reader.read_STAR_.cljs$core$IFn$_invoke$arity$5=function(a,b,c,d,e){return cljs.tools.reader.read_STAR_.call(null,a,b,c,null,d,e)}; +cljs.tools.reader.read_STAR_.cljs$core$IFn$_invoke$arity$6=function(a,b,c,d,e,f){try{return cljs.tools.reader.read_STAR__internal.call(null,a,b,c,d,e,f)}catch(g){if(g instanceof Error){b=g;if(cljs.tools.reader.impl.utils.ex_info_QMARK_.call(null,b)){c=cljs.core.ex_data.call(null,b);if(cljs.core._EQ_.call(null,new cljs.core.Keyword(null,"reader-exception","reader-exception",-1938323098),(new cljs.core.Keyword(null,"type","type",1174270348)).cljs$core$IFn$_invoke$arity$1(c)))throw b;throw cljs.core.ex_info.call(null, +b.message,cljs.core.merge.call(null,new cljs.core.PersistentArrayMap(null,1,[new 
cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"reader-exception","reader-exception",-1938323098)],null),c,cljs.tools.reader.reader_types.indexing_reader_QMARK_.call(null,a)?new cljs.core.PersistentArrayMap(null,3,[new cljs.core.Keyword(null,"line","line",212345235),cljs.tools.reader.reader_types.get_line_number.call(null,a),new cljs.core.Keyword(null,"column","column",2078222095),cljs.tools.reader.reader_types.get_column_number.call(null, +a),new cljs.core.Keyword(null,"file","file",-1269645878),cljs.tools.reader.reader_types.get_file_name.call(null,a)],null):null),b);}throw cljs.core.ex_info.call(null,b.message,cljs.core.merge.call(null,new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"reader-exception","reader-exception",-1938323098)],null),cljs.tools.reader.reader_types.indexing_reader_QMARK_.call(null,a)?new cljs.core.PersistentArrayMap(null,3,[new cljs.core.Keyword(null, +"line","line",212345235),cljs.tools.reader.reader_types.get_line_number.call(null,a),new cljs.core.Keyword(null,"column","column",2078222095),cljs.tools.reader.reader_types.get_column_number.call(null,a),new cljs.core.Keyword(null,"file","file",-1269645878),cljs.tools.reader.reader_types.get_file_name.call(null,a)],null):null),b);}throw g;}};cljs.tools.reader.read_STAR_.cljs$lang$maxFixedArity=6; +cljs.tools.reader.read=function(a){switch(arguments.length){case 1:return cljs.tools.reader.read.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.tools.reader.read.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return cljs.tools.reader.read.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.tools.reader.read.cljs$core$IFn$_invoke$arity$1=function(a){return cljs.tools.reader.read.call(null,a,!0,null)}; +cljs.tools.reader.read.cljs$core$IFn$_invoke$arity$2=function(a,b){a=null!=a&&(a.cljs$lang$protocol_mask$partition0$&64||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$ISeq$)?cljs.core.apply.call(null,cljs.core.hash_map,a):a;var c=cljs.core.get.call(null,a,new cljs.core.Keyword(null,"eof","eof",-489063237),new cljs.core.Keyword(null,"eofthrow","eofthrow",-334166531));return cljs.tools.reader.read_STAR_.call(null,b,cljs.core._EQ_.call(null,c,new cljs.core.Keyword(null,"eofthrow","eofthrow",-334166531)), +c,null,a,cljs.core.to_array.call(null,cljs.core.PersistentVector.EMPTY))};cljs.tools.reader.read.cljs$core$IFn$_invoke$arity$3=function(a,b,c){return cljs.tools.reader.read_STAR_.call(null,a,b,c,null,cljs.core.PersistentArrayMap.EMPTY,cljs.core.to_array.call(null,cljs.core.PersistentVector.EMPTY))};cljs.tools.reader.read.cljs$lang$maxFixedArity=3; +cljs.tools.reader.read_string=function(a){switch(arguments.length){case 1:return cljs.tools.reader.read_string.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.tools.reader.read_string.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.tools.reader.read_string.cljs$core$IFn$_invoke$arity$1=function(a){return cljs.tools.reader.read_string.call(null,cljs.core.PersistentArrayMap.EMPTY,a)};cljs.tools.reader.read_string.cljs$core$IFn$_invoke$arity$2=function(a,b){return 
cljs.core.truth_(cljs.core.truth_(b)?""!==b:b)?cljs.tools.reader.read.call(null,a,cljs.tools.reader.reader_types.string_push_back_reader.call(null,b)):null};cljs.tools.reader.read_string.cljs$lang$maxFixedArity=2; +cljs.tools.reader.read_PLUS_string=function(a){switch(arguments.length){case 1:return cljs.tools.reader.read_PLUS_string.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 3:return cljs.tools.reader.read_PLUS_string.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);case 2:return cljs.tools.reader.read_PLUS_string.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join("")); +}};cljs.tools.reader.read_PLUS_string.cljs$core$IFn$_invoke$arity$1=function(a){return cljs.tools.reader.read_PLUS_string.call(null,a,!0,null)}; +cljs.tools.reader.read_PLUS_string.cljs$core$IFn$_invoke$arity$3=function(a,b,c){var d=function(b){return cljs.core.str.cljs$core$IFn$_invoke$arity$1((new cljs.core.Keyword(null,"buffer","buffer",617295198)).cljs$core$IFn$_invoke$arity$1(cljs.core.deref.call(null,a.frames)))},e=d.call(null,a).length,f=cljs.tools.reader.reader_types.source_logging_reader_QMARK_.call(null,a)&&!cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null,cljs.tools.reader.reader_types.peek_char.call(null,a))?cljs.tools.reader.reader_types.log_source_STAR_.call(null, +a,function(){return cljs.tools.reader.read.call(null,a,b,c)}):cljs.tools.reader.read.call(null,a,b,c);d=cljs.core.subs.call(null,d.call(null,a),e).trim();return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[f,d],null)}; +cljs.tools.reader.read_PLUS_string.cljs$core$IFn$_invoke$arity$2=function(a,b){var c=function(a){return cljs.core.str.cljs$core$IFn$_invoke$arity$1((new cljs.core.Keyword(null,"buffer","buffer",617295198)).cljs$core$IFn$_invoke$arity$1(cljs.core.deref.call(null,b.frames)))},d=c.call(null,b).length,e=cljs.tools.reader.reader_types.source_logging_reader_QMARK_.call(null,b)&&!cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null,cljs.tools.reader.reader_types.peek_char.call(null,b))?cljs.tools.reader.reader_types.log_source_STAR_.call(null, +b,function(){return cljs.tools.reader.read.call(null,a,b)}):cljs.tools.reader.read.call(null,a,b);c=cljs.core.subs.call(null,c.call(null,b),d).trim();return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[e,c],null)};cljs.tools.reader.read_PLUS_string.cljs$lang$maxFixedArity=3;cljs.tools.reader.edn={};cljs.tools.reader.edn.macro_terminating_QMARK_=function(a){return"#"!==a?"'"!==a?":"!==a?cljs.tools.reader.edn.macros.call(null,a):!1:!1:!1};cljs.tools.reader.edn.not_constituent_QMARK_=function(a){return"@"===a||"`"===a||"~"===a}; +cljs.tools.reader.edn.read_token=function(a){switch(arguments.length){case 3:return cljs.tools.reader.edn.read_token.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);case 4:return cljs.tools.reader.edn.read_token.cljs$core$IFn$_invoke$arity$4(arguments[0],arguments[1],arguments[2],arguments[3]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.tools.reader.edn.read_token.cljs$core$IFn$_invoke$arity$3=function(a,b,c){return cljs.tools.reader.edn.read_token.call(null,a,b,c,!0)}; +cljs.tools.reader.edn.read_token.cljs$core$IFn$_invoke$arity$4=function(a,b,c,d){if(cljs.core.not.call(null,c))return 
cljs.tools.reader.impl.errors.throw_eof_at_start.call(null,a,b);if(cljs.core.truth_(cljs.core.truth_(d)?cljs.tools.reader.edn.not_constituent_QMARK_.call(null,c):d))return cljs.tools.reader.impl.errors.throw_bad_char.call(null,a,b,c);d=new goog.string.StringBuffer;for(cljs.tools.reader.reader_types.unread.call(null,a,c);;){if(cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null, +c)||cljs.tools.reader.edn.macro_terminating_QMARK_.call(null,c)||null==c)return cljs.core.str.cljs$core$IFn$_invoke$arity$1(d);if(cljs.tools.reader.edn.not_constituent_QMARK_.call(null,c))return cljs.tools.reader.impl.errors.throw_bad_char.call(null,a,b,c);d.append(cljs.tools.reader.reader_types.read_char.call(null,a));c=cljs.tools.reader.reader_types.peek_char.call(null,a)}};cljs.tools.reader.edn.read_token.cljs$lang$maxFixedArity=4; +cljs.tools.reader.edn.read_dispatch=function(a,b,c){var d=cljs.tools.reader.reader_types.read_char.call(null,a);if(cljs.core.truth_(d)){b=cljs.tools.reader.edn.dispatch_macros.call(null,d);if(cljs.core.truth_(b))return b.call(null,a,d,c);c=cljs.tools.reader.edn.read_tagged.call(null,function(){cljs.tools.reader.reader_types.unread.call(null,a,d);return a}(),d,c);return cljs.core.truth_(c)?c:cljs.tools.reader.impl.errors.throw_no_dispatch.call(null,a,d)}return cljs.tools.reader.impl.errors.throw_eof_at_dispatch.call(null, +a)};cljs.tools.reader.edn.read_unmatched_delimiter=function(a,b,c){return cljs.tools.reader.impl.errors.throw_unmatch_delimiter.call(null,a,b)}; +cljs.tools.reader.edn.read_unicode_char=function(a){switch(arguments.length){case 4:return cljs.tools.reader.edn.read_unicode_char.cljs$core$IFn$_invoke$arity$4(arguments[0],arguments[1],arguments[2],arguments[3]);case 5:return cljs.tools.reader.edn.read_unicode_char.cljs$core$IFn$_invoke$arity$5(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.tools.reader.edn.read_unicode_char.cljs$core$IFn$_invoke$arity$4=function(a,b,c,d){c=b+c;cljs.core.count.call(null,a)!==c&&cljs.tools.reader.impl.errors.throw_invalid_unicode_literal.call(null,null,a);for(var e=0;;){if(b===c)return String.fromCharCode(e);var f=cljs.tools.reader.impl.utils.char_code.call(null,cljs.core.nth.call(null,a,b),d);if(-1===f)return cljs.tools.reader.impl.errors.throw_invalid_unicode_digit_in_token.call(null,null,cljs.core.nth.call(null,a,b),a);e=f+e*d;b+=1}}; +cljs.tools.reader.edn.read_unicode_char.cljs$core$IFn$_invoke$arity$5=function(a,b,c,d,e){for(var f=1,g=cljs.tools.reader.impl.utils.char_code.call(null,b,c);;){if(-1===g)return cljs.tools.reader.impl.errors.throw_invalid_unicode_digit.call(null,a,b);if(f!==d){var h=cljs.tools.reader.reader_types.peek_char.call(null,a);if(cljs.core.truth_(function(){var a=cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null,h);if(a)return a;a=cljs.tools.reader.edn.macros.call(null,h);return cljs.core.truth_(a)? 
diff --git a/bigml/fusion.py b/bigml/fusion.py
[... index header and the opening of the first hunk truncated in extraction; the hunk rewrites the class lookup in rearrange_prediction ...]
-        if origin_index > -1:
+        try:
+            origin_index = origin_classes.index(class_name)
             new_prediction.append(prediction[origin_index])
-        else:
-            new_prediction = 0.0
+        except ValueError:
+            new_prediction.append(0.0)
     return new_prediction
@@ -103,6 +104,8 @@ def get_models_weight(models_info):
         else:
             model_ids = models_info
             weights = None
+        if weights is None:
+            weights = [1] * len(model_ids)
         return model_ids, weights
     except KeyError:
         raise ValueError("Failed to find the models in the fusion info.")
@@ -128,9 +131,19 @@ class Fusion(ModelFields):
                  cache storage.
     """
 
-    def __init__(self, fusion, api=None, max_models=None):
+    def __init__(self, fusion, api=None, max_models=None, cache_get=None,
+                 operation_settings=None):
+
+        if use_cache(cache_get):
+            # using a cache to store the model attributes
+            self.__dict__ = load(get_fusion_id(fusion), cache_get)
+            self.api = get_api_connection(api)
+            self.operation_settings = operation_settings
+            return
 
         self.resource_id = None
+        self.name = None
+        self.description = None
         self.models_ids = None
         self.objective_id = None
         self.distribution = None
@@ -143,10 +156,18 @@ def __init__(self, fusion, api=None, max_models=None):
 
         self.api = get_api_connection(api)
         self.resource_id, fusion = get_resource_dict( \
-            fusion, "fusion", api=self.api)
+            fusion,
+            "fusion", api=self.api)
 
         if 'object' in fusion:
             fusion = fusion.get('object', {})
+        try:
+            self.name = fusion.get('name')
+            self.description = fusion.get('description')
+        except AttributeError:
+            raise ValueError("Failed to find the expected "
+                             "JSON structure. Check your arguments.")
+
         self.model_ids, self.weights = get_models_weight( \
             fusion['models'])
         model_types = [get_resource_type(model) for model in self.model_ids]
@@ -154,7 +175,7 @@ def __init__(self, fusion, api=None, max_models=None):
         for model_type in model_types:
             if model_type not in LOCAL_SUPERVISED:
                 raise ValueError("The resource %s has not an allowed"
-                                 " supervised model type.")
+                                 " supervised model type." % model_type)
         self.importance = fusion.get('importance', [])
         self.missing_numerics = fusion.get('missing_numerics', True)
         if fusion.get('fusion'):
@@ -166,12 +187,25 @@ def __init__(self, fusion, api=None, max_models=None):
         number_of_models = len(self.model_ids)
 
         # Downloading the model information to cache it
-        if self.api.storage is not None:
+        if self.api.storage is not None or cache_get is not None:
+            # adding shared_ref to the API info when downloading children
+            api = self.api
+            if self.resource_id.startswith("shared"):
+                api = deepcopy(api)
+                api.shared_ref = self.resource_id.replace("shared/", "")
+            elif hasattr(api, "shared_ref") and \
+                    api.shared_ref is not None:
+                api = deepcopy(api)
+                # adding the resource ID to the sharing chain
+                api.shared_ref += ",%s" % self.resource_id
             for model_id in self.model_ids:
                 if get_resource_type(model_id) == "fusion":
-                    Fusion(model_id, api=self.api)
+                    Fusion(model_id, api=api, cache_get=cache_get,
+                           operation_settings=operation_settings)
                 else:
-                    SupervisedModel(model_id, api=self.api)
+                    SupervisedModel(model_id, api=api,
+                                    cache_get=cache_get,
+                                    operation_settings=operation_settings)
 
         if max_models is None:
             self.models_splits = [self.model_ids]
@@ -180,71 +214,28 @@ def __init__(self, fusion, api=None, max_models=None):
                 for index in range(0, number_of_models, max_models)]
 
-        if self.fields:
-            summary = self.fields[self.objective_id]['summary']
-            if 'bins' in summary:
-                distribution = summary['bins']
-            elif 'counts' in summary:
-                distribution = summary['counts']
-            elif 'categories' in summary:
-                distribution = summary['categories']
-            else:
-                distribution = []
-            self.distribution = distribution
-
-            self.regression = \
-                self.fields[self.objective_id].get('optype') == 'numeric'
-
-            if not self.regression:
-                objective_field = self.fields[self.objective_id]
-                categories = objective_field['summary']['categories']
-                classes = [category[0] for category in categories]
-                self.class_names = sorted(classes)
-                self.objective_categories = [category for \
-                    category, _ in self.fields[self.objective_id][ \
-                    "summary"]["categories"]]
 
         ModelFields.__init__( \
             self, self.fields,
             objective_id=self.objective_id)
 
-    def get_fusion_resource(self, fusion):
-        """Extracts the fusion resource info. The fusion argument can be
-            - a path to a local file
-            - an fusion id
-        """
-        # the string can be a path to a JSON file
-        if isinstance(fusion, basestring):
-            try:
-                path = os.path.dirname(os.path.abspath(fusion))
-                with open(fusion) as fusion_file:
-                    fusion = json.load(fusion_file)
-                self.resource_id = get_fusion_id(fusion)
-                if self.resource_id is None:
-                    raise ValueError("The JSON file does not seem"
-                                     " to contain a valid BigML fusion"
-                                     " representation.")
-                else:
-                    self.api = BigML(storage=path)
-            except IOError:
-                # if it is not a path, it can be an fusion id
-                self.resource_id = get_fusion_id(fusion)
-                if self.resource_id is None:
-                    if fusion.find('fusion/') > -1:
-                        raise Exception(
-                            self.api.error_message(fusion,
-                                                   resource_type='fusion',
-                                                   method='get'))
-                    else:
-                        raise IOError("Failed to open the expected JSON file"
-                                      " at %s" % fusion)
-            except ValueError:
-                raise ValueError("Failed to interpret %s."
- " JSON file expected.") - if not isinstance(fusion, dict): - fusion = retrieve_resource(self.api, self.resource_id, - no_check_fields=False) - return fusion + add_distribution(self) + summary = self.fields[self.objective_id]['summary'] + if 'bins' in summary: + distribution = summary['bins'] + elif 'counts' in summary: + distribution = summary['counts'] + elif 'categories' in summary: + distribution = summary['categories'] + self.objective_categories = [ + category for category, _ in distribution] + self.class_names = sorted( + self.objective_categories) + else: + distribution = [] + self.distribution = distribution + self.regression = \ + self.fields[self.objective_id].get('optype') == NUMERIC def list_models(self): """Lists all the model/ids that compound the fusion. @@ -260,7 +251,7 @@ def predict_probability(self, input_data, each possible output class, based on input values. The input fields must be a dictionary keyed by field name or field ID. - For regressions, the output is a single element list + For regressions, the output is a single element containing the prediction. :param input_data: Input data to be predicted @@ -276,21 +267,23 @@ def predict_probability(self, input_data, if not self.missing_numerics: check_no_missing_numerics(input_data, self.model_fields) + weights = [] for models_split in self.models_splits: models = [] for model in models_split: - if get_resource_type(model) == "fusion": + model_type = get_resource_type(model) + if model_type == "fusion": models.append(Fusion(model, api=self.api)) else: models.append(SupervisedModel(model, api=self.api)) votes_split = [] for model in models: try: + kwargs = {"compact": True} + if model_type in ["model", "ensemble", "fusion"]: + kwargs.update({"missing_strategy": missing_strategy}) prediction = model.predict_probability( \ - input_data, - missing_strategy=missing_strategy, - compact=True) - + input_data, **kwargs) except ValueError: # logistic regressions can raise this error if they # have missing_numerics=False and some numeric missings @@ -298,38 +291,34 @@ def predict_probability(self, input_data, continue if self.regression: prediction = prediction[0] - if self.weights is not None: - prediction = self.weigh(prediction, model.resource_id) - else: - if self.weights is not None: - prediction = self.weigh( \ - prediction, model.resource_id) - # we need to check that all classes in the fusion - # are also in the composing model - if not self.regression and \ - self.class_names != model.class_names: - try: - prediction = rearrange_prediction( \ - model.class_names, - self.class_names, - prediction) - except AttributeError: - # class_names should be defined, but just in case - pass + weights.append(self.weights[self.model_ids.index( + model.resource_id)]) + prediction = self.weigh(prediction, model.resource_id) + # we need to check that all classes in the fusion + # are also in the composing model + if not self.regression and \ + self.class_names != model.class_names: + try: + prediction = rearrange_prediction( \ + model.class_names, + self.class_names, + prediction) + except AttributeError: + # class_names should be defined, but just in case + pass votes_split.append(prediction) - - votes.extend(votes_split) if self.regression: - total_weight = len(votes.predictions) if self.weights is None \ - else sum(self.weights) - prediction = sum([prediction for prediction in \ - votes.predictions]) / float(total_weight) + prediction = 0 + total_weight = sum(weights) + for index, pred in enumerate(votes.predictions): + prediction 
+= pred # the weight is already considered in pred + if total_weight > 0: + prediction /= float(total_weight) if compact: output = [prediction] else: output = {"prediction": prediction} - else: output = votes.combine_to_distribution(normalize=True) if not compact: @@ -340,6 +329,98 @@ def predict_probability(self, input_data, return output + def predict_confidence(self, input_data, + missing_strategy=LAST_PREDICTION, + compact=False): + + """For classification models, predicts a confidence for + each possible output class, based on input values. The input + fields must be a dictionary keyed by field name or field ID. + + For regressions, the output is a single element + containing the prediction and the associated confidence. + + WARNING: Only decision-tree based models in the Fusion object will + have an associated confidence, so the result for fusions that don't + contain such models can be None. + + :param input_data: Input data to be predicted + :param missing_strategy: LAST_PREDICTION|PROPORTIONAL missing strategy + for missing fields + :param compact: If False, prediction is returned as a list of maps, one + per class, with the keys "prediction" and "confidence" + mapped to the name of the class and its confidence, + respectively. If True, returns a list of confidences + ordered by the sorted order of the class names. + """ + if not self.missing_numerics: + check_no_missing_numerics(input_data, self.model_fields) + + predictions = [] + weights = [] + for models_split in self.models_splits: + models = [] + for model in models_split: + model_type = get_resource_type(model) + if model_type == "fusion": + models.append(Fusion(model, api=self.api)) + else: + models.append(SupervisedModel(model, api=self.api)) + votes_split = [] + for model in models: + try: + kwargs = {"compact": False} + if model_type in ["model", "ensemble", "fusion"]: + kwargs.update({"missing_strategy": missing_strategy}) + prediction = model.predict_confidence( \ + input_data, **kwargs) + except Exception as exc: + # logistic regressions can raise this error if they + # have missing_numerics=False and some numeric missings + # are found and Linear Regressions have no confidence + continue + predictions.append(prediction) + weights.append(self.weights[self.model_ids.index( + model.resource_id)]) + if self.regression: + prediction = prediction["prediction"] + if self.regression: + prediction = 0 + confidence = 0 + total_weight = sum(weights) + for index, pred in enumerate(predictions): + prediction += pred.get("prediction") * weights[index] + confidence += pred.get("confidence") + if total_weight > 0: + prediction /= float(total_weight) + confidence /= float(len(predictions)) + if compact: + output = [prediction, confidence] + else: + output = {"prediction": prediction, "confidence": confidence} + else: + output = self._combine_confidences(predictions) + if not compact: + output = [{'category': class_name, + 'confidence': confidence} + for class_name, confidence in + zip(self.class_names, output)] + return output + + def _combine_confidences(self, predictions): + """Combining the confidences per class of classification models""" + output = [] + count = float(len(predictions)) + for class_name in self.class_names: + confidence = 0 + for prediction in predictions: + for category_info in prediction: + if category_info["category"] == class_name: + confidence += category_info.get("confidence") + break + output.append(round(confidence / count, DECIMALS)) + return output
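A short usage sketch of the two methods above, reusing the hypothetical local_fusion object and illustrative field names (per the warning, predict_confidence is only meaningful when the fusion contains decision-tree based models):

input_data = {"petal length": 4.2, "petal width": 1.3}
probabilities = local_fusion.predict_probability(input_data, compact=False)
confidences = local_fusion.predict_confidence(input_data, compact=False)

+ def weigh(self, prediction, model_id): + """Weighs the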
prediction according to the weight associated with the current model in the fusion. @@ -367,13 +448,16 @@ def predict(self, input_data, missing_strategy=LAST_PREDICTION, operating point can be defined in terms of: - the positive_class, the class that is important to predict accurately - - the probability_threshold, - the probability that is stablished + - the threshold, + the value that is established as minimum for the positive_class to be predicted. + - the kind of measure used to set a threshold: + probability or confidence (if available) The operating_point is then defined as a map with three attributes, e.g.: {"positive_class": "Iris-setosa", - "probability_threshold": 0.5} + "threshold": 0.5, + "kind": "probability"} full: Boolean that controls whether to include the prediction's attributes. By default, only the prediction is produced. If set to True, the rest of available information is added in a @@ -406,7 +490,7 @@ def predict(self, input_data, missing_strategy=LAST_PREDICTION, unused_fields=unused_fields) if full: return dict((key, value) for key, value in \ - full_prediction.iteritems() if value is not None) + full_prediction.items() if value is not None) return full_prediction['prediction'] @@ -420,6 +504,9 @@ def _predict(self, input_data, missing_strategy=LAST_PREDICTION, # When operating_point is used, we need the probabilities # of all possible classes to decide, so we use # the `predict_probability` method + if operating_point is None and self.operation_settings is not None: + operating_point = self.operation_settings.get("operating_point") + if operating_point: if self.regression: raise ValueError("The operating_point argument can only be" @@ -429,16 +516,28 @@ def _predict(self, input_data, missing_strategy=LAST_PREDICTION, missing_strategy=missing_strategy, operating_point=operating_point) return prediction - result = self.predict_probability( \ input_data, missing_strategy=missing_strategy, compact=False) + confidence_result = self.predict_confidence( \ + input_data, + missing_strategy=missing_strategy, + compact=False) if not self.regression: + try: + for index, value in enumerate(result): + result[index].update( + {"confidence": confidence_result[index]["confidence"]}) + except Exception as exc: + pass result = sorted(result, key=lambda x: - x["probability"])[0] result["prediction"] = result["category"] del result["category"] + else: + result.update( + {"confidence": confidence_result["confidence"]}) # adding unused fields, if any if unused_fields:
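With the extended operating point format documented above, a thresholded prediction looks like this sketch (class name and threshold are illustrative):

prediction = local_fusion.predict(
    {"petal length": 4.2, "petal width": 1.3},
    operating_point={"positive_class": "Iris-setosa",
                     "threshold": 0.5,
                     "kind": "probability"},
    full=True)

@@ -452,10 +551,14 @@ def predict_operating(self, input_data, """Computes the prediction based on a user-given operating point.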
""" + if operating_point is None and self.operation_settings is not None: + operating_point = self.operation_settings.get("operating_point") + # only probability is allowed as operating kind operating_point.update({"kind": "probability"}) kind, threshold, positive_class = parse_operating_point( \ - operating_point, OPERATING_POINT_KINDS, self.class_names) + operating_point, OPERATING_POINT_KINDS, self.class_names, + self.operation_settings) predictions = self.predict_probability(input_data, missing_strategy, False) @@ -477,6 +580,7 @@ def predict_operating(self, input_data, del prediction["category"] return prediction + #pylint: disable=locally-disabled,invalid-name def _sort_predictions(self, a, b, criteria): """Sorts the categories in the predicted node according to the given criteria @@ -485,3 +589,20 @@ def _sort_predictions(self, a, b, criteria): if a[criteria] == b[criteria]: return sort_categories(a, b, self.objective_categories) return 1 if b[criteria] > a[criteria] else -1 + + def dump(self, output=None, cache_set=None): + """Uses msgpack to serialize the resource object + If cache_set is filled with a cache set method, the method is called + + """ + self_vars = vars(self) + del self_vars["api"] + dump(self_vars, output=output, cache_set=cache_set) + + def dumps(self): + """Uses msgpack to serialize the resource object to a string + + """ + self_vars = vars(self) + del self_vars["api"] + dumps(self_vars) diff --git a/bigml/out_tree/__init__.py b/bigml/generators/__init__.py similarity index 100% rename from bigml/out_tree/__init__.py rename to bigml/generators/__init__.py diff --git a/bigml/generators/boosted_tree.py b/bigml/generators/boosted_tree.py new file mode 100644 index 00000000..14bbf2be --- /dev/null +++ b/bigml/generators/boosted_tree.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +"""Tree level output for python +This module defines functions that generate python code to make local +predictions +""" + +from bigml.tree_utils import COMPOSED_FIELDS, INDENT +from bigml.predict_utils.common import missing_branch, \ + none_value, get_node, get_predicate, mintree_split +from bigml.generators.tree_common import value_to_print, map_data, \ + missing_prefix_code, filter_nodes, split_condition_code +from bigml.util import NUMERIC + + +MISSING_OPERATOR = { + "=": "is", + "!=": "is not" +} + + +def missing_check_code(tree, offsets, fields, + field, depth, input_map, cmv): + """Builds the code to predict when the field is missing + """ + node = get_node(tree) + code = "%sif (%s is None):\n" % \ + (INDENT * depth, + map_data(fields[field]['slug'], input_map, True)) + value = value_to_print(node[offsets["output"]], NUMERIC) + code += "%sreturn {\"prediction\":%s" % (INDENT * (depth + 1), + value) + code += "}\n" + cmv.append(fields[field]['slug']) + return code + + +def boosted_plug_in_body(tree, offsets, fields, objective_id, regression, + depth=1, cmv=None, input_map=False, + ids_path=None, subtree=True): + """Translate the model into a set of "if" python statements. + `depth` controls the size of indentation. As soon as a value is missing + that node is returned without further evaluation. + """ + if cmv is None: + cmv = [] + body = "" + term_analysis_fields = [] + item_analysis_fields = [] + + + node = get_node(tree) + children = [] if node[offsets["children#"]] == 0 else \ + node[offsets["children"]] + children = filter_nodes(children, offsets, ids=ids_path, subtree=subtree) + + if children: + + # field used in the split + field = mintree_split(children) + + has_missing_branch = (missing_branch(children) or + none_value(children)) + # the missing is singled out as a special case only when there's + # no missing branch in the children list + one_branch = not has_missing_branch or \ + fields[field]['optype'] in COMPOSED_FIELDS + if (one_branch and not fields[field]['slug'] in cmv): + body += missing_check_code(tree, offsets, fields, + field, depth, input_map, cmv) + + for child in children: + [_, field, value, _, _] = get_predicate(child) + pre_condition = "" + # code when missing_splits has been used + if has_missing_branch and value is not None: + pre_condition = missing_prefix_code(child, fields, field, + input_map, cmv) + + # complete split condition code + body += split_condition_code( \ + child, fields, + depth, input_map, pre_condition, + term_analysis_fields, item_analysis_fields, cmv) + + # value to be determined in next node + next_level = boosted_plug_in_body( \ + child, offsets, fields, objective_id, regression, depth + 1, + cmv=cmv[:], input_map=input_map, ids_path=ids_path, + subtree=subtree) + + body += next_level[0] + term_analysis_fields.extend(next_level[1]) + item_analysis_fields.extend(next_level[2]) + else: + value = value_to_print(node[offsets["output"]], NUMERIC) + body = "%sreturn {\"prediction\":%s" % (INDENT * depth, value) + body += "}\n" + + return body, term_analysis_fields, item_analysis_fields diff --git a/bigml/generators/model.py b/bigml/generators/model.py new file mode 100644 index 00000000..51c65e92 --- /dev/null +++ b/bigml/generators/model.py @@ -0,0 +1,1057 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +""" +Functions used to generate or write output from the decision tree models + +""" +import sys +import os +import math +import keyword + + +from functools import reduce, partial + +from bigml.path import Path, BRIEF +from bigml.basemodel import print_importance +from bigml.io import UnicodeWriter +from bigml.util import markdown_cleanup, prefix_as_comment, utf8, NUMERIC +from bigml.predicate import Predicate +from bigml.model import PYTHON_CONV +from bigml.predict_utils.common import missing_branch, \ + none_value, get_node, get_predicate +from bigml.predicate_utils.utils import predicate_to_rule, \ + EQ, NE, to_lisp_rule, INVERSE_OP +from bigml.tree_utils import MAX_ARGS_LENGTH, tableau_string, slugify, \ + sort_fields, TM_TOKENS, TM_ALL, TM_FULL_TERM, TERM_OPTIONS, ITEM_OPTIONS, \ + PYTHON_OPERATOR +from bigml.generators.tree import plug_in_body +from bigml.generators.boosted_tree import boosted_plug_in_body +from bigml.generators.tree import filter_nodes + + +# templates for static Python +BIGML_SCRIPT = os.path.dirname(__file__) + +TERM_TEMPLATE = "%s/static/term_analysis.txt" % BIGML_SCRIPT +ITEMS_TEMPLATE = "%s/static/items_analysis.txt" % BIGML_SCRIPT +HADOOP_CSV_TEMPLATE = "%s/static/python_hadoop_csv.txt" % \ + BIGML_SCRIPT +HADOOP_NEXT_TEMPLATE = "%s/static/python_hadoop_next.txt" % \ + BIGML_SCRIPT +HADOOP_REDUCER_TEMPLATE = "%s/static/python_hadoop_reducer.txt" % \ + BIGML_SCRIPT + +DEFAULT_IMPURITY = 0.2 + +INDENT = ' ' + +DFT_ATTR = "output" + + +MISSING_OPERATOR = { + EQ: "is", + NE: "is not" +} + +T_MISSING_OPERATOR = { + EQ: "ISNULL(", + NE: "NOT ISNULL(" +} + + +def print_distribution(distribution, out=sys.stdout): + """Prints distribution data + + """ + total = reduce(lambda x, y: x + y, + [group[1] for group in distribution]) + for group in distribution: + out.write(utf8( + " %s: %.2f%% (%d instance%s)\n" % ( + group[0], + round(group[1] * 1.0 / total, 4) * 100, + group[1], + "" if group[1] == 1 else "s"))) + + +def list_fields(model, out=sys.stdout): + """Prints descriptions of the fields for this model. + + """ + out.write(utf8('<%-32s : %s>\n' % ( + model.fields[model.objective_id]['name'], + model.fields[model.objective_id]['optype']))) + out.flush() + + if hasattr(model, "model_fields"): + fields = model.model_fields + else: + fields = model.fields + for field in [(val['name'], val['optype']) for key, val in + sort_fields(fields) + if key != model.objective_id]: + out.write(utf8('[%-32s : %s]\n' % (field[0], field[1]))) + out.flush() + return model.fields + + +def gini_impurity(distribution, count): + """Returns the gini impurity score associated to the distribution + in the node + + """ + purity = 0.0 + if distribution is None: + return None + for _, instances in distribution: + purity += math.pow(instances / float(count), 2) + return 1.0 - purity + + +def get_leaves(model, path=None, filter_function=None): + """Returns a list that includes all the leaves of the tree. 
+ + """ + + leaves = [] + + if path is None: + path = [] + + offsets = model.offsets + + def get_tree_leaves(tree, fields, path, filter_function=None): + + leaves = [] + node = get_node(tree) + predicate = get_predicate(tree) + if isinstance(predicate, list): + [operator, field, value, term, missing] = get_predicate(tree) + path.append(to_lisp_rule(operator, field, value, term, missing, + fields[field])) + + children_number = node[offsets["children#"]] + children = [] if children_number == 0 else node[offsets["children"]] + + if children: + for child in children: + + leaves += get_tree_leaves(child, fields, + path[:], + filter_function=filter_function) + else: + leaf = { + 'id': node[offsets["id"]], + 'confidence': node[offsets["confidence"]], + 'count': node[offsets["count"]], + 'distribution': node[offsets["distribution"]], + 'impurity': gini_impurity(node[offsets["distribution"]], + node[offsets["count"]]), + 'output': node[offsets["output"]], + 'path': path} + if 'weighted_distribution' in offsets: + leaf.update( \ + {"weighted_distribution": node[offsets[ \ + "weighted_distribution"]], + "weight": node[offsets["weight"]]}) + if (not hasattr(filter_function, '__call__') + or filter_function(leaf)): + leaves += [leaf] + return leaves + return get_tree_leaves(model.tree, model.fields, path, + filter_function) + + +def impure_leaves(model, impurity_threshold=DEFAULT_IMPURITY): + """Returns a list of leaves that are impure + + """ + if model.regression or model.boosting: + raise AttributeError("This method is available for non-boosting" + " categorization models only.") + def is_impure(node, impurity_threshold=impurity_threshold): + """Returns True if the gini impurity of the node distribution + goes above the impurity threshold. + + """ + return node.get('impurity') > impurity_threshold + + is_impure = partial(is_impure, impurity_threshold=impurity_threshold) + return get_leaves(model, filter_function=is_impure) + + +def docstring(model): + """Returns the docstring describing the model. 
+ + """ + objective_name = model.fields[model.objective_id]['name'] if \ + not model.boosting else \ + model.fields[model.boosting["objective_field"]]['name'] + docstring_cmt = ("Predictor for %s from %s\n" % ( + objective_name, + model.resource_id)) + model.description = ( + str( + markdown_cleanup(model.description).strip()) or + 'Predictive model by BigML - Machine Learning Made Easy') + docstring_cmt += "\n" + INDENT * 2 + ( + "%s" % prefix_as_comment(INDENT * 2, model.description)) + return docstring_cmt + + +def build_ids_map(tree, offsets, ids_map, parent_id=None): + """Builds a map for the tree from each node id to its parent + + """ + node = get_node(tree) + node_id = node[offsets["id"]] + ids_map[node_id] = parent_id + children_number = node[offsets["children#"]] + children = [] if children_number == 0 else node[offsets["children"]] + for child in children: + build_ids_map(child, offsets, ids_map, node_id) + + +def fill_ids_map(model): + """Filling the parent-child map + + """ + + if not (hasattr(model, "ids_map") and model.ids_map): + model.ids_map = {} + build_ids_map(model.tree, model.offsets, model.ids_map) + return model + + +def get_ids_path(model, filter_id): + """Builds the list of ids that go from a given id to the tree root + + """ + model = fill_ids_map(model) + + ids_path = [] + if filter_id is not None and model.tree[model.offsets["id"]] is not None: + if filter_id not in model.ids_map: + raise ValueError("The given id does not exist.") + ids_path = [filter_id] + last_id = filter_id + while model.ids_map[last_id] is not None: + ids_path.append(model.ids_map[last_id]) + last_id = model.ids_map[last_id] + return ids_path + + +def generate_rules(tree, offsets, objective_id, fields, + depth=0, ids_path=None, subtree=True): + """Translates a tree model into a set of IF-THEN rules. + + """ + rules_str = "" + + node = get_node(tree) + children_number = node[offsets["children#"]] + children = [] if children_number == 0 else node[offsets["children"]] + children = filter_nodes(children, offsets, ids=ids_path, + subtree=subtree) + if children: + for child in children: + predicate = get_predicate(child) + if isinstance(predicate, list): + [operator, field, value, term, missing] = predicate + child_node = get_node(child) + rules_str += ("%s IF %s %s\n" % + (INDENT * depth, + predicate_to_rule(operator, fields[field], + value, term, missing, + label='slug'), + "AND" if child_node[offsets["children#"]] > 0 + else "THEN")) + rules_str += generate_rules(child, offsets, objective_id, fields, + depth + 1, ids_path=ids_path, + subtree=subtree) + else: + rules_str += ("%s %s = %s\n" % + (INDENT * depth, + (fields[objective_id]['slug'] + if objective_id else "Prediction"), + node[offsets["output"]])) + return rules_str
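These helpers back the user-facing exporters that follow. A usage sketch, assuming local_model is a local bigml.model.Model instance:

import sys
from bigml.generators.model import rules, python

rules(local_model, out=sys.stdout)   # IF-THEN rule listing
python(local_model, out=sys.stdout)  # standalone predict_* function

+ +def rules(model, out=sys.stdout, filter_id=None, subtree=True): + """Returns an IF-THEN rule set that implements the model. + + `out` is the file descriptor to write the rules. + + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + ids_path = get_ids_path(model, filter_id) + + def tree_rules(tree, offsets, objective_id, fields, + out, ids_path=None, subtree=True): + """Prints out an IF-THEN rule version of the tree.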
+ + """ + for field in sort_fields(fields): + + slug = slugify(fields[field[0]]['name']) + fields[field[0]].update(slug=slug) + out.write(utf8(generate_rules(tree, offsets, objective_id, + fields, + ids_path=ids_path, + subtree=subtree))) + out.flush() + + return tree_rules(model.tree, model.offsets, model.objective_id, + model.fields, out, + ids_path=ids_path, subtree=subtree) + + +def python(model, out=sys.stdout, hadoop=False, + filter_id=None, subtree=True): + """Returns a basic python function that implements the model. + + `out` is the file descriptor to write the python code. + + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + ids_path = get_ids_path(model, filter_id) + if hadoop: + return (hadoop_python_mapper(model, out=out, + ids_path=ids_path, + subtree=subtree) or + hadoop_python_reducer(out=out)) + return tree_python(model.tree, model.offsets, model.fields, + model.objective_id, model.boosting, out, + docstring(model), ids_path=ids_path, subtree=subtree) + +def hadoop_python_mapper(model, out=sys.stdout, ids_path=None, + subtree=True): + """Generates a hadoop mapper header to make predictions in python + + """ + input_fields = [(value, key) for (key, value) in + sorted(list(model.inverted_fields.items()), + key=lambda x: x[1])] + parameters = [value for (key, value) in + input_fields if key != model.objective_id] + args = [] + for field in input_fields: + slug = slugify(model.fields[field[0]]['name']) + model.fields[field[0]].update(slug=slug) + if field[0] != model.objective_id: + args.append("\"" + model.fields[field[0]]['slug'] + "\"") + + with open(HADOOP_CSV_TEMPLATE) as template_handler: + output = template_handler.read() % ",".join(parameters) + + output += "\n%sself.INPUT_FIELDS = [%s]\n" % \ + ((INDENT * 3), (",\n " + INDENT * 8).join(args)) + + input_types = [] + prefixes = [] + suffixes = [] + count = 0 + fields = model.fields + for key in [field[0] for field in input_fields + if field[0] != model.objective_id]: + input_type = ('None' if not fields[key]['datatype'] in + PYTHON_CONV + else PYTHON_CONV[fields[key]['datatype']]) + input_types.append(input_type) + if 'prefix' in fields[key]: + prefixes.append("%s: %s" % (count, + repr(fields[key]['prefix']))) + if 'suffix' in fields[key]: + suffixes.append("%s: %s" % (count, + repr(fields[key]['suffix']))) + count += 1 + static_content = "%sself.INPUT_TYPES = [" % (INDENT * 3) + formatter = ",\n%s" % (" " * len(static_content)) + output += "\n%s%s%s" % (static_content, + formatter.join(input_types), + "]\n") + static_content = "%sself.PREFIXES = {" % (INDENT * 3) + formatter = ",\n%s" % (" " * len(static_content)) + output += "\n%s%s%s" % (static_content, + formatter.join(prefixes), + "}\n") + static_content = "%sself.SUFFIXES = {" % (INDENT * 3) + formatter = ",\n%s" % (" " * len(static_content)) + output += "\n%s%s%s" % (static_content, + formatter.join(suffixes), + "}\n") + + with open(HADOOP_NEXT_TEMPLATE) as template_handler: + output += template_handler.read() + + out.write(output) + out.flush() + + tree_python(model.tree, model.offsets, model.fields, model.objective_id, + False if not hasattr(model, "boosting") else model.boosting, + out, docstring(model), ids_path=ids_path, subtree=subtree) + + output = \ +""" +csv = CSVInput() +for values in csv: + if not isinstance(values, bool): + print('%%s\\t%%s' %% (repr(values), repr(predict_%s(values)))) +\n\n +""" % fields[model.objective_id]['slug'] + out.write(utf8(output)) + out.flush() +
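A sketch of generating the Hadoop streaming code (when hadoop=True the python helper above writes both the mapper header and the reducer to the same descriptor):

from bigml.generators.model import python

with open("hadoop_predictor.py", "w") as handler:
    python(local_model, out=handler, hadoop=True)

+def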
hadoop_python_reducer(out=sys.stdout): + """Generates a hadoop reducer to make predictions in python + + """ + + with open(HADOOP_REDUCER_TEMPLATE) as template_handler: + output = template_handler.read() + out.write(utf8(output)) + out.flush() + +def tree_python(tree, offsets, fields, objective_id, boosting, + out, docstring_str, input_map=False, + ids_path=None, subtree=True): + """Writes a python function that implements the model. + + """ + args = [] + args_tree = [] + parameters = sort_fields(fields) + if not input_map: + input_map = len(parameters) > MAX_ARGS_LENGTH + reserved_keywords = keyword.kwlist if not input_map else None + prefix = "_" if not input_map else "" + for field in parameters: + field_name_to_show = fields[field[0]]['name'].strip() + if field_name_to_show == "": + field_name_to_show = field[0] + slug = slugify(field_name_to_show, + reserved_keywords=reserved_keywords, prefix=prefix) + fields[field[0]].update(slug=slug) + if not input_map: + if field[0] != objective_id: + args.append("%s=None" % (slug)) + args_tree.append("%s=%s" % (slug, slug)) + if input_map: + args.append("data={}") + args_tree.append("data=data") + + function_name = fields[objective_id]['slug'] if \ + not boosting else fields[boosting["objective_field"]]['slug'] + if prefix == "_" and function_name[0] == prefix: + function_name = function_name[1:] + if function_name == "": + function_name = "field_" + objective_id + python_header = "# -*- coding: utf-8 -*-\n" + predictor_definition = ("def predict_%s" % + function_name) + depth = len(predictor_definition) + 1 + predictor = "%s(%s):\n" % (predictor_definition, + (",\n" + " " * depth).join(args)) + + predictor_doc = (INDENT + "\"\"\" " + docstring_str + + "\n" + INDENT + "\"\"\"\n") + body_fn = boosted_plug_in_body if boosting else plug_in_body + body, term_analysis_predicates, item_analysis_predicates = \ + body_fn(tree, offsets, fields, objective_id, + fields[objective_id]["optype"] == NUMERIC, + input_map=input_map, + ids_path=ids_path, subtree=subtree) + terms_body = "" + if term_analysis_predicates or item_analysis_predicates: + terms_body = term_analysis_body(fields, + term_analysis_predicates, + item_analysis_predicates) + predictor = python_header + predictor + \ + predictor_doc + terms_body + body + + predictor_model = "def predict" + depth = len(predictor_model) + 1 + predictor += "\n\n%s(%s):\n" % (predictor_model, + (",\n" + " " * depth).join(args)) + predictor += "%sprediction = predict_%s(%s)\n" % ( \ + INDENT, function_name, ", ".join(args_tree)) + + if boosting is not None: + predictor += "%sprediction.update({\"weight\": %s})\n" % \ + (INDENT, boosting.get("weight")) + if boosting.get("objective_class") is not None: + predictor += "%sprediction.update({\"class\": \"%s\"})\n" % \ + (INDENT, boosting.get("objective_class")) + predictor += "%sreturn prediction" % INDENT + + out.write(utf8(predictor)) + out.flush() + + +def term_analysis_body(fields, term_analysis_predicates, + item_analysis_predicates): + """ Writes auxiliary functions to handle the term and item + analysis fields + + """ + body = """ + import re +""" + # static content + + if term_analysis_predicates: + body += """ + tm_tokens = '%s' + tm_full_term = '%s' + tm_all = '%s' + +""" % (TM_TOKENS, TM_FULL_TERM, TM_ALL) + with open(TERM_TEMPLATE) as template_handler: + body += template_handler.read() + + term_analysis_options = {predicate[0] for predicate in + term_analysis_predicates} + term_analysis_predicates = set(term_analysis_predicates) + body += """ + term_analysis = 
{""" + for field_id in term_analysis_options: + field = fields[field_id] + body += """ + \"%s\": {""" % field['slug'] + options = sorted(field['term_analysis'].keys()) + for option in options: + if option in TERM_OPTIONS: + body += """ + \"%s\": %s,""" % (option, repr(field['term_analysis'][option])) + body += """ + },""" + body += """ + }""" + body += """ + term_forms = {""" + term_forms = {} + for field_id, term in term_analysis_predicates: + alternatives = [] + field = fields[field_id] + if field['slug'] not in term_forms: + term_forms[field['slug']] = {} + all_forms = field['summary'].get('term_forms', {}) + if all_forms: + alternatives = all_forms.get(term, []) + if alternatives: + terms = [term] + terms.extend(all_forms.get(term, [])) + term_forms[field['slug']][term] = terms + for field, field_term_forms in term_forms.items(): + body += """ + \"%s\": {""" % field + terms = sorted(field_term_forms.keys()) + for term in terms: + body += """ + \"%s\": %s,""" % (term, field_term_forms[term]) + body += """ + },""" + body += """ + } + +""" + if item_analysis_predicates: + with open(ITEMS_TEMPLATE) as template_handler: + body += template_handler.read() + + item_analysis_options = {predicate[0] for predicate in + item_analysis_predicates} + item_analysis_predicates = set(item_analysis_predicates) + body += """ + item_analysis = {""" + for field_id in item_analysis_options: + field = fields[field_id] + body += """ + \"%s\": {""" % field['slug'] + for option in field['item_analysis']: + if option in ITEM_OPTIONS: + body += """ + \"%s\": %s,""" % (option, repr(field['item_analysis'][option])) + body += """ + },""" + body += """ + } + +""" + + return body + + +def tableau(model, out=sys.stdout, hadoop=False, + filter_id=None, subtree=True, attr=DFT_ATTR): + """Returns a basic tableau function that implements the model. + + `out` is file descriptor to write the tableau code. + + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + ids_path = get_ids_path(model, filter_id) + if hadoop: + return "Hadoop output not available." + response = tree_tableau(model.tree, model.offsets, model.fields, + model.objective_id, + out, ids_path=ids_path, + subtree=subtree, attr=attr) + if response: + out.write("END\n") + else: + out.write("\nThis function cannot be represented " + "in Tableau syntax.\n") + out.flush() + return None + + + +def tableau_body(tree, offsets, fields, objective_id, + body="", conditions=None, cmv=None, + ids_path=None, subtree=True, attr=DFT_ATTR): + """Translate the model into a set of "if" statements in Tableau syntax + + `depth` controls the size of indentation. As soon as a value is missing + that node is returned without further evaluation. 
+ + """ + + if cmv is None: + cmv = [] + if body: + alternate = "ELSEIF" + else: + if conditions is None: + conditions = [] + alternate = "IF" + + node = get_node(tree) + children_number = node[offsets["children#"]] + children = [] if children_number == 0 else node[offsets["children"]] + children = filter_nodes(children, offsets, ids=ids_path, + subtree=subtree) + if children: + [_, field, _, _, _] = get_predicate(children[0]) + has_missing_branch = (missing_branch(children) or + none_value(children)) + # the missing is singled out as a special case only when there's + # no missing branch in the children list + if (not has_missing_branch and + fields[field]['name'] not in cmv): + conditions.append("ISNULL([%s])" % fields[field]['name']) + body += ("%s %s THEN " % + (alternate, " AND ".join(conditions))) + if fields[objective_id]['optype'] == 'numeric': + value = node[offsets[attr]] + else: + value = tableau_string(node[offsets[attr]]) + body += ("%s\n" % value) + cmv.append(fields[field]['name']) + alternate = "ELSEIF" + del conditions[-1] + + for child in children: + pre_condition = "" + post_condition = "" + [operator, field, ch_value, _, missing] = get_predicate(child) + if has_missing_branch and ch_value is not None: + negation = "" if missing else "NOT " + connection = "OR" if missing else "AND" + pre_condition = ( + "(%sISNULL([%s]) %s " % ( + negation, fields[field]['name'], connection)) + if not missing: + cmv.append(fields[field]['name']) + post_condition = ")" + optype = fields[field]['optype'] + if ch_value is None: + value = "" + elif optype in ['text', 'items']: + return "" + elif optype == 'numeric': + value = ch_value + else: + value = repr(ch_value) + + operator = ("" if ch_value is None else + PYTHON_OPERATOR[operator]) + if ch_value is None: + pre_condition = ( + T_MISSING_OPERATOR[operator]) + post_condition = ")" + + conditions.append("%s[%s]%s%s%s" % ( + pre_condition, + fields[field]['name'], + operator, + value, + post_condition)) + body = tableau_body(child, offsets, fields, objective_id, + body, conditions[:], cmv=cmv[:], + ids_path=ids_path, subtree=subtree, attr=attr) + del conditions[-1] + else: + if fields[objective_id]['optype'] == 'numeric': + value = tree[offsets[attr]] + else: + value = tableau_string(node[offsets[attr]]) + body += ( + "%s %s THEN" % (alternate, " AND ".join(conditions))) + body += " %s\n" % value + + return body + +def tree_tableau(tree, offsets, fields, objective_id, + out, ids_path=None, subtree=True, attr=DFT_ATTR): + """Writes a Tableau function that implements the model. + + """ + body = tableau_body(tree, offsets, fields, objective_id, + ids_path=ids_path, subtree=subtree, attr=attr) + if not body: + return False + out.write(utf8(body)) + out.flush() + return True + + +def group_prediction(model): + """Groups in categories or bins the predicted data + + dict - contains a dict grouping counts in 'total' and 'details' lists. + 'total' key contains a 3-element list. + - common segment of the tree for all instances + - data count + - predictions count + 'details' key contains a list of elements. 
Each element is a + 3-element list: + - complete path of the tree from the root to the leaf + - leaf predictions count + - confidence + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + groups = {} + tree = model.tree + node = get_node(tree) + offsets = model.offsets + distribution = node[offsets["distribution"]] + + for group in distribution: + groups[group[0]] = {'total': [[], group[1], 0], + 'details': []} + path = [] + + def add_to_groups(groups, output, path, count, confidence, + impurity=None): + """Adds instances to groups array + + """ + group = output + if output not in groups: + groups[group] = {'total': [[], 0, 0], + 'details': []} + groups[group]['details'].append([path, count, confidence, + impurity]) + groups[group]['total'][2] += count + + def depth_first_search(tree, path): + """Search for leaves' values and instances + + """ + node = get_node(tree) + predicate = get_predicate(tree) + if isinstance(predicate, list): + [operation, field, value, term, _] = predicate + operator = INVERSE_OP[operation] + path.append(Predicate(operator, field, value, term)) + if term: + if field not in model.terms: + model.terms[field] = [] + if term not in model.terms[field]: + model.terms[field].append(term) + + if node[offsets["children#"]] == 0: + add_to_groups(groups, node[offsets["output"]], + path, node[offsets["count"]], + node[offsets["confidence"]], + gini_impurity(node[offsets["distribution"]], + node[offsets["count"]])) + return node[offsets["count"]] + children = node[offsets["children"]][:] + children.reverse() + + children_sum = 0 + for child in children: + children_sum += depth_first_search(child, path[:]) + if children_sum < node[offsets["count"]]: + add_to_groups(groups, node[offsets["output"]], path, + node[offsets["count"]] - children_sum, + node[offsets["confidence"]], + gini_impurity(node[offsets["distribution"]], + node[offsets["count"]])) + return node[offsets["count"]] + + depth_first_search(tree, path) + + return groups + + +def get_data_distribution(model): + """Returns training data distribution + + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + node = get_node(model.tree) + + distribution = node[model.offsets["distribution"]] + + return sorted(distribution, key=lambda x: x[0]) + + +def get_prediction_distribution(model, groups=None): + """Returns model predicted distribution + + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + if groups is None: + groups = group_prediction(model) + + predictions = [[group, groups[group]['total'][2]] for group in groups] + # remove groups that are not predicted + predictions = [prediction for prediction in predictions \ + if prediction[1] > 0] + + return sorted(predictions, key=lambda x: x[0])
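A quick sketch of inspecting both distributions with the helpers above (local_model as before):

from bigml.generators.model import get_data_distribution, \
    get_prediction_distribution

print(get_data_distribution(local_model))
print(get_prediction_distribution(local_model))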
+ + +#pylint: disable=locally-disabled,redefined-builtin +def summarize(model, out=sys.stdout, format=BRIEF): + """Prints summary grouping distribution as class header and details + + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + tree = model.tree + + def extract_common_path(groups): + """Extracts the common segment of the prediction path for a group + + """ + for group in groups: + details = groups[group]['details'] + common_path = [] + if len(details) > 0: + mcd_len = min([len(x[0]) for x in details]) + for i in range(0, mcd_len): + test_common_path = details[0][0][i] + for subgroup in details: + if subgroup[0][i] != test_common_path: + i = mcd_len + break + if i < mcd_len: + common_path.append(test_common_path) + groups[group]['total'][0] = common_path + if len(details) > 0: + groups[group]['details'] = sorted(details, + key=lambda x: x[1], + reverse=True) + + def confidence_error(value, impurity=None): + """Returns confidence for categorical objective fields + and error for numeric objective fields + """ + if value is None: + return "" + impurity_literal = "" + if impurity is not None and impurity > 0: + impurity_literal = "; impurity: %.2f%%" % (round(impurity, 4)) + objective_type = model.fields[model.objective_id]['optype'] + if objective_type == 'numeric': + return " [Error: %s]" % value + return " [Confidence: %.2f%%%s]" % (round(value, 4) * 100, + impurity_literal) + + distribution = get_data_distribution(model) + + out.write(utf8("Data distribution:\n")) + print_distribution(distribution, out=out) + out.write(utf8("\n\n")) + + groups = group_prediction(model) + predictions = get_prediction_distribution(model, groups) + + out.write(utf8("Predicted distribution:\n")) + print_distribution(predictions, out=out) + out.write(utf8("\n\n")) + + if model.field_importance: + out.write(utf8("Field importance:\n")) + print_importance(model, out=out) + + extract_common_path(groups) + + out.write(utf8("\n\nRules summary:")) + + node = get_node(tree) + count = node[model.offsets["count"]] + for group in [x[0] for x in predictions]: + details = groups[group]['details'] + path = Path(groups[group]['total'][0]) + data_per_group = groups[group]['total'][1] * 1.0 / count + pred_per_group = groups[group]['total'][2] * 1.0 / count + out.write(utf8("\n\n%s : (data %.2f%% / prediction %.2f%%) %s" % + (group, + round(data_per_group, 4) * 100, + round(pred_per_group, 4) * 100, + path.to_rules(model.fields, format=format)))) + + if len(details) == 0: + out.write(utf8("\n The model will never predict this" + " class\n")) + elif len(details) == 1: + subgroup = details[0] + out.write(utf8("%s\n" % confidence_error( + subgroup[2], impurity=subgroup[3]))) + else: + out.write(utf8("\n")) + for subgroup in details: + pred_per_sgroup = subgroup[1] * 1.0 / \ + groups[group]['total'][2] + path = Path(subgroup[0]) + path_chain = path.to_rules(model.fields, format=format) if \ + path.predicates else "(root node)" + out.write(utf8(" · %.2f%%: %s%s\n" % + (round(pred_per_sgroup, 4) * 100, + path_chain, + confidence_error(subgroup[2], + impurity=subgroup[3])))) + + out.flush()
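For example, the full report can be printed with (BRIEF is the rule format constant used as the default):

from bigml.path import BRIEF
from bigml.generators.model import summarize

summarize(local_model, format=BRIEF)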
+ + +def get_nodes_info(model, headers, leaves_only=False): + """Generator that yields the nodes information in a row format + + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + + def get_tree_nodes_info(tree, offsets, regression, fields, objective_id, + headers=None, leaves_only=False): + """Yields the information associated to each of the tree nodes + + """ + row = [] + node = get_node(tree) + if not regression: + category_dict = dict(node[offsets["distribution"]]) + for header in headers: + if header == fields[objective_id]['name']: + row.append(node[offsets["output"]]) + continue + if header in ['confidence', 'error']: + row.append(node[offsets["confidence"]]) + continue + if header == 'impurity': + row.append(gini_impurity(node[offsets["distribution"]], + node[offsets["count"]])) + continue + if regression and header.startswith('bin'): + for bin_value, bin_instances in node[offsets["distribution"]]: + row.append(bin_value) + row.append(bin_instances) + break + if not regression: + row.append(category_dict.get(header)) + while len(row) < len(headers): + row.append(None) + if not leaves_only or node[offsets["children#"]] == 0: + yield row + + if node[offsets["children#"]] > 0: + for child in node[offsets["children"]]: + for row in get_tree_nodes_info(child, offsets, regression, + fields, objective_id, headers, + leaves_only=leaves_only): + yield row + + return get_tree_nodes_info(model.tree, + model.offsets, + model.regression, + model.fields, + model.objective_id, + headers, leaves_only=leaves_only) + + +def tree_csv(model, file_name=None, leaves_only=False): + """Outputs the node structure to a CSV file or array + + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + headers_names = [] + if model.regression: + headers_names.append( + model.fields[model.objective_id]['name']) + headers_names.append("error") + max_bins = get_node(model.tree)[model.offsets["max_bins"]] + for index in range(0, max_bins): + headers_names.append("bin%s_value" % index) + headers_names.append("bin%s_instances" % index) + else: + headers_names.append( + model.fields[model.objective_id]['name']) + headers_names.append("confidence") + headers_names.append("impurity") + node = get_node(model.tree) + for category, _ in node[model.offsets["distribution"]]: + headers_names.append(category) + + nodes_generator = get_nodes_info(model, headers_names, + leaves_only=leaves_only) + if file_name is not None: + with UnicodeWriter(file_name) as writer: + writer.writerow([utf8(header) + for header in headers_names]) + for row in nodes_generator: + writer.writerow([item if not isinstance(item, str) + else utf8(item) + for item in row]) + return file_name + rows = [] + rows.append(headers_names) + for row in nodes_generator: + rows.append(row) + return rows
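A sketch of exporting the node structure (the file name is illustrative):

from bigml.generators.model import tree_csv

tree_csv(local_model, file_name="model_nodes.csv", leaves_only=True)

diff --git a/bigml/out_model/static/items_analysis.py b/bigml/generators/static/items_analysis.txt similarity index 100% rename from bigml/out_model/static/items_analysis.py rename to bigml/generators/static/items_analysis.txt diff --git a/bigml/out_model/static/python_haddop_csv.py b/bigml/generators/static/python_hadoop_csv.txt similarity index 100% rename from bigml/out_model/static/python_haddop_csv.py rename to bigml/generators/static/python_hadoop_csv.txt diff --git a/bigml/out_model/static/python_hadoop_next.py b/bigml/generators/static/python_hadoop_next.txt similarity index 100% rename from bigml/out_model/static/python_hadoop_next.py rename to bigml/generators/static/python_hadoop_next.txt diff --git a/bigml/out_model/static/python_hadoop_reducer.py b/bigml/generators/static/python_hadoop_reducer.txt similarity index 100% rename from bigml/out_model/static/python_hadoop_reducer.py rename to bigml/generators/static/python_hadoop_reducer.txt diff --git a/bigml/out_model/static/term_analysis.py b/bigml/generators/static/term_analysis.txt similarity index 100% rename from bigml/out_model/static/term_analysis.py rename to bigml/generators/static/term_analysis.txt diff --git a/bigml/generators/tree.py b/bigml/generators/tree.py new file mode 100644 index 00000000..95d7200e --- /dev/null +++ b/bigml/generators/tree.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License.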
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Tree level output for python +This module defines functions that generate python code to make local +predictions +""" + +from bigml.tree_utils import INDENT, COMPOSED_FIELDS + +from bigml.predict_utils.common import missing_branch, \ + none_value, get_node, get_predicate, mintree_split +from bigml.generators.tree_common import value_to_print, map_data, \ + missing_prefix_code, filter_nodes, split_condition_code + + +MISSING_OPERATOR = { + "=": "is", + "!=": "is not" +} + + +def missing_check_code(tree, offsets, fields, objective_id, + field, depth, input_map, cmv, metric): + """Builds the code to predict when the field is missing + """ + code = "%sif (%s is None):\n" % \ + (INDENT * depth, + map_data(fields[field]['slug'], input_map, True)) + node = get_node(tree) + value = value_to_print(node[offsets["output"]], + fields[objective_id]['optype']) + code += "%sreturn {\"prediction\": %s," \ + " \"%s\": %s}\n" % \ + (INDENT * (depth + 1), value, metric, node[offsets["confidence"]]) + cmv.append(fields[field]['slug']) + return code + + +def plug_in_body(tree, offsets, fields, objective_id, regression, + depth=1, cmv=None, input_map=False, + ids_path=None, subtree=True): + """Translate the model into a set of "if" python statements. + `depth` controls the size of indentation. As soon as a value is missing + that node is returned without further evaluation. 
+ """ + # label for the confidence measure and initialization + metric = "error" if regression else "confidence" + if cmv is None: + cmv = [] + body = "" + term_analysis_fields = [] + item_analysis_fields = [] + + node = get_node(tree) + children = [] if node[offsets["children#"]] == 0 else \ + node[offsets["children"]] + children = filter_nodes(children, offsets, ids=ids_path, + subtree=subtree) + if children: + + # field used in the split + field = mintree_split(children) + + has_missing_branch = (missing_branch(children) or + none_value(children)) + # the missing is singled out as a special case only when there's + # no missing branch in the children list + one_branch = not has_missing_branch or \ + fields[field]['optype'] in COMPOSED_FIELDS + if (one_branch and + not fields[field]['slug'] in cmv): + body += missing_check_code(tree, offsets, fields, objective_id, + field, depth, input_map, cmv, metric) + + for child in children: + [_, field, value, _, _] = get_predicate(child) + pre_condition = "" + # code when missing_splits has been used + if has_missing_branch and value is not None: + pre_condition = missing_prefix_code(child, fields, field, + input_map, cmv) + + # complete split condition code + body += split_condition_code( \ + child, fields, depth, input_map, pre_condition, + term_analysis_fields, item_analysis_fields, cmv) + + # value to be determined in next node + next_level = plug_in_body(child, offsets, fields, objective_id, + regression, depth + 1, cmv=cmv[:], + input_map=input_map, ids_path=ids_path, + subtree=subtree) + + body += next_level[0] + term_analysis_fields.extend(next_level[1]) + item_analysis_fields.extend(next_level[2]) + else: + value = value_to_print(node[offsets["output"]], + fields[objective_id]['optype']) + body = "%sreturn {\"prediction\":%s, \"%s\":%s}\n" % ( \ + INDENT * depth, value, metric, node[offsets["confidence"]]) + + return body, term_analysis_fields, item_analysis_fields diff --git a/bigml/generators/tree_common.py b/bigml/generators/tree_common.py new file mode 100644 index 00000000..4a46b8e6 --- /dev/null +++ b/bigml/generators/tree_common.py @@ -0,0 +1,133 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Tree level output for python +This module defines functions that generate python code to make local +predictions +""" + +from bigml.tree_utils import ( + INDENT, PYTHON_OPERATOR, NUMERIC_VALUE_FIELDS) +from bigml.predict_utils.common import \ + get_node, get_predicate, MISSING_OFFSET + +MISSING_OPERATOR = { + "=": "is", + "!=": "is not" +} + + +def value_to_print(value, optype): + """String of code that represents a value according to its type + """ + # the value is numeric for these fields + if (optype in NUMERIC_VALUE_FIELDS or value is None): + return value + return "\"%s\"" % value.replace('"', '\\"') + + +def map_data(field, input_map=False, missing=False): + """Returns the subject of the condition in map format when + more than MAX_ARGS_LENGTH arguments are used. 
+ """ + if input_map: + if missing: + return "data.get('%s')" % field + return "data['%s']" % field + return field + + +def missing_prefix_code(tree, fields, field, input_map, cmv): + """Part of the condition that checks for missings when missing_splits + has been used + """ + + predicate = get_predicate(tree) + missing = predicate[MISSING_OFFSET] + negation = "" if missing else " not" + connection = "or" if missing else "and" + if not missing: + cmv.append(fields[field]['slug']) + return "%s is%s None %s " % (map_data(fields[field]['slug'], + input_map, + True), + negation, + connection) + + +def split_condition_code(tree, fields, depth, input_map, + pre_condition, term_analysis_fields, + item_analysis_fields, cmv): + """Condition code for the split + """ + + predicate = get_predicate(tree) + [operation, field, value, term, _] = predicate + optype = fields[field]['optype'] + value = value_to_print(value, optype) + + if optype in ['text', 'items']: + if optype == 'text': + term_analysis_fields.append((field, term)) + matching_function = "term_matches" + else: + item_analysis_fields.append((field, term)) + matching_function = "item_matches" + + return "%sif (%s%s(%s, \"%s\", %s%s) %s " \ + "%s):\n" % \ + (INDENT * depth, pre_condition, matching_function, + map_data(fields[field]['slug'], + input_map, + False), + fields[field]['slug'], + 'u' if isinstance(term, str) else '', + value_to_print(term, 'categorical'), + PYTHON_OPERATOR[operation], + value) + + operator = (MISSING_OPERATOR[operation] if + value is None else + PYTHON_OPERATOR[operation]) + if value is None: + cmv.append(fields[field]['slug']) + return "%sif (%s%s %s %s):\n" % \ + (INDENT * depth, pre_condition, + map_data(fields[field]['slug'], input_map, + False), + operator, + value) + + +def filter_nodes(trees_list, offsets, ids=None, subtree=True): + """Filters the contents of a trees_list. If any of the nodes is in the + ids list, the rest of the nodes are removed. If none is in the ids list + we include or exclude the nodes depending on the subtree flag. + + """ + if not trees_list: + return None + trees = trees_list[:] + if ids is not None: + for tree in trees: + node = get_node(tree) + node_id = node[offsets["id"]] + if node_id in ids: + trees = [tree] + return trees + return trees + if not subtree: + trees = [] + return trees diff --git a/bigml/images/__init__.py b/bigml/images/__init__.py new file mode 100644 index 00000000..e69de29b
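A tiny sketch of the quoting helper above (assuming, as the module does, that 'numeric' optypes are among NUMERIC_VALUE_FIELDS):

from bigml.generators.tree_common import value_to_print

value_to_print(4.2, "numeric")                # -> 4.2, numeric values pass through
value_to_print("Iris-setosa", "categorical")  # -> '"Iris-setosa"', quoted for generated code

diff --git a/bigml/images/featurizers.py b/bigml/images/featurizers.py new file mode 100644 index 00000000..d6919ed1 --- /dev/null +++ b/bigml/images/featurizers.py @@ -0,0 +1,467 @@ +# -*- coding: utf-8 -*- +#pylint: disable=invalid-name +# +# Copyright 2022-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Image Featurizers + +This module defines the classes that produce the features extracted from +images in BigML. They are used in ModelFields to extend the original input +data provided for local predictions.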
+ +""" +import os +import math +import numpy as np + + +from PIL import Image +from sensenet.models.wrappers import create_image_feature_extractor +from bigml.featurizer import Featurizer, expand_date +from bigml.constants import IMAGE + +TOP_SIZE = 512 +N_BINS = 16 +INTENSITY_RANGE = 256 +BIN_WIDTH = INTENSITY_RANGE / N_BINS +HOG_BINS = 9 +HOG_BIN_WIDTH = np.pi / HOG_BINS +DECOMPS = ["horizontal", "diagonal", "vertical"] + +PRETRAINED = "pretrained_cnn" +WAVELET = "wavelet_subbands" + +def resize_to(image, top_size=TOP_SIZE): + """Resizing the image to a maximum width or height """ + width, height = image.size + if width > top_size or height > top_size: + if width > height: + ratio = height / width + image = image.resize((top_size , int(ratio * top_size)), + Image.BICUBIC) + else: + ratio = width / height + image = image.resize((int(ratio * top_size), top_size), + Image.BICUBIC) + return image + + +def grid_coords(image_a, grid_size): + """ getting the start and end positions for each grid """ + try: + height, width, _ = image_a.shape + except ValueError: + height, width = image_a.shape + f_grid_size = float(grid_size) + h_step = height / f_grid_size + w_step = width / f_grid_size + coords = [] + for h in range(0, grid_size): + for w in range(0, grid_size): + h_start = int(max([0, math.floor(h * h_step)])) + w_start = int(max([0, math.floor(w * w_step)])) + h_end = int(min([height, math.ceil((h + 1) * h_step)])) + w_end = int(min([width, math.ceil((w + 1) * w_step)])) + coords.append([h_start, w_start, h_end, w_end]) + return coords + + +def dimensions_extractor(image_file): + """Returns the features related to the image dimensions: + file size, width, height, aspect ratio + """ + file_size = os.stat(image_file).st_size + image = Image.open(image_file) + width, height = image.size + aspect_ratio = width / float(height) + return [file_size, width, height, aspect_ratio] + + +def average_pixels_extractor(image_file): + """ Averaging pixels for the entire image, 3x3 and 4x4 grids + The image passed as argument should already be resized to 512 max + """ + image = Image.open(image_file) + image = resize_to(image) + image_a = np.array(image) + avg_pixels = [np.average(image_a[:, :, n]) for n in range(0, 3)] + coords = grid_coords(image_a, 3) + coords.extend(grid_coords(image_a, 4)) + for h_start, w_start, h_end, w_end in coords: + avg_pixels.extend( + [np.average(image_a[h_start: h_end, w_start: w_end, n]) + for n in range(0, 3)]) + return avg_pixels + + +def get_bin(value, bin_width): + """Returns the bin where a value falls in.""" + return math.floor(value / bin_width) + + +def get_luminance(image_a): + """Getting the Y coordinate in YUV in terms of the RGB channel info.""" + r = image_a[:, :, 0] + g = image_a[:, :, 1] + b = image_a[:, :, 2] + + image_l = 0.299 * r + 0.587 * g + 0.114 * b + image_l = image_l.astype('d') + return image_l + +def level_histogram_extractor(image_file): + """Level histogram feature extractor.""" + image = Image.open(image_file) + image = resize_to(image) + image_a = np.array(image) + height, width, _ = image_a.shape + pixels_per_channel = width * height + output = [0] * 3 * N_BINS + for c in range(0, 3): + offset = N_BINS * c + for h in range(0, height): + for w in range(0, width): + bin_index = get_bin(image_a[h][w][c], BIN_WIDTH) + output[bin_index + offset] += 1 + for index, _ in enumerate(output): + output[index] /= pixels_per_channel + + return output + + +def HOG_transform(image_a): + """Histogram of Gradients transformation.""" + image_l = 
get_luminance(image_a) + height, width = image_l.shape + if height > 2 and width > 2: + trans_image = np.empty(((height - 2), (width - 2), 2)) + trans_image.astype('d') + for y in range(0, (height - 2)): + for x in range(0, (width - 2)): + py = y + 1 + px = x + 1 + x_edge = image_l[py][x] - image_l[py][px + 1] + y_edge = image_l[y][px] - image_l[py + 1][px] + + trans_image[y][x][0] = math.sqrt( + x_edge * x_edge + y_edge * y_edge) + + # Convert to zero - pi radians + if x_edge == 0: + if y_edge > 0: + trans_image[y][x][1] = np.pi + elif y_edge < 0: + trans_image[y][x][1] = 0 + else: + trans_image[y][x][1] = np.nan + else: + trans_image[y][x][1] = math.atan( + y_edge / x_edge) + (np.pi / 2) + else: + trans_image = np.empty((height, width, 2)) + for y in range(0, height): + for x in range(0, width): + trans_image[y][x][0] = 0 + trans_image[y][x][1] = np.nan + + return trans_image + + +def HOG_aggregate(trans_image, grid_size): + """Histogram of Gradients aggregation.""" + # Laplace correction to avoid zero norm; kind of arbitrary + features = np.ones(((grid_size * grid_size), HOG_BINS)) + + bounds = grid_coords(trans_image, grid_size) + for index, bound in enumerate(bounds): + h_start, w_start, h_end, w_end = bound + for y in range(h_start, h_end): + for x in range(w_start, w_end): + mag = trans_image[y][x][0] + angle = trans_image[y][x][1] + + if mag > 0: + if angle >= np.pi: + low = HOG_BINS - 1 + else: + low = get_bin(angle, HOG_BIN_WIDTH) + high = (low + 1) % HOG_BINS + + high_weight = ( + angle - low * HOG_BIN_WIDTH) / HOG_BIN_WIDTH + low_weight = 1 - high_weight + + # Split vote between adjacent bins + features[index][low] += mag * low_weight + features[index][high] += mag * high_weight + norm = np.linalg.norm(features[index]) + features[index] = features[index] / norm + return features + + +def HOG_extractor(image_file): + """Histogram of Gradients feature extractor""" + image = Image.open(image_file) + image = image.convert('RGB') + image = resize_to(image) + image_a = np.array(image) + transform = HOG_transform(image_a) + features = HOG_aggregate(transform, 1) + features3x3 = HOG_aggregate(transform, 3) + features4x4 = HOG_aggregate(transform, 4) + features_list = list(features.reshape(-1)) + features_list.extend(list(features3x3.reshape(-1))) + features_list.extend(list(features4x4.reshape(-1))) + return features_list + + +def energy_parameters(values, coords): + """Energy parameters computation.""" + if len(values) < 2 and len(values[0]) < 2: + return np.array([values[0][0], 0]) + count = 0 + mean = 0 + sum_sq = 0 + h_start, w_start, h_end, w_end = coords + + for y in range(h_start, h_end): + for x in range(w_start, w_end): + new_value = values[y][x] + count += 1 + delta1 = new_value - mean + mean += delta1 / count + delta2 = new_value - mean + sum_sq += delta1 * delta2 + + return np.array([mean, sum_sq / (count - 1)]) + + +def haar1Ds(signal): + """1-dimensional Haar components.""" + output = np.empty((2, max([1, int(len(signal) / 2)]))) + + if len(signal) > 1: + for i in range(0, len(signal) - 1, 2): + index = int(i / 2) + output[0][index] = (signal[i] + signal[i + 1]) / 2 + output[1][index] = abs(signal[i] - signal[i + 1]) + + else: + output[0][0] = signal[0] + output[1][0] = 0 + + return output + + +def haar1D(image, vertical): + """1-dimensional Haar vertical component.""" + if vertical: + image = image.transpose() + + output = np.empty((2, len(image), max([1, int(len(image[0]) / 2)]))) + + for i, cell in enumerate(image): + row_decomp = haar1Ds(cell) + output[0][i] =
+ + +def haar1D(image, vertical): + """1-dimensional Haar transform along rows (or columns if vertical).""" + if vertical: + image = image.transpose() + + output = np.empty((2, len(image), max([1, int(len(image[0]) / 2)]))) + + for i, cell in enumerate(image): + row_decomp = haar1Ds(cell) + output[0][i] = row_decomp[0] + output[1][i] = row_decomp[1] + + if vertical: + output = np.array([output[0].transpose(), + output[1].transpose()]) + + return output + + +def haar2D(image): + """2-dimensional Haar components.""" + h_mean, h_detail = haar1D(image, False) + average, vertical = haar1D(h_mean, True) + horizontal, diagonal = haar1D(h_detail, True) + + return np.array([average, horizontal, diagonal, vertical]) + + +def wavelet_subbands_aggregate(trans_image, grid_size): + """Wavelet subbands aggregation.""" + index = 0 + features = np.empty((((len(trans_image) - 1) * len(DECOMPS) + 1) * + grid_size * grid_size * 2,)) + features = features.astype('d') + bounds = [] + for cell in trans_image: + bounds.append(grid_coords(cell[0], grid_size)) + for cell_index in range(grid_size * grid_size): + for i, row in enumerate(trans_image): + for cell in row: + params = energy_parameters( + cell, bounds[i][cell_index]) + features[index] = params[0] + features[index + 1] = params[1] + + index += len(params) + + return features + + +def wavelet_subbands_transform(image_a, levels): + """Haar wavelet subbands transformation.""" + image_l = get_luminance(image_a) + + output = [] + + for _ in range(0, levels): + level_output = [] + decomp = haar2D(image_l) + for j in range(0, len(DECOMPS)): + level_output.append(decomp[j + 1]) + image_l = decomp[0] + output.append(level_output) + + output.append([image_l]) + + return output + + +def wavelet_subbands_extractor(image_file, levels): + """Wavelet subbands feature extractor.""" + image = Image.open(image_file) + image = image.convert('RGB') + image = resize_to(image) + image_a = np.array(image) + transform = wavelet_subbands_transform(image_a, levels) + features = wavelet_subbands_aggregate(transform, 1) + features2x2 = wavelet_subbands_aggregate(transform, 2) + features_list = list(features.reshape(-1)) + features_list.extend(list(features2x2.reshape(-1))) + return features_list + + +IMAGE_EXTRACTORS = { + "dimensions": dimensions_extractor, + "average_pixels": average_pixels_extractor, + "level_histogram": level_histogram_extractor, + "histogram_of_gradients": HOG_extractor +} + +IMAGE_PROVENANCE = list(IMAGE_EXTRACTORS.keys()) + [PRETRAINED, WAVELET] + + +#pylint: disable=locally-disabled,bare-except +def get_image_extractors(res_object, field_id): + """Returns the feature extractor functions for an image field.""" + extractors = [] + try: + extracted_features = res_object.fields[field_id].get( + "image_analysis", {}).get("extracted_features") + for feature in extracted_features: + if isinstance(feature, list) and feature[0] == PRETRAINED: + _, cnn_name = feature[:] + extractors.append(lambda x, param=cnn_name: list( + create_image_feature_extractor(param, None)(x))[0]) + elif isinstance(feature, list) and feature[0] == WAVELET: + _, levels = feature[:] + extractors.append(lambda x, param=levels: + wavelet_subbands_extractor(x, param)) + else: + extractors.append(IMAGE_EXTRACTORS[feature]) + + except: + pass + return extractors + + +def expand_image(res_object, parent_id, image_file): + """Retrieves all the values of the subfields generated from + a parent image field. + """ + keys = res_object.fields[parent_id]["child_ids"] + values = [] + for generator in res_object.generators[parent_id]: + values.extend(generator(image_file)) + expanded = dict(zip(keys, values)) + return expanded
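For context, the extracted_features entries that get_image_extractors handles take the two shapes matched above: a plain string naming one of the IMAGE_EXTRACTORS, or a two-element list carrying an extra parameter. A hypothetical configuration, for illustration only:

    # Strings select a built-in extractor; the pretrained CNN and the
    # wavelet options carry a parameter (network name, decomposition levels).
    extracted_features = ["dimensions", "histogram_of_gradients",
                          ["wavelet_subbands", 4],
                          ["pretrained_cnn", "mobilenetv2"]]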
+ + +class ImageFeaturizer(Featurizer): + """This class provides methods for image feature extraction.""" + + def __init__(self, fields, input_fields, selected_fields=None, + preferred_only=True): + self.fields = fields + self.input_fields = input_fields + self.subfields = {} + self.generators = {} + self.preferred_only = preferred_only + self.selected_fields = self.add_subfields(selected_fields, + preferred_only=preferred_only) + super().__init__(fields, input_fields, selected_fields, preferred_only) + + def _add_subfield(self, field_id, field): + """Adding a subfield and the corresponding generator.""" + parent_id = field["parent_ids"][0] + subfield = {field_id: field["datatype"]} + if parent_id in self.subfields: + self.subfields[parent_id].update(subfield) + else: + parent_type = self.fields[parent_id]["optype"] + expand_fn_list = get_image_extractors(self, parent_id) \ + if parent_type == IMAGE else [expand_date] + self.selected_fields[parent_id] = self.fields[parent_id] + self.subfields[parent_id] = subfield + self.generators.update({parent_id: expand_fn_list}) + + def add_subfields(self, selected_fields=None, preferred_only=True): + """Adding the subfield information to the fields structure, plus the + functions that generate the subfield values. + """ + # filling preferred fields with preferred input fields + fields = selected_fields or self.fields + + if selected_fields is None: + selected_fields = {} + selected_fields.update({field_id: field for field_id, field \ + in fields.items() if field_id in self.input_fields \ + and (not preferred_only or self.fields[field_id].get( + "preferred", True))}) + self.selected_fields = selected_fields + + # computing the generated subfields + for fid, finfo in list(self.selected_fields.items()): + if finfo.get('parent_optype', False) == 'datetime' or \ + finfo.get('provenance', False) in IMAGE_PROVENANCE: + # datetime and image subfields + self._add_subfield(fid, finfo) + + return self.selected_fields + + def extend_input(self, input_data): + """Computing the values for the generated subfields and adding them + to the original input data. Parent fields are removed unless + preferred_only is set to False. + """ + extended = {} + for f_id, value in list(input_data.items()): + if f_id in self.generators: + if not self.preferred_only: + extended[f_id] = value + if self.fields[f_id]["optype"] == IMAGE: + extended.update(expand_image(self, f_id, input_data[f_id])) + else: + extended.update( + self.generators[f_id][0](self, f_id, input_data[f_id])) + else: + extended[f_id] = value + return extended
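Finally, a minimal sketch of how the featurizer is meant to be driven; the fields map, the input_fields list, the field id and the image path are illustrative assumptions, not part of this patch:

    # Built from a resource's fields map and its input field ids;
    # extend_input() swaps an image path for its generated subfields.
    featurizer = ImageFeaturizer(fields, input_fields)
    extended = featurizer.extend_input({"000000": "images/fruit.png"})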
+ +"""Miscellaneous utility functions for image processing + +""" +import os +import tempfile + +from PIL import Image + +from bigml.constants import TEMP_DIR, TOP_IMAGE_SIZE as TOP_SIZE, DECIMALS + + +def resize_to(image, top_size=TOP_SIZE): + """Resizing the image to a maximum width or height """ + width, height = image.size + if width > top_size or height > top_size: + if width > height: + ratio = height / width + image = image.resize((top_size , int(ratio * top_size)), + Image.BICUBIC) + else: + ratio = width / height + image = image.resize((int(ratio * top_size), top_size), + Image.BICUBIC) + return image + + +def to_relative_coordinates(image_file, regions_list): + """Transforms predictions with regions having absolute pixels regions + to the relative format used remotely and rounds to the same precision. + """ + + if regions_list: + image_obj = Image.open(image_file) + width, height = image_obj.size + for index, region in enumerate(regions_list): + [xmin, ymin, xmax, ymax] = region["box"] + region["box"] = [round(xmin / width, DECIMALS), + round(ymin / height, DECIMALS), + round(xmax / width, DECIMALS), + round(ymax / height, DECIMALS)] + region["score"] = round(region["score"], DECIMALS) + regions_list[index] = region + return regions_list + + +def remote_preprocess(image_file): + """Emulating the preprocessing of images done in the backend to + get closer results in local predictions + """ + # converting to jpg + image = Image.open(image_file) + if not (image_file.lower().endswith(".jpg") or + image_file.lower().endswith(".jpeg")): + image = image.convert('RGB') + # resizing to top size=512 + resize_to(image) + with tempfile.NamedTemporaryFile(delete=False) as temp_fp: + tmp_file_name = os.path.join(TEMP_DIR, "%s.jpg" % temp_fp.name) + # compressing to 90% + image.save(tmp_file_name, quality=90) + return tmp_file_name diff --git a/bigml/io.py b/bigml/io.py index b158a090..c9dc0a20 100644 --- a/bigml/io.py +++ b/bigml/io.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- +# pylint: disable=R1732 # -# Copyright (c) 2015-2019 BigML, Inc +# Copyright (c) 2015-2025 BigML, Inc # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,20 +16,18 @@ # under the License. -"""Python 2/3 compatibility for I/O functions. +"""Python I/O functions. 
:author: jao -:date: Wed Apr 08, 2015-2019 17:52 +:date: Wed Apr 08, 2015-2025 17:52 """ import csv -from bigml.util import PY3 - -class UnicodeReader(object): - """Adapter to handle Python 2 to 3 conversion when reading files +class UnicodeReader(): + """Adapter to read files """ def __init__(self, filename, dialect=csv.excel, @@ -49,11 +48,9 @@ def open_reader(self): """ if self.filename.__class__.__name__ == 'UTF8Recoder': self.file_handler = self.filename - elif PY3: + else: self.file_handler = open(self.filename, 'rt', encoding=self.encoding, newline='') - else: - self.file_handler = open(self.filename, 'rb') self.reader = csv.reader(self.file_handler, dialect=self.dialect, **self.kwargs) return self @@ -70,14 +67,11 @@ def __exit__(self, ftype, value, traceback): """ self.close_reader() - def next(self): + def __next__(self): """Reading records """ - row = next(self.reader) - if PY3: - return row - return [s.decode(self.encoding) for s in row] + return next(self.reader) def __iter__(self): """Iterator @@ -93,8 +87,8 @@ def close_reader(self): self.file_handler.close() -class UnicodeWriter(object): - """Adapter to handle Python 2 to 3 conversion when writing to files +class UnicodeWriter(): + """Adapter to write files """ def __init__(self, filename, dialect=csv.excel, @@ -113,11 +107,8 @@ def open_writer(self): """Opening the file """ - if PY3: - self.file_handler = open(self.filename, 'wt', - encoding=self.encoding, newline='') - else: - self.file_handler = open(self.filename, 'wb') + self.file_handler = open(self.filename, 'wt', + encoding=self.encoding, newline='') self.writer = csv.writer(self.file_handler, dialect=self.dialect, **self.kwargs) return self @@ -144,9 +135,6 @@ def writerow(self, row): """Writer emulating CSV writerow """ - if not PY3: - row = [(s if not isinstance(s, basestring) else - s.encode(self.encoding)) for s in row] self.writer.writerow(row) def writerows(self, rows): diff --git a/bigml/iris_ensemble/ensemble_5f580eb0e84f942429000c22 b/bigml/iris_ensemble/ensemble_5f580eb0e84f942429000c22 new file mode 100644 index 00000000..91039cd1 --- /dev/null +++ b/bigml/iris_ensemble/ensemble_5f580eb0e84f942429000c22 @@ -0,0 +1 @@ +{"code": 200, "resource": "ensemble/5f580eb0e84f942429000c22", "location": "https://bigml.io/andromeda/ensemble/5f580eb0e84f942429000c22", "object": {"boosting": null, "category": 0, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-09-08T23:07:28.350000", "creator": "mmartin", "credits": 0.01735687255859375, "credits_per_prediction": 0.0, "dataset": "dataset/5f580e962fb31c516d000f0a", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "distributions": [{"importance": [["000003", 0.65193], ["100004", 0.3272], ["000001", 0.02087]], "predictions": {"categories": [["Iris-setosa", 48], ["Iris-versicolor", 48], ["Iris-virginica", 54]]}, "training": {"categories": [["Iris-setosa", 48], ["Iris-versicolor", 47], ["Iris-virginica", 55]]}}, {"importance": [["000003", 0.9548], ["000001", 0.03384], ["100004", 0.01137]], "predictions": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 51], ["Iris-virginica", 49]]}, "training": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]]}}, {"importance": [["000003", 0.88756], ["100004", 0.08608], ["000000", 0.02636]], "predictions": {"categories": [["Iris-setosa", 46], ["Iris-versicolor", 
49], ["Iris-virginica", 55]]}, "training": {"categories": [["Iris-setosa", 46], ["Iris-versicolor", 50], ["Iris-virginica", 54]]}}, {"importance": [["000003", 0.69065], ["100004", 0.29616], ["000001", 0.01319]], "predictions": {"categories": [["Iris-setosa", 48], ["Iris-versicolor", 50], ["Iris-virginica", 52]]}, "training": {"categories": [["Iris-setosa", 48], ["Iris-versicolor", 51], ["Iris-virginica", 51]]}}, {"importance": [["000003", 0.92673], ["100004", 0.05445], ["000000", 0.01882]], "predictions": {"categories": [["Iris-setosa", 54], ["Iris-versicolor", 46], ["Iris-virginica", 50]]}, "training": {"categories": [["Iris-setosa", 54], ["Iris-versicolor", 47], ["Iris-virginica", 49]]}}], "ensemble": {"fields": {"000000": {"column_number": 0, "datatype": "double", "generated": false, "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "order": 2, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "order": 3, "preferred": true, "provenance": "flatline", "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 
2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "provenance": "flatline", "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}}, "ensemble_sample": {"rate": 1.0, "replacement": true, "seed": "f0864448cf4447869a965d1ca580946c"}, "error_models": 0, "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "finished_models": 5, "focus_field": null, "focus_field_name": null, "importance": {"000000": 0.00904, "000001": 0.01358, "000003": 0.82233, "100004": 0.15505}, "input_fields": ["000000", "000001", "000003", "100004"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "models": ["model/5f580eb2440ca135f602213e", "model/5f580eb2440ca135f6022140", "model/5f580eb2440ca135f6022142", "model/5f580eb3440ca135f6022144", "model/5f580eb3440ca135f6022146"], "name": "iris [extended]", "name_options": "bootstrap decision forest, 10-node, 5-model, pruned, deterministic order", "node_threshold": 10, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_models": 5, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "100005", "objective_field_details": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4}, "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["100005"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/5f5670e85299633dc000fbd8", "randomize": false, "range": null, "replacement": false, "resource": "ensemble/5f580eb0e84f942429000c22", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4550, "source": "source/5f5671b1946b3047cc009818", "source_status": true, "split_candidates": 32, "split_field": null, "split_field_name": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 1630, "message": "The ensemble has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-09-08T23:07:31.227000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/iris_ensemble/model_5f580eb2440ca135f602213e b/bigml/iris_ensemble/model_5f580eb2440ca135f602213e new file mode 100644 index 00000000..03505b40 --- /dev/null +++ b/bigml/iris_ensemble/model_5f580eb2440ca135f602213e @@ -0,0 +1 @@ +{"code": 200, "resource": "model/5f580eb2440ca135f602213e", "location": "https://bigml.io/andromeda/model/5f580eb2440ca135f602213e", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-09-08T23:07:30.454000", "creator": "mmartin", "credits": 0.0, 
"credits_per_prediction": 0.0, "dataset": "dataset/5f580e962fb31c516d000f0a", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "5f580eb0e84f942429000c22", "ensemble_index": 0, "excluded_fields": [], "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000003", "100004"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 48], ["Iris-versicolor", 48], ["Iris-virginica", 54]]}, "training": {"categories": [["Iris-setosa", 48], ["Iris-versicolor", 47], ["Iris-virginica", 55]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "generated": false, "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "order": 2, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "order": 3, "preferred": true, "provenance": "flatline", "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], 
[3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "provenance": "flatline", "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000003", 0.65193], ["100004", 0.3272], ["000001", 0.02087]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "preferred": true}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "preferred": true}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "preferred": true, "provenance": "flatline"}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "preferred": true, "provenance": "flatline", "term_analysis": {"enabled": true}}}, "node_threshold": 10, "root": {"children": [{"children": [{"children": [{"confidence": 0.91799, "count": 43, "id": 3, "objective_summary": {"categories": [["Iris-virginica", 43]]}, "output": "Iris-virginica", "predicate": {"field": "100004", "operator": ">", "value": 5.05}}, {"children": [{"children": [{"confidence": 0.34237, "count": 2, "id": 6, "objective_summary": {"categories": [["Iris-versicolor", 2]]}, "output": "Iris-versicolor", "predicate": {"field": "000001", "operator": ">", "value": 3.1}}, {"confidence": 0.74116, "count": 11, "id": 7, "objective_summary": {"categories": [["Iris-virginica", 11]]}, "output": "Iris-virginica", "predicate": {"field": "000001", "operator": "<=", "value": 3.1}}], "confidence": 0.57765, "count": 13, "id": 5, "objective_summary": {"categories": [["Iris-virginica", 11], ["Iris-versicolor", 2]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 1.75}}, {"confidence": 0.43849, "count": 3, "id": 8, "objective_summary": {"categories": [["Iris-versicolor", 3]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.75}}], "confidence": 0.44404, "count": 16, "id": 4, "objective_summary": {"categories": [["Iris-virginica", 11], ["Iris-versicolor", 5]]}, "output": "Iris-virginica", "predicate": {"field": "100004", "operator": "<=", "value": 5.05}}], "confidence": 0.81648, "count": 59, "id": 2, "objective_summary": {"categories": [["Iris-virginica", 54], ["Iris-versicolor", 5]]}, "output": "Iris-virginica", "predicate": {"field": 
"100004", "operator": ">", "value": 4.75}}, {"confidence": 0.87941, "count": 43, "id": 9, "objective_summary": {"categories": [["Iris-versicolor", 42], ["Iris-virginica", 1]]}, "output": "Iris-versicolor", "predicate": {"field": "100004", "operator": "<=", "value": 4.75}}], "confidence": 0.44282, "count": 102, "id": 1, "objective_summary": {"categories": [["Iris-virginica", 55], ["Iris-versicolor", 47]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 0.8}}, {"confidence": 0.9259, "count": 48, "id": 10, "objective_summary": {"categories": [["Iris-setosa", 48]]}, "output": "Iris-setosa", "predicate": {"field": "000003", "operator": "<=", "value": 0.8}}], "confidence": 0.29377, "count": 150, "id": 0, "objective_summary": {"categories": [["Iris-virginica", 55], ["Iris-setosa", 48], ["Iris-versicolor", 47]]}, "output": "Iris-virginica", "predicate": true}}, "name": "iris [extended] - 0", "name_options": "10-node, pruned, deterministic order", "node_threshold": 10, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "100005", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["100005"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/5f5670e85299633dc000fbd8", "randomize": false, "range": null, "replacement": false, "resource": "model/5f580eb2440ca135f602213e", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4550, "source": "source/5f5671b1946b3047cc009818", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-09-08T23:08:29.441000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/iris_ensemble/model_5f580eb2440ca135f6022140 b/bigml/iris_ensemble/model_5f580eb2440ca135f6022140 new file mode 100644 index 00000000..7a755a91 --- /dev/null +++ b/bigml/iris_ensemble/model_5f580eb2440ca135f6022140 @@ -0,0 +1 @@ +{"code": 200, "resource": "model/5f580eb2440ca135f6022140", "location": "https://bigml.io/andromeda/model/5f580eb2440ca135f6022140", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-09-08T23:07:30.728000", "creator": "mmartin", "credits": 0.0, "credits_per_prediction": 0.0, "dataset": "dataset/5f580e962fb31c516d000f0a", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "5f580eb0e84f942429000c22", "ensemble_index": 1, "excluded_fields": [], "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000003", "100004"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 51], ["Iris-virginica", 49]]}, "training": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]]}}, 
"fields": {"000000": {"column_number": 0, "datatype": "double", "generated": false, "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "order": 2, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "order": 3, "preferred": true, "provenance": "flatline", "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "provenance": "flatline", "summary": 
{"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000003", 0.9548], ["000001", 0.03384], ["100004", 0.01137]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "preferred": true}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "preferred": true}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "preferred": true, "provenance": "flatline"}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "preferred": true, "provenance": "flatline", "term_analysis": {"enabled": true}}}, "node_threshold": 10, "root": {"children": [{"children": [{"confidence": 0.9197, "count": 44, "id": 2, "objective_summary": {"categories": [["Iris-virginica", 44]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 1.75}}, {"children": [{"children": [{"children": [{"confidence": 0.09453, "count": 2, "id": 6, "objective_summary": {"categories": [["Iris-versicolor", 1], ["Iris-virginica", 1]]}, "output": "Iris-versicolor", "predicate": {"field": "100004", "operator": ">", "value": 5.05}}, {"confidence": 0.60966, "count": 6, "id": 7, "objective_summary": {"categories": [["Iris-versicolor", 6]]}, "output": "Iris-versicolor", "predicate": {"field": "100004", "operator": "<=", "value": 5.05}}], "confidence": 0.52911, "count": 8, "id": 5, "objective_summary": {"categories": [["Iris-versicolor", 7], ["Iris-virginica", 1]]}, "output": "Iris-versicolor", "predicate": {"field": "000001", "operator": ">", "value": 2.6}}, {"confidence": 0.56551, "count": 5, "id": 8, "objective_summary": {"categories": [["Iris-virginica", 5]]}, "output": "Iris-virginica", "predicate": {"field": "000001", "operator": "<=", "value": 2.6}}], "confidence": 0.29143, "count": 13, "id": 4, "objective_summary": {"categories": [["Iris-versicolor", 7], ["Iris-virginica", 6]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": ">", "value": 1.45}}, {"confidence": 0.91799, "count": 43, "id": 9, "objective_summary": {"categories": [["Iris-versicolor", 43]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.45}}], "confidence": 0.78531, "count": 56, "id": 3, "objective_summary": {"categories": [["Iris-versicolor", 50], ["Iris-virginica", 6]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.75}}], "confidence": 0.40383, "count": 100, "id": 1, "objective_summary": {"categories": [["Iris-versicolor", 50], ["Iris-virginica", 50]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": ">", "value": 0.8}}, {"confidence": 0.92865, "count": 50, "id": 10, "objective_summary": {"categories": [["Iris-setosa", 50]]}, "output": "Iris-setosa", "predicate": {"field": "000003", "operator": "<=", "value": 0.8}}], "confidence": 0.26289, "count": 150, "id": 0, "objective_summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]]}, "output": 
"Iris-setosa", "predicate": true}}, "name": "iris [extended] - 1", "name_options": "10-node, pruned, deterministic order", "node_threshold": 10, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "100005", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["100005"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/5f5670e85299633dc000fbd8", "randomize": false, "range": null, "replacement": false, "resource": "model/5f580eb2440ca135f6022140", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4550, "source": "source/5f5671b1946b3047cc009818", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-09-08T23:08:30.072000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/iris_ensemble/model_5f580eb2440ca135f6022142 b/bigml/iris_ensemble/model_5f580eb2440ca135f6022142 new file mode 100644 index 00000000..63dbfe47 --- /dev/null +++ b/bigml/iris_ensemble/model_5f580eb2440ca135f6022142 @@ -0,0 +1 @@ +{"code": 200, "resource": "model/5f580eb2440ca135f6022142", "location": "https://bigml.io/andromeda/model/5f580eb2440ca135f6022142", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-09-08T23:07:30.866000", "creator": "mmartin", "credits": 0.0, "credits_per_prediction": 0.0, "dataset": "dataset/5f580e962fb31c516d000f0a", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "5f580eb0e84f942429000c22", "ensemble_index": 2, "excluded_fields": [], "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000003", "100004"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 46], ["Iris-versicolor", 49], ["Iris-virginica", 55]]}, "training": {"categories": [["Iris-setosa", 46], ["Iris-versicolor", 50], ["Iris-virginica", 54]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "generated": false, "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 
0.68569}}, "000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "order": 2, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "order": 3, "preferred": true, "provenance": "flatline", "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "provenance": "flatline", "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000003", 0.88756], ["100004", 0.08608], ["000000", 0.02636]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000000": {"column_number": 0, "datatype": "double", "generated": false, "name": "sepal length", "optype": "numeric", "preferred": true}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "preferred": true}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "preferred": true, 
"provenance": "flatline"}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "preferred": true, "provenance": "flatline", "term_analysis": {"enabled": true}}}, "node_threshold": 10, "root": {"children": [{"children": [{"children": [{"confidence": 0.91433, "count": 41, "id": 3, "objective_summary": {"categories": [["Iris-virginica", 41]]}, "output": "Iris-virginica", "predicate": {"field": "100004", "operator": ">", "value": 5.05}}, {"children": [{"confidence": 0.34237, "count": 2, "id": 5, "objective_summary": {"categories": [["Iris-versicolor", 2]]}, "output": "Iris-versicolor", "predicate": {"field": "000000", "operator": ">", "value": 6.5}}, {"confidence": 0.62264, "count": 11, "id": 6, "objective_summary": {"categories": [["Iris-virginica", 10], ["Iris-versicolor", 1]]}, "output": "Iris-virginica", "predicate": {"field": "000000", "operator": "<=", "value": 6.5}}], "confidence": 0.49743, "count": 13, "id": 4, "objective_summary": {"categories": [["Iris-virginica", 10], ["Iris-versicolor", 3]]}, "output": "Iris-virginica", "predicate": {"field": "100004", "operator": "<=", "value": 5.05}}], "confidence": 0.84893, "count": 54, "id": 2, "objective_summary": {"categories": [["Iris-virginica", 51], ["Iris-versicolor", 3]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 1.55}}, {"children": [{"confidence": 0.43849, "count": 3, "id": 8, "objective_summary": {"categories": [["Iris-virginica", 3]]}, "output": "Iris-virginica", "predicate": {"field": "100004", "operator": ">", "value": 4.95}}, {"confidence": 0.92444, "count": 47, "id": 9, "objective_summary": {"categories": [["Iris-versicolor", 47]]}, "output": "Iris-versicolor", "predicate": {"field": "100004", "operator": "<=", "value": 4.95}}], "confidence": 0.83783, "count": 50, "id": 7, "objective_summary": {"categories": [["Iris-versicolor", 47], ["Iris-virginica", 3]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.55}}], "confidence": 0.42424, "count": 104, "id": 1, "objective_summary": {"categories": [["Iris-virginica", 54], ["Iris-versicolor", 50]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 0.75}}, {"confidence": 0.92292, "count": 46, "id": 10, "objective_summary": {"categories": [["Iris-setosa", 46]]}, "output": "Iris-setosa", "predicate": {"field": "000003", "operator": "<=", "value": 0.75}}], "confidence": 0.28756, "count": 150, "id": 0, "objective_summary": {"categories": [["Iris-virginica", 54], ["Iris-versicolor", 50], ["Iris-setosa", 46]]}, "output": "Iris-virginica", "predicate": true}}, "name": "iris [extended] - 2", "name_options": "10-node, pruned, deterministic order", "node_threshold": 10, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "100005", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["100005"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/5f5670e85299633dc000fbd8", "randomize": false, "range": null, "replacement": false, "resource": "model/5f580eb2440ca135f6022142", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4550, "source": "source/5f5671b1946b3047cc009818", "source_status": true, "split_candidates": 32, "split_field": null, 
"stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-09-08T23:08:30.597000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/iris_ensemble/model_5f580eb3440ca135f6022144 b/bigml/iris_ensemble/model_5f580eb3440ca135f6022144 new file mode 100644 index 00000000..1143259a --- /dev/null +++ b/bigml/iris_ensemble/model_5f580eb3440ca135f6022144 @@ -0,0 +1 @@ +{"code": 200, "resource": "model/5f580eb3440ca135f6022144", "location": "https://bigml.io/andromeda/model/5f580eb3440ca135f6022144", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-09-08T23:07:31.009000", "creator": "mmartin", "credits": 0.0, "credits_per_prediction": 0.0, "dataset": "dataset/5f580e962fb31c516d000f0a", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "5f580eb0e84f942429000c22", "ensemble_index": 3, "excluded_fields": [], "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000003", "100004"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 48], ["Iris-versicolor", 50], ["Iris-virginica", 52]]}, "training": {"categories": [["Iris-setosa", 48], ["Iris-versicolor", 51], ["Iris-virginica", 51]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "generated": false, "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", 
"order": 2, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "order": 3, "preferred": true, "provenance": "flatline", "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "provenance": "flatline", "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000003", 0.69065], ["100004", 0.29616], ["000001", 0.01319]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "preferred": true}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "preferred": true}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "preferred": true, "provenance": "flatline"}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "preferred": true, "provenance": "flatline", "term_analysis": {"enabled": true}}}, "node_threshold": 10, "root": {"children": [{"children": [{"children": [{"confidence": 0.92292, "count": 46, "id": 3, "objective_summary": {"categories": [["Iris-virginica", 46]]}, "output": "Iris-virginica", "predicate": {"field": "100004", "operator": ">", "value": 5.05}}, {"confidence": 0.20765, "count": 3, "id": 4, "objective_summary": {"categories": [["Iris-virginica", 2], ["Iris-versicolor", 1]]}, "output": "Iris-virginica", "predicate": {"field": "100004", "operator": "<=", "value": 5.05}}], "confidence": 0.89306, "count": 49, "id": 2, "objective_summary": {"categories": [["Iris-virginica", 48], 
["Iris-versicolor", 1]]}, "output": "Iris-virginica", "predicate": {"field": "100004", "operator": ">", "value": 4.95}}, {"children": [{"children": [{"confidence": 0.34237, "count": 2, "id": 7, "objective_summary": {"categories": [["Iris-versicolor", 2]]}, "output": "Iris-versicolor", "predicate": {"field": "000001", "operator": ">", "value": 3.1}}, {"confidence": 0.43849, "count": 3, "id": 8, "objective_summary": {"categories": [["Iris-virginica", 3]]}, "output": "Iris-virginica", "predicate": {"field": "000001", "operator": "<=", "value": 3.1}}], "confidence": 0.23072, "count": 5, "id": 6, "objective_summary": {"categories": [["Iris-virginica", 3], ["Iris-versicolor", 2]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 1.7}}, {"confidence": 0.9259, "count": 48, "id": 9, "objective_summary": {"categories": [["Iris-versicolor", 48]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.7}}], "confidence": 0.8463, "count": 53, "id": 5, "objective_summary": {"categories": [["Iris-versicolor", 50], ["Iris-virginica", 3]]}, "output": "Iris-versicolor", "predicate": {"field": "100004", "operator": "<=", "value": 4.95}}], "confidence": 0.40474, "count": 102, "id": 1, "objective_summary": {"categories": [["Iris-versicolor", 51], ["Iris-virginica", 51]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": ">", "value": 0.8}}, {"confidence": 0.9259, "count": 48, "id": 10, "objective_summary": {"categories": [["Iris-setosa", 48]]}, "output": "Iris-setosa", "predicate": {"field": "000003", "operator": "<=", "value": 0.8}}], "confidence": 0.26903, "count": 150, "id": 0, "objective_summary": {"categories": [["Iris-versicolor", 51], ["Iris-virginica", 51], ["Iris-setosa", 48]]}, "output": "Iris-versicolor", "predicate": true}}, "name": "iris [extended] - 3", "name_options": "10-node, pruned, deterministic order", "node_threshold": 10, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "100005", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["100005"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/5f5670e85299633dc000fbd8", "randomize": false, "range": null, "replacement": false, "resource": "model/5f580eb3440ca135f6022144", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4550, "source": "source/5f5671b1946b3047cc009818", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-09-08T23:08:31.225000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/iris_ensemble/model_5f580eb3440ca135f6022146 b/bigml/iris_ensemble/model_5f580eb3440ca135f6022146 new file mode 100644 index 00000000..34e0f4dc --- /dev/null +++ b/bigml/iris_ensemble/model_5f580eb3440ca135f6022146 @@ -0,0 +1 @@ +{"code": 200, "resource": "model/5f580eb3440ca135f6022146", "location": "https://bigml.io/andromeda/model/5f580eb3440ca135f6022146", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": 
"2020-09-08T23:07:31.125000", "creator": "mmartin", "credits": 0.0, "credits_per_prediction": 0.0, "dataset": "dataset/5f580e962fb31c516d000f0a", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "5f580eb0e84f942429000c22", "ensemble_index": 4, "excluded_fields": [], "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000003", "100004"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 54], ["Iris-versicolor", 46], ["Iris-virginica", 50]]}, "training": {"categories": [["Iris-setosa", 54], ["Iris-versicolor", 47], ["Iris-virginica", 49]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "generated": false, "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "order": 2, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "order": 3, "preferred": true, "provenance": "flatline", "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 
7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "provenance": "flatline", "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000003", 0.92673], ["100004", 0.05445], ["000000", 0.01882]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000000": {"column_number": 0, "datatype": "double", "generated": false, "name": "sepal length", "optype": "numeric", "preferred": true}, "000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "preferred": true}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "preferred": true}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "preferred": true, "provenance": "flatline"}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "preferred": true, "provenance": "flatline", "term_analysis": {"enabled": true}}}, "node_threshold": 10, "root": {"children": [{"children": [{"children": [{"confidence": 0.92135, "count": 45, "id": 3, "objective_summary": {"categories": [["Iris-virginica", 45]]}, "output": "Iris-virginica", "predicate": {"field": "000000", "operator": ">", "value": 5.95}}, {"confidence": 0.20654, "count": 1, "id": 4, "objective_summary": {"categories": [["Iris-versicolor", 1]]}, "output": "Iris-versicolor", "predicate": {"field": "000000", "operator": "<=", "value": 5.95}}], "confidence": 0.88664, "count": 46, "id": 2, "objective_summary": {"categories": [["Iris-virginica", 45], ["Iris-versicolor", 1]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 1.75}}, {"children": [{"children": [{"confidence": 0.5101, "count": 4, "id": 7, "objective_summary": {"categories": [["Iris-versicolor", 4]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": ">", "value": 1.65}}, {"confidence": 0.37553, "count": 5, "id": 8, "objective_summary": {"categories": [["Iris-virginica", 4], ["Iris-versicolor", 1]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": "<=", "value": 1.65}}], "confidence": 0.26665, "count": 9, "id": 6, "objective_summary": {"categories": [["Iris-versicolor", 5], ["Iris-virginica", 4]]}, "output": "Iris-versicolor", "predicate": {"field": "100004", 
"operator": ">", "value": 4.95}}, {"confidence": 0.91433, "count": 41, "id": 9, "objective_summary": {"categories": [["Iris-versicolor", 41]]}, "output": "Iris-versicolor", "predicate": {"field": "100004", "operator": "<=", "value": 4.95}}], "confidence": 0.81161, "count": 50, "id": 5, "objective_summary": {"categories": [["Iris-versicolor", 46], ["Iris-virginica", 4]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.75}}], "confidence": 0.41196, "count": 96, "id": 1, "objective_summary": {"categories": [["Iris-virginica", 49], ["Iris-versicolor", 47]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 0.8}}, {"confidence": 0.93358, "count": 54, "id": 10, "objective_summary": {"categories": [["Iris-setosa", 54]]}, "output": "Iris-setosa", "predicate": {"field": "000003", "operator": "<=", "value": 0.8}}], "confidence": 0.28756, "count": 150, "id": 0, "objective_summary": {"categories": [["Iris-setosa", 54], ["Iris-virginica", 49], ["Iris-versicolor", 47]]}, "output": "Iris-setosa", "predicate": true}}, "name": "iris [extended] - 4", "name_options": "10-node, pruned, deterministic order", "node_threshold": 10, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "100005", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["100005"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/5f5670e85299633dc000fbd8", "randomize": false, "range": null, "replacement": false, "resource": "model/5f580eb3440ca135f6022146", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4550, "source": "source/5f5671b1946b3047cc009818", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-09-08T23:08:31.730000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/item.py b/bigml/item.py index 87d5a8ba..3314507a 100644 --- a/bigml/item.py +++ b/bigml/item.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -21,9 +20,9 @@ """ from bigml.associationrule import SUPPORTED_LANGUAGES -from bigml.predicate import term_matches, item_matches +from bigml.predicate_utils.utils import term_matches, item_matches -class Item(object): +class Item(): """ Object encapsulating an Association resource item as described in https://bigml.com/developers/associations @@ -36,7 +35,7 @@ def __init__(self, index, item_info, fields): self.count = item_info.get('count') self.description = item_info.get('description') self.field_id = item_info.get('field_id') - self.field_info = fields[self.field_id] + self.field_info = item_info.get('field_info', fields[self.field_id]) self.name = item_info.get('name') self.bin_end = item_info.get('bin_end') self.bin_start = item_info.get('bin_start') @@ -75,7 +74,7 @@ def to_lisp_rule(self): """ flatline = "" if self.name is None: - return u"(missing? (f %s))" % self.field_id + return "(missing? 
(f %s))" % self.field_id field_type = self.field_info['optype'] if field_type == "numeric": start = self.bin_end if self.complement else \ @@ -84,32 +83,32 @@ def to_lisp_rule(self): self.bin_end if start is not None and end is not None: if start < end: - flatline = u"(and (< %s (f %s)) (<= (f %s) %s))" % \ + flatline = "(and (< %s (f %s)) (<= (f %s) %s))" % \ (start, self.field_id, self.field_id, end) else: - flatline = u"(or (> (f %s) %s) (<= (f %s) %s))" % \ + flatline = "(or (> (f %s) %s) (<= (f %s) %s))" % \ (self.field_id, start, self.field_id, end) elif start is not None: - flatline = u"(> (f %s) %s)" % (self.field_id, start) + flatline = "(> (f %s) %s)" % (self.field_id, start) else: - flatline = u"(<= (f %s) %s)" % (self.field_id, end) + flatline = "(<= (f %s) %s)" % (self.field_id, end) elif field_type == "categorical": - operator = u"!=" if self.complement else u"=" - flatline = u"(%s (f %s) %s)" % ( + operator = "!=" if self.complement else "=" + flatline = "(%s (f %s) %s)" % ( operator, self.field_id, self.name) elif field_type == "text": - operator = u"=" if self.complement else u">" + operator = "=" if self.complement else ">" options = self.field_info['term_analysis'] case_insensitive = not options.get('case_sensitive', False) - case_insensitive = u'true' if case_insensitive else u'false' + case_insensitive = 'true' if case_insensitive else 'false' language = options.get('language') - language = u"" if language is None else u" %s" % language - flatline = u"(%s (occurrences (f %s) %s %s%s) 0)" % ( + language = "" if language is None else " %s" % language + flatline = "(%s (occurrences (f %s) %s %s%s) 0)" % ( operator, self.field_id, self.name, case_insensitive, language) elif field_type == 'items': - operator = u"!" if self.complement else u"" - flatline = u"(%s (contains-items? %s %s))" % ( + operator = "!" if self.complement else "" + flatline = "(%s (contains-items? %s %s))" % ( operator, self.field_id, self.name) return flatline diff --git a/bigml/laminar/constants.py b/bigml/laminar/constants.py index fea7b160..8009710c 100644 --- a/bigml/laminar/constants.py +++ b/bigml/laminar/constants.py @@ -40,3 +40,8 @@ LEARN_INCREMENT = 8 MAX_QUEUE = LEARN_INCREMENT * 4 N_CANDIDATES = MAX_QUEUE * 64 + +# Activation constants +ALPHA = 1.6732632423543772848170429916717 +LAMBDA = 1.0507009873554804934193349852946 +LEAKY_RELU_CONST = 0.1 diff --git a/bigml/laminar/math_ops.py b/bigml/laminar/math_ops.py deleted file mode 100644 index e6a63edc..00000000 --- a/bigml/laminar/math_ops.py +++ /dev/null @@ -1,182 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2017-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -"""Activation functions and helpers in pure python - -""" - -import math - -from bigml.laminar.constants import LARGE_EXP - - -def broadcast(fn): - def broadcaster(xs): - if len(xs) == 0: - return [] - elif isinstance(xs[0], list): - return [fn(xvec) for xvec in xs] - else: - return fn(xs) - - return broadcaster - - -def plus(mat, vec): - return [[r + v for r, v in zip(row, vec)] for row in mat] - - -def minus(mat, vec): - return [[r - v for r, v in zip(row, vec)] for row in mat] - - -def times(mat, vec): - return [[r * v for r, v in zip(row, vec)] for row in mat] - - -def divide(mat, vec): - return [[r / v for r, v in zip(row, vec)] for row in mat] - - -def dot(mat1, mat2): - out_mat = [] - for row1 in mat1: - new_row = [sum(m1 * m2 for m1, m2 in zip(row1, row2)) for row2 in mat2] - out_mat.append(new_row) - - return out_mat - - -def batch_norm(X, mean, stdev, shift, scale): - norm_vals = divide(minus(X, mean), stdev) - return plus(times(norm_vals, scale), shift) - - -def sigmoid(xs): - out_vec = [] - - for x in xs: - if x > 0: - if x < LARGE_EXP: - ex_val = math.exp(x) - out_vec.append(ex_val / (ex_val + 1)) - else: - out_vec.append(1) - else: - if -x < LARGE_EXP: - out_vec.append(1 / (1 + math.exp(-x))) - else: - out_vec.append(0) - - return out_vec - - -def softplus(xs): - return [math.log(math.exp(x) + 1) if x < LARGE_EXP else x for x in xs] - - -def softmax(xs): - xmax = max(xs) - exps = [math.exp(x - xmax) for x in xs] - sumex = sum(exps) - return [ex / sumex for ex in exps] - - -ACTIVATORS = { - 'tanh': broadcast(lambda xs: [math.tanh(x) for x in xs]), - 'sigmoid': broadcast(sigmoid), - 'softplus': broadcast(softplus), - 'relu': broadcast(lambda xs: [x if x > 0 else 0 for x in xs]), - 'softmax': broadcast(softmax), - 'identity': broadcast(lambda xs: [float(x) for x in xs]) -} - - -def init_layers(layers): - return [dict(layer) for layer in layers] - - -def destandardize(vec, v_mean, v_stdev): - return [[v[0] * v_stdev + v_mean] for v in vec] - - -def to_width(mat, width): - if width > len(mat[0]): - ntiles = int(math.ceil(width / float(len(mat[0])))) - else: - ntiles = 1 - - output = [(row * ntiles)[:width] for row in mat] - - return output - - -def add_residuals(residuals, identities): - to_add = to_width(identities, len(residuals[0])) - - assert len(to_add[0]) == len(residuals[0]) - - return [[r + v for r, v in zip(rrow, vrow)] - for rrow, vrow in zip(residuals, to_add)] - - -def propagate(x_in, layers): - last_X = identities = x_in - for layer in layers: - w = layer['weights'] - m = layer['mean'] - s = layer['stdev'] - b = layer['offset'] - g = layer['scale'] - - afn = layer['activation_function'] - - X_dot_w = dot(last_X, w) - - if m is not None and s is not None: - next_in = batch_norm(X_dot_w, m, s, b, g) - else: - next_in = plus(X_dot_w, b) - - if layer['residuals']: - next_in = add_residuals(next_in, identities) - last_X = ACTIVATORS[afn](next_in) - identities = last_X - else: - last_X = ACTIVATORS[afn](next_in) - return last_X - - -def sum_and_normalize(youts, is_regression): - ysums = [] - out_dist = [] - - if is_regression: - out_dist = sum(youts) / len(youts) - else: - for i, row in enumerate(youts[0]): - sum_row = [] - for j, _ in enumerate(row): - sum_row.append(sum([yout[i][j] for yout in youts])) - - ysums.append(sum_row) - - for ysum in ysums: - rowsum = sum(ysum) - out_dist.append([y / rowsum for y in ysum]) - - return out_dist diff --git a/bigml/laminar/numpy_ops.py b/bigml/laminar/numpy_ops.py index 08d513cb..85c21ea4 100644 --- a/bigml/laminar/numpy_ops.py 
+++ b/bigml/laminar/numpy_ops.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=invalid-name,missing-function-docstring # -# Copyright 2017-2019 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -23,17 +23,17 @@ import numpy as np +#pylint: disable=locally-disabled,no-name-in-module from scipy.special import expit from bigml.laminar.constants import LARGE_EXP, MATRIX_PARAMS, \ - VEC_PARAMS + VEC_PARAMS, ALPHA, LAMBDA, LEAKY_RELU_CONST def to_numpy_array(xs): if isinstance(xs, np.ndarray): return np.copy(xs) - else: - return np.array(xs, dtype=np.float32) + return np.array(xs, dtype=np.float32) def softplus(xs): @@ -65,14 +65,32 @@ def softmax(xs): return dist +def selu(xs): + x_cpy = to_numpy_array(xs) + + return np.where(x_cpy > 0, + LAMBDA * x_cpy, + LAMBDA * ALPHA * (np.exp(x_cpy) - 1)) + +def leaky_relu(xs): + x_cpy = to_numpy_array(xs) + + return np.maximum(x_cpy, x_cpy * LEAKY_RELU_CONST) + + ACTIVATORS = { 'tanh': np.tanh, 'sigmoid': expit, 'softplus': softplus, 'relu': relu, 'softmax': softmax, - 'identity': lambda x: x -} + 'identity': lambda x: x, + 'linear': lambda x: x, + 'swish': lambda x: x * expit(x), + 'mish': lambda x: np.tanh(softplus(x)), + 'relu6': lambda x: np.clip(relu(x), 0, 6), + 'leaky_relu': leaky_relu, + 'selu': selu} def plus(mat, vec): @@ -82,10 +100,10 @@ def plus(mat, vec): def dot(mat1, mat2): output = [] for row1 in mat1: - new_row = [] - for row2 in mat2: - new_row.append(np.dot(row1, row2).tolist()) - output.append(new_row) + new_row = [] + for row2 in mat2: + new_row.append(np.dot(row1, row2).tolist()) + output.append(new_row) return output def batch_norm(X, mean, stdev, shift, scale): @@ -135,13 +153,18 @@ def sum_and_normalize(youts, is_regression): if is_regression: return ysums / len(youts) - else: - return ysums / np.sum(ysums, axis=1).reshape(-1, 1) + return ysums / np.sum(ysums, axis=1).reshape(-1, 1) def propagate(x_in, layers): last_X = identities = to_numpy_array(x_in) - for layer in layers: + + if any(layer["residuals"] for layer in layers): + first_identities = not any(layer["residuals"] for layer in layers[:2]) + else: + first_identities = False + + for i, layer in enumerate(layers): w = layer['weights'] m = layer['mean'] s = layer['stdev'] @@ -163,4 +186,7 @@ def propagate(x_in, layers): else: last_X = ACTIVATORS[afn](next_in) + if first_identities and i == 0: + identities = last_X + return last_X diff --git a/bigml/laminar/preprocess.py b/bigml/laminar/preprocess.py deleted file mode 100644 index 79f60dac..00000000 --- a/bigml/laminar/preprocess.py +++ /dev/null @@ -1,205 +0,0 @@ -# Copyright 2017-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -"""Auxiliary functions for preprocessing - -""" - -import math - -from copy import deepcopy - -from bigml.laminar.constants import NUMERIC, CATEGORICAL - -MEAN = "mean" -STANDARD_DEVIATION = "stdev" - -ZERO = "zero_value" -ONE = "one_value" - - -def dtype_ok(dtype_fn): - return dtype_fn is not None and dtype_fn in [int, float] - - -def np_zeros(x_dim, y_dim, dtype_fn=None): - value = "0" - try: - if dtype_ok(dtype_fn): - value = dtype_fn(value) - except ValueError: - pass - array = [] - for i in range(x_dim): - array.append([]) - for _ in range(y_dim): - array[i].append(value) - return array - - -def np_asarray(array, dtype_fn=None): - new_array = deepcopy(array) - if not isinstance(array, list): - new_array = [item for item in new_array] - try: - new_array = ['nan' if item is None else item for item in new_array] - if dtype_ok(dtype_fn): - new_array = [dtype_fn(item) for item in new_array] - except (ValueError, NameError): - pass - return new_array - - -def np_c_(array_a, array_c): - if array_a in [None, []]: - return [array_c] - - new_array = deepcopy(array_a) - new_array[0].extend(array_c) - - return new_array - - -def v_index(alist, value): - try: - return alist.index(value) - except ValueError: - return None - -def one_hot(vector, possible_values): - idxs = list(enumerate(v_index(possible_values, v) for v in vector[0])) - valid_pairs = [x for x in idxs if x[1] is not None] - outvec = np_zeros(len(idxs), len(possible_values), dtype_fn=float) - for i, j in valid_pairs: - outvec[i][j] = 1 - - return outvec - -def standardize(vector, mean, stdev): - newvec = [component - mean for component in vector] - - if stdev > 0: - newvec = [component / stdev for component in newvec] - - for index, component in enumerate(newvec): - newvec[index] = 0.0 if math.isnan(component) else component - return newvec - -def binarize(vector, zero, one): - for index, value in enumerate(vector): - if one == 0.0: - if value == one: - vector[index] = 1.0 - if value != one and value != 1.0: - vector[index] = 0.0 - else: - if value != one: - vector[index] = 0.0 - if value == one: - vector[index] = 1.0 - - return vector - -def moments(amap): - return amap[MEAN], amap[STANDARD_DEVIATION] - -def bounds(amap): - return amap[ZERO], amap[ONE] - -def transform(vector, spec): - vtype = spec['type'] - - if vtype == NUMERIC: - if STANDARD_DEVIATION in spec: - mean, stdev = moments(spec) - output = standardize(vector, mean, stdev) - elif ZERO in spec: - low, high = bounds(spec) - output = binarize(vector, low, high) - else: - raise ValueError("'%s' is not a valid numeric spec!" % str(spec)) - elif vtype == CATEGORICAL: - output = one_hot(vector, spec['values'])[0] - else: - raise ValueError("'%s' is not a valid spec type!" 
% vtype) - return output - - -def tree_predict(tree, point): - node = tree[:] - - while node[-1] is not None: - if point[node[0]] <= node[1]: - node = node[2] - else: - node = node[3] - - return node[0] - - -def get_embedding(X, model): - if isinstance(model, list): - preds = None - for tree in model: - tree_preds = [] - for row in X: - tree_preds.append(tree_predict(tree, row)) - - if preds is None: - preds = np_asarray(tree_preds)[0] - else: - for index, pred in enumerate(preds): - preds[index] += np_asarray(tree_preds)[0][index] - - if preds and len(preds) > 1: - norm = sum(preds) - preds = [pred / float(norm) for pred in preds] - else: - preds = [pred / float(len(model)) for pred in preds] - - return [preds] - else: - raise ValueError("Model is unknown type!") - - -def tree_transform(X, trees): - outdata = None - - for feature_range, model in trees: - sidx, eidx = feature_range - inputs = X[:] - for index, row in enumerate(inputs): - inputs[index] = row[sidx:eidx] - outarray = get_embedding(inputs, model) - if outdata is not None: - outdata = np_c_(outdata, outarray[0]) - else: - outdata = outarray - return np_c_(outdata, X[0]) - - -def preprocess(columns, specs): - outdata = None - - for spec in specs: - column = [columns[spec['index']]] - - if spec['type'] == NUMERIC: - column = np_asarray(column, dtype_fn=float) - outarray = transform(column, spec) - if outdata is not None: - outdata = np_c_(outdata, outarray) - else: - outdata = [outarray] - return outdata diff --git a/bigml/laminar/preprocess_np.py b/bigml/laminar/preprocess_np.py index f6514f99..95e64899 100644 --- a/bigml/laminar/preprocess_np.py +++ b/bigml/laminar/preprocess_np.py @@ -1,3 +1,22 @@ +# -*- coding: utf-8 -*- +#pylint: disable=invalid-name,missing-function-docstring +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +"""Pre-processing fields for deepent computations """ + import math import numpy as np @@ -21,7 +40,7 @@ def index(alist, value): def one_hot(vector, possible_values): idxs = list(enumerate(index(possible_values, v) for v in vector)) - valid_pairs = filter(lambda x: x[1] is not None, idxs) + valid_pairs = [x for x in idxs if x[1] is not None] outvec = np.zeros((len(idxs), len(possible_values)), dtype=np.float32) for v in valid_pairs: outvec[v[0], v[1]] = 1 @@ -37,6 +56,7 @@ def standardize(vector, mn, stdev): newvec = np.vectorize(fill_dft)(newvec) return newvec +#pylint: disable=locally-disabled,unused-argument def binarize(vector, zero, one): if one == 0.0: vector[vector == one] = 1.0 @@ -103,8 +123,7 @@ def get_embedding(X, model): preds /= len(model) return preds - else: - raise ValueError("Model is unknown type!") + raise ValueError("Model is unknown type!") def tree_transform(X, trees): diff --git a/bigml/linear.py b/bigml/linear.py index ce00daa6..c6e00407 100644 --- a/bigml/linear.py +++ b/bigml/linear.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -21,7 +20,7 @@ embedded into your application without needing to send requests to BigML.io. -This module cannot only save you a few credits, but also enormously +This module can help you enormously to reduce the latency for each prediction and let you use your linear regressions offline. @@ -42,8 +41,6 @@ """ import logging import math -import copy -import json try: import numpy as np @@ -52,15 +49,14 @@ except ImportError: STATS = False -from functools import cmp_to_key from bigml.api import FINISHED -from bigml.api import get_status, get_api_connection -from bigml.util import cast, check_no_training_missings, PRECISION, NUMERIC, \ - flatten +from bigml.api import get_status, get_api_connection, get_linear_regression_id +from bigml.util import cast, check_no_training_missings, flatten, \ + use_cache, load, dump, dumps, get_data_transformations, NUMERIC from bigml.basemodel import get_resource_dict, extract_objective -from bigml.model import parse_operating_point, sort_categories from bigml.modelfields import ModelFields +from bigml.constants import DECIMALS try: from bigml.laminar.numpy_ops import dot @@ -80,7 +76,7 @@ CONTRAST = "contrast" OTHER = "other" -def get_terms_array(terms, unique_terms, field, field_id): +def get_terms_array(terms, unique_terms, field_id): """ Returns an array that represents the frequency of terms as ordered in the reference `terms` parameter. 
@@ -104,9 +100,21 @@ class LinearRegression(ModelFields): """ - def __init__(self, linear_regression, api=None): + def __init__(self, linear_regression, api=None, cache_get=None): + + self.xtx_inverse = [] + if use_cache(cache_get): + # using a cache to store the model attributes + self.__dict__ = load(get_linear_regression_id(linear_regression), + cache_get) + for index, elem in enumerate(self.xtx_inverse): + self.xtx_inverse[index] = np.array(elem) + return self.resource_id = None + self.name = None + self.description = None + self.parent_id = None self.input_fields = [] self.term_forms = {} self.tag_clouds = {} @@ -122,15 +130,21 @@ def __init__(self, linear_regression, api=None): self.mean_squared_error = None self.number_of_parameters = None self.number_of_samples = None - self.api = get_api_connection(api) + self.default_numeric_value = None + api = get_api_connection(api) self.resource_id, linear_regression = get_resource_dict( \ - linear_regression, "linearregression", api=self.api) + linear_regression, "linearregression", api=api) if 'object' in linear_regression and \ isinstance(linear_regression['object'], dict): linear_regression = linear_regression['object'] + self.parent_id = linear_regression.get('dataset') + self.name = linear_regression.get('name') + self.description = linear_regression.get('description') try: self.input_fields = linear_regression.get("input_fields", []) + self.default_numeric_value = linear_regression.get( \ + "default_numeric_value") self.dataset_field_types = linear_regression.get( "dataset_field_types", {}) self.weight_field = linear_regression.get("weight_field") @@ -151,7 +165,7 @@ def __init__(self, linear_regression, api=None): if not self.input_fields: self.input_fields = [ \ field_id for field_id, _ in - sorted(fields.items(), + sorted(list(fields.items()), key=lambda x: x[1].get("column_number"))] self.coeff_ids = self.input_fields[:] self.coefficients = linear_regression_info.get( \ @@ -166,7 +180,7 @@ def __init__(self, linear_regression, api=None): objective_id = extract_objective(objective_field) ModelFields.__init__( self, fields, - objective_id=objective_id, terms=True, categories=True, + objective_id=objective_id, categories=True, numerics=True, missing_tokens=missing_tokens) self.field_codings = linear_regression_info.get( \ 'field_codings', {}) @@ -213,7 +227,7 @@ def expand_input(self, input_data, unique_terms, compact=False): as numerics. 
""" input_array = [] - for index, field_id in enumerate(self.coeff_ids): + for field_id in self.coeff_ids: field = self.fields[field_id] optype = field["optype"] missing = False @@ -230,7 +244,7 @@ def expand_input(self, input_data, unique_terms, compact=False): length = len(terms) if field_id in unique_terms: new_inputs = get_terms_array( \ - terms, unique_terms, field, field_id) + terms, unique_terms, field_id) else: new_inputs = [0] * length missing = True @@ -291,31 +305,32 @@ def predict(self, input_data, full=False): # Checks and cleans input_data leaving the fields used in the model unused_fields = [] - new_data = self.filter_input_data( \ + norm_input_data = self.filter_input_data( \ input_data, add_unused_fields=full) if full: - new_data, unused_fields = new_data + norm_input_data, unused_fields = norm_input_data # Strips affixes for numeric values and casts to the final field type - cast(new_data, self.fields) + cast(norm_input_data, self.fields) # In case that the training data has no missings, input data shouldn't - check_no_training_missings(new_data, self.model_fields, + check_no_training_missings(norm_input_data, self.model_fields, self.weight_field, self.objective_id) # Computes text and categorical field expansion - unique_terms = self.get_unique_terms(new_data) + unique_terms = self.get_unique_terms(norm_input_data) # Creates an input vector with the values for all expanded fields. - input_array = self.expand_input(new_data, unique_terms) - compact_input_array = self.expand_input(new_data, unique_terms, True) + input_array = self.expand_input(norm_input_data, unique_terms) + compact_input_array = self.expand_input(norm_input_data, unique_terms, + True) prediction = dot([flatten(self.coefficients)], [input_array])[0][0] result = { - "prediction": prediction} + "prediction": round(prediction, DECIMALS)} if self.xtx_inverse: result.update({"confidence_bounds": self.confidence_bounds( \ compact_input_array)}) @@ -328,6 +343,21 @@ def predict(self, input_data, full=False): return result + def predict_probability(self, input_data, compact=False): + """Method to homogeinize predictions in fusions and composites + + """ + + prediction = self.predict(input_data, full=not compact) + + if compact: + output = [prediction] + else: + output = prediction + + return output + + def confidence_bounds(self, input_array): """Computes the confidence interval for the prediction @@ -349,7 +379,6 @@ def confidence_bounds(self, input_array): "prediction_interval": prediction_interval, "valid": valid} - def format_field_codings(self): """ Changes the field codings format to the dict notation @@ -366,3 +395,31 @@ def format_field_codings(self): else: self.field_codings[field_id] = {\ element["coding"]: element['coefficients']} + + def data_transformations(self): + """Returns the pipeline transformations previous to the modeling + step as a pipeline, so that they can be used in local predictions. + Avoiding to set it in a Mixin to maintain the current dump function. 
+ """ + return get_data_transformations(self.resource_id, self.parent_id) + + def dump(self, output=None, cache_set=None): + """Uses msgpack to serialize the resource object + If cache_set is filled with a cache set method, the method is called + + """ + self_vars = vars(self) + xtx = self_vars["xtx_inverse"] + for index, elem in enumerate(xtx): + self_vars["xtx_inverse"][index] = list(elem) + dump(self_vars, output=output, cache_set=cache_set) + + def dumps(self): + """Uses msgpack to serialize the resource object to a string + + """ + self_vars = vars(self) + xtx = self_vars["xtx_inverse"] + for index, elem in enumerate(xtx): + self_vars["xtx_inverse"][index] = list(elem) + dumps(self_vars) diff --git a/bigml/local_model.py b/bigml/local_model.py new file mode 100644 index 00000000..c8ed68c9 --- /dev/null +++ b/bigml/local_model.py @@ -0,0 +1,237 @@ +# -*- coding: utf-8 -*- +# pylint: disable=super-init-not-called +# +# Copyright 2023-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A local Predictive model class abstracting all kind of models + +This module abstracts any BigML model to make predictions locally or +embedded into your application without needing to send requests to +BigML.io. + +This module cannot only save you a few credits, but also enormously +reduce the latency for each prediction and let you use your supervised models +offline. 
+ +Example usage (assuming that you have previously set up the BIGML_USERNAME +and BIGML_API_KEY environment variables and that you own the +logisticregression/id below): + +from bigml.api import BigML +from bigml.local_model import LocalModel + +api = BigML() + +model = LocalModel( + 'logisticregression/5026965515526876630001b2') +model.predict({"petal length": 3, "petal width": 1, + "sepal length": 1, "sepal width": 0.5}) + +""" + +import json +import os + + +from bigml.api import get_resource_id, get_resource_type, \ + get_api_connection, get_ensemble_id +from bigml.basemodel import BaseModel +from bigml.model import Model +from bigml.ensemble import Ensemble +from bigml.logistic import LogisticRegression +from bigml.deepnet import Deepnet +from bigml.linear import LinearRegression +from bigml.fusion import Fusion +from bigml.cluster import Cluster +from bigml.anomaly import Anomaly +from bigml.association import Association +from bigml.timeseries import TimeSeries +try: + from bigml.topicmodel import TopicModel + TOPIC_ENABLED = True +except ImportError: + TOPIC_ENABLED = False +from bigml.pca import PCA +from bigml.constants import OUT_NEW_FIELDS, OUT_NEW_HEADERS, INTERNAL +from bigml.util import get_data_format, get_formatted_data, format_data + + +SUPERVISED_CLASSES = { + "model": Model, + "ensemble": Ensemble, + "logisticregression": LogisticRegression, + "deepnet": Deepnet, + "linearregression": LinearRegression, + "fusion": Fusion} + + +DFT_OUTPUTS = ["prediction", "probability"] + + +MODEL_CLASSES = { + "cluster": Cluster, + "anomaly": Anomaly, + "association": Association, + "pca": PCA, + "timeseries": TimeSeries} +MODEL_CLASSES.update(SUPERVISED_CLASSES) +if TOPIC_ENABLED: + MODEL_CLASSES.update({"topicmodel": TopicModel}) + + +def extract_id(model, api): + """Extract the resource id from: + - a resource ID string + - a list of resources (ensemble + models) + - a resource structure + - the name of the file that contains a resource structure + + """ + # the string can be a path to a JSON file + if isinstance(model, str): + try: + path = os.path.dirname(os.path.abspath(model)) + with open(model) as model_file: + model = json.load(model_file) + resource_id = get_resource_id(model) + if resource_id is None: + raise ValueError("The JSON file does not seem" + " to contain a valid BigML resource" + " representation.") + api.storage = path + except IOError: + # if it is not a path, it can be a model id + resource_id = get_resource_id(model) + if resource_id is None: + for resource_type in MODEL_CLASSES.keys(): + if model.find("%s/" % resource_type) > -1: + raise Exception( + api.error_message(model, + resource_type=resource_type, + method="get")) + raise IOError("Failed to open the expected JSON file" + " at %s." % model) + except ValueError: + raise ValueError("Failed to interpret %s." + " JSON file expected." % model) + if isinstance(model, list): + resource_id = get_ensemble_id(model[0]) + if resource_id is None: + raise ValueError("The first argument does not contain a valid" + " BigML model structure.") + else: + resource_id = get_resource_id(model) + if resource_id is None: + raise ValueError("The first argument does not contain a valid" + " BigML model structure.") + return resource_id, model + + +class LocalModel(BaseModel): + """ A lightweight wrapper around any BigML model. + + Uses any BigML remote model to build a local version + that can be used to generate predictions locally.
+ + """ + + def __init__(self, model, api=None, cache_get=None, + operation_settings=None): + + self.api = get_api_connection(api) + resource_id, model = extract_id(model, self.api) + resource_type = get_resource_type(resource_id) + if resource_type == "topicmodel" and not TOPIC_ENABLED: + raise ValueError("Failed to import the TopicModel class. " + "Please, check the bindings extra options to install" + " the class.") + kwargs = {"api": self.api, "cache_get": cache_get} + if resource_type in SUPERVISED_CLASSES.keys() and \ + resource_type != "linearregression": + kwargs.update({"operation_settings": operation_settings}) + local_model = MODEL_CLASSES[resource_type](model, **kwargs) + self.__class__.__bases__ = local_model.__class__.__bases__ + for attr, value in list(local_model.__dict__.items()): + setattr(self, attr, value) + self.local_model = local_model + self.supervised = resource_type in SUPERVISED_CLASSES.keys() + self.name = self.local_model.name + self.description = self.local_model.description + + def predict(self, *args, **kwargs): + """Delegating method to local model object""" + return self.local_model.predict(*args, **kwargs) + + def data_transformations(self): + """Returns the pipeline transformations previous to the modeling + step as a pipeline, so that they can be used in local predictions. + """ + return self.local_model.data_transformations() + + def batch_predict(self, input_data_list, outputs=None, **kwargs): + """Creates a batch prediction for a list of inputs using the local + BigML model. Allows to define some output settings to + decide the fields to be added to the input_data (prediction, + probability, etc.) and the name that we want to assign to these new + fields. The outputs argument accepts a dictionary with keys + "output_fields", to contain a list of the prediction properties to add + (["prediction", "probability"] by default) and "output_headers", to + contain a list of the headers to be used when adding them (identical + to "output_fields" list, by default). 
+ + :param input_data_list: List of input data to be predicted + :type input_data_list: list or Pandas dataframe + :param dict outputs: properties that define the headers and fields to + be added to the input data + :return: the list of input data plus the predicted values + :rtype: list or Pandas dataframe depending on the input type in + input_data_list + """ + if isinstance(self.local_model, (Association, TimeSeries)): + raise ValueError("The method is not available for Associations or " + "TimeSeries.") + if self.supervised: + if outputs is None: + outputs = {} + new_fields = outputs.get(OUT_NEW_FIELDS, DFT_OUTPUTS) + new_headers = outputs.get(OUT_NEW_HEADERS, new_fields) + if len(new_fields) > len(new_headers): + new_headers.extend(new_fields[len(new_headers):]) + else: + new_headers = new_headers[0: len(new_fields)] + data_format = get_data_format(input_data_list) + inner_data_list = get_formatted_data(input_data_list, INTERNAL) + kwargs.update({"full": True}) + for input_data in inner_data_list: + prediction = self.predict(input_data, **kwargs) + for index, key in enumerate(new_fields): + try: + input_data[new_headers[index]] = prediction[key] + except KeyError: + pass + if data_format != INTERNAL: + return format_data(inner_data_list, out_format=data_format) + return inner_data_list + return self.local_model.batch_predict(input_data_list, + outputs=outputs, **kwargs) + + #pylint: disable=locally-disabled,arguments-differ + def dump(self, **kwargs): + """Delegate to local model""" + self.local_model.dump(**kwargs) + + def dumps(self): + """Delegate to local model""" + return self.local_model.dumps() diff --git a/bigml/logistic.py b/bigml/logistic.py index 094a9e1c..67199512 100644 --- a/bigml/logistic.py +++ b/bigml/logistic.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -21,7 +20,7 @@ embedded into your application without needing to send requests to BigML.io. -This module cannot only save you a few credits, but also enormously +This module can help you enormously to reduce the latency for each prediction and let you use your logistic regressions offline. @@ -43,20 +42,21 @@ import logging import math import copy -import json from functools import cmp_to_key from bigml.api import FINISHED -from bigml.api import get_status, get_api_connection -from bigml.util import cast, check_no_missing_numerics, PRECISION, NUMERIC +from bigml.api import get_status, get_api_connection, \ + get_logistic_regression_id +from bigml.util import cast, check_no_missing_numerics, use_cache, load, \ + get_data_transformations, PRECISION, NUMERIC from bigml.basemodel import get_resource_dict, extract_objective from bigml.model import parse_operating_point, sort_categories from bigml.modelfields import ModelFields LOGGER = logging.getLogger('BigML') -EXPANSION_ATTRIBUTES = {"categorical": "categories", "text": "tag_cloud", +EXPANSION_ATTRIBUTES = {"categorical": "categories", "text": "tag_clouds", "items": "items"} @@ -86,11 +86,42 @@ class LogisticRegression(ModelFields): Uses a BigML remote logistic regression model to build a local version that can be used to generate predictions locally.
+ """ - def __init__(self, logistic_regression, api=None): + #pylint: disable=locally-disabled,invalid-name + def __init__(self, logistic_regression, api=None, cache_get=None, + operation_settings=None): + """ + :param logistic_regression: logistic_regression object or id, list of + ensemble model objects or ids or list of + ensemble obj and local model objects + (see Model) + :param api: connection object. If None, a new connection object is + instantiated. + :param max_models: integer that limits the number of models instantiated + and held in memory at the same time while predicting. + If None, no limit is set and all the ensemble models + are instantiated and held in memory permanently. + :param cache_get: user-provided function that should return the JSON + information describing the model or the corresponding + LogisticRegression object. Can be used to read these + objects from a cache storage. + :param operation_settings: Dict object that contains operating options + """ + + if use_cache(cache_get): + # using a cache to store the model attributes + self.__dict__ = load(get_logistic_regression_id( \ + logistic_regression), cache_get) + self.operation_settings = self._add_operation_settings( + operation_settings) + return self.resource_id = None + self.name = None + self.description = None + self.parent_id = None self.class_names = None self.input_fields = [] self.term_forms = {} @@ -103,6 +134,7 @@ def __init__(self, logistic_regression, api=None): self.data_field_types = {} self.field_codings = {} self.numeric_fields = {} + self.default_numeric_value = None self.bias = None self.missing_numerics = None self.c = None @@ -110,18 +142,24 @@ def __init__(self, logistic_regression, api=None): self.lr_normalize = None self.balance_fields = None self.regularization = None - self.api = get_api_connection(api) + self.flat_coefficients = None + api = get_api_connection(api) old_coefficients = False self.resource_id, logistic_regression = get_resource_dict( \ - logistic_regression, "logisticregression", api=self.api) + logistic_regression, "logisticregression", api=api) if 'object' in logistic_regression and \ isinstance(logistic_regression['object'], dict): logistic_regression = logistic_regression['object'] + self.parent_id = logistic_regression.get('dataset') + self.name = logistic_regression.get("name") + self.description = logistic_regression.get("description") try: self.input_fields = logistic_regression.get("input_fields", []) + self.default_numeric_value = logistic_regression.get( + "default_numeric_value") self.dataset_field_types = logistic_regression.get( "dataset_field_types", {}) self.weight_field = logistic_regression.get("weight_field") @@ -142,11 +180,11 @@ def __init__(self, logistic_regression, api=None): if not self.input_fields: self.input_fields = [ \ field_id for field_id, _ in - sorted(fields.items(), + sorted(list(fields.items()), key=lambda x: x[1].get("column_number"))] self.coefficients.update(logistic_regression_info.get( \ 'coefficients', [])) - if not isinstance(self.coefficients.values()[0][0], list): + if not isinstance(list(self.coefficients.values())[0][0], list): old_coefficients = True self.bias = logistic_regression_info.get('bias', True) self.c = logistic_regression_info.get('c') @@ -166,8 +204,9 @@ def __init__(self, logistic_regression, api=None): missing_tokens = logistic_regression_info.get("missing_tokens") ModelFields.__init__( self, fields, - objective_id=objective_id, terms=True, categories=True, - numerics=True, missing_tokens=missing_tokens) + 
objective_id=objective_id, categories=True, + numerics=True, missing_tokens=missing_tokens, + operation_settings=operation_settings) self.field_codings = logistic_regression_info.get( \ 'field_codings', {}) self.format_field_codings() @@ -180,17 +219,14 @@ def __init__(self, logistic_regression, api=None): del self.field_codings[field_id] if old_coefficients: self.map_coefficients() - categories = self.fields[self.objective_id].get( \ - "summary", {}).get('categories') - if len(self.coefficients.keys()) > len(categories): + categories = self.categories[self.objective_id] + if len(list(self.coefficients.keys())) > len(categories): self.class_names = [""] else: self.class_names = [] - self.class_names.extend(sorted([category[0] - for category in categories])) + self.class_names.extend(sorted(categories)) # order matters - self.objective_categories = [category[0] - for category in categories] + self.objective_categories = categories else: raise Exception("The logistic regression isn't finished yet") else: @@ -199,6 +235,7 @@ def __init__(self, logistic_regression, api=None): " in the resource:\n\n%s" % logistic_regression) + #pylint: disable=locally-disabled,invalid-name def _sort_predictions(self, a, b, criteria): """Sorts the categories in the predicted node according to the given criteria @@ -225,8 +262,18 @@ def predict_probability(self, input_data, compact=False): if compact: return [category['probability'] for category in distribution] - else: - return distribution + return distribution + + def predict_confidence(self, input_data, compact=False): + """For logistic regressions we assume that probability can be used + as confidence. + """ + if compact: + return self.predict_probability(input_data, compact=compact) + return [{"category": pred["category"], + "confidence": pred["probability"]} + for pred in self.predict_probability(input_data, + compact=compact)] def predict_operating(self, input_data, operating_point=None): @@ -234,8 +281,9 @@ def predict_operating(self, input_data, """ - kind, threshold, positive_class = parse_operating_point( \ - operating_point, ["probability"], self.class_names) + kind, threshold, positive_class = parse_operating_point( + operating_point, ["probability"], + self.class_names, self.operation_settings) predictions = self.predict_probability(input_data, False) position = self.class_names.index(positive_class) if predictions[position][kind] > threshold: @@ -253,6 +301,7 @@ def predict_operating(self, input_data, prediction = prediction[0] prediction["prediction"] = prediction["category"] del prediction["category"] + prediction['confidence'] = prediction['probability'] return prediction def predict_operating_kind(self, input_data, @@ -273,8 +322,10 @@ def predict_operating_kind(self, input_data, prediction = predictions[0] prediction["prediction"] = prediction["category"] del prediction["category"] + prediction['confidence'] = prediction['probability'] return prediction + #pylint: disable=locally-disabled,consider-using-dict-items def predict(self, input_data, operating_point=None, operating_kind=None, full=False): @@ -311,49 +362,52 @@ def predict(self, input_data, # Checks and cleans input_data leaving the fields used in the model unused_fields = [] - new_data = self.filter_input_data( \ + norm_input_data = self.filter_input_data( \ input_data, add_unused_fields=full) if full: - input_data, unused_fields = new_data - else: - input_data = new_data + norm_input_data, unused_fields = norm_input_data # Strips affixes for numeric values and casts to the final 
field type - cast(input_data, self.fields) + cast(norm_input_data, self.fields) # When operating_point is used, we need the probabilities # of all possible classes to decide, so se use # the `predict_probability` method + if operating_point is None and self.operation_settings is not None: + operating_point = self.operation_settings.get("operating_point") + if operating_kind is None and self.operation_settings is not None: + operating_kind = self.operation_settings.get("operating_kind") + if operating_point: return self.predict_operating( \ - input_data, operating_point=operating_point) + norm_input_data, operating_point=operating_point) if operating_kind: return self.predict_operating_kind( \ - input_data, operating_kind=operating_kind) + norm_input_data, operating_kind=operating_kind) # In case that missing_numerics is False, checks that all numeric # fields are present in input data. - if not self.missing_numerics: - check_no_missing_numerics(input_data, self.model_fields, + if not self.missing_numerics and self.default_numeric_value is None: + check_no_missing_numerics(norm_input_data, self.model_fields, self.weight_field) if self.balance_fields: - balance_input(input_data, self.fields) + balance_input(norm_input_data, self.fields) # Computes text and categorical field expansion - unique_terms = self.get_unique_terms(input_data) + unique_terms = self.get_unique_terms(norm_input_data) probabilities = {} total = 0 # Computes the contributions for each category for category in self.coefficients: probability = self.category_probability( \ - input_data, unique_terms, category) + norm_input_data, unique_terms, category) try: order = self.categories[self.objective_id].index(category) except ValueError: - if category == u'': + if category == '': order = len(self.categories[self.objective_id]) probabilities[category] = {"category": category, "probability": probability, @@ -366,7 +420,7 @@ def predict(self, input_data, probabilities[category]["probability"], PRECISION) # Chooses the most probable category as prediction - predictions = sorted(probabilities.items(), + predictions = sorted(list(probabilities.items()), key=lambda x: (x[1]["probability"], - x[1]["order"]), reverse=True) for prediction, probability in predictions: @@ -381,7 +435,8 @@ def predict(self, input_data, for category, probability in predictions]} if full: - result.update({'unused_fields': unused_fields}) + result.update({'unused_fields': unused_fields, 'confidence': + result['probability']}) else: result = result["prediction"] @@ -414,7 +469,7 @@ def category_probability(self, numeric_inputs, unique_terms, category): index = self.items[field_id].index(term) elif field_id in self.categories and ( \ not field_id in self.field_codings or \ - self.field_codings[field_id].keys()[0] == \ + list(self.field_codings[field_id].keys())[0] == \ "dummy"): index = self.categories[field_id].index(term) elif field_id in self.categories: @@ -422,7 +477,7 @@ def category_probability(self, numeric_inputs, unique_terms, category): index = self.categories[field_id].index(term) coeff_index = 0 for contribution in \ - self.field_codings[field_id].values()[0]: + list(self.field_codings[field_id].values())[0]: probability += \ coefficients[coeff_index] * \ contribution[index] * occurrences @@ -457,17 +512,13 @@ def category_probability(self, numeric_inputs, unique_terms, category): field_id != self.objective_id and \ field_id not in unique_terms: if field_id not in self.field_codings or \ - self.field_codings[field_id].keys()[0] == "dummy": + 
list(self.field_codings[field_id].keys())[0] == "dummy": probability += coefficients[ \ len(self.categories[field_id])] else: - """ codings are given as arrays of coefficients. The - last one is for missings and the previous ones are - one per category as found in summary - """ coeff_index = 0 for contribution in \ - self.field_codings[field_id].values()[0]: + list(self.field_codings[field_id].values())[0]: probability += coefficients[coeff_index] * \ contribution[-1] coeff_index += 1 @@ -506,7 +557,7 @@ def map_coefficients(self): shift = 0 for field_id in field_ids: optype = self.fields[field_id]['optype'] - if optype in EXPANSION_ATTRIBUTES.keys(): + if optype in list(EXPANSION_ATTRIBUTES.keys()): # text and items fields have one coefficient per # text plus a missing terms coefficient plus a bias # coefficient @@ -514,13 +565,13 @@ # field coding. if optype != 'categorical' or \ not field_id in self.field_codings or \ - self.field_codings[field_id].keys()[0] == "dummy": - length = len(self.fields[field_id]['summary'][ \ - EXPANSION_ATTRIBUTES[optype]]) + list(self.field_codings[field_id].keys())[0] == "dummy": + length = len(getattr( + self, EXPANSION_ATTRIBUTES[optype])[field_id]) # missing coefficient length += 1 else: - length = len(self.field_codings[field_id].values()[0]) + length = len(list(self.field_codings[field_id].values())[0]) else: # numeric fields have one coefficient and an additional one # if self.missing_numerics is True @@ -571,3 +622,10 @@ else: self.field_codings[field_id] = {\ element["coding"]: element['coefficients']} + + def data_transformations(self): + """Returns the pipeline transformations previous to the modeling + step as a pipeline, so that they can be used in local predictions. + We avoid setting it in a Mixin to maintain the current dump function. + """ + return get_data_transformations(self.resource_id, self.parent_id) diff --git a/bigml/model.py b/bigml/model.py index b61624fc..560d5c37 100644 --- a/bigml/model.py +++ b/bigml/model.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2013-2019 BigML +# Copyright 2013-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -21,7 +20,7 @@ embedded into your application without needing to send requests to BigML.io. -This module cannot only save you a few credits, but also enormously +This module can help you enormously to reduce the latency for each prediction and let you use your models offline.
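The offline usage this docstring refers to amounts to instantiating the local `Model` class once and then predicting without further round trips to BigML.io. A rough sketch of that flow (the model id is borrowed from the fixture files added earlier in this patch; the first instantiation still needs credentials unless the JSON is stored locally):

```python
from bigml.model import Model

# Model id taken from the iris fixtures in this patch; any decision
# tree you own works the same way.
local_model = Model("model/5f580eb3440ca135f6022144")
prediction = local_model.predict(
    {"petal width": 0.5, "sepal width": 3.0, "petal length": 1.4},
    full=True)
print(prediction["prediction"], prediction["confidence"])
```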
@@ -49,29 +48,42 @@ """ import logging -import sys +#pylint: disable=locally-disabled,unused-import import locale +from functools import cmp_to_key -from functools import partial, cmp_to_key +import bigml.predict_utils.classification as c +import bigml.predict_utils.regression as r +import bigml.predict_utils.boosting as b + +from bigml.predict_utils.common import FIELD_OFFSET, extract_distribution +from bigml.exceptions import NoRootDecisionTree from bigml.api import FINISHED, STATUSES -from bigml.api import get_status, get_api_connection -from bigml.util import slugify, markdown_cleanup, prefix_as_comment, utf8, \ - find_locale, cast -from bigml.util import DEFAULT_LOCALE, PRECISION -from bigml.tree import Tree, LAST_PREDICTION, PROPORTIONAL -from bigml.boostedtree import BoostedTree -from bigml.predicate import Predicate -from bigml.basemodel import BaseModel, get_resource_dict, print_importance +from bigml.api import get_status, get_api_connection, get_model_id +from bigml.util import find_locale, cast, use_cache, load, \ + get_data_transformations +from bigml.util import DEFAULT_LOCALE, PRECISION, NUMERIC +from bigml.constants import LAST_PREDICTION, PROPORTIONAL, DECIMALS +from bigml.basemodel import BaseModel, get_resource_dict from bigml.multivote import ws_confidence -from bigml.io import UnicodeWriter -from bigml.path import Path, BRIEF from bigml.prediction import Prediction LOGGER = logging.getLogger('BigML') +OPERATING_POINT_KINDS = ["probability", "confidence"] + +DICTIONARY = "dict" + +OUT_FORMATS = [DICTIONARY, "list"] + + +BOOSTING = "boosting" +REGRESSION = "regression" +CLASSIFICATION = "classification" + # we use the atof conversion for integers to include integers written as # 10.0 PYTHON_CONV = { @@ -90,23 +102,15 @@ "second": "lambda x: int(locale.atof(x))", "millisecond": "lambda x: int(locale.atof(x))", "day-of-week": "lambda x: int(locale.atof(x))", - "day-of-month": "lambda x: int(locale.atof(x))" -} - -PYTHON_FUNC = dict([(numtype, eval(function)) - for numtype, function in PYTHON_CONV.iteritems()]) - -INDENT = u' ' + "day-of-month": "lambda x: int(locale.atof(x))"} -DEFAULT_IMPURITY = 0.2 -OPERATING_POINT_KINDS = ["probability", "confidence"] - -DICTIONARY = "dict" - -OUT_FORMATS = [DICTIONARY, "list"] +#pylint: disable=locally-disabled,eval-used +PYTHON_FUNC = {numtype: eval(function) + for numtype, function in PYTHON_CONV.items()} +#pylint: disable=locally-disabled,invalid-name def init_structure(to): """Creates the empty structure to store predictions depending on the chosen format. 
@@ -119,6 +123,7 @@ def init_structure(to): else [] +#pylint: disable=locally-disabled,unused-argument,invalid-name,redefined-builtin def cast_prediction(full_prediction, to=None, confidence=False, probability=False, path=False, distribution=False, @@ -155,11 +160,10 @@ def cast_prediction(full_prediction, to=None, prediction_properties = [ \ "prediction", "confidence", "probability", "path", "distribution", "count", "next", "d_min", "d_max", "median", "unused_fields"] - prediction = True result = init_structure(to) for prop in prediction_properties: value = full_prediction.get(prop) - if eval(prop): + if prop == prediction_properties[0] or eval(prop): if to is None: # tuple result = result + (value,) @@ -171,6 +175,7 @@ def cast_prediction(full_prediction, to=None, return result +#pylint: disable=locally-disabled,invalid-name,redefined-outer-name def sort_categories(a, b, categories_list): """Sorts a list of dictionaries with category keys according to their value and order in the categories_list. If not found, alphabetic order is @@ -189,29 +194,21 @@ def sort_categories(a, b, categories_list): return 0 -def print_distribution(distribution, out=sys.stdout): - """Prints distribution data - - """ - total = reduce(lambda x, y: x + y, - [group[1] for group in distribution]) - for group in distribution: - out.write(utf8( - u" %s: %.2f%% (%d instance%s)\n" % ( - group[0], - round(group[1] * 1.0 / total, 4) * 100, - group[1], - u"" if group[1] == 1 else u"s"))) - - -def parse_operating_point(operating_point, operating_kinds, class_names): +def parse_operating_point(operating_point, operating_kinds, class_names, + operation_settings): """Checks the operating point contents and extracts the three defined variables """ + # If no explicit operating_point is given, the one defined in the + # operation settings, if any, is used + if operating_point is None and operation_settings is not None: + operating_point = operation_settings.get("operating_point") + if "kind" not in operating_point: raise ValueError("Failed to find the kind of operating point.") - elif operating_point["kind"] not in operating_kinds: + if operating_point["kind"] not in operating_kinds: raise ValueError("Unexpected operating point kind. Allowed values" " are: %s." % ", ".join(operating_kinds)) if "threshold" not in operating_point: @@ -224,18 +221,105 @@ def parse_operating_point(operating_point, operating_kinds, class_names): if "positive_class" not in operating_point: raise ValueError("The operating point needs to have a" " positive_class attribute.") - else: - positive_class = operating_point["positive_class"] - if positive_class not in class_names: - raise ValueError("The positive class must be one of the" - "objective field classes: %s." % - ", ".join(class_names)) + positive_class = operating_point["positive_class"] + if positive_class not in class_names: + raise ValueError("The positive class must be one of the" + " objective field classes: %s."
% + ", ".join(class_names)) kind = operating_point["kind"] threshold = operating_point["threshold"] return kind, threshold, positive_class +def to_prediction(model, value_as_string, data_locale=DEFAULT_LOCALE): + """Given a prediction string, returns its value in the required type + + """ + if not isinstance(value_as_string, str): + value_as_string = str(value_as_string, "utf-8") + + objective_id = model.objective_id + if model.fields[objective_id]['optype'] == NUMERIC: + if data_locale is None: + data_locale = model.locale + find_locale(data_locale) + datatype = model.fields[objective_id]['datatype'] + cast_function = PYTHON_FUNC.get(datatype, None) + if cast_function is not None: + return cast_function(value_as_string) + return value_as_string + + +def average_confidence(model): + """Average for the confidence of the predictions resulting from + running the training data through the model + + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + total = 0.0 + cumulative_confidence = 0 + groups = model.group_prediction() + for _, predictions in list(groups.items()): + for _, count, confidence in predictions['details']: + cumulative_confidence += count * confidence + total += count + return float('nan') if total == 0.0 else cumulative_confidence + + +def tree_predict(tree, tree_type, weighted, fields, + input_data, missing_strategy=LAST_PREDICTION): + """Makes a prediction based on a number of field values. + + The input fields must be keyed by Id. There are two possible + strategies to predict when the value for the splitting field + is missing: + 0 - LAST_PREDICTION: the last issued prediction is returned. + 1 - PROPORTIONAL: as we cannot choose between the two branches + in the tree that stem from this split, we consider both. The + algorithm goes on until the final leaves are reached and + all their predictions are used to decide the final prediction. + """ + + if missing_strategy == PROPORTIONAL: + if tree_type == REGRESSION: + return r.regression_proportional_predict(tree, weighted, fields, + input_data) + + if tree_type == CLASSIFICATION: + # classification + return c.classification_proportional_predict(tree, weighted, + fields, + input_data) + # boosting + return b.boosting_proportional_predict(tree, fields, input_data) + + if tree_type == REGRESSION: + # last prediction missing strategy + return r.regression_last_predict(tree, weighted, fields, input_data) + if tree_type == CLASSIFICATION: + return c.classification_last_predict(tree, weighted, fields, + input_data) + # boosting + return b.boosting_last_predict(tree, fields, input_data) + + +def laplacian_term(root_dist, weighted): + """Correction term based on the training dataset distribution + + """ + + if weighted: + category_map = {category[0]: 0.0 for category in root_dist} + else: + total = float(sum([category[1] for category in root_dist])) + category_map = {category[0]: category[1] / total + for category in root_dist} + return category_map + + class Model(BaseModel): """ A lightweight wrapper around a Tree model. 
@@ -244,140 +328,149 @@ class Model(BaseModel): """ - def __init__(self, model, api=None, fields=None): + def __init__(self, model, api=None, fields=None, cache_get=None, + operation_settings=None): """The Model constructor can be given as first argument: - a model structure - a model id - a path to a JSON file containing a model structure + :param model: The model info or reference + :param api: Connection object that will be used to download the model + info if not locally available + :param cache_get: Get function that handles memory-cached objects + :param operation_settings: Dict object that contains operating options + + The operation_settings will depend on the type of ML problem: + - regressions: no operation_settings allowed + - classifications: operating_point, operating_kind + """ + + if use_cache(cache_get): + # using a cache to store the model attributes + self.__dict__ = load(get_model_id(model), cache_get) + return + self.resource_id = None + self.name = None + self.description = None + self.parent_id = None self.ids_map = {} self.terms = {} self.regression = False self.boosting = None self.class_names = None - self.api = get_api_connection(api) + self.default_numeric_value = None + api = get_api_connection(api) + # retrieving model information from the given source self.resource_id, model = get_resource_dict( \ - model, "model", api=self.api) - + model, "model", api=api, no_check_fields=fields is not None) if 'object' in model and isinstance(model['object'], dict): model = model['object'] - + try: + self.parent_id = model.get('dataset') + self.name = model.get('name') + self.description = model.get('description') + except AttributeError: + raise ValueError("Failed to find the expected " + "JSON structure. Check your arguments.") if 'model' in model and isinstance(model['model'], dict): status = get_status(model) if 'code' in status and status['code'] == FINISHED: - - self.input_fields = model["input_fields"] - BaseModel.__init__(self, model, api=api, fields=fields) - - # boosting models are to be handled using the BoostedTree - # class + # fill boosting info before creating modelfields if model.get("boosted_ensemble"): self.boosting = model.get('boosting', False) if self.boosting == {}: self.boosting = False - self.regression = \ - not self.boosting and \ - self.fields[self.objective_id]['optype'] == 'numeric' \ - or (self.boosting and \ - self.boosting.get("objective_class") is None) - if not hasattr(self, 'tree_class'): - self.tree_class = Tree if not self.boosting else \ - BoostedTree + self.default_numeric_value = model.get('default_numeric_value') + self.input_fields = model["input_fields"] + BaseModel.__init__(self, model, api=api, fields=fields, + operation_settings=operation_settings) + + try: + root = model['model']['root'] + except KeyError: + raise NoRootDecisionTree("Model %s has no `root` element" + " and cannot be used" + % self.resource_id) + self.weighted = "weighted_objective_summary" in root + + terms = {} + + if self.boosting: + # build boosted tree + self.tree = b.build_boosting_tree( \ + model['model']['root'], terms=terms) + elif self.regression: + self.root_distribution = model['model'][ \ + 'distribution']['training'] + # build regression tree + self.tree = r.build_regression_tree(root, \ + distribution=self.root_distribution, \ + weighted=self.weighted, terms=terms) + else: + # build classification tree + self.root_distribution = model['model'][\ + 'distribution']['training'] + self.laplacian_term = laplacian_term( \ +
extract_distribution(self.root_distribution)[1], + self.weighted) + self.tree = c.build_classification_tree( \ + model['model']['root'], \ + distribution=self.root_distribution, \ + weighted=self.weighted, terms=terms) + self.class_names = sorted( \ + [category[0] for category in \ + self.root_distribution["categories"]]) + self.objective_categories = [category for \ + category, _ in self.fields[self.objective_id][ \ + "summary"]["categories"]] + + if not hasattr(self, "tag_clouds"): + self.tag_clouds = {} + if not hasattr(self, "items"): + self.items = {} + + if terms: + # only the terms used in the model are kept + for field_id, field_terms in terms.items(): + if self.tag_clouds.get(field_id): + self.tag_clouds[field_id] = field_terms + elif self.items.get(field_id): + self.items[field_id] = field_terms if self.boosting: - self.tree = self.tree_class( - model['model']['root'], - self.fields, - objective_field=self.objective_id) + self.tree_type = BOOSTING + self.offsets = b.OFFSETS + elif self.regression: + self.tree_type = REGRESSION + self.offsets = r.OFFSETS[str(self.weighted)] else: - distribution = model['model']['distribution']['training'] - # will store global information in the tree: regression and - # max_bins number - tree_info = {'max_bins': 0} - self.tree = self.tree_class( - model['model']['root'], - self.fields, - objective_field=self.objective_id, - root_distribution=distribution, - parent_id=None, - ids_map=self.ids_map, - tree_info=tree_info) - - self.tree.regression = tree_info['regression'] - - if self.tree.regression: - try: - import numpy - import scipy - self._max_bins = tree_info['max_bins'] - self.regression_ready = True - except ImportError: - self.regression_ready = False - else: - root_dist = self.tree.distribution - self.class_names = sorted([category[0] - for category in root_dist]) - self.objective_categories = [category for \ - category, _ in self.fields[self.objective_id][ \ - "summary"]["categories"]] - if not self.regression and not self.boosting: - self.laplacian_term = self._laplacian_term() + self.tree_type = CLASSIFICATION + self.offsets = c.OFFSETS[str(self.weighted)] else: raise Exception("Cannot create the Model instance." - " Only correctly finished models can be used." - " The model status is currently: %s\n" % - STATUSES[status['code']]) + " Only correctly finished models can be" + " used. The model status is currently:" + " %s\n" % STATUSES[status['code']]) else: raise Exception("Cannot create the Model instance. Could not" - " find the 'model' key in the resource:\n\n%s" % - model) - - def list_fields(self, out=sys.stdout): - """Prints descriptions of the fields for this model. - - """ - self.tree.list_fields(out) - - def get_leaves(self, filter_function=None): - """Returns a list that includes all the leaves of the model. - - filter_function should be a function that returns a boolean - when applied to each leaf node. - """ - return self.tree.get_leaves(filter_function=filter_function) - - def impure_leaves(self, impurity_threshold=DEFAULT_IMPURITY): - """Returns a list of leaves that are impure - - """ - if self.regression or self.boosting: - raise AttributeError("This method is available for non-boosting" - " categorization models only.") - def is_impure(node, impurity_threshold=impurity_threshold): - """Returns True if the gini impurity of the node distribution - goes above the impurity threshold. 
- - """ - return node.get('impurity') > impurity_threshold - - is_impure = partial(is_impure, impurity_threshold=impurity_threshold) - return self.get_leaves(filter_function=is_impure) + " find the 'model' key in the resource:" + "\n\n%s" % model) def _to_output(self, output_map, compact, value_key): if compact: return [round(output_map.get(name, 0.0), PRECISION) for name in self.class_names] - else: - output = [] - for name in self.class_names: - output.append({ - 'category': name, - value_key: round(output_map.get(name, 0.0), PRECISION) - }) - return output + output = [] + for name in self.class_names: + output.append({ + 'category': name, + value_key: round(output_map.get(name, 0.0), PRECISION) + }) + return output def predict_confidence(self, input_data, missing_strategy=LAST_PREDICTION, compact=False): @@ -412,44 +505,32 @@ def predict_confidence(self, input_data, missing_strategy=LAST_PREDICTION, confidence=True) return output - elif self.boosting: + if self.boosting: raise AttributeError("This method is available for non-boosting" " models only.") - root_dist = self.tree.distribution + root_dist = self.root_distribution category_map = {category[0]: 0.0 for category in root_dist} prediction = self.predict(input_data, missing_strategy=missing_strategy, full=True) distribution = prediction['distribution'] + population = prediction['count'] for class_info in distribution: name = class_info[0] - category_map[name] = ws_confidence(name, distribution) + category_map[name] = ws_confidence(name, distribution, + ws_n=population) return self._to_output(category_map, compact, "confidence") - def _laplacian_term(self): - """Correction term based on the training dataset distribution - - """ - root_dist = self.tree.distribution - - if self.tree.weighted: - category_map = {category[0]: 0.0 for category in root_dist} - else: - total = float(sum([category[1] for category in root_dist])) - category_map = {category[0]: category[1] / total - for category in root_dist} - return category_map - def _probabilities(self, distribution): """Computes the probability of a distribution using a Laplacian correction. 
""" - total = 0 if self.tree.weighted else 1 + total = 0 if self.weighted else 1 category_map = {} category_map.update(self.laplacian_term) @@ -461,7 +542,6 @@ def _probabilities(self, distribution): category_map[k] /= total return category_map - def predict_probability(self, input_data, missing_strategy=LAST_PREDICTION, compact=False): @@ -508,7 +588,8 @@ def predict_operating(self, input_data, """ kind, threshold, positive_class = parse_operating_point( \ - operating_point, OPERATING_POINT_KINDS, self.class_names) + operating_point, OPERATING_POINT_KINDS, self.class_names, + self.operation_settings) if kind == "probability": predictions = self.predict_probability(input_data, missing_strategy, False) @@ -534,6 +615,7 @@ def predict_operating(self, input_data, del prediction["category"] return prediction + #pylint: disable=locally-disabled,invalid-name,redefined-outer-name def _sort_predictions(self, a, b, criteria): """Sorts the categories in the predicted node according to the given criteria @@ -619,24 +701,30 @@ def predict(self, input_data, missing_strategy=LAST_PREDICTION, # Checks and cleans input_data leaving the fields used in the model unused_fields = [] - new_data = self.filter_input_data( \ + norm_input_data = self.filter_input_data( \ input_data, add_unused_fields=full) if full: - input_data, unused_fields = new_data - else: - input_data = new_data + norm_input_data, unused_fields = norm_input_data + # Strips affixes for numeric values and casts to the final field type - cast(input_data, self.fields) + cast(norm_input_data, self.fields) + + if operating_point is None and self.operation_settings is not None: + operating_point = self.operation_settings.get("operating_point") + if operating_kind is None and self.operation_settings is not None: + operating_kind = self.operation_settings.get("operating_kind") full_prediction = self._predict( \ - input_data, missing_strategy=missing_strategy, + norm_input_data, missing_strategy=missing_strategy, operating_point=operating_point, operating_kind=operating_kind, unused_fields=unused_fields) + if self.regression: + full_prediction['prediction'] = round( + full_prediction['prediction'], DECIMALS) if full: return dict((key, value) for key, value in \ - full_prediction.iteritems() if value is not None) - + full_prediction.items() if value is not None) return full_prediction['prediction'] def _predict(self, input_data, missing_strategy=LAST_PREDICTION, @@ -670,23 +758,14 @@ def _predict(self, input_data, missing_strategy=LAST_PREDICTION, operating_kind=operating_kind) return prediction - # Checks if this is a regression model, using PROPORTIONAL - # missing_strategy - if (not self.boosting and - self.regression and missing_strategy == PROPORTIONAL and - not self.regression_ready): - raise ImportError("Failed to find the numpy and scipy libraries," - " needed to use proportional missing strategy" - " for regressions. 
Please install them before" " using local predictions for the model.") - - prediction = self.tree.predict(input_data, - missing_strategy=missing_strategy) + prediction = tree_predict( \ + self.tree, self.tree_type, self.weighted, self.fields, + input_data, missing_strategy=missing_strategy) if self.boosting and missing_strategy == PROPORTIONAL: # output has to be recomputed and comes in a different format g_sum, h_sum, population, path = prediction - prediction = Prediction( + prediction = Prediction( \ - g_sum / (h_sum + self.boosting.get("lambda", 1)), path, None, @@ -701,7 +780,7 @@ def _predict(self, input_data, missing_strategy=LAST_PREDICTION, del result['output'] # next field = (None if len(prediction.children) == 0 else - prediction.children[0].predicate.field) + prediction.children[0][FIELD_OFFSET]) if field is not None and field in self.model_fields: field = self.model_fields[field]['name'] result.update({'next': field}) @@ -715,586 +794,9 @@ def _predict(self, input_data, missing_strategy=LAST_PREDICTION, return result - def docstring(self): - """Returns the docstring describing the model. - - """ - objective_name = self.fields[self.tree.objective_id]['name'] if \ - not self.boosting else \ - self.fields[self.boosting["objective_field"]]['name'] - docstring = (u"Predictor for %s from %s\n" % ( - objective_name, - self.resource_id)) - self.description = ( - unicode( - markdown_cleanup(self.description).strip()) or - u'Predictive model by BigML - Machine Learning Made Easy') - docstring += u"\n" + INDENT * 2 + ( - u"%s" % prefix_as_comment(INDENT * 2, self.description)) - return docstring - - def get_ids_path(self, filter_id): - """Builds the list of ids that go from a given id to the tree root - - """ - ids_path = None - if filter_id is not None and self.tree.id is not None: - if filter_id not in self.ids_map: - raise ValueError("The given id does not exist.") - else: - ids_path = [filter_id] - last_id = filter_id - while self.ids_map[last_id].parent_id is not None: - ids_path.append(self.ids_map[last_id].parent_id) - last_id = self.ids_map[last_id].parent_id - return ids_path - - def rules(self, out=sys.stdout, filter_id=None, subtree=True): - """Returns a IF-THEN rule set that implements the model. - - `out` is file descriptor to write the rules. - - """ - if self.boosting: - raise AttributeError("This method is not available for boosting" - " models.") - ids_path = self.get_ids_path(filter_id) - return self.tree.rules(out, ids_path=ids_path, subtree=subtree) - - def python(self, out=sys.stdout, hadoop=False, - filter_id=None, subtree=True): - """Returns a basic python function that implements the model. - - `out` is file descriptor to write the python code. - + def data_transformations(self): + """Returns the pipeline transformations prior to the modeling + step, as a pipeline, so that they can be used in local predictions. + It is not set in a Mixin so the current dump function keeps working. """ - if self.boosting: - raise AttributeError("This method is not available for boosting" - " models.") - ids_path = self.get_ids_path(filter_id) - if hadoop: - return (self.hadoop_python_mapper(out=out, - ids_path=ids_path, - subtree=subtree) or - self.hadoop_python_reducer(out=out)) - else: - return self.tree.python(out, self.docstring(), ids_path=ids_path, - subtree=subtree) - - def tableau(self, out=sys.stdout, hadoop=False, - filter_id=None, subtree=True): - """Returns a basic tableau function that implements the model. - - `out` is file descriptor to write the tableau code.
- - """ - if self.boosting: - raise AttributeError("This method is not available for boosting" - " models.") - ids_path = self.get_ids_path(filter_id) - if hadoop: - return "Hadoop output not available." - else: - response = self.tree.tableau(out, ids_path=ids_path, - subtree=subtree) - if response: - out.write(u"END\n") - else: - out.write(u"\nThis function cannot be represented " - u"in Tableau syntax.\n") - out.flush() - return None - - def group_prediction(self): - """Groups in categories or bins the predicted data - - dict - contains a dict grouping counts in 'total' and 'details' lists. - 'total' key contains a 3-element list. - - common segment of the tree for all instances - - data count - - predictions count - 'details' key contains a list of elements. Each element is a - 3-element list: - - complete path of the tree from the root to the leaf - - leaf predictions count - - confidence - """ - if self.boosting: - raise AttributeError("This method is not available for boosting" - " models.") - groups = {} - tree = self.tree - distribution = tree.distribution - - for group in distribution: - groups[group[0]] = {'total': [[], group[1], 0], - 'details': []} - path = [] - - def add_to_groups(groups, output, path, count, confidence, - impurity=None): - """Adds instances to groups array - - """ - group = output - if output not in groups: - groups[group] = {'total': [[], 0, 0], - 'details': []} - groups[group]['details'].append([path, count, confidence, - impurity]) - groups[group]['total'][2] += count - - def depth_first_search(tree, path): - """Search for leafs' values and instances - - """ - if isinstance(tree.predicate, Predicate): - path.append(tree.predicate) - if tree.predicate.term: - term = tree.predicate.term - if tree.predicate.field not in self.terms: - self.terms[tree.predicate.field] = [] - if term not in self.terms[tree.predicate.field]: - self.terms[tree.predicate.field].append(term) - - if len(tree.children) == 0: - add_to_groups(groups, tree.output, - path, tree.count, tree.confidence, tree.impurity) - return tree.count - else: - children = tree.children[:] - children.reverse() - - children_sum = 0 - for child in children: - children_sum += depth_first_search(child, path[:]) - if children_sum < tree.count: - add_to_groups(groups, tree.output, path, - tree.count - children_sum, tree.confidence, - tree.impurity) - return tree.count - - depth_first_search(tree, path) - - return groups - - def get_data_distribution(self): - """Returns training data distribution - - """ - if self.boosting: - raise AttributeError("This method is not available for boosting" - " models.") - tree = self.tree - distribution = tree.distribution - - return sorted(distribution, key=lambda x: x[0]) - - def get_prediction_distribution(self, groups=None): - """Returns model predicted distribution - - """ - if self.boosting: - raise AttributeError("This method is not available for boosting" - " models.") - if groups is None: - groups = self.group_prediction() - - predictions = [[group, groups[group]['total'][2]] for group in groups] - # remove groups that are not predicted - predictions = [prediction for prediction in predictions \ - if prediction[1] > 0] - - return sorted(predictions, key=lambda x: x[0]) - - def summarize(self, out=sys.stdout, format=BRIEF): - """Prints summary grouping distribution as class header and details - - """ - if self.boosting: - raise AttributeError("This method is not available for boosting" - " models.") - tree = self.tree - - def extract_common_path(groups): - """Extracts 
the common segment of the prediction path for a group - - """ - for group in groups: - details = groups[group]['details'] - common_path = [] - if len(details) > 0: - mcd_len = min([len(x[0]) for x in details]) - for i in range(0, mcd_len): - test_common_path = details[0][0][i] - for subgroup in details: - if subgroup[0][i] != test_common_path: - i = mcd_len - break - if i < mcd_len: - common_path.append(test_common_path) - groups[group]['total'][0] = common_path - if len(details) > 0: - groups[group]['details'] = sorted(details, - key=lambda x: x[1], - reverse=True) - - def confidence_error(value, impurity=None): - """Returns confidence for categoric objective fields - and error for numeric objective fields - """ - if value is None: - return "" - impurity_literal = "" - if impurity is not None and impurity > 0: - impurity_literal = "; impurity: %.2f%%" % (round(impurity, 4)) - objective_type = self.fields[tree.objective_id]['optype'] - if objective_type == 'numeric': - return u" [Error: %s]" % value - else: - return u" [Confidence: %.2f%%%s]" % ((round(value, 4) * 100), - impurity_literal) - - distribution = self.get_data_distribution() - - out.write(utf8(u"Data distribution:\n")) - print_distribution(distribution, out=out) - out.write(utf8(u"\n\n")) - - groups = self.group_prediction() - predictions = self.get_prediction_distribution(groups) - - out.write(utf8(u"Predicted distribution:\n")) - print_distribution(predictions, out=out) - out.write(utf8(u"\n\n")) - - if self.field_importance: - out.write(utf8(u"Field importance:\n")) - print_importance(self, out=out) - - extract_common_path(groups) - - out.write(utf8(u"\n\nRules summary:")) - - for group in [x[0] for x in predictions]: - details = groups[group]['details'] - path = Path(groups[group]['total'][0]) - data_per_group = groups[group]['total'][1] * 1.0 / tree.count - pred_per_group = groups[group]['total'][2] * 1.0 / tree.count - out.write(utf8(u"\n\n%s : (data %.2f%% / prediction %.2f%%) %s" % - (group, - round(data_per_group, 4) * 100, - round(pred_per_group, 4) * 100, - path.to_rules(self.fields, format=format)))) - - if len(details) == 0: - out.write(utf8(u"\n The model will never predict this" - u" class\n")) - elif len(details) == 1: - subgroup = details[0] - out.write(utf8(u"%s\n" % confidence_error( - subgroup[2], impurity=subgroup[3]))) - else: - out.write(utf8(u"\n")) - for j in range(0, len(details)): - subgroup = details[j] - pred_per_sgroup = subgroup[1] * 1.0 / \ - groups[group]['total'][2] - path = Path(subgroup[0]) - path_chain = path.to_rules(self.fields, format=format) if \ - path.predicates else "(root node)" - out.write(utf8(u" · %.2f%%: %s%s\n" % - (round(pred_per_sgroup, 4) * 100, - path_chain, - confidence_error(subgroup[2], - impurity=subgroup[3])))) - - out.flush() - - def hadoop_python_mapper(self, out=sys.stdout, ids_path=None, - subtree=True): - """Returns a hadoop mapper header to make predictions in python - - """ - if self.boosting: - raise AttributeError("This method is not available for boosting" - " models.") - input_fields = [(value, key) for (key, value) in - sorted(self.inverted_fields.items(), - key=lambda x: x[1])] - parameters = [value for (key, value) in - input_fields if key != self.tree.objective_id] - args = [] - for field in input_fields: - slug = slugify(self.fields[field[0]]['name']) - self.fields[field[0]].update(slug=slug) - if field[0] != self.tree.objective_id: - args.append("\"" + self.fields[field[0]]['slug'] + "\"") - output = \ -u"""#!/usr/bin/env python -# -*- coding: utf-8 -*- 
- -import sys -import csv -import locale -locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') - - -class CSVInput(object): - \"\"\"Reads and parses csv input from stdin - - Expects a data section (without headers) with the following fields: - %s - - Data is processed to fall into the corresponding input type by applying - INPUT_TYPES, and per field PREFIXES and SUFFIXES are removed. You can - also provide strings to be considered as no content markers in - MISSING_TOKENS. - \"\"\" - def __init__(self, input=sys.stdin): - \"\"\" Opens stdin and defines parsing constants - - \"\"\" - try: - self.reader = csv.reader(input, delimiter=',', quotechar='\"') -""" % ",".join(parameters) - - output += ( - u"\n%sself.INPUT_FIELDS = [%s]\n" % - ((INDENT * 3), (",\n " + INDENT * 8).join(args))) - - input_types = [] - prefixes = [] - suffixes = [] - count = 0 - fields = self.fields - for key in [key[0] for key in input_fields - if key != self.tree.objective_id]: - input_type = ('None' if fields[key]['datatype'] not in - PYTHON_CONV - else PYTHON_CONV[fields[key]['datatype']]) - input_types.append(input_type) - if 'prefix' in fields[key]: - prefixes.append("%s: %s" % (count, - repr(fields[key]['prefix']))) - if 'suffix' in fields[key]: - suffixes.append("%s: %s" % (count, - repr(fields[key]['suffix']))) - count += 1 - static_content = "%sself.INPUT_TYPES = [" % (INDENT * 3) - formatter = ",\n%s" % (" " * len(static_content)) - output += u"\n%s%s%s" % (static_content, - formatter.join(input_types), - "]\n") - static_content = "%sself.PREFIXES = {" % (INDENT * 3) - formatter = ",\n%s" % (" " * len(static_content)) - output += u"\n%s%s%s" % (static_content, - formatter.join(prefixes), - "}\n") - static_content = "%sself.SUFFIXES = {" % (INDENT * 3) - formatter = ",\n%s" % (" " * len(static_content)) - output += u"\n%s%s%s" % (static_content, - formatter.join(suffixes), - "}\n") - output += \ -u""" self.MISSING_TOKENS = ['?'] - except Exception, exc: - sys.stderr.write(\"Cannot read csv\" - \" input. %s\\n\" % str(exc)) - - def __iter__(self): - \"\"\" Iterator method - - \"\"\" - return self - - def next(self): - \"\"\" Returns processed data in a list structure - - \"\"\" - def normalize(value): - \"\"\"Transforms to unicode and cleans missing tokens - \"\"\" - value = unicode(value.decode('utf-8')) - return \"\" if value in self.MISSING_TOKENS else value - - def cast(function_value): - \"\"\"Type related transformations - \"\"\" - function, value = function_value - if not len(value): - return None - if function is None: - return value - else: - return function(value) - - try: - values = self.reader.next() - except StopIteration: - raise StopIteration() - if len(values) < len(self.INPUT_FIELDS): - sys.stderr.write(\"Found %s fields when %s were expected.\\n\" % - (len(values), len(self.INPUT_FIELDS))) - raise StopIteration() - else: - values = values[0:len(self.INPUT_FIELDS)] - try: - values = map(normalize, values) - for key in self.PREFIXES: - prefix_len = len(self.PREFIXES[key]) - if values[key][0:prefix_len] == self.PREFIXES[key]: - values[key] = values[key][prefix_len:] - for key in self.SUFFIXES: - suffix_len = len(self.SUFFIXES[key]) - if values[key][-suffix_len:] == self.SUFFIXES[key]: - values[key] = values[key][0:-suffix_len] - function_tuples = zip(self.INPUT_TYPES, values) - values = map(cast, function_tuples) - data = {} - for i in range(len(values)): - data.update({self.INPUT_FIELDS[i]: values[i]}) - return data - except Exception, exc: - sys.stderr.write(\"Error in data transformations. 
%s\\n\" % - str(exc)) - return False -\n\n -""" - out.write(utf8(output)) - out.flush() - - self.tree.python(out, self.docstring(), - input_map=True, - ids_path=ids_path, - subtree=subtree) - output = \ -u""" -csv = CSVInput() -for values in csv: - if not isinstance(values, bool): - print u'%%s\\t%%s' %% (repr(values), repr(predict_%s(values))) -\n\n -""" % fields[self.tree.objective_id]['slug'] - out.write(utf8(output)) - out.flush() - - def hadoop_python_reducer(self, out=sys.stdout): - """Returns a hadoop reducer to make predictions in python - - """ - - output = \ -u"""#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys - -count = 0 -previous = None - -def print_result(values, prediction, count): - \"\"\"Prints input data and predicted value as an ordered list. - - \"\"\" - result = \"[%s, %s]\" % (values, prediction) - print u\"%s\\t%s\" % (result, count) - -for line in sys.stdin: - values, prediction = line.strip().split('\\t') - if previous is None: - previous = (values, prediction) - if values != previous[0]: - print_result(previous[0], previous[1], count) - previous = (values, prediction) - count = 0 - count += 1 -if count > 0: - print_result(previous[0], previous[1], count) -""" - out.write(utf8(output)) - out.flush() - - def to_prediction(self, value_as_string, data_locale=DEFAULT_LOCALE): - """Given a prediction string, returns its value in the required type - - """ - if not isinstance(value_as_string, unicode): - value_as_string = unicode(value_as_string, "utf-8") - - objective_id = self.tree.objective_id - if self.fields[objective_id]['optype'] == 'numeric': - if data_locale is None: - data_locale = self.locale - find_locale(data_locale) - datatype = self.fields[objective_id]['datatype'] - cast_function = PYTHON_FUNC.get(datatype, None) - if cast_function is not None: - return cast_function(value_as_string) - return value_as_string - - def average_confidence(self): - """Average for the confidence of the predictions resulting from - running the training data through the model - - """ - if self.boosting: - raise AttributeError("This method is not available for boosting" - " models.") - total = 0.0 - cumulative_confidence = 0 - groups = self.group_prediction() - for _, predictions in groups.items(): - for _, count, confidence in predictions['details']: - cumulative_confidence += count * confidence - total += count - return float('nan') if total == 0.0 else cumulative_confidence - - def get_nodes_info(self, headers, leaves_only=False): - """Generator that yields the nodes information in a row format - - """ - if self.boosting: - raise AttributeError("This method is not available for boosting" - " models.") - return self.tree.get_nodes_info(headers, leaves_only=leaves_only) - - def tree_csv(self, file_name=None, leaves_only=False): - """Outputs the node structure to a CSV file or array - - """ - if self.boosting: - raise AttributeError("This method is not available for boosting" - " models.") - headers_names = [] - if self.regression: - headers_names.append( - self.fields[self.tree.objective_id]['name']) - headers_names.append("error") - for index in range(0, self._max_bins): - headers_names.append("bin%s_value" % index) - headers_names.append("bin%s_instances" % index) - else: - headers_names.append( - self.fields[self.tree.objective_id]['name']) - headers_names.append("confidence") - headers_names.append("impurity") - for category, _ in self.tree.distribution: - headers_names.append(category) - - nodes_generator = self.get_nodes_info(headers_names, - 
leaves_only=leaves_only) - if file_name is not None: - with UnicodeWriter(file_name) as writer: - writer.writerow([header.encode("utf-8") - for header in headers_names]) - for row in nodes_generator: - writer.writerow([item if not isinstance(item, basestring) - else item.encode("utf-8") - for item in row]) - else: - rows = [] - rows.append(headers_names) - for row in nodes_generator: - rows.append(row) - return rows + return get_data_transformations(self.resource_id, self.parent_id) diff --git a/bigml/modelfields.py b/bigml/modelfields.py index 86e003c7..964015f0 100644 --- a/bigml/modelfields.py +++ b/bigml/modelfields.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2013-2019 BigML +# Copyright 2013-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -25,20 +24,18 @@ """ import logging import re +import copy -from bigml.util import invert_dictionary, DEFAULT_LOCALE -from bigml.fields import DEFAULT_MISSING_TOKENS, FIELDS_PARENT -from bigml.resourcehandler import get_resource_type, resource_is_ready -from bigml.predicate import TM_FULL_TERM, TM_ALL -from bigml_chronos import chronos +from bigml.util import invert_dictionary, dump, dumps, DEFAULT_LOCALE +from bigml.constants import DEFAULT_MISSING_TOKENS, FIELDS_PARENT, \ + ENSEMBLE_PATH, DEFAULT_OPERATION_SETTINGS +from bigml.api_handlers.resourcehandler import get_resource_type +from bigml.predicate import TM_FULL_TERM, TM_ALL LOGGER = logging.getLogger('BigML') -DATE_FNS = { - "day-of-month": lambda (x): x.day, - "day-of-week": lambda (x): x.weekday() + 1, - "millisecond": lambda(x): x.microsecond / 1000} +NUMERIC = "numeric" def parse_terms(text, case_sensitive=True): @@ -47,7 +44,7 @@ def parse_terms(text, case_sensitive=True): """ if text is None: return [] - expression = ur'(\b|_)([^\b_\s]+?)(\b|_)' + expression = r'(\b|_)([^\b_\s]+?)(\b|_)' pattern = re.compile(expression) return [match[1] if case_sensitive else match[1].lower() for match in re.findall(pattern, text)] @@ -63,27 +60,57 @@ def parse_items(text, regexp): return [term.strip() for term in pattern.split(text)] -def check_model_structure(model, inner_key="model"): - """Checks the model structure to see if it contains all the - main expected keys +def check_resource_fields(resource): + """Checks the resource structure to see whether it contains the required + fields information """ - return (isinstance(model, dict) and 'resource' in model and - model['resource'] is not None and - ('object' in model and inner_key in model['object'] or - inner_key in model)) + inner_key = FIELDS_PARENT.get(get_resource_type(resource), 'model') + if check_resource_structure(resource, inner_key): + resource = resource.get('object', resource) + fields = resource.get("fields", + resource.get(inner_key, {}).get('fields')) + input_fields = resource.get("input_fields") + # models only need model_fields to work. 
The rest of the resources will + need all fields to work + model_fields = list(resource.get(inner_key, {}).get( \ + 'model_fields', {}).keys()) + # fusions don't have input fields + if input_fields is None and inner_key != "fusion": + return False + if not model_fields: + fields_meta = resource.get('fields_meta', \ + resource.get(inner_key, {}).get('fields_meta', {})) + try: + return fields_meta['count'] == fields_meta['total'] + except KeyError: + # stored old models will not have the fields_meta info, so + # we return True to avoid failing in this case + return True + else: + if fields is None: + return False + return all(field_id in list(fields.keys()) \ + for field_id in model_fields) + return False -def lacks_info(model, inner_key="model"): - """Whether the information in `model` is not enough to use it locally +def check_resource_structure(resource, inner_key=None): + """Checks the resource structure to see if it contains all the + main expected keys """ - try: - return not (resource_is_ready(model) and \ - check_model_structure(model, inner_key) and \ - check_model_fields(model)) - except Exception: - return True + if inner_key is None: + inner_key = FIELDS_PARENT.get(get_resource_type(resource), 'model') + # for datasets, only checking the resource ID + if inner_key is None: + return (isinstance(resource, dict) and 'resource' in resource and + resource['resource'] is not None) + # for the rest of models + return (isinstance(resource, dict) and 'resource' in resource and + resource['resource'] is not None and + (('object' in resource and inner_key in resource['object']) or + inner_key in resource)) def get_unique_terms(terms, term_forms, tag_cloud): @@ -93,7 +120,7 @@ """ extend_forms = {} - for term, forms in term_forms.items(): + for term, forms in list(term_forms.items()): for form in forms: extend_forms[form] = term extend_forms[term] = term @@ -108,181 +135,110 @@ if term not in terms_set: terms_set[term] = 0 terms_set[term] += 1 - return terms_set.items() - - -def check_model_fields(model): - """Checks the model structure to see whether it contains the required - fields information - - """ - inner_key = FIELDS_PARENT.get(get_resource_type(model), 'model') - if check_model_structure(model, inner_key): - model = model.get('object', model) - fields = model.get("fields", model.get(inner_key, {}).get('fields')) - input_fields = model.get("input_fields") - # models only need model_fields to work.
The rest of resources will - # need all fields to work - model_fields = model.get(inner_key, {}).get( \ - 'model_fields', {}).keys() - # fusions don't have input fields - if input_fields is None and inner_key != "fusion": - return False - if not model_fields: - fields_meta = model.get('fields_meta', \ - model.get(inner_key, {}).get('fields_meta', {})) - try: - return fields_meta['count'] == fields_meta['total'] - except KeyError: - # stored old models will not have the fields_meta info, so - # we return True to avoid failing in this case - return True - else: - if fields is None: - return False - return all([field_id in fields.keys() \ - for field_id in model_fields]) - return False + return list(terms_set.items()) -def get_datetime_subfields(fields): - """ From a dictionary of fields, returns another dictionary - with the subfields from each datetime field - - """ - subfields = {} - for fid, finfo in fields.items(): - if finfo.get('parent_optype', False) == 'datetime': - parent_id = finfo["parent_ids"][0] - parent_name = fields[parent_id]["name"] - subfield = {fid: finfo["datatype"]} - if parent_id in subfields.keys(): - subfields[parent_id].update(subfield) - else: - subfields[parent_id] = subfield - return subfields - - -def expand_date(date, subfields, timeformats): - """ Retrieves all the values of the subfields from - a given date - - """ - expanded = {} - try: - parsed_date = chronos.parse(date, format_names=timeformats) - except ValueError: - return {} - for fid, ftype in subfields.items(): - date_fn = DATE_FNS.get(ftype) - if date_fn is not None: - expanded.update({fid: date_fn(parsed_date)}) - else: - expanded.update({fid: getattr(parsed_date, ftype)}) - return expanded - - -def get_datetime_formats(fields): - """From a dictionary of fields, return another dictionary - with the time formats form each datetime field - - """ - timeformats = {} - for f_id, finfo in fields.items(): - if finfo.get('optype', False) == 'datetime': - timeformats[f_id] = finfo.get('time_formats', {}) - return timeformats - - -def add_expanded_dates(input_data, datetime_fields): - """Add the expanded dates in date_fields to the input_data - provided by the user (only if the user didn't specify it) - - """ - for index, value in datetime_fields.items(): - if index not in input_data: - input_data[index] = value - return input_data - - -class ModelFields(object): +class ModelFields: """ A lightweight wrapper of the field information in the model, cluster or anomaly objects """ - + #pylint: disable=locally-disabled,no-member,access-member-before-definition def __init__(self, fields, objective_id=None, data_locale=None, - missing_tokens=None, terms=False, categories=False, - numerics=False): + missing_tokens=None, categories=False, + numerics=False, operation_settings=None, model_fields=None): if isinstance(fields, dict): + tmp_fields = copy.deepcopy(fields) try: self.objective_id = objective_id - self.uniquify_varnames(fields) - self.inverted_fields = invert_dictionary(fields) - self.fields = {} - self.fields.update(fields) + self.uniquify_varnames(tmp_fields) + self.inverted_fields = invert_dictionary(tmp_fields) + self.fields = tmp_fields if not (hasattr(self, "input_fields") and self.input_fields): self.input_fields = [field_id for field_id, field in \ - sorted( \ - [(field_id, field) for field_id, - field in self.fields.items()], - key=lambda(x): x[1].get("column_number")) \ + sorted(list(self.fields.items()), + key=lambda x: x[1].get("column_number")) \ if not self.objective_id or \ field_id != 
self.objective_id] - self.model_fields = {} - self.datetime_parents = [] - self.model_fields.update( - dict([(field_id, field) for field_id, field in \ - self.fields.items() if field_id in self.input_fields and \ - self.fields[field_id].get("preferred", True)])) - # if any of the model fields is a generated datetime field - # we need to add the parent datetime field - self.model_fields = self.add_datetime_parents() + if model_fields is not None: + self.model_fields = model_fields + else: + self.model_fields = {} + self.model_fields.update( + {field_id: field for field_id, field \ + in self.fields.items() if field_id in \ + self.input_fields and self.fields[field_id].get( + "preferred", True)}) self.data_locale = data_locale self.missing_tokens = missing_tokens if self.data_locale is None: self.data_locale = DEFAULT_LOCALE if self.missing_tokens is None: self.missing_tokens = DEFAULT_MISSING_TOKENS - if terms: - # adding text and items information to handle terms - # expansion - self.term_forms = {} - self.tag_clouds = {} - self.term_analysis = {} - self.items = {} - self.item_analysis = {} + # adding text and items information to handle terms + # expansion + self.term_forms = [] + self.tag_clouds = {} + self.term_analysis = {} + self.items = {} + self.item_analysis = {} if categories: self.categories = {} - if terms or categories or numerics: - self.add_terms(categories, numerics) - + self.add_terms(categories, numerics) + + if self.objective_id is not None and \ + hasattr(self, "resource_id") and self.resource_id and \ + get_resource_type(self.resource_id) != ENSEMBLE_PATH: + # Only for models. Ensembles need their own logic + self.regression = \ + (not hasattr(self, "boosting") or not self.boosting) \ + and self.fields[self.objective_id][ \ + 'optype'] == NUMERIC \ + or (hasattr(self, "boosting") and self.boosting and \ + self.boosting.get("objective_class") is None) + self.operation_settings = self._add_operation_settings( + operation_settings) except KeyError: raise Exception("Wrong field structure.") + def _add_operation_settings(self, operation_settings): + """Checks and adds the user-given operation settings """ + if operation_settings is None: + return None + if self.regression: + raise ValueError("No operating settings are allowed" + " for regressions") + return {setting: operation_settings[setting] for + setting in operation_settings.keys() if setting in + DEFAULT_OPERATION_SETTINGS + } + def add_terms(self, categories=False, numerics=False): """Adds the terms information of text and items fields """ - for field_id, field in self.fields.items(): - if field['optype'] == 'text': - self.term_forms[field_id] = {} - self.term_forms[field_id].update( - field['summary']['term_forms']) + for field_id, field in list(self.fields.items()): + if field['optype'] == 'text' and \ + self.fields[field_id]['summary'].get('tag_cloud'): + self.term_forms.append(field_id) self.tag_clouds[field_id] = [] self.tag_clouds[field_id] = [tag for [tag, _] in field[ 'summary']['tag_cloud']] + del self.fields[field_id]["summary"]["tag_cloud"] self.term_analysis[field_id] = {} self.term_analysis[field_id].update( field['term_analysis']) - if field['optype'] == 'items': + if field['optype'] == 'items' and \ + self.fields[field_id]["summary"].get("items"): self.items[field_id] = [] self.items[field_id] = [item for item, _ in \ field['summary']['items']] + del self.fields[field_id]["summary"]["items"] self.item_analysis[field_id] = {} self.item_analysis[field_id].update( field['item_analysis']) - if categories and 
field['optype'] == 'categorical': + if categories and field['optype'] == 'categorical' and \ + self.fields[field_id]["summary"]["categories"]: self.categories[field_id] = [category for \ [category, _] in field['summary']['categories']] if field['optype'] == 'datetime' and \ @@ -300,7 +256,7 @@ def uniquify_varnames(self, fields): transformation is applied to ensure unicity. """ - unique_names = set([fields[key]['name'] for key in fields]) + unique_names = {fields[key]['name'] for key in fields} if len(unique_names) < len(fields): self.transform_repeated_names(fields) @@ -332,41 +288,26 @@ def normalize(self, value): """Transforms to unicode and cleans missing tokens """ - if isinstance(value, basestring) and not isinstance(value, unicode): - value = unicode(value, "utf-8") - return None if value in self.missing_tokens else value - - def expand_datetime_fields(self, input_data): - """Returns the values for all the subfields - from all the datetime fields in input_data - - """ - expanded = {} - timeformats = get_datetime_formats(self.fields) - subfields = get_datetime_subfields(self.fields) - for f_id, value in input_data.items(): - if f_id in subfields: - formats = timeformats.get(f_id, []) - expanded.update(expand_date(value, subfields[f_id], formats)) - return expanded - - def add_datetime_parents(self): - """Adding the fields information for the fields that generate other - datetime fields used in the model - """ - subfields = get_datetime_subfields(self.fields) - for f_id in subfields.keys(): - self.model_fields[f_id] = self.fields[f_id] - self.datetime_parents.append(f_id) - return self.model_fields + if isinstance(value, bytes): + value = str(value, "utf-8") + return None if hasattr(self, "missing_tokens") and \ + value in self.missing_tokens else value - def remove_parent_datetimes(self, input_data): - """Removes the parents of datetime fields + def fill_numeric_defaults(self, input_data): + """Fills in the default value for missing numeric fields if the user + created the model with the default_numeric_value option """ - for f_id in self.datetime_parents: - if f_id in input_data: - del input_data[f_id] + if hasattr(self, "default_numeric_value") and \ + self.default_numeric_value is not None: + for key in self.fields: + if key in self.model_fields and \ + (self.objective_id is None or \ + key != self.objective_id) and \ + self.fields[key]["optype"] == NUMERIC and \ + input_data.get(key) is None: + input_data[key] = self.fields[key]["summary"].get( \ + self.default_numeric_value, 0) return input_data def filter_input_data(self, input_data, @@ -378,13 +319,15 @@ def filter_input_data(self, input_data, """ unused_fields = [] new_input = {} - if isinstance(input_data, dict): + tmp_input = {} + tmp_input.update(input_data) + if isinstance(tmp_input, dict): # remove all missing values - for key, value in input_data.items(): + for key, value in list(tmp_input.items()): value = self.normalize(value) if value is None: - del input_data[key] - for key, value in input_data.items(): + del tmp_input[key] + for key, value in list(tmp_input.items()): if key not in self.fields: key = self.inverted_fields.get(key, key) # only the fields that are listed in input_fields and appear @@ -395,16 +338,21 @@ def filter_input_data(self, input_data, new_input[key] = value else: unused_fields.append(key) - datetime_fields = self.expand_datetime_fields(new_input) - new_input = add_expanded_dates(new_input, datetime_fields) - new_input = self.remove_parent_datetimes(new_input) -
result = (new_input, unused_fields) if add_unused_fields else \ - new_input + # Feature generation (datetime and image features) is now done + # when a Pipeline is created for the model, so no features are + # added any more at this point. + # We fill the input with the chosen default, if selected + new_input = self.fill_numeric_defaults(new_input) + final_input = {} + for key, value in new_input.items(): + if key in self.model_fields: + final_input.update({key: value}) + result = (final_input, unused_fields) if add_unused_fields else \ + final_input return result - else: - LOGGER.error("Failed to read input data in the expected" - " {field:value} format.") - return ({}, []) if add_unused_fields else {} + LOGGER.error("Failed to read input data in the expected" + " {field:value} format.") + return ({}, []) if add_unused_fields else {} def get_unique_terms(self, input_data): """Parses the input data to find the list of unique terms in the @@ -415,7 +363,7 @@ def get_unique_terms(self, input_data): for field_id in self.term_forms: if field_id in input_data: input_data_field = input_data.get(field_id, '') - if isinstance(input_data_field, basestring): + if isinstance(input_data_field, str): case_sensitive = self.term_analysis[field_id].get( 'case_sensitive', True) token_mode = self.term_analysis[field_id].get( @@ -436,23 +384,24 @@ def get_unique_terms(self, input_data): terms[0] != full_term)): terms.append(full_term) unique_terms[field_id] = get_unique_terms( - terms, self.term_forms[field_id], + terms, self.fields[field_id]["summary"]["term_forms"], self.tag_clouds.get(field_id, [])) else: unique_terms[field_id] = [(input_data_field, 1)] del input_data[field_id] # the same for items fields + #pylint: disable=locally-disabled,consider-using-dict-items for field_id in self.item_analysis: if field_id in input_data: input_data_field = input_data.get(field_id, '') - if isinstance(input_data_field, basestring): + if isinstance(input_data_field, str): # parsing the items in input_data separator = self.item_analysis[field_id].get( 'separator', ' ') regexp = self.item_analysis[field_id].get( 'separator_regexp') if regexp is None: - regexp = ur'%s' % re.escape(separator) + regexp = r'%s' % re.escape(separator) terms = parse_items(input_data_field, regexp) unique_terms[field_id] = get_unique_terms( terms, {}, @@ -468,3 +417,18 @@ def get_unique_terms(self, input_data): unique_terms[field_id] = [(input_data_field, 1)] del input_data[field_id] return unique_terms + + def dump(self, output=None, cache_set=None): + """Uses msgpack to serialize the resource object + If cache_set is filled with a cache set method, the method is called + + """ + self_vars = vars(self) + dump(self_vars, output=output, cache_set=cache_set) + + def dumps(self): + """Uses msgpack to serialize the resource object to a string + + """ + self_vars = vars(self) + return dumps(self_vars) diff --git a/bigml/multimodel.py b/bigml/multimodel.py index 34e3feaf..85e7eb9e 100644 --- a/bigml/multimodel.py +++ b/bigml/multimodel.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2012-2019 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -20,7 +19,7 @@ This module defines a Multiple Model to make predictions locally using multiple local models. 
-This module cannot only save you a few credits, but also enormously +This module can help you enormously to reduce the latency for each prediction and let you use your models offline. @@ -39,8 +38,10 @@ import logging import ast +from functools import partial -from bigml.model import Model, cast_prediction +from bigml.exceptions import NoRootDecisionTree +from bigml.model import Model, cast_prediction, to_prediction from bigml.model import LAST_PREDICTION from bigml.util import get_predictions_file_name from bigml.multivote import MultiVote @@ -52,7 +53,7 @@ LOGGER = logging.getLogger('BigML') -def read_votes(votes_files, to_prediction, data_locale=None): +def read_votes(votes_files, to_prediction_fn, data_locale=None): """Reads the votes found in the votes' files. Returns a list of MultiVote objects containing the list of predictions. @@ -70,12 +71,11 @@ def read_votes(votes_files, to_prediction, data_locale=None): used in numeric formatting. """ votes = [] - for order in range(0, len(votes_files)): - votes_file = votes_files[order] + for order, votes_file in enumerate(votes_files): index = 0 with UnicodeReader(votes_file) as rdr: for row in rdr: - prediction = to_prediction(row[0], data_locale=data_locale) + prediction = to_prediction_fn(row[0], data_locale=data_locale) if index > (len(votes) - 1): votes.append(MultiVote([])) distribution = None @@ -94,7 +94,7 @@ def read_votes(votes_files, to_prediction, data_locale=None): return votes -class MultiModel(object): +class MultiModel(): """A multiple local model. Uses a number of BigML remote models to build a local version that can be @@ -102,18 +102,33 @@ class MultiModel(object): """ - def __init__(self, models, api=None, fields=None, class_names=None): + def __init__(self, models, api=None, fields=None, class_names=None, + cache_get=None, operation_settings=None): + self.models = [] self.class_names = class_names if isinstance(models, list): - if all([isinstance(model, Model) for model in models]): + if all(isinstance(model, Model) for model in models): self.models = models else: for model in models: - self.models.append(Model(model, api=api, fields=fields)) + # some models have no root info and should not be added + try: + self.models.append(Model( + model, api=api, fields=fields, + cache_get=cache_get, + operation_settings=operation_settings)) + except NoRootDecisionTree: + pass else: - self.models.append(Model(models, api=api, fields=fields)) + try: + self.models.append(Model( + models, api=api, fields=fields, + cache_get=cache_get, + operation_settings=operation_settings)) + except NoRootDecisionTree: + pass def list_models(self): """Lists all the model/ids that compound the multi model. @@ -157,8 +172,7 @@ def generate_votes(self, input_data, made by each of the models. """ votes = MultiVote([]) - for order in range(0, len(self.models)): - model = self.models[order] + for model in self.models: prediction_info = model.predict( \ input_data, missing_strategy=missing_strategy, full=True) @@ -174,6 +188,7 @@ def generate_votes(self, input_data, return votes + #pylint: disable=locally-disabled,protected-access def _generate_votes(self, input_data, missing_strategy=LAST_PREDICTION, unused_fields=None): """ Generates a MultiVote object that contains the predictions @@ -183,8 +198,7 @@ def _generate_votes(self, input_data, missing_strategy=LAST_PREDICTION, to the correct type will be applied. 
""" votes = MultiVote([]) - for order in range(0, len(self.models)): - model = self.models[order] + for model in self.models: prediction_info = model._predict( \ input_data, missing_strategy=missing_strategy, unused_fields=unused_fields) @@ -205,6 +219,10 @@ def generate_votes_distribution(self, input_data, missing_strategy=LAST_PREDICTION, method=PROBABILITY_CODE): + """Generates a MultiVoteList object to contain the predictions + of a list of models as the list of classes and their predicted + probabilities or confidence. + """ votes = [] for model in self.models: model.class_names = self.class_names @@ -262,9 +280,8 @@ def batch_predict(self, input_data_list, output_file_path=None, output_file_path) if reuse: try: - predictions_file = open(output_file) - predictions_file.close() - continue + with open(output_file): + continue except IOError: pass try: @@ -277,11 +294,11 @@ def batch_predict(self, input_data_list, output_file_path=None, out.open_writer() for index, input_data in enumerate(input_data_list): if add_headers: - input_data = dict(zip(headers, input_data)) + input_data = dict(list(zip(headers, input_data))) prediction = model.predict(input_data, missing_strategy=missing_strategy, full=True) - if model.tree.regression: + if model.regression: # if median is to be used, we just replace the prediction if use_median: prediction["prediction"] = prediction["median"] @@ -299,6 +316,7 @@ def batch_predict(self, input_data_list, output_file_path=None, out.close_writer() if not to_file: return votes + return output_file_path def batch_votes(self, predictions_file_path, data_locale=None): """Adds the votes for predictions generated by the models. @@ -314,4 +332,5 @@ def batch_votes(self, predictions_file_path, data_locale=None): model.resource_id, predictions_file_path)) return read_votes( - votes_files, self.models[0].to_prediction, data_locale=data_locale) + votes_files, partial(to_prediction, self.models[0]), + data_locale=data_locale) diff --git a/bigml/multivote.py b/bigml/multivote.py index 656d5af2..873e79aa 100644 --- a/bigml/multivote.py +++ b/bigml/multivote.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=dangerous-default-value # -# Copyright 2012-2019 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -84,7 +84,7 @@ def softmax(predictions): """ total = 0.0 normalized = {} - for category, cat_info in predictions.items(): + for category, cat_info in list(predictions.items()): normalized[category] = { \ "probability": math.exp(cat_info["probability"]), "order": cat_info["order"]} @@ -92,7 +92,7 @@ def softmax(predictions): return float('nan') if total == 0 else \ {category: {"probability": cat_info["probability"] / total, "order": cat_info["order"]} - for category, cat_info in normalized.items()} + for category, cat_info in list(normalized.items())} def ws_confidence(prediction, distribution, ws_z=1.96, ws_n=None): @@ -137,7 +137,7 @@ def merge_distributions(distribution, new_distribution): """Adds up a new distribution structure to a map formatted distribution """ - for value, instances in new_distribution.items(): + for value, instances in list(new_distribution.items()): if value not in distribution: distribution[value] = 0 distribution[value] += instances @@ -169,7 +169,7 @@ def merge_bins(distribution, limit): return merge_bins(new_distribution, limit) -class MultiVote(object): +class MultiVote(): """A multiple vote prediction Uses a number of predictions to generate a combined prediction. @@ -189,7 +189,7 @@ def grouped_distribution(cls, instance): dict((x[0], x[1]) for x in prediction['distribution'])) # when there's more instances, sort elements by their mean distribution = [list(element) for element in - sorted(joined_distribution.items(), + sorted(list(joined_distribution.items()), key=lambda x: x[0])] if distribution_unit == 'counts': distribution_unit = ('bins' if len(distribution) > BINS_LIMIT @@ -208,8 +208,8 @@ def avg(cls, instance, full=False): returned """ if (instance.predictions and full and - not all([CONFIDENCE_W in prediction - for prediction in instance.predictions])): + not all(CONFIDENCE_W in prediction + for prediction in instance.predictions)): raise Exception("Not enough data to use the selected " "prediction method. Try creating your" " model anew.") @@ -267,8 +267,8 @@ def error_weighted(cls, instance, full=False): predictions) is also returned """ if (instance.predictions and full and - not all([CONFIDENCE_W in prediction - for prediction in instance.predictions])): + not all(CONFIDENCE_W in prediction + for prediction in instance.predictions)): raise Exception("Not enough data to use the selected " "prediction method. Try creating your" " model anew.") @@ -282,8 +282,7 @@ def error_weighted(cls, instance, full=False): if normalization_factor == 0: if full: return {"prediction": float('nan')} - else: - return float('nan') + return float('nan') if full: combined_error = 0.0 for prediction in instance.predictions: @@ -323,9 +322,9 @@ def normalize_error(cls, instance, top_range): """Normalizes error to a [0, top_range] and builds probabilities """ - if instance.predictions and not all([CONFIDENCE_W in prediction - for prediction - in instance.predictions]): + if instance.predictions and not all(CONFIDENCE_W in prediction + for prediction + in instance.predictions): raise Exception("Not enough data to use the selected " "prediction method. 
Try creating your" " model anew.") @@ -371,10 +370,10 @@ def __init__(self, predictions, boosting_offsets=None): else: self.predictions.append(predictions) - if not all(['order' in prediction for prediction in predictions]): + if not all('order' in prediction for prediction in predictions): - for i in range(len(self.predictions)): - self.predictions[i]['order'] = i + for i, prediction in enumerate(self.predictions): + prediction['order'] = i def is_regression(self): """Returns True if all the predictions are numbers @@ -383,8 +382,8 @@ def is_regression(self): if self.boosting: return any(prediction.get('class') is None for prediction in self.predictions) - return all([isinstance(prediction['prediction'], numbers.Number) - for prediction in self.predictions]) + return all(isinstance(prediction['prediction'], numbers.Number) + for prediction in self.predictions) def next_order(self): """Return the next order to be assigned to a prediction @@ -417,8 +416,8 @@ def combine(self, method=DEFAULT_METHOD, options=None, full=False): # and all predictions should have the weight-related keys if keys is not None: for key in keys: - if not all([key in prediction for prediction - in self.predictions]): + if not all(key in prediction for prediction + in self.predictions): raise Exception("Not enough data to use the selected " "prediction method. Try creating your" " model anew.") @@ -431,46 +430,45 @@ def combine(self, method=DEFAULT_METHOD, options=None, full=False): # boosting offset return weighted_sum(self.predictions, weight="weight") + \ self.boosting_offsets - else: - return self.classification_boosting_combiner( \ - options, full=full) - elif self.is_regression(): + return self.classification_boosting_combiner( \ + options, full=full) + if self.is_regression(): for prediction in self.predictions: if prediction[CONFIDENCE_W] is None: prediction[CONFIDENCE_W] = 0 function = NUMERICAL_COMBINATION_METHODS.get(method, self.__class__.avg) return function(self, full=full) + if method == THRESHOLD: + if options is None: + options = {} + predictions = self.single_out_category(options) + elif method == PROBABILITY: + predictions = MultiVote([]) + predictions.predictions = self.probability_weight() else: - if method == THRESHOLD: - if options is None: - options = {} - predictions = self.single_out_category(options) - elif method == PROBABILITY: - predictions = MultiVote([]) - predictions.predictions = self.probability_weight() - else: - predictions = self - return predictions.combine_categorical( - COMBINATION_WEIGHTS.get(method, None), - full=full) + predictions = self + return predictions.combine_categorical( + COMBINATION_WEIGHTS.get(method, None), + full=full) def probability_weight(self): """Reorganizes predictions depending on training data probability """ predictions = [] - for prediction in self.predictions: - if 'distribution' not in prediction or 'count' not in prediction: + for prediction_info in self.predictions: + if 'distribution' not in prediction_info or \ + 'count' not in prediction_info: raise Exception("Probability weighting is not available " "because distribution information is missing.") - total = prediction['count'] + total = prediction_info['count'] if total < 1 or not isinstance(total, int): raise Exception("Probability weighting is not available " "because distribution seems to have %s " "as number of instances in a node" % total) - order = prediction['order'] - for prediction, instances in prediction['distribution']: + order = prediction_info['order'] + for prediction, instances 
in prediction_info['distribution']: predictions.append({ \ 'prediction': prediction, 'probability': round(float(instances) / total, PRECISION), @@ -485,8 +483,8 @@ def combine_distribution(self, weight_label='probability'): them and associate the sum of weights (the weight being the contents of the weight_label field of each prediction) """ - if not all([weight_label in prediction - for prediction in self.predictions]): + if not all(weight_label in prediction + for prediction in self.predictions): raise Exception("Not enough data to use the selected " "prediction method. Try creating your" " model anew.") @@ -499,7 +497,7 @@ def combine_distribution(self, weight_label='probability'): total += prediction['count'] if total > 0: distribution = [[key, value] for key, value in - distribution.items()] + list(distribution.items())] else: distribution = [] return distribution, total @@ -522,14 +520,13 @@ def combine_categorical(self, weight_label=None, full=False): weight = 1 for prediction in self.predictions: if weight_label is not None: - if weight_label not in COMBINATION_WEIGHTS.values(): + if weight_label not in list(COMBINATION_WEIGHTS.values()): raise Exception("Wrong weight_label value.") if weight_label not in prediction: raise Exception("Not enough data to use the selected " "prediction method. Try creating your" " model anew.") - else: - weight = prediction[weight_label] + weight = prediction[weight_label] category = prediction['prediction'] if full: instances += prediction['count'] @@ -539,9 +536,9 @@ def combine_categorical(self, weight_label=None, full=False): else: mode[category] = {"count": weight, "order": prediction['order']} - prediction = sorted(mode.items(), key=lambda x: (x[1]['count'], - -x[1]['order'], - x[0]), + prediction = sorted(list(mode.items()), key=lambda x: (x[1]['count'], + -x[1]['order'], + x[0]), reverse=True)[0][0] if full: output = {'prediction': prediction} @@ -575,9 +572,9 @@ def weighted_confidence(self, combined_prediction, weight_label): predictions = [prediction for prediction in self.predictions \ if prediction['prediction'] == combined_prediction] if (weight_label is not None and - (not isinstance(weight_label, basestring) or - any([not CONFIDENCE_W or weight_label not in prediction - for prediction in predictions]))): + (not isinstance(weight_label, str) or + any(not CONFIDENCE_W or weight_label not in prediction + for prediction in predictions))): raise ValueError("Not enough data to use the selected " "prediction method. Lacks %s information." 
% weight_label) @@ -611,10 +608,10 @@ def classification_boosting_combiner(self, options, full=False): "probability": weighted_sum(value, weight="weight") + \ self.boosting_offsets.get(key, 0), "order": categories.index(key)} for - key, value in grouped_predictions.items()} + key, value in list(grouped_predictions.items())} predictions = softmax(predictions) predictions = sorted( \ - predictions.items(), key=lambda(x): \ + list(predictions.items()), key=lambda x: \ (- x[1]["probability"], x[1]["order"])) prediction, prediction_info = predictions[0] confidence = round(prediction_info["probability"], PRECISION) @@ -626,8 +623,7 @@ def classification_boosting_combiner(self, options, full=False): "probability": round(prediction_info["probability"], PRECISION)} for prediction, prediction_info in predictions]} - else: - return prediction + return prediction def append(self, prediction_info): """Adds a new prediction into a list of predictions @@ -655,11 +651,6 @@ def append(self, prediction_info): "The minimal key for the prediction is " "'prediction': " "\n{'prediction': 'Iris-virginica'") - """ - elif isinstance(prediction_info, list): - if self.probabilities: - self.predictions.append(prediction_info) - """ def single_out_category(self, options): """Singles out the votes for a chosen category and returns a prediction @@ -727,9 +718,9 @@ def append_row(self, prediction_row, prediction_headers.append('order') prediction_row.append(order) prediction_info = {} - for i in range(0, len(prediction_row)): + for i, prediction_row_item in enumerate(prediction_row): prediction_info.update({prediction_headers[i]: - prediction_row[i]}) + prediction_row_item}) self.predictions.append(prediction_info) else: LOGGER.error("WARNING: failed to add the prediction.\n" @@ -747,8 +738,7 @@ def extend(self, predictions_info): """ if isinstance(predictions_info, list): order = self.next_order() - for i in range(0, len(predictions_info)): - prediction = predictions_info[i] + for i, prediction in enumerate(predictions_info): if isinstance(prediction, dict): prediction['order'] = order + i self.append(prediction) @@ -781,8 +771,7 @@ def extend_rows(self, predictions_rows, index = len(prediction_headers) prediction_headers.append('order') if isinstance(predictions_rows, list): - for i in range(0, len(predictions_rows)): - prediction = predictions_rows[i] + for i, prediction in enumerate(predictions_rows): if isinstance(prediction, list): if index == len(prediction): prediction.append(order + i) diff --git a/bigml/multivotelist.py b/bigml/multivotelist.py index 9f5e2401..72f2cb56 100644 --- a/bigml/multivotelist.py +++ b/bigml/multivotelist.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2017-2019 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,13 +18,12 @@ """ import logging +from bigml.util import PRECISION LOGGER = logging.getLogger('BigML') -from bigml.util import PRECISION - -class MultiVoteList(object): +class MultiVoteList(): """A multiple vote prediction in compact format Uses a number of predictions to generate a combined prediction. 
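For orientation, a minimal usage sketch of the refactored MultiModel/MultiVote API above; the model IDs and the input field name are hypothetical placeholders, and BigML credentials are assumed to be configured in the environment:

    from bigml.multimodel import MultiModel

    # Builds a local multi-model from two (hypothetical) decision tree IDs;
    # models raising NoRootDecisionTree are silently skipped.
    multi_model = MultiModel(["model/111111111111111111",
                              "model/222222222222222222"])
    # Each local model votes on the input and the votes are combined.
    votes = multi_model.generate_votes({"petal length": 4.2})
    combined = votes.combine(full=True)  # e.g. {"prediction": ..., ...}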
diff --git a/bigml/out_model/pythonmodel.py b/bigml/out_model/pythonmodel.py deleted file mode 100644 index 6839bde1..00000000 --- a/bigml/out_model/pythonmodel.py +++ /dev/null @@ -1,425 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2017-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -"""Model level output for python - -This module defines functions that generate python code to make local -predictions -""" -import sys -import os - -from bigml.tree_utils import slugify, INDENT, sort_fields, docstring_comment, \ - TERM_OPTIONS, TM_TOKENS, TM_FULL_TERM, TM_ALL, \ - ITEM_OPTIONS -from bigml.model import Model -from bigml.util import PY3 - -from bigml.out_tree.pythontree import PythonTree, PythonBoostedTree - - -# templates for static Python -BIGML_SCRIPT = os.path.dirname(__file__) -TERM_TEMPLATE = "%s/static/term_analysis.py" % BIGML_SCRIPT -ITEMS_TEMPLATE = "%s/static/items_analysis.py" % BIGML_SCRIPT -HADOOP_CSV_TEMPLATE = "%s/static/python_hadoop_csv.py" % \ - BIGML_SCRIPT -HADOOP_NEXT_TEMPLATE = "%s/static/python_hadoop_next.py" % \ - BIGML_SCRIPT -MAX_ARGS_LENGTH = -1 # in this version, the argument will be the input array - -PYTHON_CONV = { - "double": "locale.atof", - "float": "locale.atof", - "integer": "lambda x: int(locale.atof(x))", - "int8": "lambda x: int(locale.atof(x))", - "int16": "lambda x: int(locale.atof(x))", - "int32": "lambda x: int(locale.atof(x))", - "int64": "lambda x: long(locale.atof(x))", - "day": "lambda x: int(locale.atof(x))", - "month": "lambda x: int(locale.atof(x))", - "year": "lambda x: int(locale.atof(x))", - "hour": "lambda x: int(locale.atof(x))", - "minute": "lambda x: int(locale.atof(x))", - "second": "lambda x: int(locale.atof(x))", - "millisecond": "lambda x: int(locale.atof(x))", - "day-of-week": "lambda x: int(locale.atof(x))", - "day-of-month": "lambda x: int(locale.atof(x))" -} - -PYTHON_TYPE = { - "double": "float", - "float": "float", - "integer": "int", - "int8": "int", - "int16": "int", - "int32": "int", - "int64": "long", - "day": "int", - "month": "int", - "year": "int", - "hour": "int", - "minute": "int", - "second": "int", - "millisecond": "int", - "day-of-week": "int", - "day-of-month": "int" -} - - -PYTHON_KEYWORDS = [ - "and", - "assert", - "break", - "class", - "continue", - "def", - "del", - "elif", - "else", - "except", - "exec", - "finally", - "for", - "from", - "global", - "if", - "import", - "in", - "is", - "lambda", - "not", - "or", - "pass", - "print", - "raise", - "return", - "try", - "while ", - "Data", - "Float", - "Int", - "Numeric", - "Oxphys", - "array", - "close", - "float", - "int", - "input", - "open", - "range", - "type", - "write", - "zeros", - "acos", - "asin", - "atan", - "cos", - "e", - "exp", - "fabs", - "floor", - "log", - "log10", - "pi", - "sin", - "sqrt", - "tan" -] - - -class PythonModel(Model): - - - def __init__(self, model, api=None, fields=None, boosting=None): - """Empty attributes to be overriden - - """ - self.tree_class = PythonTree if not boosting else 
PythonBoostedTree - Model.__init__(self, model, api, fields) - - def plug_in(self, out=sys.stdout, - filter_id=None, subtree=True, hadoop=False): - """Generates a basic python function that implements the model. - - `out` is file descriptor to write the python code. - - """ - ids_path = self.get_ids_path(filter_id) - if hadoop: - return (self.hadoop_python_mapper(out=out, - ids_path=ids_path, - subtree=subtree) or - self.hadoop_python_reducer(out=out)) - else: - return self.python(out, self.docstring(), ids_path=ids_path, - subtree=subtree) - - def hadoop_python_mapper(self, out=sys.stdout, ids_path=None, - subtree=True): - """Generates a hadoop mapper header to make predictions in python - - """ - input_fields = [(value, key) for (key, value) in - sorted(self.inverted_fields.items(), - key=lambda x: x[1])] - parameters = [value for (key, value) in - input_fields if key != self.tree.objective_id] - args = [] - for field in input_fields: - slug = slugify(self.tree.fields[field[0]]['name']) - self.tree.fields[field[0]].update(slug=slug) - if field[0] != self.tree.objective_id: - args.append("\"" + self.tree.fields[field[0]]['slug'] + "\"") - - with open(HADOOP_CSV_TEMPLATE) as template_hander: - output = template_handler.read() % u",".join(parameters) - - output += u"\n%sself.INPUT_FIELDS = [%s]\n" % \ - ((INDENT * 3), (",\n " + INDENT * 8).join(args)) - - input_types = [] - prefixes = [] - suffixes = [] - count = 0 - fields = self.tree.fields - for key in [field[0] for field in input_fields - if field[0] != self.tree.objective_id]: - input_type = ('None' if not fields[key]['datatype'] in - PYTHON_CONV - else PYTHON_CONV[fields[key]['datatype']]) - input_types.append(input_type) - if 'prefix' in fields[key]: - prefixes.append("%s: %s" % (count, - repr(fields[key]['prefix']))) - if 'suffix' in fields[key]: - suffixes.append("%s: %s" % (count, - repr(fields[key]['suffix']))) - count += 1 - static_content = "%sself.INPUT_TYPES = [" % (INDENT * 3) - formatter = ",\n%s" % (" " * len(static_content)) - output += u"\n%s%s%s" % (static_content, - formatter.join(input_types), - "]\n") - static_content = "%sself.PREFIXES = {" % (INDENT * 3) - formatter = ",\n%s" % (" " * len(static_content)) - output += u"\n%s%s%s" % (static_content, - formatter.join(prefixes), - "}\n") - static_content = "%sself.SUFFIXES = {" % (INDENT * 3) - formatter = ",\n%s" % (" " * len(static_content)) - output += u"\n%s%s%s" % (static_content, - formatter.join(suffixes), - "}\n") - - with open(HADOOP_NEXT_TEMPLATE) as template_hander: - output += template_handler.read() - - out.write(output) - out.flush() - - self.tree.python(out, self.docstring(), - input_map=True, - ids_path=ids_path, - subtree=subtree) - - output = \ -u""" -csv = CSVInput() -for values in csv: - if not isinstance(values, bool): - print u'%%s\\t%%s' %% (repr(values), repr(predict_%s(values))) -\n\n -""" % fields[self.tree.objective_id]['slug'] - out.write(output) - out.flush() - - def hadoop_python_reducer(self, out=sys.stdout): - """Generates a hadoop reducer to make predictions in python - - """ - - with open(HADOOP_NEXT_TEMPLATE) as template_hander: - output = template_handler.read() - out.write(output) - out.flush() - - def term_analysis_body(self, term_analysis_predicates, - item_analysis_predicates): - """ Writes auxiliary functions to handle the term and item - analysis fields - - """ - body = u"" - # static content - body += u""" - import re -""" - if term_analysis_predicates: - body += """ - tm_tokens = '%s' - tm_full_term = '%s' - tm_all = '%s' - 
-""" % (TM_TOKENS, TM_FULL_TERM, TM_ALL) - - with open(TERM_TEMPLATE) as template_handler: - body += template_handler.read() - - term_analysis_options = set(map(lambda x: x[0], - term_analysis_predicates)) - term_analysis_predicates = set(term_analysis_predicates) - body += """ - term_analysis = {""" - for field_id in term_analysis_options: - field = self.fields[field_id] - body += """ - \"%s\": {""" % field['slug'] - options = sorted(field['term_analysis'].keys()) - for option in options: - if option in TERM_OPTIONS: - body += """ - \"%s\": %s,""" % (option, repr(field['term_analysis'][option])) - body += """ - },""" - body += """ - }""" - body += """ - term_forms = {""" - term_forms = {} - fields = self.fields - for field_id, term in term_analysis_predicates: - alternatives = [] - field = fields[field_id] - if field['slug'] not in term_forms: - term_forms[field['slug']] = {} - all_forms = field['summary'].get('term_forms', {}) - if all_forms: - alternatives = all_forms.get(term, []) - if alternatives: - terms = [term] - terms.extend(all_forms.get(term, [])) - term_forms[field['slug']][term] = terms - for field in term_forms: - body += """ - \"%s\": {""" % field - terms = sorted(term_forms[field].keys()) - for term in terms: - body += """ - u\"%s\": %s,""" % (term, term_forms[field][term]) - body += """ - },""" - body += """ - } - -""" - if item_analysis_predicates: - with open(ITEMS_TEMPLATE) as template_handler: - body += template_handler.read() - - item_analysis_options = set(map(lambda x: x[0], - item_analysis_predicates)) - item_analysis_predicates = set(item_analysis_predicates) - body += """ - item_analysis = {""" - for field_id in item_analysis_options: - field = self.fields[field_id] - body += """ - \"%s\": {""" % field['slug'] - for option in field['item_analysis']: - if option in ITEM_OPTIONS: - body += """ - \"%s\": %s,""" % (option, repr(field['item_analysis'][option])) - body += """ - },""" - body += """ - } - -""" - return body - - def python(self, out, docstring, ids_path=None, subtree=True): - """Generates a python function that implements the model. 
- - """ - - args = [] - args_tree = [] - parameters = sort_fields(self.fields) - input_map = len(parameters) > MAX_ARGS_LENGTH and MAX_ARGS_LENGTH > 0 - reserved_keywords = PYTHON_KEYWORDS if not input_map else None - prefix = "_" if not input_map else "" - for field in [(key, val) for key, val in parameters]: - field_name_to_show = self.fields[field[0]]['name'].strip() - if field_name_to_show == "": - field_name_to_show = field[0] - slug = slugify(field_name_to_show, - reserved_keywords=reserved_keywords, prefix=prefix) - self.fields[field[0]].update(slug=slug) - if not input_map: - if field[0] != self.objective_id: - args.append("%s=None" % (slug)) - args_tree.append("%s=%s" % (slug, slug)) - if input_map: - args.append("data={}") - args_tree.append("data=data") - - function_name = self.fields[self.objective_id]['slug'] if \ - not self.boosting else \ - self.fields[self.boosting["objective_field"]]['slug'] - if prefix == "_" and function_name[0] == prefix: - function_name = function_name[1:] - if function_name == "": - function_name = "field_" + self.objective_id - python_header = u"#!/usr/bin/env python\n# -*- coding: utf-8 -*-\n" - predictor_definition = (u"def predict_%s" % - function_name) - depth = len(predictor_definition) + 1 - predictor = u"%s(%s):\n" % (predictor_definition, - (",\n" + " " * depth).join(args)) - predictor_doc = (INDENT + u"\"\"\" " + docstring + - u"\n" + INDENT + u"\"\"\"\n") - body, term_analysis_predicates, item_analysis_predicates = \ - self.tree.plug_in_body(input_map=input_map, - ids_path=ids_path, - subtree=subtree) - terms_body = "" - if term_analysis_predicates or item_analysis_predicates: - terms_body = self.term_analysis_body(term_analysis_predicates, - item_analysis_predicates) - predictor = python_header + predictor + \ - predictor_doc + terms_body + body - - predictor_model = u"def predict" - depth = len(predictor_model) + 1 - predictor += u"\n\n%s(%s):\n" % (predictor_model, - (",\n" + " " * depth).join(args)) - predictor += u"%sprediction = predict_%s(%s)\n" % ( \ - INDENT, function_name, ", ".join(args_tree)) - - if self.boosting is not None: - predictor += u"%sprediction.update({\"weight\": %s})\n" % \ - (INDENT, self.boosting.get("weight")) - if self.boosting.get("objective_class") is not None: - predictor += u"%sprediction.update({\"class\": \"%s\"})\n" % \ - (INDENT, self.boosting.get("objective_class")) - predictor += u"%sreturn prediction" % INDENT - - if not PY3: - predictor = predictor.encode("utf8") - out.write(predictor) - out.flush() diff --git a/bigml/out_tree/pythontree.py b/bigml/out_tree/pythontree.py deleted file mode 100644 index a5807841..00000000 --- a/bigml/out_tree/pythontree.py +++ /dev/null @@ -1,322 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2017-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -"""Tree level output for python - -This module defines functions that generate python code to make local -predictions -""" - -from bigml.tree_utils import ( - slugify, sort_fields, filter_nodes, missing_branch, none_value, - one_branch, split, MAX_ARGS_LENGTH, INDENT, PYTHON_OPERATOR, TM_TOKENS, - TM_FULL_TERM, TM_ALL, TERM_OPTIONS, ITEM_OPTIONS, COMPOSED_FIELDS, - NUMERIC_VALUE_FIELDS) - -from bigml.tree import Tree -from bigml.boostedtree import BoostedTree - -MISSING_OPERATOR = { - "=": "is", - "!=": "is not" -} - - -def value_to_print(value, optype): - """String of code that represents a value according to its type - - """ - # the value is numeric for these fields - if (optype in NUMERIC_VALUE_FIELDS or value is None): - return value - return u"\"%s\"" % value.replace('"', '\\"') - - -def map_data(field, input_map=False, missing=False): - """Returns the subject of the condition in map format when - more than MAX_ARGS_LENGTH arguments are used. - """ - if input_map: - if missing: - return "data.get('%s')" % field - else: - return "data['%s']" % field - return field - - -def missing_prefix_code(self, field, input_map, cmv): - """Part of the condition that checks for missings when missing_splits - has been used - - """ - - negation = u"" if self.predicate.missing else u" not" - connection = u"or" if self.predicate.missing else u"and" - if not self.predicate.missing: - cmv.append(self.fields[field]['slug']) - return u"%s is%s None %s " % (map_data(self.fields[field]['slug'], - input_map, - True), - negation, - connection) - - -def split_condition_code(self, field, depth, input_map, - pre_condition, term_analysis_fields, - item_analysis_fields): - """Condition code for the split - - """ - - optype = self.fields[field]['optype'] - value = value_to_print(self.predicate.value, optype) - - if optype in ['text', 'items']: - if optype == 'text': - term_analysis_fields.append((field, - self.predicate.term)) - matching_function = "term_matches" - else: - item_analysis_fields.append((field, - self.predicate.term)) - matching_function = "item_matches" - - return u"%sif (%s%s(%s, \"%s\", %s%s) %s " \ - u"%s):\n" % \ - (INDENT * depth, pre_condition, matching_function, - map_data(self.fields[field]['slug'], - input_map, - False), - self.fields[self.predicate.field]['slug'], - 'u' if isinstance(self.predicate.term, unicode) else '', - value_to_print(self.predicate.term, 'categorical'), - PYTHON_OPERATOR[self.predicate.operator], - value) - - operator = (MISSING_OPERATOR[self.predicate.operator] if - self.predicate.value is None else - PYTHON_OPERATOR[self.predicate.operator]) - if self.predicate.value is None: - cmv.append(self.fields[field]['slug']) - return u"%sif (%s%s %s %s):\n" % \ - (INDENT * depth, pre_condition, - map_data(self.fields[field]['slug'], input_map, - False), - operator, - value) - - - -class PythonTree(Tree): - - def missing_check_code(self, field, depth, input_map, cmv, metric): - """Builds the code to predict when the field is missing - - """ - code = u"%sif (%s is None):\n" % \ - (INDENT * depth, - map_data(self.fields[field]['slug'], input_map, True)) - value = value_to_print(self.output, - self.fields[self.objective_id]['optype']) - code += u"%sreturn {\"prediction\": %s," \ - u" \"%s\": %s}\n" % \ - (INDENT * (depth + 1), value, metric, self.confidence) - cmv.append(self.fields[field]['slug']) - return code - - - def missing_prefix_code(self, field, input_map, cmv): - """Part of the condition that checks for missings when missing_splits - has been used - - """ - return 
missing_prefix_code(self, field, input_map, cmv) - - def split_condition_code(self, field, depth, input_map, - pre_condition, term_analysis_fields, - item_analysis_fields): - """Condition code for the split - - """ - - return split_condition_code(self, field, depth, input_map, - pre_condition, term_analysis_fields, - item_analysis_fields) - - def plug_in_body(self, depth=1, cmv=None, input_map=False, - ids_path=None, subtree=True): - """Translate the model into a set of "if" python statements. - - `depth` controls the size of indentation. As soon as a value is missing - that node is returned without further evaluation. - - """ - # label for the confidence measure and initialization - metric = "error" if self.regression else "confidence" - if cmv is None: - cmv = [] - body = u"" - term_analysis_fields = [] - item_analysis_fields = [] - - children = filter_nodes(self.children, ids=ids_path, - subtree=subtree) - if children: - - # field used in the split - field = split(children) - - has_missing_branch = (missing_branch(children) or - none_value(children)) - # the missing is singled out as a special case only when there's - # no missing branch in the children list - one_branch = not has_missing_branch or \ - self.fields[field]['optype'] in COMPOSED_FIELDS - if (one_branch and - not self.fields[field]['slug'] in cmv): - body += self.missing_check_code(field, depth, input_map, cmv, - metric) - - for child in children: - field = child.predicate.field - pre_condition = u"" - # code when missing_splits has been used - if has_missing_branch and child.predicate.value is not None: - pre_condition = self.missing_prefix_code(child, field, - input_map, cmv) - - # complete split condition code - body += child.split_condition_code( \ - field, depth, input_map, pre_condition, - term_analysis_fields, item_analysis_fields) - - # value to be determined in next node - next_level = child.plug_in_body(depth + 1, - cmv=cmv[:], - input_map=input_map, - ids_path=ids_path, - subtree=subtree) - - body += next_level[0] - term_analysis_fields.extend(next_level[1]) - item_analysis_fields.extend(next_level[2]) - else: - value = value_to_print(self.output, - self.fields[self.objective_id]['optype']) - body = u"%sreturn {\"prediction\":%s, \"%s\":%s}\n" % ( \ - INDENT * depth, value, metric, self.confidence) - - return body, term_analysis_fields, item_analysis_fields - - -class PythonBoostedTree(BoostedTree): - - def missing_check_code(self, field, depth, input_map, cmv): - """Builds the code to predict when the field is missing - - """ - code = u"%sif (%s is None):\n" % \ - (INDENT * depth, - map_data(self.fields[field]['slug'], input_map, True)) - value = value_to_print(self.output, "numeric") - code += u"%sreturn {\"prediction\":%s" % (INDENT * (depth + 1), - value) - if hasattr(self, "probability"): - code += u", \"probability\": %s" % self.probability - code += u"}\n" - cmv.append(self.fields[field]['slug']) - return code - - def missing_prefix_code(self, field, input_map, cmv): - """Part of the condition that checks for missings when missing_splits - has been used - - """ - return missing_prefix_code(self, field, input_map, cmv) - - def split_condition_code(self, field, depth, input_map, - pre_condition, term_analysis_fields, - item_analysis_fields): - """Condition code for the split - - """ - return split_condition_code(self, field, depth, input_map, - pre_condition, term_analysis_fields, - item_analysis_fields) - - def plug_in_body(self, depth=1, cmv=None, input_map=False, - ids_path=None, subtree=True): - 
"""Translate the model into a set of "if" python statements. - - `depth` controls the size of indentation. As soon as a value is missing - that node is returned without further evaluation. - - """ - if cmv is None: - cmv = [] - body = u"" - term_analysis_fields = [] - item_analysis_fields = [] - - children = filter_nodes(self.children, ids=ids_path, - subtree=subtree) - if children: - - # field used in the split - field = split(children) - - has_missing_branch = (missing_branch(children) or - none_value(children)) - # the missing is singled out as a special case only when there's - # no missing branch in the children list - one_branch = not has_missing_branch or \ - self.fields[field]['optype'] in COMPOSED_FIELDS - if (one_branch and - not self.fields[field]['slug'] in cmv): - body += self.missing_check_code(field, depth, input_map, cmv) - - for child in children: - field = child.predicate.field - pre_condition = u"" - # code when missing_splits has been used - if has_missing_branch and child.predicate.value is not None: - pre_condition = self.missing_prefix_code(child, field, - input_map, cmv) - - # complete split condition code - body += child.split_condition_code( \ - field, depth, input_map, pre_condition, - term_analysis_fields, item_analysis_fields) - - # value to be determined in next node - next_level = child.plug_in_body(depth + 1, - cmv=cmv[:], - input_map=input_map, - ids_path=ids_path, - subtree=subtree) - - body += next_level[0] - term_analysis_fields.extend(next_level[1]) - item_analysis_fields.extend(next_level[2]) - else: - value = value_to_print(self.output, "numeric") - body = u"%sreturn {\"prediction\":%s" % (INDENT * depth, value) - if hasattr(self, "probability"): - body += u", \"probability\": %s" % self.probability - body += u"}\n" - - return body, term_analysis_fields, item_analysis_fields diff --git a/bigml/path.py b/bigml/path.py index 991c4b5b..e85a2ac3 100644 --- a/bigml/path.py +++ b/bigml/path.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -54,13 +53,13 @@ def merge_rules(list_of_predicates, fields, label='name'): last_predicate = list_of_predicates[-1] # if the last predicate is "is missing" forget about the rest if last_predicate.operator == "=" and last_predicate.value is None: - return u"%s is missing" % name + return "%s is missing" % name # if the last predicate is "is not missing" if last_predicate.operator[0] in ["!", "/"] and \ last_predicate.value is None: if len(list_of_predicates) == 1: # if there's only one predicate, then write "is not missing" - return u"%s is not missing" % name + return "%s is not missing" % name list_of_predicates = list_of_predicates[0: -1] missing_flag = False if last_predicate.missing: @@ -83,6 +82,7 @@ def merge_rules(list_of_predicates, fields, label='name'): return " and ".join( [predicate.to_rule(fields, label=label).strip() for predicate in list_of_predicates]) + return "" def merge_numeric_rules(list_of_predicates, fields, label='name', @@ -106,18 +106,18 @@ def merge_numeric_rules(list_of_predicates, fields, label='name', break if equal is not None: return equal.to_rule(fields, label=label, missing=missing_flag) - rule = u'' + rule = '' field_id = list_of_predicates[0].field name = fields[field_id][label] if minor[0] is not None and major[0] is not None: predicate, value = minor - rule = u"%s %s " % (value, reverse(predicate.operator)) + rule = "%s %s " % (value, reverse(predicate.operator)) rule += name predicate, value = major - rule += u" %s %s " % (predicate.operator, value) + rule += " %s %s " % (predicate.operator, value) if missing_flag: - rule += u" or missing" + rule += " or missing" else: predicate = minor[0] if minor[0] is not None else major[0] rule = predicate.to_rule(fields, label=label, missing=missing_flag) @@ -143,7 +143,7 @@ def merge_text_rules(list_of_predicates, fields, label='name'): for predicate in contains[1:]: if predicate.term not in rules: rules.append(predicate.term) - rule = u" and ".join(rules) + rule = " and ".join(rules) if not_contains: if not rules: rules_not.append( @@ -155,7 +155,7 @@ def merge_text_rules(list_of_predicates, fields, label='name'): for predicate in not_contains[1:]: if predicate.term not in rules_not: rules_not.append(predicate.term) - rule += u" or ".join(rules_not) + rule += " or ".join(rules_not) return rule @@ -180,7 +180,7 @@ def merge_categorical_rules(list_of_predicates, for predicate in equal[1:]: if not predicate.value in rules: rules.append(predicate.value) - rule = u" and ".join(rules) + rule = " and ".join(rules) if not_equal and not rules: rules_not.append(not_equal[0].to_rule( \ fields, label=label, missing=False).strip()) @@ -188,14 +188,14 @@ def merge_categorical_rules(list_of_predicates, if predicate.value not in rules_not: rules_not.append(predicate.value) if rules_not: - connector = u" and " if rule else u"" - rule += connector + u" or ".join(rules_not) + connector = " and " if rule else "" + rule += connector + " or ".join(rules_not) if missing_flag: - rule += u" or missing" + rule += " or missing" return rule -class Path(object): +class Path(): """A Path as a list of Predicates """ @@ -213,19 +213,17 @@ def __init__(self, predicates=None): " objects. 
Please check the arguments for the" " constructor.") - + #pylint: disable=locally-disabled,redefined-builtin def to_rules(self, fields, label='name', format=EXTENDED): """ Builds rules string from a list lf predicates in different formats """ if format == EXTENDED: return self.to_extended_rules(fields, label=label) - elif format == BRIEF: + if format == BRIEF: return self.to_brief_rules(fields, label=label) - else: - raise ValueError("Invalid format. The list of valid formats are 0 " - "(extended) or 1 (brief).") - + raise ValueError("Invalid format. The list of valid formats are 0 " + "(extended) or 1 (brief).") def to_extended_rules(self, fields, label='name'): """ Builds rules string in ordered and extended format diff --git a/bigml/pca.py b/bigml/pca.py index 465f0c4f..22eb37c8 100644 --- a/bigml/pca.py +++ b/bigml/pca.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2018-2019 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -21,7 +20,7 @@ embedded into your application without needing to send requests to BigML.io. -This module cannot only save you a few credits, but also enormously +This module can help you enormously to reduce the latency for each prediction and let you use your PCAs offline. Example usage (assuming that you have previously set up the BIGML_USERNAME @@ -44,10 +43,13 @@ from bigml.api import FINISHED -from bigml.api import get_status, get_api_connection -from bigml.util import cast, NUMERIC +from bigml.api import get_status, get_api_connection, get_pca_id +from bigml.util import cast, use_cache, load, NUMERIC, get_data_format, \ + get_formatted_data, format_data, get_data_transformations from bigml.basemodel import get_resource_dict from bigml.modelfields import ModelFields +from bigml.constants import OUT_NEW_FIELDS, OUT_NEW_HEADERS, INTERNAL + try: from bigml.laminar.numpy_ops import dot @@ -89,10 +91,19 @@ class PCA(ModelFields): """ - def __init__(self, pca, api=None): + def __init__(self, pca, api=None, cache_get=None): + + if use_cache(cache_get): + # using a cache to store the model attributes + self.__dict__ = load(get_pca_id(pca), cache_get) + return self.resource_id = None + self.name = None + self.description = None + self.parent_id = None self.input_fields = [] + self.default_numeric_value = None self.term_forms = {} self.tag_clouds = {} self.dataset_field_types = {} @@ -104,22 +115,25 @@ def __init__(self, pca, api=None): self.item_analysis = {} self.standardize = None self.famd_j = 1 - self.api = get_api_connection(api) + api = get_api_connection(api) self.resource_id, pca = get_resource_dict( \ - pca, "pca", api=self.api) + pca, "pca", api=api) if 'object' in pca and \ isinstance(pca['object'], dict): pca = pca['object'] try: + self.parent_id = pca.get('dataset') + self.name = pca.get("name") + self.description = pca.get("description") self.input_fields = pca.get("input_fields", []) + self.default_numeric_value = pca.get("default_numeric_value") self.dataset_field_types = pca.get("dataset_field_types", {}) self.famd_j = 1 if (self.dataset_field_types['categorical'] != \ self.dataset_field_types['total']) else \ self.dataset_field_types['categorical'] - - except KeyError: + except (AttributeError, KeyError): raise ValueError("Failed to find the pca expected " "JSON structure. 
Check your arguments.") if 'pca' in pca and \ @@ -133,25 +147,26 @@ def __init__(self, pca, api=None): if not self.input_fields: self.input_fields = [ \ field_id for field_id, _ in - sorted(self.fields.items(), + sorted(list(self.fields.items()), key=lambda x: x[1].get("column_number"))] missing_tokens = pca_info.get("missing_tokens") + for field_id, field in fields.items(): + if field["optype"] == "categorical": + probabilities = [probability for _, probability in \ + field["summary"]["categories"]] + if field["summary"].get("missing_count", 0) > 0: + probabilities.append( + field["summary"]["missing_count"]) + total = float(sum(probabilities)) + if total > 0: + probabilities = [probability / total for probability \ + in probabilities] + self.categories_probabilities[field_id] = probabilities ModelFields.__init__( self, fields, - objective_id=None, terms=True, categories=True, + objective_id=None, categories=True, numerics=False, missing_tokens=missing_tokens) - for field_id in self.categories: - field = self.fields[field_id] - probabilities = [probability for _, probability in \ - field["summary"]["categories"]] - if field["summary"].get("missing_count", 0) > 0: - probabilities.append(field["summary"]["missing_count"]) - total = float(sum(probabilities)) - if total > 0: - probabilities = [probability / total for probability \ - in probabilities] - self.categories_probabilities[field_id] = probabilities self.components = pca_info.get('components') self.eigenvectors = pca_info.get('eigenvectors') self.cumulative_variance = pca_info.get('cumulative_variance') @@ -176,23 +191,23 @@ def projection(self, input_data, max_components=None, """ - new_data = self.filter_input_data( \ + norm_input_data = self.filter_input_data( \ input_data, add_unused_fields=False) # Strips affixes for numeric values and casts to the final field type - cast(new_data, self.fields) + cast(norm_input_data, self.fields) # Computes text and categorical field expansion into an input array of # terms and frequencies - unique_terms = self.get_unique_terms(new_data) + unique_terms = self.get_unique_terms(norm_input_data) # Creates an input vector with the values for all expanded fields. 
# The input mask marks the non-missing or categorical fields
+        # The `missings` variable is a boolean indicating whether there's
+        # non-categorical fields missing
-        input_array, missings, input_mask = self.expand_input(new_data,
+        input_array, missings, input_mask = self.expand_input(norm_input_data,
                                                               unique_terms)
        components = self.eigenvectors[:]
        if max_components is not None:
@@ -212,8 +227,8 @@ def projection(self, input_data, max_components=None,
            result[index] = value / missing_sums[index] \
                if missing_sums[index] > 0 else value
        if full:
-            result = dict(zip(["PC%s" % index \
-                for index in range(1, len(components) + 1)], result))
+            result = dict(list(zip(["PC%s" % index \
+                for index in range(1, len(components) + 1)], result)))
        return result
@@ -238,12 +253,11 @@ def _get_mean_stdev(self, field, field_id=None, index=None):
            mean = self.categories_probabilities[field_id][index]
            stdev = self.famd_j * math.sqrt(mean * self.famd_j)
            return mean, stdev
-        elif field['optype'] == NUMERIC:
+        if field['optype'] == NUMERIC:
            return field["summary"]["mean"], \
                field["summary"]["standard_deviation"]
-        else:
-            return self.text_stats[field_id]['means'][index], \
-                self.text_stats[field_id]['standard_deviations'][index]
+        return self.text_stats[field_id]['means'][index], \
+            self.text_stats[field_id]['standard_deviations'][index]

    def expand_input(self, input_data, unique_terms):
@@ -262,7 +276,7 @@ def expand_input(self, input_data, unique_terms):
        input_array = []
        input_mask = []
        missings = False
-        for index, field_id in enumerate(self.input_fields):
+        for field_id in self.input_fields:
            field = self.fields[field_id]
            optype = field["optype"]
            if optype == NUMERIC:
@@ -297,13 +311,66 @@
                    input_mask.append(1)
                if self.standardized:
-                    for index, frequency in enumerate(new_inputs):
+                    for index2, frequency in enumerate(new_inputs):
                        mean, stdev = self._get_mean_stdev( \
-                            field, field_id, index)
-                        new_inputs[index] = frequency - mean
+                            field, field_id, index2)
+                        new_inputs[index2] = frequency - mean
                        if stdev > 0:
-                            new_inputs[index] /= stdev
+                            new_inputs[index2] /= stdev
                # indexes of non-missing values
                input_array.extend(new_inputs)

        return input_array, missings, input_mask
+
+    def predict(self, input_data, max_components=None,
+                variance_threshold=None, full=False):
+        """Method to homogenize the local models interface for all BigML
+        models. It returns the projection method result.
+        """
+        return self.projection(input_data, max_components=max_components,
+            variance_threshold=variance_threshold, full=full)
+
+    def batch_predict(self, input_data_list, outputs=None, **kwargs):
+        """Creates a batch projection for a list of inputs using the local
+        PCA. It allows defining some output settings to decide the fields
+        to be added to the input_data (prediction, probability, etc.) and
+        the names we want to assign to these new fields. The outputs
+        argument accepts a dictionary with keys "output_fields", to contain
+        a list of the prediction properties to add (all principal components
+        by default) and "output_headers", to contain a list of the headers
+        to be used when adding them (identical to "output_fields" list,
+        by default).
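To make the `outputs` settings just described concrete before the parameter list continues, a hedged sketch; the PCA ID and the header names are hypothetical placeholders:

    from bigml.pca import PCA

    local_pca = PCA("pca/111111111111111111")
    # Adds only the first two components, under user-chosen headers.
    rows = local_pca.batch_predict(
        [{"petal length": 4.2, "petal width": 1.3}],
        outputs={"output_fields": ["PC1", "PC2"],
                 "output_headers": ["first component", "second component"]})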
+
+        :param input_data_list: List of input data to be predicted
+        :type input_data_list: list or Pandas DataFrame
+        :param dict outputs: properties that define the headers and fields to
+            be added to the input data
+        :return: the list of input data plus the predicted values
+        :rtype: list or Pandas DataFrame depending on the input type in
+            input_data_list
+        """
+        if outputs is None:
+            outputs = {}
+        new_fields = outputs.get(OUT_NEW_FIELDS, ["PC%s" % index
+            for index in range(1, len(self.eigenvectors) + 1)])
+        new_headers = outputs.get(OUT_NEW_HEADERS, new_fields)
+        if len(new_fields) > len(new_headers):
+            # list.extend, not the non-existent list.expand
+            new_headers.extend(new_fields[len(new_headers):])
+        else:
+            new_headers = new_headers[0: len(new_fields)]
+        data_format = get_data_format(input_data_list)
+        inner_data_list = get_formatted_data(input_data_list, INTERNAL)
+        kwargs.update({"full": True})
+        for input_data in inner_data_list:
+            prediction = self.projection(input_data, **kwargs)
+            for index, key in enumerate(new_fields):
+                input_data[new_headers[index]] = prediction[key]
+        if data_format != INTERNAL:
+            return format_data(inner_data_list, out_format=data_format)
+        return inner_data_list
+
+    def data_transformations(self):
+        """Returns the pipeline transformations previous to the modeling
+        step as a pipeline, so that they can be used in local predictions.
+        """
+        return get_data_transformations(self.resource_id, self.parent_id)
diff --git a/bigml/pipeline/__init__.py b/bigml/pipeline/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/bigml/pipeline/pipeline.py b/bigml/pipeline/pipeline.py
new file mode 100644
index 00000000..20cbb8b9
--- /dev/null
+++ b/bigml/pipeline/pipeline.py
@@ -0,0 +1,417 @@
+# -*- coding: utf-8 -*-
+#pylint: disable=locally-disabled,cyclic-import
+#
+# Copyright 2022-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+"""
+Pipeline: Classes that encapsulate the information needed to add the new
+fields and predictions defined in a sequence of transformations or models.
+The arguments to create a Pipeline are its name and the list of
+datasets and models (and/or anomaly detectors, clusters,
+etc.) that describe the input data processing to be used.
+ +""" + +import os +import zipfile + +from datetime import datetime + +from bigml.api import get_api_connection, get_resource_id, get_resource_type +from bigml.util import use_cache, load, check_dir, get_data_format, \ + format_data, save_json, fs_cache_get, fs_cache_set, \ + dump, asciify +from bigml.constants import STORAGE +from bigml.dataset import Dataset +from bigml.supervised import SupervisedModel +from bigml.cluster import Cluster +from bigml.anomaly import Anomaly +from bigml.pca import PCA +from bigml.pipeline.transformer import BMLDataTransformer, DataTransformer + +try: + from bigml.topicmodel import TopicModel + NO_TOPIC = False +except ImportError: + NO_TOPIC = True + + +if NO_TOPIC: + LOCAL_CLASSES = { + "dataset": Dataset, + "cluster": Cluster, + "anomaly": Anomaly, + "pca": PCA, + } +else: + LOCAL_CLASSES = { + "dataset": Dataset, + "cluster": Cluster, + "anomaly": Anomaly, + "topicmodel": TopicModel, + "pca": PCA, + } + + +def get_datasets_chain(dataset, dataset_list=None): + """Builds recursively the chain of datasets leading to a dataset """ + if dataset_list is None: + dataset_list = [] + dataset_list.append(dataset) + if dataset.origin_dataset is None: + return dataset_list + + return get_datasets_chain(dataset.origin_dataset, dataset_list) + + +def get_datasets_dict(dataset, dataset_dict=None): + """Stores a dictionary dataset_id -> Dataset for the chain of datasets """ + if dataset_dict is None: + dataset_dict = {} + dataset_dict.update({dataset.resource_id: dataset}) + if dataset.origin_dataset is None: + return dataset_dict + + return get_datasets_dict(dataset.origin_dataset, dataset_dict) + + +def check_in_path(path, resource_list): + """Checks whether a list of resources is stored in a folder """ + for resource_id in resource_list: + if not os.path.exists(os.path.join( + path, resource_id.replace("/", "_"))): + return False + return True + + +class Pipeline(DataTransformer): + """Class to define sequential transformations. The transformations can + come from BigML resources or be defined as Pipe steps defined as functions + to be applied to DataFrame pipes, scikit pipelines + + """ + def __init__(self, name, steps=None, resource_id=None, description=None): + """Builds a Pipeline from the list of steps provided in the `steps` + argument. It is compulsory to assign a name that will be used as + reference + :param name: Reference name for the pipeline + :type name: str + :param steps: List of DataTransformers. All of them need to offer a + `.transform` method + :type steps: list + :param description: Description of the transformations in the pipeline + :type description: str + """ + super().__init__(None, # no generator is provided + None, # no data format is assumed + resource_id or name, + name, + description) + + self.steps = [] + self.extend(steps) + + def extend(self, steps=None): + """Adding new transformations to the Pipeline steps""" + if steps is None: + steps = [] + for step in steps: + if not hasattr(step, "transform"): + raise ValueError("Failed to find the .transform method in " + "all the Pipeline steps.") + self.steps.extend(steps) + + def transform(self, input_data_list, out_format=None): + """Applying the Pipeline transformations and predictions on the + list of input data. `out_format` forces the output format + to either a DataFrame or a list of dictionaries. 
+
+        """
+        result = self.data_transform(input_data_list)
+        if out_format is not None:
+            current_format = get_data_format(result)
+            if current_format != out_format:
+                return format_data(result, out_format)
+        return result
+
+    def data_transform(self, input_data_list):
+        """Delegates transformation to each DataTransformer step"""
+        current_format = get_data_format(input_data_list)
+        if len(self.steps) == 0:
+            return input_data_list
+        inner_data_list = input_data_list
+        for index, step in enumerate(self.steps[:-1]):
+            try:
+                inner_data_list = step.transform(inner_data_list)
+            except Exception as exc:
+                raise ValueError(
+                    "Failed to apply step number %s in pipeline %s: %s" %
+                    (index, self.name, exc))
+        try:
+            inner_data_list = self.steps[-1].transform(
+                inner_data_list, out_format=current_format)
+            if hasattr(self.steps[-1], "add_input") and \
+                    self.steps[-1].add_input:
+                self.steps[-1].merge_input_data(
+                    input_data_list, inner_data_list,
+                    out_format=current_format)
+        except Exception as exc:
+            raise ValueError("Failed to apply the last step: %s" % exc)
+        return inner_data_list
+
+
+class BMLPipeline(Pipeline):
+    """The class represents the sequential transformations (and predictions)
+    that the input data goes through in a prediction workflow.
+    It reproduces the pre-modeling steps that need to be applied to the
+    input data and then uses the model's predict-like method (predict,
+    centroid, anomaly score, etc.) to add the final prediction. The
+    mandatory arguments for the class are:
+      - name: Each pipeline needs to be identified with a unique name
+      - resource_list: A list of resource IDs. Only datasets and supervised
+        or unsupervised model resources are allowed.
+
+    When a dataset is provided, only the chain of transformations leading to
+    that dataset structure is applied. When a model is provided, the input
+    data is pre-modeled using that chain of transformations and the result
+    is used as input for the predict-like method of the model, which adds
+    the prediction to the result. If the pipeline is expected to use
+    strictly the resources in the original resource_list, you can set the
+    last_step argument to True.
+
+    """
+    def __init__(self, name, resource_list=None, description=None, api=None,
+                 cache_get=None, init_settings=None, execution_settings=None,
+                 last_step=False):
+        """The pipeline needs a unique name and the list of resources to use:
+        :param name: A unique name that will be used when caching the
+                     resources it needs to be executed.
+        :type name: str
+        :param resource_list: A dataset/model ID or a list of them
+                              to define the transformations and predictions
+                              to be added to the input data.
+        :type resource_list: list
+        Optionally, it can receive:
+        :param description: A description of the pipeline procedure
+        :type description: str
+        :param api: A BigML API connection object
+        :type api: BigML
+        :param cache_get: A cache_get function to retrieve cached resources
+        :type cache_get: function
+        :param init_settings: A dictionary describing the optional arguments
+                              added when instantiating the local model
+                              (one per model ID)
+                              e.g.:
+                              {"deepnet/111111111111111111": {
+                                  "operation_settings": {
+                                      "region_score_threshold": 0.6}},
+                               "deepnet/222222222222222222": {
+                                  "operation_settings": {
+                                      "region_score_threshold": 0.7}}}
+        :type init_settings: dict
+        :param execution_settings: A dictionary describing the optional
+                                   arguments added when creating the
+                                   predictions.
+ e.g.: + {"model/111111111111111111": { + "missing_strategy": 1}, + "model/222222222222222222": { + "operating_kind": "confidence"}} + :type execution_settings: dict + + """ + + if resource_list is None and use_cache(cache_get): + self.__dict__ = load(name, cache_get) + else: + super().__init__(name, description=description) + + # API related attributes + if resource_list is None: + resource_list = [] + self.resource_list = resource_list + if isinstance(resource_list, str): + self.resource_list = [resource_list] + for item in self.resource_list: + resource_id = get_resource_id(item) + if resource_id is None: + raise ValueError("Only resource IDs are allowed as first " + "argument.") + self.init_settings = init_settings or {} + self.execution_settings = execution_settings or {} + self._api = get_api_connection(api) + if self._api.storage is None: + self._api.storage = self._get_pipeline_storage() + self._cache_get = cache_get + self.steps = [] + self.extend(self.__retrieve_steps(last_step)) + + def __retrieve_steps(self, last_step): + """Retrieving the steps that need to be used to reproduce the + transformations leading to the resources given in the original list + """ + local_resources = [] + init_settings = self.init_settings.copy() + execution_settings = self.execution_settings.copy() + datasets = {} + steps = [] + + kwargs = {} + if self._api is not None: + kwargs["api"] = self._api + if self._cache_get is not None: + kwargs["cache_get"] = self._cache_get + + for resource_id in self.resource_list: + init_settings[resource_id] = init_settings.get( + resource_id, {}) + init_settings[resource_id].update(kwargs) + + for index, resource in enumerate(self.resource_list): + resource_id = get_resource_id(resource) + resource_type = get_resource_type(resource_id) + local_class = LOCAL_CLASSES.get(resource_type, SupervisedModel) + kwargs = init_settings.get(resource_id, {}) + local_resource = local_class(resource, **kwargs) + if isinstance(local_resource, SupervisedModel): + execution_settings[resource_id] = \ + execution_settings.get( + resource_id, {}) + execution_settings[resource_id].update({"full": True}) + local_resources.append([local_resource]) + if (hasattr(local_resource, "parent_id") and \ + get_resource_type(local_resource.parent_id) == "dataset"): + if local_resource.parent_id in datasets: + dataset = datasets[local_resource.parent_id] + else: + dataset = Dataset(local_resource.parent_id, + api=self._api) + datasets = get_datasets_dict(dataset, datasets) + if not last_step: + dataset_chain = get_datasets_chain(dataset) + local_resources[index].extend(dataset_chain) + local_resources[index].reverse() + + try: + new_resources = local_resources[0][:] + except IndexError: + new_resources = [] + for index, resources in enumerate(local_resources): + if index < 1: + continue + for resource in resources: + if resource not in new_resources: + new_resources.append(resource) + local_resources = new_resources + for local_resource in local_resources: + # non-flatline datasets will not add transformations + if isinstance(local_resource, Dataset) and \ + local_resource.origin_dataset is not None and \ + local_resource.transformations is None: + continue + execution_settings = self.execution_settings.get( + local_resource.resource_id, {}) + steps.append(BMLDataTransformer( + local_resource, **execution_settings)) + return steps + + def _get_pipeline_storage(self): + """ Creating a separate folder inside the given storage folder to + contain the pipeline related models based on the pipeline name. 
+        If the folder already exists, first we check that all the resources
+        in the resources list are already stored there. If that's not the
+        case, we rename the folder by adding a datetime suffix and create a
+        new pipeline folder to store them.
+        """
+        if self._api.storage is None:
+            self._api.storage = STORAGE
+        path = os.path.join(self._api.storage, self.name)
+        if os.path.exists(path):
+            if check_in_path(path, self.resource_list):
+                return path
+            # adding a suffix to store old pipeline version
+            datetime_str = str(datetime.now()).replace(" ", "_")
+            bck_path = f"{path}_{datetime_str}_bck"
+            os.rename(path, bck_path)
+        check_dir(path)
+        return path
+
+    def export(self, output_directory=None):
+        """Exports all the resources needed in the pipeline to the user-given
+        output directory. The entire pipeline folder is exported and its name
+        is used as filename.
+        """
+        def zipdir(path, ziph):
+            # ziph is zipfile handle
+            for root, _, files in os.walk(path):
+                for file in files:
+                    ziph.write(os.path.join(root, file),
+                               os.path.relpath(os.path.join(root, file),
+                                               os.path.join(path, '..')))
+
+        if output_directory is None:
+            output_directory = os.getcwd()
+        check_dir(output_directory)
+        name = asciify(self.name)
+        out_filename = os.path.join(output_directory, f"{name}.zip")
+
+        # write README file with the information that describes the Pipeline
+        name = self.name
+        description = self.description or ""
+        resources = ", ".join(self.resource_list)
+        readme = (f"Pipeline name: {name}\n{description}\n\n"
+                  f"Built from: {resources}")
+        with open(os.path.join(self._api.storage, "README.txt"), "w",
+                  encoding="utf-8") as readme_handler:
+            readme_handler.write(readme)
+        # write JSON file describing the pipeline resources
+        pipeline_vars = vars(self)
+        stored_vars = {}
+        for key, value in pipeline_vars.items():
+            if not key.startswith("_") and not key == "steps":
+                stored_vars.update({key: value})
+        pipeline_filename = os.path.join(self._api.storage, asciify(self.name))
+        save_json(stored_vars, pipeline_filename)
+        with zipfile.ZipFile(out_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
+            zipdir(self._api.storage, zipf)
+
+    def dump(self, output_dir=None, cache_set=None):
+        """Uses msgpack to serialize the resource object and all its steps.
+        If cache_set is filled with a cache set method, the method is called
+        to store the serialized value.
+        """
+        pipeline_vars = vars(self)
+        stored_vars = {}
+        for key, value in pipeline_vars.items():
+            if not key.startswith("_") and not key == "steps":
+                stored_vars.update({key: value})
+        if output_dir is not None:
+            check_dir(output_dir)
+            cache_set = cache_set or fs_cache_set(output_dir)
+        dump(stored_vars, output=None, cache_set=cache_set)
+        for step in self.steps:
+            step.dump(cache_set=cache_set)
+
+    @classmethod
+    def load(cls, name, dump_dir):
+        """Restores the information of the pipeline and its steps from a
+        previously dumped pipeline file. The objects used in each step
+        of the pipeline are expected to be found in the same directory.
+        """
+        if dump_dir is not None and name is not None:
+            return cls(name,
+                       None,
+                       cache_get=fs_cache_get(dump_dir))
+        return None
diff --git a/bigml/pipeline/transformer.py b/bigml/pipeline/transformer.py
new file mode 100644
index 00000000..3b983cd8
--- /dev/null
+++ b/bigml/pipeline/transformer.py
@@ -0,0 +1,275 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2022-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+"""
+DataTransformer classes that handle the transformations generated on input
+data by Feature Engineering, Models, Anomaly Detectors, etc.
+The BMLDataTransformer will take care of transformations that use BigML
+objects as transformation generators. Other libraries, like Pandas
+(DFDataTransformer) and scikit-learn (SKDataTransformer)
+will need their own DataTransformer subclasses to define
+their own transformations.
+
+"""
+
+import types
+
+from datetime import datetime
+
+from bigml.constants import INTERNAL, DATAFRAME, OUT_NEW_HEADERS
+from bigml.util import get_formatted_data, format_data, get_data_format
+
+try:
+    from pandas import DataFrame, concat
+    PANDAS_READY = True
+except ImportError:
+    PANDAS_READY = False
+
+
+class DataTransformer():
+    """Base class to handle transformations. It offers a transform method
+    that can handle lists of dictionaries or Pandas DataFrames as inputs,
+    and delegates the actual transformations to the `data_transform` method,
+    which must be implemented in the classes derived from it.
+    """
+
+    def __init__(self, generator, data_format, resource_id=None, name=None,
+                 description=None):
+        """Adds initial attributes:
+         - generator: object, function or list of functions that will be
+                      doing the transformation
+         - data_format: whether to accept a DataFrame or a list of
+                        dictionaries as inputs for the generator
+         - resource_id: unique identifier for the data transformer object
+         - name: name for the data transformer
+         - description: description for the transformations in the data
+                        transformer
+        """
+        self.generator = generator
+        self.data_format = data_format
+        self.resource_id = resource_id
+        self.name = name
+        self.description = description
+
+    def _formatted_input(self, input_data_list):
+        """Returns a copy of the input data list in the expected format """
+        return get_formatted_data(input_data_list, self.data_format)
+
+    def transform(self, input_data_list, out_format=None):
+        """Returns a new input_data_list where the transformations defined
+        in the generator have been applied. It handles format transformation
+        if needed before applying the generator function.
+        """
+        data_format = get_data_format(input_data_list)
+        inner_data_list = self._formatted_input(input_data_list)
+        result = self.data_transform(inner_data_list)
+        if self.data_format != data_format and out_format is None:
+            return format_data(result, data_format)
+        if self.data_format != out_format:
+            return format_data(result, out_format)
+        return result
+
+    def data_transform(self, input_data_list):
+        """Method to be re-implemented in each of the data transformers."""
+        raise NotImplementedError("This method needs to be implemented")
+
+
+class BMLDataTransformer(DataTransformer):
+    """Transformer wrapper for BigML resources."""
+    def __init__(self, local_resource, outputs=None, **kwargs):
+        """Receives a local resource (Dataset, SupervisedModel, Cluster...)
+        and creates a `DataTransformer` from it to apply the corresponding
+        transformations.
+         - for Datasets, Flatline transformations (if any) are applied
+         - for models, a batch prediction (scoring, topic distribution,
+           etc.) is applied and added to the original input.
+
+        Optional arguments are:
+        :param outputs: dictionary of output fields and headers
+        :type outputs: dict
+        :param kwargs: dictionary of runtime settings for batch predictions
+                       (e.g. missing_strategy, operating_point, etc.)
+        :type kwargs: dict
+        """
+        try:
+            generator = local_resource.transform
+            self.add_input = False
+        except AttributeError:
+            if hasattr(local_resource, "batch_predict"):
+                generator = lambda x: \
+                    local_resource.batch_predict(x, outputs=outputs, **kwargs)
+                self.add_input = True
+            else:
+                raise ValueError("The local resource needs to provide "
+                                 "a transform or a batch_predict "
+                                 "method to generate transformations.")
+        super().__init__(generator,
+                         INTERNAL,
+                         local_resource.resource_id,
+                         local_resource.name,
+                         local_resource.description)
+        self.local_resource = local_resource
+        self.dump = local_resource.dump
+
+    def data_transform(self, input_data_list):
+        """Returns a list of dictionaries with the generated transformations.
+        The input list is expected to be a list of dictionaries"""
+        return self.generator(input_data_list)
+
+    def merge_input_data(self, input_data_list, output_data_list,
+                         out_format=None):
+        """Adding input data to the output """
+        data_format = get_data_format(input_data_list)
+        input_data_list = self._formatted_input(input_data_list)
+        output_data_list = self._formatted_input(output_data_list)
+        for index, input_data in enumerate(input_data_list):
+            for key, value in input_data.items():
+                if key not in output_data_list[index]:
+                    output_data_list[index].update({key: value})
+        if self.data_format != out_format:
+            return format_data(output_data_list, data_format)
+        return output_data_list
+
+
+class DFDataTransformer(DataTransformer):
+    """DataTransformer wrapper for DataFrames """
+    def __init__(self, generator, resource_id=None, name=None,
+                 description=None):
+        """Receives the function or list of functions to be applied on
+        the input DataFrame.
+        Optional parameters are:
+        :param resource_id: unique ID for the DataTransformer
+        :type resource_id: str
+        :param name: DataTransformer name
+        :type name: str
+        :param description: Description for the transformations.
+        :type description: str
+        """
+        if not isinstance(generator, list):
+            generator = [generator]
+        for index, item in enumerate(generator):
+            if not isinstance(item, tuple) and isinstance(
+                    item, types.FunctionType):
+                generator[index] = (item, [], {})
+            elif isinstance(item, tuple) and isinstance(
+                    item[0], types.FunctionType):
+                try:
+                    args = item[1]
+                    if not isinstance(args, list):
+                        raise ValueError("The syntax of the first argument is"
+                                         " function or (function, list, dict)")
+                except IndexError:
+                    args = []
+                try:
+                    kwargs = item[2]
+                    if not isinstance(kwargs, dict):
+                        raise ValueError("The syntax of the first argument is"
+                                         " function or (function, list, dict)")
+                except IndexError:
+                    kwargs = {}
+
+                generator[index] = (item[0], args, kwargs)
+            else:
+                raise ValueError("Only functions or (function, list, dict) "
+                                 "tuples are allowed as first argument.")
+
+        super().__init__(generator,
+                         DATAFRAME,
+                         resource_id or "dftrans_%s" %
+                         str(datetime.now()).replace(" ", "_"),
+                         name,
+                         description)
+
+    def data_transform(self, input_data_list):
+        """Calling the corresponding method in the generator.
+        The input_data_list is expected to be a DataFrame.
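+
+        A minimal sketch (`add_ratio` is a made-up pipe function, not
+        part of the bindings):
+
+            def add_ratio(df, num, den):
+                df["ratio"] = df[num] / df[den]
+                return df
+
+            dt = DFDataTransformer([(add_ratio, ["width", "height"], {})])
+            result_df = dt.transform(input_df)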
+
+        """
+        result = input_data_list.copy()
+        for function, args, kwargs in self.generator:
+            result = result.pipe(function, *args, **kwargs)
+        return result
+
+
+class SKDataTransformer(DataTransformer):
+    """DataTransformer wrapper for scikit-learn pipelines or
+    transformations"""
+    def __init__(self, generator, resource_id=None, name=None,
+                 description=None, output=None):
+        """Receives the pipeline or transformation to be applied on
+        the input DataFrame.
+        Optional parameters are:
+        :param resource_id: unique ID for the DataTransformer
+        :type resource_id: str
+        :param name: DataTransformer name
+        :type name: str
+        :param description: Description for the transformations.
+        :type description: str
+        :param output: Dictionary containing the headers to be used for the
+                       new fields generated in the transformation.
+        :type output: dict
+        """
+
+        try:
+            generator_fn = generator.transform
+            self.add_input = False
+        except AttributeError:
+            try:
+                generator_fn = generator.predict
+                self.add_input = True
+            except AttributeError:
+                try:
+                    generator_fn = generator.score
+                    self.add_input = True
+                except AttributeError:
+                    raise ValueError("Failed to find a .transform, .predict "
+                                     "or .score method in the first argument "
+                                     "object.")
+
+        super().__init__(generator_fn,
+                         DATAFRAME,
+                         resource_id or "sktrans_%s" %
+                         str(datetime.now()).replace(" ", "_"),
+                         name,
+                         description)
+        self.output = output or {}
+        try:
+            self.output_headers = generator.get_feature_names_out()
+        except AttributeError:
+            self.output_headers = self.output.get(OUT_NEW_HEADERS)
+
+    def data_transform(self, input_data_list):
+        """Calling the corresponding method in the generator.
+        The input_data_list is expected to be a DataFrame.
+
+        """
+        result = self.generator(input_data_list)
+        try:
+            result = result.toarray()
+        except AttributeError:
+            pass
+        df_kwargs = {"index": input_data_list.index}
+        if self.output_headers is not None:
+            df_kwargs.update({"columns": self.output_headers})
+        result = DataFrame(result, **df_kwargs)
+        if not self.add_input:
+            return result
+        return concat([input_data_list, result], axis=1)
+
+    @staticmethod
+    def merge_input_data(input_data_list, output_data_list):
+        """Adding input data to the output """
+        return concat([input_data_list, output_data_list], axis=1)
diff --git a/bigml/predicate.py b/bigml/predicate.py
index dd108bda..ed6ec690 100644
--- a/bigml/predicate.py
+++ b/bigml/predicate.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
-#!/usr/bin/env python
 #
-# Copyright 2013-2019 BigML
+# Copyright 2013-2025 BigML
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may
 # not use this file except in compliance with the License. You may obtain
@@ -16,33 +15,17 @@
 # under the License.
 
 """Predicate structure for the BigML local Model
-
 This module defines an auxiliary Predicate structure that is used in the
 Tree to save the node's predicate info.
- """ -import operator import re +from bigml.predicate_utils.utils import TM_TOKENS, TM_FULL_TERM, TM_ALL, \ + FULL_TERM_PATTERN, OPERATOR_CODE +from bigml.predicate_utils.utils import apply_predicate from bigml.util import plural -# Map operator str to its corresponding function -OPERATOR = { - "<": operator.lt, - "<=": operator.le, - "=": operator.eq, - "!=": operator.ne, - "/=": operator.ne, - ">=": operator.ge, - ">": operator.gt, - "in": operator.contains -} - -TM_TOKENS = 'tokens_only' -TM_FULL_TERM = 'full_terms_only' -TM_ALL = 'all' -FULL_TERM_PATTERN = re.compile(r'^.+\b.+$', re.U) RELATIONS = { '<=': 'no more than %s %s', '>=': '%s %s at most', @@ -50,99 +33,25 @@ '<': 'less than %s %s' } - -def term_matches(text, forms_list, options): - """ Counts the number of occurences of the words in forms_list in the text - - The terms in forms_list can either be tokens or full terms. The - matching for tokens is contains and for full terms is equals. - """ - token_mode = options.get('token_mode', TM_TOKENS) - case_sensitive = options.get('case_sensitive', False) - first_term = forms_list[0] - if token_mode == TM_FULL_TERM: - return full_term_match(text, first_term, case_sensitive) - # In token_mode='all' we will match full terms using equals and - # tokens using contains - if token_mode == TM_ALL and len(forms_list) == 1: - if re.match(FULL_TERM_PATTERN, first_term): - return full_term_match(text, first_term, case_sensitive) - return term_matches_tokens(text, forms_list, case_sensitive) - - -def full_term_match(text, full_term, case_sensitive): - """Counts the match for full terms according to the case_sensitive option - - """ - if not case_sensitive: - text = text.lower() - full_term = full_term.lower() - return 1 if text == full_term else 0 - - -def get_tokens_flags(case_sensitive): - """Returns flags for regular expression matching depending on text analysis - options - - """ - flags = re.U - if not case_sensitive: - flags = (re.I | flags) - return flags - - -def term_matches_tokens(text, forms_list, case_sensitive): - """Counts the number of occurences of the words in forms_list in the text - - """ - flags = get_tokens_flags(case_sensitive) - expression = ur'(\b|_)%s(\b|_)' % '(\\b|_)|(\\b|_)'.join(forms_list) - pattern = re.compile(expression, flags=flags) - matches = re.findall(pattern, text) - return len(matches) - - -def item_matches(text, item, options): - """ Counts the number of occurences of the item in the text - - The matching considers the separator or - the separating regular expression. - """ - separator = options.get('separator', ' ') - regexp = options.get('separator_regexp') - if regexp is None: - regexp = ur"%s" % re.escape(separator) - return count_items_matches(text, item, regexp) - - -def count_items_matches(text, item, regexp): - """ Counts the number of occurences of the item in the text - - """ - expression = ur'(^|%s)%s($|%s)' % (regexp, item, regexp) - pattern = re.compile(expression, flags=re.U) - matches = re.findall(pattern, text) - return len(matches) - - - -class Predicate(object): +class Predicate(): """A predicate to be evaluated in a tree's node. 
- """ def __init__(self, operation, field, value, term=None): self.operator = operation self.missing = False + if self.operator.endswith("*"): self.operator = self.operator[0: -1] self.missing = True + elif operation == 'in' and None in value: + self.missing = True + self.field = field self.value = value self.term = term def is_full_term(self, fields): """Returns a boolean showing if a term is considered as a full_term - """ if self.term is not None: # new optype has to be handled in tokens @@ -158,7 +67,6 @@ def is_full_term(self, fields): def to_rule(self, fields, label='name', missing=None): """Builds rule string from a predicate - """ # externally forcing missing to True or False depending on the path if missing is None: @@ -166,106 +74,67 @@ def to_rule(self, fields, label='name', missing=None): if label is not None: name = fields[self.field][label] else: - name = u"" + name = "" full_term = self.is_full_term(fields) - relation_missing = u" or missing" if missing else u"" + relation_missing = " or missing" if missing else "" if self.term is not None: relation_suffix = '' if ((self.operator == '<' and self.value <= 1) or (self.operator == '<=' and self.value == 0)): - relation_literal = (u'is not equal to' if full_term - else u'does not contain') + relation_literal = ('is not equal to' if full_term + else 'does not contain') else: - relation_literal = u'is equal to' if full_term else u'contains' + relation_literal = 'is equal to' if full_term else 'contains' if not full_term: if self.operator != '>' or self.value != 0: relation_suffix = (RELATIONS[self.operator] % (self.value, plural('time', self.value))) - return u"%s %s %s %s%s" % (name, relation_literal, - self.term, relation_suffix, - relation_missing) + return "%s %s %s %s%s" % (name, relation_literal, + self.term, relation_suffix, + relation_missing) if self.value is None: - return u"%s %s" % (name, - u"is missing" if self.operator == '=' - else u"is not missing") - return u"%s %s %s%s" % (name, - self.operator, - self.value, - relation_missing) - - def to_LISP_rule(self, fields): - """To be deprecated. See to_lisp_rule - - """ - self.to_lisp_rule(fields) + return "%s %s" % (name, + "is missing" if self.operator == '=' + else "is not missing") + return "%s %s %s%s" % (name, + self.operator, + self.value, + relation_missing) def to_lisp_rule(self, fields): """Builds rule string in LISP from a predicate - """ if self.term is not None: if fields[self.field]['optype'] == 'text': options = fields[self.field]['term_analysis'] case_insensitive = not options.get('case_sensitive', False) - case_insensitive = u'true' if case_insensitive else u'false' + case_insensitive = 'true' if case_insensitive else 'false' language = options.get('language') - language = u"" if language is None else u" %s" % language - return u"(%s (occurrences (f %s) %s %s%s) %s)" % ( + language = "" if language is None else " %s" % language + return "(%s (occurrences (f %s) %s %s%s) %s)" % ( self.operator, self.field, self.term, case_insensitive, language, self.value) - elif fields[self.field]['optype'] == 'items': - return u"(%s (if (contains-items? %s %s) 1 0) %s)" % ( + + if fields[self.field]['optype'] == 'items': + return "(%s (if (contains-items? %s %s) 1 0) %s)" % ( self.operator, self.field, self.term, self.value) if self.value is None: - negation = u"" if self.operator == "=" else u"not " - return u"(%s missing? 
%s)" % (negation, self.field) - rule = u"(%s (f %s) %s)" % (self.operator, - self.field, - self.value) + negation = "" if self.operator == "=" else "not " + return "(%s missing? %s)" % (negation, self.field) + rule = "(%s (f %s) %s)" % (self.operator, + self.field, + self.value) if self.missing: - rule = u"(or (missing? %s) %s)" % (self.field, rule) + rule = "(or (missing? %s) %s)" % (self.field, rule) return rule def apply(self, input_data, fields): """Applies the operators defined in the predicate as strings to the provided input data - """ - # for missing operators - if input_data.get(self.field) is None: - # text and item fields will treat missing values by following the - # doesn't contain branch - if self.term is None: - return self.missing or ( - self.operator == '=' and self.value is None) - elif self.operator == '!=' and self.value is None: - return True - - if self.term is not None: - if fields[self.field]['optype'] == 'text': - all_forms = fields[self.field]['summary'].get('term_forms', {}) - term_forms = all_forms.get(self.term, []) - terms = [self.term] - terms.extend(term_forms) - options = fields[self.field]['term_analysis'] - return apply(OPERATOR[self.operator], - [term_matches(input_data.get(self.field, ""), - terms, options), - self.value]) - else: - # new items optype - options = fields[self.field]['item_analysis'] - return apply(OPERATOR[self.operator], - [item_matches(input_data.get(self.field, ""), - self.term, options), - self.value]) - if self.operator == "in": - return apply(OPERATOR[self.operator], - [self.value, - input_data[self.field]]) - return apply(OPERATOR[self.operator], - [input_data[self.field], - self.value]) + return apply_predicate(OPERATOR_CODE.get(self.operator), self.field, + self.value, self.term, self.missing, input_data, + fields[self.field]) diff --git a/bigml/predicate_utils/__init__.py b/bigml/predicate_utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bigml/predicate_utils/utils.py b/bigml/predicate_utils/utils.py new file mode 100644 index 00000000..7239d01e --- /dev/null +++ b/bigml/predicate_utils/utils.py @@ -0,0 +1,308 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+
+"""
+Common auxiliary functions to be used in the node predicate evaluation
+"""
+import operator
+import re
+
+from bigml.util import plural
+
+# Operator Codes
+LT = 0
+LE = 1
+EQ = 2
+NE = 3
+GE = 4
+GT = 5
+IN = 6
+
+# Map operator string to its corresponding code
+OPERATOR_CODE = {"<": LT,
+                 "<=": LE,
+                 "=": EQ,
+                 "!=": NE,
+                 "/=": NE,
+                 ">=": GE,
+                 ">": GT,
+                 "in": IN}
+
+# Map operator code to its corresponding function
+OPERATOR = [operator.lt,
+            operator.le,
+            operator.eq,
+            operator.ne,
+            operator.ge,
+            operator.gt,
+            operator.contains]
+
+INVERSE_OP = dict(zip(OPERATOR_CODE.values(), OPERATOR_CODE.keys()))
+
+RELATIONS = {
+    '<=': 'no more than %s %s',
+    '>=': '%s %s at most',
+    '>': 'more than %s %s',
+    '<': 'less than %s %s'
+}
+
+TM_TOKENS = 'tokens_only'
+TM_FULL_TERM = 'full_terms_only'
+TM_ALL = 'all'
+FULL_TERM_PATTERN = re.compile(r'^.+\b.+$', re.U)
+
+OPERATION_OFFSET = 2
+FIELD_OFFSET = 3
+VALUE_OFFSET = 4
+TERM_OFFSET = 5
+MISSING_OFFSET = 6
+
+PREDICATE_INFO_LENGTH = 5
+
+
+def term_matches(text, forms_list, options):
+    """Counts the number of occurrences of the words in forms_list in the
+    text. The terms in forms_list can either be tokens or full terms. The
+    matching for tokens is contains and for full terms is equals.
+    """
+    token_mode = options.get('token_mode', TM_TOKENS)
+    case_sensitive = options.get('case_sensitive', False)
+    first_term = forms_list[0]
+    if token_mode == TM_FULL_TERM:
+        return full_term_match(text, first_term, case_sensitive)
+
+    return term_matches_tokens(text, forms_list, case_sensitive)
+
+
+def is_full_term(term, field):
+    """Returns a boolean showing if a term is considered as a full_term
+    """
+    if term is not None:
+        # new optype has to be handled in tokens
+        if field['optype'] == 'items':
+            return False
+        options = field['term_analysis']
+        token_mode = options.get('token_mode', TM_TOKENS)
+        if token_mode == TM_FULL_TERM:
+            return True
+        if token_mode == TM_ALL:
+            return re.match(FULL_TERM_PATTERN, term)
+    return False
+
+
+def full_term_match(text, full_term, case_sensitive):
+    """Counts the match for full terms according to the case_sensitive option
+    """
+    if not case_sensitive:
+        text = text.lower()
+        full_term = full_term.lower()
+
+    return 1 if text == full_term else 0
+
+
+def get_tokens_flags(case_sensitive):
+    """Returns flags for regular expression matching depending on text
+    analysis options
+    """
+    flags = re.U
+    if not case_sensitive:
+        flags = (re.I | flags)
+
+    return flags
+
+
+def term_matches_tokens(text, forms_list, case_sensitive):
+    """Counts the number of occurrences of the words in forms_list in the
+    text
+    """
+    flags = get_tokens_flags(case_sensitive)
+
+    expression = r'(\b|_)%s(\b|_)' % '(\\b|_)|(\\b|_)'.join([re.escape(term) \
+        for term in forms_list])
+    pattern = re.compile(expression, flags=flags)
+    matches = re.findall(pattern, text)
+    return len(matches)
+
+
+def item_matches(text, item, options):
+    """Counts the number of occurrences of the item in the text.
+    The matching considers the separator or
+    the separating regular expression.
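+    For instance (an illustrative sketch; values are made up):
+
+        item_matches("red;green;blue", "green", {"separator": ";"})  # -> 1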
+    """
+    separator = options.get('separator', ' ')
+    regexp = options.get('separator_regexp')
+    if regexp is None:
+        regexp = r"%s" % re.escape(separator)
+
+    return count_items_matches(text, item, regexp)
+
+
+def count_items_matches(text, item, regexp):
+    """Counts the number of occurrences of the item in the text."""
+    expression = r'(^|%s)%s($|%s)' % (regexp, re.escape(item), regexp)
+    pattern = re.compile(expression, flags=re.U)
+    matches = re.findall(pattern, text)
+
+    return len(matches)
+
+
+def apply_predicates(node, input_data, fields, normalize_repeats=False):
+    """Evaluates the node's predicates against the input data and returns
+    the number of predicates that are satisfied."""
+    shift = 1 if normalize_repeats else 0
+    num_predicates = node[1 + shift]
+
+    predicates_ok = 0
+
+    for i in range(num_predicates):
+        operation = node[OPERATION_OFFSET + (PREDICATE_INFO_LENGTH * i) + shift]
+        field = node[FIELD_OFFSET + (PREDICATE_INFO_LENGTH * i) + shift]
+        value = node[VALUE_OFFSET + (PREDICATE_INFO_LENGTH * i) + shift]
+        term = node[TERM_OFFSET + (PREDICATE_INFO_LENGTH * i) + shift]
+        missing = node[MISSING_OFFSET + (PREDICATE_INFO_LENGTH * i) + shift]
+
+        predicate_ok = apply_predicate(operation, field, value, term, missing,
+                                       input_data, fields[field])
+        if predicate_ok:
+            predicates_ok += 1
+
+    return predicates_ok
+
+
+def apply_predicate(operation, field, value, term, missing, input_data,
+                    field_info):
+    """Applies the operators defined in the predicate as strings to
+    the provided input data
+    """
+    # for missing operators
+    if input_data.get(field) is None:
+        # text and item fields will treat missing values by following the
+        # doesn't contain branch
+        if term is None:
+            return missing or (
+                operation == EQ and value is None)
+        elif operation == NE and value is None:
+            return True
+
+    if term is not None:
+        if field_info['optype'] == 'text':
+            all_forms = field_info['summary'].get('term_forms', {})
+            term_forms = all_forms.get(term, [])
+            terms = [term]
+            terms.extend(term_forms)
+            options = field_info['term_analysis']
+            input_terms = term_matches(input_data.get(field, ""), terms,
+                                       options)
+            return OPERATOR[operation](input_terms, value)
+        # new items optype
+        options = field_info['item_analysis']
+        input_items = item_matches(input_data.get(field, ""), term,
+                                   options)
+        return OPERATOR[operation](input_items, value)
+    if operation == IN:
+        return OPERATOR[operation](value, input_data[field])
+    return OPERATOR[operation](input_data[field], value)
+
+
+def pack_predicate(predicate):
+    """Compacts the predicate condition
+
+    """
+    node = []
+    if predicate and predicate is not True:
+        operation = predicate.get('operator')
+        value = predicate.get('value')
+        missing = False
+        if operation.endswith("*"):
+            operation = operation[0: -1]
+            missing = True
+        elif operation == 'in' and None in value:
+            missing = True
+
+        node.append(OPERATOR_CODE.get(operation))
+        node.append(predicate.get('field'))
+        node.append(value)
+        node.append(predicate.get('term'))
+        node.append(missing)
+    else:
+        node.append(True)
+    return node
+
+
+def predicate_to_rule(operation, field_info, value, term,
+                      missing, label='name'):
+    """Builds the human-readable rule string for a predicate condition
+
+    """
+    # externally forcing missing to True or False depending on the path
+    if missing is None:
+        missing = False
+    if label is not None:
+        name = field_info[label]
+    else:
+        name = ""
+    operation = INVERSE_OP[operation]
+    full_term = is_full_term(term, field_info)
+    relation_missing = " or missing" if missing else ""
+    if term is not None:
+        relation_suffix = ''
+        if ((operation == '<' and value <= 1) or
+
(operation == '<=' and value == 0)): + relation_literal = ('is not equal to' if full_term + else 'does not contain') + else: + relation_literal = 'is equal to' if full_term else 'contains' + if not full_term: + if operation != '>' or value != 0: + relation_suffix = (RELATIONS[operation] % + (value, + plural('time', value))) + return "%s %s %s %s%s" % (name, relation_literal, + term, relation_suffix, + relation_missing) + if value is None: + return "%s %s" % (name, + "is missing" if operation == '=' + else "is not missing") + return "%s %s %s%s" % (name, + operation, + value, + relation_missing) + + +def to_lisp_rule(operation, field, value, term, + missing, field_info): + """Builds rule string in LISP from a predicate + + """ + if term is not None: + if field_info['optype'] == 'text': + options = field_info['term_analysis'] + case_insensitive = not options.get('case_sensitive', False) + case_insensitive = 'true' if case_insensitive else 'false' + language = options.get('language') + language = "" if language is None else " %s" % language + return "(%s (occurrences (f %s) %s %s%s) %s)" % ( + operation, field, term, + case_insensitive, language, value) + if field_info['optype'] == 'items': + return "(%s (if (contains-items? %s %s) 1 0) %s)" % ( + operation, field, term, value) + if value is None: + negation = "" if operation == "=" else "not " + return "(%s missing? %s)" % (negation, field) + rule = "(%s (f %s) %s)" % (operation, + field, + value) + if missing: + rule = "(or (missing? %s) %s)" % (field, rule) + return rule diff --git a/bigml/predicates.py b/bigml/predicates.py index d8e6848b..54537858 100644 --- a/bigml/predicates.py +++ b/bigml/predicates.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -23,7 +22,7 @@ """ from bigml.predicate import Predicate -class Predicates(object): +class Predicates(): """A list of predicates to be evaluated in an anomaly tree's node. """ @@ -53,6 +52,6 @@ def apply(self, input_data, fields): """ - return all([predicate.apply(input_data, fields) for - predicate in self.predicates - if isinstance(predicate, Predicate)]) + return all(predicate.apply(input_data, fields) for + predicate in self.predicates + if isinstance(predicate, Predicate)) diff --git a/bigml/predict_utils/__init__.py b/bigml/predict_utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bigml/predict_utils/boosting.py b/bigml/predict_utils/boosting.py new file mode 100644 index 00000000..1380e96d --- /dev/null +++ b/bigml/predict_utils/boosting.py @@ -0,0 +1,165 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+
+"""Predict utilities for boosting models
+
+"""
+from bigml.predict_utils.common import one_branch, \
+    get_node, get_predicate, FIELD_OFFSET
+from bigml.predicate_utils.utils import predicate_to_rule, apply_predicate, \
+    pack_predicate
+from bigml.prediction import Prediction
+
+
+OFFSETS = { \
+    "id": 0,
+    "output": 1,
+    "count": 2,
+    "g_sum": 3,
+    "h_sum": 4,
+    "children#": 5,
+    "children": 6}
+
+
+def build_boosting_tree(node_dict, node=None, terms=None):
+    """Builds a compressed version of the tree structure as a list of
+    lists, starting from the root node, which is represented by a list:
+    [#predicates, op-code, field, value, term, missing...]
+
+    And each child is represented by a list whose elements are:
+    [id, output, count, g_sum, h_sum,
+     #children, children_nodes_list*]
+    """
+    if terms is None:
+        terms = {}
+    predicate = node_dict.get('predicate', True)
+    outer = node if node else list(pack_predicate(predicate))
+    children = node_dict.get("children", [])
+    outer.append(node_dict.get("id"))
+    outer.append(node_dict.get("output"))
+    outer.append(node_dict.get("count"))
+    outer.append(node_dict.get("g_sum"))
+    outer.append(node_dict.get("h_sum"))
+    outer.append(len(children))
+    children_list = []
+    for child in children:
+        predicate = child.get('predicate')
+        field = predicate.get("field")
+        if field not in terms:
+            terms[field] = []
+        term = predicate.get("term")
+        if term not in terms[field]:
+            terms[field].append(term)
+        inner = pack_predicate(predicate)
+        build_boosting_tree(child, node=inner, terms=terms)
+        children_list.append(inner)
+    if children_list:
+        outer.append(children_list)
+
+    return outer
+
+
+#pylint: disable=locally-disabled,inconsistent-return-statements
+def boosting_proportional_predict(tree, fields, input_data, path=None,
+                                  missing_found=False):
+    """Makes a prediction based on a number of field values considering all
+    the predictions of the leaves that fall in a subtree.
+
+    Each time a splitting field has no value assigned, we consider
+    both branches of the split to be true, merging their
+    predictions. The function returns the merged distribution and the
+    last node reached by a unique path.
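+
+    The returned value is a (g_sum, h_sum, count, path) tuple, where g_sum
+    and h_sum accumulate the homonymous attributes stored in the reached
+    nodes (presumably the gradient and hessian sums used by the boosting
+    ensemble).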
+ + """ + + if path is None: + path = [] + + node = get_node(tree) + children_number = node[OFFSETS["children#"]] + children = [] if children_number == 0 else node[OFFSETS["children"]] + g_sum = node[OFFSETS["g_sum"]] + h_sum = node[OFFSETS["h_sum"]] + count = node[OFFSETS["count"]] + + if not children: + return (g_sum, h_sum, count, path) + if one_branch(children, input_data) or \ + fields[children[0][FIELD_OFFSET]]["optype"] in \ + ["text", "items"]: + for child in children: + [operator, field, value, term, missing] = get_predicate(child) + if apply_predicate(operator, field, value, term, missing, + input_data, fields[field]): + new_rule = predicate_to_rule(operator, fields[field], value, + term, missing) + if new_rule not in path and not missing_found: + path.append(new_rule) + return boosting_proportional_predict( \ + child, fields, + input_data, path, missing_found) + else: + # missing value found, the unique path stops + missing_found = True + g_sums = 0.0 + h_sums = 0.0 + population = 0 + for child in children: + g_sum, h_sum, count, _ = \ + boosting_proportional_predict( \ + child, fields, input_data, + path, missing_found) + g_sums += g_sum + h_sums += h_sum + population += count + return (g_sums, h_sums, population, path) + + +def boosting_last_predict(tree, fields, input_data, path=None): + """Predict function for boosting and last prediction strategy + + """ + + if path is None: + path = [] + node = get_node(tree) + + children_number = node[OFFSETS["children#"]] + children = [] if children_number == 0 else node[OFFSETS["children"]] + count = node[OFFSETS["count"]] + + if children: + for child in children: + [operator, field, value, term, missing] = get_predicate(child) + if apply_predicate(operator, field, value, term, missing, + input_data, fields[field]): + path.append(predicate_to_rule(operator, fields[field], + value, term, missing)) + return boosting_last_predict( \ + child, fields, \ + input_data, path=path) + + return Prediction( + node[OFFSETS["output"]], + path, + None, + distribution=None, + count=count, + median=None, + distribution_unit=None, + children=children, + d_min=None, + d_max=None) diff --git a/bigml/predict_utils/classification.py b/bigml/predict_utils/classification.py new file mode 100644 index 00000000..862b32c7 --- /dev/null +++ b/bigml/predict_utils/classification.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+
+"""Predict utilities for classifications
+
+"""
+from bigml.predict_utils.common import last_prediction_predict, \
+    proportional_predict, extract_distribution
+from bigml.predicate_utils.utils import pack_predicate
+from bigml.prediction import Prediction
+from bigml.multivote import ws_confidence
+
+
+OFFSETS = { \
+    "False": {"id": 0,
+              "output": 1,
+              "count": 2,
+              "confidence": 3,
+              "distribution": 4,
+              "children#": 5,
+              "children": 6},
+    "True": {"id": 0,
+             "output": 1,
+             "count": 2,
+             "confidence": 3,
+             "distribution": 4,
+             "wdistribution": 5,
+             "weight": 6,
+             "children#": 7,
+             "children": 8}}
+
+
+def build_classification_tree(node_dict, node=None, distribution=None,
+                              weighted=False, terms=None):
+    """Builds a compressed version of the tree structure as a list of
+    lists, starting from the root node, which is represented by a list:
+    [weight, #predicates, op-code, field, value, term, missing...]
+
+    And each child is represented by a list whose elements are:
+    [id, output, count, confidence, distribution,
+     wdistribution, weight, (only for weighted models)
+     #children, children_nodes_list*]
+    """
+    if terms is None:
+        terms = {}
+    predicate = node_dict.get('predicate', True)
+    outer = node if node else list(pack_predicate(predicate))
+    outer.append(node_dict.get("id"))
+    outer.append(node_dict.get("output"))
+    outer.append(node_dict.get("count"))
+    outer.append(node_dict.get("confidence"))
+    distribution = distribution if distribution is not None else \
+        node_dict.get("objective_summary")
+    _, distribution = extract_distribution(distribution)
+    outer.append(distribution)
+    if weighted:
+        _, wdistribution = extract_distribution( \
+            node_dict.get("weighted_objective_summary"))
+        outer.append(wdistribution)
+        outer.append(node_dict.get("weight"))
+    children = node_dict.get("children", [])
+    outer.append(len(children))
+    children_list = []
+    for child in children:
+        predicate = child.get('predicate')
+        field = predicate.get("field")
+        if field not in terms:
+            terms[field] = []
+        term = predicate.get("term")
+        if term not in terms[field]:
+            terms[field].append(term)
+        inner = pack_predicate(predicate)
+        build_classification_tree(child, node=inner, weighted=weighted,
+                                  terms=terms)
+        children_list.append(inner)
+    if children_list:
+        outer.append(children_list)
+    return outer
+
+
+def classification_proportional_predict(tree, weighted, fields, input_data):
+    """Prediction for classification using proportional strategy
+
+    """
+    offset = OFFSETS[str(weighted)]
+    (final_distribution, _, _, last_node, population,
+     _, path) = proportional_predict( \
+        tree, offset, fields, input_data, path=None)
+
+    distribution = [list(element) for element in
+                    sorted(list(final_distribution.items()),
+                           key=lambda x: (-x[1], x[0]))]
+    return Prediction( \
+        distribution[0][0],
+        path,
+        ws_confidence(distribution[0][0], final_distribution,
+                      ws_n=population),
+        distribution,
+        population,
+        None,
+        'categories',
+        [] if last_node[OFFSETS[str(weighted)]["children#"]] == 0 else \
+            last_node[OFFSETS[str(weighted)]["children"]])
+
+
+def classification_last_predict(tree, weighted, fields, input_data):
+    """Predict for classification and last prediction missing strategy
+
+    """
+    return last_prediction_predict(tree, OFFSETS[str(weighted)], fields,
+                                   input_data)
diff --git a/bigml/predict_utils/common.py b/bigml/predict_utils/common.py
new file mode 100644
index 00000000..6b967f52
--- /dev/null
+++ b/bigml/predict_utils/common.py
@@ -0,0 +1,215 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright
2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Common predict utilities + +""" +from bigml.predicate_utils.utils import apply_predicate, predicate_to_rule +from bigml.prediction import Prediction + +from bigml.multivote import merge_distributions + +OPERATION_OFFSET = 0 +FIELD_OFFSET = 1 +VALUE_OFFSET = 2 +TERM_OFFSET = 3 +MISSING_OFFSET = 4 + +PREDICATE_INFO_LENGTH = 5 + +DISTRIBUTION_GROUPS = ['bins', 'counts', 'categories'] + + +def mintree_split(children): + """Returns the field ID for the split + + """ + return children[0][FIELD_OFFSET] + + +def one_branch(children, input_data): + """Check if there's only one branch to be followed + + """ + no_missing = mintree_split(children) in input_data + return (no_missing or missing_branch(children) + or none_value(children)) + + +def missing_branch(children): + """Checks if the missing values are assigned to a special branch + + """ + return any(child[MISSING_OFFSET] for child in children) + + +def none_value(children): + """Checks if the predicate has a None value + + """ + return any(child[VALUE_OFFSET] is None for child in children) + + +def extract_distribution(summary): + """Extracts the distribution info from the objective_summary structure + in any of its grouping units: bins, counts or categories + + """ + for group in DISTRIBUTION_GROUPS: + if group in summary: + return group, summary.get(group) + return None, [] + + +def last_prediction_predict(tree, offsets, fields, input_data, path=None): + """ Predictions for last prediction missing strategy + + """ + + if path is None: + path = [] + + node = get_node(tree) + + children_number = node[offsets["children#"]] + children = [] if children_number == 0 else node[offsets["children"]] + + for child in children: + [operator, field, value, term, missing] = get_predicate(child) + if apply_predicate(operator, field, value, term, missing, + input_data, fields[field]): + new_rule = predicate_to_rule(operator, fields[field], value, + term, missing) + path.append(new_rule) + return last_prediction_predict(child, + offsets, fields, + input_data, path=path) + + if "wdistribution" in offsets: + output_distribution = node[offsets["wdistribution"]] + output_unit = 'categories' if "distribution_unit" not in offsets else \ + node[offsets["wdistribution_unit"]] + else: + output_distribution = node[offsets["distribution"]] + output_unit = 'categories' if "distribution_unit" not in offsets else \ + node[offsets["distribution_unit"]] + + return Prediction( \ + node[offsets["output"]], + path, + node[offsets["confidence"]], + distribution=output_distribution, + count=node[offsets["count"]], + median=None if offsets.get("median") is None else \ + node[offsets["median"]], + distribution_unit=output_unit, + children=[] if node[offsets["children#"]] == 0 else \ + node[offsets["children"]], + d_min=None if offsets.get("min") is None else \ + node[offsets["min"]], + d_max=None if offsets.get("max") is None else \ + node[offsets["max"]]) + + +#pylint: 
disable=locally-disabled,inconsistent-return-statements +def proportional_predict(tree, offsets, fields, input_data, path=None, + missing_found=False, median=False, parent=None): + """Makes a prediction based on a number of field values averaging + the predictions of the leaves that fall in a subtree. + + Each time a splitting field has no value assigned, we consider + both branches of the split to be true, merging their + predictions. The function returns the merged distribution and the + last node reached by a unique path. + + """ + + if path is None: + path = [] + + node = get_node(tree) + + final_distribution = {} + children_number = node[offsets["children#"]] + if "wdistribution" in offsets: + distribution = node[offsets["wdistribution"]] + else: + distribution = node[offsets["distribution"]] + children = [] if children_number == 0 else node[offsets["children"]] + t_min = None if offsets.get("min") is None else node[offsets["min"]] + t_max = None if offsets.get("max") is None else node[offsets["max"]] + count = node[offsets["count"]] + + if children_number == 0: + return (merge_distributions({}, dict((x[0], x[1]) + for x in distribution)), + t_min, t_max, node, count, parent, path) + if one_branch(children, input_data) or \ + fields[children[0][FIELD_OFFSET]]["optype"] in \ + ["text", "items"]: + for child in children: + [operator, field, value, term, missing] = get_predicate(child) + if apply_predicate(operator, field, value, term, missing, + input_data, fields[field]): + new_rule = predicate_to_rule(operator, fields[field], value, + term, missing) + if new_rule not in path and not missing_found: + path.append(new_rule) + return proportional_predict( \ + child, offsets, fields, + input_data, path, + missing_found, median, parent=node) + else: + # missing value found, the unique path stops + missing_found = True + minimums = [] + maximums = [] + population = 0 + for child in children: + (subtree_distribution, subtree_min, + subtree_max, _, subtree_pop, _, path) = \ + proportional_predict( \ + child, offsets, fields, + input_data, path, missing_found, median, parent=node) + if subtree_min is not None: + minimums.append(subtree_min) + if subtree_max is not None: + maximums.append(subtree_max) + population += subtree_pop + final_distribution = merge_distributions( + final_distribution, subtree_distribution) + return (final_distribution, + min(minimums) if minimums else None, + max(maximums) if maximums else None, node, population, + parent, path) + + +def get_node(tree): + """Extracts the properties of the node + + """ + if isinstance(tree[0], bool) and tree[0]: # predicate is True + return tree[1:] + return tree[PREDICATE_INFO_LENGTH:] + + +def get_predicate(tree): + """Extracts the predicate for the node + + """ + if isinstance(tree[0], bool) and tree[0]: + return True + return tree[0: PREDICATE_INFO_LENGTH] diff --git a/bigml/predict_utils/regression.py b/bigml/predict_utils/regression.py new file mode 100644 index 00000000..4c291f05 --- /dev/null +++ b/bigml/predict_utils/regression.py @@ -0,0 +1,269 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+"""Predict utilities for regressions
+
+"""
+import numbers
+import math
+
+
+from scipy import stats
+
+from bigml.predict_utils.common import last_prediction_predict, \
+    proportional_predict, extract_distribution
+from bigml.predicate_utils.utils import pack_predicate
+from bigml.util import PRECISION
+from bigml.prediction import Prediction
+from bigml.multivote import BINS_LIMIT, merge_bins
+
+
+OFFSETS = { \
+    "False": {"id": 0,
+              "output": 1,
+              "count": 2,
+              "confidence": 3,
+              "distribution": 4,
+              "distribution_unit": 5,
+              "max_bins": 6,
+              "max": 7,
+              "min": 8,
+              "median": 9,
+              "children#": 10,
+              "children": 11},
+    "True": {"id": 0,
+             "output": 1,
+             "count": 2,
+             "confidence": 3,
+             "distribution": 4,
+             "distribution_unit": 5,
+             "max_bins": 6,
+             "max": 7,
+             "min": 8,
+             "median": 9,
+             "wdistribution": 10,
+             "wdistribution_unit": 11,
+             "weight": 12,
+             "children#": 13,
+             "children": 14}}
+
+
+def dist_median(distribution, count):
+    """Returns the median value for a distribution
+
+    """
+    counter = 0
+    previous_value = None
+    for value, instances in distribution:
+        counter += instances
+        if counter > count / 2.0:
+            if (not count % 2 and (counter - 1) == (count / 2) and
+                    previous_value is not None):
+                return (value + previous_value) / 2.0
+            return value
+        previous_value = value
+    return None
+
+
+def mean(distribution):
+    """Computes the mean of a distribution in the [[point, instances]] syntax
+
+    """
+    addition = 0.0
+    count = 0.0
+    for point, instances in distribution:
+        addition += point * instances
+        count += instances
+    if count > 0:
+        return addition / count
+    return float('nan')
+
+
+def unbiased_sample_variance(distribution, distribution_mean=None):
+    """Computes the unbiased sample variance of a distribution in the
+    [[point, instances]] syntax
+
+    """
+    addition = 0.0
+    count = 0.0
+    if (distribution_mean is None or not
+            isinstance(distribution_mean, numbers.Number)):
+        distribution_mean = mean(distribution)
+    for point, instances in distribution:
+        addition += ((point - distribution_mean) ** 2) * instances
+        count += instances
+    if count > 1:
+        return addition / (count - 1)
+    return float('nan')
+
+
+def regression_error(distribution_variance, population, r_z=1.96):
+    """Computes the variance error
+
+    """
+    if population > 0:
+        chi_distribution = stats.chi2(population)
+        ppf = chi_distribution.ppf(1 - math.erf(r_z / math.sqrt(2)))
+        if ppf != 0:
+            error = distribution_variance * (population - 1) / ppf
+            error = error * ((math.sqrt(population) + r_z) ** 2)
+            return math.sqrt(error / population)
+    return float('nan')
+
+
+def build_regression_tree(node_dict, node=None, distribution=None,
+                          weighted=False, terms=None):
+    """Builds a compressed version of the tree structure as a list of
+    lists, starting from the root node, which is represented by a list:
+    [weight, #predicates, op-code, field, value, term, missing...]
+
+    And each child is represented by a list whose elements are:
+    [id, output, count, confidence, distribution,
+     distribution_unit, max_bins, max,
+     wdistribution, wdistribution_unit, weight, #children, children_nodes_list*]
+    """
+    if terms is None:
+        terms = {}
+    predicate = node_dict.get('predicate', True)
+    outer = node if node else list(pack_predicate(predicate))
+    outer.append(node_dict.get("id"))
+    outer.append(node_dict.get("output"))
+    outer.append(node_dict.get("count"))
+    outer.append(node_dict.get("confidence"))
+    distribution = distribution if distribution is not None else \
+        node_dict.get("objective_summary")
+    distribution_unit, distribution = extract_distribution(distribution)
+    outer.append(distribution)
+    outer.append(distribution_unit)
+    node_median = None
+    summary = node_dict.get("summary", {})
+    if "summary" in node_dict:
+        node_median = summary.get('median')
+    if not node_median:
+        node_median = dist_median(distribution, node_dict.get("count"))
+    node_max = summary.get('maximum') or \
+        max([value for [value, _] in distribution])
+    node_min = summary.get('minimum') or \
+        min([value for [value, _] in distribution])
+    node_max_bins = max(node_dict.get('max_bins', 0),
+                        len(distribution))
+    outer.append(node_max_bins)
+    outer.append(node_max)
+    outer.append(node_min)
+    outer.append(node_median)
+    if weighted:
+        wdistribution_unit, wdistribution = extract_distribution( \
+            node_dict.get("weighted_objective_summary"))
+        outer.append(wdistribution)
+        outer.append(wdistribution_unit)
+        outer.append(node_dict.get("weight"))
+    children = node_dict.get("children", [])
+    outer.append(len(children))
+    children_list = []
+    for child in children:
+        predicate = child.get('predicate')
+        field = predicate.get("field")
+        if field not in terms:
+            terms[field] = []
+        term = predicate.get("term")
+        if term not in terms[field]:
+            terms[field].append(term)
+        inner = pack_predicate(predicate)
+        build_regression_tree(child, node=inner, weighted=weighted, terms=terms)
+        children_list.append(inner)
+    if children_list:
+        outer.append(children_list)
+
+    return outer
+
+
+def regression_proportional_predict(tree, weighted, fields, input_data):
+    """Proportional prediction for regressions
+
+    """
+
+    offset = OFFSETS[str(weighted)]
+    (final_distribution, d_min, d_max, last_node, population,
+     parent_node, path) = proportional_predict( \
+        tree, offset, fields, input_data, path=None)
+    # singular case:
+    # when the prediction is the one given in a 1-instance node
+    if len(list(final_distribution.items())) == 1:
+        prediction, instances = list(final_distribution.items())[0]
+        if instances == 1:
+            return Prediction( \
+                last_node[offset["output"]],
+                path,
+                last_node[offset["confidence"]],
+                distribution=last_node[offset["distribution"]] \
+                    if not weighted else \
+                    last_node[offset["wdistribution"]],
+                count=instances,
+                median=last_node[offset["median"]],
+                distribution_unit=last_node[offset["distribution_unit"]],
+                children=[] if last_node[offset["children#"]] == 0 else \
+                    last_node[offset["children"]],
+                d_min=last_node[offset["min"]],
+                d_max=last_node[offset["max"]])
+    # when there are more instances, sort the elements by their mean
+    distribution = [list(element) for element in
+                    sorted(list(final_distribution.items()),
+                           key=lambda x: x[0])]
+    distribution_unit = ('bins' if len(distribution) > BINS_LIMIT
+                         else 'counts')
+    distribution = merge_bins(distribution, BINS_LIMIT)
+    total_instances = sum([instances
+                           for _, instances in distribution])
+    if len(distribution) == 1:
+        # when there's only one bin, there will be no error, but
+        # we use a correction derived from the parent's error
+        prediction = distribution[0][0]
+        if total_instances < 2:
+            total_instances = 1
+        try:
+            # some strange models can have nodes with no confidence
+            confidence = round(parent_node[offset["confidence"]] /
+                               math.sqrt(total_instances),
+                               PRECISION)
+        except (AttributeError, TypeError):
+            confidence = None
+    else:
+        prediction = mean(distribution)
+        # weighted trees use the unweighted population to
+        # compute the associated error
+        confidence = round(regression_error(
+            unbiased_sample_variance(distribution, prediction),
+            population), PRECISION)
+    return Prediction( \
+        prediction,
+        path,
+        confidence,
+        distribution=distribution,
+        count=total_instances,
+        median=dist_median(distribution, total_instances),
+        distribution_unit=distribution_unit,
+        children=[] if last_node[offset["children#"]] == 0 else \
+            last_node[offset["children"]],
+        d_min=d_min,
+        d_max=d_max)
+
+
+def regression_last_predict(tree, weighted, fields, input_data):
+    """Predict for regression and last prediction missing strategy
+
+    """
+    return last_prediction_predict(tree, OFFSETS[str(weighted)], fields,
+                                   input_data)
diff --git a/bigml/prediction.py b/bigml/prediction.py
index bb36ccdd..19327510 100644
--- a/bigml/prediction.py
+++ b/bigml/prediction.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
-#!/usr/bin/env python
 #
-# Copyright 2015-2019 BigML
+# Copyright 2015-2025 BigML
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may
 # not use this file except in compliance with the License. You may obtain
@@ -21,7 +20,7 @@
 Tree module to store all the available prediction info.
 """
-class Prediction(object):
+class Prediction():
     """A Prediction object containing the predicted Node info or the
        subtree grouped prediction info for proportional missing strategy
diff --git a/bigml/shapwrapper.py b/bigml/shapwrapper.py
new file mode 100644
index 00000000..65586ca2
--- /dev/null
+++ b/bigml/shapwrapper.py
@@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+# pylint: disable=super-init-not-called
+#
+# Copyright 2023-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
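The proportional strategy implemented above is selected through the missing_strategy argument of the local predict methods. A minimal usage sketch, where the stored model path and the input field are hypothetical:

    from bigml.model import Model

    local_model = Model("./storage/model.json")  # hypothetical local file
    # missing_strategy=1 selects the proportional strategy; full=True returns
    # the extended info (confidence, distribution, count, median, ...)
    prediction = local_model.predict({"petal length": 4.2},
                                     missing_strategy=1, full=True)
    print(prediction["prediction"], prediction["confidence"])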
+ +"""A wrapper for models to produce predictions as expected by Shap Explainer + +""" +import numpy as np + +from bigml.supervised import SupervisedModel, extract_id +from bigml.fusion import Fusion +from bigml.fields import Fields +from bigml.api import get_resource_type, get_api_connection + + +class ShapWrapper(): + """ A lightweight wrapper around any supervised model that offers a + predict method adapted to the expected Shap Explainer syntax""" + + def __init__(self, model, api=None, cache_get=None, + operation_settings=None): + + self.api = get_api_connection(api) + resource_id, model = extract_id(model, self.api) + resource_type = get_resource_type(resource_id) + model_class = Fusion if resource_type == "fusion" else SupervisedModel + self.local_model = model_class(model, api=api, cache_get=cache_get, + operation_settings=operation_settings) + objective_id = getattr(self.local_model, "objective_id", None) + self.fields = Fields(self.local_model.fields, + objective_field=objective_id) + self.objective_categories = self.local_model.objective_categories + self.x_headers = [self.fields.field_name(field_id) for field_id in + self.fields.sorted_field_ids()] + self.y_header = self.fields.field_name(self.fields.objective_field) + + def predict(self, x_test, **kwargs): + """Prediction method that interfaces with the Shap library""" + input_data_list = self.fields.from_numpy(x_test) + batch_prediction = self.local_model.batch_predict( + input_data_list, outputs={"output_fields": ["prediction"], + "output_headers": [self.y_header]}, + all_fields=False, **kwargs) + objective_field = self.fields.objective_field_info() + pred_fields = Fields(objective_field) + return pred_fields.to_numpy(batch_prediction, + objective=True).reshape(-1) + + def predict_proba(self, x_test): + """Prediction method that interfaces with the Shap library""" + if self.local_model.regression: + raise ValueError("This method is only available for classification" + " models.") + input_data_list = self.fields.from_numpy(x_test) + np_list = np.empty(shape=(len(input_data_list), + len(self.objective_categories))) + for index, input_data in enumerate(input_data_list): + prediction = self.local_model.predict_probability( + input_data, compact=True) + np_list[index] = np.asarray([prediction]) + return np_list diff --git a/bigml/sourcehandler.py b/bigml/sourcehandler.py deleted file mode 100644 index e51af865..00000000 --- a/bigml/sourcehandler.py +++ /dev/null @@ -1,488 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2014-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
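The new ShapWrapper above exposes the numpy-in, numpy-out prediction interface that SHAP explainers expect. A minimal sketch of wiring it to the shap package; the resource id and background rows are hypothetical, and API credentials are assumed to be set in the environment:

    import numpy as np
    import shap  # assumed to be installed separately

    from bigml.shapwrapper import ShapWrapper

    wrapper = ShapWrapper("model/61e2a2a2a2a2a2a2a2a2a2a2")  # hypothetical id
    # background rows must follow the column order in wrapper.x_headers
    background = np.array([[5.1, 3.5, 1.4, 0.2],
                           [6.2, 2.9, 4.3, 1.3]])
    explainer = shap.KernelExplainer(wrapper.predict, background)
    shap_values = explainer.shap_values(background)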
- -"""Base class for sources' REST calls - - https://bigml.com/api/sources - -""" - -import sys -import os -import urllib2 -import numbers -try: - #added to allow GAE to work - from google.appengine.api import urlfetch - GAE_ENABLED = True -except ImportError: - GAE_ENABLED = False - import ssl - -try: - import simplejson as json -except ImportError: - import json - - -from threading import Thread - - -PYTHON_2_7_9 = len(urllib2.urlopen.__defaults__) > 2 -PYTHON_2 = sys.version_info < (3, 0) - -if PYTHON_2: - from poster.encode import multipart_encode, MultipartParam -if PYTHON_2_7_9: - from bigml.sslposter import StreamingHTTPSHandler, register_openers -elif PYTHON_2: - from poster.streaminghttp import StreamingHTTPSHandler, register_openers -else: - from requests_toolbelt import MultipartEncoder - import mimetypes - -from bigml.util import (localize, clear_console_line, reset_console_line, - console_log, is_url) -from bigml.bigmlconnection import ( - HTTP_CREATED, HTTP_ACCEPTED, HTTP_BAD_REQUEST, - HTTP_UNAUTHORIZED, HTTP_PAYMENT_REQUIRED, HTTP_NOT_FOUND, - HTTP_TOO_MANY_REQUESTS, HTTP_FORBIDDEN, - HTTP_INTERNAL_SERVER_ERROR, GAE_ENABLED, SEND_JSON) -from bigml.bigmlconnection import json_load -from bigml.resourcehandler import (check_resource_type, - resource_is_ready, - get_source_id) -from bigml.constants import SOURCE_PATH, UPLOADING -from bigml.resourcehandler import ResourceHandler, LOGGER - -if PYTHON_2: - register_openers() -else: - import requests - from bigml.util import maybe_save - -class SourceHandler(ResourceHandler): - - """This class is used by the BigML class as - a mixin that provides the REST calls to sources. It should not - be instantiated independently. - - """ - - def __init__(self): - """Initializes the SourceHandler. This class is intended to be - used as a mixin on ResourceHandler, that inherits its - attributes and basic method from BigMLConnection, and must not be - instantiated independently. - - """ - self.source_url = self.url + SOURCE_PATH - - def _create_remote_source(self, url, args=None): - """Creates a new source using a URL - - """ - create_args = {} - if args is not None: - create_args.update(args) - create_args.update({"remote": url}) - create_args = self._add_project(create_args) - body = json.dumps(create_args) - return self._create(self.source_url, body) - - def _create_inline_source(self, src_obj, args=None): - """Create source from inline data - - The src_obj data should be a list of rows stored as dict or - list objects. - """ - create_args = {} - if args is not None: - create_args.update(args) - create_args = self._add_project(create_args) - - # some basic validation - if (not isinstance(src_obj, list) or ( - not all([isinstance(row, dict) for row in src_obj]) and - not all([isinstance(row, list) for row in src_obj]))): - raise TypeError( - 'ERROR: inline source must be a list of dicts or a ' - 'list of lists') - - create_args.update({"data": json.dumps(src_obj)}) - body = json.dumps(create_args) - return self._create(self.source_url, body) - - def _upload_source(self, args, source, out=sys.stdout): - """Uploads a source asynchronously. - - """ - - def update_progress(param, current, total): - """Updates source's progress. 
- - """ - progress = round(current * 1.0 / total, 2) - if progress < 1.0: - source['object']['status']['progress'] = progress - - resource = self._process_source(source['resource'], source['location'], - source['object'], - args=args, progress_bar=True, - callback=update_progress, out=out) - source['code'] = resource['code'] - source['resource'] = resource['resource'] - source['location'] = resource['location'] - source['object'] = resource['object'] - source['error'] = resource['error'] - - def _stream_source(self, file_name, args=None, async_load=False, - progress_bar=False, out=sys.stdout): - """Creates a new source. - - """ - - def draw_progress_bar(param, current, total): - """Draws a text based progress report. - - """ - pct = 100 - ((total - current) * 100) / (total) - console_log("Uploaded %s out of %s bytes [%s%%]" % ( - localize(current), localize(total), pct), reset=True) - create_args = {} - if args is not None: - create_args.update(args) - if 'source_parser' in create_args: - create_args['source_parser'] = json.dumps( - create_args['source_parser']) - - resource_id = None - location = None - resource = None - error = None - - try: - if isinstance(file_name, basestring): - create_args.update({os.path.basename(file_name): - open(file_name, "rb")}) - else: - create_args = create_args.items() - name = 'Stdin input' - create_args.append(MultipartParam(name, filename=name, - fileobj=file_name)) - except IOError, exception: - raise IOError("Error: cannot read training set. %s" % - str(exception)) - - if async_load: - source = { - 'code': HTTP_ACCEPTED, - 'resource': resource_id, - 'location': location, - 'object': {'status': {'message': 'The upload is in progress', - 'code': UPLOADING, - 'progress': 0.0}}, - 'error': error} - upload_args = (create_args, source) - thread = Thread(target=self._upload_source, - args=upload_args, - kwargs={'out': out}) - thread.start() - return source - return self._process_source(resource_id, location, resource, - args=create_args, - progress_bar=progress_bar, - callback=draw_progress_bar, out=out) - - def _process_source(self, resource_id, location, resource, - args=None, progress_bar=False, callback=None, - out=sys.stdout): - """Creates a new source. 
- - """ - code = HTTP_INTERNAL_SERVER_ERROR - error = { - "status": { - "code": code, - "message": "The resource couldn't be created"}} - - if args is None: - args = {} - args = self._add_project(args, True) - - if progress_bar and callback is not None: - body, headers = multipart_encode(args, cb=callback) - else: - body, headers = multipart_encode(args) - - url = self._add_credentials(self.source_url) - - if GAE_ENABLED: - try: - response = urlfetch.fetch(url=url, - payload="".join(body), - method=urlfetch.POST, - headers=headers) - code = response.status_code - content = response.content - if code in [HTTP_CREATED]: - if 'location' in response.headers: - location = response.headers['location'] - resource = json_load(response.content) - resource_id = resource['resource'] - error = {} - elif code in [HTTP_BAD_REQUEST, - HTTP_UNAUTHORIZED, - HTTP_PAYMENT_REQUIRED, - HTTP_FORBIDDEN, - HTTP_NOT_FOUND, - HTTP_TOO_MANY_REQUESTS]: - error = json_load(response.content) - LOGGER.error(self.error_message(error, method='create')) - elif code != HTTP_ACCEPTED: - LOGGER.error("Unexpected error (%s)", code) - code = HTTP_INTERNAL_SERVER_ERROR - except urlfetch.Error, exception: - LOGGER.error("Error establishing connection: %s", - str(exception)) - else: - try: - request = urllib2.Request(url, - body, headers) - # try using the new SSL checking in python 2.7.9 - try: - if not self.verify and PYTHON_2_7_9: - context = ssl.create_default_context( - ssl.Purpose.CLIENT_AUTH) - context.verify_mode = ssl.CERT_NONE - https_handler = StreamingHTTPSHandler(context=context) - opener = urllib2.build_opener(https_handler) - urllib2.install_opener(opener) - response = urllib2.urlopen(request) - else: - response = urllib2.urlopen(request) - except AttributeError: - response = urllib2.urlopen(request) - clear_console_line(out=out) - reset_console_line(out=out) - code = response.getcode() - if code == HTTP_CREATED: - location = response.headers['location'] - content = response.read() - resource = json_load(content) - resource_id = resource['resource'] - error = {} - except ValueError: - LOGGER.error("Malformed response.") - except urllib2.HTTPError, exception: - code = exception.code - if code in [HTTP_BAD_REQUEST, - HTTP_UNAUTHORIZED, - HTTP_PAYMENT_REQUIRED, - HTTP_NOT_FOUND, - HTTP_TOO_MANY_REQUESTS]: - content = exception.read() - error = json_load(content) - LOGGER.error(self.error_message(error, method='create')) - else: - LOGGER.error("Unexpected error (%s)", code) - code = HTTP_INTERNAL_SERVER_ERROR - - except urllib2.URLError, exception: - LOGGER.error("Error establishing connection: %s", - str(exception)) - error = exception.args - return { - 'code': code, - 'resource': resource_id, - 'location': location, - 'object': resource, - 'error': error} - - def _create_local_source(self, file_name, args=None): - """Creates a new source using a local file. - - This function is only used from Python 3. No async-prepared. 
- - """ - create_args = {} - if args is not None: - create_args.update(args) - - for key, value in create_args.items(): - if value is not None and (isinstance(value, list) or - isinstance(value, dict)): - create_args[key] = json.dumps(value) - elif value is not None and isinstance(value, numbers.Number): - # the multipart encoder only accepts strings and files - create_args[key] = str(value) - - - code = HTTP_INTERNAL_SERVER_ERROR - resource_id = None - location = None - resource = None - error = { - "status": { - "code": code, - "message": "The resource couldn't be created"}} - - try: - - if isinstance(file_name, basestring): - name = os.path.basename(file_name) - file_handler = open(file_name, "rb") - else: - name = 'Stdin input' - file_handler = file_name - except IOError: - sys.exit("ERROR: cannot read training set") - - url = self._add_credentials(self.source_url) - create_args = self._add_project(create_args, True) - if GAE_ENABLED: - try: - req_options = { - 'url': url, - 'method': urlfetch.POST, - 'headers': SEND_JSON, - 'data': create_args, - 'files': {name: file_handler}, - 'validate_certificate': self.verify - } - response = urlfetch.fetch(**req_options) - except urlfetch.Error, exception: - LOGGER.error("HTTP request error: %s", - str(exception)) - return maybe_save(resource_id, self.storage, code, - location, resource, error) - else: - try: - files = {"file": (name, - file_handler, - mimetypes.guess_type(name)[0])} - files.update(create_args) - multipart = MultipartEncoder(fields=files) - response = requests.post( \ - url, - headers={'Content-Type': multipart.content_type}, - data=multipart, verify=self.verify) - except (requests.ConnectionError, - requests.Timeout, - requests.RequestException), exc: - LOGGER.error("HTTP request error: %s", str(exc)) - code = HTTP_INTERNAL_SERVER_ERROR - return maybe_save(resource_id, self.storage, code, - location, resource, error) - try: - code = response.status_code - if code == HTTP_CREATED: - location = response.headers['location'] - resource = json_load(response.content) - resource_id = resource['resource'] - error = None - elif code in [HTTP_BAD_REQUEST, - HTTP_UNAUTHORIZED, - HTTP_PAYMENT_REQUIRED, - HTTP_NOT_FOUND, - HTTP_TOO_MANY_REQUESTS]: - error = json_load(response.content) - else: - LOGGER.error("Unexpected error (%s)" % code) - code = HTTP_INTERNAL_SERVER_ERROR - - except ValueError: - LOGGER.error("Malformed response") - - return maybe_save(resource_id, self.storage, code, - location, resource, error) - - def create_source(self, path=None, args=None, async_load=False, - progress_bar=False, out=sys.stdout): - """Creates a new source. - - The source can be a local file path or a URL. - - """ - - if path is None: - raise Exception('A local path or a valid URL must be provided.') - - if is_url(path): - return self._create_remote_source(path, args=args) - elif isinstance(path, list): - return self._create_inline_source(path, args=args) - elif PYTHON_2: - return self._stream_source(file_name=path, args=args, - async_load=async_load, - progress_bar=progress_bar, out=out) - else: - return self._create_local_source(file_name=path, args=args) - - def get_source(self, source, query_string=''): - """Retrieves a remote source. - The source parameter should be a string containing the - source id or the dict returned by create_source. 
- As source is an evolving object that is processed - until it reaches the FINISHED or FAULTY state, thet function will - return a dict that encloses the source values and state info - available at the time it is called. - """ - check_resource_type(source, SOURCE_PATH, - message="A source id is needed.") - source_id = get_source_id(source) - if source_id: - return self._get("%s%s" % (self.url, source_id), - query_string=query_string) - - def source_is_ready(self, source): - """Checks whether a source' status is FINISHED. - - """ - check_resource_type(source, SOURCE_PATH, - message="A source id is needed.") - source = self.get_source(source) - return resource_is_ready(source) - - def list_sources(self, query_string=''): - """Lists all your remote sources. - - """ - return self._list(self.source_url, query_string) - - def update_source(self, source, changes): - """Updates a source. - - Updates remote `source` with `changes'. - - """ - check_resource_type(source, SOURCE_PATH, - message="A source id is needed.") - source_id = get_source_id(source) - if source_id: - body = json.dumps(changes) - return self._update("%s%s" % (self.url, source_id), body) - - def delete_source(self, source): - """Deletes a remote source permanently. - - """ - check_resource_type(source, SOURCE_PATH, - message="A source id is needed.") - source_id = get_source_id(source) - if source_id: - return self._delete("%s%s" % (self.url, source_id)) diff --git a/bigml/sslposter.py b/bigml/sslposter.py deleted file mode 100644 index f110e72b..00000000 --- a/bigml/sslposter.py +++ /dev/null @@ -1,70 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2014-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
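Both this file and sourcehandler.py above are removed because they implement the Python 2 upload stack (poster, urllib2 and the GAE helpers). The public interface for creating sources is unchanged; a minimal sketch against the current bindings, with a hypothetical file path and args:

    from bigml.api import BigML

    api = BigML()  # credentials read from BIGML_USERNAME / BIGML_API_KEY
    source = api.create_source("./data/iris.csv",
                               args={"name": "iris source"})
    api.ok(source)  # polls until the source is FINISHED or FAULTY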
- -"""Provisional patch for the poster library HTTPS streamer class - - Waiting for the poster update to match python 2.7.9 specifications -""" - -import urllib2 -import httplib - -from poster.streaminghttp import (StreamingHTTPSHandler, - StreamingHTTPHandler, - StreamingHTTPRedirectHandler, - StreamingHTTPSConnection) - - -if hasattr(httplib, 'HTTPS'): - class StreamingHTTPSHandler(urllib2.HTTPSHandler): - """Subclass of `urllib2.HTTPSHandler` that uses - StreamingHTTPSConnection as its http connection class.""" - - handler_order = urllib2.HTTPSHandler.handler_order - 1 - - def https_open(self, req): - return self.do_open(StreamingHTTPSConnection, req, - context=self._context) - - def https_request(self, req): - # Make sure that if we're using an iterable object as the request - # body, that we've also specified Content-Length - if req.has_data(): - data = req.get_data() - if hasattr(data, 'read') or hasattr(data, 'next'): - if not req.has_header('Content-length'): - raise ValueError( - "No Content-Length specified for iterable body") - return urllib2.HTTPSHandler.do_request_(self, req) - - -def get_handlers(): - handlers = [StreamingHTTPHandler, StreamingHTTPRedirectHandler] - if hasattr(httplib, "HTTPS"): - handlers.append(StreamingHTTPSHandler) - return handlers - -def register_openers(): - """Register the streaming http handlers in the global urllib2 default - opener object. - - Returns the created OpenerDirector object.""" - opener = urllib2.build_opener(*get_handlers()) - - urllib2.install_opener(opener) - - return opener diff --git a/bigml/supervised.py b/bigml/supervised.py index e7de6cf7..57155fa8 100644 --- a/bigml/supervised.py +++ b/bigml/supervised.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +# pylint: disable=super-init-not-called # -# Copyright 2018-2019 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain
@@ -42,16 +42,19 @@
 """
 import json
+import os
 
-from bigml.api import get_resource_id, get_resource_type, BigML, \
-    get_api_connection
+from bigml.api import get_resource_id, get_resource_type, \
+    get_api_connection, get_ensemble_id
+from bigml.basemodel import BaseModel
 from bigml.model import Model
 from bigml.ensemble import Ensemble
 from bigml.logistic import LogisticRegression
 from bigml.deepnet import Deepnet
 from bigml.linear import LinearRegression
-from bigml.basemodel import BaseModel
+from bigml.constants import OUT_NEW_FIELDS, OUT_NEW_HEADERS, INTERNAL
+from bigml.util import get_data_format, get_formatted_data, format_data
 
 
 COMPONENT_CLASSES = {
@@ -61,17 +64,21 @@
     "deepnet": Deepnet,
     "linearregression": LinearRegression}
 
+DFT_OUTPUTS = ["prediction", "probability"]
+
 
 def extract_id(model, api):
     """Extract the resource id from:
         - a resource ID string
+        - a list of resources (ensemble + models)
         - a resource structure
        - the name of the file that contains a resource structure
 
     """
     # the string can be a path to a JSON file
-    if isinstance(model, basestring):
+    if isinstance(model, str):
         try:
+            path = os.path.dirname(os.path.abspath(model))
             with open(model) as model_file:
                 model = json.load(model_file)
             resource_id = get_resource_id(model)
@@ -79,21 +86,27 @@ def extract_id(model, api):
                 raise ValueError("The JSON file does not seem"
                                  " to contain a valid BigML resource"
                                  " representation.")
+            api.storage = path
         except IOError:
             # if it is not a path, it can be a model id
             resource_id = get_resource_id(model)
             if resource_id is None:
-                if model.find('model/') > -1:
-                    raise Exception(
-                        api.error_message(model,
-                                          resource_type='model',
-                                          method='get'))
-                else:
-                    raise IOError("Failed to open the expected JSON file"
-                                  " at %s" % model)
+                for resource_type in COMPONENT_CLASSES.keys():
+                    if model.find("%s/" % resource_type) > -1:
+                        raise Exception(
+                            api.error_message(model,
+                                              resource_type=resource_type,
+                                              method="get"))
+                raise IOError("Failed to open the expected JSON file"
+                              " at %s." % model)
         except ValueError:
-            raise ValueError("Failed to interpret %s."
-                             " JSON file expected.")
+            raise ValueError("Failed to interpret %s."
+                             " JSON file expected." % model)
+    if isinstance(model, list):
+        resource_id = get_ensemble_id(model[0])
+        if resource_id is None:
+            raise ValueError("The first argument does not contain a valid"
+                             " supervised model structure.")
     else:
         resource_id = get_resource_id(model)
         if resource_id is None:
@@ -110,22 +123,33 @@ class SupervisedModel(BaseModel):
 
     """
 
-    def __init__(self, model, api=None):
+    def __init__(self, model, api=None, cache_get=None,
+                 operation_settings=None):
         self.api = get_api_connection(api)
-        resource_id, model = extract_id(model, api)
+        resource_id, model = extract_id(model, self.api)
         resource_type = get_resource_type(resource_id)
-        kwargs = {"api": self.api}
+        kwargs = {"api": self.api, "cache_get": cache_get}
+        if resource_type != "linearregression":
+            kwargs.update({"operation_settings": operation_settings})
         local_model = COMPONENT_CLASSES[resource_type](model, **kwargs)
         self.__class__.__bases__ = local_model.__class__.__bases__
-        for attr, value in local_model.__dict__.items():
+        for attr, value in list(local_model.__dict__.items()):
             setattr(self, attr, value)
         self.local_model = local_model
+        self.regression = resource_type == "linearregression" or \
+            self.local_model.regression
+        if not self.regression:
+            self.objective_categories = self.local_model.objective_categories
+        self.name = self.local_model.name
+        self.description = self.local_model.description
 
     def predict(self, *args, **kwargs):
+        """Delegating method to local model object"""
         return self.local_model.predict(*args, **kwargs)
 
     def predict_probability(self, *args, **kwargs):
+        """Delegating method to local model object"""
         new_kwargs = {}
         new_kwargs.update(kwargs)
         try:
@@ -133,3 +157,77 @@ def predict_probability(self, *args, **kwargs):
         except TypeError:
             del new_kwargs["missing_strategy"]
         return self.local_model.predict_probability(*args, **new_kwargs)
+
+    def predict_confidence(self, *args, **kwargs):
+        """Delegating method to local model object"""
+        new_kwargs = {}
+        new_kwargs.update(kwargs)
+        try:
+            return self.local_model.predict_confidence(*args, **new_kwargs)
+        except TypeError:
+            del new_kwargs["missing_strategy"]
+        return self.local_model.predict_confidence(*args, **new_kwargs)
+
+    def data_transformations(self):
+        """Returns the pipeline transformations prior to the modeling
+        step as a pipeline, so that they can be used in local predictions.
+        """
+        return self.local_model.data_transformations()
+
+    def batch_predict(self, input_data_list, outputs=None, all_fields=True,
+                      **kwargs):
+        """Creates a batch prediction for a list of inputs using the local
+        supervised model. Allows defining output settings that decide the
+        fields to be added to the input_data (prediction, probability,
+        etc.) and the names that we want to assign to these new
+        fields. The outputs argument accepts a dictionary with keys
+        "output_fields", to contain a list of the prediction properties to add
+        (["prediction", "probability"] by default) and "output_headers", to
+        contain a list of the headers to be used when adding them (identical
+        to "output_fields" list, by default).
+
+        :param input_data_list: List of input data to be predicted
+        :type input_data_list: list or Pandas DataFrame
+        :param dict outputs: properties that define the headers and fields to
+            be added to the input data
+        :param boolean all_fields: whether all the fields in the input data
+            should be part of the response
+        :return: the list of input data plus the predicted values
+        :rtype: list or Pandas DataFrame, depending on the input type of
+            input_data_list
+        """
+        if outputs is None:
+            outputs = {}
+        new_fields = outputs.get(OUT_NEW_FIELDS, DFT_OUTPUTS)
+        new_headers = outputs.get(OUT_NEW_HEADERS, new_fields)
+        if len(new_fields) > len(new_headers):
+            new_headers.extend(new_fields[len(new_headers):])
+        else:
+            new_headers = new_headers[0: len(new_fields)]
+        data_format = get_data_format(input_data_list)
+        inner_data_list = get_formatted_data(input_data_list, INTERNAL)
+        predictions_list = []
+        kwargs.update({"full": True})
+        for input_data in inner_data_list:
+            prediction = self.predict(input_data, **kwargs)
+            prediction_data = {}
+            if all_fields:
+                prediction_data.update(input_data)
+            for index, key in enumerate(new_fields):
+                try:
+                    prediction_data[new_headers[index]] = prediction[key]
+                except KeyError:
+                    pass
+            predictions_list.append(prediction_data)
+        if data_format != INTERNAL:
+            return format_data(predictions_list, out_format=data_format)
+        return predictions_list
+
+    #pylint: disable=locally-disabled,arguments-differ
+    def dump(self, **kwargs):
+        """Delegate to local model"""
+        self.local_model.dump(**kwargs)
+
+    def dumps(self):
+        """Delegate to local model"""
+        return self.local_model.dumps()
diff --git a/bigml/tests/compare_dataset_steps.py b/bigml/tests/compare_dataset_steps.py
new file mode 100644
index 00000000..04bc9110
--- /dev/null
+++ b/bigml/tests/compare_dataset_steps.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+#pylint: disable=locally-disabled,unused-argument,no-member
+#
+# Copyright 2022-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
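A short usage sketch for the batch_predict method added above; the model file and input rows are hypothetical:

    from bigml.supervised import SupervisedModel

    model = SupervisedModel("./storage/model.json")  # hypothetical local file
    rows = [{"petal length": 4.2}, {"petal length": 1.3}]
    result = model.batch_predict(
        rows,
        outputs={"output_fields": ["prediction", "probability"],
                 "output_headers": ["pred", "prob"]},
        all_fields=False)
    # result is a list of dicts keyed by the "output_headers" names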
+import json + +from bigml.dataset import Dataset + +from .world import res_filename, eq_ + + +def i_create_a_local_dataset_from_file(step, dataset_file): + """Step: I create a local dataset from a file""" + step.bigml["local_dataset"] = Dataset(res_filename(dataset_file)) + + +def the_transformed_data_is(step, input_data, output_data): + """Checking expected transformed data""" + if input_data is None: + input_data = "{}" + if output_data is None: + output_data = "{}" + input_data = json.loads(input_data) + output_data = json.loads(output_data) + transformed_data = step.bigml["local_dataset"].transform([input_data]) + for key, value in transformed_data[0].items(): + eq_(output_data.get(key), value) diff --git a/bigml/tests/compare_forecasts_steps.py b/bigml/tests/compare_forecasts_steps.py index b0b998bf..0d4fe85a 100644 --- a/bigml/tests/compare_forecasts_steps.py +++ b/bigml/tests/compare_forecasts_steps.py @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2017-2019 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -17,31 +15,30 @@ import json -import os -from nose.tools import eq_, assert_almost_equal -from world import world, res_filename +from .world import eq_, approx_ -#@step(r'I create a local forecast for "(.*)"') def i_create_a_local_forecast(step, input_data): + """Step: I create a local forecast for """ input_data = json.loads(input_data) - world.local_forecast = world.local_time_series.forecast(input_data) + step.bigml["local_forecast"] = step.bigml[ \ + "local_time_series"].forecast(input_data) -#@step(r'the local forecast is "(.*)"') def the_local_forecast_is(step, local_forecasts): + """Step: the local forecast is """ local_forecasts = json.loads(local_forecasts) attrs = ["point_forecast", "model"] for field_id in local_forecasts: - forecast = world.local_forecast[field_id] + forecast = step.bigml["local_forecast"][field_id] local_forecast = local_forecasts[field_id] - eq_(len(forecast), len(local_forecast), "forecast: %s" % forecast) - for index in range(len(forecast)): + eq_(len(forecast), len(local_forecast), msg="forecast: %s" % forecast) + for index, forecast_item in enumerate(forecast): for attr in attrs: - if isinstance(forecast[index][attr], list): - for pos, item in enumerate(forecast[index][attr]): - assert_almost_equal(local_forecast[index][attr][pos], - item, places=5) + if isinstance(forecast_item[attr], list): + for pos, item in enumerate(forecast_item[attr]): + approx_(local_forecast[index][attr][pos], + item, precision=5) else: - eq_(forecast[index][attr], local_forecast[index][attr]) + eq_(forecast_item[attr], local_forecast[index][attr]) diff --git a/bigml/tests/compare_pipeline_steps.py b/bigml/tests/compare_pipeline_steps.py new file mode 100644 index 00000000..146ea408 --- /dev/null +++ b/bigml/tests/compare_pipeline_steps.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2022-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
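The compare_dataset_steps module above exercises the local Dataset transformer; outside the test harness, the equivalent calls would look like this (the stored dataset file and the input field are hypothetical):

    from bigml.dataset import Dataset

    local_dataset = Dataset("./storage/dataset.json")  # hypothetical file
    transformed = local_dataset.transform([{"plasma glucose": 120}])
    print(transformed[0])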
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +import json +import os +import zipfile + +from bigml.pipeline.pipeline import BMLPipeline, Pipeline +from bigml.api import BigML + +from .world import res_filename, eq_, ok_ + + +def i_expand_file_with_models_list(step, pipeline_file, models_list): + """Extracting models from zip""" + inner_files = [] + models_list = json.loads(models_list) + for resource_id in models_list: + inner_files.append(resource_id.replace("/", "_")) + + pipeline_file = res_filename(pipeline_file) + with zipfile.ZipFile(pipeline_file, 'r') as zip_ref: + filenames = [os.path.basename(filename) for + filename in zip_ref.namelist()] + ok_(all(filename in filenames for filename in inner_files)) + zip_ref.extractall(os.path.dirname(pipeline_file)) + + +def i_create_a_local_pipeline_from_models_list( + step, models_list, name, storage=None): + """Step: I create a local pipeline for named """ + if not isinstance(models_list, list): + models_list = json.loads(models_list) + kwargs = {} + if storage is not None: + kwargs = {'api': BigML(storage=res_filename(storage))} + step.bigml["local_pipeline"] = BMLPipeline(name, + models_list, + **kwargs) + return step.bigml["local_pipeline"] + + +def the_pipeline_transformed_data_is(step, input_data, output_data): + """Checking pipeline's transform""" + if input_data is None: + input_data = "{}" + if output_data is None: + output_data = "{}" + input_data = json.loads(input_data) + output_data = json.loads(output_data) + transformed_data = step.bigml["local_pipeline"].transform([input_data]) + for key, value in transformed_data[0].items(): + eq_(output_data.get(key), value) + + +def the_pipeline_result_key_is(step, input_data, key, value, precision=None): + """Checking pipeline transformed property""" + if input_data is None: + input_data = "{}" + input_data = json.loads(input_data) + transformed_data = step.bigml["local_pipeline"].transform([input_data]) + pipe_value = transformed_data[0].get(key) + if precision is not None and not isinstance(value, str): + pipe_value = round(pipe_value, precision) + value = round(value, precision) + eq_(str(value), str(pipe_value)) + + +def i_create_composed_pipeline(step, pipelines_list, name): + """Creating local Pipeline""" + step.bigml["local_pipeline"] = Pipeline(name, pipelines_list) diff --git a/bigml/tests/compare_predictions_steps.py b/bigml/tests/compare_predictions_steps.py index 4ace9f2a..b0019411 100644 --- a/bigml/tests/compare_predictions_steps.py +++ b/bigml/tests/compare_predictions_steps.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member +#pylint: disable=locally-disabled,pointless-string-statement # -# Copyright 2012-2019 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
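Similarly, the pipeline steps above build local pipelines from stored models; a minimal sketch with hypothetical names, model id and storage folder:

    from bigml.api import BigML
    from bigml.pipeline.pipeline import BMLPipeline

    api = BigML(storage="./storage")  # hypothetical local storage folder
    pipeline = BMLPipeline("pipeline1",
                           ["model/61e2a2a2a2a2a2a2a2a2a2a2"],  # hypothetical
                           api=api)
    rows = pipeline.transform([{"petal length": 4.2}])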
You may obtain @@ -19,379 +20,581 @@ import json import os -from nose.tools import eq_, assert_almost_equal, assert_is_not_none -from world import world, res_filename +from zipfile import ZipFile from bigml.model import Model, cast_prediction from bigml.logistic import LogisticRegression from bigml.cluster import Cluster from bigml.anomaly import Anomaly from bigml.association import Association from bigml.multimodel import MultiModel -from bigml.multivote import MultiVote from bigml.topicmodel import TopicModel from bigml.deepnet import Deepnet from bigml.linear import LinearRegression from bigml.supervised import SupervisedModel +from bigml.local_model import LocalModel from bigml.fusion import Fusion from bigml.pca import PCA +from bigml.shapwrapper import ShapWrapper -from create_prediction_steps import check_prediction +from .create_prediction_steps import check_prediction +from .world import world, res_filename, eq_, approx_, ok_ + + +def extract_zip(input_zip): + """Extracting file names in zip""" + with ZipFile(input_zip) as zip_handler: + return {name: zip_handler.read(name) for name in \ + zip_handler.namelist()} + -#@step(r'I retrieve a list of remote models tagged with "(.*)"') def i_retrieve_a_list_of_remote_models(step, tag): + """Step: I retrieve a list of remote models tagged with """ world.list_of_models = [ \ world.api.get_model(model['resource']) for model in - world.api.list_models(query_string="project=%s;tags__in=%s" % \ + world.api.list_models(query_string="project=%s&tags__in=%s" % \ (world.project_id, tag))['objects']] -#@step(r'I retrieve a list of remote logistic regression tagged with "(.*)"') def i_retrieve_a_list_of_remote_logistic_regressions(step, tag): + """Step: I retrieve a list of remote logistic regression tagged with + + """ world.list_of_models = [ \ world.api.get_logistic_regression(model['resource']) for model in world.api.list_logistic_regressions( \ - query_string="project=%s;tags__in=%s" % \ + query_string="project=%s&tags__in=%s" % \ + (world.project_id, tag))['objects']] + + +def i_retrieve_a_list_of_remote_linear_regressions(step, tag): + """Step: I retrieve a list of remote linear regression tagged with """ + world.list_of_models = [ \ + world.api.get_linear_regression(model['resource']) for model in + world.api.list_linear_regressions( \ + query_string="project=%s&tags__in=%s" % \ (world.project_id, tag))['objects']] -#@step(r'I create a local model from a "(.*)" file$') def i_create_a_local_model_from_file(step, model_file): - world.local_model = Model(res_filename(model_file)) + """Step: I create a local model from a file""" + step.bigml["local_model"] = Model(res_filename(model_file)) + + +def i_create_a_local_deepnet_from_zip_file(step, deepnet_file, + operation_settings=None): + """Step: I create a local deepnet from a file""" + zipped_files = extract_zip(res_filename(deepnet_file)) + deepnet = json.loads(list(zipped_files.values())[0]) + step.bigml["local_model"] = Deepnet(deepnet, + operation_settings=operation_settings) + + +def i_create_a_local_supervised_model_from_file(step, model_file): + """Step: I create a local supervised model from a file""" + step.bigml["local_model"] = SupervisedModel(res_filename(model_file)) -#@step(r'I create a local model$') -def i_create_a_local_model(step): - world.local_model = Model(world.model) +def i_create_a_local_shap_wrapper_from_file(step, model_file): + """Step: I create a local ShapWrapper from a file""" + step.bigml["local_model"] = ShapWrapper(res_filename(model_file)) + + +def 
i_create_a_local_model(step, pre_model=False): + """Step: I create a local model""" + step.bigml["local_model"] = Model(world.model) + if pre_model: + step.bigml["local_pipeline"] = step.bigml["local_model"].data_transformations() + -#@step(r'I create a local fusion$') def i_create_a_local_fusion(step): - world.local_model = Fusion(world.fusion['resource']) - world.local_ensemble = None + """Step: I create a local fusion""" + step.bigml["local_model"] = Fusion(world.fusion['resource']) + step.bigml["local_ensemble"] = None + -#@step(r'I create a local supervised model$') def i_create_a_local_supervised_model(step, model_type=None): + """Step: I create a local supervised model""" if model_type is None: - model = world.model - else: - model = getattr(world, model_type) - world.local_model = SupervisedModel(model) + model_type = "model" + model = getattr(world, model_type) + step.bigml["local_model"] = SupervisedModel(model) -#@step(r'I create a multiple local prediction for "(.*)"') -def i_create_a_multiple_local_prediction(step, data=None): +def i_create_a_local_bigml_model(step, model_type=None): + """Step: I create a local BigML model""" + if model_type is None: + model_type = "model" + model = getattr(world, model_type) + step.bigml["local_model"] = LocalModel(model) + + +def i_create_a_local_bigml_model_prediction(step, data=None, + prediction_type=None, **kwargs): + """Step: I create a local prediction for """ if data is None: data = "{}" data = json.loads(data) - world.local_prediction = world.local_model.predict(data, multiple='all') + if prediction_type is None: + prediction_type = "prediction" + if kwargs is None: + kwargs = {} + kwargs.update({"full": True}) + step.bigml["local_%s" % prediction_type] = step.bigml[ + "local_model"].predict(data, **kwargs) + + +def the_local_bigml_prediction_is(step, value, prediction_type=None, key=None, + precision=None): + """Step: the local BigML model prediction is + """ + prediction = step.bigml["local_%s" % prediction_type] + if key is not None: + prediction = prediction[key] + eq_(value, prediction, precision=precision) -#@step(r'I create a local prediction for "(.*)" with confidence$') -def i_create_a_local_prediction_with_confidence(step, data=None): + +def i_create_a_local_prediction_with_confidence(step, data=None, + pre_model=None): + """Step: I create a local prediction for with confidence""" if data is None: data = "{}" - data = json.loads(data) - world.local_prediction = world.local_model.predict(data, - full=True) + input_data = json.loads(data) + if pre_model is not None: + input_data = pre_model.transform([input_data])[0] + step.bigml["local_prediction"] = step.bigml["local_model"].predict( + input_data, full=True) -#@step(r'I create a local prediction for "(.*)"$') -def i_create_a_local_prediction(step, data=None): +def i_create_a_shap_local_prediction(step, data=None): + """Step: I create a local prediction for """ + if data is None: + data = "[]" + step.bigml["local_prediction"] = step.bigml["local_model"].predict( + data).tolist()[0] + + +def i_create_a_local_prediction(step, data=None, pre_model=None): + """Step: I create a local prediction for """ if data is None: data = "{}" data = json.loads(data) - world.local_prediction = world.local_model.predict(data, full=True) + if pre_model is not None: + data = pre_model.transform([data])[0] + step.bigml["local_prediction"] = step.bigml["local_model"].predict(data, full=True) + + +def i_create_a_local_regions_prediction(step, image_file=None): + """Step: I create a local images 
prediction for """ + if image_file is None: + return None + data = res_filename(image_file) + step.bigml["local_prediction"] = step.bigml["local_model"].predict(data, full=True) + return step.bigml["local_prediction"] -#@step(r'I create a local prediction for "(.*)" in operating point "(.*)"$') def i_create_a_local_prediction_op(step, data=None, operating_point=None): + """Step: I create a local prediction for in operating point + + """ if data is None: data = "{}" - assert_is_not_none(operating_point) + ok_(operating_point is not None) data = json.loads(data) - world.local_prediction = world.local_model.predict( \ + step.bigml["local_prediction"] = step.bigml["local_model"].predict( \ data, operating_point=operating_point) -#@step(r'I create a local ensemble prediction for "(.*)" in operating point "(.*)"$') def i_create_a_local_ensemble_prediction_op(step, data=None, operating_point=None): + """Step: I create a local ensemble prediction for in operating + point + """ if data is None: data = "{}" - assert_is_not_none(operating_point) + ok_(operating_point is not None) data = json.loads(data) - world.local_prediction = world.local_ensemble.predict( \ + step.bigml["local_prediction"] = step.bigml["local_ensemble"].predict( \ data, operating_point=operating_point) -#@step(r'I create local probabilities for "(.*)"$') def i_create_local_probabilities(step, data=None): + """Step: I create local probabilities for """ if data is None: data = "{}" data = json.loads(data) + model = step.bigml["local_model"] + step.bigml["local_probabilities"] = model.predict_probability( + data, compact=True) + + +def i_create_shap_local_probabilities(step, data=None): + """Step: I create shap local probabilities for """ + model = step.bigml["local_model"] + step.bigml["local_probabilities"] = model.predict_proba( + data).tolist()[0] - model = world.local_model - world.local_probabilities = model.predict_probability(data, compact=True) -#@step(r'I create a local ensemble prediction for "(.*)"$') def i_create_a_local_ensemble_prediction(step, data=None): + """Step: I create a local ensemble prediction for """ if data is None: data = "{}" data = json.loads(data) - world.local_prediction = world.local_ensemble.predict(data) + step.bigml["local_prediction"] = step.bigml["local_ensemble"].predict(data) -#@step(r'I create a local deepnet prediction for "(.*)"$') -def i_create_a_local_deepnet_prediction(step, data=None): + +def i_create_a_local_deepnet_prediction(step, data=None, image_fields=None, + full=False): + """Step: I create a local deepnet prediction for """ if data is None: data = "{}" + if image_fields is None: + image_fields = [] data = json.loads(data) - world.local_prediction = world.local_model.predict(data) + for field in image_fields: + if field in data: + data[field] = res_filename(data[field]) + step.bigml["local_prediction"] = step.bigml["local_model"].predict(data, full=full) + -#@step(r'I create a local deepnet prediction with operating point for "(.*)"$') def i_create_a_local_deepnet_prediction_with_op(step, data=None, operating_point=None): + """Step: I create a local deepnet prediction with operating point + for + """ if data is None: data = "{}" data = json.loads(data) - world.local_prediction = world.local_model.predict( \ + step.bigml["local_prediction"] = step.bigml["local_model"].predict( \ data, operating_point=operating_point) -#@step(r'I create a local prediction using median for "(.*)"$') + def i_create_a_local_median_prediction(step, data=None): + """Step: I create a local prediction 
using median for <data>
+    """
     if data is None:
         data = "{}"
     data = json.loads(data)
-    world.local_prediction = world.local_model.predict(data, median=True)
+    step.bigml["local_prediction"] = step.bigml["local_model"].predict(data, full=True)

-#@step(r'I create a local multimodel batch prediction using median for "(.*)"$')
-def i_create_a_local_mm_median_batch_prediction(self, data=None):
+def i_create_a_local_mm_median_batch_prediction(step, data=None):
+    """Step: I create a local multimodel batch prediction using median
+    for <data>
+    """
     if data is None:
         data = "{}"
     data = json.loads(data)
-    world.local_prediction = world.local_model.batch_predict(
+    step.bigml["local_prediction"] = step.bigml["local_model"].batch_predict(
         [data], to_file=False, use_median=True)[0].predictions[0]['prediction']

-#@step(r'I create a proportional missing strategy local prediction
-#      using median for "(.*)"$')
 def i_create_a_local_proportional_median_prediction(step, data=None):
+    """Step: I create a proportional missing strategy local prediction
+    using median for <data>
+    """
     if data is None:
         data = "{}"
     data = json.loads(data)
-    world.local_prediction = world.local_model.predict( \
-        data, missing_strategy=1, median=True)
+    step.bigml["local_prediction"] = step.bigml["local_model"].predict( \
+        data, missing_strategy=1, full=True)

-#@step(r'I create a local cluster')
-def i_create_a_local_cluster(step):
-    world.local_cluster = Cluster(world.cluster["resource"])
+def i_create_a_local_cluster(step, pre_model=False):
+    """Step: I create a local cluster"""
+    step.bigml["local_cluster"] = Cluster(world.cluster["resource"])
+    if pre_model:
+        step.bigml["local_pipeline"] = step.bigml["local_cluster"].data_transformations()

-#@step(r'I create a local centroid for "(.*)"')
-def i_create_a_local_centroid(step, data=None):
+def i_create_a_local_centroid(step, data=None, pre_model=None):
+    """Step: I create a local centroid for <data>"""
     if data is None:
         data = "{}"
     data = json.loads(data)
-    for key, value in data.items():
+    for key, value in list(data.items()):
         if value == "":
             del data[key]
-    world.local_centroid = world.local_cluster.centroid(data)
+    if pre_model is not None:
+        data = pre_model.transform([data])[0]
+    step.bigml["local_centroid"] = step.bigml["local_cluster"].centroid(data)

-#@step(r'the local centroid is "(.*)" with distance "(.*)"')
 def the_local_centroid_is(step, centroid, distance):
-    check_prediction(world.local_centroid['centroid_name'], centroid)
-    check_prediction(world.local_centroid['distance'], distance)
+    """Step: the local centroid is <centroid> with distance <distance>"""
+    check_prediction(step.bigml["local_centroid"]['centroid_name'], centroid)
+    check_prediction(step.bigml["local_centroid"]['distance'], distance)
+

-#@step(r'I create a local anomaly detector$')
-def i_create_a_local_anomaly(step):
-    world.local_anomaly = Anomaly(world.anomaly['resource'])
+def i_create_a_local_anomaly(step, pre_model=False):
+    """Step: I create a local anomaly detector"""
+    step.bigml["local_anomaly"] = Anomaly(world.anomaly["resource"])
+    if pre_model:
+        step.bigml["local_pipeline"] = step.bigml["local_anomaly"].data_transformations()

-#@step(r'I create a local anomaly score for "(.*)"$')
-def i_create_a_local_anomaly_score(step, input_data):
+def i_create_a_local_anomaly_score(step, input_data, pre_model=None):
+    """Step: I create a local anomaly score for <input_data>"""
     input_data = json.loads(input_data)
-    world.local_anomaly_score = world.local_anomaly.anomaly_score( \
+    if pre_model is not None:
+        input_data = pre_model.transform([input_data])[0]
+    step.bigml["local_anomaly_score"] = step.bigml["local_anomaly"].anomaly_score( \
         input_data)

-#@step(r'the local anomaly score is "(.*)"$')
+
 def the_local_anomaly_score_is(step, score):
-    eq_(str(round(world.local_anomaly_score, 2)),
+    """Step: the local anomaly score is <score>"""
+    eq_(str(round(step.bigml["local_anomaly_score"], 2)),
         str(round(float(score), 2)))

-#@step(r'I create a local association')
-def i_create_a_local_association(step):
-    world.local_association = Association(world.association)
+def i_create_a_local_association(step, pre_model=False):
+    """Step: I create a local association"""
+    step.bigml["local_association"] = Association(world.association)
+    if pre_model:
+        step.bigml["local_pipeline"] = step.bigml["local_association"].data_transformations()
+

-#@step(r'I create a proportional missing strategy local prediction for "(.*)"')
 def i_create_a_proportional_local_prediction(step, data=None):
+    """Step: I create a proportional missing strategy local prediction for
+    <data>
+    """
     if data is None:
         data = "{}"
     data = json.loads(data)
-    world.local_prediction = world.local_model.predict(
+    step.bigml["local_prediction"] = step.bigml["local_model"].predict(
         data, missing_strategy=1, full=True)
-    world.local_prediction = cast_prediction(world.local_prediction,
+    step.bigml["local_prediction"] = cast_prediction(step.bigml["local_prediction"],
                                              to="list", confidence=True)

-#@step(r'I create a prediction from a multi model for "(.*)"')
 def i_create_a_prediction_from_a_multi_model(step, data=None):
+    """Step: I create a prediction from a multi model for <data>"""
     if data is None:
         data = "{}"
     data = json.loads(data)
-    world.local_prediction = world.local_model.predict(data)
+    step.bigml["local_prediction"] = step.bigml["local_model"].predict(data)

-#@step(r'I create a batch multimodel prediction for "(.*)"')
 def i_create_a_batch_prediction_from_a_multi_model(step, data=None):
+    """Step: I create a batch multimodel prediction for <data>"""
     if data is None:
         data = "[{}]"
     data = json.loads(data)
-    world.local_prediction = world.local_model.batch_predict(data,
+    step.bigml["local_prediction"] = step.bigml["local_model"].batch_predict(data,
                                                              to_file=False)

-#@step(r'the predictions are "(.*)"')
 def the_batch_mm_predictions_are(step, predictions):
+    """Step: the predictions are <predictions>"""
     if predictions is None:
         predictions = "[{}]"
     predictions = json.loads(predictions)
-    for i in range(len(predictions)):
-        multivote = world.local_prediction[i]
-        for prediction in multivote.predictions:
-            eq_(prediction['prediction'], predictions[i])
+    for index, prediction in enumerate(predictions):
+        multivote = step.bigml["local_prediction"][index]
+        for mv_prediction in multivote.predictions:
+            eq_(mv_prediction['prediction'], prediction)

-#@step(r'the multiple local prediction is "(.*)"')
 def the_multiple_local_prediction_is(step, prediction):
-    local_prediction = world.local_prediction
+    """Step: the multiple local prediction is <prediction>"""
+    local_prediction = step.bigml["local_prediction"]
     prediction = json.loads(prediction)
     eq_(local_prediction, prediction)

-#@step(r'the local prediction\'s confidence is "(.*)"')
+
 def the_local_prediction_confidence_is(step, confidence):
-    if (isinstance(world.local_prediction, list) or
-            isinstance(world.local_prediction, tuple)):
-        local_confidence = world.local_prediction[1]
+    """Step: the local prediction's confidence is <confidence>"""
+    if isinstance(step.bigml["local_prediction"], (list, tuple)):
+        local_confidence = step.bigml["local_prediction"][1]
     else:
-        local_confidence = world.local_prediction.get('confidence', \
-            world.local_prediction.get('probability'))
+        local_confidence = step.bigml["local_prediction"].get('confidence', \
+            step.bigml["local_prediction"].get('probability'))
     local_confidence = round(float(local_confidence), 4)
     confidence = round(float(confidence), 4)
     eq_(local_confidence, confidence)

-#@step(r'the local prediction is "(.*)"')
-def the_local_prediction_is(step, prediction):
-    if (isinstance(world.local_prediction, list) or
-            isinstance(world.local_prediction, tuple)):
-        local_prediction = world.local_prediction[0]
-    elif isinstance(world.local_prediction, dict):
-        local_prediction = world.local_prediction['prediction']
+
+def the_highest_local_prediction_confidence_is(
+        step, input_data, confidence, missing_strategy=None):
+    """Step: the highest local prediction's confidence for <input_data> is <confidence>
+    """
+    input_data = json.loads(input_data)
+    kwargs = {}
+    if missing_strategy is not None:
+        kwargs.update({"missing_strategy": missing_strategy})
+    local_confidence = step.bigml["local_model"].predict_confidence(input_data,
+                                                                    **kwargs)
+    if isinstance(local_confidence, dict):
+        local_confidence = round(float(local_confidence["confidence"]), 4)
     else:
-        local_prediction = world.local_prediction
-    if hasattr(world, "local_ensemble") and world.local_ensemble is not None:
-        world.local_model = world.local_ensemble
-    if (hasattr(world.local_model, "regression") and \
-            world.local_model.regression) or \
-            (isinstance(world.local_model, MultiModel) and \
-            world.local_model.models[0].regression):
-        local_prediction = round(float(local_prediction), 4)
-        prediction = round(float(prediction), 4)
-        assert_almost_equal(local_prediction, float(prediction),
-                            places=5)
+        local_confidence = round(float(max([pred["confidence"] for pred in local_confidence])), 4)
+    confidence = round(float(confidence), 4)
+    eq_(local_confidence, confidence)
+
+
+def the_local_prediction_is(step, prediction, precision=4):
+    """Step: the local prediction is <prediction>"""
+    if isinstance(step.bigml["local_prediction"], (list, tuple)):
+        local_prediction = step.bigml["local_prediction"][0]
+    elif isinstance(step.bigml["local_prediction"], dict):
+        local_prediction = step.bigml["local_prediction"]['prediction']
     else:
-        eq_(local_prediction, prediction)
+        local_prediction = step.bigml["local_prediction"]
+    if step.bigml.get("local_ensemble") is not None:
+        step.bigml["local_model"] = step.bigml["local_ensemble"]
+    if (hasattr(step.bigml["local_model"], "regression") and \
+            step.bigml["local_model"].regression) or \
+            (isinstance(step.bigml["local_model"], MultiModel) and \
+            step.bigml["local_model"].models[0].regression):
+        local_prediction = round(float(local_prediction), precision)
+        prediction = round(float(prediction), precision)
+        approx_(local_prediction, float(prediction), precision=precision)
+    else:
+        if isinstance(local_prediction, str):
+            eq_(local_prediction, prediction)
+        else:
+            if isinstance(prediction, str):
+                prediction = float(prediction)
+            eq_(round(local_prediction, precision),
+                round(float(prediction), precision))
+
+
+def the_local_regions_prediction_is(step, prediction):
+    """Step: the local regions prediction is <prediction>"""
+    prediction = json.loads(prediction)
+    eq_(prediction, step.bigml["local_prediction"])
+

-#@step(r'the local probabilities are "(.*)"')
 def the_local_probabilities_are(step, prediction):
-    local_probabilities = world.local_probabilities
+    """Step: the local probabilities are <probabilities>"""
+    local_probabilities =
step.bigml["local_probabilities"] expected_probabilities = [float(p) for p in json.loads(prediction)] for local, expected in zip(local_probabilities, expected_probabilities): - assert_almost_equal(local, expected, places=4) + approx_(local, expected, precision=4) + + +def the_local_proba_prediction_is(step, proba_prediction): + """Step: the local probabilities prediction is """ + local_probabilities = step.bigml["local_probabilities"] + + for local, expected in zip(local_probabilities, proba_prediction): + approx_(local, expected, precision=4) + -#@step(r'the local ensemble prediction is "(.*)"') def the_local_ensemble_prediction_is(step, prediction): - if (isinstance(world.local_prediction, list) or - isinstance(world.local_prediction, tuple)): - local_prediction = world.local_prediction[0] - elif isinstance(world.local_prediction, dict): - local_prediction = world.local_prediction['prediction'] + """Step: the local ensemble prediction is """ + if isinstance(step.bigml["local_prediction"], (list, tuple)): + local_prediction = step.bigml["local_prediction"][0] + elif isinstance(step.bigml["local_prediction"], dict): + local_prediction = step.bigml["local_prediction"]['prediction'] else: - local_prediction = world.local_prediction - if world.local_ensemble.regression: - assert_almost_equal(local_prediction, float(prediction), places=5) + local_prediction = step.bigml["local_prediction"] + if step.bigml["local_ensemble"].regression: + approx_(local_prediction, float(prediction), precision=5) else: eq_(local_prediction, prediction) -#@step(r'the local probability is "(.*)"') + def the_local_probability_is(step, probability): - probability = round(float(probability), 4) - local_probability = world.local_prediction["probability"] + """Step: the local probability is """ + local_probability = step.bigml["local_prediction"]["probability"] + if isinstance(probability, str): + probability = float(probability) + eq_(local_probability, probability, precision=4) + + +def the_local_confidence_is(step, confidence): + """Step: the local confidence is """ + local_confidence = step.bigml["local_prediction"]["confidence"] + if isinstance(confidence, str): + confidence = float(confidence) + eq_(local_confidence, confidence, precision=4) + + +def eq_local_and_remote_probability(step): + """Step: check local and remote probability""" + local_probability = round(step.bigml["local_prediction"]["probability"], 3) + remote_probability = round(world.prediction["probability"], 3) + approx_(local_probability, remote_probability) + -#@step(r'I create a local multi model') def i_create_a_local_multi_model(step): - world.local_model = MultiModel(world.list_of_models) - world.local_ensemble = None + """Step: I create a local multi model""" + step.bigml["local_model"] = MultiModel(world.list_of_models) + step.bigml["local_ensemble"] = None + -#@step(r'I create a batch prediction for "(.*)" and save it in "(.*)"') def i_create_a_batch_prediction(step, input_data_list, directory): + """Step: I create a batch prediction for and save it + in + """ if len(directory) > 0 and not os.path.exists(directory): os.makedirs(directory) - input_data_list = eval(input_data_list) - assert isinstance(input_data_list, list) - world.local_model.batch_predict(input_data_list, directory) + input_data_list = json.loads(input_data_list) + ok_(isinstance(input_data_list, list)) + step.bigml["local_model"].batch_predict(input_data_list, directory) + -#@step(r'I combine the votes in "(.*)"') def i_combine_the_votes(step, directory): - world.votes = 
world.local_model.batch_votes(directory) + """Step: I combine the votes in """ + world.votes = step.bigml["local_model"].batch_votes(directory) + -#@step(r'the plurality combined predictions are "(.*)"') def the_plurality_combined_prediction(step, predictions): - predictions = eval(predictions) - for i in range(len(world.votes)): - combined_prediction = world.votes[i].combine() + """Step: the plurality combined predictions are """ + predictions = json.loads(predictions) + for i, votes_row in enumerate(world.votes): + combined_prediction = votes_row.combine() check_prediction(combined_prediction, predictions[i]) -#@step(r'the confidence weighted predictions are "(.*)"') + def the_confidence_weighted_prediction(step, predictions): - predictions = eval(predictions) - for i in range(len(world.votes)): - combined_prediction = world.votes[i].combine(1) + """Step: the confidence weighted predictions are """ + predictions = json.loads(predictions) + for i, votes_row in enumerate(world.votes): + combined_prediction = votes_row.combine(1) eq_(combined_prediction, predictions[i]) -#@step(r'I create a local logistic regression model$') -def i_create_a_local_logistic_model(step): - world.local_model = LogisticRegression(world.logistic_regression) + +def i_create_a_local_logistic_model(step, pre_model=False): + """Step: I create a local logistic regression model""" + step.bigml["local_model"] = LogisticRegression(world.logistic_regression) + if pre_model: + step.bigml["local_pipeline"] = step.bigml[ + "local_model"].data_transformations() if hasattr(world, "local_ensemble"): - world.local_ensemble = None + step.bigml["local_ensemble"] = None + -#@step(r'I create a local deepnet model$') def i_create_a_local_deepnet(step): - world.local_model = Deepnet(world.deepnet['resource']) + """Step: I create a local deepnet model""" + step.bigml["local_model"] = Deepnet({"resource": world.deepnet['resource'], + "object": world.deepnet}) if hasattr(world, "local_ensemble"): - world.local_ensemble = None + step.bigml["local_ensemble"] = None + -#@step(r'I create a local topic model$') def i_create_a_local_topic_model(step): - world.local_topic_model = TopicModel(world.topic_model) + """Step: I create a local topic model""" + step.bigml["local_topic_model"] = TopicModel(world.topic_model) + -#@step(r'the topic distribution is "(.*)"$') def the_topic_distribution_is(step, distribution): + """Step: the topic distribution is """ eq_(json.loads(distribution), world.topic_distribution['topic_distribution']['result']) -#@step(r'the local topic distribution is "(.*)"') def the_local_topic_distribution_is(step, distribution): + """Step: the local topic distribution is """ distribution = json.loads(distribution) - for index, topic_dist in enumerate(world.local_topic_distribution): - assert_almost_equal(topic_dist["probability"], distribution[index], - places=5) + for index, topic_dist in enumerate(step.bigml["local_topic_distribution"]): + approx_(topic_dist["probability"], distribution[index]) + -#@step(r'the association set is like file "(.*)"') def the_association_set_is_like_file(step, filename): + """Step: the association set is like file """ filename = res_filename(filename) result = world.association_set.get("association_set",{}).get("result", []) """ Uncomment if different text settings are used @@ -402,14 +605,18 @@ def the_association_set_is_like_file(step, filename): file_result = json.load(filehandler) eq_(result, file_result) -#@step(r'I create a local association set$') -def 
i_create_a_local_association_set(step, data): + +def i_create_a_local_association_set(step, data, pre_model=None): + """Step: I create a local association set""" data = json.loads(data) - world.local_association_set = world.local_association.association_set( \ + if pre_model is not None: + data = pre_model.transform([data])[0] + step.bigml["local_association_set"] = step.bigml["local_association"].association_set( \ data) -#@step(r'the local association set is like file "(.*)"') + def the_local_association_set_is_like_file(step, filename): + """Step: the local association set is like file """ filename = res_filename(filename) """ Uncomment if different text settings are used with open(filename, "w") as filehandler: @@ -417,87 +624,111 @@ def the_local_association_set_is_like_file(step, filename): """ with open(filename) as filehandler: file_result = json.load(filehandler) - eq_(world.local_association_set, file_result) + for index, result in enumerate(file_result): + approx_(result['score'], step.bigml["local_association_set"][ + index]['score']) + eq_(result['rules'], + step.bigml["local_association_set"][index]['rules']) + -#@step(r'I create a local prediction for "(.*)" in operating kind "(.*)"$') def i_create_a_local_prediction_op_kind(step, data=None, operating_kind=None): + """Step: I create a local prediction for in operating kind + + """ if data is None: data = "{}" - assert_is_not_none(operating_kind) + ok_(operating_kind is not None) data = json.loads(data) - world.local_prediction = world.local_model.predict( \ + step.bigml["local_prediction"] = step.bigml["local_model"].predict( \ data, operating_kind=operating_kind) -#@step(r'I create a local ensemble prediction for "(.*)" in operating kind "(.*)"$') + def i_create_a_local_ensemble_prediction_op_kind( \ step, data=None, operating_kind=None): + """Step: I create a local ensemble prediction for in operating + kind """ if data is None: data = "{}" - assert_is_not_none(operating_kind) + ok_(operating_kind is not None) data = json.loads(data) - world.local_prediction = world.local_ensemble.predict( \ + step.bigml["local_prediction"] = step.bigml["local_ensemble"].predict( \ data, operating_kind=operating_kind) -#@step(r'I create a local deepnet for "(.*)" in operating kind "(.*)"$') + def i_create_a_local_deepnet_prediction_op_kind( \ step, data=None, operating_kind=None): + """Step: I create a local deepnet for in operating kind + + """ if data is None: data = "{}" - assert_is_not_none(operating_kind) + ok_(operating_kind is not None) data = json.loads(data) - world.local_prediction = world.local_model.predict( \ + step.bigml["local_prediction"] = step.bigml["local_model"].predict( \ data, operating_kind=operating_kind) -#@step(r'I create a local logistic regression for "(.*)" in operating kind "(.*)"$') + def i_create_a_local_logistic_prediction_op_kind( \ step, data=None, operating_kind=None): + """Step: I create a local logistic regression for in operating + kind + """ if data is None: data = "{}" - assert_is_not_none(operating_kind) + ok_(operating_kind is not None) data = json.loads(data) - world.local_prediction = world.local_model.predict( \ + step.bigml["local_prediction"] = step.bigml["local_model"].predict( \ data, operating_kind=operating_kind) -#@step(r'I create a local PCA') -def create_local_pca(step): - world.local_pca = PCA(world.pca["resource"]) -#@step(r'I create a local PCA') +def create_local_pca(step, pre_model=False): + """Step: I create a local PCA""" + step.bigml["local_pca"] = PCA(world.pca["resource"]) + 
if pre_model: + step.bigml["local_pipeline"] = step.bigml["local_pca"].data_transformations() + + def i_create_a_local_linear(step): - world.local_model = LinearRegression(world.linear_regression["resource"]) + """Step: I create a local linear regression""" + step.bigml["local_model"] = LinearRegression(world.linear_regression["resource"]) -#@step(r'I create a local projection for "(.*)"') -def i_create_a_local_projection(step, data=None): + +def i_create_a_local_projection(step, data=None, pre_model=None): + """Step: I create a local projection for """ if data is None: data = "{}" data = json.loads(data) - for key, value in data.items(): + if pre_model is not None: + data = pre_model.transform([data])[0] + for key, value in list(data.items()): if value == "": del data[key] - world.local_projection = world.local_pca.projection(data, full=True) - for name, value in world.local_projection.items(): - world.local_projection[name] = round(value, 5) + step.bigml["local_projection"] = step.bigml["local_pca"].projection(data, full=True) + for name, value in list(step.bigml["local_projection"].items()): + step.bigml["local_projection"][name] = round(value, 5) + -#@step(r'I create a local linear regression prediction for "(.*)"') def i_create_a_local_linear_prediction(step, data=None): + """Step: I create a local linear regression prediction for """ if data is None: data = "{}" data = json.loads(data) - for key, value in data.items(): + for key, value in list(data.items()): if value == "": del data[key] - world.local_prediction = world.local_model.predict(data, full=True) - for name, value in world.local_prediction.items(): + step.bigml["local_prediction"] = step.bigml["local_model"].predict(data, full=True) + for name, value in list(step.bigml["local_prediction"].items()): if isinstance(value, float): - world.local_prediction[name] = round(value, 5) + step.bigml["local_prediction"][name] = round(value, 5) def the_local_projection_is(step, projection): + """Step: checking the local projection""" if projection is None: projection = "{}" projection = json.loads(projection) - eq_(len(projection.keys()), len(world.local_projection.keys())) - for name, value in projection.items(): - eq_(world.local_projection[name], projection[name], - "local: %s, %s - expected: %s" % ( \ - name, world.local_projection[name], projection[name])) + eq_(len(list(projection.keys())), len(list(step.bigml["local_projection"].keys()))) + for name, _ in list(projection.items()): + eq_(step.bigml["local_projection"][name], projection[name], + msg="local: %s, %s - expected: %s" % ( \ + name, step.bigml["local_projection"][name], projection[name])) diff --git a/bigml/tests/compute_lda_prediction_steps.py b/bigml/tests/compute_lda_prediction_steps.py index 91e5af6e..5ec5f6e8 100644 --- a/bigml/tests/compute_lda_prediction_steps.py +++ b/bigml/tests/compute_lda_prediction_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2016-2019 BigML +# Copyright 2016-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain
@@ -17,19 +17,18 @@

 from bigml.topicmodel import TopicModel

-from nose.tools import assert_almost_equals, eq_
+from .world import eq_, approx_
+

-#@step(r'predict the topic distribution for the text "(.*)"$')
 def i_make_a_prediction(step, model, text, expected):
+    """Step: predict the topic distribution for the text <text>"""
     topic_model = TopicModel(model)
     distribution = topic_model.distribution(text)

     msg = ("Computed distribution is %s, but expected distribution is %s" %
            (str(distribution), str(expected)))

-    eq_(len(distribution), len(expected), msg)
+    eq_(len(distribution), len(expected), msg=msg)

-    for d, e in zip(distribution, expected):
-        assert_almost_equals(d['probability'],
-                             e['probability'],
-                             places=6, msg=msg)
+    for dis, exp in zip(distribution, expected):
+        approx_(dis['probability'], exp['probability'], precision=6, msg=msg)
diff --git a/bigml/tests/compute_multivote_prediction_steps.py b/bigml/tests/compute_multivote_prediction_steps.py
index f756009c..251423c1 100644
--- a/bigml/tests/compute_multivote_prediction_steps.py
+++ b/bigml/tests/compute_multivote_prediction_steps.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
-#!/usr/bin/env python
+#pylint: disable=locally-disabled,unused-argument,no-member
 #
-# Copyright 2012, 2015-2019 BigML
+# Copyright 2012, 2015-2025 BigML
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may
 # not use this file except in compliance with the License. You may obtain
@@ -15,75 +15,76 @@
 # License for the specific language governing permissions and limitations
 # under the License.

-import time
 import json
-import os
-from datetime import datetime, timedelta
-from world import world, res_filename
-from nose.tools import eq_
-
-from bigml.api import HTTP_CREATED
-from bigml.api import HTTP_ACCEPTED
-from bigml.api import FINISHED
-from bigml.api import FAULTY
-from bigml.api import get_status
+
 from bigml.multivote import MultiVote

+from .world import world, res_filename, eq_, ok_
+
 DIGITS = 5

-#@step(r'I create a MultiVote for the set of predictions in file (.*)$')
+
 def i_create_a_multivote(step, predictions_file):
-    predictions_file = res_filename(predictions_file)
+    """Step: I create a MultiVote for the set of predictions in file
+    <predictions_file>
+    """
+    predictions_path = res_filename(predictions_file)
     try:
-        with open(predictions_file, 'r') as predictions_file:
-            world.multivote = MultiVote(json.load(predictions_file))
+        with open(predictions_path, 'r') as predictions_file:
+            world.multivote = MultiVote(json.load(predictions_file))
     except IOError:
-        assert False, "Failed to read %s" % predictions_file
+        ok_(False, "Failed to read %s" % predictions_path)
+

-#@step(r'I compute the prediction with confidence using method "(.*)"$')
 def compute_prediction(step, method):
+    """Step: I compute the prediction with confidence using method
+    <method>
+    """
     try:
         prediction = world.multivote.combine(int(method), full=True)
         world.combined_prediction = prediction["prediction"]
         world.combined_confidence = prediction["confidence"]
     except ValueError:
-        assert False, "Incorrect method"
+        ok_(False, "Incorrect method")
+

-#@step(r'I compute the prediction without confidence using method "(.*)"$')
 def compute_prediction_no_confidence(step, method):
+    """Step: I compute the prediction without confidence using method <method>
+    """
     try:
         world.combined_prediction_nc = world.multivote.combine(int(method))
     except ValueError:
-        assert False, "Incorrect method"
+        ok_(False, "Incorrect method")

-#@step(r'the combined prediction is "(.*)"$')
-def check_combined_prediction(step, prediction):
+def check_combined_prediction(step, prediction):
+    """Step: the combined prediction is <prediction>"""
     if world.multivote.is_regression():
         try:
             eq_(round(world.combined_prediction, DIGITS),
                 round(float(prediction), DIGITS))
-        except ValueError, exc:
-            assert False, str(exc)
+        except ValueError as exc:
+            ok_(False, str(exc))
     else:
         eq_(world.combined_prediction, prediction)

-#@step(r'the combined prediction without confidence is "(.*)"$')
-def check_combined_prediction_no_confidence(step, prediction):
+def check_combined_prediction_no_confidence(step, prediction):
+    """Step: the combined prediction without confidence is <prediction>"""
     if world.multivote.is_regression():
         try:
             eq_(round(world.combined_prediction_nc, DIGITS),
                 round(float(prediction), DIGITS))
-        except ValueError, exc:
-            assert False, str(exc)
+        except ValueError as exc:
+            ok_(False, str(exc))
     else:
         eq_(world.combined_prediction, prediction)

-#@step(r'the confidence for the combined prediction is (.*)$')
+
 def check_combined_confidence(step, confidence):
+    """Step: the confidence for the combined prediction is <confidence>"""
     try:
         eq_(round(world.combined_confidence, DIGITS),
             round(float(confidence), DIGITS))
-    except ValueError, exc:
-        assert False, str(exc)
+    except ValueError as exc:
+        ok_(False, str(exc))
diff --git a/bigml/tests/create_anomaly_steps.py b/bigml/tests/create_anomaly_steps.py
index f45fbb8b..f0b18d3a 100644
--- a/bigml/tests/create_anomaly_steps.py
+++ b/bigml/tests/create_anomaly_steps.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
-#!/usr/bin/env python
+#pylint: disable=locally-disabled,unused-argument,no-member
 #
-# Copyright 2014-2019 BigML
+# Copyright 2014-2025 BigML
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may
 # not use this file except in compliance with the License. You may obtain
@@ -15,54 +15,74 @@
 # License for the specific language governing permissions and limitations
 # under the License.
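The hunks above repeat one mechanical refactor: per-scenario state that used to hang off the global `world` object (`world.local_model`, `world.local_prediction`, ...) now travels in a `step.bigml` dictionary, so scenarios running side by side stop overwriting each other's local resources. A minimal sketch of the convention, with a hypothetical `Step` container standing in for the object the real harness passes to every step function:

    class Step:
        """Hypothetical stand-in for the harness' per-scenario object."""
        def __init__(self):
            self.bigml = {}   # holds local_model, local_prediction, etc.

    step = Step()
    i_create_a_local_multi_model(step)             # fills step.bigml["local_model"]
    i_create_a_prediction_from_a_multi_model(step, data='{"petal width": 0.5}')
    the_local_prediction_is(step, "Iris-setosa")   # reads step.bigml["local_prediction"]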
-import time import json -import os -from datetime import datetime, timedelta -from nose.tools import eq_, ok_, assert_less -from world import world, res_filename - -from read_anomaly_steps import i_get_the_anomaly from bigml.api import HTTP_CREATED -from bigml.api import HTTP_ACCEPTED -from bigml.api import FINISHED -from bigml.api import FAULTY -from bigml.api import get_status +from bigml.api import FINISHED, FAULTY from bigml.anomaly import Anomaly -#@step(r'I check the anomaly detector stems from the original dataset list') +from .world import world, res_filename, eq_, ok_ +from .read_resource_steps import wait_until_status_code_is + + def i_check_anomaly_datasets_and_datasets_ids(step): + """Step: I check the anomaly detector stems from the original dataset + list + """ anomaly = world.anomaly - ok_('datasets' in anomaly and anomaly['datasets'] == world.dataset_ids, + ok_('datasets' in anomaly and + anomaly['datasets'] == step.bigml["dataset_ids"], ("The anomaly detector contains only %s and the dataset ids are %s" % - (",".join(anomaly['datasets']), ",".join(world.dataset_ids)))) + (",".join(anomaly['datasets']), ",".join(step.bigml["dataset_ids"])))) + -#@step(r'I check the anomaly detector stems from the original dataset') def i_check_anomaly_dataset_and_datasets_ids(step): + """Step: I check the anomaly detector stems from the original dataset""" anomaly = world.anomaly - ok_('dataset' in anomaly and anomaly['dataset'] == world.dataset['resource'], + ok_('dataset' in anomaly and anomaly['dataset'] == world.dataset[ + 'resource'], ("The anomaly detector contains only %s and the dataset id is %s" % (anomaly['dataset'], world.dataset['resource']))) -#@step(r'I create an anomaly detector$') -def i_create_an_anomaly(step): - i_create_an_anomaly_from_dataset(step) +def i_create_an_anomaly(step, shared=None): + """Step: I create an anomaly detector""" + i_create_an_anomaly_from_dataset(step, shared=shared) -#@step(r'I create an anomaly detector from a dataset$') -def i_create_an_anomaly_from_dataset(step): - dataset = world.dataset.get('resource') - resource = world.api.create_anomaly(dataset, {'seed': 'BigML'}) +def clone_anomaly(step, anomaly): + """Step: I clone anomaly""" + resource = world.api.clone_anomaly(anomaly, + {'project': world.project_id}) + # update status world.status = resource['code'] - eq_(world.status, HTTP_CREATED) world.location = resource['location'] world.anomaly = resource['object'] + # save reference world.anomalies.append(resource['resource']) -#@step(r'I create an anomaly detector with (\d+) anomalies from a dataset$') + +def the_cloned_anomaly_is(step, anomaly): + """Checking expected cloned anomaly""" + eq_(world.anomaly["origin"], anomaly) + + +def i_create_an_anomaly_from_dataset(step, shared=None): + """Step: I create an anomaly detector from a dataset""" + if shared is None or world.shared.get("anomaly", {}).get(shared) is None: + dataset = world.dataset.get('resource') + resource = world.api.create_anomaly(dataset, {'seed': 'BigML'}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.anomaly = resource['object'] + world.anomalies.append(resource['resource']) + + def i_create_an_anomaly_with_top_n_from_dataset(step, top_n): + """Step: I create an anomaly detector with anomalies from + a dataset + """ dataset = world.dataset.get('resource') resource = world.api.create_anomaly( dataset, {'seed': 'BigML', 'top_n': int(top_n)}) @@ -73,55 +93,82 @@ def 
i_create_an_anomaly_with_top_n_from_dataset(step, top_n):
     world.anomaly = resource['object']
     world.anomalies.append(resource['resource'])

-#@step(r'I create an anomaly detector from a dataset list$')
+
+def i_create_an_anomaly_with_params(step, parms=None):
+    """Step: I create an anomaly detector with <parms> from a dataset"""
+    dataset = world.dataset.get('resource')
+    if parms is not None:
+        parms = json.loads(parms)
+    else:
+        parms = {}
+    parms.update({"seed": 'BigML'})
+    resource = world.api.create_anomaly(
+        dataset, parms)
+    world.status = resource['code']
+    eq_(world.status, HTTP_CREATED,
+        "Expected: %s, found: %s" % (HTTP_CREATED, world.status))
+    world.location = resource['location']
+    world.anomaly = resource['object']
+    world.anomalies.append(resource['resource'])
+
+
 def i_create_an_anomaly_from_dataset_list(step):
-    resource = world.api.create_anomaly(world.dataset_ids, {'seed': 'BigML'})
+    """Step: I create an anomaly detector from a dataset list"""
+    resource = world.api.create_anomaly(step.bigml["dataset_ids"],
+                                        {'seed': 'BigML'})
     world.status = resource['code']
     eq_(world.status, HTTP_CREATED)
     world.location = resource['location']
     world.anomaly = resource['object']
     world.anomalies.append(resource['resource'])

-#@step(r'I wait until the anomaly detector status code is either (\d) or (-\d) less than (\d+)')
+
 def wait_until_anomaly_status_code_is(step, code1, code2, secs):
-    start = datetime.utcnow()
-    delta = int(secs) * world.delta
-    i_get_the_anomaly(step, world.anomaly['resource'])
-    status = get_status(world.anomaly)
-    while (status['code'] != int(code1) and
-           status['code'] != int(code2)):
-        time.sleep(3)
-        assert_less((datetime.utcnow() - start).seconds, delta)
-        i_get_the_anomaly(step, world.anomaly['resource'])
-        status = get_status(world.anomaly)
-    eq_(status['code'], int(code1))
-
-#@step(r'I wait until the anomaly detector is ready less than (\d+)')
-def the_anomaly_is_finished_in_less_than(step, secs):
-    wait_until_anomaly_status_code_is(step, FINISHED, FAULTY, secs)
-
-#@step(r'I create a dataset with only the anomalies')
+    """Step: I wait until the anomaly detector status code is either
+    <code1> or <code2> less than <secs>
+    """
+    world.anomaly = wait_until_status_code_is(
+        code1, code2, secs, world.anomaly)
+
+
+def the_anomaly_is_finished_in_less_than(step, secs, shared=None):
+    """Step: I wait until the anomaly detector is ready less than <secs>"""
+    if shared is None or world.shared.get("anomaly", {}).get(shared) is None:
+        wait_until_anomaly_status_code_is(step, FINISHED, FAULTY, secs)
+        if shared is not None:
+            if "anomaly" not in world.shared:
+                world.shared["anomaly"] = {}
+            world.shared["anomaly"][shared] = world.anomaly
+    else:
+        world.anomaly = world.shared["anomaly"][shared]
+        print("Reusing %s" % world.anomaly["resource"])
+
+
 def create_dataset_with_anomalies(step):
+    """Step: I create a dataset with only the anomalies"""
     local_anomalies = Anomaly(world.anomaly['resource'])
     world.dataset = world.api.create_dataset(
         world.dataset['resource'],
         {"lisp_filter": local_anomalies.anomalies_filter()})
     world.datasets.append(world.dataset['resource'])

-#@step(r'I check that the dataset has (\d+) rows')
+
 def the_dataset_has_n_rows(step, rows):
+    """Step: I check that the dataset has <rows> rows"""
     eq_(world.dataset['rows'], int(rows))

-#@step(r'I export the anomaly$')
+
 def i_export_anomaly(step, filename):
+    """Step: I export the anomaly"""
     world.api.export(world.anomaly.get('resource'),
                      filename=res_filename(filename))

-#@step(r'I create a local anomaly from file "(.*)"')
+
 def i_create_local_anomaly_from_file(step, export_file):
-    world.local_anomaly = Anomaly(res_filename(export_file))
+    """Step: I create a local anomaly from file <export_file>"""
+    step.bigml["local_anomaly"] = Anomaly(res_filename(export_file))

-#@step(r'the anomaly ID and the local anomaly ID match')
 def check_anomaly_id_local_id(step):
-    eq_(world.local_anomaly.resource_id, world.anomaly["resource"])
+    """Step: the anomaly ID and the local anomaly ID match"""
+    eq_(step.bigml["local_anomaly"].resource_id, world.anomaly["resource"])
diff --git a/bigml/tests/create_association_steps.py b/bigml/tests/create_association_steps.py
index 60b7250a..b54cd9be 100644
--- a/bigml/tests/create_association_steps.py
+++ b/bigml/tests/create_association_steps.py
@@ -1,31 +1,57 @@
-import time
+# -*- coding: utf-8 -*-
+#pylint: disable=locally-disabled,unused-argument,no-member
+#
+# Copyright 2014-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
 import json
-import os
-import StringIO
-from datetime import datetime, timedelta
-from world import world, res_filename
-from nose.tools import eq_, assert_less
-
-from bigml.api import BigML
-from bigml.api import HTTP_CREATED
-from bigml.api import HTTP_ACCEPTED
-from bigml.api import FINISHED
-from bigml.api import FAULTY
-from bigml.api import get_status
+
+from bigml.api import HTTP_CREATED, HTTP_ACCEPTED
+from bigml.api import FINISHED, FAULTY
 from bigml.association import Association
-from read_association_steps import i_get_the_association
+from .read_resource_steps import wait_until_status_code_is
+from .world import world, res_filename, eq_

-#@step(r'the association name is "(.*)"')
 def i_check_association_name(step, name):
+    """Step: the association name is <name>"""
     association_name = world.association['name']
     eq_(name, association_name)

-#@step(r'I create an association from a dataset$')
-def i_create_an_association_from_dataset(step):
+
+def i_create_an_association_from_dataset(step, shared=None):
+    """Step: I create an association from a dataset"""
+    if shared is None or world.shared.get("association", {}).get(shared) is None:
+        dataset = world.dataset.get('resource')
+        resource = world.api.create_association(dataset, {'name': 'new association'})
+        world.status = resource['code']
+        eq_(world.status, HTTP_CREATED)
+        world.location = resource['location']
+        world.association = resource['object']
+        world.associations.append(resource['resource'])
+
+
+def i_create_an_association_from_dataset_with_params(step, parms=None):
+    """Step: I create an association from a dataset with params <parms>"""
     dataset = world.dataset.get('resource')
-    resource = world.api.create_association(dataset, {'name': 'new association'})
+    if parms is not None:
+        parms = json.loads(parms)
+    else:
+        parms = {}
+    parms.update({'name': 'new association'})
+    resource = world.api.create_association(dataset, parms)
     world.status = resource['code']
     eq_(world.status, HTTP_CREATED)
     world.location = resource['location']
@@ -33,8 +59,10 @@ def i_create_an_association_from_dataset(step):
world.associations.append(resource['resource']) -#@step(r'I create an association with search strategy "(.*)" from a dataset$') def i_create_an_association_with_strategy_from_dataset(step, strategy): + """Step: I create an association with search strategy + from a dataset + """ dataset = world.dataset.get('resource') resource = world.api.create_association( dataset, {'name': 'new association', 'search_strategy': strategy}) @@ -45,8 +73,8 @@ def i_create_an_association_with_strategy_from_dataset(step, strategy): world.associations.append(resource['resource']) -#@step(r'I update the association name to "(.*)"$') def i_update_association_name(step, name): + """Step: I update the association name to """ resource = world.api.update_association(world.association['resource'], {'name': name}) world.status = resource['code'] @@ -55,57 +83,75 @@ def i_update_association_name(step, name): world.association = resource['object'] -#@step(r'I wait until the association status code is either (\d) or (-\d) less than (\d+)') def wait_until_association_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - association_id = world.association['resource'] - i_get_the_association(step, association_id) - status = get_status(world.association) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_association(step, association_id) - status = get_status(world.association) - eq_(status['code'], int(code1)) - - -#@step(r'I wait until the association is ready less than (\d+)') -def the_association_is_finished_in_less_than(step, secs): - wait_until_association_status_code_is(step, FINISHED, FAULTY, secs) - - -#@step(r'I create a local association') + """Step: I wait until the association status code is either or + less than + """ + world.association = wait_until_status_code_is( + code1, code2, secs, world.association) + + +def the_association_is_finished_in_less_than(step, secs, shared=None): + """Steps: I wait until the association is ready less than """ + if shared is None or world.shared.get("association", {}).get(shared) is None: + wait_until_association_status_code_is(step, FINISHED, FAULTY, secs) + if shared is not None: + if "association" not in world.shared: + world.shared["association"] = {} + world.shared["association"][shared] = world.association + else: + world.association = world.shared["association"][shared] + print("Reusing %s" % world.association["resource"]) + + def i_create_a_local_association(step): - world.local_association = Association(world.association) + """Step: I create a local association""" + step.bigml["local_association"] = Association(world.association) -#@step(r'I get the rules for "(.*?)"$') def i_get_rules_for_item_list(step, item_list): - world.association_rules = world.local_association.get_rules( + """Step: I get the rules for """ + world.association_rules = step.bigml["local_association"].get_rules( item_list=item_list) -#@step(r'the first rule is "(.*?)"$') def the_first_rule_is(step, rule): + """Step: the first rule is """ found_rules = [] for a_rule in world.association_rules: found_rules.append(a_rule.to_json()) eq_(rule, found_rules[0]) -#@step(r'I export the association$') def i_export_association(step, filename): + """Step: I export the association""" world.api.export(world.association.get('resource'), filename=res_filename(filename)) -#@step(r'I create a local association from file "(.*)"') def 
i_create_local_association_from_file(step, export_file): - world.local_association = Association(res_filename(export_file)) + """Step: I create a local association from file """ + step.bigml["local_association"] = Association(res_filename(export_file)) -#@step(r'the association ID and the local association ID match') def check_association_id_local_id(step): - eq_(world.local_association.resource_id, world.association["resource"]) + """Step: the association ID and the local association ID match""" + eq_(step.bigml["local_association"].resource_id, + world.association["resource"]) + + +def clone_association(step, association): + """Step: I clone association""" + resource = world.api.clone_association(association, + {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.association = resource['object'] + # save reference + world.associations.append(resource['resource']) + + +def the_cloned_association_is(step, association): + """The association is a clone""" + eq_(world.association["origin"], association) diff --git a/bigml/tests/create_batch_prediction_steps.py b/bigml/tests/create_batch_prediction_steps.py index a11994cb..7988a3f9 100644 --- a/bigml/tests/create_batch_prediction_steps.py +++ b/bigml/tests/create_batch_prediction_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2012-2019 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,27 +15,16 @@ # License for the specific language governing permissions and limitations # under the License. 
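Note the `shared` keyword threaded through the creation and wait steps above (`the_anomaly_is_finished_in_less_than`, `the_association_is_finished_in_less_than`): the first scenario to finish a resource parks it under `world.shared[kind][label]`, and later scenarios reuse it instead of recreating it. A sketch of that caching convention, assuming `world.shared` is a plain dict initialized at session start:

    def reuse_or_wait(kind, label, wait_for_resource):
        """Wait for a resource once, then serve it from the shared cache."""
        cache = world.shared.setdefault(kind, {})
        if label not in cache:
            wait_for_resource()                  # only the first scenario waits
            cache[label] = getattr(world, kind)
        else:
            setattr(world, kind, cache[label])
            print("Reusing %s" % cache[label]["resource"])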
-import time -import json -import requests -import csv -import traceback -from datetime import datetime, timedelta -from world import world, res_filename -from nose.tools import eq_, ok_, assert_less - from bigml.api import HTTP_CREATED -from bigml.api import FINISHED -from bigml.api import FAULTY -from bigml.api import get_status +from bigml.api import FINISHED, FAULTY from bigml.io import UnicodeReader -from read_batch_prediction_steps import (i_get_the_batch_prediction, - i_get_the_batch_centroid, i_get_the_batch_anomaly_score) +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_, ok_ -#@step(r'I create a batch prediction for the dataset with the model$') def i_create_a_batch_prediction(step): + """Step: I create a batch prediction for the dataset with the model""" dataset = world.dataset.get('resource') model = world.model.get('resource') resource = world.api.create_batch_prediction(model, dataset) @@ -46,8 +35,9 @@ def i_create_a_batch_prediction(step): world.batch_predictions.append(resource['resource']) -#@step(r'I create a batch prediction for the dataset with the ensemble and "(.*)"$') def i_create_a_batch_prediction_ensemble(step, params=None): + """Step: I create a batch prediction for the dataset with the ensemble and + """ if params is None: params = {} dataset = world.dataset.get('resource') @@ -60,128 +50,115 @@ def i_create_a_batch_prediction_ensemble(step, params=None): world.batch_predictions.append(resource['resource']) -#@step(r'I wait until the batch prediction status code is either (\d) or (-\d) less than (\d+)') def wait_until_batch_prediction_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - i_get_the_batch_prediction(step, world.batch_prediction['resource']) - status = get_status(world.batch_prediction) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_batch_prediction(step, world.batch_prediction['resource']) - status = get_status(world.batch_prediction) - eq_(status['code'], int(code1)) - - -#@step(r'I wait until the batch centroid status code is either (\d) or (-\d) less than (\d+)') + """Step: I wait until the batch prediction status code is either + or less than """ + world.batch_prediction = wait_until_status_code_is( + code1, code2, secs, world.batch_prediction) + + def wait_until_batch_centroid_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - i_get_the_batch_centroid(step, world.batch_centroid['resource']) - status = get_status(world.batch_centroid) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less(datetime.utcnow() - start, timedelta(seconds=delta)) - i_get_the_batch_centroid(step, world.batch_centroid['resource']) - status = get_status(world.batch_centroid) - eq_(status['code'], int(code1)) - - -#@step(r'I wait until the batch anomaly score status code is either (\d) or (-\d) less than (\d+)') + """Step: I wait until the batch centroid status code is either or + less than """ + world.batch_centroid = wait_until_status_code_is( + code1, code2, secs, world.batch_centroid) + + def wait_until_batch_anomaly_score_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - i_get_the_batch_anomaly_score(step, world.batch_anomaly_score['resource']) - status = 
get_status(world.batch_anomaly_score)
-    while (status['code'] != int(code1) and
-           status['code'] != int(code2)):
-        time.sleep(3)
-        assert_less(datetime.utcnow() - start, timedelta(seconds=delta))
-        i_get_the_batch_anomaly_score(step, world.batch_anomaly_score['resource'])
-        status = get_status(world.batch_anomaly_score)
-    eq_(status['code'], int(code1), msg="%s seconds waited." % delta)
-
-
-#@step(r'I wait until the batch prediction is ready less than (\d+)')
+    """Step: I wait until the batch anomaly score status code is either
+    <code1> or <code2> less than <secs>"""
+    world.batch_anomaly_score = wait_until_status_code_is(
+        code1, code2, secs, world.batch_anomaly_score)
+
+
 def the_batch_prediction_is_finished_in_less_than(step, secs):
+    """Step: I wait until the batch prediction is ready less than <secs>"""
     wait_until_batch_prediction_status_code_is(step, FINISHED, FAULTY, secs)

-#@step(r'I wait until the batch centroid is ready less than (\d+)')
+
 def the_batch_centroid_is_finished_in_less_than(step, secs):
+    """Step: I wait until the batch centroid is ready less than <secs>"""
     wait_until_batch_centroid_status_code_is(step, FINISHED, FAULTY, secs)

-#@step(r'I wait until the batch anomaly score is ready less than (\d+)')
+
 def the_batch_anomaly_score_is_finished_in_less_than(step, secs):
+    """Step: I wait until the batch anomaly score is ready less than <secs>"""
     wait_until_batch_anomaly_score_status_code_is(step, FINISHED, FAULTY, secs)

-#@step(r'I download the created predictions file to "(.*)"')
 def i_download_predictions_file(step, filename):
+    """Step: I download the created predictions file to <filename>"""
     file_object = world.api.download_batch_prediction(
         world.batch_prediction, filename=res_filename(filename))
     ok_(file_object is not None)
     world.output = file_object

-#@step(r'I download the created centroid file to "(.*)"')
+
 def i_download_centroid_file(step, filename):
+    """Step: I download the created centroid file to <filename>"""
     file_object = world.api.download_batch_centroid(
         world.batch_centroid, filename=res_filename(filename))
     ok_(file_object is not None)
     world.output = file_object

-#@step(r'I download the created anomaly score file to "(.*)"')
+
 def i_download_anomaly_score_file(step, filename):
+    """Step: I download the created anomaly score file to <filename>"""
     file_object = world.api.download_batch_anomaly_score(
         world.batch_anomaly_score, filename=res_filename(filename))
     ok_(file_object is not None)
     world.output = file_object

+
 def check_rows(prediction_rows, test_rows):
+    """Checking rows identity"""
     row_num = 0
     for row in prediction_rows:
         check_row = next(test_rows)
         row_num += 1
         eq_(len(check_row), len (row))
-        for index in range(len(row)):
-            dot = row[index].find(".")
+        for index, cell in enumerate(row):
+            dot = cell.find(".")
             if dot > 0:
                 try:
-                    decs = min(len(row[index]), len(check_row[index])) - dot - 1
-                    row[index] = round(float(row[index]), decs)
+                    decs = min(len(cell), len(check_row[index])) - dot - 1
+                    cell = round(float(cell), decs)
                     check_row[index] = round(float(check_row[index]), decs)
                 except ValueError:
                     pass
-            eq_(check_row[index], row[index],
+            eq_(check_row[index], cell,
                 "Got: %s/ Expected: %s in line %s" % (row, check_row, row_num))

-#@step(r'the batch prediction file is like "(.*)"')
+
 def i_check_predictions(step, check_file):
+    """Step: the batch prediction file is like <check_file>"""
     with UnicodeReader(world.output) as prediction_rows:
         with UnicodeReader(res_filename(check_file)) as test_rows:
             check_rows(prediction_rows, test_rows)

-#@step(r'the batch centroid file is like "(.*)"')
+
 def 
i_check_batch_centroid(step, check_file): + """Step: the batch centroid file is like """ i_check_predictions(step, check_file) -#@step(r'the batch anomaly score file is like "(.*)"') + def i_check_batch_anomaly_score(step, check_file): + """Step: the batch anomaly score file is like """ i_check_predictions(step, check_file) -#@step(r'I check the batch centroid is ok') + def i_check_batch_centroid_is_ok(step): + """Step: I check the batch centroid is ok""" ok_(world.api.ok(world.batch_centroid)) -#@step(r'I check the batch anomaly score is ok') + def i_check_batch_anomaly_score_is_ok(step): + """Step: I check the batch anomaly score is ok""" ok_(world.api.ok(world.batch_anomaly_score)) -#@step(r'I create a batch centroid for the dataset$') def i_create_a_batch_prediction_with_cluster(step): + """Step: I create a batch centroid for the dataset""" dataset = world.dataset.get('resource') cluster = world.cluster.get('resource') resource = world.api.create_batch_centroid(cluster, dataset) @@ -191,8 +168,9 @@ def i_create_a_batch_prediction_with_cluster(step): world.batch_centroid = resource['object'] world.batch_centroids.append(resource['resource']) -#@step(r'I create a batch anomaly score$') + def i_create_a_batch_prediction_with_anomaly(step): + """Step: I create a batch anomaly score""" dataset = world.dataset.get('resource') anomaly = world.anomaly.get('resource') resource = world.api.create_batch_anomaly_score(anomaly, dataset) @@ -203,8 +181,8 @@ def i_create_a_batch_prediction_with_anomaly(step): world.batch_anomaly_scores.append(resource['resource']) -#@step(r'I create a linear batch prediction$') def i_create_a_linear_batch_prediction(step): + """Step: I create a linear batch prediction""" dataset = world.dataset.get('resource') linear_regression = world.linear_regression.get('resource') resource = world.api.create_batch_prediction(linear_regression, dataset) @@ -215,8 +193,8 @@ def i_create_a_linear_batch_prediction(step): world.batch_predictions.append(resource['resource']) -#@step(r'I create a source from the batch prediction$') def i_create_a_source_from_batch_prediction(step): + """Step: I create a source from the batch prediction""" batch_prediction = world.batch_prediction.get('resource') resource = world.api.source_from_batch_prediction(batch_prediction) world.status = resource['code'] @@ -226,8 +204,10 @@ def i_create_a_source_from_batch_prediction(step): world.sources.append(resource['resource']) -#@step(r'I create a batch prediction for the dataset with the logistic regression$') def i_create_a_batch_prediction_logistic_model(step): + """Step: I create a batch prediction for the dataset with the logistic + regression + """ dataset = world.dataset.get('resource') logistic = world.logistic_regression.get('resource') resource = world.api.create_batch_prediction(logistic, dataset) @@ -238,8 +218,8 @@ def i_create_a_batch_prediction_logistic_model(step): world.batch_predictions.append(resource['resource']) -#@step(r'I create a batch prediction for the dataset with the fusion$') def i_create_a_batch_prediction_fusion(step): + """Step: I create a batch prediction for the dataset with the fusion""" dataset = world.dataset.get('resource') fusion = world.fusion.get('resource') resource = world.api.create_batch_prediction(fusion, dataset) diff --git a/bigml/tests/create_batch_projection_steps.py b/bigml/tests/create_batch_projection_steps.py index e4c4ef41..d18debf7 100644 --- a/bigml/tests/create_batch_projection_steps.py +++ b/bigml/tests/create_batch_projection_steps.py @@ -1,7 +1,7 
@@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2018-2019 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,26 +15,17 @@ # License for the specific language governing permissions and limitations # under the License. -import time -import json -import requests -import csv -import traceback -from datetime import datetime, timedelta -from world import world, res_filename -from nose.tools import eq_, ok_, assert_less from bigml.api import HTTP_CREATED -from bigml.api import FINISHED -from bigml.api import FAULTY -from bigml.api import get_status +from bigml.api import FINISHED, FAULTY from bigml.io import UnicodeReader -from read_batch_projection_steps import i_get_the_batch_projection +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_, ok_ -#@step(r'I create a batch projection for the dataset with the PCA$') def i_create_a_batch_projection(step): + """Step: I create a batch projection for the dataset with the PCA""" dataset = world.dataset.get('resource') pca = world.pca.get('resource') resource = world.api.create_batch_projection(pca, dataset) @@ -45,38 +36,35 @@ def i_create_a_batch_projection(step): world.batch_projections.append(resource['resource']) -#@step(r'I wait until the batch projection status code is either (\d) or (-\d) less than (\d+)') def wait_until_batch_projection_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - i_get_the_batch_projection(step, world.batch_projection['resource']) - status = get_status(world.batch_projection) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_batch_projection(step, world.batch_projection['resource']) - status = get_status(world.batch_projection) - eq_(status['code'], int(code1)) + """Step: I wait until the batch projection status code is either + or less than + """ + world.batch_projection = wait_until_status_code_is( + code1, code2, secs, world.batch_projection) -#@step(r'I wait until the batch projection is ready less than (\d+)') def the_batch_projection_is_finished_in_less_than(step, secs): + """Step: I wait until the batch projection is ready less than """ wait_until_batch_projection_status_code_is(step, FINISHED, FAULTY, secs) -#@step(r'I download the created projections file to "(.*)"') + def i_download_projections_file(step, filename): + """Step: I download the created projections file to """ file_object = world.api.download_batch_projection( world.batch_projection, filename=res_filename(filename)) ok_(file_object is not None) world.output = file_object -#@step(r'the batch projection file is like "(.*)"') + def i_check_projections(step, check_file): + """Step: the batch projection file is like """ with UnicodeReader(world.output) as projection_rows: with UnicodeReader(res_filename(check_file)) as test_rows: check_csv_rows(projection_rows, test_rows) + def check_csv_rows(projections, expected): + """Checking expected projections""" for projection in projections: - eq_(projection, expected.next()) + eq_(projection, next(expected)) diff --git a/bigml/tests/create_cluster_steps.py b/bigml/tests/create_cluster_steps.py index d078b941..f6c9e002 100644 --- a/bigml/tests/create_cluster_steps.py +++ 
b/bigml/tests/create_cluster_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2012-2019 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -14,40 +14,36 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. - -import time import json import os -from datetime import datetime, timedelta -from world import world, res_filename -from nose.tools import eq_, assert_less - -from read_cluster_steps import i_get_the_cluster -from bigml.api import HTTP_CREATED -from bigml.api import HTTP_ACCEPTED -from bigml.api import FINISHED -from bigml.api import FAULTY +from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY from bigml.api import get_status from bigml.cluster import Cluster +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_ + + +def i_create_a_cluster(step, shared=None): + """Step: I create a cluster""" + if shared is None or world.shared.get("cluster", {}).get(shared) is None: + dataset = world.dataset.get('resource') + resource = world.api.create_cluster( + dataset, {'seed': 'BigML', + 'cluster_seed': 'BigML', + 'k': 8}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.cluster = resource['object'] + world.clusters.append(resource['resource']) -#@step(r'I create a cluster$') -def i_create_a_cluster(step): - dataset = world.dataset.get('resource') - resource = world.api.create_cluster( - dataset, {'seed': 'BigML', - 'cluster_seed': 'BigML', - 'k': 8}) - world.status = resource['code'] - eq_(world.status, HTTP_CREATED) - world.location = resource['location'] - world.cluster = resource['object'] - world.clusters.append(resource['resource']) -#@step(r'I create a cluster from a dataset list$') def i_create_a_cluster_from_dataset_list(step): - resource = world.api.create_cluster(world.dataset_ids) + """Step: I create a cluster from a dataset list""" + resource = world.api.create_cluster(step.bigml["dataset_ids"]) world.status = resource['code'] eq_(world.status, HTTP_CREATED) world.location = resource['location'] @@ -55,8 +51,8 @@ def i_create_a_cluster_from_dataset_list(step): world.clusters.append(resource['resource']) -#@step(r'I create a cluster with options "(.*)"$') def i_create_a_cluster_with_options(step, options): + """Step: I create a cluster with options """ dataset = world.dataset.get('resource') options = json.loads(options) options.update({'seed': 'BigML', @@ -70,26 +66,29 @@ def i_create_a_cluster_with_options(step, options): world.cluster = resource['object'] world.clusters.append(resource['resource']) -#@step(r'I wait until the cluster status code is either (\d) or (-\d) less than (\d+)') + def wait_until_cluster_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - i_get_the_cluster(step, world.cluster['resource']) - status = get_status(world.cluster) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_cluster(step, world.cluster['resource']) - status = get_status(world.cluster) - eq_(status['code'], int(code1)) - 
-#@step(r'I wait until the cluster is ready less than (\d+)') -def the_cluster_is_finished_in_less_than(step, secs): - wait_until_cluster_status_code_is(step, FINISHED, FAULTY, secs) - -#@step(r'I make the cluster shared') + """Step: I wait until the cluster status code is either or + less than """ + world.cluster = wait_until_status_code_is( + code1, code2, secs, world.cluster) + + +def the_cluster_is_finished_in_less_than(step, secs, shared=None): + """Step: I wait until the cluster is ready less than """ + if shared is None or world.shared.get("cluster", {}).get(shared) is None: + wait_until_cluster_status_code_is(step, FINISHED, FAULTY, secs) + if shared is not None: + if "cluster" not in world.shared: + world.shared["cluster"] = {} + world.shared["cluster"][shared] = world.cluster + else: + world.cluster = world.shared["cluster"][shared] + print("Reusing %s" % world.cluster["resource"]) + + def make_the_cluster_shared(step): + """Step: I make the cluster shared""" resource = world.api.update_cluster(world.cluster['resource'], {'shared': True}) world.status = resource['code'] @@ -97,27 +96,30 @@ def make_the_cluster_shared(step): world.location = resource['location'] world.cluster = resource['object'] -#@step(r'I get the cluster sharing info') + def get_sharing_info(step): + """Step: I get the cluster sharing info""" world.shared_hash = world.cluster['shared_hash'] world.sharing_key = world.cluster['sharing_key'] -#@step(r'I check the cluster status using the model\'s shared url') + def cluster_from_shared_url(step): + """Step: I check the cluster status using the model's shared url""" world.cluster = world.api.get_cluster("shared/cluster/%s" % world.shared_hash) eq_(get_status(world.cluster)['code'], FINISHED) -#@step(r'I check the cluster status using the model\'s shared key') -def cluster_from_shared_key(step): +def cluster_from_shared_key(step): + """Step: I check the cluster status using the model's shared key""" username = os.environ.get("BIGML_USERNAME") world.cluster = world.api.get_cluster(world.cluster['resource'], shared_username=username, shared_api_key=world.sharing_key) eq_(get_status(world.cluster)['code'], FINISHED) -#@step(r'the data point in the cluster closest to "(.*)" is "(.*)"') + def closest_in_cluster(step, reference, closest): - local_cluster = world.local_cluster + """Step: the data point in the cluster closest to is """ + local_cluster = step.bigml["local_cluster"] reference = json.loads(reference) closest = json.loads(closest) result = local_cluster.closest_in_cluster( \ @@ -125,25 +127,46 @@ def closest_in_cluster(step, reference, closest): result = json.loads(json.dumps(result)) eq_(closest, result) -#@step(r'the centroid in the cluster closest to "(.*)" is "(.*)"') + def closest_centroid_in_cluster(step, reference, closest_id): - local_cluster = world.local_cluster + """Step: the centroid in the cluster closest to is + + """ + local_cluster = step.bigml["local_cluster"] reference = json.loads(reference) result = local_cluster.sorted_centroids( \ reference) result = result["centroids"][0]["centroid_id"] eq_(closest_id, result) -#@step(r'I export the cluster$') def i_export_cluster(step, filename): + """Step: I export the cluster""" world.api.export(world.cluster.get('resource'), filename=res_filename(filename)) -#@step(r'I create a local cluster from file "(.*)"') + def i_create_local_cluster_from_file(step, export_file): - world.local_cluster = Cluster(res_filename(export_file)) + """Step: I create a local cluster from file """ + 
step.bigml["local_cluster"] = Cluster(res_filename(export_file)) -#@step(r'the cluster ID and the local cluster ID match') def check_cluster_id_local_id(step): - eq_(world.local_cluster.resource_id, world.cluster["resource"]) + """Step: the cluster ID and the local cluster ID match""" + eq_(step.bigml["local_cluster"].resource_id, world.cluster["resource"]) + + +def clone_cluster(step, cluster): + """Step: I clone cluster""" + resource = world.api.clone_cluster(cluster, + {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.cluster = resource['object'] + # save reference + world.clusters.append(resource['resource']) + + +def the_cloned_cluster_is(step, cluster): + """Checking the cluster is a clone""" + eq_(world.cluster["origin"], cluster) diff --git a/bigml/tests/create_configuration_steps.py b/bigml/tests/create_configuration_steps.py index 37ddfa90..5116986d 100644 --- a/bigml/tests/create_configuration_steps.py +++ b/bigml/tests/create_configuration_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2017-2019 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,23 +15,15 @@ # License for the specific language governing permissions and limitations # under the License. -import time -import json -import os -from datetime import datetime, timedelta -from world import world -from nose.tools import eq_ +from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY -from read_configuration_steps import i_get_the_configuration +from .world import world, eq_ +from .read_resource_steps import wait_until_status_code_is -from bigml.api import HTTP_CREATED -from bigml.api import HTTP_ACCEPTED -from bigml.api import FINISHED -from bigml.api import FAULTY -from bigml.api import get_status -#@step(r'I create a configuration$') def i_create_configuration(step, configurations): + """Step: I create a configuration""" resource = world.api.create_configuration( configurations, {"name": "configuration"}) world.status = resource['code'] @@ -41,41 +33,34 @@ def i_create_configuration(step, configurations): world.configurations.append(resource['resource']) -#@step(r'I update a configuration$') def i_update_configuration(step, changes): + """Step: I update a configuration""" resource = world.api.update_configuration( world.configuration["resource"], changes) - print resource world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) world.location = resource['location'] world.configuration = resource['object'] -#@step(r'I wait until the configuration status code is either (\d) or (-\d) less than (\d+)') def wait_until_configuration_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - i_get_the_configuration(step, world.configuration['resource']) - status = get_status(world.configuration) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_configuration(step, world.configuration['resource']) - status = get_status(world.configuration) - eq_(status['code'], int(code1)) + """Step: I wait until the configuration status code is either or + less than + """ + world.configuration = wait_until_status_code_is( + code1, 
code2, secs, world.configuration) + -#@step(r'I wait until the configuration is ready less than (\d+)') def the_configuration_is_finished_in_less_than(step, secs): + """Step: I wait until the configuration is ready less than <secs>""" wait_until_configuration_status_code_is(step, FINISHED, FAULTY, secs) -#@step(r'the configuration name is "(.*)"$') def i_check_configuration_name(step, name): + """Step: the configuration name is <name>""" eq_(world.configuration["name"], name["name"]) -#@step(r'the configuration contents are "(.*)"$') def i_check_configuration_conf(step, confs): + """Step: the configuration contents are <confs>""" eq_(world.configuration["configurations"], confs) diff --git a/bigml/tests/create_correlation_steps.py b/bigml/tests/create_correlation_steps.py index 7e776dd5..c5421c6b 100644 --- a/bigml/tests/create_correlation_steps.py +++ b/bigml/tests/create_correlation_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,29 +15,19 @@ # License for the specific language governing permissions and limitations # under the License. -import time -import json -import os -from datetime import datetime, timedelta -from world import world -from nose.tools import eq_, assert_less +from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY -from bigml.api import HTTP_CREATED -from bigml.api import HTTP_ACCEPTED -from bigml.api import FINISHED -from bigml.api import FAULTY -from bigml.api import get_status +from .read_resource_steps import wait_until_status_code_is +from .world import world, eq_ -from read_correlation_steps import i_get_the_correlation - - -#@step(r'the correlation name is "(.*)"') def i_check_correlation_name(step, name): + """Step: the correlation name is <name>""" correlation_name = world.correlation['name'] eq_(name, correlation_name) -#@step(r'I create a correlation from a dataset$') def i_create_a_correlation_from_dataset(step): + """Step: I create a correlation from a dataset""" dataset = world.dataset.get('resource') resource = world.api.create_correlation(dataset, {'name': 'new correlation'}) world.status = resource['code'] @@ -47,8 +37,8 @@ def i_create_a_correlation_from_dataset(step): world.correlations.append(resource['resource']) -#@step(r'I update the correlation name to "(.*)"$') def i_update_correlation_name(step, name): + """Step: I update the correlation name to <name>""" resource = world.api.update_correlation(world.correlation['resource'], {'name': name}) world.status = resource['code'] @@ -57,22 +47,14 @@ def i_update_correlation_name(step, name): world.correlation = resource['object'] -#@step(r'I wait until the correlation status code is either (\d) or (-\d) less than (\d+)') def wait_until_correlation_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - correlation_id = world.correlation['resource'] - i_get_the_correlation(step, correlation_id) - status = get_status(world.correlation) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_correlation(step, correlation_id) - status = get_status(world.correlation) - eq_(status['code'], int(code1)) + """Step: I wait until the correlation status code is
either <code1> + or <code2> less than <secs> + """ + world.correlation = wait_until_status_code_is( + code1, code2, secs, world.correlation) -#@step(r'I wait until the correlation is ready less than (\d+)') def the_correlation_is_finished_in_less_than(step, secs): + """Step: I wait until the correlation is ready less than <secs>""" wait_until_correlation_status_code_is(step, FINISHED, FAULTY, secs) diff --git a/bigml/tests/create_dataset_steps.py b/bigml/tests/create_dataset_steps.py index 83e91e7c..b341ba51 100644 --- a/bigml/tests/create_dataset_steps.py +++ b/bigml/tests/create_dataset_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2012-2019 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,45 +15,44 @@ # License for the specific language governing permissions and limitations # under the License. -import time import json -from datetime import datetime, timedelta -from world import world, res_filename -from nose.tools import eq_, assert_less -from bigml.api import HTTP_CREATED -from bigml.api import HTTP_OK -from bigml.api import HTTP_ACCEPTED -from bigml.api import FINISHED -from bigml.api import FAULTY + +from bigml.api import HTTP_CREATED, HTTP_OK, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY from bigml.api import get_status -import read_dataset_steps as read +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_ -#@step(r'I create a dataset$') -def i_create_a_dataset(step): - resource = world.api.create_dataset(world.source['resource']) - world.status = resource['code'] - eq_(world.status, HTTP_CREATED) - world.location = resource['location'] - world.dataset = resource['object'] - world.datasets.append(resource['resource']) + +def i_create_a_dataset(step, shared=None): + """Step: I create a dataset""" + if shared is None or world.shared.get("dataset", {}).get(shared) is None: + resource = world.api.create_dataset(world.source['resource']) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.dataset = resource['object'] + world.datasets.append(resource['resource']) -#@step(r'I download the dataset file to "(.*)"$') def i_export_a_dataset(step, local_file): + """Step: I download the dataset file to <local_file>""" world.api.download_dataset(world.dataset['resource'], filename=res_filename(local_file)) -#@step(r'file "(.*)" is like file "(.*)"$') def files_equal(step, local_file, data): - contents_local_file = open(res_filename(local_file)).read() - contents_data = open(res_filename(data)).read() + """Step: file <local_file> is like file <data>""" + with open(res_filename(local_file)) as handler: + contents_local_file = handler.read() + with open(res_filename(data)) as handler: + contents_data = handler.read() eq_(contents_local_file, contents_data) -#@step(r'I create a dataset with "(.*)"') def i_create_a_dataset_with(step, data="{}"): + """Step: I create a dataset with <data>""" resource = world.api.create_dataset(world.source['resource'], json.loads(data)) world.status = resource['code'] @@ -63,26 +62,29 @@ def i_create_a_dataset_with(step, data="{}"): world.datasets.append(resource['resource']) -#@step(r'I wait until the dataset status code is either (\d) or (\d) less than (\d+)') def wait_until_dataset_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta =
int(secs) * world.delta - read.i_get_the_dataset(step, world.dataset['resource']) - status = get_status(world.dataset) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - read.i_get_the_dataset(step, world.dataset['resource']) - status = get_status(world.dataset) - eq_(status['code'], int(code1)) - -#@step(r'I wait until the dataset is ready less than (\d+)') -def the_dataset_is_finished_in_less_than(step, secs): - wait_until_dataset_status_code_is(step, FINISHED, FAULTY, secs) - -#@step(r'I make the dataset public') + """Step: I wait until the dataset status code is either <code1> or + <code2> less than <secs> + """ + world.dataset = wait_until_status_code_is( + code1, code2, secs, world.dataset) + + +def the_dataset_is_finished_in_less_than(step, secs, shared=None): + """Step: I wait until the dataset is ready less than <secs>""" + if shared is None or world.shared.get("dataset", {}).get(shared) is None: + wait_until_dataset_status_code_is(step, FINISHED, FAULTY, secs) + if shared is not None: + if "dataset" not in world.shared: + world.shared["dataset"] = {} + world.shared["dataset"][shared] = world.dataset + else: + world.dataset = world.shared["dataset"][shared] + print("Reusing %s" % world.dataset["resource"]) + + def make_the_dataset_public(step): + """Step: I make the dataset public""" resource = world.api.update_dataset(world.dataset['resource'], {'private': False}) world.status = resource['code'] @@ -90,17 +92,19 @@ def make_the_dataset_public(step): world.location = resource['location'] world.dataset = resource['object'] -#@step(r'I get the dataset status using the dataset\'s public url') + def build_local_dataset_from_public_url(step): + """Step: I get the dataset status using the dataset's public url""" world.dataset = world.api.get_dataset("public/%s" % world.dataset['resource']) -#@step(r'the dataset\'s status is FINISHED') def dataset_status_finished(step): + """Step: the dataset's status is FINISHED""" eq_(get_status(world.dataset)['code'], FINISHED) -#@step(r'I create a dataset extracting a (.*) sample$') + def i_create_a_split_dataset(step, rate): + """Step: I create a dataset extracting a <rate> sample""" world.origin_dataset = world.dataset resource = world.api.create_dataset(world.dataset['resource'], {'sample_rate': float(rate)}) @@ -110,15 +114,15 @@ def i_create_a_split_dataset(step, rate): world.dataset = resource['object'] world.datasets.append(resource['resource']) -#@step(r'I create a multidataset with ranges (.*)$') + def i_create_a_multidataset(step, ranges): + """Step: I create a multidataset with ranges <ranges>""" ranges = json.loads(ranges) datasets = world.datasets[-len(ranges):] world.origin_dataset = world.dataset resource = world.api.create_dataset( \ datasets, - {'sample_rates': dict([(dataset, d_range) for dataset, d_range in - zip(datasets, ranges)])}) + {'sample_rates': dict(list(zip(datasets, ranges)))}) world.status = resource['code'] eq_(world.status, HTTP_CREATED) world.location = resource['location'] @@ -126,8 +130,10 @@ def i_create_a_multidataset(step, ranges): world.datasets.append(resource['resource']) -#@step(r'I create a multi-dataset with same datasets and the first sample rate (.*)$') def i_create_a_multidataset_mixed_format(step, ranges): + """Step: I create a multi-dataset with same datasets and the first sample + rate <ranges> + """ ranges = json.loads(ranges) dataset = world.dataset['resource'] origins = [] @@ -147,18 +153,20 @@
world.datasets.append(resource['resource']) -#@step(r'I compare the datasets\' instances$') def i_compare_datasets_instances(step): + """Step: I compare the datasets' instances""" world.datasets_instances = (world.dataset['rows'], world.origin_dataset['rows']) -#@step(r'the proportion of instances between datasets is (.*)$') + def proportion_datasets_instances(step, rate): + """Step: the proportion of instances between datasets is """ eq_(int(world.datasets_instances[1] * float(rate)), world.datasets_instances[0]) -#@step(r'I create a dataset associated to centroid "(.*)"') + def i_create_a_dataset_from_cluster(step, centroid_id): + """Step: I create a dataset associated to centroid """ resource = world.api.create_dataset( world.cluster['resource'], args={'centroid': centroid_id}) @@ -168,25 +176,62 @@ def i_create_a_dataset_from_cluster(step, centroid_id): world.dataset = resource['object'] world.datasets.append(resource['resource']) -#@step(r'I create a dataset from the cluster and the centroid$') + def i_create_a_dataset_from_cluster_centroid(step): + """Step: I create a dataset from the cluster and the centroid""" i_create_a_dataset_from_cluster(step, world.centroid['centroid_id']) -#@step(r'the dataset is associated to the centroid "(.*)" of the cluster') + def is_associated_to_centroid_id(step, centroid_id): + """Step: the dataset is associated to the centroid + of the cluster + """ cluster = world.api.get_cluster(world.cluster['resource']) world.status = cluster['code'] eq_(world.status, HTTP_OK) eq_("dataset/%s" % (cluster['object']['cluster_datasets'][centroid_id]), world.dataset['resource']) -#@step(r'I check that the dataset is created for the cluster and the centroid$') + def i_check_dataset_from_cluster_centroid(step): + """Step: I check that the dataset is created for the cluster and the + centroid + """ is_associated_to_centroid_id(step, world.centroid['centroid_id']) -#@step(r'I update the dataset with params "(.*)"') + def i_update_dataset_with(step, data="{}"): + """Step: I update the dataset with params """ resource = world.api.update_dataset(world.dataset.get('resource'), json.loads(data)) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) + + +def clone_dataset(step, dataset): + """Step: I clone dataset""" + resource = world.api.clone_dataset(dataset, {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.dataset = resource['object'] + # save reference + world.datasets.append(resource['resource']) + + +def the_cloned_dataset_is(step, dataset): + """Checking the dataset is a clone""" + eq_(world.dataset["origin"], dataset) + + +def check_annotations(step, annotations_field, annotations_num): + """Checking the dataset contains a number of annotations""" + annotations_num = int(annotations_num) + field = world.dataset["fields"][annotations_field] + if field["optype"] == "regions": + count = field["summary"]["regions"]["sum"] + else: + count = 0 + for _, num in field["summary"]["categories"]: + count += num + eq_(count, annotations_num) diff --git a/bigml/tests/create_ensemble_steps.py b/bigml/tests/create_ensemble_steps.py index 4c6cc1b4..7113dfde 100644 --- a/bigml/tests/create_ensemble_steps.py +++ b/bigml/tests/create_ensemble_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member,broad-except # -# Copyright 2012-2019 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 
(the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,109 +15,151 @@ # License for the specific language governing permissions and limitations # under the License. -import time import json import os -from datetime import datetime, timedelta -from world import world, res_filename -from nose.tools import eq_, assert_less from bigml.api import HTTP_CREATED -from bigml.api import HTTP_ACCEPTED -from bigml.api import FINISHED -from bigml.api import FAULTY -from bigml.api import get_status +from bigml.api import FINISHED, FAULTY from bigml.ensemble import Ensemble from bigml.ensemblepredictor import EnsemblePredictor from bigml.model import Model from bigml.supervised import SupervisedModel +from bigml.local_model import LocalModel -from read_ensemble_steps import i_get_the_ensemble +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_ NO_MISSING_SPLITS = {'missing_splits': False} ENSEMBLE_SAMPLE = {'seed': 'BigML', 'ensemble_sample': {"rate": 0.7, "seed": 'BigML'}} -#@step(r'I create an ensemble of (\d+) models and (\d+) tlp$') -def i_create_an_ensemble(step, number_of_models=2, tlp=1): - dataset = world.dataset.get('resource') - try: - number_of_models = int(number_of_models) - # tlp is no longer used - args = {'number_of_models': number_of_models} - except: - args = {} - args.update(NO_MISSING_SPLITS) - args.update(ENSEMBLE_SAMPLE) - resource = world.api.create_ensemble(dataset, args=args) - world.status = resource['code'] - eq_(world.status, HTTP_CREATED) - world.location = resource['location'] - world.ensemble = resource['object'] - world.ensemble_id = resource['resource'] - world.ensembles.append(resource['resource']) -#@step(r'I wait until the ensemble status code is either (\d) or (-\d) -# less than (\d+)') +def i_create_an_ensemble(step, number_of_models=2, shared=None): + """Step: I create an ensemble of models""" + if shared is None or world.shared.get("ensemble", {}).get(shared) is None: + dataset = world.dataset.get('resource') + try: + number_of_models = int(number_of_models) + # tlp is no longer used + args = {'number_of_models': number_of_models} + except Exception: + args = {} + args.update(NO_MISSING_SPLITS) + args.update(ENSEMBLE_SAMPLE) + resource = world.api.create_ensemble(dataset, args=args) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.ensemble = resource['object'] + world.ensemble_id = resource['resource'] + world.ensembles.append(resource['resource']) + + def wait_until_ensemble_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - i_get_the_ensemble(step, world.ensemble['resource']) - status = get_status(world.ensemble) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_ensemble(step, world.ensemble['resource']) - status = get_status(world.ensemble) - eq_(status['code'], int(code1)) - -#@step(r'I wait until the ensemble is ready less than (\d+)') -def the_ensemble_is_finished_in_less_than(step, secs): - wait_until_ensemble_status_code_is(step, FINISHED, FAULTY, secs) - - -#@step(r'I create a local Ensemble$') -def create_local_ensemble(step): - world.local_ensemble = Ensemble(world.ensemble_id, world.api) - world.local_model = Model(world.local_ensemble.model_ids[0], world.api) - -#@step(r'I create a local Ensemble$') + """Step: I wait 
until the ensemble status code is either or + less than + """ + world.ensemble = wait_until_status_code_is( + code1, code2, secs, world.ensemble) + + +def the_ensemble_is_finished_in_less_than(step, secs, shared=None): + """Step: I wait until the ensemble is ready less than """ + if shared is None or world.shared.get("ensemble", {}).get(shared) is None: + wait_until_ensemble_status_code_is(step, FINISHED, FAULTY, secs) + if shared is not None: + if "ensemble" not in world.shared: + world.shared["ensemble"] = {} + world.shared["ensemble"][shared] = world.ensemble + else: + world.ensemble = world.shared["ensemble"][shared] + world.ensemble_id = world.ensemble["resource"] + print("Reusing %s" % world.ensemble["resource"]) + + +def create_local_ensemble(step, path=None): + """Step: I create a local Ensemble""" + if path is None: + step.bigml["local_ensemble"] = Ensemble(world.ensemble_id, world.api) + step.bigml["local_model"] = Model( + step.bigml["local_ensemble"].model_ids[0], world.api) + else: + step.bigml["local_ensemble"] = Ensemble(res_filename(path)) + step.bigml["local_model"] = step.bigml[ + "local_ensemble"].multi_model.models[0] + + def create_local_supervised_ensemble(step): - world.local_ensemble = SupervisedModel(world.ensemble_id, world.api) - world.local_model = Model(world.local_ensemble.model_ids[0], world.api) + """Step: I create a local Ensemble""" + step.bigml["local_ensemble"] = SupervisedModel(world.ensemble_id, world.api) + step.bigml["local_model"] = Model(step.bigml[ + "local_ensemble"].model_ids[0], world.api) -#@step(r'I create a local EnsemblePredictor from (.*?)$') +def create_local_bigml_ensemble(step): + """Step: I create a local Ensemble""" + step.bigml["local_ensemble"] = LocalModel(world.ensemble_id, world.api) + step.bigml["local_model"] = Model(step.bigml[ + "local_ensemble"].model_ids[0], world.api) + def create_local_ensemble_predictor(step, directory): - module_dir = directory - directory = res_filename(directory) - with open(os.path.join(directory, "ensemble.json")) as file_handler: + """Step: I create a local EnsemblePredictor from """ + directory_path = res_filename(directory) + with open(os.path.join(directory_path, "ensemble.json")) as file_handler: + ensemble = json.load(file_handler) + step.bigml["local_ensemble"] = EnsemblePredictor(ensemble, directory) + + +def load_full_ensemble(step, directory): + """Step: Given I load the full ensemble information from """ + model_list = [] + directory_path = res_filename(directory) + with open(os.path.join(directory_path, "ensemble.json")) as file_handler: ensemble = json.load(file_handler) - world.local_ensemble = EnsemblePredictor(ensemble, module_dir) + model_list.append(ensemble) + for model_id in ensemble["object"]["models"]: + with open(os.path.join(directory_path, model_id.replace("/", "_"))) \ + as file_handler: + model = json.load(file_handler) + model_list.append(model) + return model_list + -#@step(r'I create a local Ensemble with the last (\d+) models$') def create_local_ensemble_with_list(step, number_of_models): - world.local_ensemble = Ensemble(world.models[-int(number_of_models):], + """Step: I create a local Ensemble with the last + models + """ + step.bigml["local_ensemble"] = Ensemble(world.models[-int(number_of_models):], world.api) -#@step(r'I create a local Ensemble with the last (\d+) local models$') + +def create_local_ensemble_from_list(step, model_list): + """Step: I create a local ensemble from the ensemble + models list + """ + step.bigml["local_ensemble"] = Ensemble(model_list) 
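Together, load_full_ensemble and create_local_ensemble_from_list above make fully offline ensemble predictions possible. An illustrative pairing (the directory name is hypothetical and must contain ensemble.json plus one JSON file per component model, as load_full_ensemble expects):

model_list = load_full_ensemble(step, "my_dir/exported_ensemble")
create_local_ensemble_from_list(step, model_list)
# iris-style input, for illustration only
prediction = step.bigml["local_ensemble"].predict({"petal length": 3})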
+ + def create_local_ensemble_with_list_of_local_models(step, number_of_models): + """Step: I create a local Ensemble with the last + local models""" local_models = [Model(model) for model in world.models[-int(number_of_models):]] - world.local_ensemble = Ensemble(local_models, world.api) + step.bigml["local_ensemble"] = Ensemble(local_models, world.api) + -#@step(r'the field importance text is (.*?)$') def field_importance_print(step, field_importance): - field_importance_data = world.local_ensemble.field_importance_data()[0] + """Step: the field importance text is """ + field_importance_data = step.bigml["local_ensemble"].field_importance_data()[0] eq_(field_importance_data, json.loads(field_importance)) -#@step(r'I create an ensemble with "(.*)"$') + def i_create_an_ensemble_with_params(step, params): + """Step: I create an ensemble with """ dataset = world.dataset.get('resource') try: args = json.loads(params) - except: + except Exception: args = {} args.update(ENSEMBLE_SAMPLE) resource = world.api.create_ensemble(dataset, args=args) @@ -129,16 +171,34 @@ def i_create_an_ensemble_with_params(step, params): world.ensembles.append(resource['resource']) -#@step(r'I export the ensemble$') def i_export_ensemble(step, filename): + """Step: I export the ensemble""" world.api.export(world.ensemble.get('resource'), filename=res_filename(filename)) -#@step(r'I create a local ensemble from file "(.*)"') + def i_create_local_ensemble_from_file(step, export_file): - world.local_ensemble = Ensemble(res_filename(export_file)) + """Step: I create a local ensemble from file """ + step.bigml["local_ensemble"] = Ensemble(res_filename(export_file)) -#@step(r'the ensemble ID and the local ensemble ID match') def check_ensemble_id_local_id(step): - eq_(world.local_ensemble.resource_id, world.ensemble["resource"]) + """Step: the ensemble ID and the local ensemble ID match""" + eq_(step.bigml["local_ensemble"].resource_id, world.ensemble["resource"]) + + +def clone_ensemble(step, ensemble): + """Step: I clone ensemble""" + resource = world.api.clone_ensemble(ensemble, + {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.ensemble = resource['object'] + # save reference + world.ensembles.append(resource['resource']) + + +def the_cloned_ensemble_is(step, ensemble): + """Checking the ensemble is a clone""" + eq_(world.ensemble["origin"], ensemble) diff --git a/bigml/tests/create_evaluation_steps.py b/bigml/tests/create_evaluation_steps.py index 23c22873..c7412a38 100644 --- a/bigml/tests/create_evaluation_steps.py +++ b/bigml/tests/create_evaluation_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2012, 2015-2019 BigML +# Copyright 2012, 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,21 +15,17 @@ # License for the specific language governing permissions and limitations # under the License. 
-import time import json -from datetime import datetime, timedelta -from world import world -from nose.tools import eq_, assert_less, assert_greater from bigml.api import HTTP_CREATED -from bigml.api import FINISHED -from bigml.api import FAULTY -from bigml.api import get_status +from bigml.api import FINISHED, FAULTY +from bigml.evaluation import Evaluation -from read_evaluation_steps import i_get_the_evaluation +from .read_resource_steps import wait_until_status_code_is +from .world import world, eq_, ok_, res_filename, approx_ -#@step(r'I create an evaluation for the model with the dataset$') -def i_create_an_evaluation(step): +def i_create_an_evaluation(step, shared=None): + """Step: I create an evaluation for the model with the dataset""" dataset = world.dataset.get('resource') model = world.model.get('resource') resource = world.api.create_evaluation(model, dataset) @@ -40,8 +36,8 @@ def i_create_an_evaluation(step): world.evaluations.append(resource['resource']) -#@step(r'I create an evaluation for the ensemble with the dataset$') def i_create_an_evaluation_ensemble(step, params=None): + """Step: I create an evaluation for the ensemble with the dataset""" if params is None: params = {} dataset = world.dataset.get('resource') @@ -53,8 +49,11 @@ def i_create_an_evaluation_ensemble(step, params=None): world.evaluation = resource['object'] world.evaluations.append(resource['resource']) -#@step(r'I create an evaluation for the logistic regression with the dataset$') + def i_create_an_evaluation_logistic(step): + """Step: I create an evaluation for the logistic regression with + the dataset + """ dataset = world.dataset.get('resource') logistic = world.logistic_regression.get('resource') resource = world.api.create_evaluation(logistic, dataset) @@ -64,8 +63,9 @@ def i_create_an_evaluation_logistic(step): world.evaluation = resource['object'] world.evaluations.append(resource['resource']) -#@step(r'I create an evaluation for the deepnet with the dataset$') + def i_create_an_evaluation_deepnet(step): + """Step: I create an evaluation for the deepnet with the dataset""" dataset = world.dataset.get('resource') deepnet = world.deepnet.get('resource') resource = world.api.create_evaluation(deepnet, dataset) @@ -76,8 +76,8 @@ def i_create_an_evaluation_deepnet(step): world.evaluations.append(resource['resource']) -#@step(r'I create an evaluation for the fusion with the dataset$') def i_create_an_evaluation_fusion(step): + """Step: I create an evaluation for the fusion with the dataset""" dataset = world.dataset.get('resource') fusion = world.fusion.get('resource') resource = world.api.create_evaluation(fusion, dataset) @@ -87,31 +87,39 @@ def i_create_an_evaluation_fusion(step): world.evaluation = resource['object'] world.evaluations.append(resource['resource']) -#@step(r'I wait until the evaluation status code is either (\d) or (-\d) less than (\d+)') + def wait_until_evaluation_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - i_get_the_evaluation(step, world.evaluation['resource']) - status = get_status(world.evaluation) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_evaluation(step, world.evaluation['resource']) - status = get_status(world.evaluation) - eq_(status['code'], int(code1)) - -#@step(r'I wait until the evaluation is ready less than (\d+)') + """Step: I wait until the evaluation status code is either or + less than 
""" + world.evaluation = wait_until_status_code_is( + code1, code2, secs, world.evaluation) + + def the_evaluation_is_finished_in_less_than(step, secs): + """Step: I wait until the evaluation is ready less than """ wait_until_evaluation_status_code_is(step, FINISHED, FAULTY, secs) -#@step(r'the measured "(.*)" is (\d+\.*\d*)') + def the_measured_measure_is_value(step, measure, value): - ev = world.evaluation['result']['model'][measure] + 0.0 - eq_(ev, float(value), "The %s is: %s and %s is expected" % ( - measure, ev, float(value))) + """Step: the measured is """ + ev_ = world.evaluation['result']['model'][measure] + 0.0 + eq_(ev_, float(value), "The %s is: %s and %s is expected" % ( + measure, ev_, float(value))) + -#@step(r'the measured "(.*)" is greater than (\d+\.*\d*)') def the_measured_measure_is_greater_value(step, measure, value): - assert_greater(world.evaluation['result']['model'][measure] + 0.0, - float(value)) + """Step: the measured is greater than """ + ok_(float(world.evaluation['result']['model'][measure]) > float(value)) + +def i_create_a_local_evaluation(step, filename): + """Step: I create an Evaluation from the JSON file""" + filename = res_filename(filename) + with open(filename) as handler: + evaluation = json.load(handler) + local_evaluation = Evaluation(evaluation) + step.bigml["local_evaluation"] = local_evaluation + +def the_local_metric_is_value(step, metric, value): + """Step: The metric in the local evaluation is """ + approx_(getattr(step.bigml["local_evaluation"], metric), value, + precision=4) diff --git a/bigml/tests/create_execution_steps.py b/bigml/tests/create_execution_steps.py index b6b94cfb..6d4d69a6 100644 --- a/bigml/tests/create_execution_steps.py +++ b/bigml/tests/create_execution_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,47 +15,44 @@ # License for the specific language governing permissions and limitations # under the License. 
-import time -import json -import os -from datetime import datetime, timedelta -from world import world -from nose.tools import eq_, assert_less +from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY +from bigml.execution import Execution -from bigml.api import HTTP_CREATED -from bigml.api import HTTP_ACCEPTED -from bigml.api import FINISHED -from bigml.api import FAULTY -from bigml.api import get_status +from .read_resource_steps import wait_until_status_code_is +from .world import world, eq_ -from read_execution_steps import i_get_the_execution - -#@step(r'the script id is correct, the value of "(.*)" is "(.*)" and the result is "(.*)"') def the_execution_and_attributes(step, param, param_value, result): + """Step: the script id is correct, the value of <param> is + <param_value> and the result is <result> + """ eq_(world.script['resource'], world.execution['script']) - print world.execution['execution']['results'] eq_(world.execution['execution']['results'][0], result) res_param_value = world.execution[param] eq_(res_param_value, param_value, ("The execution %s is %s and the expected %s is %s" % (param, res_param_value, param, param_value))) -#@step(r'the script ids are correct, the value of "(.*)" is "(.*)" and the result is "(.*)"') + def the_execution_ids_and_attributes(step, number_of_scripts, param, param_value, result): + """Step: the script ids are correct, the value of <param> is + <param_value> and the result is <result> + """ scripts = world.scripts[-number_of_scripts:] eq_(scripts, world.execution['scripts']) - print world.execution['execution']['results'] eq_(world.execution['execution']['results'], result) res_param_value = world.execution[param] eq_(res_param_value, param_value, ("The execution %s is %s and the expected %s is %s" % (param, res_param_value, param, param_value))) -#@step(r'I create a whizzml execution from an existing script"$') + def i_create_an_execution(step): - resource = world.api.create_execution(world.script['resource']) + """Step: I create a whizzml execution from an existing script""" + resource = world.api.create_execution(world.script['resource'], + {"project": world.project_id}) world.status = resource['code'] eq_(world.status, HTTP_CREATED) world.location = resource['location'] @@ -63,10 +60,11 @@ def i_create_an_execution(step): world.executions.append(resource['resource']) -#@step(r'I create a whizzml execution from the last two scripts$') def i_create_an_execution_from_list(step, number_of_scripts=2): + """Step: I create a whizzml execution from the last two scripts""" scripts = world.scripts[-number_of_scripts:] - resource = world.api.create_execution(scripts) + resource = world.api.create_execution(scripts, + {"project": world.project_id}) world.status = resource['code'] eq_(world.status, HTTP_CREATED) world.location = resource['location'] @@ -74,8 +72,8 @@ def i_create_an_execution_from_list(step, number_of_scripts=2): world.executions.append(resource['resource']) -#@step(r'I update the execution with "(.*)", "(.*)"$') def i_update_an_execution(step, param, param_value): + """Step: I update the execution with <param>, <param_value>""" resource = world.api.update_execution(world.execution['resource'], {param: param_value}) world.status = resource['code'] @@ -84,22 +82,23 @@ def i_update_an_execution(step, param, param_value): world.execution = resource['object'] -#@step(r'I wait until the execution status code is either (\d) or (-\d) less than (\d+)') def wait_until_execution_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - execution_id
= world.execution['resource'] - i_get_the_execution(step, execution_id) - status = get_status(world.execution) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_execution(step, execution_id) - status = get_status(world.execution) - eq_(status['code'], int(code1)) - - -#@step(r'I wait until the script is ready less than (\d+)') + """Step: I wait until the execution status code is either or + less than """ + world.execution = wait_until_status_code_is( + code1, code2, secs, world.execution) + + def the_execution_is_finished(step, secs): + """Steps: I wait until the script is ready less than """ wait_until_execution_status_code_is(step, FINISHED, FAULTY, secs) + + +def create_local_execution(step): + """Step: I create a local execution""" + step.bigml["local_execution"] = Execution(world.execution) + + +def the_local_execution_result_is(step, result): + """Step: And the local execution result is """ + eq_(step.bigml["local_execution"].result, result) diff --git a/bigml/tests/create_external_steps.py b/bigml/tests/create_external_steps.py new file mode 100644 index 00000000..08bb6f22 --- /dev/null +++ b/bigml/tests/create_external_steps.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
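create_local_execution and the_local_execution_result_is above wrap a finished WhizzML execution in bigml.execution.Execution, which parses the results structure into attributes such as result. An illustrative use outside the suite (the execution id is a placeholder and api stands for an authenticated BigML connection):

from bigml.execution import Execution

execution = api.get_execution("execution/...")  # placeholder id
local_execution = Execution(execution)
print(local_execution.result)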
+ +import json + +from bigml.api import HTTP_ACCEPTED +from bigml.api import FINISHED +from bigml.api import FAULTY + +from .read_resource_steps import wait_until_status_code_is +from .world import world, eq_, ok_ + + +def i_create_external_connector(step): + """Step: I create an external connector""" + resource = world.api.create_external_connector(None, \ + {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.external_connector = resource['object'] + # save reference + world.external_connectors.append(resource['resource']) + + +def wait_until_external_connector_status_code_is(step, code1, code2, secs): + """Step: I wait until the external connector status code is either <code1> + or <code2> less than <secs> + """ + world.external_connector = wait_until_status_code_is( + code1, code2, secs, world.external_connector) + + +def the_external_connector_is_finished(step, secs): + """Step: I wait until the external_connector is ready less than <secs>""" + wait_until_external_connector_status_code_is(step, FINISHED, FAULTY, secs) + + +def i_update_external_connector_with(step, data="{}"): + """Step: I update the external_connector with params <data>""" + resource = world.api.update_external_connector( \ + world.external_connector.get('resource'), json.loads(data)) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + + +def external_connector_has_args(step, args="{}"): + """Step: the external connector exists and has args <args>""" + args = json.loads(args) + for key, value in list(args.items()): + if key in world.external_connector: + eq_(world.external_connector[key], value, + "Expected key %s: %s. Found %s" % (key, value, world.external_connector[key])) + else: + ok_(False, "No key %s in external connector." % key) diff --git a/bigml/tests/create_forecast_steps.py b/bigml/tests/create_forecast_steps.py index ae18aff2..15a922b8 100644 --- a/bigml/tests/create_forecast_steps.py +++ b/bigml/tests/create_forecast_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2017-2019 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -16,17 +16,14 @@ # under the License.
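In a scenario, the external-connector steps above chain in the usual create / wait / update / check order (the argument values are illustrative):

i_create_external_connector(step)
the_external_connector_is_finished(step, 30)
i_update_external_connector_with(step, '{"name": "my connector"}')
the_external_connector_is_finished(step, 30)
external_connector_has_args(step, '{"name": "my connector"}')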
import json -import time -from nose.tools import assert_almost_equals, eq_ -from datetime import datetime, timedelta -from world import world + from bigml.api import HTTP_CREATED -from bigml.api import FINISHED, FAULTY -from bigml.api import get_status -from read_forecast_steps import i_get_the_forecast +from .world import world, eq_ + def i_create_a_forecast(step, data=None): + """Creating forecast """ if data is None: data = "{}" time_series = world.time_series['resource'] @@ -40,12 +37,13 @@ def i_create_a_forecast(step, data=None): def the_forecast_is(step, predictions): + """Checking forecast""" predictions = json.loads(predictions) attrs = ["point_forecast", "model"] for field_id in predictions: forecast = world.forecast['forecast']['result'][field_id] prediction = predictions[field_id] eq_(len(forecast), len(prediction), "forecast: %s" % forecast) - for index in range(len(forecast)): + for index, item in enumerate(forecast): for attr in attrs: - eq_(forecast[index][attr], prediction[index][attr]) + eq_(item[attr], prediction[index][attr]) diff --git a/bigml/tests/create_lda_steps.py b/bigml/tests/create_lda_steps.py index 4904e030..cd06ac96 100644 --- a/bigml/tests/create_lda_steps.py +++ b/bigml/tests/create_lda_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2012-2019 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,14 +15,8 @@ # License for the specific language governing permissions and limitations # under the License. -import time import json import os -from datetime import datetime, timedelta -from world import world, res_filename -from nose.tools import eq_, assert_less - -from read_lda_steps import i_get_the_topic_model from bigml.api import HTTP_CREATED from bigml.api import HTTP_ACCEPTED @@ -31,8 +25,12 @@ from bigml.api import get_status from bigml.topicmodel import TopicModel -#@step(r'I create a Topic Model') +from .world import world, res_filename, eq_ +from .read_resource_steps import wait_until_status_code_is + + def i_create_a_topic_model(step): + """Step: I create a Topic Model""" dataset = world.dataset.get('resource') resource = world.api.create_topic_model( dataset, {'seed': 'BigML', 'topicmodel_seed': 'BigML'}) @@ -42,9 +40,10 @@ def i_create_a_topic_model(step): world.topic_model = resource['object'] world.topic_models.append(resource['resource']) -#@step(r'I create a topic model from a dataset list$') + def i_create_a_topic_model_from_dataset_list(step): - resource = world.api.create_topic_model(world.dataset_ids) + """Step: I create a topic model from a dataset list""" + resource = world.api.create_topic_model(step.bigml["dataset_ids"]) world.status = resource['code'] eq_(world.status, HTTP_CREATED) world.location = resource['location'] @@ -52,8 +51,8 @@ def i_create_a_topic_model_from_dataset_list(step): world.topic_models.append(resource['resource']) -#@step(r'I create a topic model with options "(.*)"$') def i_create_a_topic_model_with_options(step, options): + """Step: I create a topic model with options """ dataset = world.dataset.get('resource') options = json.loads(options) options.update({'seed': 'BigML', @@ -67,8 +66,8 @@ def i_create_a_topic_model_with_options(step, options): world.topic_models.append(resource['resource']) -#@step(r'I update the topic model name to "(.*)"$') def i_update_topic_model_name(step, 
name): + """Step: I update the topic model name to """ resource = world.api.update_topic_model(world.topic_model['resource'], {'name': name}) world.status = resource['code'] @@ -77,26 +76,21 @@ def i_update_topic_model_name(step, name): world.topic_model = resource['object'] -#@step(r'I wait until the topic model status code is either (\d) or (-\d) less than (\d+)') def wait_until_topic_model_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - i_get_the_topic_model(step, world.topic_model['resource']) - status = get_status(world.topic_model) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_topic_model(step, world.topic_model['resource']) - status = get_status(world.topic_model) - eq_(status['code'], int(code1)) - -#@step(r'I wait until the topic model is ready less than (\d+)') + """Step: I wait until the topic model status code is either + or less than + """ + world.topic_model = wait_until_status_code_is( + code1, code2, secs, world.topic_model) + + def the_topic_model_is_finished_in_less_than(step, secs): + """Steps: I wait until the topic model is ready less than """ wait_until_topic_model_status_code_is(step, FINISHED, FAULTY, secs) -#@step(r'I make the topic model shared') + def make_the_topic_model_shared(step): + """Step: I make the topic model shared """ resource = world.api.update_topic_model(world.topic_model['resource'], {'shared': True}) world.status = resource['code'] @@ -104,20 +98,26 @@ def make_the_topic_model_shared(step): world.location = resource['location'] world.topic_model = resource['object'] -#@step(r'I get the topic_model sharing info') + def get_sharing_info(step): + """Step: I get the topic_model sharing info""" world.shared_hash = world.topic_model['shared_hash'] world.sharing_key = world.topic_model['sharing_key'] -#@step(r'I check the topic model status using the topic model\'s shared url') + def topic_model_from_shared_url(step): + """Step: I check the topic model status using the topic model\'s + shared url + """ world.topic_model = world.api.get_topic_model("shared/topicmodel/%s" % world.shared_hash) eq_(get_status(world.topic_model)['code'], FINISHED) -#@step(r'I check the topic model status using the topic model\'s shared key') -def topic_model_from_shared_key(step): +def topic_model_from_shared_key(step): + """Step: I check the topic model status using the topic model\'s + shared key + """ username = os.environ.get("BIGML_USERNAME") world.topic_model = world.api.get_topic_model( \ world.topic_model['resource'], @@ -125,12 +125,14 @@ def topic_model_from_shared_key(step): eq_(get_status(world.topic_model)['code'], FINISHED) -#@step(r'the topic model name is "(.*)"') def i_check_topic_model_name(step, name): + """Step: the topic model name is """ topic_model_name = world.topic_model['name'] eq_(name, topic_model_name) + def i_create_a_topic_distribution(step, data=None): + """Step: Create topic distribution """ if data is None: data = "{}" topic_model = world.topic_model['resource'] @@ -142,23 +144,42 @@ def i_create_a_topic_distribution(step, data=None): world.topic_distribution = resource['object'] world.topic_distributions.append(resource['resource']) -#@step(r'I create a local topic distribution') + def i_create_a_local_topic_distribution(step, data=None): - world.local_topic_distribution = \ - world.local_topic_model.distribution(json.loads(data)) + """Step: I create a local topic 
distribution""" + step.bigml["local_topic_distribution"] = \ + step.bigml["local_topic_model"].distribution(json.loads(data)) -#@step(r'I export the topic model$') def i_export_topic_model(step, filename): + """Step: I export the topic model""" world.api.export(world.topic_model.get('resource'), filename=res_filename(filename)) -#@step(r'I create a local topic model from file "(.*)"') def i_create_local_topic_model_from_file(step, export_file): - world.local_topic_model = TopicModel(res_filename(export_file)) + """Step: I create a local topic model from file """ + step.bigml["local_topic_model"] = TopicModel(res_filename(export_file)) -#@step(r'the topic model ID and the local topic model ID match') def check_topic_model_id_local_id(step): - eq_(world.local_topic_model.resource_id, world.topic_model["resource"]) + """Step: the topic model ID and the local topic model ID match""" + eq_(step.bigml["local_topic_model"].resource_id, + world.topic_model["resource"]) + + +def clone_topic_model(step, topic_model): + """Step: I clone topic model""" + resource = world.api.clone_topic_model(topic_model, + {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.topic_model = resource['object'] + # save reference + world.topic_models.append(resource['resource']) + + +def the_cloned_topic_model_is(step, topic_model): + """Check cloned topic model""" + eq_(world.topic_model["origin"], topic_model) diff --git a/bigml/tests/create_library_steps.py b/bigml/tests/create_library_steps.py index 79df69a1..dd8cb5d2 100644 --- a/bigml/tests/create_library_steps.py +++ b/bigml/tests/create_library_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -14,34 +14,27 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. 
+from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY -import time -import json -import os -from datetime import datetime, timedelta -from world import world -from nose.tools import eq_, assert_less +from .read_resource_steps import wait_until_status_code_is +from .world import world, eq_ -from bigml.api import HTTP_CREATED -from bigml.api import HTTP_ACCEPTED -from bigml.api import FINISHED -from bigml.api import FAULTY -from bigml.api import get_status -from read_library_steps import i_get_the_library - - -#@step(r'the library code is "(.*)" and the value of "(.*)" is "(.*)"') def the_library_code_and_attributes(step, source_code, param, param_value): + """Step: the library code is <source_code> and the value of + <param> is <param_value> + """ res_param_value = world.library[param] eq_(res_param_value, param_value, ("The library %s is %s and the expected %s is %s" % (param, res_param_value, param, param_value))) -#@step(r'I create a whizzml library from a excerpt of code "(.*)"$') def i_create_a_library(step, source_code): - resource = world.api.create_library(source_code) + """Step: I create a whizzml library from an excerpt of code <source_code>""" + resource = world.api.create_library(source_code, + {"project": world.project_id}) world.status = resource['code'] eq_(world.status, HTTP_CREATED) world.location = resource['location'] @@ -49,8 +42,8 @@ def i_create_a_library(step, source_code): world.libraries.append(resource['resource']) -#@step(r'I update the library with "(.*)", "(.*)"$') def i_update_a_library(step, param, param_value): + """Step: I update the library with <param>, <param_value>""" resource = world.api.update_library(world.library['resource'], {param: param_value}) world.status = resource['code'] @@ -59,22 +52,14 @@ def i_update_a_library(step, param, param_value): world.library = resource['object'] -#@step(r'I wait until the library status code is either (\d) or (-\d) less than (\d+)') def wait_until_library_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - library_id = world.library['resource'] - i_get_the_library(step, library_id) - status = get_status(world.library) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_library(step, library_id) - status = get_status(world.library) - eq_(status['code'], int(code1)) + """Step: I wait until the library status code is either <code1> or + <code2> less than <secs> + """ + world.library = wait_until_status_code_is( + code1, code2, secs, world.library) -#@step(r'I wait until the library is ready less than (\d+)') def the_library_is_finished(step, secs): + """Step: I wait until the library is ready less than <secs>""" wait_until_library_status_code_is(step, FINISHED, FAULTY, secs) diff --git a/bigml/tests/create_linear_steps.py b/bigml/tests/create_linear_steps.py index 2f9bba95..88fae1b9 100644 --- a/bigml/tests/create_linear_steps.py +++ b/bigml/tests/create_linear_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2019 BigML +# Copyright 2019-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,49 +15,47 @@ # License for the specific language governing permissions and limitations # under the License.
-import time import json -import os -from datetime import datetime, timedelta -from world import world -from nose.tools import eq_, assert_less -from bigml.api import HTTP_CREATED -from bigml.api import HTTP_ACCEPTED -from bigml.api import FINISHED -from bigml.api import FAULTY -from bigml.api import get_status +from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY -from read_linear_steps import i_get_the_linear_regression +from .read_resource_steps import wait_until_status_code_is +from .world import world, eq_ -#@step(r'the linear name is "(.*)"') def i_check_linear_name(step, name): + """Step: the linear name is """ linear_name = world.linear_regression['name'] eq_(name, linear_name) -#@step(r'I create a Linear Regression from a dataset$') -def i_create_a_linear_regression_from_dataset(step): - dataset = world.dataset.get('resource') - resource = world.api.create_linear_regression( \ - dataset, {'name': 'new linear regression'}) - world.status = resource['code'] - eq_(world.status, HTTP_CREATED) - world.location = resource['location'] - world.linear_regression = resource['object'] - world.linear_regressions.append(resource['resource']) + +def i_create_a_linear_regression_from_dataset(step, shared=None): + """Step: I create a Linear Regression from a dataset""" + if shared is None or \ + world.shared.get("linear_regression", {}).get(shared) is None: + dataset = world.dataset.get('resource') + resource = world.api.create_linear_regression( + dataset, {'name': 'new linear regression'}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.linear_regression = resource['object'] + world.linear_regressions.append(resource['resource']) -#@step(r'I create a Linear Regression from a dataset$') def i_create_a_linear_regression_with_params(step, params): + """Step: I create a Linear Regression from a dataset""" i_create_a_linear_regression_with_objective_and_params(step, None, params) -#@step(r'I create a Linear Regression with objective and params$') -def i_create_a_linear_regression_with_objective_and_params(step, - objective, - params): - params = json.loads(params) +def i_create_a_linear_regression_with_objective_and_params( + step, objective=None, params=None): + """Step: I create a Linear Regression with objective and params """ + if params is not None: + params = json.loads(params) + else: + params = {} if objective is not None: params.update({"objective_field": objective}) dataset = world.dataset.get('resource') @@ -68,12 +66,14 @@ def i_create_a_linear_regression_with_objective_and_params(step, world.linear_regression = resource['object'] world.linear_regressions.append(resource['resource']) -def i_create_a_linear_regression(step): - i_create_a_linear_regression_from_dataset(step) + +def i_create_a_linear_regression(step, shared=None): + """Creating linear regression from dataset """ + i_create_a_linear_regression_from_dataset(step, shared=shared) -#@step(r'I update the linear regression name to "(.*)"$') def i_update_linear_regression_name(step, name): + """Step: I update the linear regression name to """ resource = world.api.update_linear_regression( \ world.linear_regression['resource'], {'name': name}) @@ -83,22 +83,39 @@ def i_update_linear_regression_name(step, name): world.linear_regression = resource['object'] -#@step(r'I wait until the linear regression status code is either (\d) or (-\d) less than (\d+)') def wait_until_linear_regression_status_code_is(step, code1, code2, secs): - 
start = datetime.utcnow() - delta = int(secs) * world.delta - linear_regression_id = world.linear_regression['resource'] - i_get_the_linear_regression(step, linear_regression_id) - status = get_status(world.linear_regression) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_linear_regression(step, linear_regression_id) - status = get_status(world.linear_regression) - eq_(status['code'], int(code1)) - - -#@step(r'I wait until the linear is ready less than (\d+)') -def the_linear_regression_is_finished_in_less_than(step, secs): - wait_until_linear_regression_status_code_is(step, FINISHED, FAULTY, secs) + """Step: I wait until the linear regression status code is either + <code1> or <code2> less than <secs> """ + world.linear_regression = wait_until_status_code_is( + code1, code2, secs, world.linear_regression) + + +def the_linear_regression_is_finished_in_less_than(step, secs, shared=None): + """Step: I wait until the linear is ready less than <secs> """ + if shared is None or \ + world.shared.get("linear_regression", {}).get(shared) is None: + wait_until_linear_regression_status_code_is(step, FINISHED, FAULTY, secs) + if shared is not None: + if "linear_regression" not in world.shared: + world.shared["linear_regression"] = {} + world.shared["linear_regression"][shared] = world.linear_regression + else: + world.linear_regression = world.shared["linear_regression"][shared] + print("Reusing %s" % world.linear_regression["resource"]) + + +def clone_linear_regression(step, linear_regression): + """Step: I clone linear regression""" + resource = world.api.clone_linear_regression( + linear_regression, {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.linear_regression = resource['object'] + # save reference + world.linear_regressions.append(resource['resource']) + +def the_cloned_linear_regression_is(step, linear_regression): + """Checking linear regression is a clone""" + eq_(world.linear_regression["origin"], linear_regression) diff --git a/bigml/tests/create_model_steps.py b/bigml/tests/create_model_steps.py index 84eae2e8..811daf30 100644 --- a/bigml/tests/create_model_steps.py +++ b/bigml/tests/create_model_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2012-2019 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,12 +15,8 @@ # License for the specific language governing permissions and limitations # under the License.
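Note: the new shared= keyword threaded through the create/wait steps in this file (and in the linear, logistic, deepnet and PCA files) implements a per-suite cache: the first scenario that names a tag creates the resource, later scenarios reuse the finished one. Condensed to its skeleton as a hypothetical helper (the diff repeats this logic inline in each step; world.shared is assumed to be a plain dict reset between suites):

    from .world import world


    def reuse_or_build(kind, tag, build):
        """Return a cached resource for (kind, tag), building it at most once.

        `build` is a zero-argument callable that creates the resource, waits
        for it to finish and returns the resulting object.
        """
        if tag is None:  # no sharing requested, always build
            return build()
        cache = world.shared.setdefault(kind, {})
        if cache.get(tag) is None:  # first caller pays the creation cost
            cache[tag] = build()
        else:
            print("Reusing %s" % cache[tag]["resource"])
        return cache[tag]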
-import time import json import os -from nose.tools import eq_, assert_less -from datetime import datetime, timedelta -from world import world, res_filename from bigml.api import HTTP_OK from bigml.api import HTTP_CREATED @@ -34,34 +30,42 @@ from bigml.linear import LinearRegression from bigml.deepnet import Deepnet from bigml.fusion import Fusion +from bigml.ensemble import Ensemble +from bigml.generators.model import get_leaves + + +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_, ok_ -import read_model_steps as read NO_MISSING_SPLITS = {'missing_splits': False} -#@step(r'I create a model$') -def i_create_a_model(step): - dataset = world.dataset.get('resource') - resource = world.api.create_model(dataset, args=NO_MISSING_SPLITS) - world.status = resource['code'] - eq_(world.status, HTTP_CREATED) - world.location = resource['location'] - world.model = resource['object'] - world.models.append(resource['resource']) -#@step(r'I export the model$') -def i_export_model(step, filename): - world.api.export(world.model.get('resource'), - filename=res_filename(filename)) +def i_create_a_model(step, shared=None): + """Step: I create a model""" + if shared is None or world.shared.get("model", {}).get(shared) is None: + dataset = world.dataset.get('resource') + resource = world.api.create_model(dataset, args=NO_MISSING_SPLITS) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.model = resource['object'] + world.models.append(resource['resource']) + + +def i_export_model(step, pmml, filename): + """Step: I export the model to file """ + world.api.export(world.model["resource"], res_filename(filename), pmml) -#@step(r'I export the last model$') def i_export_tags_model(step, filename, tag): + """Step: I export the last model""" world.api.export_last(tag, filename=res_filename(filename)) -#@step(r'I create a balanced model$') + def i_create_a_balanced_model(step): + """Step: I create a balanced model""" dataset = world.dataset.get('resource') args = {} args.update(NO_MISSING_SPLITS) @@ -73,9 +77,10 @@ def i_create_a_balanced_model(step): world.model = resource['object'] world.models.append(resource['resource']) -#@step(r'I create a model from a dataset list$') + def i_create_a_model_from_dataset_list(step): - resource = world.api.create_model(world.dataset_ids, + """Step: I create a model from a dataset list""" + resource = world.api.create_model(step.bigml["dataset_ids"], args=NO_MISSING_SPLITS) world.status = resource['code'] eq_(world.status, HTTP_CREATED) @@ -83,26 +88,30 @@ def i_create_a_model_from_dataset_list(step): world.model = resource['object'] world.models.append(resource['resource']) -#@step(r'I wait until the model status code is either (\d) or (-\d) less than (\d+)') + def wait_until_model_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - read.i_get_the_model(step, world.model['resource']) - status = get_status(world.model) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - read.i_get_the_model(step, world.model['resource']) - status = get_status(world.model) - eq_(status['code'], int(code1)) - -#@step(r'I wait until the model is ready less than (\d+)') -def the_model_is_finished_in_less_than(step, secs): - wait_until_model_status_code_is(step, FINISHED, FAULTY, secs) - -#@step(r'I create a model with "(.*)"') + 
"""Step: I wait until the model status code is either + or less than + """ + wait_until_status_code_is(code1, code2, secs, world.model) + + +def the_model_is_finished_in_less_than(step, secs, shared=None): + """Step: I wait until the model is ready less than """ + if shared is None or world.shared.get("model", {}).get(shared) is None: + wait_until_model_status_code_is(step, FINISHED, FAULTY, secs) + if shared is not None: + if "model" not in world.shared: + world.shared["model"] = {} + world.shared["model"][shared] = world.model + print("New %s" % world.model["resource"]) + else: + world.model = world.shared["model"][shared] + print("Reusing %s" % world.model["resource"]) + + def i_create_a_model_with(step, data="{}"): + """Step: I create a model with """ args = json.loads(data) if not 'missing_splits' in args: args.update(NO_MISSING_SPLITS) @@ -114,58 +123,76 @@ def i_create_a_model_with(step, data="{}"): world.model = resource['object'] world.models.append(resource['resource']) -#@step(r'I create a model with missing splits') + def i_create_a_model_with_missing_splits(step): + """Step: I create a model with missing splits""" i_create_a_model_with(step, data='{"missing_splits": true}') -#@step(r'I make the model public') + +def i_create_a_weighted_model_with_missing_splits(step): + """Step: I create a model with missing splits""" + i_create_a_model_with(step, data='{"missing_splits": true, "balance_objective": true}') + + def make_the_model_public(step): + """Step: I make the model public""" resource = world.api.update_model(world.model['resource'], {'private': False, 'white_box': True}) world.status = resource['code'] if world.status != HTTP_ACCEPTED: - print "unexpected status: %s" % world.status + print("unexpected status: %s" % world.status) eq_(world.status, HTTP_ACCEPTED) world.location = resource['location'] world.model = resource['object'] -#@step(r'I check the model status using the model\'s public url') + def model_from_public_url(step): + """Step: I check the model status using the model''s public url""" world.model = world.api.get_model("public/%s" % world.model['resource']) eq_(get_status(world.model)['code'], FINISHED) -#@step(r'I make the model shared') -def make_the_model_shared(step): + +def make_the_model_shared(step, cloneable=False): + """Step: I make the model shared""" + shared = {'shared': True} + if cloneable: + shared.update({"shared_clonable": True}) resource = world.api.update_model(world.model['resource'], - {'shared': True}) + shared) + world.api.ok(resource) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) world.location = resource['location'] world.model = resource['object'] -#@step(r'I get the model sharing info') + def get_sharing_info(step): + """Step: I get the model sharing info""" world.shared_hash = world.model['shared_hash'] world.sharing_key = world.model['sharing_key'] -#@step(r'I check the model status using the model\'s shared url') + def model_from_shared_url(step): + """Step: I check the model status using the model's shared url""" world.model = world.api.get_model("shared/model/%s" % world.shared_hash) eq_(get_status(world.model)['code'], FINISHED) -#@step(r'I check the model status using the model\'s shared key') + def model_from_shared_key(step): + """Step: I check the model status using the model's shared key""" username = os.environ.get("BIGML_USERNAME") world.model = world.api.get_model(world.model['resource'], shared_username=username, shared_api_key=world.sharing_key) eq_(get_status(world.model)['code'], FINISHED) 
-#@step(r'"(.*)" field\'s name is changed to "(.*)"') + def field_name_to_new_name(step, field_id, new_name): - eq_(world.local_model.tree.fields[field_id]['name'], new_name) + """Step: field's name is changed to """ + eq_(step.bigml["local_model"].fields[field_id]['name'], new_name) + -#@step(r'I create a model associated to centroid "(.*)"') def i_create_a_model_from_cluster(step, centroid_id): + """Step: I create a model associated to centroid """ resource = world.api.create_model( world.cluster['resource'], args={'centroid': centroid_id}) @@ -175,33 +202,42 @@ def i_create_a_model_from_cluster(step, centroid_id): world.model = resource['object'] world.models.append(resource['resource']) -#@step(r'the model is associated to the centroid "(.*)" of the cluster') + def is_associated_to_centroid_id(step, centroid_id): + """Step: the model is associated to the centroid of the + cluster + """ cluster = world.api.get_cluster(world.cluster['resource']) world.status = cluster['code'] eq_(world.status, HTTP_OK) eq_("model/%s" % (cluster['object']['cluster_models'][centroid_id]), world.model['resource']) -#@step(r'I create a logistic regression model$') -def i_create_a_logistic_model(step): - dataset = world.dataset.get('resource') - resource = world.api.create_logistic_regression(dataset) - world.status = resource['code'] - eq_(world.status, HTTP_CREATED) - world.location = resource['location'] - world.logistic_regression = resource['object'] - world.logistic_regressions.append(resource['resource']) + +def i_create_a_logistic_model(step, shared=None): + """Step: I create a logistic regression model""" + if shared is None or world.shared.get("logistic", {}).get(shared) is None: + dataset = world.dataset.get('resource') + resource = world.api.create_logistic_regression(dataset) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.logistic_regression = resource['object'] + world.logistic_regressions.append(resource['resource']) -#@step(r'I create a logistic regression model with objective "(.*?)" and parms "(.*)"$') -def i_create_a_logistic_model_with_objective_and_parms(step, objective, parms=None): +def i_create_a_logistic_model_with_objective_and_parms(step, objective=None, + parms=None): + """Step: I create a logistic regression model with objective + and parms + """ dataset = world.dataset.get('resource') if parms is None: parms = {} else: parms = json.loads(parms) - parms.update({"objective_field": objective}) + if objective is not None: + parms.update({"objective_field": objective}) resource = world.api.create_logistic_regression( \ dataset, parms) world.status = resource['code'] @@ -210,55 +246,78 @@ def i_create_a_logistic_model_with_objective_and_parms(step, objective, parms=No world.logistic_regression = resource['object'] world.logistic_regressions.append(resource['resource']) -#@step(r'I wait until the logistic regression model status code is either (\d) or (-\d) less than (\d+)') def wait_until_logistic_model_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - read.i_get_the_logistic_model(step, world.logistic_regression['resource']) - status = get_status(world.logistic_regression) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - read.i_get_the_logistic_model(step, world.logistic_regression['resource']) - status = get_status(world.logistic_regression) - 
eq_(status['code'], int(code1)) - -#@step(r'I wait until the logistic regression model is ready less than (\d+)') -def the_logistic_model_is_finished_in_less_than(step, secs): - wait_until_logistic_model_status_code_is(step, FINISHED, FAULTY, secs) - -#@step(r'I create a deepnet model$') -def i_create_a_deepnet(step): - dataset = world.dataset.get('resource') - resource = world.api.create_deepnet(dataset) - world.status = resource['code'] - eq_(world.status, HTTP_CREATED) - world.location = resource['location'] - world.deepnet = resource['object'] - world.deepnets.append(resource['resource']) + """Step: I wait until the logistic regression model status code is either + or less than + """ + world.logistic_regression = wait_until_status_code_is( + code1, code2, secs, world.logistic_regression) + + +def the_logistic_model_is_finished_in_less_than(step, secs, shared=None): + """Step: I wait until the logistic regression model is ready less than + + """ + if shared is None or world.shared.get("logistic", {}).get(shared) is None: + wait_until_logistic_model_status_code_is(step, FINISHED, FAULTY, secs) + if shared is not None: + if "logistic" not in world.shared: + world.shared["logistic"] = {} + world.shared["logistic"][shared] = world.logistic_regression + else: + world.logistic_regression = world.shared["logistic"][shared] + print("Reusing %s" % world.logistic_regression["resource"]) + +def i_create_a_deepnet(step, shared=None): + """Step: I create a deepnet model""" + if shared is None or world.shared.get("deepnet", {}).get(shared) is None: + dataset = world.dataset.get('resource') + resource = world.api.create_deepnet(dataset) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.deepnet = resource['object'] + world.deepnets.append(resource['resource']) -#@step(r'I create a non-suggested deepnet model$') -def i_create_a_no_suggest_deepnet(step): + +def i_create_a_quick_deepnet(step): + """Step: I create a quick deepnet""" dataset = world.dataset.get('resource') - resource = world.api.create_deepnet(dataset, {"suggest_structure": False, - "max_iterations": 100, - "deepnet_seed": "bigml"}) + resource = world.api.create_deepnet(dataset, {"max_training_time": 100}) world.status = resource['code'] eq_(world.status, HTTP_CREATED) world.location = resource['location'] world.deepnet = resource['object'] world.deepnets.append(resource['resource']) -#@step(r'I create a deepnet model with objective "(.*?)" and parms "(.*)"$') -def i_create_a_deepnet_with_objective_and_params(step, objective, parms=None): + +def i_create_a_no_suggest_deepnet(step, shared=None): + """Step: I create a non-suggested deepnet model""" + if shared is None or \ + world.shared.get("deepnet", {}).get(shared) is None: + dataset = world.dataset.get('resource') + resource = world.api.create_deepnet(dataset, {"suggest_structure": False, + "max_iterations": 100, + "deepnet_seed": "bigml"}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.deepnet = resource['object'] + world.deepnets.append(resource['resource']) + + +def i_create_a_deepnet_with_objective_and_params(step, objective=None, parms=None): + """Step: I create a deepnet model with objective and parms + + """ dataset = world.dataset.get('resource') if parms is None: parms = {} else: parms = json.loads(parms) - parms.update({"objective_field": objective}) + if objective is not None: + parms.update({"objective_field": objective}) resource = 
world.api.create_deepnet(dataset, parms) world.status = resource['code'] eq_(world.status, HTTP_CREATED) @@ -266,37 +325,45 @@ def i_create_a_deepnet_with_objective_and_params(step, objective, parms=None): world.deepnet = resource['object'] world.deepnets.append(resource['resource']) -#@step(r'I wait until the deepnet model status code is either (\d) or (-\d) less than (\d+)') + def wait_until_deepnet_model_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - status = get_status(world.deepnet) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - read.i_get_the_deepnet_model(step, world.deepnet['resource']) - status = get_status(world.deepnet) - eq_(status['code'], int(code1)) - -#@step(r'I wait until the deepnet model is ready less than (\d+)') -def the_deepnet_is_finished_in_less_than(step, secs): - wait_until_deepnet_model_status_code_is(step, FINISHED, FAULTY, secs) - -#@step(r'I export the "(.*)" model to file "(.*)"$') -def i_export_model(step, pmml, filename): - world.api.export(world.model["resource"], res_filename(filename), pmml) + """Step: I wait until the deepnet model status code is either <code1> + or <code2> less than <secs> """ + world.deepnet = wait_until_status_code_is(code1, code2, secs, world.deepnet) + + +def the_deepnet_is_finished_in_less_than(step, secs, shared=None): + """Step: I wait until the deepnet model is ready less than <secs> """ + if shared is None or world.shared.get("deepnet", {}).get(shared) is None: + wait_until_deepnet_model_status_code_is(step, FINISHED, FAULTY, secs) + if shared is not None: + if "deepnet" not in world.shared: + world.shared["deepnet"] = {} + world.shared["deepnet"][shared] = world.deepnet + else: + world.deepnet = world.shared["deepnet"][shared] + print("Reusing %s" % world.deepnet["resource"]) + -#@step(r'I check the model is stored in "(.*)" file in "(.*)"$') def i_check_model_stored(step, filename, pmml): + """Step: I check the model is stored in <filename> file in <pmml> """ with open(res_filename(filename)) as file_handler: content = file_handler.read() model_id = world.model["resource"][ \ (world.model["resource"].index("/") + 1):] - assert(content.index(model_id) > -1) + ok_(content.index(model_id) > -1) + + +def i_read_model_file(step, filename): + """Step: I read model from file <filename> """ + with open(res_filename(filename)) as file_handler: + content = file_handler.read() + world.model = json.loads(content) + -#@step(r'I create an optiml$') def i_create_an_optiml(step): + """Step: I create an optiml""" dataset = world.dataset.get('resource') resource = world.api.create_optiml(dataset) world.status = resource['code'] @@ -305,8 +372,11 @@ def i_create_an_optiml(step): world.optiml = resource['object'] world.optimls.append(resource['resource']) -#@step(r'I create an optiml model with objective "(.*?)" and parms "(.*)"$') + def i_create_an_optiml_with_objective_and_params(step, objective=None, parms=None): + """Step: I create an optiml model with objective <objective> and parms + <parms> """ dataset = world.dataset.get('resource') if parms is None: parms = {} else: parms = json.loads(parms) @@ -321,26 +391,21 @@ def i_create_an_optiml_with_objective_and_params(step, objective=None, parms=Non world.optiml = resource['object'] world.optimls.append(resource['resource']) -#@step(r'I wait until the optiml status code is either (\d) or (-\d) less than (\d+)') + def wait_until_optiml_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta -
read.i_get_the_optiml(step, world.optiml['resource']) - status = get_status(world.optiml) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - read.i_get_the_optiml(step, world.optiml['resource']) - status = get_status(world.optiml) - eq_(status['code'], int(code1)) - -#@step(r'I wait until the optiml is ready less than (\d+)') + """Step: I wait until the optiml status code is either or + less than + """ + world.optiml = wait_until_status_code_is(code1, code2, secs, world.optiml) + + def the_optiml_is_finished_in_less_than(step, secs): + """Step: I wait until the optiml is ready less than """ wait_until_optiml_status_code_is(step, FINISHED, FAULTY, secs) -#@step(r'I update the optiml name to "(.*)"') + def i_update_optiml_name(step, name): + """Step: I update the optiml name to """ resource = world.api.update_optiml(world.optiml['resource'], {'name': name}) world.status = resource['code'] @@ -348,14 +413,17 @@ def i_update_optiml_name(step, name): world.location = resource['location'] world.optiml = resource['object'] -#@step(r'the optiml name is "(.*)"') + def i_check_optiml_name(step, name): + """Step: the optiml name is """ optiml_name = world.optiml['name'] eq_(name, optiml_name) -#@step(r'I create a fusion$') + def i_create_a_fusion(step): - resource = world.api.create_fusion(world.list_of_models) + """Step: I create a fusion""" + resource = world.api.create_fusion(world.list_of_models, + {"project": world.project_id}) world.status = resource['code'] eq_(world.status, HTTP_CREATED) world.location = resource['location'] @@ -363,10 +431,10 @@ def i_create_a_fusion(step): world.fusions.append(resource['resource']) -#@step(r'I create a fusion with weights$') def i_create_a_fusion_with_weights(step, weights=None): + """Step: I create a fusion with weights""" if weights is None: - weights = range(1, len(world.list_of_models)) + weights = list(range(1, len(world.list_of_models))) else: weights = json.loads(weights) models = [] @@ -375,21 +443,23 @@ def i_create_a_fusion_with_weights(step, weights=None): models.append({"id": model["resource"], "weight": weights[index]}) except IndexError: pass - resource = world.api.create_fusion(models) + resource = world.api.create_fusion(models, + {"project": world.project_id}) world.status = resource['code'] eq_(world.status, HTTP_CREATED) world.location = resource['location'] world.fusion = resource['object'] world.fusions.append(resource['resource']) -#@step(r'I create a fusion with objective "(.*?)" and parms "(.*)"$') + def i_create_a_fusion_with_objective_and_params(step, objective, parms=None): + """Step: I create a fusion with objective and parms """ models = world.list_models if parms is None: parms = {} else: parms = json.loads(parms) - parms.update({"objective_field": objective}) + parms.update({"objective_field": objective, "project": world.project_id}) resource = world.api.create_fusion(models, parms) world.status = resource['code'] eq_(world.status, HTTP_CREATED) @@ -397,27 +467,21 @@ def i_create_a_fusion_with_objective_and_params(step, objective, parms=None): world.fusion = resource['object'] world.fusions.append(resource['resource']) -#@step(r'I wait until the fusion status code is either (\d) or (-\d) less than (\d+)') + def wait_until_fusion_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - read.i_get_the_fusion(step, world.fusion['resource']) - status = get_status(world.fusion) - while 
(status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - read.i_get_the_fusion(step, world.fusion['resource']) - status = get_status(world.fusion) - eq_(status['code'], int(code1)) - -#@step(r'I wait until the fusion is ready less than (\d+)') + """Step: I wait until the fusion status code is either <code1> or + <code2> less than <secs> """ + world.fusion = wait_until_status_code_is(code1, code2, secs, world.fusion) + + def the_fusion_is_finished_in_less_than(step, secs): + """Step: I wait until the fusion is ready less than <secs> """ wait_until_fusion_status_code_is(step, FINISHED, FAULTY, secs) -#@step(r'I update the fusion name to "(.*)"') def i_update_fusion_name(step, name): + """Step: I update the fusion name to <name> """ resource = world.api.update_fusion(world.fusion['resource'], {'name': name}) world.status = resource['code'] @@ -425,101 +489,210 @@ def i_update_fusion_name(step, name): world.location = resource['location'] world.fusion = resource['object'] -#@step(r'the fusion name is "(.*)"') + def i_check_fusion_name(step, name): + """Step: the fusion name is <name> """ fusion_name = world.fusion['name'] eq_(name, fusion_name) -#@step(r'I create a local model from file "(.*)"') + def i_create_local_model_from_file(step, export_file): - world.local_model = Model( \ + """Step: I create a local model from file <export_file> """ + step.bigml["local_model"] = Model( \ res_filename(export_file), api=BigML("wrong-user", "wrong-api-key")) -#@step(r'the model ID and the local model ID match') def check_model_id_local_id(step): - eq_(world.local_model.resource_id, world.model["resource"]) + """Step: the model ID and the local model ID match""" + eq_(step.bigml["local_model"].resource_id, world.model["resource"]) -#@step(r'I export the ensemble$') def i_export_ensemble(step, filename): + """Step: I export the ensemble""" world.api.export(world.ensemble.get('resource'), filename=res_filename(filename)) -#@step(r'I create a local ensemble from file "(.*)"') + def i_create_local_ensemble_from_file(step, export_file): - world.local_ensemble = Ensemble( \ + """Step: I create a local ensemble from file <export_file> """ + step.bigml["local_ensemble"] = Ensemble( \ res_filename(export_file), api=BigML("wrong-user", "wrong-api-key")) -#@step(r'the ensemble ID and the local ensemble ID match') def check_ensemble_id_local_id(step): - eq_(world.local_ensemble.resource_id, world.ensemble["resource"]) + """Step: the ensemble ID and the local ensemble ID match""" + eq_(step.bigml["local_ensemble"].resource_id, world.ensemble["resource"]) -#@step(r'I export the logistic regression$') def i_export_logistic_regression(step, filename): + """Step: I export the logistic regression""" world.api.export(world.logistic_regression.get('resource'), filename=res_filename(filename)) -#@step(r'I create a local logistic regressin from file "(.*)"') + def i_create_local_logistic_regression_from_file(step, export_file): - world.local_logistic = LogisticRegression( \ + """Step: I create a local logistic regression from file <export_file> """ + step.bigml["local_logistic"] = LogisticRegression( \ res_filename(export_file), api=BigML("wrong-user", "wrong-api-key")) -#@step(r'the logistic ID and the local logistic ID match') def check_logistic_regression_id_local_id(step): - eq_(world.local_logistic.resource_id, world.logistic_regression["resource"]) + """Step: the logistic ID and the local logistic ID match""" + eq_(step.bigml["local_logistic"].resource_id, world.logistic_regression["resource"]) -#@step(r'I export the
deepnet$') def i_export_deepnet(step, filename): + """Step: I export the deepnet""" world.api.export(world.deepnet.get('resource'), filename=res_filename(filename)) -#@step(r'I create a local deepnet from file "(.*)"') + def i_create_local_deepnet_from_file(step, export_file): - world.local_deepnet = Deepnet(res_filename(export_file), + """Step: I create a local deepnet from file """ + step.bigml["local_deepnet"] = Deepnet(res_filename(export_file), api=BigML("wrong-user", "wrong-api-key")) -#@step(r'the deepnet ID and the local deepnet ID match') -def check_deepnet_id_local_id(step): - eq_(world.local_deepnet.resource_id, world.deepnet["resource"]) - -#@step(r'I export the fusion$') def i_export_fusion(step, filename): + """Step: I export the fusion""" world.api.export(world.fusion.get('resource'), filename=res_filename(filename)) -#@step(r'I create a local fusion from file "(.*)"') + def i_create_local_fusion_from_file(step, export_file): - world.local_fusion = Fusion( \ + """Step: I create a local fusion from file """ + step.bigml["local_fusion"] = Fusion( \ res_filename(export_file), api=BigML("wrong-user", "wrong-api-key")) -#@step(r'the fusion ID and the local fusion ID match') def check_fusion_id_local_id(step): - eq_(world.local_fusion.resource_id, world.fusion["resource"]) + """Step: the fusion ID and the local fusion ID match""" + eq_(step.bigml["local_fusion"].resource_id, world.fusion["resource"]) -#@step(r'I export the linear regression$') def i_export_linear_regression(step, filename): + """Step: I export the linear regression""" world.api.export(world.linear_regression.get('resource'), filename=res_filename(filename)) -#@step(r'I create a local linear regression from file "(.*)"') def i_create_local_linear_regression_from_file(step, export_file): - world.local_linear_regression = LinearRegression( \ + """Step: I create a local linear regression from file """ + step.bigml["local_linear_regression"] = LinearRegression( \ res_filename(export_file), api=BigML("wrong-user", "wrong-api-key")) -#@step(r'the linear regression ID and the local linear regression ID match') def check_linear_regression_id_local_id(step): - eq_(world.local_linear_regression.resource_id, + """Step: the linear regression ID and the local linear regression ID + match + """ + eq_(step.bigml["local_linear_regression"].resource_id, world.linear_regression["resource"]) + + +def local_logistic_prediction_is(step, input_data, prediction): + """Checking local logistic prediction""" + eq_(step.bigml["local_logistic"].predict(input_data), prediction) + + +def local_linear_prediction_is(step, input_data, prediction): + """Checking local linear prediction""" + eq_(step.bigml["local_linear_regression"].predict(input_data), + prediction, precision=5) + +def local_deepnet_prediction_is(step, input_data, prediction): + """Checking local deepnet prediction""" + eq_(step.bigml["local_deepnet"].predict(input_data), prediction, precision=4) + + +def local_ensemble_prediction_is(step, input_data, prediction): + """Checking local ensemble prediction""" + eq_(step.bigml["local_ensemble"].predict(input_data), prediction, precision=5) + + +def local_model_prediction_is(step, input_data, prediction): + """Checking local model prediction""" + eq_(step.bigml["local_model"].predict(input_data), prediction, precision=5) + + +def local_cluster_prediction_is(step, input_data, prediction): + """Checking local cluster prediction""" + eq_(step.bigml["local_cluster"].centroid(input_data), prediction) + + +def 
local_anomaly_prediction_is(step, input_data, prediction): + """Checking local anomaly prediction""" + eq_(step.bigml["local_anomaly"].anomaly_score(input_data), prediction) + + +def local_association_prediction_is(step, input_data, prediction): + """Checking local association prediction""" + eq_(step.bigml["local_association"].association_set(input_data), prediction) + + +def local_time_series_prediction_is(step, input_data, prediction): + """Checking local time series prediction""" + eq_(step.bigml["local_time_series"].forecast(input_data), prediction) + + +def clone_model(step, model): + """Step: I clone model + """ + resource = world.api.clone_model(model, {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.model = resource['object'] + # save reference + world.models.append(resource['resource']) + + +def the_cloned_model_is(step, model): + """Checking the model is a clone""" + eq_(world.model["origin"], model) + + +def clone_deepnet(step, deepnet): + """Step: I clone deepnet""" + resource = world.api.clone_deepnet(deepnet, {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.deepnet = resource['object'] + # save reference + world.deepnets.append(resource['resource']) + + +def the_cloned_deepnet_is(step, deepnet): + """Checking the deepnet is a clone""" + eq_(world.deepnet["origin"], deepnet) + + +def clone_logistic_regression(step, logistic_regression): + """Step: I clone logistic regression""" + resource = world.api.clone_logistic_regression( + logistic_regression, {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.logistic_regression = resource['object'] + # save reference + world.logistic_regressions.append(resource['resource']) + + +def the_cloned_logistic_regression_is(step, logistic_regression): + """Checking logistic regression is a clone""" + eq_(world.logistic_regression["origin"], logistic_regression) + + +def check_deepnet_id_local_id(step): + """Checking that deepnet ID and local deepnet ID match""" + eq_(world.deepnet["resource"], step.bigml["local_deepnet"].resource_id) + + +def check_leaves_number(step, leaves_number): + """Checking the number of leaves in a tree local model""" + eq_(len(get_leaves(step.bigml["local_model"])), leaves_number) diff --git a/bigml/tests/create_multimodel_steps.py b/bigml/tests/create_multimodel_steps.py index ea4e1526..7fe82a82 100644 --- a/bigml/tests/create_multimodel_steps.py +++ b/bigml/tests/create_multimodel_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,16 +15,18 @@ # License for the specific language governing permissions and limitations # under the License.
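Note: the clone_* steps above all follow the same contract: the clone's "origin" field points back at the source resource. Outside the harness the flow reduces to the calls below; the IDs are placeholders, and api.ok() blocks until the clone reaches FINISHED:

    from bigml.api import BigML

    api = BigML()
    model_id = "model/123456789012345678901234"      # placeholder ID
    project_id = "project/123456789012345678901234"  # placeholder ID

    resource = api.clone_model(model_id, {"project": project_id})
    api.ok(resource)  # wait until the clone is finished
    assert resource["object"]["origin"] == model_id  # provenance check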
-from world import world -from nose.tools import ok_ +from .world import world, ok_ -#@step(r'I store the dataset id in a list') def i_store_dataset_id(step): - world.dataset_ids.append(world.dataset['resource']) + """Step: I store the dataset id in a list""" + if step.bigml.get("dataset_ids") is None: + step.bigml["dataset_ids"] = [] + step.bigml["dataset_ids"].append(world.dataset['resource']) + -#@step(r'I check the model stems from the original dataset list') def i_check_model_datasets_and_datasets_ids(step): + """Step: I check the model stems from the original dataset list""" model = world.model - ok_('datasets' in model and model['datasets'] == world.dataset_ids, + ok_('datasets' in model and model['datasets'] == step.bigml["dataset_ids"], ("The model contains only %s and the dataset ids are %s" % - (",".join(model['datasets']), ",".join(world.dataset_ids)))) + (",".join(model['datasets']), ",".join(step.bigml["dataset_ids"])))) diff --git a/bigml/tests/create_pca_steps.py b/bigml/tests/create_pca_steps.py index e66b75ad..c5a8ff09 100644 --- a/bigml/tests/create_pca_steps.py +++ b/bigml/tests/create_pca_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2018-2019 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,40 +15,35 @@ # License for the specific language governing permissions and limitations # under the License. -import time import json -import os -from datetime import datetime, timedelta -from world import world -from nose.tools import eq_, assert_less -from bigml.api import HTTP_CREATED -from bigml.api import HTTP_ACCEPTED -from bigml.api import FINISHED -from bigml.api import FAULTY -from bigml.api import get_status +from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY -from read_pca_steps import i_get_the_pca +from .read_resource_steps import wait_until_status_code_is +from .world import world, eq_ -#@step(r'the pca name is "(.*)"') def i_check_pca_name(step, name): + """Step: the pca name is """ pca_name = world.pca['name'] eq_(name, pca_name) -#@step(r'I create a PCA from a dataset$') -def i_create_a_pca_from_dataset(step): - dataset = world.dataset.get('resource') - resource = world.api.create_pca(dataset, {'name': 'new PCA'}) - world.status = resource['code'] - eq_(world.status, HTTP_CREATED) - world.location = resource['location'] - world.pca = resource['object'] - world.pcas.append(resource['resource']) + +def i_create_a_pca_from_dataset(step, shared=None): + """Step: I create a PCA from a dataset""" + if shared is None or world.shared.get("pca", {}).get(shared) is None: + dataset = world.dataset.get('resource') + resource = world.api.create_pca(dataset, {'name': 'new PCA'}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.pca = resource['object'] + world.pcas.append(resource['resource']) -#@step(r'I create a PCA from a dataset$') def i_create_a_pca_with_params(step, params): + """Step: I create a PCA from a dataset""" params = json.loads(params) dataset = world.dataset.get('resource') resource = world.api.create_pca(dataset, params) @@ -58,12 +53,14 @@ def i_create_a_pca_with_params(step, params): world.pca = resource['object'] world.pcas.append(resource['resource']) -def i_create_a_pca(step): - i_create_a_pca_from_dataset(step) + 
+def i_create_a_pca(step, shared=None): + """Creating a PCA""" + i_create_a_pca_from_dataset(step, shared=shared) -#@step(r'I update the PCA name to "(.*)"$') def i_update_pca_name(step, name): + """Step: I update the PCA name to """ resource = world.api.update_pca(world.pca['resource'], {'name': name}) world.status = resource['code'] @@ -72,22 +69,38 @@ def i_update_pca_name(step, name): world.pca = resource['object'] -#@step(r'I wait until the PCA status code is either (\d) or (-\d) less than (\d+)') def wait_until_pca_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - pca_id = world.pca['resource'] - i_get_the_pca(step, pca_id) - status = get_status(world.pca) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_pca(step, pca_id) - status = get_status(world.pca) - eq_(status['code'], int(code1)) - - -#@step(r'I wait until the PCA is ready less than (\d+)') -def the_pca_is_finished_in_less_than(step, secs): - wait_until_pca_status_code_is(step, FINISHED, FAULTY, secs) + """Step: I wait until the PCA status code is either or + less than + """ + world.pca = wait_until_status_code_is(code1, code2, secs, world.pca) + + +def the_pca_is_finished_in_less_than(step, secs, shared=None): + """Step: I wait until the PCA is ready less than """ + if shared is None or world.shared.get("pca", {}).get(shared) is None: + wait_until_pca_status_code_is(step, FINISHED, FAULTY, secs) + if shared is not None: + if "pca" not in world.shared: + world.shared["pca"] = {} + world.shared["pca"][shared] = world.pca + else: + world.pca = world.shared["pca"][shared] + print("Reusing %s" % world.pca["resource"]) + + +def clone_pca(step, pca): + """Step: I clone pca""" + resource = world.api.clone_pca(pca, + {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.pca = resource['object'] + # save reference + world.pcas.append(resource['resource']) + + +def the_cloned_pca_is(step, pca): + """Checking that pca is a clone """ + eq_(world.pca["origin"], pca) diff --git a/bigml/tests/create_pca_steps_bck.py b/bigml/tests/create_pca_steps_bck.py deleted file mode 100644 index c01072cb..00000000 --- a/bigml/tests/create_pca_steps_bck.py +++ /dev/null @@ -1,93 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2018-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import time -import json -import os -from datetime import datetime, timedelta -from world import world -from nose.tools import eq_, assert_less - -from bigml.api import HTTP_CREATED -from bigml.api import HTTP_ACCEPTED -from bigml.api import FINISHED -from bigml.api import FAULTY -from bigml.api import get_status - -from read_pca_steps import i_get_the_pca - - -#@step(r'the pca name is "(.*)"') -def i_check_pca_name(step, name): - pca_name = world.pca['name'] - eq_(name, pca_name) - -#@step(r'I create a PCA from a dataset$') -def i_create_a_pca_from_dataset(step): - dataset = world.dataset.get('resource') - resource = world.api.create_pca(dataset, {'name': 'new PCA'}) - world.status = resource['code'] - eq_(world.status, HTTP_CREATED) - world.location = resource['location'] - world.pca = resource['object'] - world.pcas.append(resource['resource']) - - -#@step(r'I create a PCA from a dataset$') -def i_create_a_pca_with_params(step, params): - params = json.loads(params) - dataset = world.dataset.get('resource') - resource = world.api.create_pca(dataset, params) - world.status = resource['code'] - eq_(world.status, HTTP_CREATED) - world.location = resource['location'] - world.pca = resource['object'] - world.pcas.append(resource['resource']) - -def i_create_a_pca(step): - i_create_a_pca_from_dataset(step) - - -#@step(r'I update the PCA name to "(.*)"$') -def i_update_pca_name(step, name): - resource = world.api.update_pca(world.pca['resource'], - {'name': name}) - world.status = resource['code'] - eq_(world.status, HTTP_ACCEPTED) - world.location = resource['location'] - world.pca = resource['object'] - - -#@step(r'I wait until the PCA status code is either (\d) or (-\d) less than (\d+)') -def wait_until_pca_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - pca_id = world.pca['resource'] - i_get_the_pca(step, pca_id) - status = get_status(world.pca) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less(datetime.utcnow() - start, timedelta(seconds=delta)) - i_get_the_pca(step, pca_id) - status = get_status(world.pca) - eq_(status['code'], int(code1)) - - -#@step(r'I wait until the PCA is ready less than (\d+)') -def the_pca_is_finished_in_less_than(step, secs): - wait_until_pca_status_code_is(step, FINISHED, FAULTY, secs) diff --git a/bigml/tests/create_prediction_steps.py b/bigml/tests/create_prediction_steps.py index 01108aec..978d577c 100644 --- a/bigml/tests/create_prediction_steps.py +++ b/bigml/tests/create_prediction_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -16,18 +16,16 @@ # under the License. 
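Note: the approx_ assertion imported from .world here (and used by check_prediction below) stands in for nose's assert_almost_equals. Its implementation is not shown in this patch; the call sites, approx_(got, expected, precision=4), imply something close to this sketch, where the name and rounding strategy are assumed and eq_ is the module's exact-equality assertion:

    def approx_(result, expected, precision=4, msg=None):
        """Assert two numbers agree once rounded to `precision` decimals."""
        eq_(round(float(result), precision),
            round(float(expected), precision), msg)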
import json -import time -from nose.tools import assert_almost_equals, eq_, assert_is_not_none, \ - assert_less -from datetime import datetime, timedelta -from world import world + from bigml.api import HTTP_CREATED from bigml.api import FINISHED, FAULTY -from bigml.api import get_status -from read_prediction_steps import i_get_the_prediction +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_, ok_, approx_ + def i_create_a_prediction(step, data=None): + """Creating prediction""" if data is None: data = "{}" model = world.model['resource'] @@ -41,9 +39,10 @@ def i_create_a_prediction(step, data=None): def i_create_a_prediction_op(step, data=None, operating_point=None): + """Creating prediction with operating point""" if data is None: data = "{}" - assert_is_not_none(operating_point) + ok_(operating_point is not None) model = world.model['resource'] data = json.loads(data) resource = world.api.create_prediction( \ @@ -56,9 +55,10 @@ def i_create_a_prediction_op(step, data=None, operating_point=None): def i_create_an_ensemble_prediction_op(step, data=None, operating_point=None): + """Creating prediction from ensemble with operating point""" if data is None: data = "{}" - assert_is_not_none(operating_point) + ok_(operating_point is not None) ensemble = world.ensemble['resource'] data = json.loads(data) resource = world.api.create_prediction( \ @@ -71,9 +71,10 @@ def i_create_an_ensemble_prediction_op(step, data=None, operating_point=None): def i_create_a_fusion_prediction_op(step, data=None, operating_point=None): + """Create prediction from fusion with operating point""" if data is None: data = "{}" - assert_is_not_none(operating_point) + ok_(operating_point is not None) fusion = world.fusion['resource'] data = json.loads(data) resource = world.api.create_prediction( \ @@ -86,6 +87,7 @@ def i_create_a_fusion_prediction_op(step, data=None, operating_point=None): def i_create_a_centroid(step, data=None): + """Create centroid""" if data is None: data = "{}" cluster = world.cluster['resource'] @@ -99,6 +101,7 @@ def i_create_a_centroid(step, data=None): def i_create_a_proportional_prediction(step, data=None): + """Create prediction using proportional strategy for missings""" if data is None: data = "{}" model = world.model['resource'] @@ -112,37 +115,51 @@ def i_create_a_proportional_prediction(step, data=None): world.predictions.append(resource['resource']) -def check_prediction(got, expected): - if not isinstance(got, basestring): - assert_almost_equals(got, float(expected), 5) +def check_prediction(got, expected, precision=4): + """Checking prediction is as expected""" + if not isinstance(got, str): + approx_(got, float(expected), precision=precision) else: eq_(got, expected) -def the_prediction_is(step, objective, prediction): - check_prediction(world.prediction['prediction'][objective], prediction) -def the_median_prediction_is(step, objective, prediction): +def the_prediction_is(step, objective, prediction, precision=4): + """Checking the prediction for objective field""" + check_prediction(world.prediction['prediction'][objective], prediction, + precision=precision) + + +def the_median_prediction_is(step, objective, prediction, precision=4): + """Checking the prediction using median""" check_prediction(world.prediction['prediction_path'][ - 'objective_summary']['median'], prediction) + 'objective_summary']['median'], prediction, precision=precision) + def the_centroid_is_with_distance(step, centroid, distance): + """Checking expected 
centroid and distance""" check_prediction(world.centroid['centroid_name'], centroid) check_prediction(world.centroid['distance'], distance) + def the_centroid_is(step, centroid): + """Checking centroid""" check_prediction(world.centroid['centroid_name'], centroid) + def the_centroid_is_ok(step): - assert world.api.ok(world.centroid) + """Checking centroid is ready""" + ok_(world.api.ok(world.centroid)) def the_confidence_is(step, confidence): + """Checking confidence""" local_confidence = world.prediction.get('confidence', \ world.prediction.get('probability')) - assert_almost_equals(float(local_confidence), - float(confidence), 4) + approx_(float(local_confidence), float(confidence), precision=4) + def i_create_an_ensemble_prediction(step, data=None): + """Creating prediction from ensemble""" if data is None: data = "{}" ensemble = world.ensemble['resource'] @@ -154,7 +171,11 @@ def i_create_an_ensemble_prediction(step, data=None): world.prediction = resource['object'] world.predictions.append(resource['resource']) + def i_create_an_ensemble_proportional_prediction(step, data=None, params=None): + """Creating prediction from ensemble using proportional strategy for + missings + """ if data is None: data = "{}" if params is None: @@ -172,52 +193,55 @@ def i_create_an_ensemble_proportional_prediction(step, data=None, params=None): def wait_until_prediction_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - i_get_the_prediction(step, world.prediction['resource']) - status = get_status(world.prediction) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_prediction(step, world.prediction['resource']) - status = get_status(world.prediction) - eq_(status['code'], int(code1)) + """Waiting for prediction and storing result""" + world.prediction = wait_until_status_code_is( + code1, code2, secs, world.prediction) def the_prediction_is_finished_in_less_than(step, secs): + """Checking wait time""" wait_until_prediction_status_code_is(step, FINISHED, FAULTY, secs) def create_local_ensemble_prediction_add_confidence(step, input_data): - world.local_prediction = world.local_ensemble.predict( + """Creating prediction from local ensemble with confidence""" + step.bigml["local_prediction"] = step.bigml["local_ensemble"].predict( json.loads(input_data), full=True) + def create_local_ensemble_prediction(step, input_data): - world.local_prediction = world.local_ensemble.predict(json.loads(input_data)) + """Creating prediction from local ensemble""" + step.bigml["local_prediction"] = step.bigml["local_ensemble"].predict(json.loads(input_data)) -def create_local_ensemble_prediction_with_confidence(step, input_data): - world.local_prediction = world.local_ensemble.predict( \ + +def create_local_ensemble_prediction_probabilities(step, input_data): + """Creating prediction from local ensemble with probabilities""" + step.bigml["local_prediction"] = step.bigml["local_ensemble"].predict( \ json.loads(input_data), full=True) - world.local_probabilities = world.local_ensemble.predict_probability( \ + step.bigml["local_probabilities"] = step.bigml[ + "local_ensemble"].predict_probability( \ json.loads(input_data), compact=True) + def create_local_ensemble_proportional_prediction_with_confidence( \ step, input_data, params=None): + """Creating prediction from local ensemble with confidence""" if params is None: params = {} kwargs = {"full": True, 
"missing_strategy": 1} kwargs.update(params) - world.local_prediction = world.local_ensemble.predict( \ + step.bigml["local_prediction"] = step.bigml["local_ensemble"].predict( \ json.loads(input_data), **kwargs) def create_local_ensemble_prediction_using_median_with_confidence( \ step, input_data): - world.local_prediction = world.local_ensemble.predict( \ + """Creating prediction from local ensemble using median with confidence""" + step.bigml["local_prediction"] = step.bigml["local_ensemble"].predict( \ json.loads(input_data), full=True) def i_create_an_anomaly_score(step, data=None): + """Creating anomaly score""" if data is None: data = "{}" anomaly = world.anomaly['resource'] @@ -231,6 +255,7 @@ def i_create_an_anomaly_score(step, data=None): def i_create_an_association_set(step, data=None): + """Creating association set""" if data is None: data = "{}" association = world.association['resource'] @@ -242,19 +267,24 @@ def i_create_an_association_set(step, data=None): world.association_set = resource['object'] world.association_sets.append(resource['resource']) + def the_anomaly_score_is(step, score): + """Checking the expected anomaly score""" check_prediction(world.anomaly_score['score'], score) def the_logistic_prediction_is(step, prediction): + """Checking the expected logistic regression prediction""" check_prediction(world.prediction['output'], prediction) def the_fusion_prediction_is(step, prediction): + """Checking the expected fusion prediction """ the_logistic_prediction_is(step, prediction) def i_create_a_logistic_prediction(step, data=None): + """Checking the expected logistic regression prediction""" if data is None: data = "{}" model = world.logistic_regression['resource'] @@ -266,20 +296,33 @@ def i_create_a_logistic_prediction(step, data=None): world.prediction = resource['object'] world.predictions.append(resource['resource']) -def i_create_a_deepnet_prediction(step, data=None): + +def i_create_a_deepnet_prediction(step, data=None, image_fields=None): + """Creating a prediction from a deepnet""" if data is None: data = "{}" + if image_fields is None: + image_fields = [] deepnet = world.deepnet['resource'] data = json.loads(data) + data_image_fields = [] + for field in image_fields: + if field in data: + data[field] = res_filename(data[field]) + data_image_fields.append(field) resource = world.api.create_prediction(deepnet, data) world.status = resource['code'] eq_(world.status, HTTP_CREATED) world.location = resource['location'] world.prediction = resource['object'] + for field in data_image_fields: + world.sources.append(world.prediction["input_data"][field]) world.predictions.append(resource['resource']) + def i_create_a_deepnet_prediction_with_op(step, data=None, operating_point=None): + """Creating a prediction from a deepnet with operating point""" if data is None: data = "{}" deepnet = world.deepnet['resource'] @@ -293,22 +336,41 @@ def i_create_a_deepnet_prediction_with_op(step, data=None, world.predictions.append(resource['resource']) +def i_create_a_logistic_prediction_with_op(step, data=None, + operating_point=None): + """Creating a prediction from a logistic regression with operating point""" + if data is None: + data = "{}" + logistic_regression = world.logistic_regression['resource'] + data = json.loads(data) + resource = world.api.create_prediction( \ + logistic_regression, data, {"operating_point": operating_point}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.prediction = 
resource['object'] + world.predictions.append(resource['resource']) + + +#pylint: disable=locally-disabled,undefined-loop-variable def the_logistic_probability_is(step, probability): + """Checking the logistic regression prediction probability""" for [prediction, remote_probability] in world.prediction['probabilities']: if prediction == world.prediction['output']: break - assert_almost_equals(round(float(remote_probability), 4), - round(float(probability), 4)) + approx_(float(remote_probability), float(probability), precision=4) def the_fusion_probability_is(step, probability): + """Checking the fusion prediction probability""" the_logistic_probability_is(step, probability) def i_create_a_prediction_op_kind(step, data=None, operating_kind=None): + """Creating a prediction with operating kind""" if data is None: data = "{}" - assert_is_not_none(operating_kind) + ok_(operating_kind is not None) model = world.model['resource'] data = json.loads(data) resource = world.api.create_prediction( \ @@ -320,10 +382,12 @@ def i_create_a_prediction_op_kind(step, data=None, operating_kind=None): world.predictions.append(resource['resource']) -def i_create_an_ensemble_prediction_op_kind(step, data=None, operating_kind=None): +def i_create_an_ensemble_prediction_op_kind( + step, data=None, operating_kind=None): + """Creating a prediction from an ensemble with operating kind""" if data is None: data = "{}" - assert_is_not_none(operating_kind) + ok_(operating_kind is not None) ensemble = world.ensemble['resource'] data = json.loads(data) resource = world.api.create_prediction( \ @@ -334,8 +398,10 @@ def i_create_an_ensemble_prediction_op_kind(step, data=None, operating_kind=None world.prediction = resource['object'] world.predictions.append(resource['resource']) + def i_create_a_deepnet_prediction_op_kind(step, data=None, operating_kind=None): + """Creating a prediction from a deepnet with operating kind""" if data is None: data = "{}" deepnet = world.deepnet['resource'] @@ -348,8 +414,10 @@ def i_create_a_deepnet_prediction_op_kind(step, data=None, world.prediction = resource['object'] world.predictions.append(resource['resource']) + def i_create_a_logistic_prediction_with_op_kind(step, data=None, operating_kind=None): + """Creating a prediction from a logistic regression with operating kind""" if data is None: data = "{}" logistic_regression = world.logistic_regression['resource'] @@ -362,7 +430,9 @@ def i_create_a_logistic_prediction_with_op_kind(step, data=None, world.prediction = resource['object'] world.predictions.append(resource['resource']) + def i_create_a_fusion_prediction(step, data=None): + """Creating a prediction from a fusion""" if data is None: data = "{}" fusion = world.fusion['resource'] @@ -374,7 +444,9 @@ def i_create_a_fusion_prediction(step, data=None): world.prediction = resource['object'] world.predictions.append(resource['resource']) + def i_create_a_linear_prediction(step, data=None): + """Creating a prediction from a linear regression""" if data is None: data = "{}" linear_regression = world.linear_regression['resource'] diff --git a/bigml/tests/create_project_steps.py b/bigml/tests/create_project_steps.py index 2969d707..3d997bfe 100644 --- a/bigml/tests/create_project_steps.py +++ b/bigml/tests/create_project_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # 
not use this file except in compliance with the License. You may obtain @@ -15,24 +15,15 @@ # License for the specific language governing permissions and limitations # under the License. -import os -import time -import json -from datetime import datetime, timedelta -from urllib import urlencode -from nose.tools import eq_, assert_less -from world import world +from bigml.api import HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY -from bigml.api import HTTP_CREATED, HTTP_ACCEPTED -from bigml.api import FINISHED -from bigml.api import FAULTY -from bigml.api import UPLOADING -from bigml.api import get_status - -from read_project_steps import i_get_the_project +from .read_resource_steps import wait_until_status_code_is +from .world import world, eq_ def i_create_project(step, name): + """Creating projects """ resource = world.api.create_project({"name": name}) # update status world.status = resource['code'] @@ -42,25 +33,13 @@ def i_create_project(step, name): world.projects.append(resource['resource']) -def wait_until_project_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - i_get_the_project(step, world.project['resource']) - status = get_status(world.project) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_project(step, world.project['resource']) - status = get_status(world.project) - eq_(status['code'], int(code1)) - - def the_project_is_finished(step, secs): - wait_until_project_status_code_is(step, FINISHED, FAULTY, secs) + """Waiting for project to be finished""" + wait_until_status_code_is(FINISHED, FAULTY, secs, world.project) def i_update_project_name_with(step, name=""): + """Updating project name""" resource = world.api.update_project(world.project.get('resource'), {"name": name}) world.status = resource['code'] @@ -69,5 +48,6 @@ def i_update_project_name_with(step, name=""): def i_check_project_name(step, name=""): + """Checking project name""" updated_name = world.project.get("name", "") eq_(updated_name, name) diff --git a/bigml/tests/create_projection_steps.py b/bigml/tests/create_projection_steps.py index 17c6371a..92df6cb7 100644 --- a/bigml/tests/create_projection_steps.py +++ b/bigml/tests/create_projection_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument # -# Copyright 2018-2019 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -16,17 +16,16 @@ # under the License. 
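Note: every per-resource polling loop deleted in this patch (wait_until_project_status_code_is above, and the script, sample, source, statistical test and time series variants below) is replaced by a single wait_until_status_code_is helper imported from .read_resource_steps, which is not itself shown in this diff. The following is a minimal sketch of what the call sites imply it does, refreshing the resource until it reaches one of the two expected status codes within a secs * world.delta budget; the world.api.getters registry is an assumption, mirrored on the world.api.updaters registry used later in fields_steps.py.

# Hypothetical sketch of read_resource_steps.wait_until_status_code_is;
# the real helper is outside this diff.
import time
from datetime import datetime

from bigml.api import get_status, get_resource_type

from .world import world, eq_, ok_


def wait_until_status_code_is(code1, code2, secs, resource_info):
    """Refreshes resource_info until its status code is either code1
    or code2, failing after secs * world.delta seconds, and returns
    the refreshed resource object."""
    start = datetime.utcnow()
    delta = int(secs) * world.delta
    getter = world.api.getters[  # assumed registry of per-type getters
        get_resource_type(resource_info["resource"])]
    resource = getter(resource_info["resource"])
    status = get_status(resource)
    while status["code"] not in (int(code1), int(code2)):
        time.sleep(3)
        ok_((datetime.utcnow() - start).seconds < delta)
        resource = getter(resource_info["resource"])
        status = get_status(resource)
    eq_(status["code"], int(code1))
    return resource["object"]

Returning the refreshed object is what lets the call sites write, e.g., world.projection = wait_until_status_code_is(...), instead of mutating world inside the loop as the deleted per-resource versions did.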
import json -import time -from nose.tools import assert_almost_equals, eq_, assert_is_not_none -from datetime import datetime, timedelta -from world import world from bigml.api import HTTP_CREATED from bigml.api import FINISHED, FAULTY -from bigml.api import get_status -from read_projection_steps import i_get_the_projection +from .world import world, eq_ +from .read_resource_steps import wait_until_status_code_is + +#pylint: disable=locally-disabled,no-member def i_create_a_projection(step, data=None): + """Creating Projection""" if data is None: data = "{}" pca = world.pca['resource'] @@ -40,31 +39,25 @@ def i_create_a_projection(step, data=None): def the_projection_is(step, projection): + """Checking projection""" if projection is None: projection = "{}" projection = json.loads(projection) - eq_(len(projection.keys()), - len(world.projection['projection']['result'].keys())) - for name, value in projection.items(): - eq_(world.projection['projection']['result'][name], projection[name], + eq_(len(list(projection.keys())), + len(list(world.projection['projection']['result'].keys()))) + for name, value in list(projection.items()): + eq_(world.projection['projection']['result'][name], value, "remote: %s, %s - expected: %s" % ( \ name, world.projection['projection']['result'][name], - projection[name])) + value)) def wait_until_projection_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - i_get_the_projection(step, world.projection['resource']) - status = get_status(world.projection) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_projection(step, world.projection['resource']) - status = get_status(world.projection) - eq_(status['code'], int(code1)) + """Checking status code""" + world.projection = wait_until_status_code_is( + code1, code2, secs, world.projection) def the_projection_is_finished_in_less_than(step, secs): + """Wait for completion""" wait_until_projection_status_code_is(step, FINISHED, FAULTY, secs) diff --git a/bigml/tests/create_sample_steps.py b/bigml/tests/create_sample_steps.py index 7093827e..8f451f4b 100644 --- a/bigml/tests/create_sample_steps.py +++ b/bigml/tests/create_sample_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -14,30 +14,21 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. 
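The probability assertions in create_prediction_steps.py above now go through approx_ from .world instead of nose's assert_almost_equals. approx_ is not defined in this diff; below is a minimal sketch of the behavior its call sites suggest, assuming it simply rounds both operands to precision decimal places before comparing.

# Hypothetical sketch of the approx_ helper used by
# the_logistic_probability_is; the real one lives in
# bigml/tests/world.py, outside this diff.
def approx_(value, expected, precision=4):
    """Asserts two floats are equal after rounding to `precision`
    decimal places."""
    assert round(float(value), precision) == round(float(expected), precision), \
        "%s != %s at precision %s" % (value, expected, precision)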
+from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY -import time -import json -import os -from datetime import datetime, timedelta -from world import world -from nose.tools import eq_, assert_less +from .read_resource_steps import wait_until_status_code_is +from .world import world, eq_ -from bigml.api import HTTP_CREATED -from bigml.api import HTTP_ACCEPTED -from bigml.api import FINISHED -from bigml.api import FAULTY -from bigml.api import get_status -from read_sample_steps import i_get_the_sample - - -#@step(r'the sample name is "(.*)"') def i_check_sample_name(step, name): + """Step: the sample name is """ sample_name = world.sample['name'] eq_(name, sample_name) -#@step(r'I create a sample from a dataset$') + def i_create_a_sample_from_dataset(step): + """Step: I create a sample from a dataset""" dataset = world.dataset.get('resource') resource = world.api.create_sample(dataset, {'name': 'new sample'}) world.status = resource['code'] @@ -47,8 +38,8 @@ def i_create_a_sample_from_dataset(step): world.samples.append(resource['resource']) -#@step(r'I update the sample name to "(.*)"$') def i_update_sample_name(step, name): + """Step: I update the sample name to """ resource = world.api.update_sample(world.sample['resource'], {'name': name}) world.status = resource['code'] @@ -57,22 +48,7 @@ def i_update_sample_name(step, name): world.sample = resource['object'] -#@step(r'I wait until the sample status code is either (\d) or (-\d) less than (\d+)') -def wait_until_sample_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - sample_id = world.sample['resource'] - i_get_the_sample(step, sample_id) - status = get_status(world.sample) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_sample(step, sample_id) - status = get_status(world.sample) - eq_(status['code'], int(code1)) - - -#@step(r'I wait until the sample is ready less than (\d+)') def the_sample_is_finished_in_less_than(step, secs): - wait_until_sample_status_code_is(step, FINISHED, FAULTY, secs) + """Step: I wait until the sample is ready less than """ + world.sample = wait_until_status_code_is( + FINISHED, FAULTY, secs, world.sample) diff --git a/bigml/tests/create_script_steps.py b/bigml/tests/create_script_steps.py index aab4b428..cb7ab4ed 100644 --- a/bigml/tests/create_script_steps.py +++ b/bigml/tests/create_script_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,33 +15,41 @@ # License for the specific language governing permissions and limitations # under the License. 
-import time -import json -import os -from datetime import datetime, timedelta -from world import world -from nose.tools import eq_, assert_less +from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY +from bigml.util import is_url -from bigml.api import HTTP_CREATED -from bigml.api import HTTP_ACCEPTED -from bigml.api import FINISHED -from bigml.api import FAULTY -from bigml.api import get_status +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_ -from read_script_steps import i_get_the_script - -#@step(r'the script code is "(.*)" and the value of "(.*)" is "(.*)"') def the_script_code_and_attributes(step, source_code, param, param_value): + """Step: the script code is and the value of is + + """ res_param_value = world.script[param] eq_(res_param_value, param_value, ("The script %s is %s and the expected %s is %s" % (param, res_param_value, param, param_value))) -#@step(r'I create a whizzml script from a excerpt of code "(.*)"$') def i_create_a_script(step, source_code): - resource = world.api.create_script(source_code) + """Step: I create a whizzml script from an excerpt of code """ + resource = world.api.create_script(source_code, + {"project": world.project_id}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.script = resource['object'] + world.scripts.append(resource['resource']) + + +def i_create_a_script_from_file_or_url(step, source_code): + """Step: I create a whizzml script from file """ + if not is_url(source_code): + source_code = res_filename(source_code) + resource = world.api.create_script(source_code, + {"project": world.project_id}) world.status = resource['code'] eq_(world.status, HTTP_CREATED) world.location = resource['location'] @@ -49,8 +57,8 @@ def i_create_a_script(step, source_code): world.scripts.append(resource['resource']) -#@step(r'I update the script with "(.*)", "(.*)"$') def i_update_a_script(step, param, param_value): + """Step: I update the script with , """ resource = world.api.update_script(world.script['resource'], {param: param_value}) world.status = resource['code'] @@ -59,22 +67,7 @@ def i_update_a_script(step, param, param_value): world.script = resource['object'] -#@step(r'I wait until the script status code is either (\d) or (-\d) less than (\d+)') -def wait_until_script_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - script_id = world.script['resource'] - i_get_the_script(step, script_id) - status = get_status(world.script) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_script(step, script_id) - status = get_status(world.script) - eq_(status['code'], int(code1)) - - -#@step(r'I wait until the script is ready less than (\d+)') def the_script_is_finished(step, secs): - wait_until_script_status_code_is(step, FINISHED, FAULTY, secs) + """Step: I wait until the script is ready less than """ + world.script = wait_until_status_code_is( + FINISHED, FAULTY, secs, world.script) diff --git a/bigml/tests/create_source_steps.py b/bigml/tests/create_source_steps.py index 7a0893ff..3eac296a 100644 --- a/bigml/tests/create_source_steps.py +++ b/bigml/tests/create_source_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2012, 2015-2019 BigML +# 
Copyright 2012, 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,39 +15,35 @@ # License for the specific language governing permissions and limitations # under the License. -import time import json import csv -import sys +from bigml.api import HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY -from datetime import datetime, timedelta -from world import world, res_filename -from nose.tools import eq_, assert_less +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_, ok_ -from bigml.api import HTTP_CREATED, HTTP_ACCEPTED -from bigml.api import FINISHED -from bigml.api import FAULTY -from bigml.api import UPLOADING -from bigml.api import get_status +def i_upload_a_file(step, filename, shared=None): + """Step: I create a data source uploading a file""" -import read_source_steps as read + if shared is None or world.shared.get("source", {}).get(shared) is None: + resource = world.api.create_source(res_filename(filename), \ + {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.source = resource['object'] + # save reference + world.sources.append(resource['resource']) -#@step(r'I create a data source uploading a "(.*)" file$') -def i_upload_a_file(step, file): - resource = world.api.create_source(res_filename(file), \ - {'project': world.project_id}) - # update status - world.status = resource['code'] - world.location = resource['location'] - world.source = resource['object'] - # save reference - world.sources.append(resource['resource']) -#@step(r'I create a data source uploading a "(.*)" file using a project$') -def i_upload_a_file_with_project_conn(step, file): - resource = world.api.create_source(res_filename(file)) +def i_upload_a_file_with_project_conn(step, filename): + """Step: I create a data source uploading a file using + a project + """ + resource = world.api.create_source(res_filename(filename)) # update status world.status = resource['code'] world.location = resource['location'] @@ -55,9 +51,10 @@ def i_upload_a_file_with_project_conn(step, file): # save reference world.sources.append(resource['resource']) -#@step(r'I create a data source from stdin uploading a "(.*)" file$') -def i_upload_a_file_from_stdin(step, file): - file_name = res_filename(file) + +def i_upload_a_file_from_stdin(step, filename): + """Step: I create a data source from stdin uploading a file """ + file_name = res_filename(filename) with open(file_name, 'rb') as file_handler: resource = world.api.create_source(file_handler, \ {'project': world.project_id}) @@ -69,11 +66,13 @@ def i_upload_a_file_from_stdin(step, file): world.sources.append(resource['resource']) -#@step(r'I create a data source uploading a "(.*)" file with args "(.*)"$') -def i_upload_a_file_with_args(step, file, args): +def i_upload_a_file_with_args(step, filename, args): + """Step: I create a data source uploading a file with args + + """ args = json.loads(args) args.update({'project': world.project_id}) - resource = world.api.create_source(res_filename(file), args) + resource = world.api.create_source(res_filename(filename), args) # update status world.status = resource['code'] world.location = resource['location'] @@ -81,8 +80,9 @@ def i_upload_a_file_with_args(step, file, args): # save reference world.sources.append(resource['resource']) -#@step(r'I create a data source using the url 
"(.*)"') + def i_create_using_url(step, url): + """Step: I create a data source using the url """ resource = world.api.create_source(url, {'project': world.project_id}) # update status world.status = resource['code'] @@ -91,12 +91,70 @@ def i_create_using_url(step, url): # save reference world.sources.append(resource['resource']) -#@step(r'I create a data source from inline data slurped from "(.*)"') + +def i_create_using_connector(step, connector): + """Step: I create a data source using the connection """ + resource = world.api.create_source(connector, {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.source = resource['object'] + # save reference + world.sources.append(resource['resource']) + + +def i_create_composite(step, sources): + """Step: I create from list of sources """ + resource = world.api.create_source(sources, {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.source = resource['object'] + # save reference + world.composites.append(resource['resource']) + + +def the_composite_contains(step, sources): + """Checking source in composite""" + eq_(world.source["sources"], sources) + + +def clone_source(step, source): + """Step: I clone source""" + resource = world.api.clone_source(source, {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.source = resource['object'] + # save reference + world.sources.append(resource['resource']) + + +def the_cloned_source_origin_is(step, source): + """Checking cloned source""" + eq_(world.source["origin"], source) + + +def i_create_annotated_source(step, directory, args=None): + """Creating annotated source""" + if args is None: + args = {} + args.update({'project': world.project_id}) + resource = world.api.create_annotated_source(res_filename(directory), + args) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.source = resource['object'] + # save reference + world.composites.append(resource['resource']) + + +#pylint: disable=locally-disabled,unnecessary-comprehension def i_create_using_dict_data(step, data): + """Step: I create a data source from inline data slurped from """ # slurp CSV file to local variable - mode = 'rb' - if sys.version > '3': - mode = 'rt' + mode = 'rt' with open(res_filename(data), mode) as fid: reader = csv.DictReader(fid) dict_data = [row for row in reader] @@ -110,59 +168,56 @@ def i_create_using_dict_data(step, data): # save reference world.sources.append(resource['resource']) -#@step(r'I create a data source uploading a "(.*)" file in asynchronous mode$') -def i_upload_a_file_async(step, file): - resource = world.api.create_source(res_filename(file), + +def i_upload_a_file_async(step, filename): + """Step: I create a data source uploading a file in + asynchronous mode + """ + resource = world.api.create_source(res_filename(filename), {'project': world.project_id}, async_load=True) world.resource = resource -#@step(r'I wait until the source has been created less than (\d+) secs') + def the_source_has_been_created_async(step, secs): - start = datetime.utcnow() - status = get_status(world.resource) - while status['code'] == UPLOADING: - time.sleep(3) - assert_less(datetime.utcnow() - start, timedelta(seconds=int(secs))) - status = get_status(world.resource) - eq_(world.resource['code'], HTTP_CREATED) - # update status - world.status = 
world.resource['code'] - world.location = world.resource['location'] - world.source = world.resource['object'] - # save reference - world.sources.append(world.resource['resource']) + """Step: I wait until the source has been created less than secs""" + world.source = wait_until_status_code_is( + FINISHED, FAULTY, secs, world.source) + -#@step(r'I wait until the source status code is either (\d) or (\d) less than (\d+)') def wait_until_source_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - read.i_get_the_source(step, world.source['resource']) - status = get_status(world.source) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - read.i_get_the_source(step, world.source['resource']) - status = get_status(world.source) - eq_(status['code'], int(code1)) - -#@step(r'I wait until the source is ready less than (\d+)') -def the_source_is_finished(step, secs): - wait_until_source_status_code_is(step, FINISHED, FAULTY, secs) - -#@step(r'I update the source with params "(.*)"') + """Step: I wait until the source status code is either + or less than + """ + world.source = wait_until_status_code_is(code1, code2, secs, world.source) + + +def the_source_is_finished(step, secs, shared=None): + """Step: I wait until the source is ready less than """ + if shared is None or world.shared.get("source", {}).get(shared) is None: + wait_until_source_status_code_is(step, FINISHED, FAULTY, secs) + if shared is not None: + if world.shared.get("source") is None: + world.shared["source"] = {} + world.shared["source"][shared] = world.source + else: + world.source = world.shared["source"][shared] + print("Reusing %s" % world.source["resource"]) + + def i_update_source_with(step, data="{}"): + """Step: I update the source with params """ resource = world.api.update_source(world.source.get('resource'), json.loads(data)) world.status = resource['code'] eq_(world.status, HTTP_ACCEPTED) -#@step(r'the source exists and has args "(.*)"') + def source_has_args(step, args="{}"): + """Step: the source exists and has args """ args = json.loads(args) - for key, value in args.items(): + for key, value in list(args.items()): if key in world.source: eq_(world.source[key], value, "Expected key %s: %s. Found %s" % (key, value, world.source[key])) else: - assert False, "No key %s in source." % key + ok_(False, "No key %s in source." % key) diff --git a/bigml/tests/create_statistical_tst_steps.py b/bigml/tests/create_statistical_tst_steps.py index e7177a96..44e76dd4 100644 --- a/bigml/tests/create_statistical_tst_steps.py +++ b/bigml/tests/create_statistical_tst_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -14,30 +14,21 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. 
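The shared parameter added to i_upload_a_file and the_source_is_finished above lets scenarios tag a finished source and reuse it instead of re-uploading it on every run. A hedged usage sketch follows; the file name, tag and timeout are illustrative values, and step stands for the harness object passed to every step function.

# Hypothetical usage of the shared-source cache added above.
from bigml.tests.create_source_steps import (
    i_upload_a_file, the_source_is_finished)


def first_scenario(step):
    i_upload_a_file(step, "data/iris.csv", shared="iris")  # uploads
    the_source_is_finished(step, 30, shared="iris")  # waits and caches


def later_scenario(step):
    i_upload_a_file(step, "data/iris.csv", shared="iris")  # skipped: cached
    the_source_is_finished(step, 30, shared="iris")  # prints "Reusing ..."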
+from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY -import time -import json -import os -from datetime import datetime, timedelta -from world import world -from nose.tools import eq_, assert_less +from .read_resource_steps import wait_until_status_code_is +from .world import world, eq_ -from bigml.api import HTTP_CREATED -from bigml.api import HTTP_ACCEPTED -from bigml.api import FINISHED -from bigml.api import FAULTY -from bigml.api import get_status -from read_statistical_tst_steps import i_get_the_tst - - -#@step(r'the statistical test name is "(.*)"') def i_check_tst_name(step, name): + """Step: the statistical test name is """ statistical_test_name = world.statistical_test['name'] eq_(name, statistical_test_name) -#@step(r'I create an statistical test from a dataset$') + def i_create_a_tst_from_dataset(step): + """Step: I create an statistical test from a dataset""" dataset = world.dataset.get('resource') resource = world.api.create_statistical_test(dataset, \ {'name': 'new statistical test'}) @@ -48,8 +39,8 @@ def i_create_a_tst_from_dataset(step): world.statistical_tests.append(resource['resource']) -#@step(r'I update the statistical test name to "(.*)"$') def i_update_tst_name(step, name): + """Step: I update the statistical test name to """ resource = world.api.update_statistical_test( \ world.statistical_test['resource'], {'name': name}) world.status = resource['code'] @@ -58,22 +49,13 @@ def i_update_tst_name(step, name): world.statistical_test = resource['object'] -#@step(r'I wait until the statistical test status code is either (\d) or (-\d) less than (\d+)') def wait_until_tst_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - statistical_test_id = world.statistical_test['resource'] - i_get_the_tst(step, statistical_test_id) - status = get_status(world.statistical_test) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - i_get_the_tst(step, statistical_test_id) - status = get_status(world.statistical_test) - eq_(status['code'], int(code1)) + """Step: I wait until the statistical test status code is either + code1 or code2 less than """ + world.statistical_test = wait_until_status_code_is( + code1, code2, secs, world.statistical_test) -#@step(r'I wait until the statistical test is ready less than (\d+)') def the_tst_is_finished_in_less_than(step, secs): + """Step: I wait until the statistical test is ready less than """ wait_until_tst_status_code_is(step, FINISHED, FAULTY, secs) diff --git a/bigml/tests/create_time_series_steps.py b/bigml/tests/create_time_series_steps.py index c131d46b..d12fc2c8 100644 --- a/bigml/tests/create_time_series_steps.py +++ b/bigml/tests/create_time_series_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2017-2019 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,26 +15,18 @@ # License for the specific language governing permissions and limitations # under the License. 
-import time import json -import os -from nose.tools import eq_, assert_less -from datetime import datetime, timedelta -from world import world, res_filename - -from bigml.api import HTTP_OK -from bigml.api import HTTP_CREATED -from bigml.api import HTTP_ACCEPTED -from bigml.api import FINISHED -from bigml.api import FAULTY -from bigml.api import get_status + +from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY from bigml.timeseries import TimeSeries -import read_time_series_steps as read +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_ -#@step(r'I create a time series$') def i_create_a_time_series(step): + """Step: I create a time series""" dataset = world.dataset.get('resource') resource = world.api.create_time_series(dataset) world.status = resource['code'] @@ -44,8 +36,8 @@ def i_create_a_time_series(step): world.time_series_set.append(resource['resource']) -#@step(r'I create a time series with params "(.*)"') def i_create_a_time_series_with_params(step, data="{}"): + """Step: I create a time series with params """ args = json.loads(data) resource = world.api.create_time_series(world.dataset.get('resource'), args=args) @@ -56,33 +48,20 @@ def i_create_a_time_series_with_params(step, data="{}"): world.time_series_set.append(resource['resource']) -#@step(r'I wait until the time series status code is either (\d) or (-\d) less than (\d+)') -def wait_until_time_series_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - delta = int(secs) * world.delta - read.i_get_the_time_series(step, world.time_series['resource']) - status = get_status(world.time_series) - while (status['code'] != int(code1) and - status['code'] != int(code2)): - time.sleep(3) - assert_less((datetime.utcnow() - start).seconds, delta) - read.i_get_the_time_series(step, world.time_series['resource']) - status = get_status(world.time_series) - eq_(status['code'], int(code1)) - -#@step(r'I wait until the time series is ready less than (\d+)') def the_time_series_is_finished_in_less_than(step, secs): - wait_until_time_series_status_code_is(step, FINISHED, FAULTY, secs) + """Step: I wait until the time series is ready less than """ + world.time_series = wait_until_status_code_is( + FINISHED, FAULTY, secs, world.time_series) -#@step(r'I create a local TimeSeries$') def create_local_time_series(step): - world.local_time_series = TimeSeries(world.time_series["resource"], + """Step: I create a local TimeSeries""" + step.bigml["local_time_series"] = TimeSeries(world.time_series["resource"], world.api) -#@step(r'I update the time series name to "(.*)"$') def i_update_time_series_name(step, name): + """Step: I update the time series name to """ resource = world.api.update_time_series(world.time_series['resource'], {'name': name}) world.status = resource['code'] @@ -90,23 +69,43 @@ def i_update_time_series_name(step, name): world.location = resource['location'] world.time_series = resource['object'] -#@step(r'the time series name is "(.*)"') + def i_check_time_series_name(step, name): + """Step: the time series name is """ time_series_name = world.time_series['name'] eq_(name, time_series_name) -#@step(r'I export the time series$') def i_export_time_series(step, filename): + """Step: I export the time series""" world.api.export(world.time_series.get('resource'), filename=res_filename(filename)) -#@step(r'I create a local time series from file "(.*)"') def i_create_local_time_series_from_file(step, export_file): - 
world.local_time_series = TimeSeries(res_filename(export_file)) + """Step: I create a local time series from file """ + step.bigml["local_time_series"] = TimeSeries( + res_filename(export_file)) -#@step(r'the time series ID and the local time series ID match') def check_time_series_id_local_id(step): - eq_(world.local_time_series.resource_id, world.time_series["resource"]) + """Step: the time series ID and the local time series ID match""" + eq_(step.bigml["local_time_series"].resource_id, + world.time_series["resource"]) + + +def clone_time_series(step, time_series): + """Step: I clone time series""" + resource = world.api.clone_time_series(time_series, + {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.time_series = resource['object'] + # save reference + world.time_series_set.append(resource['resource']) + + +def the_cloned_time_series_is(step, time_series): + """Checking the time series is a clone""" + eq_(world.time_series["origin"], time_series) diff --git a/bigml/tests/delete_project_steps.py b/bigml/tests/delete_project_steps.py index 309f9dca..49d6ddb6 100644 --- a/bigml/tests/delete_project_steps.py +++ b/bigml/tests/delete_project_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member,broad-except # -# Copyright 2014-2019 BigML +# Copyright 2014-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -17,24 +17,27 @@ import time from datetime import datetime, timedelta -from world import world -from nose.tools import eq_, assert_less + from bigml.api import HTTP_NO_CONTENT, HTTP_OK, HTTP_NOT_FOUND +from .world import world, eq_, ok_ + def i_delete_the_project(step): + """Deleting project""" resource = world.api.delete_project(world.project['resource']) world.status = resource['code'] eq_(world.status, HTTP_NO_CONTENT) def wait_until_project_deleted(step, secs): + """Waiting for delete """ start = datetime.utcnow() project_id = world.project['resource'] resource = world.api.get_project(project_id) - while (resource['code'] == HTTP_OK): + while resource['code'] == HTTP_OK: time.sleep(3) - assert_less(datetime.utcnow() - start, timedelta(seconds=int(secs))) + ok_(datetime.utcnow() - start < timedelta(seconds=int(secs))) resource = world.api.get_project(project_id) eq_(resource['code'], HTTP_NOT_FOUND) world.projects.remove(project_id) diff --git a/bigml/tests/fields_steps.py b/bigml/tests/fields_steps.py index f348a200..59336ea5 100644 --- a/bigml/tests/fields_steps.py +++ b/bigml/tests/fields_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,51 +15,57 @@ # License for the specific language governing permissions and limitations # under the License. 
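A pattern running through this whole patch: client-side objects (local models, ensembles, time series, predictions) move from attributes on the shared world singleton into a step.bigml dictionary, so each scenario carries its own local state. The container itself is not shown in the diff; the following is a minimal sketch of what the step functions assume, with the class name being hypothetical.

# Hypothetical sketch of the per-scenario object behind
# step.bigml["local_model"], step.bigml["local_time_series"], etc.
class Step:
    """Per-scenario state holder, replacing world.local_* attributes
    so scenarios do not clobber each other's local models."""

    def __init__(self):
        self.bigml = {}


step = Step()  # created by the test harness in the real suite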
-from world import world, res_filename -from bigml.fields import Fields +from bigml.fields import Fields, get_resource_type from bigml.io import UnicodeReader -from nose.tools import eq_ +from .world import world, res_filename, eq_, ok_ -#@step(r'I create a Fields object from the source with objective column "(.*)"') -def create_fields(step, objective_column): - world.fields = Fields(world.source, objective_field=int(objective_column), - objective_field_present=True) +def create_fields(step, objective_column): + """Step: I create a Fields object from the source with objective column + + """ + world.fields = Fields(world.source, objective_field=int(objective_column), + objective_field_present=True) -#@step(r'I create a Fields object from the dataset with objective column "(.*)"') def create_fields_from_dataset(step, objective_column): - world.fields = Fields(world.dataset, objective_field=int(objective_column), - objective_field_present=True) - + """Step: I create a Fields object from the dataset with objective column + objective_column + """ + world.fields = Fields(world.dataset, objective_field=int(objective_column), + objective_field_present=True) -#@step(r'the object id is "(.*)"') def check_objective(step, objective_id): + """Step: the object id is """ found_id = world.fields.field_id(world.fields.objective_field) eq_(found_id, objective_id) -#@step(r'I import a summary fields file "(.*)" as a fields structure') def import_summary_file(step, summary_file): + """Step: I import a summary fields file as a fields + structure + """ world.fields_struct = world.fields.new_fields_structure( \ csv_attributes_file=res_filename(summary_file)) -#@step(r'I check the new field structure has field "(.*)" as "(.*)"') def check_field_type(step, field_id, field_type): - assert field_id in world.fields_struct['fields'].keys() + """Step: I check the new field structure has field as + + """ + ok_(field_id in list(world.fields_struct['fields'].keys())) eq_(world.fields_struct['fields'][field_id]["optype"], field_type) -#@step(r'I export a summary fields file "(.*)"') def generate_summary(step, summary_file): + """Step: I export a summary fields file """ world.fields.summary_csv(res_filename(summary_file)) -#@step(r'I check that the fields summary file is like "(.*)"') def check_summary_like_expected(step, summary_file, expected_file): + """Step: I check that the fields summary file is like """ summary_contents = [] expected_contents = [] with UnicodeReader(res_filename(summary_file)) as summary_handler: @@ -69,3 +75,25 @@ def check_summary_like_expected(step, summary_file, expected_file): for line in expected_handler: expected_contents.append(line) eq_(summary_contents, expected_contents) + + +def update_with_summary_file(step, resource, summary_file): + """Step: I update the with the file """ + if get_resource_type(resource) == "source": + # We need to download the source again, as it could have been closed + resource = world.api.get_source(resource) + if resource.get("object", {}).get("closed", False): + resource = world.api.clone_source(resource) + world.api.ok(resource) + fields = Fields(resource) + changes = fields.filter_fields_update( \ + fields.new_fields_structure(res_filename(summary_file))) + resource_type = get_resource_type(resource) + resource = world.api.updaters[resource_type](resource, changes) + world.api.ok(resource) + setattr(world, resource_type, resource) + + +def check_resource_field_type(step, resource, field_id, optype): + """Step: I check the source has field as """ + 
eq_(resource["object"]["fields"][field_id]["optype"], optype) diff --git a/bigml/tests/inspect_model_steps.py b/bigml/tests/inspect_model_steps.py index 1c2eaf20..a13c90ac 100644 --- a/bigml/tests/inspect_model_steps.py +++ b/bigml/tests/inspect_model_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,unused-argument,no-member # -# Copyright 2012, 2015-2019 BigML +# Copyright 2012, 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -16,61 +16,100 @@ # under the License. import io -import os +import json + +import bigml.generators.model as g + from bigml.tests.world import res_filename -from world import world -from nose.tools import eq_ +from bigml.predict_utils.common import extract_distribution +from bigml.util import utf8 + +from .world import world, eq_ -#@step(r'I translate the tree into IF-THEN rules$') +#pylint: disable=locally-disabled,invalid-name def i_translate_the_tree_into_IF_THEN_rules(step): - output = io.BytesIO() - world.local_model.rules(out=output) + """Step: I translate the tree into IF-THEN rules""" + output = io.StringIO() + g.rules(step.bigml["local_model"], out=output) world.output = output.getvalue() -#@step(r'I check data distribution with "(.*)" file$') -def i_check_the_data_distribution(step, file): - distribution = world.local_model.get_data_distribution() +def i_check_the_data_distribution(step, filename): + """Step: I check data distribution with file""" + distribution = g.get_data_distribution(step.bigml["local_model"]) distribution_str = '' for bin_value, bin_instances in distribution: distribution_str += "[%s,%s]\n" % (bin_value, bin_instances) + world.output = utf8(distribution_str) + i_check_if_the_output_is_like_expected_file(step, filename) - world.output = distribution_str.encode('utf-8') - - i_check_if_the_output_is_like_expected_file(step, file) - - -#@step(r'I check the predictions distribution with "(.*)" file$') -def i_check_the_predictions_distribution(step, file): - predictions = world.local_model.get_prediction_distribution() +def i_check_the_predictions_distribution(step, filename): + """Step: I check the predictions distribution with file""" + predictions = g.get_prediction_distribution(step.bigml["local_model"]) distribution_str = '' for group, instances in predictions: distribution_str += "[%s,%s]\n" % (group, instances) - world.output = distribution_str.encode('utf-8') + world.output = utf8(distribution_str) - i_check_if_the_output_is_like_expected_file(step, file) + i_check_if_the_output_is_like_expected_file(step, filename) -#@step(r'I check the model summary with "(.*)" file$') -def i_check_the_model_summary_with(step, file): - output = io.BytesIO() - world.local_model.summarize(out=output) +def i_check_the_model_summary_with(step, filename): + """Step: I check the model summary with file""" + output = io.StringIO() + g.summarize(step.bigml["local_model"], out=output) world.output = output.getvalue() - i_check_if_the_output_is_like_expected_file(step, file) + i_check_if_the_output_is_like_expected_file(step, filename) -#@step(r'I check the output is like "(.*)" expected file') def i_check_if_the_output_is_like_expected_file(step, expected_file): - file = open(res_filename(expected_file), "rb") - expected_content = file.read() - file.close() + """Step: I check the output is like expected file""" + with open(res_filename(expected_file), "r") as handler: + 
expected_content = handler.read() eq_(world.output.strip(), expected_content.strip()) +def i_check_print_distribution(step, filename): + """Step: I check the distribution print with file""" + output = io.StringIO() + _, distribution = extract_distribution( + step.bigml["local_model"].root_distribution) + g.print_distribution(distribution, output) + world.output = output.getvalue() + if world.debug: + backup = "%s.bck" % filename + with open(backup, "w") as bck_file: + bck_file.write(world.output) + i_check_if_the_output_is_like_expected_file(step, filename) + + +def i_list_fields(step, filename): + """Step: I check the list fields print with file""" + output = io.StringIO() + g.list_fields(step.bigml["local_model"], output) + world.output = output.getvalue() + if world.debug: + backup = "%s.bck" % filename + with open(backup, "w") as bck_file: + bck_file.write(world.output) + i_check_if_the_output_is_like_expected_file(step, filename) + + +def i_create_tree_csv(step, filename): + """Step: I check the tree csv print with file""" + rows = g.tree_csv(step.bigml["local_model"]) + world.output = json.dumps(rows) + if world.debug: + backup = "%s.bck" % filename + with open(backup, "w") as bck_file: + bck_file.write(world.output) + i_check_if_the_output_is_like_expected_file(step, filename) + def update_content(filename, content): + """Updating the contents of a file""" with open(res_filename(filename), "w") as file_handler: file_handler.write(content) diff --git a/bigml/tests/mlflow_ensemble/ensemble.json b/bigml/tests/mlflow_ensemble/ensemble.json new file mode 100644 index 00000000..fc312d49 --- /dev/null +++ b/bigml/tests/mlflow_ensemble/ensemble.json @@ -0,0 +1 @@ +{"code": 200, "resource": "ensemble/62605abc0c11da5783002915", "location": "https://bigml.io/andromeda/ensemble/62605abc0c11da5783002915", "object": {"boosting": null, "category": 0, "code": 200, "columns": 9, "configuration": null, "configuration_status": false, "created": "2022-04-20T19:10:52.806000", "creator": "mmartin", "dataset": "dataset/62605ab1049fde5d990028f1", "dataset_field_types": {"categorical": 1, "datetime": 0, "image": 0, "items": 0, "numeric": 8, "path": 0, "preferred": 9, "regions": 0, "text": 0, "total": 9}, "dataset_status": true, "depth_threshold": 512, "description": "", "distributions": [{"importance": [["000001", 0.39328], ["000005", 0.19841], ["000006", 0.16783], ["000007", 0.08845], ["000003", 0.04986], ["000002", 0.0447], ["000000", 0.0369], ["000004", 0.02058]], "predictions": {"categories": [["false", 417], ["true", 197]]}, "training": {"categories": [["false", 416], ["true", 198]]}}, {"importance": [["000001", 0.33357], ["000005", 0.2589], ["000006", 0.16331], ["000000", 0.10221], ["000007", 0.04527], ["000002", 0.04284], ["000004", 0.03004], ["000003", 0.02386]], "predictions": {"categories": [["false", 410], ["true", 204]]}, "training": {"categories": [["false", 410], ["true", 204]]}}], "ensemble": {"fields": {"000000": {"column_number": 0, "datatype": "int8", "name": "pregnancies", "optype": "numeric", "order": 0, "preferred": true, "summary": {"counts": [[0, 86], [1, 110], [2, 80], [3, 56], [4, 58], [5, 45], [6, 40], [7, 37], [8, 35], [9, 22], [10, 16], [11, 9], [12, 9], [13, 8], [14, 1], [15, 1], [17, 1]], "exact_histogram": {"populations": [196, 136, 103, 77, 57, 25, 17, 2, 1], "start": 0, "width": 2}, "kurtosis": 0.14808, "maximum": 17, "mean": 3.89088, "median": 3, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 0.88555, "standard_deviation": 3.38254, "sum": 2389, 
"sum_squares": 16309, "variance": 11.44158}}, "000001": {"column_number": 1, "datatype": "int16", "name": "plasma glucose", "optype": "numeric", "order": 1, "preferred": true, "summary": {"bins": [[0, 5], [44, 1], [57, 2], [61.5, 2], [65, 1], [67.75, 4], [73.21429, 14], [79.52941, 17], [84.2381, 21], [88.74074, 27], [92, 19], [95.81818, 33], [100.86275, 51], [105.67647, 34], [109.38235, 34], [113.77143, 35], [118.775, 40], [124.14634, 41], [128.93548, 31], [133.46154, 13], [137.32, 25], [141.95652, 23], [146.4, 20], [152.86957, 23], [158, 14], [162.41667, 12], [166.66667, 15], [172.92857, 14], [180.77778, 18], [187.90909, 11], [193, 4], [196.7, 10]], "exact_histogram": {"populations": [5, 0, 0, 0, 1, 2, 7, 21, 48, 74, 93, 75, 78, 46, 43, 36, 28, 19, 23, 15], "start": 0, "width": 10}, "kurtosis": 0.68455, "maximum": 199, "mean": 121.11401, "median": 117, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 0.08447, "standard_deviation": 32.75167, "sum": 74364, "sum_squares": 9664070, "variance": 1072.67214}}, "000002": {"column_number": 2, "datatype": "int8", "name": "blood pressure", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[0, 28], [30, 2], [39, 2], [44.66667, 6], [49.625, 16], [52, 9], [55.11111, 18], [58, 19], [60, 29], [62, 23], [64.87097, 62], [68, 38], [70, 43], [72, 37], [74.84524, 84], [78, 34], [80, 34], [82, 24], [84.85366, 41], [88, 17], [90, 19], [92, 4], [95, 9], [98, 3], [100, 2], [102, 1], [104, 1], [106, 3], [108, 2], [110, 2], [114, 1], [122, 1]], "exact_histogram": {"populations": [28, 0, 0, 0, 0, 0, 2, 1, 5, 5, 29, 30, 84, 68, 125, 73, 79, 37, 27, 8, 4, 5, 3, 0, 1], "start": 0, "width": 5}, "kurtosis": 5.12315, "maximum": 122, "mean": 69.10912, "median": 72, "minimum": 0, "missing_count": 0, "population": 614, "skewness": -1.82589, "standard_deviation": 19.37631, "sum": 42433, "sum_squares": 3162653, "variance": 375.44158}}, "000003": {"column_number": 3, "datatype": "int8", "name": "triceps skin thickness", "optype": "numeric", "order": 3, "preferred": true, "summary": {"bins": [[0, 189], [7.33333, 3], [10.57143, 7], [12, 4], [13.375, 16], [15.26667, 15], [17, 13], [18.46429, 28], [20.41176, 17], [22.48387, 31], [24.4, 20], [26.5625, 32], [28.46429, 28], [30.4375, 32], [32.38636, 44], [34.68421, 19], [36.6087, 23], [38.68421, 19], [40.46429, 28], [42.38462, 13], [44, 4], [45, 6], [46, 5], [47, 4], [48, 4], [49, 2], [50, 2], [51, 1], [52, 2], [56, 1], [63, 1], [99, 1]], "exact_histogram": {"open_max": 3, "populations": [189, 0, 0, 2, 1, 7, 14, 17, 17, 28, 17, 31, 20, 32, 28, 32, 44, 19, 23, 19, 28, 13, 10, 9, 6, 3, 2], "start": 0, "width": 2}, "kurtosis": -0.44344, "maximum": 99, "mean": 20.27687, "median": 22, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 0.14897, "standard_deviation": 16.11049, "sum": 12450, "sum_squares": 411550, "variance": 259.54801}}, "000004": {"column_number": 4, "datatype": "int16", "name": "insulin", "optype": "numeric", "order": 4, "preferred": true, "summary": {"bins": [[0, 295], [19.57143, 7], [38.36364, 11], [56.08889, 45], [76.55172, 29], [93.76471, 34], [113.11111, 27], [133.5, 38], [154.23077, 13], [173.31818, 22], [189.75, 16], [205.57143, 14], [219.28571, 7], [235.44444, 9], [268.5, 10], [292.33333, 3], [307, 2], [324.75, 8], [338.5, 2], [372.5, 2], [389.5, 2], [415, 1], [440, 1], [474.75, 4], [491.66667, 3], [510, 1], [542.66667, 3], [579, 1], [600, 1], [680, 1], [744, 1], [846, 1]], "exact_histogram": {"open_max": 3, "populations": [299, 10, 35, 34, 36, 27, 32, 24, 18, 
22, 17, 10, 5, 7, 4, 3, 8, 1, 2, 2, 1, 0, 1, 2, 5, 1, 0, 3, 1, 0, 1], "start": 0, "width": 20}, "kurtosis": 7.14939, "maximum": 846, "mean": 82.72801, "median": 37, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 2.2975, "standard_deviation": 119.91638, "sum": 50795, "sum_squares": 13017071, "variance": 14379.93732}}, "000005": {"column_number": 5, "datatype": "double", "name": "bmi", "optype": "numeric", "order": 5, "preferred": true, "summary": {"bins": [[0, 9], [18.26667, 3], [19.5875, 8], [20.9375, 8], [22.13529, 17], [23.25333, 15], [24.14167, 24], [25.12059, 34], [26.32187, 32], [27.64706, 34], [28.8, 33], [30.17627, 59], [31.53077, 39], [32.85849, 53], [33.94118, 34], [34.89355, 31], [35.9129, 31], [37.34138, 29], [38.39048, 21], [39.45, 24], [40.60769, 13], [41.41429, 7], [42.90556, 18], [44.125, 8], [45.42727, 11], [46.4, 7], [48.225, 4], [49.85, 2], [52.8, 3], [55, 1], [57.3, 1], [59.4, 1]], "exact_histogram": {"populations": [9, 0, 0, 0, 0, 0, 0, 0, 0, 9, 17, 31, 59, 52, 60, 66, 81, 68, 42, 43, 21, 20, 17, 9, 3, 1, 3, 1, 1, 1], "start": 0, "width": 2}, "kurtosis": 3.13899, "maximum": 59.4, "mean": 31.9171, "median": 32, "minimum": 0, "missing_count": 0, "population": 614, "skewness": -0.52314, "standard_deviation": 7.80358, "sum": 19597.1, "sum_squares": 662811.83, "variance": 60.89594}}, "000006": {"column_number": 6, "datatype": "double", "name": "diabetes pedigree", "optype": "numeric", "order": 6, "preferred": true, "summary": {"bins": [[0.09254, 13], [0.14651, 61], [0.19706, 52], [0.25062, 86], [0.29458, 48], [0.34338, 42], [0.40032, 34], [0.44206, 33], [0.49592, 26], [0.54232, 31], [0.59219, 26], [0.64124, 21], [0.69418, 33], [0.744, 18], [0.81253, 15], [0.8645, 14], [0.93444, 16], [1.015, 5], [1.086, 2], [1.14533, 9], [1.2064, 5], [1.2702, 5], [1.33067, 3], [1.39375, 4], [1.45933, 3], [1.6, 1], [1.70933, 3], [1.781, 1], [2.137, 1], [2.288, 1], [2.329, 1], [2.42, 1]], "exact_histogram": {"populations": [9, 95, 140, 73, 68, 58, 52, 31, 28, 16, 6, 11, 8, 6, 4, 0, 3, 2, 0, 0, 0, 1, 1, 1, 1], "start": 0, "width": 0.1}, "kurtosis": 5.46252, "maximum": 2.42, "mean": 0.47944, "median": 0.3865, "minimum": 0.078, "missing_count": 0, "population": 614, "skewness": 1.91857, "standard_deviation": 0.34277, "sum": 294.378, "sum_squares": 213.15908, "variance": 0.11749}}, "000007": {"column_number": 7, "datatype": "int8", "name": "age", "optype": "numeric", "order": 7, "preferred": true, "summary": {"bins": [[21.56757, 111], [23, 29], [24.48611, 72], [26.5, 48], [28.4375, 48], [30.54545, 33], [32.59259, 27], [34.47368, 19], [36.48276, 29], [38.48, 25], [40.68, 25], [42.44, 25], [44.63158, 19], [46.27778, 18], [48.5, 8], [50.46667, 15], [52.41667, 12], [54.375, 8], [56.71429, 7], [58.375, 8], [60.28571, 7], [62, 3], [63, 4], [64, 1], [65, 2], [66, 3], [67, 3], [68, 1], [69, 1], [70, 1], [72, 1], [81, 1]], "exact_histogram": {"populations": [48, 92, 72, 48, 48, 33, 27, 19, 29, 25, 25, 25, 19, 18, 8, 15, 12, 8, 7, 8, 7, 7, 3, 6, 2, 1, 1, 0, 0, 0, 1], "start": 20, "width": 2}, "kurtosis": 0.42204, "maximum": 81, "mean": 33.66287, "median": 29, "minimum": 21, "missing_count": 0, "population": 614, "skewness": 1.05637, "standard_deviation": 12.0408, "sum": 20669, "sum_squares": 784651, "variance": 144.98077}}, "000008": {"column_number": 8, "datatype": "string", "name": "diabetes", "optype": "categorical", "order": 8, "preferred": true, "summary": {"categories": [["false", 394], ["true", 220]], "missing_count": 0}, "term_analysis": {"enabled": true}}}}, "ensemble_sample": 
{"rate": 1, "replacement": true, "seed": "d5f6867da5224b4793c0a4088697ef3d"}, "error_models": 0, "fields_meta": {"count": 9, "limit": 1000, "offset": 0, "query_total": 9, "total": 9}, "finished_models": 2, "focus_field": null, "focus_field_name": null, "importance": {"000000": 0.06955, "000001": 0.36342, "000002": 0.04377, "000003": 0.03686, "000004": 0.02531, "000005": 0.22865, "000006": 0.16557, "000007": 0.06686}, "input_fields": ["000000", "000001", "000002", "000003", "000004", "000005", "000006", "000007"], "locale": "en_US", "max_columns": 9, "max_rows": 614, "missing_splits": false, "models": ["model/62605ac123541b220100748a", "model/62605ac323541b220100748c"], "name": "diabetes", "name_options": "bootstrap decision forest, 512-node, 2-model, pruned, deterministic order", "node_threshold": 512, "number_of_batchpredictions": 0, "number_of_evaluations": 1, "number_of_models": 2, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000008", "objective_field_details": {"column_number": 8, "datatype": "string", "name": "diabetes", "optype": "categorical", "order": 8}, "objective_field_name": "diabetes", "objective_field_type": "categorical", "objective_fields": ["000008"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": null, "randomize": false, "range": null, "replacement": false, "resource": "ensemble/62605abc0c11da5783002915", "rows": 614, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 20939, "source": "source/62605aa75198db5eed003416", "source_status": true, "split_candidates": 32, "split_field": null, "split_field_name": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 1193, "message": "The ensemble has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2022-04-20T19:11:09.173000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/tests/mlflow_ensemble/model_62605ac123541b220100748a b/bigml/tests/mlflow_ensemble/model_62605ac123541b220100748a new file mode 100644 index 00000000..818f9b86 --- /dev/null +++ b/bigml/tests/mlflow_ensemble/model_62605ac123541b220100748a @@ -0,0 +1 @@ +{"code": 200, "resource": "model/62605ac123541b220100748a", "location": "https://bigml.io/andromeda/model/62605ac123541b220100748a", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 9, "configuration": null, "configuration_status": false, "created": "2022-04-20T19:10:57.279000", "creator": "mmartin", "dataset": "dataset/62605ab1049fde5d990028f1", "dataset_field_types": {"categorical": 1, "datetime": 0, "image": 0, "items": 0, "numeric": 8, "path": 0, "preferred": 9, "regions": 0, "text": 0, "total": 9}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "62605abc0c11da5783002915", "ensemble_index": 0, "excluded_fields": [], "fields_meta": {"count": 9, "limit": 1000, "offset": 0, "query_total": 9, "total": 9}, "focus_field": null, "input_fields": ["000000", "000001", "000002", "000003", "000004", "000005", "000006", "000007"], "locale": "en_US", "max_columns": 9, "max_rows": 614, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["false", 417], ["true", 197]]}, "training": {"categories": [["false", 416], ["true", 198]]}}, "fields": {"000000": {"column_number": 0, "datatype": "int8", "name": 
"pregnancies", "optype": "numeric", "order": 0, "preferred": true, "summary": {"counts": [[0, 86], [1, 110], [2, 80], [3, 56], [4, 58], [5, 45], [6, 40], [7, 37], [8, 35], [9, 22], [10, 16], [11, 9], [12, 9], [13, 8], [14, 1], [15, 1], [17, 1]], "exact_histogram": {"populations": [196, 136, 103, 77, 57, 25, 17, 2, 1], "start": 0, "width": 2}, "kurtosis": 0.14808, "maximum": 17, "mean": 3.89088, "median": 3, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 0.88555, "standard_deviation": 3.38254, "sum": 2389, "sum_squares": 16309, "variance": 11.44158}}, "000001": {"column_number": 1, "datatype": "int16", "name": "plasma glucose", "optype": "numeric", "order": 1, "preferred": true, "summary": {"bins": [[0, 5], [44, 1], [57, 2], [61.5, 2], [65, 1], [67.75, 4], [73.21429, 14], [79.52941, 17], [84.2381, 21], [88.74074, 27], [92, 19], [95.81818, 33], [100.86275, 51], [105.67647, 34], [109.38235, 34], [113.77143, 35], [118.775, 40], [124.14634, 41], [128.93548, 31], [133.46154, 13], [137.32, 25], [141.95652, 23], [146.4, 20], [152.86957, 23], [158, 14], [162.41667, 12], [166.66667, 15], [172.92857, 14], [180.77778, 18], [187.90909, 11], [193, 4], [196.7, 10]], "exact_histogram": {"populations": [5, 0, 0, 0, 1, 2, 7, 21, 48, 74, 93, 75, 78, 46, 43, 36, 28, 19, 23, 15], "start": 0, "width": 10}, "kurtosis": 0.68455, "maximum": 199, "mean": 121.11401, "median": 117, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 0.08447, "standard_deviation": 32.75167, "sum": 74364, "sum_squares": 9664070, "variance": 1072.67214}}, "000002": {"column_number": 2, "datatype": "int8", "name": "blood pressure", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[0, 28], [30, 2], [39, 2], [44.66667, 6], [49.625, 16], [52, 9], [55.11111, 18], [58, 19], [60, 29], [62, 23], [64.87097, 62], [68, 38], [70, 43], [72, 37], [74.84524, 84], [78, 34], [80, 34], [82, 24], [84.85366, 41], [88, 17], [90, 19], [92, 4], [95, 9], [98, 3], [100, 2], [102, 1], [104, 1], [106, 3], [108, 2], [110, 2], [114, 1], [122, 1]], "exact_histogram": {"populations": [28, 0, 0, 0, 0, 0, 2, 1, 5, 5, 29, 30, 84, 68, 125, 73, 79, 37, 27, 8, 4, 5, 3, 0, 1], "start": 0, "width": 5}, "kurtosis": 5.12315, "maximum": 122, "mean": 69.10912, "median": 72, "minimum": 0, "missing_count": 0, "population": 614, "skewness": -1.82589, "standard_deviation": 19.37631, "sum": 42433, "sum_squares": 3162653, "variance": 375.44158}}, "000003": {"column_number": 3, "datatype": "int8", "name": "triceps skin thickness", "optype": "numeric", "order": 3, "preferred": true, "summary": {"bins": [[0, 189], [7.33333, 3], [10.57143, 7], [12, 4], [13.375, 16], [15.26667, 15], [17, 13], [18.46429, 28], [20.41176, 17], [22.48387, 31], [24.4, 20], [26.5625, 32], [28.46429, 28], [30.4375, 32], [32.38636, 44], [34.68421, 19], [36.6087, 23], [38.68421, 19], [40.46429, 28], [42.38462, 13], [44, 4], [45, 6], [46, 5], [47, 4], [48, 4], [49, 2], [50, 2], [51, 1], [52, 2], [56, 1], [63, 1], [99, 1]], "exact_histogram": {"open_max": 3, "populations": [189, 0, 0, 2, 1, 7, 14, 17, 17, 28, 17, 31, 20, 32, 28, 32, 44, 19, 23, 19, 28, 13, 10, 9, 6, 3, 2], "start": 0, "width": 2}, "kurtosis": -0.44344, "maximum": 99, "mean": 20.27687, "median": 22, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 0.14897, "standard_deviation": 16.11049, "sum": 12450, "sum_squares": 411550, "variance": 259.54801}}, "000004": {"column_number": 4, "datatype": "int16", "name": "insulin", "optype": "numeric", "order": 4, "preferred": true, "summary": 
{"bins": [[0, 295], [19.57143, 7], [38.36364, 11], [56.08889, 45], [76.55172, 29], [93.76471, 34], [113.11111, 27], [133.5, 38], [154.23077, 13], [173.31818, 22], [189.75, 16], [205.57143, 14], [219.28571, 7], [235.44444, 9], [268.5, 10], [292.33333, 3], [307, 2], [324.75, 8], [338.5, 2], [372.5, 2], [389.5, 2], [415, 1], [440, 1], [474.75, 4], [491.66667, 3], [510, 1], [542.66667, 3], [579, 1], [600, 1], [680, 1], [744, 1], [846, 1]], "exact_histogram": {"open_max": 3, "populations": [299, 10, 35, 34, 36, 27, 32, 24, 18, 22, 17, 10, 5, 7, 4, 3, 8, 1, 2, 2, 1, 0, 1, 2, 5, 1, 0, 3, 1, 0, 1], "start": 0, "width": 20}, "kurtosis": 7.14939, "maximum": 846, "mean": 82.72801, "median": 37, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 2.2975, "standard_deviation": 119.91638, "sum": 50795, "sum_squares": 13017071, "variance": 14379.93732}}, "000005": {"column_number": 5, "datatype": "double", "name": "bmi", "optype": "numeric", "order": 5, "preferred": true, "summary": {"bins": [[0, 9], [18.26667, 3], [19.5875, 8], [20.9375, 8], [22.13529, 17], [23.25333, 15], [24.14167, 24], [25.12059, 34], [26.32187, 32], [27.64706, 34], [28.8, 33], [30.17627, 59], [31.53077, 39], [32.85849, 53], [33.94118, 34], [34.89355, 31], [35.9129, 31], [37.34138, 29], [38.39048, 21], [39.45, 24], [40.60769, 13], [41.41429, 7], [42.90556, 18], [44.125, 8], [45.42727, 11], [46.4, 7], [48.225, 4], [49.85, 2], [52.8, 3], [55, 1], [57.3, 1], [59.4, 1]], "exact_histogram": {"populations": [9, 0, 0, 0, 0, 0, 0, 0, 0, 9, 17, 31, 59, 52, 60, 66, 81, 68, 42, 43, 21, 20, 17, 9, 3, 1, 3, 1, 1, 1], "start": 0, "width": 2}, "kurtosis": 3.13899, "maximum": 59.4, "mean": 31.9171, "median": 32, "minimum": 0, "missing_count": 0, "population": 614, "skewness": -0.52314, "standard_deviation": 7.80358, "sum": 19597.1, "sum_squares": 662811.83, "variance": 60.89594}}, "000006": {"column_number": 6, "datatype": "double", "name": "diabetes pedigree", "optype": "numeric", "order": 6, "preferred": true, "summary": {"bins": [[0.09254, 13], [0.14651, 61], [0.19706, 52], [0.25062, 86], [0.29458, 48], [0.34338, 42], [0.40032, 34], [0.44206, 33], [0.49592, 26], [0.54232, 31], [0.59219, 26], [0.64124, 21], [0.69418, 33], [0.744, 18], [0.81253, 15], [0.8645, 14], [0.93444, 16], [1.015, 5], [1.086, 2], [1.14533, 9], [1.2064, 5], [1.2702, 5], [1.33067, 3], [1.39375, 4], [1.45933, 3], [1.6, 1], [1.70933, 3], [1.781, 1], [2.137, 1], [2.288, 1], [2.329, 1], [2.42, 1]], "exact_histogram": {"populations": [9, 95, 140, 73, 68, 58, 52, 31, 28, 16, 6, 11, 8, 6, 4, 0, 3, 2, 0, 0, 0, 1, 1, 1, 1], "start": 0, "width": 0.1}, "kurtosis": 5.46252, "maximum": 2.42, "mean": 0.47944, "median": 0.3865, "minimum": 0.078, "missing_count": 0, "population": 614, "skewness": 1.91857, "standard_deviation": 0.34277, "sum": 294.378, "sum_squares": 213.15908, "variance": 0.11749}}, "000007": {"column_number": 7, "datatype": "int8", "name": "age", "optype": "numeric", "order": 7, "preferred": true, "summary": {"bins": [[21.56757, 111], [23, 29], [24.48611, 72], [26.5, 48], [28.4375, 48], [30.54545, 33], [32.59259, 27], [34.47368, 19], [36.48276, 29], [38.48, 25], [40.68, 25], [42.44, 25], [44.63158, 19], [46.27778, 18], [48.5, 8], [50.46667, 15], [52.41667, 12], [54.375, 8], [56.71429, 7], [58.375, 8], [60.28571, 7], [62, 3], [63, 4], [64, 1], [65, 2], [66, 3], [67, 3], [68, 1], [69, 1], [70, 1], [72, 1], [81, 1]], "exact_histogram": {"populations": [48, 92, 72, 48, 48, 33, 27, 19, 29, 25, 25, 25, 19, 18, 8, 15, 12, 8, 7, 8, 7, 7, 3, 6, 2, 1, 1, 0, 0, 0, 1], 
"start": 20, "width": 2}, "kurtosis": 0.42204, "maximum": 81, "mean": 33.66287, "median": 29, "minimum": 21, "missing_count": 0, "population": 614, "skewness": 1.05637, "standard_deviation": 12.0408, "sum": 20669, "sum_squares": 784651, "variance": 144.98077}}, "000008": {"column_number": 8, "datatype": "string", "name": "diabetes", "optype": "categorical", "order": 8, "preferred": true, "summary": {"categories": [["false", 394], ["true", 220]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000001", 0.39328], ["000005", 0.19841], ["000006", 0.16783], ["000007", 0.08845], ["000003", 0.04986], ["000002", 0.0447], ["000000", 0.0369], ["000004", 0.02058]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000000": {"column_number": 0, "datatype": "int8", "name": "pregnancies", "optype": "numeric", "preferred": true}, "000001": {"column_number": 1, "datatype": "int16", "name": "plasma glucose", "optype": "numeric", "preferred": true}, "000002": {"column_number": 2, "datatype": "int8", "name": "blood pressure", "optype": "numeric", "preferred": true}, "000003": {"column_number": 3, "datatype": "int8", "name": "triceps skin thickness", "optype": "numeric", "preferred": true}, "000004": {"column_number": 4, "datatype": "int16", "name": "insulin", "optype": "numeric", "preferred": true}, "000005": {"column_number": 5, "datatype": "double", "name": "bmi", "optype": "numeric", "preferred": true}, "000006": {"column_number": 6, "datatype": "double", "name": "diabetes pedigree", "optype": "numeric", "preferred": true}, "000007": {"column_number": 7, "datatype": "int8", "name": "age", "optype": "numeric", "preferred": true}, "000008": {"column_number": 8, "datatype": "string", "name": "diabetes", "optype": "categorical", "preferred": true, "term_analysis": {"enabled": true}}}, "node_threshold": 512, "root": {"children": [{"children": [{"children": [{"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 5, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 39.8}}, {"confidence": 0.34237, "count": 2, "id": 6, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 39.8}}], "confidence": 0.20765, "count": 3, "id": 4, "objective_summary": {"categories": [["false", 2], ["true", 1]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 93}}, {"children": [{"confidence": 0.20654, "count": 1, "id": 8, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000004", "operator": ">", "value": 643}}, {"children": [{"children": [{"confidence": 0.83887, "count": 20, "id": 11, "objective_summary": {"categories": [["true", 20]]}, "output": "true", "predicate": {"field": "000000", "operator": ">", "value": 3}}, {"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 14, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 41.65}}, {"confidence": 0.56551, "count": 5, "id": 15, "objective_summary": {"categories": [["true", 5]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 41.65}}], "confidence": 0.43649, "count": 6, "id": 13, "objective_summary": {"categories": [["true", 5], ["false", 1]]}, 
"output": "true", "predicate": {"field": "000007", "operator": ">", "value": 36}}, {"confidence": 0.34237, "count": 2, "id": 16, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000007", "operator": "<=", "value": 36}}], "confidence": 0.30574, "count": 8, "id": 12, "objective_summary": {"categories": [["true", 5], ["false", 3]]}, "output": "true", "predicate": {"field": "000000", "operator": "<=", "value": 3}}], "confidence": 0.72804, "count": 28, "id": 10, "objective_summary": {"categories": [["true", 25], ["false", 3]]}, "output": "true", "predicate": {"field": "000007", "operator": ">", "value": 33}}, {"confidence": 0.8668, "count": 25, "id": 17, "objective_summary": {"categories": [["true", 25]]}, "output": "true", "predicate": {"field": "000007", "operator": "<=", "value": 33}}], "confidence": 0.8463, "count": 53, "id": 9, "objective_summary": {"categories": [["true", 50], ["false", 3]]}, "output": "true", "predicate": {"field": "000004", "operator": "<=", "value": 643}}], "confidence": 0.82446, "count": 54, "id": 7, "objective_summary": {"categories": [["true", 50], ["false", 4]]}, "output": "true", "predicate": {"field": "000002", "operator": "<=", "value": 93}}], "confidence": 0.78877, "count": 57, "id": 3, "objective_summary": {"categories": [["true", 51], ["false", 6]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 27.25}}, {"children": [{"confidence": 0.34237, "count": 2, "id": 19, "objective_summary": {"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 194}}, {"confidence": 0.5101, "count": 4, "id": 20, "objective_summary": {"categories": [["false", 4]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 194}}], "confidence": 0.29999, "count": 6, "id": 18, "objective_summary": {"categories": [["false", 4], ["true", 2]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 27.25}}], "confidence": 0.73188, "count": 63, "id": 2, "objective_summary": {"categories": [["true", 53], ["false", 10]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 167}}, {"children": [{"children": [{"confidence": 0.34237, "count": 2, "id": 23, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 163}}, {"confidence": 0.83182, "count": 19, "id": 24, "objective_summary": {"categories": [["true", 19]]}, "output": "true", "predicate": {"field": "000001", "operator": "<=", "value": 163}}], "confidence": 0.71085, "count": 21, "id": 22, "objective_summary": {"categories": [["true", 19], ["false", 2]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 41.85}}, {"children": [{"children": [{"confidence": 0.34237, "count": 2, "id": 27, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 1.4955}}, {"confidence": 0.7575, "count": 12, "id": 28, "objective_summary": {"categories": [["true", 12]]}, "output": "true", "predicate": {"field": "000006", "operator": "<=", "value": 1.4955}}], "confidence": 0.60058, "count": 14, "id": 26, "objective_summary": {"categories": [["true", 12], ["false", 2]]}, "output": "true", "predicate": {"field": "000006", "operator": ">", "value": 0.92725}}, {"children": [{"children": [{"children": [{"confidence": 0.70085, "count": 9, "id": 32, "objective_summary": {"categories": [["true", 9]]}, 
"output": "true", "predicate": {"field": "000005", "operator": ">", "value": 29.8}}, {"children": [{"confidence": 0.34237, "count": 2, "id": 34, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 28.05}}, {"confidence": 0.20654, "count": 1, "id": 35, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 28.05}}], "confidence": 0.20765, "count": 3, "id": 33, "objective_summary": {"categories": [["false", 2], ["true", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 29.8}}], "confidence": 0.55196, "count": 12, "id": 31, "objective_summary": {"categories": [["true", 10], ["false", 2]]}, "output": "true", "predicate": {"field": "000006", "operator": ">", "value": 0.727}}, {"children": [{"children": [{"children": [{"children": [{"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 42, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 36.15}}, {"confidence": 0.56551, "count": 5, "id": 43, "objective_summary": {"categories": [["false", 5]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 36.15}}], "confidence": 0.43649, "count": 6, "id": 41, "objective_summary": {"categories": [["false", 5], ["true", 1]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 149}}, {"children": [{"confidence": 0.64566, "count": 7, "id": 45, "objective_summary": {"categories": [["true", 7]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 131}}, {"confidence": 0.20654, "count": 1, "id": 46, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 131}}], "confidence": 0.52911, "count": 8, "id": 44, "objective_summary": {"categories": [["true", 7], ["false", 1]]}, "output": "true", "predicate": {"field": "000001", "operator": "<=", "value": 149}}], "confidence": 0.3259, "count": 14, "id": 40, "objective_summary": {"categories": [["true", 8], ["false", 6]]}, "output": "true", "predicate": {"field": "000007", "operator": ">", "value": 34}}, {"children": [{"confidence": 0.20654, "count": 1, "id": 48, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 157}}, {"confidence": 0.74116, "count": 11, "id": 49, "objective_summary": {"categories": [["false", 11]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 157}}], "confidence": 0.64611, "count": 12, "id": 47, "objective_summary": {"categories": [["false", 11], ["true", 1]]}, "output": "false", "predicate": {"field": "000007", "operator": "<=", "value": 34}}], "confidence": 0.4622, "count": 26, "id": 39, "objective_summary": {"categories": [["false", 17], ["true", 9]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 60}}, {"confidence": 0.43849, "count": 3, "id": 50, "objective_summary": {"categories": [["true", 3]]}, "output": "true", "predicate": {"field": "000002", "operator": "<=", "value": 60}}], "confidence": 0.40738, "count": 29, "id": 38, "objective_summary": {"categories": [["false", 17], ["true", 12]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 31.25}}, {"confidence": 0.80639, "count": 16, "id": 51, "objective_summary": {"categories": [["false", 
16]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 31.25}}], "confidence": 0.58961, "count": 45, "id": 37, "objective_summary": {"categories": [["false", 33], ["true", 12]]}, "output": "false", "predicate": {"field": "000003", "operator": ">", "value": 20}}, {"children": [{"children": [{"confidence": 0.34237, "count": 2, "id": 54, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000000", "operator": ">", "value": 9}}, {"children": [{"children": [{"confidence": 0.60966, "count": 6, "id": 57, "objective_summary": {"categories": [["true", 6]]}, "output": "true", "predicate": {"field": "000003", "operator": ">", "value": 8}}, {"confidence": 0.34237, "count": 2, "id": 58, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000003", "operator": "<=", "value": 8}}], "confidence": 0.40927, "count": 8, "id": 56, "objective_summary": {"categories": [["true", 6], ["false", 2]]}, "output": "true", "predicate": {"field": "000006", "operator": ">", "value": 0.286}}, {"confidence": 0.7719, "count": 13, "id": 59, "objective_summary": {"categories": [["true", 13]]}, "output": "true", "predicate": {"field": "000006", "operator": "<=", "value": 0.286}}], "confidence": 0.71085, "count": 21, "id": 55, "objective_summary": {"categories": [["true", 19], ["false", 2]]}, "output": "true", "predicate": {"field": "000000", "operator": "<=", "value": 9}}], "confidence": 0.62862, "count": 23, "id": 53, "objective_summary": {"categories": [["true", 19], ["false", 4]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 142}}, {"children": [{"confidence": 0.70085, "count": 9, "id": 61, "objective_summary": {"categories": [["false", 9]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 135}}, {"children": [{"confidence": 0.56551, "count": 5, "id": 63, "objective_summary": {"categories": [["false", 5]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 34.4}}, {"children": [{"confidence": 0.34237, "count": 2, "id": 65, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000003", "operator": ">", "value": 6}}, {"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 68, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 22.45}}, {"confidence": 0.20654, "count": 1, "id": 69, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 22.45}}], "confidence": 0.09453, "count": 2, "id": 67, "objective_summary": {"categories": [["false", 1], ["true", 1]]}, "output": "false", "predicate": {"field": "000007", "operator": ">", "value": 54}}, {"confidence": 0.67558, "count": 8, "id": 70, "objective_summary": {"categories": [["true", 8]]}, "output": "true", "predicate": {"field": "000007", "operator": "<=", "value": 54}}], "confidence": 0.59584, "count": 10, "id": 66, "objective_summary": {"categories": [["true", 9], ["false", 1]]}, "output": "true", "predicate": {"field": "000003", "operator": "<=", "value": 6}}], "confidence": 0.46769, "count": 12, "id": 64, "objective_summary": {"categories": [["true", 9], ["false", 3]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 34.4}}], "confidence": 0.30963, "count": 17, "id": 62, "objective_summary": {"categories": [["true", 9], ["false", 8]]}, 
"output": "true", "predicate": {"field": "000001", "operator": "<=", "value": 135}}], "confidence": 0.4622, "count": 26, "id": 60, "objective_summary": {"categories": [["false", 17], ["true", 9]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 142}}], "confidence": 0.4327, "count": 49, "id": 52, "objective_summary": {"categories": [["true", 28], ["false", 21]]}, "output": "true", "predicate": {"field": "000003", "operator": "<=", "value": 20}}], "confidence": 0.47353, "count": 94, "id": 36, "objective_summary": {"categories": [["false", 54], ["true", 40]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.727}}], "confidence": 0.43395, "count": 106, "id": 30, "objective_summary": {"categories": [["false", 56], ["true", 50]]}, "output": "false", "predicate": {"field": "000007", "operator": ">", "value": 24}}, {"children": [{"confidence": 0.79611, "count": 15, "id": 72, "objective_summary": {"categories": [["false", 15]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 135}}, {"children": [{"confidence": 0.34237, "count": 2, "id": 74, "objective_summary": {"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000003", "operator": ">", "value": 38}}, {"children": [{"confidence": 0.20654, "count": 1, "id": 76, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 132}}, {"confidence": 0.70085, "count": 9, "id": 77, "objective_summary": {"categories": [["false", 9]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 132}}], "confidence": 0.59584, "count": 10, "id": 75, "objective_summary": {"categories": [["false", 9], ["true", 1]]}, "output": "false", "predicate": {"field": "000003", "operator": "<=", "value": 38}}], "confidence": 0.46769, "count": 12, "id": 73, "objective_summary": {"categories": [["false", 9], ["true", 3]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 135}}], "confidence": 0.71942, "count": 27, "id": 71, "objective_summary": {"categories": [["false", 24], ["true", 3]]}, "output": "false", "predicate": {"field": "000007", "operator": "<=", "value": 24}}], "confidence": 0.51657, "count": 133, "id": 29, "objective_summary": {"categories": [["false", 80], ["true", 53]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.92725}}], "confidence": 0.47708, "count": 147, "id": 25, "objective_summary": {"categories": [["false", 82], ["true", 65]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 41.85}}], "confidence": 0.42524, "count": 168, "id": 21, "objective_summary": {"categories": [["false", 84], ["true", 84]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 167}}], "confidence": 0.5287, "count": 231, "id": 1, "objective_summary": {"categories": [["true", 137], ["false", 94]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 124}}, {"children": [{"children": [{"children": [{"confidence": 0.79611, "count": 15, "id": 81, "objective_summary": {"categories": [["false", 15]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 119}}, {"children": [{"children": [{"confidence": 0.5101, "count": 4, "id": 84, "objective_summary": {"categories": [["false", 4]]}, "output": "false", "predicate": {"field": "000007", "operator": ">", "value": 38}}, {"children": [{"confidence": 0.20654, "count": 
1, "id": 86, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 87}}, {"children": [{"confidence": 0.7719, "count": 13, "id": 88, "objective_summary": {"categories": [["true", 13]]}, "output": "true", "predicate": {"field": "000004", "operator": ">", "value": 94}}, {"children": [{"confidence": 0.60966, "count": 6, "id": 90, "objective_summary": {"categories": [["true", 6]]}, "output": "true", "predicate": {"field": "000002", "operator": ">", "value": 67}}, {"children": [{"confidence": 0.34237, "count": 2, "id": 92, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.55}}, {"confidence": 0.20654, "count": 1, "id": 93, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000006", "operator": "<=", "value": 0.55}}], "confidence": 0.20765, "count": 3, "id": 91, "objective_summary": {"categories": [["false", 2], ["true", 1]]}, "output": "false", "predicate": {"field": "000002", "operator": "<=", "value": 67}}], "confidence": 0.45258, "count": 9, "id": 89, "objective_summary": {"categories": [["true", 7], ["false", 2]]}, "output": "true", "predicate": {"field": "000004", "operator": "<=", "value": 94}}], "confidence": 0.72185, "count": 22, "id": 87, "objective_summary": {"categories": [["true", 20], ["false", 2]]}, "output": "true", "predicate": {"field": "000002", "operator": "<=", "value": 87}}], "confidence": 0.67872, "count": 23, "id": 85, "objective_summary": {"categories": [["true", 20], ["false", 3]]}, "output": "true", "predicate": {"field": "000007", "operator": "<=", "value": 38}}], "confidence": 0.55321, "count": 27, "id": 83, "objective_summary": {"categories": [["true", 20], ["false", 7]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 107}}, {"children": [{"children": [{"children": [{"confidence": 0.72246, "count": 10, "id": 97, "objective_summary": {"categories": [["true", 10]]}, "output": "true", "predicate": {"field": "000004", "operator": ">", "value": 68}}, {"children": [{"confidence": 0.5101, "count": 4, "id": 99, "objective_summary": {"categories": [["true", 4]]}, "output": "true", "predicate": {"field": "000002", "operator": ">", "value": 75}}, {"confidence": 0.56551, "count": 5, "id": 100, "objective_summary": {"categories": [["false", 5]]}, "output": "false", "predicate": {"field": "000002", "operator": "<=", "value": 75}}], "confidence": 0.26665, "count": 9, "id": 98, "objective_summary": {"categories": [["false", 5], ["true", 4]]}, "output": "false", "predicate": {"field": "000004", "operator": "<=", "value": 68}}], "confidence": 0.51208, "count": 19, "id": 96, "objective_summary": {"categories": [["true", 14], ["false", 5]]}, "output": "true", "predicate": {"field": "000000", "operator": ">", "value": 0}}, {"confidence": 0.56551, "count": 5, "id": 101, "objective_summary": {"categories": [["false", 5]]}, "output": "false", "predicate": {"field": "000000", "operator": "<=", "value": 0}}], "confidence": 0.38834, "count": 24, "id": 95, "objective_summary": {"categories": [["true", 14], ["false", 10]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 32.7}}, {"children": [{"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 105, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000002", "operator": ">", "value": 69}}, {"confidence": 0.34237, "count": 2, 
"id": 106, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000002", "operator": "<=", "value": 69}}], "confidence": 0.20765, "count": 3, "id": 104, "objective_summary": {"categories": [["false", 2], ["true", 1]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 1.0865}}, {"confidence": 0.87544, "count": 27, "id": 107, "objective_summary": {"categories": [["false", 27]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 1.0865}}], "confidence": 0.83329, "count": 30, "id": 103, "objective_summary": {"categories": [["false", 29], ["true", 1]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.486}}, {"confidence": 0.34237, "count": 2, "id": 108, "objective_summary": {"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000006", "operator": "<=", "value": 0.486}}], "confidence": 0.75782, "count": 32, "id": 102, "objective_summary": {"categories": [["false", 29], ["true", 3]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 32.7}}], "confidence": 0.56664, "count": 56, "id": 94, "objective_summary": {"categories": [["false", 39], ["true", 17]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 107}}], "confidence": 0.44725, "count": 83, "id": 82, "objective_summary": {"categories": [["false", 46], ["true", 37]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 119}}], "confidence": 0.52356, "count": 98, "id": 80, "objective_summary": {"categories": [["false", 61], ["true", 37]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.47355}}, {"children": [{"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 112, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000000", "operator": ">", "value": 9}}, {"confidence": 0.70085, "count": 9, "id": 113, "objective_summary": {"categories": [["true", 9]]}, "output": "true", "predicate": {"field": "000000", "operator": "<=", "value": 9}}], "confidence": 0.59584, "count": 10, "id": 111, "objective_summary": {"categories": [["true", 9], ["false", 1]]}, "output": "true", "predicate": {"field": "000007", "operator": ">", "value": 34}}, {"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 116, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 47.45}}, {"children": [{"confidence": 0.83887, "count": 20, "id": 118, "objective_summary": {"categories": [["false", 20]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 113}}, {"confidence": 0.37553, "count": 5, "id": 119, "objective_summary": {"categories": [["false", 4], ["true", 1]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 113}}], "confidence": 0.80456, "count": 25, "id": 117, "objective_summary": {"categories": [["false", 24], ["true", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 47.45}}], "confidence": 0.75858, "count": 26, "id": 115, "objective_summary": {"categories": [["false", 24], ["true", 2]]}, "output": "false", "predicate": {"field": "000003", "operator": ">", "value": 9}}, {"children": [{"confidence": 0.60966, "count": 6, "id": 121, "objective_summary": {"categories": [["false", 6]]}, "output": "false", "predicate": {"field": "000007", "operator": ">", "value": 28}}, 
{"children": [{"confidence": 0.60966, "count": 6, "id": 123, "objective_summary": {"categories": [["true", 6]]}, "output": "true", "predicate": {"field": "000006", "operator": ">", "value": 0.1335}}, {"confidence": 0.20654, "count": 1, "id": 124, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.1335}}], "confidence": 0.48687, "count": 7, "id": 122, "objective_summary": {"categories": [["true", 6], ["false", 1]]}, "output": "true", "predicate": {"field": "000007", "operator": "<=", "value": 28}}], "confidence": 0.29143, "count": 13, "id": 120, "objective_summary": {"categories": [["false", 7], ["true", 6]]}, "output": "false", "predicate": {"field": "000003", "operator": "<=", "value": 9}}], "confidence": 0.64466, "count": 39, "id": 114, "objective_summary": {"categories": [["false", 31], ["true", 8]]}, "output": "false", "predicate": {"field": "000007", "operator": "<=", "value": 34}}], "confidence": 0.51311, "count": 49, "id": 110, "objective_summary": {"categories": [["false", 32], ["true", 17]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 110}}, {"children": [{"children": [{"confidence": 0.34237, "count": 2, "id": 127, "objective_summary": {"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000000", "operator": ">", "value": 11}}, {"children": [{"children": [{"confidence": 0.85688, "count": 23, "id": 130, "objective_summary": {"categories": [["false", 23]]}, "output": "false", "predicate": {"field": "000004", "operator": ">", "value": 9}}, {"children": [{"confidence": 0.20654, "count": 1, "id": 132, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 40.1}}, {"children": [{"confidence": 0.79611, "count": 15, "id": 134, "objective_summary": {"categories": [["false", 15]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 77}}, {"children": [{"children": [{"confidence": 0.34237, "count": 2, "id": 137, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000003", "operator": ">", "value": 28}}, {"confidence": 0.34237, "count": 2, "id": 138, "objective_summary": {"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000003", "operator": "<=", "value": 28}}], "confidence": 0.15004, "count": 4, "id": 136, "objective_summary": {"categories": [["false", 2], ["true", 2]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.2455}}, {"confidence": 0.60966, "count": 6, "id": 139, "objective_summary": {"categories": [["false", 6]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.2455}}], "confidence": 0.49016, "count": 10, "id": 135, "objective_summary": {"categories": [["false", 8], ["true", 2]]}, "output": "false", "predicate": {"field": "000002", "operator": "<=", "value": 77}}], "confidence": 0.75033, "count": 25, "id": 133, "objective_summary": {"categories": [["false", 23], ["true", 2]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 40.1}}], "confidence": 0.71024, "count": 26, "id": 131, "objective_summary": {"categories": [["false", 23], ["true", 3]]}, "output": "false", "predicate": {"field": "000004", "operator": "<=", "value": 9}}], "confidence": 0.83479, "count": 49, "id": 129, "objective_summary": {"categories": [["false", 46], ["true", 3]]}, "output": "false", "predicate": {"field": 
"000000", "operator": ">", "value": 0}}, {"children": [{"confidence": 0.43849, "count": 3, "id": 141, "objective_summary": {"categories": [["false", 3]]}, "output": "false", "predicate": {"field": "000007", "operator": ">", "value": 25}}, {"confidence": 0.34237, "count": 2, "id": 142, "objective_summary": {"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000007", "operator": "<=", "value": 25}}], "confidence": 0.23072, "count": 5, "id": 140, "objective_summary": {"categories": [["false", 3], ["true", 2]]}, "output": "false", "predicate": {"field": "000000", "operator": "<=", "value": 0}}], "confidence": 0.8009, "count": 54, "id": 128, "objective_summary": {"categories": [["false", 49], ["true", 5]]}, "output": "false", "predicate": {"field": "000000", "operator": "<=", "value": 11}}], "confidence": 0.76374, "count": 56, "id": 126, "objective_summary": {"categories": [["false", 49], ["true", 7]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 71}}, {"confidence": 0.93686, "count": 57, "id": 143, "objective_summary": {"categories": [["false", 57]]}, "output": "false", "predicate": {"field": "000002", "operator": "<=", "value": 71}}], "confidence": 0.87763, "count": 113, "id": 125, "objective_summary": {"categories": [["false", 106], ["true", 7]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 110}}], "confidence": 0.78902, "count": 162, "id": 109, "objective_summary": {"categories": [["false", 138], ["true", 24]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.47355}}], "confidence": 0.71024, "count": 260, "id": 79, "objective_summary": {"categories": [["false", 199], ["true", 61]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 26.86384}}, {"confidence": 0.96971, "count": 123, "id": 144, "objective_summary": {"categories": [["false", 123]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 26.86384}}], "confidence": 0.80073, "count": 383, "id": 78, "objective_summary": {"categories": [["false", 322], ["true", 61]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 124}}], "confidence": 0.63955, "count": 614, "id": 0, "objective_summary": {"categories": [["false", 416], ["true", 198]]}, "output": "false", "predicate": true}}, "name": "diabetes - 0", "name_options": "512-node, pruned, deterministic order", "node_threshold": 512, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000008", "objective_field_name": "diabetes", "objective_field_type": "categorical", "objective_fields": ["000008"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": null, "randomize": false, "range": null, "replacement": false, "resource": "model/62605ac123541b220100748a", "rows": 614, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 20939, "source": "source/62605aa75198db5eed003416", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 0.0}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2022-04-20T19:11:30.564000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/tests/mlflow_ensemble/model_62605ac323541b220100748c 
b/bigml/tests/mlflow_ensemble/model_62605ac323541b220100748c new file mode 100644 index 00000000..12c57cfa --- /dev/null +++ b/bigml/tests/mlflow_ensemble/model_62605ac323541b220100748c @@ -0,0 +1 @@ +{"code": 200, "resource": "model/62605ac323541b220100748c", "location": "https://bigml.io/andromeda/model/62605ac323541b220100748c", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 9, "configuration": null, "configuration_status": false, "created": "2022-04-20T19:10:59.462000", "creator": "mmartin", "dataset": "dataset/62605ab1049fde5d990028f1", "dataset_field_types": {"categorical": 1, "datetime": 0, "image": 0, "items": 0, "numeric": 8, "path": 0, "preferred": 9, "regions": 0, "text": 0, "total": 9}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "62605abc0c11da5783002915", "ensemble_index": 1, "excluded_fields": [], "fields_meta": {"count": 9, "limit": 1000, "offset": 0, "query_total": 9, "total": 9}, "focus_field": null, "input_fields": ["000000", "000001", "000002", "000003", "000004", "000005", "000006", "000007"], "locale": "en_US", "max_columns": 9, "max_rows": 614, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["false", 410], ["true", 204]]}, "training": {"categories": [["false", 410], ["true", 204]]}}, "fields": {"000000": {"column_number": 0, "datatype": "int8", "name": "pregnancies", "optype": "numeric", "order": 0, "preferred": true, "summary": {"counts": [[0, 86], [1, 110], [2, 80], [3, 56], [4, 58], [5, 45], [6, 40], [7, 37], [8, 35], [9, 22], [10, 16], [11, 9], [12, 9], [13, 8], [14, 1], [15, 1], [17, 1]], "exact_histogram": {"populations": [196, 136, 103, 77, 57, 25, 17, 2, 1], "start": 0, "width": 2}, "kurtosis": 0.14808, "maximum": 17, "mean": 3.89088, "median": 3, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 0.88555, "standard_deviation": 3.38254, "sum": 2389, "sum_squares": 16309, "variance": 11.44158}}, "000001": {"column_number": 1, "datatype": "int16", "name": "plasma glucose", "optype": "numeric", "order": 1, "preferred": true, "summary": {"bins": [[0, 5], [44, 1], [57, 2], [61.5, 2], [65, 1], [67.75, 4], [73.21429, 14], [79.52941, 17], [84.2381, 21], [88.74074, 27], [92, 19], [95.81818, 33], [100.86275, 51], [105.67647, 34], [109.38235, 34], [113.77143, 35], [118.775, 40], [124.14634, 41], [128.93548, 31], [133.46154, 13], [137.32, 25], [141.95652, 23], [146.4, 20], [152.86957, 23], [158, 14], [162.41667, 12], [166.66667, 15], [172.92857, 14], [180.77778, 18], [187.90909, 11], [193, 4], [196.7, 10]], "exact_histogram": {"populations": [5, 0, 0, 0, 1, 2, 7, 21, 48, 74, 93, 75, 78, 46, 43, 36, 28, 19, 23, 15], "start": 0, "width": 10}, "kurtosis": 0.68455, "maximum": 199, "mean": 121.11401, "median": 117, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 0.08447, "standard_deviation": 32.75167, "sum": 74364, "sum_squares": 9664070, "variance": 1072.67214}}, "000002": {"column_number": 2, "datatype": "int8", "name": "blood pressure", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[0, 28], [30, 2], [39, 2], [44.66667, 6], [49.625, 16], [52, 9], [55.11111, 18], [58, 19], [60, 29], [62, 23], [64.87097, 62], [68, 38], [70, 43], [72, 37], [74.84524, 84], [78, 34], [80, 34], [82, 24], [84.85366, 41], [88, 17], [90, 19], [92, 4], [95, 9], [98, 3], [100, 2], [102, 1], [104, 1], [106, 3], [108, 2], [110, 2], [114, 1], 
[122, 1]], "exact_histogram": {"populations": [28, 0, 0, 0, 0, 0, 2, 1, 5, 5, 29, 30, 84, 68, 125, 73, 79, 37, 27, 8, 4, 5, 3, 0, 1], "start": 0, "width": 5}, "kurtosis": 5.12315, "maximum": 122, "mean": 69.10912, "median": 72, "minimum": 0, "missing_count": 0, "population": 614, "skewness": -1.82589, "standard_deviation": 19.37631, "sum": 42433, "sum_squares": 3162653, "variance": 375.44158}}, "000003": {"column_number": 3, "datatype": "int8", "name": "triceps skin thickness", "optype": "numeric", "order": 3, "preferred": true, "summary": {"bins": [[0, 189], [7.33333, 3], [10.57143, 7], [12, 4], [13.375, 16], [15.26667, 15], [17, 13], [18.46429, 28], [20.41176, 17], [22.48387, 31], [24.4, 20], [26.5625, 32], [28.46429, 28], [30.4375, 32], [32.38636, 44], [34.68421, 19], [36.6087, 23], [38.68421, 19], [40.46429, 28], [42.38462, 13], [44, 4], [45, 6], [46, 5], [47, 4], [48, 4], [49, 2], [50, 2], [51, 1], [52, 2], [56, 1], [63, 1], [99, 1]], "exact_histogram": {"open_max": 3, "populations": [189, 0, 0, 2, 1, 7, 14, 17, 17, 28, 17, 31, 20, 32, 28, 32, 44, 19, 23, 19, 28, 13, 10, 9, 6, 3, 2], "start": 0, "width": 2}, "kurtosis": -0.44344, "maximum": 99, "mean": 20.27687, "median": 22, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 0.14897, "standard_deviation": 16.11049, "sum": 12450, "sum_squares": 411550, "variance": 259.54801}}, "000004": {"column_number": 4, "datatype": "int16", "name": "insulin", "optype": "numeric", "order": 4, "preferred": true, "summary": {"bins": [[0, 295], [19.57143, 7], [38.36364, 11], [56.08889, 45], [76.55172, 29], [93.76471, 34], [113.11111, 27], [133.5, 38], [154.23077, 13], [173.31818, 22], [189.75, 16], [205.57143, 14], [219.28571, 7], [235.44444, 9], [268.5, 10], [292.33333, 3], [307, 2], [324.75, 8], [338.5, 2], [372.5, 2], [389.5, 2], [415, 1], [440, 1], [474.75, 4], [491.66667, 3], [510, 1], [542.66667, 3], [579, 1], [600, 1], [680, 1], [744, 1], [846, 1]], "exact_histogram": {"open_max": 3, "populations": [299, 10, 35, 34, 36, 27, 32, 24, 18, 22, 17, 10, 5, 7, 4, 3, 8, 1, 2, 2, 1, 0, 1, 2, 5, 1, 0, 3, 1, 0, 1], "start": 0, "width": 20}, "kurtosis": 7.14939, "maximum": 846, "mean": 82.72801, "median": 37, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 2.2975, "standard_deviation": 119.91638, "sum": 50795, "sum_squares": 13017071, "variance": 14379.93732}}, "000005": {"column_number": 5, "datatype": "double", "name": "bmi", "optype": "numeric", "order": 5, "preferred": true, "summary": {"bins": [[0, 9], [18.26667, 3], [19.5875, 8], [20.9375, 8], [22.13529, 17], [23.25333, 15], [24.14167, 24], [25.12059, 34], [26.32187, 32], [27.64706, 34], [28.8, 33], [30.17627, 59], [31.53077, 39], [32.85849, 53], [33.94118, 34], [34.89355, 31], [35.9129, 31], [37.34138, 29], [38.39048, 21], [39.45, 24], [40.60769, 13], [41.41429, 7], [42.90556, 18], [44.125, 8], [45.42727, 11], [46.4, 7], [48.225, 4], [49.85, 2], [52.8, 3], [55, 1], [57.3, 1], [59.4, 1]], "exact_histogram": {"populations": [9, 0, 0, 0, 0, 0, 0, 0, 0, 9, 17, 31, 59, 52, 60, 66, 81, 68, 42, 43, 21, 20, 17, 9, 3, 1, 3, 1, 1, 1], "start": 0, "width": 2}, "kurtosis": 3.13899, "maximum": 59.4, "mean": 31.9171, "median": 32, "minimum": 0, "missing_count": 0, "population": 614, "skewness": -0.52314, "standard_deviation": 7.80358, "sum": 19597.1, "sum_squares": 662811.83, "variance": 60.89594}}, "000006": {"column_number": 6, "datatype": "double", "name": "diabetes pedigree", "optype": "numeric", "order": 6, "preferred": true, "summary": {"bins": [[0.09254, 13], [0.14651, 61], 
[0.19706, 52], [0.25062, 86], [0.29458, 48], [0.34338, 42], [0.40032, 34], [0.44206, 33], [0.49592, 26], [0.54232, 31], [0.59219, 26], [0.64124, 21], [0.69418, 33], [0.744, 18], [0.81253, 15], [0.8645, 14], [0.93444, 16], [1.015, 5], [1.086, 2], [1.14533, 9], [1.2064, 5], [1.2702, 5], [1.33067, 3], [1.39375, 4], [1.45933, 3], [1.6, 1], [1.70933, 3], [1.781, 1], [2.137, 1], [2.288, 1], [2.329, 1], [2.42, 1]], "exact_histogram": {"populations": [9, 95, 140, 73, 68, 58, 52, 31, 28, 16, 6, 11, 8, 6, 4, 0, 3, 2, 0, 0, 0, 1, 1, 1, 1], "start": 0, "width": 0.1}, "kurtosis": 5.46252, "maximum": 2.42, "mean": 0.47944, "median": 0.3865, "minimum": 0.078, "missing_count": 0, "population": 614, "skewness": 1.91857, "standard_deviation": 0.34277, "sum": 294.378, "sum_squares": 213.15908, "variance": 0.11749}}, "000007": {"column_number": 7, "datatype": "int8", "name": "age", "optype": "numeric", "order": 7, "preferred": true, "summary": {"bins": [[21.56757, 111], [23, 29], [24.48611, 72], [26.5, 48], [28.4375, 48], [30.54545, 33], [32.59259, 27], [34.47368, 19], [36.48276, 29], [38.48, 25], [40.68, 25], [42.44, 25], [44.63158, 19], [46.27778, 18], [48.5, 8], [50.46667, 15], [52.41667, 12], [54.375, 8], [56.71429, 7], [58.375, 8], [60.28571, 7], [62, 3], [63, 4], [64, 1], [65, 2], [66, 3], [67, 3], [68, 1], [69, 1], [70, 1], [72, 1], [81, 1]], "exact_histogram": {"populations": [48, 92, 72, 48, 48, 33, 27, 19, 29, 25, 25, 25, 19, 18, 8, 15, 12, 8, 7, 8, 7, 7, 3, 6, 2, 1, 1, 0, 0, 0, 1], "start": 20, "width": 2}, "kurtosis": 0.42204, "maximum": 81, "mean": 33.66287, "median": 29, "minimum": 21, "missing_count": 0, "population": 614, "skewness": 1.05637, "standard_deviation": 12.0408, "sum": 20669, "sum_squares": 784651, "variance": 144.98077}}, "000008": {"column_number": 8, "datatype": "string", "name": "diabetes", "optype": "categorical", "order": 8, "preferred": true, "summary": {"categories": [["false", 394], ["true", 220]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000001", 0.33357], ["000005", 0.2589], ["000006", 0.16331], ["000000", 0.10221], ["000007", 0.04527], ["000002", 0.04284], ["000004", 0.03004], ["000003", 0.02386]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000000": {"column_number": 0, "datatype": "int8", "name": "pregnancies", "optype": "numeric", "preferred": true}, "000001": {"column_number": 1, "datatype": "int16", "name": "plasma glucose", "optype": "numeric", "preferred": true}, "000002": {"column_number": 2, "datatype": "int8", "name": "blood pressure", "optype": "numeric", "preferred": true}, "000003": {"column_number": 3, "datatype": "int8", "name": "triceps skin thickness", "optype": "numeric", "preferred": true}, "000004": {"column_number": 4, "datatype": "int16", "name": "insulin", "optype": "numeric", "preferred": true}, "000005": {"column_number": 5, "datatype": "double", "name": "bmi", "optype": "numeric", "preferred": true}, "000006": {"column_number": 6, "datatype": "double", "name": "diabetes pedigree", "optype": "numeric", "preferred": true}, "000007": {"column_number": 7, "datatype": "int8", "name": "age", "optype": "numeric", "preferred": true}, "000008": {"column_number": 8, "datatype": "string", "name": "diabetes", "optype": "categorical", "preferred": true, "term_analysis": {"enabled": true}}}, "node_threshold": 512, "root": {"children": [{"children": [{"children": 
[{"confidence": 0.64566, "count": 7, "id": 3, "objective_summary": {"categories": [["false", 7]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 92}}, {"children": [{"children": [{"confidence": 0.93242, "count": 53, "id": 6, "objective_summary": {"categories": [["true", 53]]}, "output": "true", "predicate": {"field": "000002", "operator": ">", "value": 54}}, {"children": [{"confidence": 0.5101, "count": 4, "id": 8, "objective_summary": {"categories": [["true", 4]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 27.15}}, {"confidence": 0.20654, "count": 1, "id": 9, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 27.15}}], "confidence": 0.37553, "count": 5, "id": 7, "objective_summary": {"categories": [["true", 4], ["false", 1]]}, "output": "true", "predicate": {"field": "000002", "operator": "<=", "value": 54}}], "confidence": 0.90859, "count": 58, "id": 5, "objective_summary": {"categories": [["true", 57], ["false", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 23.1}}, {"confidence": 0.34237, "count": 2, "id": 10, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 23.1}}], "confidence": 0.86299, "count": 60, "id": 4, "objective_summary": {"categories": [["true", 57], ["false", 3]]}, "output": "true", "predicate": {"field": "000002", "operator": "<=", "value": 92}}], "confidence": 0.74659, "count": 67, "id": 2, "objective_summary": {"categories": [["true", 57], ["false", 10]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 165}}, {"children": [{"confidence": 0.70085, "count": 9, "id": 12, "objective_summary": {"categories": [["false", 9]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 163}}, {"children": [{"children": [{"children": [{"confidence": 0.60966, "count": 6, "id": 16, "objective_summary": {"categories": [["false", 6]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.3645}}, {"children": [{"confidence": 0.5101, "count": 4, "id": 18, "objective_summary": {"categories": [["true", 4]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 33.35}}, {"confidence": 0.20654, "count": 1, "id": 19, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 33.35}}], "confidence": 0.37553, "count": 5, "id": 17, "objective_summary": {"categories": [["true", 4], ["false", 1]]}, "output": "true", "predicate": {"field": "000006", "operator": "<=", "value": 0.3645}}], "confidence": 0.3538, "count": 11, "id": 15, "objective_summary": {"categories": [["false", 7], ["true", 4]]}, "output": "false", "predicate": {"field": "000004", "operator": ">", "value": 269}}, {"children": [{"children": [{"children": [{"confidence": 0.74116, "count": 11, "id": 23, "objective_summary": {"categories": [["true", 11]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 42.65}}, {"children": [{"confidence": 0.43849, "count": 3, "id": 25, "objective_summary": {"categories": [["false", 3]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 39.95}}, {"children": [{"confidence": 0.64566, "count": 7, "id": 27, "objective_summary": {"categories": [["true", 7]]}, "output": "true", "predicate": {"field": "000005", 
"operator": ">", "value": 36.35}}, {"confidence": 0.20654, "count": 1, "id": 28, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 36.35}}], "confidence": 0.52911, "count": 8, "id": 26, "objective_summary": {"categories": [["true", 7], ["false", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 39.95}}], "confidence": 0.3538, "count": 11, "id": 24, "objective_summary": {"categories": [["true", 7], ["false", 4]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 42.65}}], "confidence": 0.61483, "count": 22, "id": 22, "objective_summary": {"categories": [["true", 18], ["false", 4]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 35.35}}, {"confidence": 0.81568, "count": 17, "id": 29, "objective_summary": {"categories": [["true", 17]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 35.35}}], "confidence": 0.76421, "count": 39, "id": 21, "objective_summary": {"categories": [["true", 35], ["false", 4]]}, "output": "true", "predicate": {"field": "000006", "operator": ">", "value": 0.2125}}, {"children": [{"confidence": 0.5101, "count": 4, "id": 31, "objective_summary": {"categories": [["false", 4]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 30.65}}, {"confidence": 0.20654, "count": 1, "id": 32, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 30.65}}], "confidence": 0.37553, "count": 5, "id": 30, "objective_summary": {"categories": [["false", 4], ["true", 1]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.2125}}], "confidence": 0.68039, "count": 44, "id": 20, "objective_summary": {"categories": [["true", 36], ["false", 8]]}, "output": "true", "predicate": {"field": "000004", "operator": "<=", "value": 269}}], "confidence": 0.59768, "count": 55, "id": 14, "objective_summary": {"categories": [["true", 40], ["false", 15]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 30.25833}}, {"children": [{"confidence": 0.79611, "count": 15, "id": 34, "objective_summary": {"categories": [["false", 15]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 25.2}}, {"children": [{"confidence": 0.20654, "count": 1, "id": 36, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000003", "operator": ">", "value": 9}}, {"confidence": 0.5101, "count": 4, "id": 37, "objective_summary": {"categories": [["true", 4]]}, "output": "true", "predicate": {"field": "000003", "operator": "<=", "value": 9}}], "confidence": 0.37553, "count": 5, "id": 35, "objective_summary": {"categories": [["true", 4], ["false", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 25.2}}], "confidence": 0.58398, "count": 20, "id": 33, "objective_summary": {"categories": [["false", 16], ["true", 4]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 30.25833}}], "confidence": 0.47366, "count": 75, "id": 13, "objective_summary": {"categories": [["true", 44], ["false", 31]]}, "output": "true", "predicate": {"field": "000001", "operator": "<=", "value": 163}}], "confidence": 0.41832, "count": 84, "id": 11, "objective_summary": {"categories": [["true", 44], ["false", 40]]}, "output": "true", "predicate": {"field": "000001", "operator": 
"<=", "value": 165}}], "confidence": 0.59044, "count": 151, "id": 1, "objective_summary": {"categories": [["true", 101], ["false", 50]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 141}}, {"children": [{"children": [{"children": [{"children": [{"children": [{"confidence": 0.79611, "count": 15, "id": 43, "objective_summary": {"categories": [["true", 15]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 33.9}}, {"children": [{"children": [{"confidence": 0.43849, "count": 3, "id": 46, "objective_summary": {"categories": [["false", 3]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.7505}}, {"children": [{"confidence": 0.20654, "count": 1, "id": 48, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 33.15}}, {"confidence": 0.34237, "count": 2, "id": 49, "objective_summary": {"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 33.15}}], "confidence": 0.20765, "count": 3, "id": 47, "objective_summary": {"categories": [["true", 2], ["false", 1]]}, "output": "true", "predicate": {"field": "000006", "operator": "<=", "value": 0.7505}}], "confidence": 0.29999, "count": 6, "id": 45, "objective_summary": {"categories": [["false", 4], ["true", 2]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 71}}, {"confidence": 0.56551, "count": 5, "id": 50, "objective_summary": {"categories": [["true", 5]]}, "output": "true", "predicate": {"field": "000002", "operator": "<=", "value": 71}}], "confidence": 0.3538, "count": 11, "id": 44, "objective_summary": {"categories": [["true", 7], ["false", 4]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 33.9}}], "confidence": 0.66468, "count": 26, "id": 42, "objective_summary": {"categories": [["true", 22], ["false", 4]]}, "output": "true", "predicate": {"field": "000000", "operator": ">", "value": 2}}, {"children": [{"children": [{"confidence": 0.70085, "count": 9, "id": 53, "objective_summary": {"categories": [["false", 9]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 52}}, {"confidence": 0.20654, "count": 1, "id": 54, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000002", "operator": "<=", "value": 52}}], "confidence": 0.59584, "count": 10, "id": 52, "objective_summary": {"categories": [["false", 9], ["true", 1]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.896}}, {"confidence": 0.67558, "count": 8, "id": 55, "objective_summary": {"categories": [["true", 8]]}, "output": "true", "predicate": {"field": "000006", "operator": "<=", "value": 0.896}}], "confidence": 0.29031, "count": 18, "id": 51, "objective_summary": {"categories": [["false", 9], ["true", 9]]}, "output": "false", "predicate": {"field": "000000", "operator": "<=", "value": 2}}], "confidence": 0.5578, "count": 44, "id": 41, "objective_summary": {"categories": [["true", 31], ["false", 13]]}, "output": "true", "predicate": {"field": "000007", "operator": ">", "value": 26}}, {"children": [{"confidence": 0.20654, "count": 1, "id": 57, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000003", "operator": ">", "value": 37}}, {"children": [{"confidence": 0.20654, "count": 1, "id": 59, "objective_summary": {"categories": [["true", 1]]}, "output": "true", 
"predicate": {"field": "000001", "operator": ">", "value": 131}}, {"confidence": 0.7719, "count": 13, "id": 60, "objective_summary": {"categories": [["false", 13]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 131}}], "confidence": 0.68531, "count": 14, "id": 58, "objective_summary": {"categories": [["false", 13], ["true", 1]]}, "output": "false", "predicate": {"field": "000003", "operator": "<=", "value": 37}}], "confidence": 0.62118, "count": 15, "id": 56, "objective_summary": {"categories": [["false", 13], ["true", 2]]}, "output": "false", "predicate": {"field": "000007", "operator": "<=", "value": 26}}], "confidence": 0.43289, "count": 59, "id": 40, "objective_summary": {"categories": [["true", 33], ["false", 26]]}, "output": "true", "predicate": {"field": "000006", "operator": ">", "value": 0.71247}}, {"children": [{"children": [{"confidence": 0.72246, "count": 10, "id": 63, "objective_summary": {"categories": [["true", 10]]}, "output": "true", "predicate": {"field": "000002", "operator": ">", "value": 65}}, {"confidence": 0.20654, "count": 1, "id": 64, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000002", "operator": "<=", "value": 65}}], "confidence": 0.62264, "count": 11, "id": 62, "objective_summary": {"categories": [["true", 10], ["false", 1]]}, "output": "true", "predicate": {"field": "000000", "operator": ">", "value": 12}}, {"children": [{"children": [{"children": [{"children": [{"confidence": 0.34237, "count": 2, "id": 69, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.6945}}, {"confidence": 0.83182, "count": 19, "id": 70, "objective_summary": {"categories": [["true", 19]]}, "output": "true", "predicate": {"field": "000006", "operator": "<=", "value": 0.6945}}], "confidence": 0.71085, "count": 21, "id": 68, "objective_summary": {"categories": [["true", 19], ["false", 2]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 27.55}}, {"confidence": 0.43849, "count": 3, "id": 71, "objective_summary": {"categories": [["false", 3]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 27.55}}], "confidence": 0.59529, "count": 24, "id": 67, "objective_summary": {"categories": [["true", 19], ["false", 5]]}, "output": "true", "predicate": {"field": "000006", "operator": ">", "value": 0.52269}}, {"children": [{"confidence": 0.43849, "count": 3, "id": 73, "objective_summary": {"categories": [["true", 3]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 45.7125}}, {"children": [{"children": [{"confidence": 0.7575, "count": 12, "id": 76, "objective_summary": {"categories": [["false", 12]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 84}}, {"children": [{"confidence": 0.64566, "count": 7, "id": 78, "objective_summary": {"categories": [["false", 7]]}, "output": "false", "predicate": {"field": "000004", "operator": ">", "value": 127}}, {"children": [{"confidence": 0.5101, "count": 4, "id": 80, "objective_summary": {"categories": [["true", 4]]}, "output": "true", "predicate": {"field": "000003", "operator": ">", "value": 28}}, {"children": [{"confidence": 0.60966, "count": 6, "id": 82, "objective_summary": {"categories": [["false", 6]]}, "output": "false", "predicate": {"field": "000003", "operator": ">", "value": 7}}, {"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 85, 
"objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 35.65}}, {"confidence": 0.60966, "count": 6, "id": 86, "objective_summary": {"categories": [["false", 6]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 35.65}}], "confidence": 0.48687, "count": 7, "id": 84, "objective_summary": {"categories": [["false", 6], ["true", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 34.15}}, {"children": [{"confidence": 0.64566, "count": 7, "id": 88, "objective_summary": {"categories": [["true", 7]]}, "output": "true", "predicate": {"field": "000006", "operator": ">", "value": 0.261}}, {"children": [{"confidence": 0.34237, "count": 2, "id": 90, "objective_summary": {"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 31.6}}, {"confidence": 0.5101, "count": 4, "id": 91, "objective_summary": {"categories": [["false", 4]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 31.6}}], "confidence": 0.29999, "count": 6, "id": 89, "objective_summary": {"categories": [["false", 4], ["true", 2]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.261}}], "confidence": 0.42369, "count": 13, "id": 87, "objective_summary": {"categories": [["true", 9], ["false", 4]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 34.15}}], "confidence": 0.29929, "count": 20, "id": 83, "objective_summary": {"categories": [["false", 10], ["true", 10]]}, "output": "false", "predicate": {"field": "000003", "operator": "<=", "value": 7}}], "confidence": 0.42535, "count": 26, "id": 81, "objective_summary": {"categories": [["false", 16], ["true", 10]]}, "output": "false", "predicate": {"field": "000003", "operator": "<=", "value": 28}}], "confidence": 0.36142, "count": 30, "id": 79, "objective_summary": {"categories": [["false", 16], ["true", 14]]}, "output": "false", "predicate": {"field": "000004", "operator": "<=", "value": 127}}], "confidence": 0.461, "count": 37, "id": 77, "objective_summary": {"categories": [["false", 23], ["true", 14]]}, "output": "false", "predicate": {"field": "000002", "operator": "<=", "value": 84}}], "confidence": 0.57591, "count": 49, "id": 75, "objective_summary": {"categories": [["false", 35], ["true", 14]]}, "output": "false", "predicate": {"field": "000000", "operator": ">", "value": 2}}, {"children": [{"confidence": 0.81568, "count": 17, "id": 93, "objective_summary": {"categories": [["false", 17]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 35}}, {"children": [{"children": [{"confidence": 0.34237, "count": 2, "id": 96, "objective_summary": {"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000002", "operator": ">", "value": 56}}, {"confidence": 0.20654, "count": 1, "id": 97, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000002", "operator": "<=", "value": 56}}], "confidence": 0.20765, "count": 3, "id": 95, "objective_summary": {"categories": [["true", 2], ["false", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 33.8}}, {"confidence": 0.7575, "count": 12, "id": 98, "objective_summary": {"categories": [["false", 12]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 33.8}}], "confidence": 0.62118, "count": 15, "id": 94, "objective_summary": 
{"categories": [["false", 13], ["true", 2]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 35}}], "confidence": 0.79853, "count": 32, "id": 92, "objective_summary": {"categories": [["false", 30], ["true", 2]]}, "output": "false", "predicate": {"field": "000000", "operator": "<=", "value": 2}}], "confidence": 0.70295, "count": 81, "id": 74, "objective_summary": {"categories": [["false", 65], ["true", 16]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 45.7125}}], "confidence": 0.67353, "count": 84, "id": 72, "objective_summary": {"categories": [["false", 65], ["true", 19]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.52269}}], "confidence": 0.55441, "count": 108, "id": 66, "objective_summary": {"categories": [["false", 70], ["true", 38]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 113}}, {"children": [{"confidence": 0.43849, "count": 3, "id": 100, "objective_summary": {"categories": [["true", 3]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 50.85}}, {"children": [{"children": [{"children": [{"children": [{"confidence": 0.7719, "count": 13, "id": 105, "objective_summary": {"categories": [["false", 13]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 77}}, {"children": [{"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 109, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 35.9}}, {"confidence": 0.34237, "count": 2, "id": 110, "objective_summary": {"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 35.9}}], "confidence": 0.20765, "count": 3, "id": 108, "objective_summary": {"categories": [["true", 2], ["false", 1]]}, "output": "true", "predicate": {"field": "000000", "operator": ">", "value": 4}}, {"confidence": 0.78468, "count": 14, "id": 111, "objective_summary": {"categories": [["false", 14]]}, "output": "false", "predicate": {"field": "000000", "operator": "<=", "value": 4}}], "confidence": 0.65663, "count": 17, "id": 107, "objective_summary": {"categories": [["false", 15], ["true", 2]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.2965}}, {"children": [{"confidence": 0.56551, "count": 5, "id": 113, "objective_summary": {"categories": [["true", 5]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 29.7}}, {"confidence": 0.20654, "count": 1, "id": 114, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 29.7}}], "confidence": 0.43649, "count": 6, "id": 112, "objective_summary": {"categories": [["true", 5], ["false", 1]]}, "output": "true", "predicate": {"field": "000006", "operator": "<=", "value": 0.2965}}], "confidence": 0.49134, "count": 23, "id": 106, "objective_summary": {"categories": [["false", 16], ["true", 7]]}, "output": "false", "predicate": {"field": "000002", "operator": "<=", "value": 77}}], "confidence": 0.64972, "count": 36, "id": 104, "objective_summary": {"categories": [["false", 29], ["true", 7]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 27.05}}, {"confidence": 0.5101, "count": 4, "id": 115, "objective_summary": {"categories": [["true", 4]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 27.05}}], 
"confidence": 0.57165, "count": 40, "id": 103, "objective_summary": {"categories": [["false", 29], ["true", 11]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 105}}, {"children": [{"children": [{"children": [{"confidence": 0.60966, "count": 6, "id": 119, "objective_summary": {"categories": [["false", 6]]}, "output": "false", "predicate": {"field": "000004", "operator": ">", "value": 42}}, {"children": [{"confidence": 0.5101, "count": 4, "id": 121, "objective_summary": {"categories": [["false", 4]]}, "output": "false", "predicate": {"field": "000007", "operator": ">", "value": 58}}, {"confidence": 0.60966, "count": 6, "id": 122, "objective_summary": {"categories": [["true", 6]]}, "output": "true", "predicate": {"field": "000007", "operator": "<=", "value": 58}}], "confidence": 0.31267, "count": 10, "id": 120, "objective_summary": {"categories": [["true", 6], ["false", 4]]}, "output": "true", "predicate": {"field": "000004", "operator": "<=", "value": 42}}], "confidence": 0.38641, "count": 16, "id": 118, "objective_summary": {"categories": [["false", 10], ["true", 6]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.2425}}, {"confidence": 0.80639, "count": 16, "id": 123, "objective_summary": {"categories": [["false", 16]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.2425}}], "confidence": 0.64691, "count": 32, "id": 117, "objective_summary": {"categories": [["false", 26], ["true", 6]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 35.525}}, {"confidence": 0.92444, "count": 47, "id": 124, "objective_summary": {"categories": [["false", 47]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 35.525}}], "confidence": 0.84404, "count": 79, "id": 116, "objective_summary": {"categories": [["false", 73], ["true", 6]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 105}}], "confidence": 0.78309, "count": 119, "id": 102, "objective_summary": {"categories": [["false", 102], ["true", 17]]}, "output": "false", "predicate": {"field": "000007", "operator": ">", "value": 22}}, {"confidence": 0.9162, "count": 42, "id": 125, "objective_summary": {"categories": [["false", 42]]}, "output": "false", "predicate": {"field": "000007", "operator": "<=", "value": 22}}], "confidence": 0.83741, "count": 161, "id": 101, "objective_summary": {"categories": [["false", 144], ["true", 17]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 50.85}}], "confidence": 0.81914, "count": 164, "id": 99, "objective_summary": {"categories": [["false", 144], ["true", 20]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 113}}], "confidence": 0.73427, "count": 272, "id": 65, "objective_summary": {"categories": [["false", 214], ["true", 58]]}, "output": "false", "predicate": {"field": "000000", "operator": "<=", "value": 12}}], "confidence": 0.70667, "count": 283, "id": 61, "objective_summary": {"categories": [["false", 215], ["true", 68]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.71247}}], "confidence": 0.65427, "count": 342, "id": 39, "objective_summary": {"categories": [["false", 241], ["true", 101]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 26.20018}}, {"children": [{"children": [{"confidence": 0.83182, "count": 19, "id": 128, "objective_summary": {"categories": [["false", 19]]}, "output": 
"false", "predicate": {"field": "000003", "operator": ">", "value": 6}}, {"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 131, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 25.15}}, {"confidence": 0.56551, "count": 5, "id": 132, "objective_summary": {"categories": [["false", 5]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 25.15}}], "confidence": 0.43649, "count": 6, "id": 130, "objective_summary": {"categories": [["false", 5], ["true", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 10.55}}, {"confidence": 0.20654, "count": 1, "id": 133, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 10.55}}], "confidence": 0.35893, "count": 7, "id": 129, "objective_summary": {"categories": [["false", 5], ["true", 2]]}, "output": "false", "predicate": {"field": "000003", "operator": "<=", "value": 6}}], "confidence": 0.75858, "count": 26, "id": 127, "objective_summary": {"categories": [["false", 24], ["true", 2]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 122}}, {"confidence": 0.96113, "count": 95, "id": 134, "objective_summary": {"categories": [["false", 95]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 122}}], "confidence": 0.94173, "count": 121, "id": 126, "objective_summary": {"categories": [["false", 119], ["true", 2]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 26.20018}}], "confidence": 0.73746, "count": 463, "id": 38, "objective_summary": {"categories": [["false", 360], ["true", 103]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 141}}], "confidence": 0.62955, "count": 614, "id": 0, "objective_summary": {"categories": [["false", 410], ["true", 204]]}, "output": "false", "predicate": true}}, "name": "diabetes - 1", "name_options": "512-node, pruned, deterministic order", "node_threshold": 512, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000008", "objective_field_name": "diabetes", "objective_field_type": "categorical", "objective_fields": ["000008"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": null, "randomize": false, "range": null, "replacement": false, "resource": "model/62605ac323541b220100748c", "rows": 614, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 20939, "source": "source/62605aa75198db5eed003416", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 0.0}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2022-04-20T19:11:31.004000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/tests/my_dataset/dataset_62e2bd555687096969004659 b/bigml/tests/my_dataset/dataset_62e2bd555687096969004659 new file mode 100644 index 00000000..0e38a423 --- /dev/null +++ b/bigml/tests/my_dataset/dataset_62e2bd555687096969004659 @@ -0,0 +1 @@ +{"code": 200, "resource": "dataset/62e2bd555687096969004659", "location": "https://bigml.io/andromeda/dataset/62e2bd555687096969004659", "object": {"all_fields": true, "category": 0, 
"cluster": null, "cluster_status": false, "code": 200, "columns": 9, "configuration": null, "configuration_status": false, "correlations": {}, "created": "2022-07-28T16:46:13.389000", "creator": "mmartin", "dataset_origin_status": true, "description": "", "download": {"code": 0, "decimal_separator": ".", "excluded_input_fields": [], "header": true, "input_fields": [], "message": "", "new_line": "lf", "preview": [], "separator": ","}, "evaluation": null, "excluded_fields": [], "field_types": {"categorical": 1, "datetime": 0, "image": 0, "items": 0, "numeric": 8, "path": 0, "preferred": 9, "regions": 0, "text": 0, "total": 9}, "fields": {"000000": {"column_number": 0, "datatype": "int8", "name": "pregnancies", "optype": "numeric", "order": 0, "preferred": true, "summary": {"counts": [[0, 111], [1, 135], [2, 103], [3, 75], [4, 68], [5, 57], [6, 50], [7, 45], [8, 38], [9, 28], [10, 24], [11, 11], [12, 9], [13, 10], [14, 2], [15, 1], [17, 1]], "exact_histogram": {"populations": [246, 178, 125, 95, 66, 35, 19, 3, 1], "start": 0, "width": 2}, "kurtosis": 0.15038, "maximum": 17, "mean": 3.84505, "median": 3, "minimum": 0, "missing_count": 0, "population": 768, "skewness": 0.89991, "standard_deviation": 3.36958, "sum": 2953, "sum_squares": 20063, "variance": 11.35406}}, "000001": {"column_number": 1, "datatype": "int16", "name": "plasma glucose", "optype": "numeric", "order": 1, "preferred": true, "summary": {"bins": [[0, 5], [44, 1], [56.66667, 3], [61.5, 2], [67.2, 5], [73.3125, 16], [79.47619, 21], [84.03448, 29], [89.83333, 54], [95.45455, 44], [100.69231, 65], [105.68182, 44], [109.46667, 45], [113.525, 40], [118.54902, 51], [123.98182, 55], [128.90476, 42], [133.45, 20], [137.82353, 34], [142.65217, 23], [146.4, 25], [150.92857, 14], [154.5625, 16], [158.15385, 13], [162.4, 15], [166.66667, 15], [172.21429, 14], [176.16667, 6], [180.29412, 17], [184, 6], [188.41667, 12], [195.6875, 16]], "exact_histogram": {"populations": [5, 0, 0, 0, 1, 3, 7, 25, 63, 93, 117, 94, 102, 61, 54, 41, 31, 25, 28, 18], "start": 0, "width": 10}, "kurtosis": 0.62881, "maximum": 199, "mean": 120.89453, "median": 117, "minimum": 0, "missing_count": 0, "population": 768, "skewness": 0.17341, "standard_deviation": 31.97262, "sum": 92847, "sum_squares": 12008759, "variance": 1022.24831}}, "000002": {"column_number": 2, "datatype": "int8", "name": "blood pressure", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[0, 35], [24, 1], [30, 2], [39, 2], [44.66667, 6], [49.44444, 18], [52, 11], [55.04, 25], [58, 21], [60.95833, 72], [64.8375, 80], [68, 45], [70, 57], [72, 44], [74.86869, 99], [78, 45], [80, 40], [82, 30], [84.96, 50], [88, 25], [90, 22], [92, 8], [94.81818, 11], [98, 3], [100, 3], [102, 1], [104, 2], [106, 3], [108, 2], [110, 3], [114, 1], [122, 1]], "exact_histogram": {"populations": [35, 0, 0, 0, 1, 0, 2, 1, 5, 7, 35, 35, 115, 82, 153, 92, 93, 52, 36, 8, 6, 5, 4, 0, 1], "start": 0, "width": 5}, "kurtosis": 5.13869, "maximum": 122, "mean": 69.10547, "median": 72, "minimum": 0, "missing_count": 0, "population": 768, "skewness": -1.84001, "standard_deviation": 19.35581, "sum": 53073, "sum_squares": 3954989, "variance": 374.64727}}, "000003": {"column_number": 3, "datatype": "int8", "name": "triceps skin thickness", "optype": "numeric", "order": 3, "preferred": true, "summary": {"bins": [[0, 227], [7.5, 4], [10.54545, 11], [12, 7], [13.35294, 17], [15.3, 20], [17, 14], [18.47368, 38], [20.43478, 23], [22.57895, 38], [24.57143, 28], [26.58974, 39], [28.45946, 37], [30.41304, 46], [32.39216, 
51], [34.65217, 23], [36.53333, 30], [38.72, 25], [40.48387, 31], [42.35294, 17], [44.54545, 11], [46.33333, 12], [48, 4], [49, 3], [50, 3], [51, 1], [52, 2], [54, 2], [56, 1], [60, 1], [63, 1], [99, 1]], "exact_histogram": {"open_max": 3, "populations": [227, 0, 0, 2, 2, 11, 18, 20, 20, 38, 23, 38, 28, 39, 37, 46, 51, 23, 30, 25, 31, 17, 11, 12, 7, 4, 2, 2, 1], "start": 0, "width": 2}, "kurtosis": -0.52449, "maximum": 99, "mean": 20.53646, "median": 23, "minimum": 0, "missing_count": 0, "population": 768, "skewness": 0.10916, "standard_deviation": 15.95222, "sum": 15772, "sum_squares": 519082, "variance": 254.47325}}, "000004": {"column_number": 4, "datatype": "int16", "name": "insulin", "optype": "numeric", "order": 4, "preferred": true, "summary": {"bins": [[0, 374], [20.3, 10], [48.36, 50], [69.88636, 44], [90.93617, 47], [112.23256, 43], [134.95556, 45], [155.94444, 18], [173.26667, 30], [189.61111, 18], [207.23529, 17], [226.66667, 9], [239.4, 5], [257, 6], [277.36364, 11], [298.5, 6], [324.75, 8], [338.5, 2], [368.33333, 3], [393.66667, 3], [415, 1], [440, 1], [465, 1], [479.4, 5], [495, 2], [510, 1], [542.66667, 3], [579, 1], [600, 1], [680, 1], [744, 1], [846, 1]], "exact_histogram": {"open_max": 3, "populations": [379, 12, 43, 45, 39, 40, 37, 32, 25, 27, 17, 11, 7, 9, 7, 4, 8, 1, 3, 2, 2, 0, 1, 3, 5, 1, 0, 3, 1, 0, 1], "start": 0, "width": 20}, "kurtosis": 7.15957, "maximum": 846, "mean": 79.79948, "median": 30.5, "minimum": 0, "missing_count": 0, "population": 768, "skewness": 2.26781, "standard_deviation": 115.244, "sum": 61286, "sum_squares": 15077256, "variance": 13281.18008}}, "000005": {"column_number": 5, "datatype": "double", "name": "bmi", "optype": "numeric", "order": 5, "preferred": true, "summary": {"bins": [[0, 11], [18.25, 4], [19.6, 11], [20.90909, 11], [21.93333, 12], [23.00323, 31], [24.37179, 39], [25.505, 40], [26.55357, 28], [27.64667, 45], [28.77692, 39], [29.85581, 43], [30.83333, 45], [31.88125, 32], [32.8569, 58], [34.11587, 63], [35.42353, 51], [36.70286, 35], [37.95, 38], [39.17586, 29], [40.37143, 21], [41.90588, 17], [43.33462, 26], [45.23571, 14], [46.41111, 9], [48.225, 4], [49.65, 4], [52.675, 4], [55, 1], [57.3, 1], [59.4, 1], [67.1, 1]], "exact_histogram": {"populations": [11, 0, 0, 13, 93, 179, 224, 150, 62, 27, 5, 3, 0, 1], "start": 0, "width": 5}, "kurtosis": 3.26126, "maximum": 67.1, "mean": 31.99258, "median": 32, "minimum": 0, "missing_count": 0, "population": 768, "skewness": -0.42814, "standard_deviation": 7.88416, "sum": 24570.3, "sum_squares": 833743.95, "variance": 62.15998}}, "000006": {"column_number": 6, "datatype": "double", "name": "diabetes pedigree", "optype": "numeric", "order": 6, "preferred": true, "summary": {"bins": [[0.096, 16], [0.14349, 59], [0.19135, 78], [0.2508, 118], [0.29704, 56], [0.34648, 60], [0.40083, 47], [0.44738, 39], [0.49728, 29], [0.54156, 36], [0.5869, 29], [0.62955, 22], [0.68606, 47], [0.74575, 24], [0.8084, 15], [0.86, 20], [0.9389, 21], [1.015, 5], [1.0792, 5], [1.13722, 9], [1.20238, 8], [1.2702, 5], [1.33067, 3], [1.39375, 4], [1.45933, 3], [1.6, 1], [1.70933, 3], [1.781, 1], [1.893, 1], [2.137, 1], [2.3085, 2], [2.42, 1]], "exact_histogram": {"populations": [9, 116, 179, 104, 83, 71, 62, 39, 33, 21, 9, 13, 9, 6, 4, 0, 3, 2, 1, 0, 0, 1, 1, 1, 1], "start": 0, "width": 0.1}, "kurtosis": 5.55079, "maximum": 2.42, "mean": 0.47188, "median": 0.3725, "minimum": 0.078, "missing_count": 0, "population": 768, "skewness": 1.91616, "standard_deviation": 0.33133, "sum": 362.401, "sum_squares": 255.20866, 
"variance": 0.10978}}, "000007": {"column_number": 7, "datatype": "int8", "name": "age", "optype": "numeric", "order": 7, "preferred": true, "summary": {"bins": [[21.53333, 135], [23, 38], [24.51064, 94], [26.49231, 65], [28.45313, 64], [30.53333, 45], [32.51515, 33], [34.41667, 24], [36.54286, 35], [38.42857, 28], [40.62857, 35], [42.41935, 31], [44.65217, 23], [46.31579, 19], [48.5, 10], [50.5, 16], [52.38462, 13], [54.4, 10], [56.625, 8], [58.3, 10], [60.28571, 7], [62, 4], [63, 4], [64, 1], [65, 3], [66, 4], [67, 3], [68, 1], [69, 2], [70, 1], [72, 1], [81, 1]], "exact_histogram": {"populations": [63, 110, 94, 65, 64, 45, 33, 24, 35, 28, 35, 31, 23, 19, 10, 16, 13, 10, 8, 10, 7, 8, 4, 7, 3, 1, 1, 0, 0, 0, 1], "start": 20, "width": 2}, "kurtosis": 0.63118, "maximum": 81, "mean": 33.24089, "median": 29, "minimum": 21, "missing_count": 0, "population": 768, "skewness": 1.12739, "standard_deviation": 11.76023, "sum": 25529, "sum_squares": 954685, "variance": 138.30305}}, "000008": {"column_number": 8, "datatype": "string", "name": "diabetes", "optype": "categorical", "order": 8, "preferred": true, "summary": {"categories": [["false", 500], ["true", 268]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "fields_meta": {"count": 9, "effective_fields": 9, "limit": -1, "offset": 0, "parent_optypes": {}, "preferred": 9, "provenances": {}, "query_total": 9, "total": 9}, "input_fields": ["000000", "000001", "000002", "000003", "000004", "000005", "000006", "000007", "000008"], "juxtapose": false, "locale": "en_US", "missing_numeric_rows": 0, "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "name": "diabetes", "name_options": "768 instances, 9 fields (1 categorical, 8 numeric)", "new_fields": [], "number_of_anomalies": 0, "number_of_anomalyscores": 0, "number_of_associations": 0, "number_of_associationsets": 0, "number_of_batchanomalyscores": 0, "number_of_batchcentroids": 0, "number_of_batchpredictions": 0, "number_of_batchprojections": 0, "number_of_batchtopicdistributions": 0, "number_of_centroids": 0, "number_of_clusters": 0, "number_of_correlations": 0, "number_of_deepnets": 0, "number_of_ensembles": 0, "number_of_evaluations": 0, "number_of_forecasts": 0, "number_of_linearregressions": 0, "number_of_logisticregressions": 0, "number_of_models": 0, "number_of_optimls": 0, "number_of_pca": 0, "number_of_predictions": 0, "number_of_projections": 0, "number_of_statisticaltests": 0, "number_of_timeseries": 0, "number_of_topicdistributions": 0, "number_of_topicmodels": 0, "objective_field": {"column_number": 8, "datatype": "string", "id": "000008", "name": "diabetes", "optype": "categorical", "order": 8, "term_analysis": {"enabled": true}}, "optiml": null, "optiml_status": false, "origin_batch_dataset": null, "origin_batch_dataset_status": false, "origin_batch_model": null, "origin_batch_model_status": false, "origin_batch_resource": null, "origin_batch_status": false, "output_fields": [], "price": 0.0, "private": true, "project": null, "refresh_field_types": false, "refresh_objective": false, "refresh_preferred": false, "resource": "dataset/62e2bd555687096969004659", "row_offset": 0, "row_step": 1, "rows": 768, "shared": false, "size": 26191, "source": "source/62e2bd535687096969004656", "source_status": true, "sql_output_fields": [], "statisticaltest": null, "status": {"bytes": 26191, "code": 5, "elapsed": 1203, "extracted_count": 0, "field_errors": {}, "message": "The 
dataset has been created", "progress": 1, "row_format_errors": {"total": 0}, "serialized_rows": 768}, "subscription": true, "tags": [], "tde_download": {"code": 0, "excluded_input_fields": [], "input_fields": [], "message": "", "preview": []}, "term_limit": 1000, "timeseries": null, "timeseries_status": false, "type": 0, "updated": "2022-07-28T16:46:16.530000"}, "error": null} \ No newline at end of file diff --git a/bigml/tests/my_dataset/dataset_62e2bd65d432eb563000442e b/bigml/tests/my_dataset/dataset_62e2bd65d432eb563000442e new file mode 100644 index 00000000..0c24d0dd --- /dev/null +++ b/bigml/tests/my_dataset/dataset_62e2bd65d432eb563000442e @@ -0,0 +1 @@ +{"code": 200, "resource": "dataset/62e2bd65d432eb563000442e", "location": "https://bigml.io/andromeda/dataset/62e2bd65d432eb563000442e", "object": {"all_fields": true, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 9, "configuration": null, "configuration_status": false, "correlations": {}, "created": "2022-07-28T16:46:29.076000", "creator": "mmartin", "dataset_origin_status": true, "description": "", "download": {"code": 0, "decimal_separator": ".", "excluded_input_fields": [], "header": true, "input_fields": [], "message": "", "new_line": "lf", "preview": [], "separator": ","}, "evaluation": null, "excluded_fields": [], "field_types": {"categorical": 1, "datetime": 0, "image": 0, "items": 0, "numeric": 8, "path": 0, "preferred": 9, "regions": 0, "text": 0, "total": 9}, "fields": {"000000": {"column_number": 0, "datatype": "int8", "name": "pregnancies", "optype": "numeric", "order": 0, "preferred": true, "summary": {"counts": [[0, 25], [1, 25], [2, 23], [3, 19], [4, 10], [5, 12], [6, 10], [7, 8], [8, 3], [9, 6], [10, 8], [11, 2], [13, 2], [14, 1]], "exact_histogram": {"populations": [50, 42, 22, 18, 9, 10, 2, 1], "start": 0, "width": 2}, "kurtosis": 0.16147, "maximum": 14, "mean": 3.66234, "median": 3, "minimum": 0, "missing_count": 0, "population": 154, "skewness": 0.95878, "standard_deviation": 3.32198, "sum": 564, "sum_squares": 3754, "variance": 11.03557}}, "000001": {"column_number": 1, "datatype": "int16", "name": "plasma glucose", "optype": "numeric", "order": 1, "preferred": true, "summary": {"bins": [[56, 1], [72, 1], [77.33333, 3], [81, 3], [83.71429, 7], [87.8, 5], [91.42857, 7], [95.14286, 7], [99.63636, 11], [102.6, 5], [107, 13], [111.22222, 9], [115.22222, 9], [120.25, 4], [123.5, 14], [128.81818, 11], [132.25, 4], [135, 3], [138.25, 4], [141, 1], [143.66667, 3], [146.42857, 7], [150.5, 2], [154.66667, 3], [161.5, 2], [164, 1], [170.5, 2], [176, 3], [179.66667, 3], [184, 2], [189, 1], [194.33333, 3]], "exact_histogram": {"populations": [1, 0, 0, 1, 3, 9, 6, 10, 9, 11, 13, 13, 6, 14, 10, 8, 7, 4, 7, 4, 1, 3, 0, 2, 4, 4, 1, 2, 1], "start": 55, "width": 5}, "kurtosis": 0.04986, "maximum": 196, "mean": 120.01948, "median": 115.5, "minimum": 56, "missing_count": 0, "population": 154, "skewness": 0.67008, "standard_deviation": 28.73919, "sum": 18483, "sum_squares": 2344689, "variance": 825.94079}}, "000002": {"column_number": 2, "datatype": "int8", "name": "blood pressure", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[0, 7], [24, 1], [48, 2], [52, 2], [54, 4], [56, 3], [58, 2], [60, 8], [61, 1], [62, 11], [64, 11], [65, 1], [66, 6], [68, 7], [70, 14], [72, 7], [74, 7], [75, 1], [76, 7], [78, 11], [80, 6], [82, 6], [84, 2], [85, 1], [86, 6], [88, 8], [90, 3], [92, 4], [94, 2], [100, 1], [104, 1], [110, 1]], "exact_histogram": {"populations": [7, 0, 0, 0, 1, 0, 
0, 0, 0, 2, 6, 5, 31, 14, 28, 19, 14, 15, 9, 0, 2, 0, 1], "start": 0, "width": 5}, "kurtosis": 5.20109, "maximum": 110, "mean": 69.09091, "median": 70, "minimum": 0, "missing_count": 0, "population": 154, "skewness": -1.89704, "standard_deviation": 19.33684, "sum": 10640, "sum_squares": 792336, "variance": 373.91325}}, "000003": {"column_number": 3, "datatype": "int8", "name": "triceps skin thickness", "optype": "numeric", "order": 3, "preferred": true, "summary": {"bins": [[0, 38], [8, 1], [10.5, 4], [12.25, 4], [15.4, 5], [17, 1], [18.5, 10], [20.5, 6], [23, 7], [25.2, 10], [27.5, 10], [29, 4], [30, 9], [31, 5], [32, 4], [33, 3], [34, 2], [35, 2], [36, 5], [37, 2], [38, 1], [39, 5], [40, 1], [41, 2], [42, 3], [43, 1], [44, 1], [46, 3], [49, 1], [50, 1], [54, 2], [60, 1]], "exact_histogram": {"populations": [38, 0, 0, 0, 1, 4, 4, 3, 3, 10, 6, 7, 8, 7, 9, 14, 7, 4, 7, 6, 3, 4, 1, 3, 1, 1, 0, 2, 0, 0, 1], "start": 0, "width": 2}, "kurtosis": -0.89167, "maximum": 60, "mean": 21.57143, "median": 25, "minimum": 0, "missing_count": 0, "population": 154, "skewness": -0.05392, "standard_deviation": 15.31194, "sum": 3322, "sum_squares": 107532, "variance": 234.45565}}, "000004": {"column_number": 4, "datatype": "int16", "name": "insulin", "optype": "numeric", "order": 4, "preferred": true, "summary": {"bins": [[0, 79], [18, 1], [24, 2], [42, 4], [47, 3], [58, 2], [67, 4], [76.66667, 6], [82, 1], [94, 2], [100, 2], [106.875, 8], [114.33333, 3], [120, 1], [125, 1], [132.33333, 3], [140, 2], [146, 1], [156.85714, 7], [165, 3], [170, 1], [176, 1], [181.75, 4], [194, 1], [228, 1], [249.5, 2], [277.5, 2], [284.66667, 3], [300, 1], [360, 1], [402, 1], [478, 1]], "exact_histogram": {"populations": [80, 2, 8, 11, 3, 13, 5, 8, 7, 5, 0, 1, 2, 2, 3, 1, 0, 0, 1, 0, 1, 0, 0, 1], "start": 0, "width": 20}, "kurtosis": 2.76998, "maximum": 478, "mean": 68.12338, "median": 0, "minimum": 0, "missing_count": 0, "population": 154, "skewness": 1.62114, "standard_deviation": 93.77705, "sum": 10491, "sum_squares": 2060185, "variance": 8794.13501}}, "000005": {"column_number": 5, "datatype": "double", "name": "bmi", "optype": "numeric", "order": 5, "preferred": true, "summary": {"bins": [[0, 2], [18.2, 1], [19.5, 2], [20.15, 2], [21.05, 2], [21.8, 2], [22.65, 4], [23.325, 4], [24.35, 4], [25.4, 6], [26.175, 4], [27.44667, 15], [28.71667, 6], [29.81111, 9], [31.31667, 6], [32.2, 9], [33.06667, 3], [34.12353, 17], [35.05, 4], [35.66667, 6], [36.68571, 14], [38.16667, 6], [39.24, 5], [40.3, 2], [42.14, 5], [43.1, 6], [44.1, 2], [46.1, 1], [46.8, 1], [49.45, 2], [52.3, 1], [67.1, 1]], "exact_histogram": {"populations": [2, 0, 0, 4, 17, 36, 40, 34, 15, 4, 1, 0, 0, 1], "start": 0, "width": 5}, "kurtosis": 3.57751, "maximum": 67.1, "mean": 32.29351, "median": 32.4, "minimum": 0, "missing_count": 0, "population": 154, "skewness": -0.11158, "standard_deviation": 8.21685, "sum": 4973.2, "sum_squares": 170932.12, "variance": 67.51669}}, "000006": {"column_number": 6, "datatype": "double", "name": "diabetes pedigree", "optype": "numeric", "order": 6, "preferred": true, "summary": {"bins": [[0.111, 3], [0.15418, 11], [0.19875, 16], [0.24761, 23], [0.27833, 6], [0.309, 4], [0.33375, 12], [0.3698, 15], [0.39933, 3], [0.426, 1], [0.45025, 8], [0.48875, 4], [0.52225, 4], [0.551, 1], [0.57383, 6], [0.598, 3], [0.633, 2], [0.6606, 5], [0.6895, 2], [0.733, 4], [0.76367, 3], [0.787, 1], [0.816, 1], [0.838, 2], [0.878, 2], [0.933, 1], [0.95825, 4], [1.0645, 2], [1.098, 2], [1.189, 1], [1.224, 1], [1.893, 1]], "exact_histogram": {"populations": 
[21, 39, 31, 15, 13, 10, 8, 5, 5, 3, 2, 1, 0, 0, 0, 0, 0, 1], "start": 0.1, "width": 0.1}, "kurtosis": 4.04452, "maximum": 1.893, "mean": 0.44171, "median": 0.3565, "minimum": 0.1, "missing_count": 0, "population": 154, "skewness": 1.65698, "standard_deviation": 0.28009, "sum": 68.023, "sum_squares": 42.04958, "variance": 0.07845}}, "000007": {"column_number": 7, "datatype": "int8", "name": "age", "optype": "numeric", "order": 7, "preferred": true, "summary": {"bins": [[21.375, 24], [23.5, 18], [25.40909, 22], [27.5, 16], [29.42857, 14], [31, 6], [32, 5], [33, 1], [34, 4], [35, 1], [36, 1], [37, 5], [38, 3], [40, 5], [41, 5], [42, 4], [43, 2], [44, 1], [45, 3], [47, 1], [48, 1], [49, 1], [51, 1], [52, 1], [54, 1], [55, 1], [56, 1], [58, 2], [62, 1], [65, 1], [66, 1], [69, 1]], "exact_histogram": {"populations": [15, 18, 22, 17, 16, 12, 6, 5, 6, 3, 10, 6, 4, 1, 2, 1, 1, 2, 1, 2, 0, 1, 1, 1, 1], "start": 20, "width": 2}, "kurtosis": 1.75384, "maximum": 69, "mean": 31.55844, "median": 28, "minimum": 21, "missing_count": 0, "population": 154, "skewness": 1.43278, "standard_deviation": 10.43498, "sum": 4860, "sum_squares": 170034, "variance": 108.88872}}, "000008": {"column_number": 8, "datatype": "string", "name": "diabetes", "optype": "categorical", "order": 8, "preferred": true, "summary": {"categories": [["false", 106], ["true", 48]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "fields_meta": {"count": 9, "effective_fields": 9, "limit": -1, "offset": 0, "parent_optypes": {}, "preferred": 9, "provenances": {}, "query_total": 9, "total": 9}, "input_fields": ["000000", "000001", "000002", "000003", "000004", "000005", "000006", "000007", "000008"], "juxtapose": false, "locale": "en_US", "missing_numeric_rows": 0, "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "name": "diabetes", "name_options": "154 instances, 9 fields (1 categorical, 8 numeric), sample rate=0.2, out of bag", "new_fields": [], "number_of_anomalies": 0, "number_of_anomalyscores": 0, "number_of_associations": 0, "number_of_associationsets": 0, "number_of_batchanomalyscores": 0, "number_of_batchcentroids": 0, "number_of_batchpredictions": 0, "number_of_batchprojections": 0, "number_of_batchtopicdistributions": 0, "number_of_centroids": 0, "number_of_clusters": 0, "number_of_correlations": 0, "number_of_deepnets": 0, "number_of_ensembles": 0, "number_of_evaluations": 1, "number_of_forecasts": 0, "number_of_linearregressions": 0, "number_of_logisticregressions": 0, "number_of_models": 0, "number_of_optimls": 0, "number_of_pca": 0, "number_of_predictions": 0, "number_of_projections": 0, "number_of_statisticaltests": 0, "number_of_timeseries": 0, "number_of_topicdistributions": 0, "number_of_topicmodels": 0, "objective_field": {"column_number": 8, "datatype": "string", "id": "000008", "name": "diabetes", "optype": "categorical", "order": 8, "term_analysis": {"enabled": true}}, "optiml": null, "optiml_status": false, "origin_batch_dataset": null, "origin_batch_dataset_status": false, "origin_batch_model": null, "origin_batch_model_status": false, "origin_batch_resource": null, "origin_batch_status": false, "origin_dataset": "dataset/62e2bd555687096969004659", "out_of_bag": true, "output_fields": [], "price": 0.0, "private": true, "project": null, "refresh_field_types": false, "refresh_objective": false, "refresh_preferred": false, "replacement": false, "resource": "dataset/62e2bd65d432eb563000442e", 
"row_offset": 0, "row_step": 1, "rows": 154, "sample_rate": 0.8, "seed": "bigml", "shared": false, "size": 5251, "source": "source/62e2bd535687096969004656", "source_status": true, "sql_output_fields": [], "statisticaltest": null, "status": {"bytes": 26191, "code": 5, "elapsed": 1017, "extracted_count": 0, "field_errors": {}, "message": "The dataset has been created", "progress": 1, "row_format_errors": {"total": 0}, "serialized_rows": 154}, "subscription": true, "tags": [], "tde_download": {"code": 0, "excluded_input_fields": [], "input_fields": [], "message": "", "preview": []}, "term_limit": 1000, "timeseries": null, "timeseries_status": false, "type": 0, "updated": "2022-07-28T16:46:40.984000"}, "error": null} \ No newline at end of file diff --git a/bigml/tests/my_dataset/dataset_62e464778be2aa335a001548 b/bigml/tests/my_dataset/dataset_62e464778be2aa335a001548 new file mode 100644 index 00000000..29dfe07c --- /dev/null +++ b/bigml/tests/my_dataset/dataset_62e464778be2aa335a001548 @@ -0,0 +1 @@ +{"code": 200, "resource": "dataset/62e464778be2aa335a001548", "location": "https://bigml.io/andromeda/dataset/62e464778be2aa335a001548", "object": {"all_but": ["000004"], "all_fields": false, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 9, "configuration": null, "configuration_status": false, "correlations": {}, "created": "2022-07-29T22:51:35.732000", "creator": "mmartin", "dataset_origin_status": true, "description": "", "download": {"code": 0, "decimal_separator": ".", "excluded_input_fields": [], "header": true, "input_fields": [], "message": "", "new_line": "lf", "preview": [], "separator": ","}, "evaluation": null, "excluded_fields": [], "field_types": {"categorical": 2, "datetime": 0, "image": 0, "items": 0, "numeric": 7, "path": 0, "preferred": 9, "regions": 0, "text": 0, "total": 9}, "fields": {"000000": {"column_number": 0, "datatype": "int8", "generated": false, "name": "pregnancies", "optype": "numeric", "order": 0, "preferred": true, "summary": {"counts": [[0, 25], [1, 25], [2, 23], [3, 19], [4, 10], [5, 12], [6, 10], [7, 8], [8, 3], [9, 6], [10, 8], [11, 2], [13, 2], [14, 1]], "exact_histogram": {"populations": [50, 42, 22, 18, 9, 10, 2, 1], "start": 0, "width": 2}, "kurtosis": 0.16147, "maximum": 14, "mean": 3.66234, "median": 3, "minimum": 0, "missing_count": 0, "population": 154, "skewness": 0.95878, "standard_deviation": 3.32198, "sum": 564, "sum_squares": 3754, "variance": 11.03557}}, "000001": {"column_number": 1, "datatype": "int16", "generated": false, "name": "plasma glucose", "optype": "numeric", "order": 1, "preferred": true, "summary": {"bins": [[56, 1], [72, 1], [77.33333, 3], [81, 3], [83.71429, 7], [87.8, 5], [91.42857, 7], [95.14286, 7], [99.63636, 11], [102.6, 5], [107, 13], [111.22222, 9], [115.22222, 9], [120.25, 4], [123.5, 14], [128.81818, 11], [132.25, 4], [135, 3], [138.25, 4], [141, 1], [143.66667, 3], [146.42857, 7], [150.5, 2], [154.66667, 3], [161.5, 2], [164, 1], [170.5, 2], [176, 3], [179.66667, 3], [184, 2], [189, 1], [194.33333, 3]], "exact_histogram": {"populations": [1, 0, 0, 1, 3, 9, 6, 10, 9, 11, 13, 13, 6, 14, 10, 8, 7, 4, 7, 4, 1, 3, 0, 2, 4, 4, 1, 2, 1], "start": 55, "width": 5}, "kurtosis": 0.04986, "maximum": 196, "mean": 120.01948, "median": 115.5, "minimum": 56, "missing_count": 0, "population": 154, "skewness": 0.67008, "standard_deviation": 28.73919, "sum": 18483, "sum_squares": 2344689, "variance": 825.94079}}, "000002": {"column_number": 2, "datatype": "int8", "generated": false, "name": "blood 
pressure", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[0, 7], [24, 1], [48, 2], [52, 2], [54, 4], [56, 3], [58, 2], [60, 8], [61, 1], [62, 11], [64, 11], [65, 1], [66, 6], [68, 7], [70, 14], [72, 7], [74, 7], [75, 1], [76, 7], [78, 11], [80, 6], [82, 6], [84, 2], [85, 1], [86, 6], [88, 8], [90, 3], [92, 4], [94, 2], [100, 1], [104, 1], [110, 1]], "exact_histogram": {"populations": [7, 0, 0, 0, 1, 0, 0, 0, 0, 2, 6, 5, 31, 14, 28, 19, 14, 15, 9, 0, 2, 0, 1], "start": 0, "width": 5}, "kurtosis": 5.20109, "maximum": 110, "mean": 69.09091, "median": 70, "minimum": 0, "missing_count": 0, "population": 154, "skewness": -1.89704, "standard_deviation": 19.33684, "sum": 10640, "sum_squares": 792336, "variance": 373.91325}}, "000003": {"column_number": 3, "datatype": "int8", "generated": false, "name": "triceps skin thickness", "optype": "numeric", "order": 3, "preferred": true, "summary": {"bins": [[0, 38], [8, 1], [10.5, 4], [12.25, 4], [15.4, 5], [17, 1], [18.5, 10], [20.5, 6], [23, 7], [25.2, 10], [27.5, 10], [29, 4], [30, 9], [31, 5], [32, 4], [33, 3], [34, 2], [35, 2], [36, 5], [37, 2], [38, 1], [39, 5], [40, 1], [41, 2], [42, 3], [43, 1], [44, 1], [46, 3], [49, 1], [50, 1], [54, 2], [60, 1]], "exact_histogram": {"populations": [38, 0, 0, 0, 1, 4, 4, 3, 3, 10, 6, 7, 8, 7, 9, 14, 7, 4, 7, 6, 3, 4, 1, 3, 1, 1, 0, 2, 0, 0, 1], "start": 0, "width": 2}, "kurtosis": -0.89167, "maximum": 60, "mean": 21.57143, "median": 25, "minimum": 0, "missing_count": 0, "population": 154, "skewness": -0.05392, "standard_deviation": 15.31194, "sum": 3322, "sum_squares": 107532, "variance": 234.45565}}, "000005": {"column_number": 4, "datatype": "double", "generated": false, "name": "bmi", "optype": "numeric", "order": 4, "preferred": true, "summary": {"bins": [[0, 2], [18.2, 1], [19.5, 2], [20.15, 2], [21.05, 2], [21.8, 2], [22.65, 4], [23.325, 4], [24.35, 4], [25.4, 6], [26.175, 4], [27.44667, 15], [28.71667, 6], [29.81111, 9], [31.31667, 6], [32.2, 9], [33.06667, 3], [34.12353, 17], [35.05, 4], [35.66667, 6], [36.68571, 14], [38.16667, 6], [39.24, 5], [40.3, 2], [42.14, 5], [43.1, 6], [44.1, 2], [46.1, 1], [46.8, 1], [49.45, 2], [52.3, 1], [67.1, 1]], "exact_histogram": {"populations": [2, 0, 0, 4, 17, 36, 40, 34, 15, 4, 1, 0, 0, 1], "start": 0, "width": 5}, "kurtosis": 3.57751, "maximum": 67.1, "mean": 32.29351, "median": 32.4, "minimum": 0, "missing_count": 0, "population": 154, "skewness": -0.11158, "standard_deviation": 8.21685, "sum": 4973.2, "sum_squares": 170932.12, "variance": 67.51669}}, "000006": {"column_number": 5, "datatype": "double", "generated": false, "name": "diabetes pedigree", "optype": "numeric", "order": 5, "preferred": true, "summary": {"bins": [[0.111, 3], [0.15418, 11], [0.19875, 16], [0.24761, 23], [0.27833, 6], [0.309, 4], [0.33375, 12], [0.3698, 15], [0.39933, 3], [0.426, 1], [0.45025, 8], [0.48875, 4], [0.52225, 4], [0.551, 1], [0.57383, 6], [0.598, 3], [0.633, 2], [0.6606, 5], [0.6895, 2], [0.733, 4], [0.76367, 3], [0.787, 1], [0.816, 1], [0.838, 2], [0.878, 2], [0.933, 1], [0.95825, 4], [1.0645, 2], [1.098, 2], [1.189, 1], [1.224, 1], [1.893, 1]], "exact_histogram": {"populations": [21, 39, 31, 15, 13, 10, 8, 5, 5, 3, 2, 1, 0, 0, 0, 0, 0, 1], "start": 0.1, "width": 0.1}, "kurtosis": 4.04452, "maximum": 1.893, "mean": 0.44171, "median": 0.3565, "minimum": 0.1, "missing_count": 0, "population": 154, "skewness": 1.65698, "standard_deviation": 0.28009, "sum": 68.023, "sum_squares": 42.04958, "variance": 0.07845}}, "000007": {"column_number": 6, "datatype": 
"int8", "generated": false, "name": "age", "optype": "numeric", "order": 6, "preferred": true, "summary": {"bins": [[21.375, 24], [23.5, 18], [25.40909, 22], [27.5, 16], [29.42857, 14], [31, 6], [32, 5], [33, 1], [34, 4], [35, 1], [36, 1], [37, 5], [38, 3], [40, 5], [41, 5], [42, 4], [43, 2], [44, 1], [45, 3], [47, 1], [48, 1], [49, 1], [51, 1], [52, 1], [54, 1], [55, 1], [56, 1], [58, 2], [62, 1], [65, 1], [66, 1], [69, 1]], "exact_histogram": {"populations": [15, 18, 22, 17, 16, 12, 6, 5, 6, 3, 10, 6, 4, 1, 2, 1, 1, 2, 1, 2, 0, 1, 1, 1, 1], "start": 20, "width": 2}, "kurtosis": 1.75384, "maximum": 69, "mean": 31.55844, "median": 28, "minimum": 21, "missing_count": 0, "population": 154, "skewness": 1.43278, "standard_deviation": 10.43498, "sum": 4860, "sum_squares": 170034, "variance": 108.88872}}, "000008": {"column_number": 7, "datatype": "string", "generated": false, "name": "diabetes", "optype": "categorical", "order": 7, "preferred": true, "summary": {"categories": [["false", 106], ["true", 48]], "missing_count": 0}, "term_analysis": {"enabled": true}}, "100008": {"column_number": 8, "datatype": "string", "description": "", "generated": true, "label": "", "name": "age_range", "optype": "categorical", "order": 8, "preferred": true, "provenance": "flatline", "summary": {"categories": [["1st third", 55], ["2nd third", 50], ["3rd third", 49]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "fields_meta": {"count": 9, "effective_fields": 9, "limit": -1, "offset": 0, "parent_optypes": {}, "preferred": 9, "provenances": {"flatline": 1}, "query_total": 9, "total": 9}, "input_fields": ["000000", "000001", "000002", "000003", "000004", "000005", "000006", "000007", "000008"], "juxtapose": false, "locale": "en_US", "missing_numeric_rows": 0, "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "name": "diabetes [extended]", "name_options": "154 instances, 9 fields (2 categorical, 7 numeric)", "new_fields": [{"description": "", "generator": ["percentile-label", "age", "1st third", "2nd third", "3rd third"], "label": "", "name": "age_range"}], "number_of_anomalies": 0, "number_of_anomalyscores": 0, "number_of_associations": 0, "number_of_associationsets": 0, "number_of_batchanomalyscores": 0, "number_of_batchcentroids": 0, "number_of_batchpredictions": 0, "number_of_batchprojections": 0, "number_of_batchtopicdistributions": 0, "number_of_centroids": 0, "number_of_clusters": 0, "number_of_correlations": 0, "number_of_deepnets": 0, "number_of_ensembles": 0, "number_of_evaluations": 0, "number_of_forecasts": 0, "number_of_linearregressions": 0, "number_of_logisticregressions": 0, "number_of_models": 0, "number_of_optimls": 0, "number_of_pca": 0, "number_of_predictions": 0, "number_of_projections": 0, "number_of_statisticaltests": 0, "number_of_timeseries": 0, "number_of_topicdistributions": 0, "number_of_topicmodels": 0, "objective_field": {"column_number": 7, "datatype": "string", "generated": false, "id": "000008", "name": "diabetes", "optype": "categorical", "order": 7, "term_analysis": {"enabled": true}}, "optiml": null, "optiml_status": false, "origin_batch_dataset": null, "origin_batch_dataset_status": false, "origin_batch_model": null, "origin_batch_model_status": false, "origin_batch_resource": null, "origin_batch_status": false, "origin_dataset": "dataset/62e2bd65d432eb563000442e", "out_of_bag": false, "output_fields": [{"generator": "(all-but \"000004\")", "ids": 
["000000", "000001", "000002", "000003", "000005", "000006", "000007", "000008"], "json_generator": ["all-but", "000004"], "names": ["pregnancies", "plasma glucose", "blood pressure", "triceps skin thickness", "bmi", "diabetes pedigree", "age", "diabetes"]}, {"description": "", "generator": "(percentile-label \"age\" \"1st third\" \"2nd third\" \"3rd third\")", "ids": ["100008"], "json_generator": ["percentile-label", "age", "1st third", "2nd third", "3rd third"], "label": "", "name": "age_range", "names": ["age_range"]}], "price": 0.0, "private": true, "project": null, "refresh_field_types": false, "refresh_objective": false, "refresh_preferred": false, "replacement": false, "resource": "dataset/62e464778be2aa335a001548", "row_offset": 0, "row_step": 1, "rows": 154, "sample_rate": 1.0, "shared": false, "size": 6006, "source": "source/62e2bd535687096969004656", "source_status": true, "sql_output_fields": [], "statisticaltest": null, "status": {"bytes": 5251, "code": 5, "elapsed": 1488, "extracted_count": 0, "field_errors": {}, "message": "The dataset has been created", "progress": 1, "row_format_errors": {"total": 0}, "serialized_rows": 154}, "subscription": true, "tags": [], "tde_download": {"code": 0, "excluded_input_fields": [], "input_fields": [], "message": "", "preview": []}, "term_limit": 1000, "timeseries": null, "timeseries_status": false, "type": 0, "updated": "2022-07-29T22:51:38.889000"}, "error": null} \ No newline at end of file diff --git a/bigml/tests/my_dataset/my_flatline_ds.json b/bigml/tests/my_dataset/my_flatline_ds.json new file mode 100644 index 00000000..d7dcfcad --- /dev/null +++ b/bigml/tests/my_dataset/my_flatline_ds.json @@ -0,0 +1 @@ +{"code": 200, "resource": "dataset/62e954f3aba2df1257001252", "location": "https://bigml.io/andromeda/dataset/62e954f3aba2df1257001252", "object": {"all_but": ["000005"], "all_fields": false, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 9, "configuration": null, "configuration_status": false, "correlations": {}, "created": "2022-08-02T16:46:43.382000", "creator": "mmartin", "dataset_origin_status": true, "description": "", "download": {"code": 0, "decimal_separator": ".", "excluded_input_fields": [], "header": true, "input_fields": [], "message": "", "new_line": "lf", "preview": [], "separator": ","}, "evaluation": null, "excluded_fields": [], "field_types": {"categorical": 2, "datetime": 0, "image": 0, "items": 0, "numeric": 7, "path": 0, "preferred": 9, "regions": 0, "text": 0, "total": 9}, "fields": {"000000": {"column_number": 0, "datatype": "int8", "generated": false, "name": "pregnancies", "optype": "numeric", "order": 0, "preferred": true, "summary": {"counts": [[0, 25], [1, 25], [2, 23], [3, 19], [4, 10], [5, 12], [6, 10], [7, 8], [8, 3], [9, 6], [10, 8], [11, 2], [13, 2], [14, 1]], "exact_histogram": {"populations": [50, 42, 22, 18, 9, 10, 2, 1], "start": 0, "width": 2}, "kurtosis": 0.16147, "maximum": 14, "mean": 3.66234, "median": 3, "minimum": 0, "missing_count": 0, "population": 154, "skewness": 0.95878, "standard_deviation": 3.32198, "sum": 564, "sum_squares": 3754, "variance": 11.03557}}, "000001": {"column_number": 1, "datatype": "int16", "generated": false, "name": "plasma glucose", "optype": "numeric", "order": 1, "preferred": true, "summary": {"bins": [[56, 1], [72, 1], [77.33333, 3], [81, 3], [83.71429, 7], [87.8, 5], [91.42857, 7], [95.14286, 7], [99.63636, 11], [102.6, 5], [107, 13], [111.22222, 9], [115.22222, 9], [120.25, 4], [123.5, 14], [128.81818, 11], [132.25, 4], 
[135, 3], [138.25, 4], [141, 1], [143.66667, 3], [146.42857, 7], [150.5, 2], [154.66667, 3], [161.5, 2], [164, 1], [170.5, 2], [176, 3], [179.66667, 3], [184, 2], [189, 1], [194.33333, 3]], "exact_histogram": {"populations": [1, 0, 0, 1, 3, 9, 6, 10, 9, 11, 13, 13, 6, 14, 10, 8, 7, 4, 7, 4, 1, 3, 0, 2, 4, 4, 1, 2, 1], "start": 55, "width": 5}, "kurtosis": 0.04986, "maximum": 196, "mean": 120.01948, "median": 115.5, "minimum": 56, "missing_count": 0, "population": 154, "skewness": 0.67008, "standard_deviation": 28.73919, "sum": 18483, "sum_squares": 2344689, "variance": 825.94079}}, "000002": {"column_number": 2, "datatype": "int8", "generated": false, "name": "blood pressure", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[0, 7], [24, 1], [48, 2], [52, 2], [54, 4], [56, 3], [58, 2], [60, 8], [61, 1], [62, 11], [64, 11], [65, 1], [66, 6], [68, 7], [70, 14], [72, 7], [74, 7], [75, 1], [76, 7], [78, 11], [80, 6], [82, 6], [84, 2], [85, 1], [86, 6], [88, 8], [90, 3], [92, 4], [94, 2], [100, 1], [104, 1], [110, 1]], "exact_histogram": {"populations": [7, 0, 0, 0, 1, 0, 0, 0, 0, 2, 6, 5, 31, 14, 28, 19, 14, 15, 9, 0, 2, 0, 1], "start": 0, "width": 5}, "kurtosis": 5.20109, "maximum": 110, "mean": 69.09091, "median": 70, "minimum": 0, "missing_count": 0, "population": 154, "skewness": -1.89704, "standard_deviation": 19.33684, "sum": 10640, "sum_squares": 792336, "variance": 373.91325}}, "000003": {"column_number": 3, "datatype": "int8", "generated": false, "name": "triceps skin thickness", "optype": "numeric", "order": 3, "preferred": true, "summary": {"bins": [[0, 38], [8, 1], [10.5, 4], [12.25, 4], [15.4, 5], [17, 1], [18.5, 10], [20.5, 6], [23, 7], [25.2, 10], [27.5, 10], [29, 4], [30, 9], [31, 5], [32, 4], [33, 3], [34, 2], [35, 2], [36, 5], [37, 2], [38, 1], [39, 5], [40, 1], [41, 2], [42, 3], [43, 1], [44, 1], [46, 3], [49, 1], [50, 1], [54, 2], [60, 1]], "exact_histogram": {"populations": [38, 0, 0, 0, 1, 4, 4, 3, 3, 10, 6, 7, 8, 7, 9, 14, 7, 4, 7, 6, 3, 4, 1, 3, 1, 1, 0, 2, 0, 0, 1], "start": 0, "width": 2}, "kurtosis": -0.89167, "maximum": 60, "mean": 21.57143, "median": 25, "minimum": 0, "missing_count": 0, "population": 154, "skewness": -0.05392, "standard_deviation": 15.31194, "sum": 3322, "sum_squares": 107532, "variance": 234.45565}}, "000006": {"column_number": 4, "datatype": "double", "generated": false, "name": "diabetes pedigree", "optype": "numeric", "order": 4, "preferred": true, "summary": {"bins": [[0.111, 3], [0.15418, 11], [0.19875, 16], [0.24761, 23], [0.27833, 6], [0.309, 4], [0.33375, 12], [0.3698, 15], [0.39933, 3], [0.426, 1], [0.45025, 8], [0.48875, 4], [0.52225, 4], [0.551, 1], [0.57383, 6], [0.598, 3], [0.633, 2], [0.6606, 5], [0.6895, 2], [0.733, 4], [0.76367, 3], [0.787, 1], [0.816, 1], [0.838, 2], [0.878, 2], [0.933, 1], [0.95825, 4], [1.0645, 2], [1.098, 2], [1.189, 1], [1.224, 1], [1.893, 1]], "exact_histogram": {"populations": [21, 39, 31, 15, 13, 10, 8, 5, 5, 3, 2, 1, 0, 0, 0, 0, 0, 1], "start": 0.1, "width": 0.1}, "kurtosis": 4.04452, "maximum": 1.893, "mean": 0.44171, "median": 0.3565, "minimum": 0.1, "missing_count": 0, "population": 154, "skewness": 1.65698, "standard_deviation": 0.28009, "sum": 68.023, "sum_squares": 42.04958, "variance": 0.07845}}, "000007": {"column_number": 5, "datatype": "int8", "generated": false, "name": "age", "optype": "numeric", "order": 5, "preferred": true, "summary": {"bins": [[21.375, 24], [23.5, 18], [25.40909, 22], [27.5, 16], [29.42857, 14], [31, 6], [32, 5], [33, 1], [34, 4], [35, 1], [36, 1], 
[37, 5], [38, 3], [40, 5], [41, 5], [42, 4], [43, 2], [44, 1], [45, 3], [47, 1], [48, 1], [49, 1], [51, 1], [52, 1], [54, 1], [55, 1], [56, 1], [58, 2], [62, 1], [65, 1], [66, 1], [69, 1]], "exact_histogram": {"populations": [15, 18, 22, 17, 16, 12, 6, 5, 6, 3, 10, 6, 4, 1, 2, 1, 1, 2, 1, 2, 0, 1, 1, 1, 1], "start": 20, "width": 2}, "kurtosis": 1.75384, "maximum": 69, "mean": 31.55844, "median": 28, "minimum": 21, "missing_count": 0, "population": 154, "skewness": 1.43278, "standard_deviation": 10.43498, "sum": 4860, "sum_squares": 170034, "variance": 108.88872}}, "000008": {"column_number": 6, "datatype": "string", "generated": false, "name": "diabetes", "optype": "categorical", "order": 6, "preferred": true, "summary": {"categories": [["false", 106], ["true", 48]], "missing_count": 0}, "term_analysis": {"enabled": true}}, "100008": {"column_number": 7, "datatype": "string", "description": "", "generated": true, "label": "", "name": "age_range", "optype": "categorical", "order": 7, "preferred": true, "provenance": "flatline", "summary": {"categories": [["1st third", 55], ["2nd third", 50], ["3rd third", 49]], "missing_count": 0}, "term_analysis": {"enabled": true}}, "100009": {"column_number": 8, "datatype": "double", "description": "", "generated": true, "label": "", "name": "glucose half", "optype": "numeric", "order": 8, "preferred": true, "provenance": "flatline", "summary": {"bins": [[28, 1], [36, 1], [38.66667, 3], [40.5, 3], [41.85714, 7], [43.9, 5], [45.71429, 7], [47.57143, 7], [49.81818, 11], [51.3, 5], [53.5, 13], [55.61111, 9], [57.61111, 9], [60.125, 4], [61.75, 14], [64.40909, 11], [66.125, 4], [67.5, 3], [69.125, 4], [70.5, 1], [71.83333, 3], [73.21429, 7], [75.25, 2], [77.33333, 3], [80.75, 2], [82, 1], [85.25, 2], [88, 3], [89.83333, 3], [92, 2], [94.5, 1], [97.16667, 3]], "exact_histogram": {"populations": [1, 0, 4, 15, 19, 24, 19, 24, 15, 11, 5, 3, 6, 5, 3], "start": 25, "width": 5}, "kurtosis": 0.04986, "maximum": 98, "mean": 60.00974, "median": 57.75, "minimum": 28, "missing_count": 0, "population": 154, "skewness": 0.67008, "standard_deviation": 14.36959, "sum": 9241.5, "sum_squares": 586172.25, "variance": 206.4852}}}, "fields_meta": {"count": 9, "effective_fields": 9, "limit": -1, "offset": 0, "parent_optypes": {}, "preferred": 9, "provenances": {"flatline": 2}, "query_total": 9, "total": 9}, "input_fields": ["000000", "000001", "000002", "000003", "000005", "000006", "000007", "000008", "100008"], "juxtapose": false, "locale": "en_US", "missing_numeric_rows": 0, "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "name": "diabetes transf test", "name_options": "154 instances, 9 fields (2 categorical, 7 numeric)", "new_fields": [{"description": "", "generator": "(/ (f \"plasma glucose\") 2)", "label": "", "name": "glucose half"}], "number_of_anomalies": 0, "number_of_anomalyscores": 0, "number_of_associations": 0, "number_of_associationsets": 0, "number_of_batchanomalyscores": 0, "number_of_batchcentroids": 0, "number_of_batchpredictions": 1, "number_of_batchprojections": 0, "number_of_batchtopicdistributions": 0, "number_of_centroids": 0, "number_of_clusters": 0, "number_of_correlations": 0, "number_of_deepnets": 0, "number_of_ensembles": 0, "number_of_evaluations": 0, "number_of_forecasts": 0, "number_of_linearregressions": 0, "number_of_logisticregressions": 0, "number_of_models": 1, "number_of_optimls": 0, "number_of_pca": 0, "number_of_predictions": 
0, "number_of_projections": 0, "number_of_statisticaltests": 0, "number_of_timeseries": 0, "number_of_topicdistributions": 0, "number_of_topicmodels": 0, "objective_field": {"column_number": 6, "datatype": "string", "generated": false, "id": "000008", "name": "diabetes", "optype": "categorical", "order": 6, "term_analysis": {"enabled": true}}, "optiml": null, "optiml_status": false, "origin_batch_dataset": null, "origin_batch_dataset_status": false, "origin_batch_model": null, "origin_batch_model_status": false, "origin_batch_resource": null, "origin_batch_status": false, "origin_dataset": "dataset/62e464778be2aa335a001548", "out_of_bag": false, "output_fields": [{"generator": "(all-but \"000005\")", "ids": ["000000", "000001", "000002", "000003", "000006", "000007", "000008", "100008"], "json_generator": ["all-but", "000005"], "names": ["pregnancies", "plasma glucose", "blood pressure", "triceps skin thickness", "diabetes pedigree", "age", "diabetes", "age_range"]}, {"description": "", "generator": "(/ (f \"plasma glucose\") 2)", "ids": ["100009"], "json_generator": ["/", ["f", "plasma glucose"], 2], "label": "", "name": "glucose half", "names": ["glucose half"]}], "price": 0.0, "private": true, "project": null, "refresh_field_types": false, "refresh_objective": false, "refresh_preferred": false, "replacement": false, "resource": "dataset/62e954f3aba2df1257001252", "row_offset": 0, "row_step": 1, "rows": 154, "sample_rate": 1.0, "shared": false, "size": 6006, "source": "source/62e2bd535687096969004656", "source_status": true, "sql_output_fields": [], "statisticaltest": null, "status": {"bytes": 6006, "code": 5, "elapsed": 2366, "extracted_count": 0, "field_errors": {}, "message": "The dataset has been created", "progress": 1, "row_format_errors": {"total": 0}, "serialized_rows": 154}, "subscription": true, "tags": [], "tde_download": {"code": 0, "excluded_input_fields": [], "input_fields": [], "message": "", "preview": []}, "term_limit": 1000, "timeseries": null, "timeseries_status": false, "type": 0, "updated": "2022-08-03T15:19:48.959000"}, "error": null} \ No newline at end of file diff --git a/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004802.py b/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004802.py index 45f6fa34..c63911e9 100644 --- a/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004802.py +++ b/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004802.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python + # -*- coding: utf-8 -*- def predict_final(prefix=None, assignment=None, diff --git a/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004804.py b/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004804.py index 4c79e585..32c47f4c 100644 --- a/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004804.py +++ b/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004804.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python + # -*- coding: utf-8 -*- def predict_final(prefix=None, assignment=None, diff --git a/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004806.py b/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004806.py index c97ec427..925cfb07 100644 --- a/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004806.py +++ b/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004806.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python + # -*- coding: utf-8 -*- def predict_final(prefix=None, assignment=None, diff --git a/bigml/tests/my_no_root_ensemble/ensemble.json b/bigml/tests/my_no_root_ensemble/ensemble.json new file mode 100644 index 00000000..99791a67 --- /dev/null +++ b/bigml/tests/my_no_root_ensemble/ensemble.json 
@@ -0,0 +1 @@ +{"code": 200, "resource": "ensemble/6182d9bfb3ef374f3c0031de", "location": "https://bigml.io/andromeda/ensemble/6182d9bfb3ef374f3c0031de", "object": {"boosting": null, "category": 0, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2021-11-03T18:49:35.307000", "creator": "merce_demo", "credits": 0.018146514892578125, "credits_per_prediction": 0.0, "dataset": "dataset/604f5f06cb4f96592d004959", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "distributions": [{"importance": [["000002", 0.72739], ["000003", 0.27261]], "predictions": {"categories": [["Iris-setosa", 54], ["Iris-versicolor", 49], ["Iris-virginica", 47]]}, "training": {"categories": [["Iris-setosa", 54], ["Iris-versicolor", 49], ["Iris-virginica", 47]]}}, {"importance": [["000002", 0.94621], ["000000", 0.02353], ["000003", 0.01864], ["000001", 0.01161]], "predictions": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 49], ["Iris-virginica", 51]]}, "training": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 49], ["Iris-virginica", 51]]}}, {"importance": [["000002", 0.93437], ["000003", 0.05554], ["000001", 0.01009]], "predictions": {"categories": [["Iris-setosa", 52], ["Iris-versicolor", 44], ["Iris-virginica", 54]]}, "training": {"categories": [["Iris-setosa", 52], ["Iris-versicolor", 44], ["Iris-virginica", 54]]}}], "ensemble": {"fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], 
[6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "order": 3, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}}, "ensemble_sample": {"rate": 1.0, "replacement": true, "seed": "986709f2dd2340b1b860954a3ea806c3"}, "error_models": 0, "fields_meta": {"count": 5, "limit": 1000, "offset": 0, "query_total": 5, "total": 5}, "finished_models": 3, "focus_field": null, "focus_field_name": null, "importance": {"000000": 0.00784, "000001": 0.00723, "000002": 0.86933, "000003": 0.1156}, "input_fields": ["000000", "000001", "000002", "000003"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "models": ["model/6182d9c7f731fb7252001d37", "model/6182d9c7f731fb7252001d39", "model/6182d9c8f731fb7252001d3b"], "name": "iris", "name_options": "bootstrap decision forest, 512-node, 3-model, pruned, deterministic order", "node_threshold": 512, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_models": 3, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000004", "objective_field_details": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4}, "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["000004"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/604f5ee5c1c0000b90003cc5", "randomize": false, "range": null, "replacement": false, "resource": "ensemble/6182d9bfb3ef374f3c0031de", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4757, "source": "source/604f5ef647d775129e0011a7", "source_status": true, "split_candidates": 32, "split_field": null, "split_field_name": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 4644, "message": "The ensemble has been created", "progress": 1}, "subscription": false, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2021-11-03T18:49:44.894000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/tests/my_no_root_ensemble/model_6182d9c7f731fb7252001d37 b/bigml/tests/my_no_root_ensemble/model_6182d9c7f731fb7252001d37 new file 
mode 100644 index 00000000..70d044f6 --- /dev/null +++ b/bigml/tests/my_no_root_ensemble/model_6182d9c7f731fb7252001d37 @@ -0,0 +1 @@ +{"code": 200, "resource": "model/6182d9c7f731fb7252001d37", "location": "https://bigml.io/andromeda/model/6182d9c7f731fb7252001d37", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2021-11-03T18:49:43.557000", "creator": "merce_demo", "credits": 0.0, "credits_per_prediction": 0.0, "dataset": "dataset/604f5f06cb4f96592d004959", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "6182d9bfb3ef374f3c0031de", "ensemble_index": 0, "excluded_fields": [], "fields_meta": {"count": 5, "limit": 1000, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000002", "000003"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 54], ["Iris-versicolor", 49], ["Iris-virginica", 47]]}, "training": {"categories": [["Iris-setosa", 54], ["Iris-versicolor", 49], ["Iris-virginica", 47]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 
0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "order": 3, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000002", 0.72739], ["000003", 0.27261]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "preferred": true}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "preferred": true}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "preferred": true, "term_analysis": {"enabled": true}}}, "node_threshold": 512, "root": {"children": [{"children": [{"confidence": 0.91799, "count": 43, "id": 2, "objective_summary": {"categories": [["Iris-virginica", 43]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 1.75}}, {"children": [{"children": [{"confidence": 0.43849, "count": 3, "id": 5, "objective_summary": {"categories": [["Iris-virginica", 3]]}, "output": "Iris-virginica", "predicate": {"field": "000002", "operator": ">", "value": 5.45}}, {"children": [{"confidence": 0.43849, "count": 3, "id": 7, "objective_summary": {"categories": [["Iris-versicolor", 3]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": ">", "value": 1.55}}, {"confidence": 0.20654, "count": 1, "id": 8, "objective_summary": {"categories": [["Iris-virginica", 1]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": "<=", "value": 1.55}}], "confidence": 0.30064, "count": 4, "id": 6, "objective_summary": {"categories": [["Iris-versicolor", 3], ["Iris-virginica", 1]]}, "output": "Iris-versicolor", "predicate": {"field": "000002", "operator": "<=", "value": 5.45}}], "confidence": 0.25045, "count": 7, "id": 4, "objective_summary": {"categories": [["Iris-virginica", 4], ["Iris-versicolor", 3]]}, "output": "Iris-virginica", "predicate": {"field": "000002", "operator": ">", "value": 4.95}}, {"confidence": 0.92292, "count": 46, "id": 9, "objective_summary": {"categories": [["Iris-versicolor", 46]]}, "output": "Iris-versicolor", "predicate": {"field": "000002", "operator": "<=", "value": 
4.95}}], "confidence": 0.82141, "count": 53, "id": 3, "objective_summary": {"categories": [["Iris-versicolor", 49], ["Iris-virginica", 4]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.75}}], "confidence": 0.41196, "count": 96, "id": 1, "objective_summary": {"categories": [["Iris-versicolor", 49], ["Iris-virginica", 47]]}, "output": "Iris-versicolor", "predicate": {"field": "000002", "operator": ">", "value": 2.45}}, {"confidence": 0.93358, "count": 54, "id": 10, "objective_summary": {"categories": [["Iris-setosa", 54]]}, "output": "Iris-setosa", "predicate": {"field": "000002", "operator": "<=", "value": 2.45}}], "confidence": 0.28756, "count": 150, "id": 0, "objective_summary": {"categories": [["Iris-setosa", 54], ["Iris-versicolor", 49], ["Iris-virginica", 47]]}, "output": "Iris-setosa", "predicate": true}}, "name": "iris - 0", "name_options": "512-node, pruned, deterministic order", "node_threshold": 512, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000004", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["000004"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/604f5ee5c1c0000b90003cc5", "randomize": false, "range": null, "replacement": false, "resource": "model/6182d9c7f731fb7252001d37", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4757, "source": "source/604f5ef647d775129e0011a7", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 0.0}, "subscription": false, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2021-11-03T18:49:43.918000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/tests/my_no_root_ensemble/model_6182d9c7f731fb7252001d37.py b/bigml/tests/my_no_root_ensemble/model_6182d9c7f731fb7252001d37.py new file mode 100644 index 00000000..e69de29b diff --git a/bigml/tests/my_no_root_ensemble/model_6182d9c7f731fb7252001d39 b/bigml/tests/my_no_root_ensemble/model_6182d9c7f731fb7252001d39 new file mode 100644 index 00000000..dc36581c --- /dev/null +++ b/bigml/tests/my_no_root_ensemble/model_6182d9c7f731fb7252001d39 @@ -0,0 +1 @@ +{"code": 200, "resource": "model/6182d9c7f731fb7252001d39", "location": "https://bigml.io/andromeda/model/6182d9c7f731fb7252001d39", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2021-11-03T18:49:43.942000", "creator": "merce_demo", "credits": 0.0, "credits_per_prediction": 0.0, "dataset": "dataset/604f5f06cb4f96592d004959", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "6182d9bfb3ef374f3c0031de", "ensemble_index": 1, "excluded_fields": [], "fields_meta": {"count": 5, "limit": 1000, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000002", "000003"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": 
{"categories": [["Iris-setosa", 50], ["Iris-versicolor", 49], ["Iris-virginica", 51]]}, "training": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 49], ["Iris-virginica", 51]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "order": 3, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "summary": {"categories": [["Iris-setosa", 
50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000002", 0.94621], ["000000", 0.02353], ["000003", 0.01864], ["000001", 0.01161]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "preferred": true}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "preferred": true}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "preferred": true}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "preferred": true}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "preferred": true, "term_analysis": {"enabled": true}}}, "node_threshold": 512}, "name": "iris - 1", "name_options": "512-node, pruned, deterministic order", "node_threshold": 512, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000004", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["000004"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/604f5ee5c1c0000b90003cc5", "randomize": false, "range": null, "replacement": false, "resource": "model/6182d9c7f731fb7252001d39", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4757, "source": "source/604f5ef647d775129e0011a7", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 0.0}, "subscription": false, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2021-11-03T18:49:44.363000", "white_box": false}, "error": null} diff --git a/bigml/tests/my_no_root_ensemble/model_6182d9c8f731fb7252001d3b b/bigml/tests/my_no_root_ensemble/model_6182d9c8f731fb7252001d3b new file mode 100644 index 00000000..cd9211db --- /dev/null +++ b/bigml/tests/my_no_root_ensemble/model_6182d9c8f731fb7252001d3b @@ -0,0 +1 @@ +{"code": 200, "resource": "model/6182d9c8f731fb7252001d3b", "location": "https://bigml.io/andromeda/model/6182d9c8f731fb7252001d3b", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2021-11-03T18:49:44.383000", "creator": "merce_demo", "credits": 0.0, "credits_per_prediction": 0.0, "dataset": "dataset/604f5f06cb4f96592d004959", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "6182d9bfb3ef374f3c0031de", "ensemble_index": 2, "excluded_fields": [], "fields_meta": {"count": 5, "limit": 1000, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000002", "000003"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 
52], ["Iris-versicolor", 44], ["Iris-virginica", 54]]}, "training": {"categories": [["Iris-setosa", 52], ["Iris-versicolor", 44], ["Iris-virginica", 54]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "order": 3, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], 
["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000002", 0.93437], ["000003", 0.05554], ["000001", 0.01009]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "preferred": true}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "preferred": true}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "preferred": true}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "preferred": true}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "preferred": true, "term_analysis": {"enabled": true}}}, "node_threshold": 512, "root": {"children": [{"children": [{"confidence": 0.92865, "count": 50, "id": 2, "objective_summary": {"categories": [["Iris-virginica", 50]]}, "output": "Iris-virginica", "predicate": {"field": "000002", "operator": ">", "value": 4.95}}, {"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 5, "objective_summary": {"categories": [["Iris-versicolor", 1]]}, "output": "Iris-versicolor", "predicate": {"field": "000001", "operator": ">", "value": 3.1}}, {"confidence": 0.5101, "count": 4, "id": 6, "objective_summary": {"categories": [["Iris-virginica", 4]]}, "output": "Iris-virginica", "predicate": {"field": "000001", "operator": "<=", "value": 3.1}}], "confidence": 0.37553, "count": 5, "id": 4, "objective_summary": {"categories": [["Iris-virginica", 4], ["Iris-versicolor", 1]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 1.7}}, {"confidence": 0.91799, "count": 43, "id": 7, "objective_summary": {"categories": [["Iris-versicolor", 43]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.7}}], "confidence": 0.80446, "count": 48, "id": 3, "objective_summary": {"categories": [["Iris-versicolor", 44], ["Iris-virginica", 4]]}, "output": "Iris-versicolor", "predicate": {"field": "000002", "operator": "<=", "value": 4.95}}], "confidence": 0.45247, "count": 98, "id": 1, "objective_summary": {"categories": [["Iris-virginica", 54], ["Iris-versicolor", 44]]}, "output": "Iris-virginica", "predicate": {"field": "000002", "operator": ">", "value": 2.45}}, {"confidence": 0.93121, "count": 52, "id": 8, "objective_summary": {"categories": [["Iris-setosa", 52]]}, "output": "Iris-setosa", "predicate": {"field": "000002", "operator": "<=", "value": 2.45}}], "confidence": 0.28756, "count": 150, "id": 0, "objective_summary": {"categories": [["Iris-virginica", 54], ["Iris-setosa", 52], ["Iris-versicolor", 44]]}, "output": "Iris-virginica", "predicate": true}}, "name": "iris - 2", "name_options": "512-node, pruned, deterministic order", "node_threshold": 512, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000004", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["000004"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/604f5ee5c1c0000b90003cc5", "randomize": false, "range": null, "replacement": false, 
"resource": "model/6182d9c8f731fb7252001d3b", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4757, "source": "source/604f5ef647d775129e0011a7", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 0.0}, "subscription": false, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2021-11-03T18:49:44.881000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/tests/pipeline3.zip b/bigml/tests/pipeline3.zip new file mode 100644 index 00000000..aae690e8 Binary files /dev/null and b/bigml/tests/pipeline3.zip differ diff --git a/bigml/tests/read_association_steps.py b/bigml/tests/read_association_steps.py deleted file mode 100644 index 22084214..00000000 --- a/bigml/tests/read_association_steps.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2015-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from world import world -from nose.tools import eq_ -from bigml.api import HTTP_OK - -#@step(r'I get the association "(.*)"') -def i_get_the_association(step, resource): - resource = world.api.get_association(resource) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.association = resource['object'] diff --git a/bigml/tests/read_batch_prediction_steps.py b/bigml/tests/read_batch_prediction_steps.py deleted file mode 100644 index 7ef65323..00000000 --- a/bigml/tests/read_batch_prediction_steps.py +++ /dev/null @@ -1,43 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2015-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -from world import world -from nose.tools import eq_ - -from bigml.api import HTTP_OK - -#@step(r'I get the batch prediction "(.*)"') -def i_get_the_batch_prediction(step, batch_prediction): - resource = world.api.get_batch_prediction(batch_prediction) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.batch_prediction = resource['object'] - -#@step(r'I get the batch centroid "(.*)"') -def i_get_the_batch_centroid(step, batch_centroid): - resource = world.api.get_batch_centroid(batch_centroid) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.batch_centroid = resource['object'] - -#@step(r'I get the batch anomaly score "(.*)"') -def i_get_the_batch_anomaly_score(step, batch_anomaly_score): - resource = world.api.get_batch_anomaly_score(batch_anomaly_score) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.batch_anomaly_score = resource['object'] diff --git a/bigml/tests/read_batch_projection_steps.py b/bigml/tests/read_batch_projection_steps.py deleted file mode 100644 index ce7600eb..00000000 --- a/bigml/tests/read_batch_projection_steps.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2018-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -from world import world -from nose.tools import eq_ - -from bigml.api import HTTP_OK - -#@step(r'I get the batch projection "(.*)"') -def i_get_the_batch_projection(step, batch_projection): - resource = world.api.get_batch_projection(batch_projection) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.batch_projection = resource['object'] diff --git a/bigml/tests/read_cluster_steps.py b/bigml/tests/read_cluster_steps.py deleted file mode 100644 index 5ea69fd8..00000000 --- a/bigml/tests/read_cluster_steps.py +++ /dev/null @@ -1,36 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2015-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -from world import world -from nose.tools import eq_ - -from bigml.api import HTTP_OK - -#@step(r'I get the cluster "(.*)"') -def i_get_the_cluster(step, cluster): - resource = world.api.get_cluster(cluster) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.cluster = resource['object'] - -#@step(r'I get the centroid "(.*)"') -def i_get_the_centroid(step, centroid): - resource = world.api.get_centroid(centroid) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.centroid = resource['object'] diff --git a/bigml/tests/read_configuration_steps.py b/bigml/tests/read_configuration_steps.py deleted file mode 100644 index da99e3d3..00000000 --- a/bigml/tests/read_configuration_steps.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2017-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -from world import world -from nose.tools import eq_ - -from bigml.api import HTTP_OK - -#@step(r'I get the configuration "(.*)"') -def i_get_the_configuration(step, configuration): - resource = world.api.get_configuration(configuration) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.configuration = resource['object'] diff --git a/bigml/tests/read_correlation_steps.py b/bigml/tests/read_correlation_steps.py deleted file mode 100644 index 45356131..00000000 --- a/bigml/tests/read_correlation_steps.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2015-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from world import world -from nose.tools import eq_ -from bigml.api import HTTP_OK - -#@step(r'I get the correlation "(.*)"') -def i_get_the_correlation(step, resource): - resource = world.api.get_correlation(resource) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.correlation = resource['object'] diff --git a/bigml/tests/read_dataset_steps.py b/bigml/tests/read_dataset_steps.py index 5979f6cf..026b361c 100644 --- a/bigml/tests/read_dataset_steps.py +++ b/bigml/tests/read_dataset_steps.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,no-member # -# Copyright 2012-2019 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -17,33 +17,26 @@ import json -from world import world -from bigml.api import HTTP_OK from bigml.fields import Fields -from nose.tools import eq_, assert_not_equal +from .world import world, eq_, ok_ -#@step(r'I get the dataset "(.*)"') -def i_get_the_dataset(step, dataset): - resource = world.api.get_dataset(dataset) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.dataset = resource['object'] - -#@step(r'I ask for the missing values counts in the fields') def i_get_the_missing_values(step): + """Step: I ask for the missing values counts in the fields""" resource = world.dataset fields = Fields(resource['fields']) - world.step_result = fields.missing_counts() + step.bigml["result"] = fields.missing_counts() -#@step(r'I ask for the error counts in the fields') def i_get_the_errors_values(step): + """Step: I ask for the error counts in the fields """ resource = world.dataset - world.step_result = world.api.error_counts(resource) + step.bigml["result"] = world.api.error_counts(resource) -#@step(r'the (missing values counts|error counts) dict is "(.*)"') -def i_get_the_properties_values(step, text, properties_dict): - assert_not_equal(None, properties_dict) - eq_(world.step_result, json.loads(properties_dict)) +def i_get_the_properties_values(step, properties_dict): + """Step: the (missing values counts|error counts) dict + is <properties_dict> + """ + ok_(properties_dict is not None) + eq_(step.bigml["result"], json.loads(properties_dict)) diff --git a/bigml/tests/read_ensemble_steps.py b/bigml/tests/read_ensemble_steps.py deleted file mode 100644 index cc39ba53..00000000 --- a/bigml/tests/read_ensemble_steps.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2012-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -from world import world -from nose.tools import eq_ - -from bigml.api import HTTP_OK - -#@step(r'I get the ensemble "(.*)"') -def i_get_the_ensemble(step, ensemble): - resource = world.api.get_ensemble(ensemble) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.ensemble = resource['object'] diff --git a/bigml/tests/read_evaluation_steps.py b/bigml/tests/read_evaluation_steps.py deleted file mode 100644 index 1131e163..00000000 --- a/bigml/tests/read_evaluation_steps.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2012-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License.
- -import os -from world import world -from nose.tools import eq_ - -from bigml.api import HTTP_OK - -#@step(r'I get the evaluation "(.*)"') -def i_get_the_evaluation(step, evaluation): - resource = world.api.get_evaluation(evaluation) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.evaluation = resource['object'] diff --git a/bigml/tests/read_execution_steps.py b/bigml/tests/read_execution_steps.py deleted file mode 100644 index 9562008e..00000000 --- a/bigml/tests/read_execution_steps.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2015-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from world import world -from nose.tools import eq_ -from bigml.api import HTTP_OK - -#@step(r'I get the execution "(.*)"') -def i_get_the_execution(step, resource): - resource = world.api.get_execution(resource) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.execution = resource['object'] diff --git a/bigml/tests/read_forecast_steps.py b/bigml/tests/read_forecast_steps.py deleted file mode 100644 index 9465730d..00000000 --- a/bigml/tests/read_forecast_steps.py +++ /dev/null @@ -1,28 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2017-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -from world import world -from nose.tools import eq_ -from bigml.api import HTTP_OK - -#@step(r'I get the forecast "(.*)"') -def i_get_the_forecast(step, forecast): - resource = world.api.get_forecast(forecast) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.prediction = resource['object'] diff --git a/bigml/tests/read_lda_steps.py b/bigml/tests/read_lda_steps.py deleted file mode 100644 index 5ecaef51..00000000 --- a/bigml/tests/read_lda_steps.py +++ /dev/null @@ -1,36 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2016-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -from world import world -from nose.tools import eq_ - -from bigml.api import HTTP_OK - -#@step(r'I get the topic model "(.*)"') -def i_get_the_topic_model(step, topic_model): - resource = world.api.get_topic_model(topic_model) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.topic_model = resource['object'] - -#@step(r'I get the topic distribution "(.*)"') -def i_get_the_topic_distribution(step, topic_distribution): - resource = world.api.get_topic_distribution(topic_distribution) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.topic_distribution = resource['object'] diff --git a/bigml/tests/read_library_steps.py b/bigml/tests/read_library_steps.py deleted file mode 100644 index 4e6cb71c..00000000 --- a/bigml/tests/read_library_steps.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2015-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from world import world -from nose.tools import eq_ -from bigml.api import HTTP_OK - -#@step(r'I get the library "(.*)"') -def i_get_the_library(step, resource): - resource = world.api.get_library(resource) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.library = resource['object'] diff --git a/bigml/tests/read_linear_steps.py b/bigml/tests/read_linear_steps.py deleted file mode 100644 index 49cb4e8b..00000000 --- a/bigml/tests/read_linear_steps.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from world import world -from nose.tools import eq_ -from bigml.api import HTTP_OK - -#@step(r'I get the linear "(.*)"') -def i_get_the_linear_regression(step, resource): - resource = world.api.get_linear_regression(resource) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.linear_regression = resource['object'] diff --git a/bigml/tests/read_model_steps.py b/bigml/tests/read_model_steps.py deleted file mode 100644 index a2d8fb5c..00000000 --- a/bigml/tests/read_model_steps.py +++ /dev/null @@ -1,61 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2012-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -from world import world -from nose.tools import eq_ - -from bigml.api import HTTP_OK - -#@step(r'I get the model "(.*)"') -def i_get_the_model(step, model): - resource = world.api.get_model(model) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.model = resource['object'] - - -#@step(r'I get the logistic regression model "(.*)"') -def i_get_the_logistic_model(step, model): - resource = world.api.get_logistic_regression(model) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.logistic_regression = resource['object'] - - -#@step(r'I get the deepnet model "(.*)"') -def i_get_the_deepnet_model(step, deepnet): - resource = world.api.get_deepnet(deepnet) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.deepnet = resource['object'] - - -#@step(r'I get the optiml "(.*)"') -def i_get_the_optiml(step, optiml): - resource = world.api.get_optiml(optiml) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.optiml = resource['object'] - - -#@step(r'I get the fusion "(.*)"') -def i_get_the_fusion(step, fusion): - resource = world.api.get_fusion(fusion) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.fusion = resource['object'] diff --git a/bigml/tests/read_pca_steps.py b/bigml/tests/read_pca_steps.py deleted file mode 100644 index ced8eff4..00000000 --- a/bigml/tests/read_pca_steps.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2018-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from world import world -from nose.tools import eq_ -from bigml.api import HTTP_OK - -#@step(r'I get the pca "(.*)"') -def i_get_the_pca(step, resource): - resource = world.api.get_pca(resource) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.pca = resource['object'] diff --git a/bigml/tests/read_prediction_steps.py b/bigml/tests/read_prediction_steps.py deleted file mode 100644 index 7541fff3..00000000 --- a/bigml/tests/read_prediction_steps.py +++ /dev/null @@ -1,28 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2012-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -from world import world -from nose.tools import eq_ -from bigml.api import HTTP_OK - -#@step(r'I get the prediction "(.*)"') -def i_get_the_prediction(step, prediction): - resource = world.api.get_prediction(prediction) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.prediction = resource['object'] diff --git a/bigml/tests/read_project_steps.py b/bigml/tests/read_project_steps.py deleted file mode 100644 index 18938da0..00000000 --- a/bigml/tests/read_project_steps.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2014-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from world import world -from nose.tools import eq_ -from bigml.api import HTTP_OK - -def i_get_the_project(step, resource): - resource = world.api.get_project(resource) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.project = resource['object'] diff --git a/bigml/tests/read_projection_steps.py b/bigml/tests/read_projection_steps.py deleted file mode 100644 index d4212f19..00000000 --- a/bigml/tests/read_projection_steps.py +++ /dev/null @@ -1,28 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2018-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -from world import world -from nose.tools import eq_ -from bigml.api import HTTP_OK - -#@step(r'I get the prediction "(.*)"') -def i_get_the_projection(step, projection): - resource = world.api.get_projection(projection) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.projection = resource['object'] diff --git a/bigml/tests/read_resource_steps.py b/bigml/tests/read_resource_steps.py new file mode 100644 index 00000000..bf702e04 --- /dev/null +++ b/bigml/tests/read_resource_steps.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import time + +from datetime import datetime + +from bigml.api import HTTP_OK, get_status, get_resource_type + +from .world import world, logged_wait, eq_, ok_ + + +def wait_until_status_code_is(code1, code2, secs, resource_info): + """Waits until the resource status code is one of the two given codes + or the allowed time is exhausted, re-reading the resource info from + the API on each poll, and returns the resulting full resource info + """ + + start = datetime.utcnow() + delta = int(secs) * world.delta + resource_info = world.get_minimal_resource( + resource_info['resource']).get("object") + status = get_status(resource_info) + count = 0 + while (status['code'] != int(code1) and + status['code'] != int(code2)): + count += 1 + resource_type = get_resource_type(resource_info["resource"]) + logged_wait(start, delta, count, resource_type, status=status) + ok_((datetime.utcnow() - start).seconds < delta) + resource_info = world.get_minimal_resource( + resource_info['resource']).get("object") + status = get_status(resource_info) + if status['code'] == int(code2): + world.errors.append(resource_info) + eq_(status['code'], int(code1)) + time.sleep(0.1) # added to avoid sync issues with Mongo + return i_get_the_resource(resource_info) + + +def i_get_the_resource(resource_info): + """Step: I get the resource """ + resource = world.get_maximal_resource(resource_info["resource"]) + world.status = resource['code'] + eq_(world.status, HTTP_OK) + return resource['object'] diff --git a/bigml/tests/read_sample_steps.py b/bigml/tests/read_sample_steps.py deleted file mode 100644 index cb0df080..00000000 --- a/bigml/tests/read_sample_steps.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2015-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from world import world -from nose.tools import eq_ -from bigml.api import HTTP_OK - -#@step(r'I get the sample "(.*)"') -def i_get_the_sample(step, resource): - resource = world.api.get_sample(resource) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.sample = resource['object'] diff --git a/bigml/tests/read_script_steps.py b/bigml/tests/read_script_steps.py deleted file mode 100644 index b293b16e..00000000 --- a/bigml/tests/read_script_steps.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2015-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License.
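The wait_until_status_code_is helper added above is a poll-with-deadline loop: re-read the resource, compare its status against a success code and a failure code, record failed resources in world.errors, and abort through ok_ once secs * world.delta seconds have elapsed. Stripped of the test-world plumbing, the pattern reduces to the sketch below, where get_info, done_code and error_code are illustrative names rather than bindings API:

    import time
    from datetime import datetime

    def wait_for_status(get_info, done_code, error_code, timeout_secs):
        """Poll get_info() until its status code is final or time runs out."""
        start = datetime.utcnow()
        while (datetime.utcnow() - start).seconds < timeout_secs:
            info = get_info()
            code = info["status"]["code"]
            if code == error_code:
                raise RuntimeError("resource failed: %s" % info)
            if code == done_code:
                return info
            time.sleep(1)  # back off between polls
        raise TimeoutError("not finished after %s seconds" % timeout_secs)

The committed helper additionally sleeps for 0.1 seconds before the final read to sidestep sync issues with Mongo.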
- -from world import world -from nose.tools import eq_ -from bigml.api import HTTP_OK - -#@step(r'I get the script "(.*)"') -def i_get_the_script(step, resource): - resource = world.api.get_script(resource) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.script = resource['object'] diff --git a/bigml/tests/read_source_steps.py b/bigml/tests/read_source_steps.py deleted file mode 100644 index ed2c8ac6..00000000 --- a/bigml/tests/read_source_steps.py +++ /dev/null @@ -1,35 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2012-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from world import world -from nose.tools import eq_ -from bigml.api import HTTP_OK - -#@step(r'I get the source "(.*)"') -def i_get_the_source(step, resource): - resource = world.api.get_source(resource) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.source = resource['object'] - -#@step(r'the source has DEV (True|False)') -def source_has_dev(step, boolean): - if boolean == 'False': - boolean = '' - boolean = bool(boolean) - dev = world.source['dev'] - eq_(dev, boolean) diff --git a/bigml/tests/read_statistical_tst_steps.py b/bigml/tests/read_statistical_tst_steps.py deleted file mode 100644 index d4e733d3..00000000 --- a/bigml/tests/read_statistical_tst_steps.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2015-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from nose.tools import eq_ -from world import world -from bigml.api import HTTP_OK - -#@step(r'I get the test "(.*)"') -def i_get_the_tst(step, resource): - resource = world.api.get_statistical_test(resource) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.statistical_test = resource['object'] diff --git a/bigml/tests/read_time_series_steps.py b/bigml/tests/read_time_series_steps.py deleted file mode 100644 index 8ed8f61c..00000000 --- a/bigml/tests/read_time_series_steps.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2017-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
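All the per-resource read modules deleted here are superseded by the single generic step in read_resource_steps.py. The consolidation works because a BigML resource id encodes its own type and bigml.api.get_resource_type recovers it, so one function can serve every resource. A sketch of that dispatch idea, assuming the getattr lookup shown (the committed code instead routes through world.get_maximal_resource):

    from bigml.api import HTTP_OK, get_resource_type

    def get_any_resource(api, resource_id):
        # e.g. "sample/5f2..." -> "sample" -> api.get_sample
        resource_type = get_resource_type(resource_id)
        getter = getattr(api, "get_%s" % resource_type)
        resource = getter(resource_id)
        assert resource["code"] == HTTP_OK
        return resource["object"]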
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -from world import world -from nose.tools import eq_ - -from bigml.api import HTTP_OK - -#@step(r'I get the time series "(.*)"') -def i_get_the_time_series(step, time_series): - resource = world.api.get_time_series(time_series) - world.status = resource['code'] - eq_(world.status, HTTP_OK) - world.time_series = resource['object'] diff --git a/bigml/tests/test_01_prediction.py b/bigml/tests/test_01_prediction.py index bac33cf4..7a97fd6d 100644 --- a/bigml/tests/test_01_prediction.py +++ b/bigml/tests/test_01_prediction.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,249 +20,251 @@ """ Testing prediction creation """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_model_steps as model_create -import create_cluster_steps as cluster_create -import create_anomaly_steps as anomaly_create -import create_lda_steps as topic_create -import create_prediction_steps as prediction_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_cluster_steps as cluster_create +from . import create_anomaly_steps as anomaly_create +from . import create_lda_steps as topic_create +from . 
import create_prediction_steps as prediction_create -class TestPrediction(object): +class TestPrediction: + """Test predictions""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a prediction: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - When I create a prediction for "" - Then the prediction for "" is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | - | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | 000004 | Iris-setosa | - | ../data/iris_sp_chars.csv | 10 | 10 | 10 | {"pétal&width\u0000": 0.5} | 000004 | Iris-setosa | + Scenario 1: Successfully creating a prediction: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + When I create a prediction for "" + Then the prediction for "" is "" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction"] examples = [ - ['data/iris.csv', '30', '30', '30', '{"petal width": 0.5}', '000004', 'Iris-setosa'], - ['data/iris_sp_chars.csv', '30', '30', '30', '{"pétal&width\u0000": 0.5}', '000004', 'Iris-setosa']] + ['data/iris.csv', '30', '30', '30', + '{"petal width": 0.5}', '000004', 'Iris-setosa'], + ['data/iris_sp_chars.csv', '30', '30', '30', + '{"pétal&width\\u0000": 0.5}', '000004', 'Iris-setosa']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) - prediction_create.i_create_a_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"], + shared=example["data"]) + source_create.the_source_is_finished(self, example["source_wait"], + shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_create.i_create_a_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], 
example["prediction"]) def test_scenario2(self): """ - Scenario: Successfully creating a prediction from a source in a remote location + Scenario 2: Successfully creating a prediction from a source in a remote location - Given I create a data source using the url "" - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - When I create a prediction for "" - Then the prediction for "" is "" + Given I create a data source using the url "" + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + When I create a prediction for "" + Then the prediction for "" is "" - Examples: - | url | time_1 | time_2 | time_3 | data_input | objective | prediction | - | s3://bigml-public/csv/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | 000004 | Iris-setosa | """ - print self.test_scenario2.__doc__ + show_doc(self.test_scenario2) + headers = ["url", "wait_source", "wait_dataset", "wait_model", + "input_data", "objective_id", "prediction"] examples = [ - ['s3://bigml-public/csv/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']] + ['s3://bigml-public/csv/iris.csv', '10', '10', '10', + '{"petal width": 0.5}', '000004', 'Iris-setosa']] for example in examples: - print "\nTesting with:\n", example - source_create.i_create_using_url(self, example[0]) - source_create.the_source_is_finished(self, example[1]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_create_using_url(self, example["url"]) + source_create.the_source_is_finished(self, example["wait_source"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["wait_dataset"]) model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) - prediction_create.i_create_a_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) + model_create.the_model_is_finished_in_less_than( + self, example["wait_model"]) + prediction_create.i_create_a_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) def test_scenario3(self): """ - Scenario: Successfully creating a prediction from a asynchronous uploaded file: - Given I create a data source uploading a "" file in asynchronous mode - And I wait until the source has been created less than secs - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - When I create a prediction for "" - Then the prediction for "" is "" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | data_input | objective | prediction | - | ../data/iris.csv | 10 | 10 | 10 | 10 | {"petal width": 0.5} | 000004 | Iris-setosa | + Scenario 3: Successfully creating a prediction from inline data source: + Given I create a data source from inline data slurped from "" + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + 
And I wait until the model is ready less than secs + When I create a prediction for "" + Then the prediction for "" is "" """ - print self.test_scenario3.__doc__ + show_doc(self.test_scenario3) + headers = ["data", "wait_source", "wait_dataset", "wait_model", + "input_data", "objective_id", "prediction"] examples = [ - ['data/iris.csv', '10', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']] + ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', + '000004', 'Iris-setosa']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file_async(self, example[0]) - source_create.the_source_has_been_created_async(self, example[1]) - source_create.the_source_is_finished(self, example[2]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_create_using_dict_data( + self, example["data"]) + source_create.the_source_is_finished(self, example["wait_source"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[3]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["wait_dataset"]) model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[4]) - prediction_create.i_create_a_prediction(self, example[5]) - prediction_create.the_prediction_is(self, example[6], example[7]) - + model_create.the_model_is_finished_in_less_than( + self, example["wait_model"]) + prediction_create.i_create_a_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) def test_scenario4(self): """ - Scenario: Successfully creating a prediction from inline data source: - Given I create a data source from inline data slurped from "" - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - When I create a prediction for "" - Then the prediction for "" is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | - | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | 000004 | Iris-setosa | + Scenario 4: Successfully creating a centroid and the associated dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster + And I wait until the cluster is ready less than secs + When I create a centroid for "" + And I check the centroid is ok + Then the centroid is "" + And I create a dataset from the cluster and the centroid + And I wait until the dataset is ready less than secs + And I check that the dataset is created for the cluster and the centroid """ - print self.test_scenario4.__doc__ + show_doc(self.test_scenario4) + headers = ["data", "wait_source", "wait_dataset", "wait_cluster", + "input_data", "centroid"] examples = [ - ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']] + ['data/diabetes.csv', '10', '20', '20', + '{"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84,' + ' "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8,' + ' "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"}', + 'Cluster 3']] for example in examples: - print "\nTesting with:\n", example - source_create.i_create_using_dict_data(self, example[0]) - 
source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) - prediction_create.i_create_a_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished(self, example["wait_source"], + shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["wait_dataset"], shared=example["data"]) + cluster_create.i_create_a_cluster(self, shared=example["data"]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["wait_cluster"], shared=example["data"]) + prediction_create.i_create_a_centroid(self, example["input_data"]) + prediction_create.the_centroid_is(self, example["centroid"]) def test_scenario5(self): """ - Scenario: Successfully creating a centroid and the associated dataset: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a cluster - And I wait until the cluster is ready less than secs - When I create a centroid for "" - And I check the centroid is ok - Then the centroid is "" - And I create a dataset from the cluster and the centroid - And I wait until the dataset is ready less than secs - And I check that the dataset is created for the cluster and the centroid - - Examples: - | data | time_1 | time_2 | time_3 | data_input | centroid | - | ../data/diabetes.csv | 10 | 20 | 20 | {"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"} | Cluster 3 | + Scenario 5: Successfully creating an anomaly score: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an anomaly detector from a dataset + And I wait until the anomaly detector is ready less than secs + When I create an anomaly score for "" + Then the anomaly score is "" """ - print self.test_scenario5.__doc__ + show_doc(self.test_scenario5) + headers = ["data", "wait_source", "wait_dataset", "wait_anomaly", + "input_data", "score"] examples = [ - ['data/diabetes.csv', '10', '20', '20', '{"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"}', 'Cluster 3']] + ['data/tiny_kdd.csv', '10', '10', '100', + '{"src_bytes": 350}', '0.92846'], + ['data/iris_sp_chars.csv', '10', '10', '100', + '{"pétal&width\\u0000": 300}', '0.89313']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - cluster_create.i_create_a_cluster(self) - cluster_create.the_cluster_is_finished_in_less_than(self, example[3]) - 
prediction_create.i_create_a_centroid(self, example[4]) - prediction_create.the_centroid_is(self, example[5]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["wait_source"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["wait_dataset"], shared=example["data"]) + anomaly_create.i_create_an_anomaly(self, shared=example["data"]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["wait_anomaly"], shared=example["data"]) + prediction_create.i_create_an_anomaly_score( + self, example["input_data"]) + prediction_create.the_anomaly_score_is(self, example["score"]) def test_scenario6(self): """ - Scenario: Successfully creating an anomaly score: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create an anomaly detector from a dataset - And I wait until the anomaly detector is ready less than secs - When I create an anomaly score for "" - Then the anomaly score is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | score | - | ../data/tiny_kdd.csv | 10 | 10 | 100 | {"src_bytes": 350} | 0.92618 | - | ../data/iris_sp_chars.csv | 10 | 10 | 100 | {"pétal&width\u0000": 300} | 0.90198 | - """ - print self.test_scenario6.__doc__ - examples = [ - ['data/tiny_kdd.csv', '10', '10', '100', '{"src_bytes": 350}', '0.92846'], - ['data/iris_sp_chars.csv', '10', '10', '100', '{"pétal&width\u0000": 300}', '0.89313']] - for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - anomaly_create.i_create_an_anomaly(self) - anomaly_create.the_anomaly_is_finished_in_less_than(self, example[3]) - prediction_create.i_create_an_anomaly_score(self, example[4]) - prediction_create.the_anomaly_score_is(self, example[5]) - - - def test_scenario7(self): - """ - Scenario: Successfully creating a Topic Model: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I update the source with params "" - And I create a dataset - And I wait until the dataset is ready less than secs - When I create a Topic Model from a dataset - Then I wait until the Topic Model is ready less than secs - - Examples: - | data | time_1 | time_2 | time_3 | params - | ../data/movies.csv | 10 | 10 | 100 | {"fields": {"genre": {"optype": "items", "item_analysis": {"separator": "$"}}, "title": {"optype": "text"}}} + Scenario 6: Successfully creating a Topic Model: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + And I create a dataset + And I wait until the dataset is ready less than secs + When I create a Topic Model from a dataset + Then I wait until the Topic Model is ready less than secs """ - print self.test_scenario7.__doc__ + show_doc(self.test_scenario6) + headers = ["data", "wait_source", "wait_dataset", "wait_topic", + "source_params"] examples = [ - ['data/movies.csv', '10', '10', '100', '{"fields": {"000007": 
{"optype": "items", "item_analysis": {"separator": "$"}}, "000006": {"optype": "text"}}}']] + ['data/movies.csv', '10', '10', '100', + '{"fields": {"000007": {"optype": "items", "item_analysis":' + ' {"separator": "$"}}, "000006": {"optype": "text"}}}']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - source_create.i_update_source_with(self, data=example[4]) - source_create.the_source_is_finished(self, example[1]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["wait_source"]) + source_create.i_update_source_with(self, example["source_params"]) + source_create.the_source_is_finished(self, example["wait_source"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["wait_dataset"]) topic_create.i_create_a_topic_model(self) - topic_create.the_topic_model_is_finished_in_less_than(self, example[3]) + topic_create.the_topic_model_is_finished_in_less_than( + self, example["wait_topic"]) diff --git a/bigml/tests/test_03_local_prediction.py b/bigml/tests/test_03_local_prediction.py index 04635920..e746accd 100644 --- a/bigml/tests/test_03_local_prediction.py +++ b/bigml/tests/test_03_local_prediction.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,44 +20,201 @@ """ Testing local prediction """ -from world import world, setup_module, teardown_module -import compare_predictions_steps as prediction_compare +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import compare_predictions_steps as prediction_compare +from . import create_ensemble_steps as ensemble_create +from . 
import create_prediction_steps as prediction_create -class TestLocalPrediction(object): +class TestLocalPrediction: + """Testing local predictions """ - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a prediction from a local model in a json file: - Given I create a local model from a "" file - When I create a local prediction for "" with confidence - Then the local prediction is "" - And the local prediction's confidence is "" + Scenario 1: Successfully creating a prediction from a local model in a json file: + Given I create a local model from a "" file + When I create a local prediction for "" with confidence + Then the local prediction is "" + And the local prediction's confidence is "" + """ + show_doc(self.test_scenario1) + headers = ["file_path", "input_data", "prediction", "confidence"] + examples = [ + ['data/iris_model.json', '{"petal length": 0.5}', 'Iris-setosa', + '0.90594'], + ['data/iris_model.json', '{"petal length": "0.5"}', 'Iris-setosa', + '0.90594']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + prediction_compare.i_create_a_local_model_from_file( + self, example["file_path"]) + prediction_compare.i_create_a_local_prediction_with_confidence( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) + + def test_scenario2(self): + """ + Scenario 2: Successfully creating a prediction from a local model in a json file: + Given I create a local model using SupervisedModel from a "" file + When I create a local prediction for "" with confidence + Then the local prediction is "" + And the local prediction's confidence is "" + """ + show_doc(self.test_scenario2) + headers = ["file_path", "input_data", "prediction", "confidence"] + examples = [ + ['data/iris_model.json', '{"petal length": 0.5}', 'Iris-setosa', + '0.90594'], + ['data/iris_model.json', '{"petal length": "0.5"}', 'Iris-setosa', + '0.90594']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + prediction_compare.i_create_a_local_supervised_model_from_file( + self, example["file_path"]) + prediction_compare.i_create_a_local_prediction_with_confidence( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) + + + def test_scenario3(self): + """ + Scenario 3: Successfully creating a local prediction from an Ensemble created from file storage: + Given I create a local Ensemble from path "" + When I create a local ensemble prediction with confidence for "" + Then the local prediction is "" + And the local prediction's confidence is "" + And the local probabilities are "" + """ + show_doc(self.test_scenario3) + headers = ["file_path", "input_data", "prediction", "confidence", + "probabilities"] + examples = [ + 
['bigml/tests/my_no_root_ensemble/ensemble.json', + '{"petal width": 0.5}', 'Iris-setosa', '0.3533', + '["0.3533", "0.31", "0.33666"]' ]] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + ensemble_create.create_local_ensemble( + self, path=example["file_path"]) + prediction_create.create_local_ensemble_prediction_probabilities( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) + prediction_compare.the_local_probabilities_are( + self, example["probabilities"]) + + def test_scenario4(self): + """ + Scenario 4: Successfully creating a local prediction from an Ensemble created from file storage: + Given I create a local SupervisedModel from path "" + When I create a local ensemble prediction with confidence for "" + Then the local prediction is "" + And the local prediction's confidence is "" + And the local probabilities are "" + """ + show_doc(self.test_scenario4) + headers = ["file_path", "input_data", "prediction", "confidence", + "probabilities"] + examples = [ + ['bigml/tests/my_no_root_ensemble/ensemble.json', + '{"petal width": 0.5}', 'Iris-setosa', '0.3533', + '["0.3533", "0.31", "0.33666"]' ]] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + prediction_compare.i_create_a_local_supervised_model_from_file( + self, example["file_path"]) + prediction_compare.i_create_a_local_prediction_with_confidence( + self, example["input_data"]) + prediction_compare.i_create_local_probabilities( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) + prediction_compare.the_local_probabilities_are( + self, example["probabilities"]) - Examples: - | model | data_input | prediction | confidence - | ../data/iris_model.json | {"petal length": 0.5} | Iris-setosa | 0.90594 + def test_scenario5(self): + """ + Scenario 5: Successfully creating a prediction from a local images deepnet in a json file: + Given I create a local deepnet from a "" file + When I create a local prediction for "" + Then the local prediction is "" + """ + show_doc(self.test_scenario5) + headers = ["file_path", "input_data", "operation_settings", + "prediction"] + examples = [ + ['data/imgs_deepnet.zip', "data/images/cats/pexels-pixabay-33358.jpg", + {"region_score_threshold": 0.7}, + ('{"prediction": [{"box": [0.68164, 0.30469, 0.79688, 0.36979], ' + '"label": "eye", "score": 0.79633}, ' + '{"box": [0.38086, 0.27865, 0.50391, 0.36068], ' + '"label": "eye", "score": 0.74563}]}')]] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + prediction_compare.i_create_a_local_deepnet_from_zip_file( + self, example["file_path"], + operation_settings=example["operation_settings"]) + prediction_compare.i_create_a_local_regions_prediction( + self, example["input_data"]) + prediction_compare.the_local_regions_prediction_is( + self, example["prediction"]) + def test_scenario6(self): + """ + Scenario 6: Successfully creating a prediction from a ShapWrapper of a model in a json file: + Given I create a local model using ShapWrapper from a "" file + When I create a local prediction for "" + Then the local prediction is "" + When I create a local probabilities 
prediction for "" + Then the local probabilities prediction is "" """ - print self.test_scenario1.__doc__ + import numpy as np + show_doc(self.test_scenario6) + headers = ["file_path", "numpy_input", "prediction", "proba_prediction"] examples = [ - ['data/iris_model.json', '{"petal length": 0.5}', 'Iris-setosa', '0.90594'], - ['data/iris_model.json', '{"petal length": "0.5"}', 'Iris-setosa', '0.90594']] + ['data/iris_model.json', np.asarray([np.asarray([0.5,1.0,1.0])]), + 0., [0.9818, 0.00921, 0.00899]]] for example in examples: - print "\nTesting with:\n", example - prediction_compare.i_create_a_local_model_from_file(self, example[0]) - prediction_compare.i_create_a_local_prediction_with_confidence(self, example[1]) - prediction_compare.the_local_prediction_is(self, example[2]) - prediction_compare.the_local_prediction_confidence_is(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + prediction_compare.i_create_a_local_shap_wrapper_from_file( + self, example["file_path"]) + prediction_compare.i_create_a_shap_local_prediction( + self, example["numpy_input"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_compare.i_create_shap_local_probabilities( + self, example["numpy_input"]) + prediction_compare.the_local_proba_prediction_is( + self, example["proba_prediction"]) diff --git a/bigml/tests/test_04_multivote_prediction.py b/bigml/tests/test_04_multivote_prediction.py index 604b5a13..b66f5abd 100644 --- a/bigml/tests/test_04_multivote_prediction.py +++ b/bigml/tests/test_04_multivote_prediction.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,44 +20,41 @@ """ Testing MultiVote predictions """ -from world import world, setup_module, teardown_module -import compute_multivote_prediction_steps as multivote_prediction +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . 
import compute_multivote_prediction_steps as multivote_prediction -class TestMultiVotePrediction(object): +class TestMultiVotePrediction: + """Testing MultiVote methods""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully computing predictions combinations: - Given I create a MultiVote for the set of predictions in file - When I compute the prediction with confidence using method "" - And I compute the prediction without confidence using method "" - Then the combined prediction is "" - And the combined prediction without confidence is "" - And the confidence for the combined prediction is - - Examples: - | predictions | method | prediction | confidence | - | ../data/predictions_c.json| 0 | a | 0.45047 | - | ../data/predictions_c.json| 1 | a | 0.55202 | - | ../data/predictions_c.json| 2 | a | 0.40363 | - | ../data/predictions_r.json| 0 | 1.55555556667 | 0.40008 | - | ../data/predictions_r.json| 1 | 1.59376845074 | 0.24837 | - | ../data/predictions_r.json| 2 | 1.55555556667 | 0.40008 | + Scenario 1: Successfully computing predictions combinations: + Given I create a MultiVote for the set of predictions in file + When I compute the prediction with confidence using method "" + And I compute the prediction without confidence using method "" + Then the combined prediction is "" + And the combined prediction without confidence is "" + And the confidence for the combined prediction is """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["predictions_file", "method", "prediction", "confidence"] examples = [ ['data/predictions_c.json', '0', 'a', '0.45047'], ['data/predictions_c.json', '1', 'a', '0.55202'], @@ -65,10 +63,17 @@ def test_scenario1(self): ['data/predictions_r.json', '1', '1.59376845074', '0.24837'], ['data/predictions_r.json', '2', '1.55555556667', '0.40008']] for example in examples: - print "\nTesting with:\n", example - multivote_prediction.i_create_a_multivote(self, example[0]) - multivote_prediction.compute_prediction(self, example[1]) - multivote_prediction.compute_prediction_no_confidence(self, example[1]) - multivote_prediction.check_combined_prediction(self, example[2]) - multivote_prediction.check_combined_prediction_no_confidence(self, example[2]) - multivote_prediction.check_combined_confidence(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + multivote_prediction.i_create_a_multivote( + self, example["predictions_file"]) + multivote_prediction.compute_prediction( + self, example["method"]) + multivote_prediction.compute_prediction_no_confidence( + self, example["method"]) + multivote_prediction.check_combined_prediction( + self, example["prediction"]) + multivote_prediction.check_combined_prediction_no_confidence( + self, example["prediction"]) + multivote_prediction.check_combined_confidence( + self, example["confidence"]) diff --git a/bigml/tests/test_05_compare_predictions.py b/bigml/tests/test_05_compare_predictions.py index 4455585d..7cebde55 100644 --- a/bigml/tests/test_05_compare_predictions.py 
+++ b/bigml/tests/test_05_compare_predictions.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,788 +20,556 @@ """ Comparing remote and local predictions """ -from world import world, setup_module, teardown_module, show_doc -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_model_steps as model_create -import create_association_steps as association_create -import create_cluster_steps as cluster_create -import create_anomaly_steps as anomaly_create -import create_prediction_steps as prediction_create -import compare_predictions_steps as prediction_compare -import create_lda_steps as topic_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_prediction_steps as prediction_create +from . import compare_predictions_steps as prediction_compare -class TestComparePrediction(object): +class TestComparePrediction: + """Comparing remote and local predictions""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully comparing predictions: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - And I create a local model - When I create a prediction for "" - Then the prediction for "" is "" - And I create a local prediction for "" - Then the local prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | - - """ + Scenario 1: Successfully comparing predictions: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction"] examples = [ - ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa'], - ['data/iris.csv', '10', '10', '10', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica'], - ['data/iris.csv', '10', '10', '10', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor'], - ['data/iris_sp_chars.csv', '10', 
'10', '10', '{"pétal.length": 4, "pétal&width\u0000": 1.5}', '000004', 'Iris-versicolor']] - show_doc(self.test_scenario1, examples) + ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', + '000004', 'Iris-setosa'], + ['data/iris.csv', '10', '10', '10', + '{"petal length": 6, "petal width": 2}', '000004', + 'Iris-virginica'], + ['data/iris.csv', '10', '10', '10', + '{"petal length": 4, "petal width": 1.5}', '000004', + 'Iris-versicolor'], + ['data/iris_sp_chars.csv', '10', '10', '10', + '{"pétal.length": 4, "pétal&width\\u0000": 1.5}', '000004', + 'Iris-versicolor']] + show_doc(self.test_scenario1) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"], + shared=example["data"]) + source_create.the_source_is_finished(self, example["source_wait"], + shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) prediction_compare.i_create_a_local_model(self) - prediction_create.i_create_a_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_compare.i_create_a_local_prediction(self, example[4]) - prediction_compare.the_local_prediction_is(self, example[6]) + prediction_create.i_create_a_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) def test_scenario2(self): """ - Scenario: Successfully comparing predictions with text options: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I update the source with params "" - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - And I create a local model - When I create a prediction for "" - Then the prediction for "" is "" - And I create a local prediction for "" - Then the local prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | options | data_input | objective | prediction | - - """ + Scenario 2: Successfully comparing predictions with text options: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + 
"source_conf", "input_data", "objective_id", "prediction"] examples = [ - ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile call"}', '000000', 'spam'], - ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', '000000', 'ham'], - ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile calls"}', '000000', 'spam'], - ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', '000000', 'ham'], - ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "Mobile call"}', '000000', 'spam'], - ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "A normal message"}', '000000', 'ham'], - ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. 
zed POBox 36504 W45WQ norm150p/tone 16+"}', '000000', 'spam'], - ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "Ok"}', '000000', 'ham'], - ['data/movies.csv', '20', '20', '30', '{"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}}', '{"genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student"}', '000009', '3.92135'], - ['data/text_missing.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{}', "000003", 'swap']] - show_doc(self.test_scenario2, examples) + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": true, "stem_words": true, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Message": "Mobile call"}', '000000', 'spam'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": true, "stem_words": true, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Message": "A normal message"}', '000000', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": false, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Message": "Mobile calls"}', '000000', 'spam'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": false, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Message": "A normal message"}', '000000', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": true, ' + '"use_stopwords": true, "language": "en"}}}}', + '{"Message": "Mobile call"}', '000000', 'spam'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": true, ' + '"use_stopwords": true, "language": "en"}}}}', + '{"Message": "A normal message"}', '000000', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "full_terms_only", "language": "en"}}}}', + '{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every ' + 'week just txt NOKIA to 87077 Get txting and tell ur mates. 
zed ' + 'POBox 36504 W45WQ norm150p/tone 16+"}', '000000', 'spam'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "full_terms_only", "language": "en"}}}}', + '{"Message": "Ok"}', '000000', 'ham'], + ['data/movies.csv', '20', '20', '30', + '{"fields": {"000007": {"optype": "items", "item_analysis": ' + '{"separator": "$"}}}}', '{"genres": "Adventure$Action", ' + '"timestamp": 993906291, "occupation": "K-12 student"}', + '000009', '3.92135'], + ['data/text_missing.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "all", "language": "en"}}, "000000": {"optype": ' + '"text", "term_analysis": {"token_mode": "all", ' + '"language": "en"}}}}', '{}', "000003", 'swap']] + show_doc(self.test_scenario2) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - source_create.i_update_source_with(self, example[4]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_model(self) - prediction_create.i_create_a_prediction(self, example[5]) - prediction_create.the_prediction_is(self, example[6], example[7]) - prediction_compare.i_create_a_local_prediction(self, example[5]) - prediction_compare.the_local_prediction_is(self, example[7]) + prediction_create.i_create_a_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) def test_scenario3(self): """ - Scenario: Successfully comparing predictions with proportional missing strategy: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - And I create a local model - When I create a proportional missing strategy prediction for "" - Then the prediction for "" is "" - And the confidence for the prediction is "" - And I create a proportional missing strategy local prediction for "" - Then the local prediction is "" - And the local prediction's confidence is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | confidence | - - """ + Scenario 3: Successfully comparing predictions with proportional missing strategy: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + When I 
create a proportional missing strategy prediction for "" + Then the prediction for "" is "" + And the confidence for the prediction is "" + And I create a proportional missing strategy local prediction for "" + Then the local prediction is "" + And the local prediction's confidence is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "confidence"] examples = [ - ['data/iris.csv', '50', '30', '30', '{}', '000004', 'Iris-setosa', '0.2629'], - ['data/grades.csv', '50', '30', '30', '{}', '000005', '68.62224', '27.5358'], - ['data/grades.csv', '50', '30', '30', '{"Midterm": 20}', '000005', '40.46667', '54.89713'], - ['data/grades.csv', '50', '30', '30', '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}', '000005', '28.06', '25.65806']] - show_doc(self.test_scenario3, examples) + ['data/iris.csv', '50', '30', '30', '{}', '000004', 'Iris-setosa', + '0.2629'], + ['data/grades.csv', '50', '30', '30', '{}', '000005', '68.62224', + '27.5358'], + ['data/grades.csv', '50', '30', '30', '{"Midterm": 20}', '000005', + '40.46667', '54.89713'], + ['data/grades.csv', '50', '30', '30', + '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}', '000005', + '28.06', '25.65806']] + show_doc(self.test_scenario3) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) prediction_compare.i_create_a_local_model(self) - prediction_create.i_create_a_proportional_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_create.the_confidence_is(self, example[7]) - prediction_compare.i_create_a_proportional_local_prediction(self, example[4]) - prediction_compare.the_local_prediction_is(self, example[6]) - prediction_compare.the_local_prediction_confidence_is(self, example[7]) + prediction_create.i_create_a_proportional_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_create.the_confidence_is(self, example["confidence"]) + prediction_compare.i_create_a_proportional_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) def test_scenario4(self): """ - Scenario: Successfully comparing predictions with proportional missing strategy for missing_splits models: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is 
ready less than secs - And I create a model with missing splits - And I wait until the model is ready less than secs - And I create a local model - When I create a proportional missing strategy prediction for "" - Then the prediction for "" is "" - And the confidence for the prediction is "" - And I create a proportional missing strategy local prediction for "" - Then the local prediction is "" - And the local prediction's confidence is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | confidence | - """ + Scenario 4: Successfully comparing predictions with proportional missing strategy for missing_splits models: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with missing splits + And I wait until the model is ready less than secs + And I create a local model + When I create a proportional missing strategy prediction for "" + Then the prediction for "" is "" + And the confidence for the prediction is "" + And I create a proportional missing strategy local prediction for "" + Then the local prediction is "" + And the local prediction's confidence is "" + And the highest local prediction's confidence is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "confidence"] examples = [ - ['data/iris_missing2.csv', '10', '10', '10', '{"petal width": 1}', '000004', 'Iris-setosa', '0.8064'], - ['data/iris_missing2.csv', '10', '10', '10', '{"petal width": 1, "petal length": 4}', '000004', 'Iris-versicolor', '0.7847']] - show_doc(self.test_scenario4, examples) + ['data/iris_missing2.csv', '10', '10', '10', + '{"petal width": 1}', '000004', 'Iris-setosa', '0.8064'], + ['data/iris_missing2.csv', '10', '10', '10', + '{"petal width": 1, "petal length": 4}', '000004', + 'Iris-versicolor', '0.7847'], + ['data/missings_reg.csv', '10', '10', '10', '{"x2": 4}', + '000002', '1.33333', '1.62547']] + show_doc(self.test_scenario4) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) model_create.i_create_a_model_with_missing_splits(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_model(self) - prediction_create.i_create_a_proportional_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_create.the_confidence_is(self, example[7]) - prediction_compare.i_create_a_proportional_local_prediction(self, example[4]) - prediction_compare.the_local_prediction_is(self, example[6]) - prediction_compare.the_local_prediction_confidence_is(self, example[7]) +
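# For reference, the "proportional missing strategy" exercised by these
# scenarios can be sketched in a few lines: when the field a node splits on
# is missing from the input, every branch is followed and the reachable leaf
# distributions are merged, weighted by their training counts. This is an
# illustration of the general idea, not the bindings' implementation, and
# the node layout used here is hypothetical:
from collections import Counter

def proportional_predict(node, input_data):
    """Merge the class counts of every leaf reachable when split fields
    are missing; follow the single matching branch otherwise."""
    if not node.get("children"):
        return Counter(node["distribution"])  # leaf: class -> count
    field = node["split_field"]
    if field in input_data:
        for child in node["children"]:
            if child["predicate"](input_data[field]):
                return proportional_predict(child, input_data)
        return Counter(node["distribution"])  # no branch matched
    merged = Counter()  # missing value: aggregate over all children
    for child in node["children"]:
        merged.update(proportional_predict(child, input_data))
    return merged

# The prediction is the majority class of the merged counts, and the
# confidence compared by the steps above is derived from that merged
# distribution.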
prediction_create.i_create_a_proportional_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_create.the_confidence_is(self, example["confidence"]) + prediction_compare.i_create_a_proportional_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) + prediction_compare.the_highest_local_prediction_confidence_is( + self, example["input_data"], example["confidence"]) def test_scenario5(self): """ - Scenario: Successfully comparing logistic regression predictions: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a logistic regression model - And I wait until the logistic regression model is ready less than secs - And I create a local logistic regression model - When I create a logistic regression prediction for "" - Then the logistic regression prediction is "" - And I create a local logistic regression prediction for "" - Then the local logistic regression prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | prediction | - """ + Scenario 5: Successfully comparing logistic regression predictions: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression model + And I wait until the logistic regression model is ready less than secs + And I create a local logistic regression model + When I create a logistic regression prediction for "" + Then the logistic regression prediction is "" + And I create a local logistic regression prediction for "" + Then the local logistic regression prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "prediction"] examples = [ - ['data/iris.csv', '10', '10', '50', '{"petal width": 0.5, "petal length": 0.5, "sepal width": 0.5, "sepal length": 0.5}', 'Iris-versicolor'], - ['data/iris.csv', '10', '10', '50', '{"petal width": 2, "petal length": 6, "sepal width": 0.5, "sepal length": 0.5}', 'Iris-versicolor'], - ['data/iris.csv', '10', '10', '50', '{"petal width": 1.5, "petal length": 4, "sepal width": 0.5, "sepal length": 0.5}', 'Iris-versicolor'], - ['data/iris.csv', '10', '10', '50', '{"petal length": 1}', 'Iris-setosa'], - ['data/iris_sp_chars.csv', '10', '10', '50', '{"pétal.length": 4, "pétal&width\u0000": 1.5, "sépal&width": 0.5, "sépal.length": 0.5}', 'Iris-versicolor'], - ['data/price.csv', '10', '10', '50', '{"Price": 1200}', 'Product1']] - show_doc(self.test_scenario5, examples) + ['data/iris.csv', '10', '10', '50', + '{"petal width": 0.5, "petal length": 0.5, "sepal width": 0.5, ' + '"sepal length": 0.5}', 'Iris-versicolor'], + ['data/iris.csv', '10', '10', '50', + '{"petal width": 2, "petal length": 6, "sepal width": 0.5, ' + '"sepal length": 0.5}', 'Iris-versicolor'], + ['data/iris.csv', '10', '10', '50', + '{"petal width": 1.5, "petal length": 4, "sepal width": 0.5, ' + '"sepal length": 0.5}', 'Iris-versicolor'], + ['data/iris.csv', '10', '10', '50', + '{"petal length": 1}', 'Iris-setosa'], + ['data/iris_sp_chars.csv', '10', '10', '50', + '{"pétal.length": 4, "pétal&width\\u0000": 1.5, "sépal&width": ' + '0.5, 
"sépal.length": 0.5}', 'Iris-versicolor'], + ['data/price.csv', '10', '10', '50', '{"Price": 1200}', + 'Product1']] + show_doc(self.test_scenario5) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_logistic_model(self) - model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model( + self, shared=example["data"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) prediction_compare.i_create_a_local_logistic_model(self) - prediction_create.i_create_a_logistic_prediction(self, example[4]) - prediction_create.the_logistic_prediction_is(self, example[5]) - prediction_compare.i_create_a_local_prediction(self, example[4]) - prediction_compare.the_local_prediction_is(self, example[5]) + prediction_create.i_create_a_logistic_prediction( + self, example["input_data"]) + prediction_create.the_logistic_prediction_is( + self, example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) def test_scenario6(self): """ - Scenario: Successfully comparing predictions with text options: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I update the source with params "" - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a logistic regression model - And I wait until the logistic regression model is ready less than secs - And I create a local logistic regression model - When I create a logistic regression prediction for "" - Then the logistic regression prediction is "" - And I create a local logistic regression prediction for "" - Then the local logistic regression prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | options | data_input | prediction | - - """ + Scenario 6: Successfully comparing predictions with text options: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression model + And I wait until the logistic regression model is ready less than secs + And I create a local logistic regression model + When I create a logistic regression prediction for "" + Then the logistic regression prediction is "" + And I create a local logistic regression prediction for "" + Then the local logistic regression prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "input_data", "prediction"] examples = [ - ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": 
{"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile call"}', 'ham'], - ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', 'ham'], - ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "Mobile calls"}', 'ham'], - ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Message": "A normal message"}', 'ham'], - ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "Mobile call"}', 'ham'], - ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Message": "A normal message"}', 'ham'], - ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+"}', 'ham'], - ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Message": "Ok"}', 'ham']] - show_doc(self.test_scenario6, examples) + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": true, "stem_words": true, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Message": "Mobile call"}', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": true, "stem_words": true, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Message": "A normal message"}', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": false, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Message": "Mobile calls"}', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": false, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Message": "A normal message"}', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": true, ' + '"use_stopwords": true, "language": "en"}}}}', + '{"Message": "Mobile call"}', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": true, ' + '"use_stopwords": true, "language": "en"}}}}', + '{"Message": "A normal message"}', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "full_terms_only", "language": "en"}}}}', + '{"Message": "FREE for 1st week! 
No1 Nokia tone 4 ur mob every ' + 'week just txt NOKIA to 87077 Get txting and tell ur mates. zed ' + 'POBox 36504 W45WQ norm150p/tone 16+"}', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "full_terms_only", "language": "en"}}}}', + '{"Message": "Ok"}', 'ham']] + show_doc(self.test_scenario6) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - source_create.i_update_source_with(self, example[4]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) model_create.i_create_a_logistic_model(self) - model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_logistic_model(self) - prediction_create.i_create_a_logistic_prediction(self, example[5]) - prediction_create.the_logistic_prediction_is(self, example[6]) - prediction_compare.i_create_a_local_prediction(self, example[5]) - prediction_compare.the_local_prediction_is(self, example[6]) - - - def test_scenario8(self): - """ - Scenario: Successfully comparing predictions with text options and proportional missing strategy: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I update the source with params "" - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - And I create a local model - When I create a proportional missing strategy prediction for "" - Then the prediction for "" is "" - And I create a proportional missing strategy local prediction for "" - Then the local prediction is "" - - Examples: - - """ + prediction_create.i_create_a_logistic_prediction( + self, example["input_data"]) + prediction_create.the_logistic_prediction_is( + self, example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + + def test_scenario7(self): + """ + Scenario 7: Successfully comparing predictions with text options and proportional missing strategy: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + When I create a proportional missing strategy prediction for "" + Then the prediction for "" is "" + And I create a proportional missing strategy local prediction for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "input_data", "objective_id", "prediction"] examples = [ - ['data/text_missing.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", 
"term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{}', "000003",'swap'], - ['data/text_missing.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}, "000000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', '{"category1": "a"}', "000003",'paperwork']] - show_doc(self.test_scenario8, examples) + ['data/text_missing.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "all", "language": "en"}}, "000000": {"optype": ' + '"text", "term_analysis": {"token_mode": "all", ' + '"language": "en"}}}}', '{}', "000003",'swap'], + ['data/text_missing.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "all", "language": "en"}}, "000000": {"optype": ' + '"text", "term_analysis": {"token_mode": "all", ' + '"language": "en"}}}}', '{"category1": "a"}', "000003", + 'paperwork']] + show_doc(self.test_scenario7) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - source_create.i_update_source_with(self, example[4]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_model(self) - prediction_create.i_create_a_proportional_prediction(self, example[5]) - prediction_create.the_prediction_is(self, example[6], example[7]) - prediction_compare.i_create_a_proportional_local_prediction(self, example[5]) - prediction_compare.the_local_prediction_is(self, example[7]) - - - def test_scenario9(self): - """ - Scenario: Successfully comparing predictions with text options: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I update the source with params "" - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a logistic regression model with objective "" and parms "" - And I wait until the logistic regression model is ready less than secs - And I create a local logistic regression model - When I create a logistic regression prediction for "" - Then the logistic regression prediction is "" - And the logistic regression probability for the prediction is "" - And I create a local logistic regression prediction for "" - Then the local logistic regression prediction is "" - And the local logistic regression probability for the prediction is "" - - """ - examples = [ - ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.0394, "000000", '{"field_codings": [{"field": "species", "coding": "dummy", "dummy_class": "Iris-setosa"}]}'], - ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": 
{"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.051, "000000", '{"balance_fields": false, "field_codings": [{"field": "species", "coding": "contrast", "coefficients": [[1, 2, -1, -2]]}]}'], - ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.051, "000000", '{"balance_fields": false, "field_codings": [{"field": "species", "coding": "other", "coefficients": [[1, 2, -1, -2]]}]}'], - ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": {"optype": "categorical"}}}', '{"species": "Iris-setosa"}', '5.0', 0.0417, "000000", '{"bias": false}']] - show_doc(self.test_scenario9, examples) + prediction_create.i_create_a_proportional_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_proportional_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) - for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - source_create.i_update_source_with(self, example[4]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[9]) - model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) - prediction_compare.i_create_a_local_logistic_model(self) - prediction_create.i_create_a_logistic_prediction(self, example[5]) - prediction_create.the_logistic_prediction_is(self, example[6]) - prediction_create.the_logistic_probability_is(self, example[7]) - prediction_compare.i_create_a_local_prediction(self, example[5]) - prediction_compare.the_local_prediction_is(self, example[6]) - prediction_compare.the_local_probability_is(self, example[7]) - - def test_scenario10(self): - """ - Scenario: Successfully comparing predictions with proportional missing strategy and balanced models: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a balanced model - And I wait until the model is ready less than secs - And I create a local model - When I create a proportional missing strategy prediction for "" - Then the prediction for "" is "" - And the confidence for the prediction is "" - And I create a proportional missing strategy local prediction for "" - Then the local prediction is "" - And the local prediction's confidence is "" - And I create local probabilities for "" - Then the local probabilities are "" - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | confidence | - - """ - examples = [ - ['data/iris_unbalanced.csv', '10', '10', '10', '{}', '000004', 'Iris-setosa', '0.25284', '[0.33333, 0.33333, 0.33333]'], - ['data/iris_unbalanced.csv', '10', '10', '10', '{"petal length":1, "sepal length":1, "petal width": 1, "sepal width": 1}', '000004', 'Iris-setosa', '0.7575', '[1.0, 0.0, 0.0]']] - show_doc(self.test_scenario10, examples) - for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - 
dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_balanced_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) - prediction_compare.i_create_a_local_model(self) - prediction_create.i_create_a_proportional_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_compare.i_create_a_proportional_local_prediction(self, example[4]) - prediction_compare.the_local_prediction_is(self, example[6]) - prediction_create.the_confidence_is(self, example[7]) - prediction_compare.the_local_prediction_confidence_is(self, example[7]) - prediction_compare.i_create_local_probabilities(self, example[4]) - prediction_compare.the_local_probabilities_are(self, example[8]) - - def test_scenario11(self): - """ - Scenario: Successfully comparing predictions for logistic regression with balance_fields: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I update the source with params "" - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a logistic regression model with objective "" and flags - And I wait until the logistic regression model is ready less than secs - And I create a local logistic regression model - When I create a logistic regression prediction for "" - Then the logistic regression prediction is "" - And the logistic regression probability for the prediction is "" - And I create a local logistic regression prediction for "" - Then the local logistic regression prediction is "" - And the local logistic regression probability for the prediction is "" - - Examples: - | data | time_1 | time_2 | objective | time_3 | options | data_input | prediction | probability - - """ - examples = [ - ['data/movies.csv', '20', '20', '180', '{"fields": {"000000": {"name": "user_id", "optype": "numeric"},' - ' "000001": {"name": "gender", "optype": "categorical"},' - ' "000002": {"name": "age_range", "optype": "categorical"},' - ' "000003": {"name": "occupation", "optype": "categorical"},' - ' "000004": {"name": "zipcode", "optype": "numeric"},' - ' "000005": {"name": "movie_id", "optype": "numeric"},' - ' "000006": {"name": "title", "optype": "text"},' - ' "000007": {"name": "genres", "optype": "items",' - '"item_analysis": {"separator": "$"}},' - '"000008": {"name": "timestamp", "optype": "numeric"},' - '"000009": {"name": "rating", "optype": "categorical"}},' - '"source_parser": {"separator": ";"}}', '{"timestamp": "999999999"}', '4', 0.4079, "000009", '{"balance_fields": false}'], - ['data/movies.csv', '20', '20', '180', '{"fields": {"000000": {"name": "user_id", "optype": "numeric"},' - ' "000001": {"name": "gender", "optype": "categorical"},' - ' "000002": {"name": "age_range", "optype": "categorical"},' - ' "000003": {"name": "occupation", "optype": "categorical"},' - ' "000004": {"name": "zipcode", "optype": "numeric"},' - ' "000005": {"name": "movie_id", "optype": "numeric"},' - ' "000006": {"name": "title", "optype": "text"},' - ' "000007": {"name": "genres", "optype": "items",' - '"item_analysis": {"separator": "$"}},' - '"000008": {"name": "timestamp", "optype": "numeric"},' - '"000009": {"name": "rating", "optype": "categorical"}},' - '"source_parser": {"separator": ";"}}', '{"timestamp": "999999999"}', '4', 0.2547, "000009", '{"normalize": true}'], - ['data/movies.csv', '20', '20', '180', '{"fields": {"000000": {"name": "user_id", "optype": "numeric"},' - ' "000001": {"name": 
"gender", "optype": "categorical"},' - ' "000002": {"name": "age_range", "optype": "categorical"},' - ' "000003": {"name": "occupation", "optype": "categorical"},' - ' "000004": {"name": "zipcode", "optype": "numeric"},' - ' "000005": {"name": "movie_id", "optype": "numeric"},' - ' "000006": {"name": "title", "optype": "text"},' - ' "000007": {"name": "genres", "optype": "items",' - '"item_analysis": {"separator": "$"}},' - '"000008": {"name": "timestamp", "optype": "numeric"},' - '"000009": {"name": "rating", "optype": "categorical"}},' - '"source_parser": {"separator": ";"}}', '{"timestamp": "999999999"}', '4', 0.2547, "000009", '{"balance_fields": true, "normalize": true}']] - show_doc(self.test_scenario11, examples) - for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - source_create.i_update_source_with(self, example[4]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[9]) - model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) - prediction_compare.i_create_a_local_logistic_model(self) - prediction_create.i_create_a_logistic_prediction(self, example[5]) - prediction_create.the_logistic_prediction_is(self, example[6]) - prediction_create.the_logistic_probability_is(self, example[7]) - prediction_compare.i_create_a_local_prediction(self, example[5]) - prediction_compare.the_local_prediction_is(self, example[6]) - prediction_compare.the_local_probability_is(self, example[7]) - - def test_scenario12(self): - """ - Scenario: Successfully comparing logistic regression predictions with constant fields: - - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I update the dataset with "" - And I wait until the dataset is ready less than secs - And I create a logistic regression model - And I wait until the logistic regression model is ready less than secs - And I create a local logistic regression model - When I create a logistic regression prediction for "" - Then the logistic regression prediction is "" - And I create a local logistic regression prediction for "" - Then the local logistic regression prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 |time_4| data_input | prediction | field_id - - """ - examples = [ - ['data/constant_field.csv', '10', '10', '50', '10','{"a": 1, "b": 1, "c": 1}', 'a', '{"fields": {"000000": {"preferred": true}}}']] - show_doc(self.test_scenario12, examples) - - for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - dataset_create.i_update_dataset_with(self, example[7]) - dataset_create.the_dataset_is_finished_in_less_than(self, example[4]) - model_create.i_create_a_logistic_model(self) - model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) - prediction_compare.i_create_a_local_logistic_model(self) - prediction_create.i_create_a_logistic_prediction(self, example[5]) - prediction_create.the_logistic_prediction_is(self, example[6]) - 
prediction_compare.i_create_a_local_prediction(self, example[5]) - prediction_compare.the_local_prediction_is(self, example[6]) - - def test_scenario13(self): - """ - Scenario: Successfully comparing predictions: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - And I create a local model - When I create a prediction for "" - Then the prediction for "" is "" - And I create a local prediction for "" - Then the local prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | - - """ - examples = [ - ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa', "tmp/my_model.json", "my_test"], - ['data/iris.csv', '10', '10', '10', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica', "tmp/my_model.json", "my_test"], - ['data/iris.csv', '10', '10', '10', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor', "tmp/my_model.json", "my_test"], - ['data/iris_sp_chars.csv', '10', '10', '10', '{"pétal.length": 4, "pétal&width\u0000": 1.5}', '000004', 'Iris-versicolor', "tmp/my_model_2.json", "my_test"]] - show_doc(self.test_scenario13, examples) - for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - args = '{"tags": ["%s"]}' % example[8] - model_create.i_create_a_model_with(self, data=args) - model_create.the_model_is_finished_in_less_than(self, example[3]) - model_create.i_export_model(self, False, example[7]) # no pmml - prediction_compare.i_create_a_local_model_from_file(self, example[7]) - prediction_create.i_create_a_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_compare.i_create_a_local_prediction(self, example[4]) - prediction_compare.the_local_prediction_is(self, example[6]) - model_create.i_export_tags_model(self, example[7], example[8]) - prediction_compare.i_create_a_local_model_from_file(self, example[7]) - prediction_compare.i_create_a_local_prediction(self, example[4]) - prediction_compare.the_local_prediction_is(self, example[6]) - - def test_scenario14(self): - """ - Scenario: Successfully comparing predictions with supervised model: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - And I create a local supervised model - When I create a prediction for "" - Then the prediction for "" is "" - And I create a local prediction for "" - Then the local prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | - - """ - examples = [ - ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa'], - ['data/iris.csv', '10', '10', '10', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica'], - ['data/iris.csv', '10', '10', '10', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor'], - ['data/iris_sp_chars.csv', '10', '10', '10', '{"pétal.length": 4, "pétal&width\u0000": 1.5}', 
'000004', 'Iris-versicolor']] - show_doc(self.test_scenario14, examples) - for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) - prediction_compare.i_create_a_local_supervised_model(self) - prediction_create.i_create_a_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_compare.i_create_a_local_prediction(self, example[4]) - prediction_compare.the_local_prediction_is(self, example[6]) - - - def test_scenario15(self): - """ - Scenario: Successfully comparing predictions with text options: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a logistic regression model with objective "" - And I wait until the logistic regression model is ready less than secs - And I create a local logistic regression model - When I create a logistic regression prediction for "" - Then the logistic regression prediction is "" - And the logistic regression probability for the prediction is "" - And I create a local logistic regression prediction for "" - Then the local logistic regression prediction is "" - And the local logistic regression probability for the prediction is "" - - Examples: - | data | time_1 | time_2 | objective | time_3 | options | data_input | prediction | probability - - """ - examples = [ - ['data/iris.csv', '20', '20', '180', '{"weight_field": "000000", "missing_numerics": false}', '{"petal width": 1.5, "petal length": 2, "sepal width":1}', 'Iris-versicolor', '0.9547', '000004']] - show_doc(self.test_scenario15, examples) - for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[4]) - model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) - prediction_compare.i_create_a_local_logistic_model(self) - prediction_create.i_create_a_logistic_prediction(self, example[5]) - prediction_create.the_logistic_prediction_is(self, example[6]) - prediction_create.the_logistic_probability_is(self, example[7]) - prediction_compare.i_create_a_local_prediction(self, example[5]) - prediction_compare.the_local_prediction_is(self, example[6]) - prediction_compare.the_local_probability_is(self, example[7]) - - def test_scenario16(self): - """ - Scenario: Successfully comparing remote and local predictions - with raw date input: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - And I create a local model - When I create a prediction for "" - Then the prediction for "" is "" - And I create a local prediction for "" - Then the local prediction is "" - Examples: - |data|time_1|time_2|time_3|data_input|objective|prediction| - """ - examples = [ 
- ['data/dates2.csv', '20', '20', '25', - '{"time-1": "1910-05-08T19:10:23.106", "cat-0":"cat2"}', - '000002', -1.01482], - ['data/dates2.csv', '20', '20', '25', - '{"time-1": "1920-06-30T20:21:20.320", "cat-0":"cat1"}', - '000002', 0.78406], - ['data/dates2.csv', '20', '20', '25', - '{"time-1": "1932-01-30T19:24:11.440", "cat-0":"cat2"}', - '000002', -0.98757], - ['data/dates2.csv', '20', '20', '25', - '{"time-1": "1950-11-06T05:34:05.252", "cat-0":"cat1"}', - '000002', 0.27538], - ['data/dates2.csv', '20', '20', '25', - '{"time-1": "1969-7-14 17:36", "cat-0":"cat2"}', - '000002', -0.06256], - ['data/dates2.csv', '20', '20', '25', - '{"time-1": "2001-01-05T23:04:04.693", "cat-0":"cat2"}', - '000002', 0.9832], - ['data/dates2.csv', '20', '20', '25', - '{"time-1": "2011-04-01T00:16:45.747", "cat-0":"cat2"}', - '000002', -0.5977], - ['data/dates2.csv', '20', '20', '25', - '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}', - '000002', -0.06256], - ['data/dates2.csv', '20', '20', '25', - '{"time-1": "Mon Jul 14 17:36 +0000 1969", "cat-0":"cat1"}', - '000002', -0.06256]] - show_doc(self.test_scenario16, examples) - for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) - prediction_compare.i_create_a_local_model(self) - prediction_create.i_create_a_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_compare.i_create_a_local_prediction(self, example[4]) - prediction_compare.the_local_prediction_is(self, example[6]) - - def test_scenario17(self): - """ - Scenario: Successfully comparing remote and local predictions - with raw date input: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a logistic regression model - And I wait until the logistic regression is ready less - than secs - And I create a local logistic regression model - When I create a prediction for "" - Then the prediction for "" is "" - And the logistic regression probability for the prediction - is "" - And I create a local prediction for "" - Then the local prediction is "" - And the local logistic regression probability for the - prediction is "" - Examples: - |data|time_1|time_2|time_3|data_input|objective|prediction - |probability| + def test_scenario8(self): """ + Scenario 8: Successfully comparing predictions with text options: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression model with objective "" and parms "" + And I wait until the logistic regression model is ready less than secs + And I create a local logistic regression model + When I create a logistic regression prediction for "" + Then the logistic regression prediction is "" + And the logistic regression probability for the prediction is "" + And I create a local logistic regression prediction for "" + Then the local logistic regression prediction is "" + And the local logistic regression probability for the prediction is 
"" + + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "input_data", "prediction", "probability", + "objective_id", "model_conf"] examples = [ - ['data/dates2.csv', '20', '20', '45', - '{"time-1": "1910-05-08T19:10:23.106", "target-1":0.722}', - 'cat0', 0.75024], - ['data/dates2.csv', '20', '20', '45', - '{"time-1": "1920-06-30T20:21:20.320", "target-1":0.12}', - 'cat0', 0.75821], - ['data/dates2.csv', '20', '20', '45', - '{"time-1": "1932-01-30T19:24:11.440", "target-1":0.32}', - 'cat0', 0.71498], - ['data/dates2.csv', '20', '20', '45', - '{"time-1": "1950-11-06T05:34:05.252", "target-1":0.124}', - 'cat0', 0.775], - ['data/dates2.csv', '20', '20', '45', - '{"time-1": "1969-7-14 17:36", "target-1":0.784}', - 'cat0', 0.73663], - ['data/dates2.csv', '20', '20', '45', - '{"time-1": "2001-01-05T23:04:04.693", "target-1":0.451}', - 'cat0', 0.6822], - ['data/dates2.csv', '20', '20', '45', - '{"time-1": "2011-04-01T00:16:45.747", "target-1":0.42}', - 'cat0', 0.71107], - ['data/dates2.csv', '20', '20', '45', - '{"time-1": "1969-W29-1T17:36:39Z", "target-1":0.67}', - 'cat0', 0.73663], - ['data/dates2.csv', '20', '20', '45', - '{"time-1": "Mon Jul 14 17:36 +0000 1969", "target-1":0.005}', - 'cat0', 0.73663]] - show_doc(self.test_scenario17, examples) + ['data/iris.csv', '20', '20', '130', + '{"fields": {"000000": {"optype": "categorical"}}}', + '{"species": "Iris-setosa"}', '5.0', 0.0394, "000000", + '{"field_codings": [{"field": "species", "coding": "dummy", ' + '"dummy_class": "Iris-setosa"}]}'], + ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": ' + '{"optype": "categorical"}}}', '{"species": "Iris-setosa"}', + '5.0', 0.051, "000000", '{"balance_fields": false, ' + '"field_codings": [{"field": "species", "coding": "contrast", ' + '"coefficients": [[1, 2, -1, -2]]}]}'], + ['data/iris.csv', '20', '20', '130', + '{"fields": {"000000": {"optype": "categorical"}}}', + '{"species": "Iris-setosa"}', '5.0', 0.051, "000000", + '{"balance_fields": false, "field_codings": [{"field": "species",' + ' "coding": "other", "coefficients": [[1, 2, -1, -2]]}]}'], + ['data/iris.csv', '20', '20', '130', + '{"fields": {"000000": {"optype": "categorical"}}}', + '{"species": "Iris-setosa"}', '5.0', 0.0417, "000000", + '{"bias": false}']] + show_doc(self.test_scenario8) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, - example[2]) - model_create.i_create_a_logistic_model(self) - model_create.the_logistic_model_is_finished_in_less_than(self, - example[3]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_logistic_model(self) - prediction_create.i_create_a_logistic_prediction(self, example[4]) - prediction_create.the_logistic_prediction_is(self, example[5]) - 
prediction_create.the_logistic_probability_is(self, example[6]) - prediction_compare.i_create_a_local_prediction(self, example[4]) - prediction_compare.the_local_prediction_is(self, example[5]) - prediction_compare.the_local_probability_is(self, example[6]) + prediction_create.i_create_a_logistic_prediction( + self, example["input_data"]) + prediction_create.the_logistic_prediction_is( + self, example["prediction"]) + prediction_create.the_logistic_probability_is( + self, example["probability"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_probability_is( + self, example["probability"]) diff --git a/bigml/tests/test_05_compare_predictions_b.py b/bigml/tests/test_05_compare_predictions_b.py new file mode 100644 index 00000000..65097657 --- /dev/null +++ b/bigml/tests/test_05_compare_predictions_b.py @@ -0,0 +1,640 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Comparing remote and local predictions + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_prediction_steps as prediction_create +from . 
import compare_predictions_steps as prediction_compare + + +class TestComparePrediction: + """Testing local and remote predictions""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario10(self): + """ + Scenario: Successfully comparing predictions with proportional missing strategy and balanced models: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a balanced model + And I wait until the model is ready less than secs + And I create a local model + When I create a proportional missing strategy prediction for "" + Then the prediction for "" is "" + And the confidence for the prediction is "" + And I create a proportional missing strategy local prediction for "" + Then the local prediction is "" + And the local prediction's confidence is "" + And I create local probabilities for "" + Then the local probabilities are "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", + "confidence", "probabilities"] + examples = [ + ['data/iris_unbalanced.csv', '10', '10', '10', '{}', '000004', + 'Iris-setosa', '0.25284', '[0.33333, 0.33333, 0.33333]'], + ['data/iris_unbalanced.csv', '10', '10', '10', + '{"petal length":1, "sepal length":1, "petal width": 1, ' + '"sepal width": 1}', '000004', 'Iris-setosa', '0.7575', + '[1.0, 0.0, 0.0]']] + show_doc(self.test_scenario10) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"], + shared=example["data"]) + source_create.the_source_is_finished(self, example["source_wait"], + shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_balanced_model(self) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_model(self) + prediction_create.i_create_a_proportional_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_proportional_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_create.the_confidence_is( + self, example["confidence"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) + prediction_compare.i_create_local_probabilities( + self, example["input_data"]) + prediction_compare.the_local_probabilities_are( + self, example["probabilities"]) + + def test_scenario11(self): + """ + Scenario: Successfully comparing predictions for logistic regression with balance_fields: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression model with objective "" and flags + 
And I wait until the logistic regression model is ready less than secs + And I create a local logistic regression model + When I create a logistic regression prediction for "" + Then the logistic regression prediction is "" + And the logistic regression probability for the prediction is "" + And I create a local logistic regression prediction for "" + Then the local logistic regression prediction is "" + And the local logistic regression probability for the prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "input_data", "prediction", "probability", + "objective_id", "model_conf"] + examples = [ + ['data/movies.csv', '20', '20', '180', + '{"fields": {"000000": {"name": "user_id", "optype": "numeric"},' + ' "000001": {"name": "gender", "optype": "categorical"},' + ' "000002": {"name": "age_range", "optype": "categorical"},' + ' "000003": {"name": "occupation", "optype": "categorical"},' + ' "000004": {"name": "zipcode", "optype": "numeric"},' + ' "000005": {"name": "movie_id", "optype": "numeric"},' + ' "000006": {"name": "title", "optype": "text"},' + ' "000007": {"name": "genres", "optype": "items",' + '"item_analysis": {"separator": "$"}},' + '"000008": {"name": "timestamp", "optype": "numeric"},' + '"000009": {"name": "rating", "optype": "categorical"}},' + '"source_parser": {"separator": ";"}}', + '{"timestamp": "999999999"}', '4', 0.4079, "000009", + '{"balance_fields": false}'], + ['data/movies.csv', '20', '20', '180', + '{"fields": {"000000": {"name": "user_id", "optype": "numeric"},' + ' "000001": {"name": "gender", "optype": "categorical"},' + ' "000002": {"name": "age_range", "optype": "categorical"},' + ' "000003": {"name": "occupation", "optype": "categorical"},' + ' "000004": {"name": "zipcode", "optype": "numeric"},' + ' "000005": {"name": "movie_id", "optype": "numeric"},' + ' "000006": {"name": "title", "optype": "text"},' + ' "000007": {"name": "genres", "optype": "items",' + '"item_analysis": {"separator": "$"}},' + '"000008": {"name": "timestamp", "optype": "numeric"},' + '"000009": {"name": "rating", "optype": "categorical"}},' + '"source_parser": {"separator": ";"}}', + '{"timestamp": "999999999"}', '4', 0.2547, "000009", + '{"normalize": true}'], + ['data/movies.csv', '20', '20', '180', + '{"fields": {"000000": {"name": "user_id", "optype": "numeric"},' + ' "000001": {"name": "gender", "optype": "categorical"},' + ' "000002": {"name": "age_range", "optype": "categorical"},' + ' "000003": {"name": "occupation", "optype": "categorical"},' + ' "000004": {"name": "zipcode", "optype": "numeric"},' + ' "000005": {"name": "movie_id", "optype": "numeric"},' + ' "000006": {"name": "title", "optype": "text"},' + ' "000007": {"name": "genres", "optype": "items",' + '"item_analysis": {"separator": "$"}},' + '"000008": {"name": "timestamp", "optype": "numeric"},' + '"000009": {"name": "rating", "optype": "categorical"}},' + '"source_parser": {"separator": ";"}}', + '{"timestamp": "999999999"}', '4', 0.2547, "000009", + '{"balance_fields": true, "normalize": true}']] + show_doc(self.test_scenario11) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, 
example["dataset_wait"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_logistic_model(self) + prediction_create.i_create_a_logistic_prediction( + self, example["input_data"]) + prediction_create.the_logistic_prediction_is( + self, example["prediction"]) + prediction_create.the_logistic_probability_is( + self, example["probability"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_probability_is( + self, example["probability"]) + + def test_scenario12(self): + """ + Scenario: Successfully comparing logistic regression predictions with constant fields: + + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I update the dataset with "" + And I wait until the dataset is ready less than secs + And I create a logistic regression model + And I wait until the logistic regression model is ready less than secs + And I create a local logistic regression model + When I create a logistic regression prediction for "" + Then the logistic regression prediction is "" + And I create a local logistic regression prediction for "" + Then the local logistic regression prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "prediction", "dataset_conf"] + examples = [ + ['data/constant_field.csv', '10', '20', '50', + '{"a": 1, "b": 1, "c": 1}', 'a', + '{"fields": {"000000": {"preferred": true}}}']] + show_doc(self.test_scenario12) + + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.i_update_dataset_with(self, example["dataset_conf"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + model_create.i_create_a_logistic_model(self) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_logistic_model(self) + prediction_create.i_create_a_logistic_prediction( + self, example["input_data"]) + prediction_create.the_logistic_prediction_is( + self, example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario13(self): + """ + Scenario: Successfully comparing predictions: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" + And I export the model with tags "" + And I create a local model 
from file "" + And I create a local prediction for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", + "model_file", "model_tags"] + examples = [ + ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', + '000004', 'Iris-setosa', "tmp/my_model.json", "my_test"], + ['data/iris.csv', '10', '10', '10', + '{"petal length": 6, "petal width": 2}', '000004', + 'Iris-virginica', "tmp/my_model.json", "my_test"], + ['data/iris.csv', '10', '10', '10', + '{"petal length": 4, "petal width": 1.5}', '000004', + 'Iris-versicolor', "tmp/my_model.json", "my_test"], + ['data/iris_sp_chars.csv', '10', '10', '10', + '{"pétal.length": 4, "pétal&width\\u0000": 1.5}', '000004', + 'Iris-versicolor', "tmp/my_model_2.json", "my_test"]] + show_doc(self.test_scenario13) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset( + self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + args = '{"tags": ["%s"]}' % example["model_tags"] + model_create.i_create_a_model_with(self, data=args) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_export_model( + self, False, example["model_file"]) # no pmml + prediction_compare.i_create_a_local_model_from_file( + self, example["model_file"]) + prediction_create.i_create_a_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + model_create.i_export_tags_model( + self, example["model_file"], example["model_tags"]) + prediction_compare.i_create_a_local_model_from_file( + self, example["model_file"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario14(self): + """ + Scenario: Successfully comparing predictions with supervised model: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local supervised model + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction"] + examples = [ + ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', + '000004', 'Iris-setosa'], + ['data/iris.csv', '10', '10', '10', + '{"petal length": 6, "petal width": 2}', '000004', + 'Iris-virginica'], + ['data/iris.csv', '10', '10', '10', + '{"petal length": 4, "petal width": 1.5}', '000004', + 'Iris-versicolor'], + ['data/iris_sp_chars.csv', '10', '10', '10', + '{"pétal.length": 4, "pétal&width\\u0000": 1.5}', + '000004', 'Iris-versicolor']] + show_doc(self.test_scenario14) + for example in examples: + 
+    def test_scenario14(self):
+        """
+        Scenario: Successfully comparing predictions with supervised model:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a model
+            And I wait until the model is ready less than <model_wait> secs
+            And I create a local supervised model
+            When I create a prediction for "<input_data>"
+            Then the prediction for "<objective_id>" is "<prediction>"
+            And I create a local prediction for "<input_data>"
+            Then the local prediction is "<prediction>"
+        """
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "input_data", "objective_id", "prediction"]
+        examples = [
+            ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}',
+             '000004', 'Iris-setosa'],
+            ['data/iris.csv', '10', '10', '10',
+             '{"petal length": 6, "petal width": 2}', '000004',
+             'Iris-virginica'],
+            ['data/iris.csv', '10', '10', '10',
+             '{"petal length": 4, "petal width": 1.5}', '000004',
+             'Iris-versicolor'],
+            ['data/iris_sp_chars.csv', '10', '10', '10',
+             '{"pétal.length": 4, "pétal&width\\u0000": 1.5}',
+             '000004', 'Iris-versicolor']]
+        show_doc(self.test_scenario14)
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_model(self, shared=example["data"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"], shared=example["data"])
+            prediction_compare.i_create_a_local_supervised_model(self)
+            prediction_create.i_create_a_prediction(
+                self, example["input_data"])
+            prediction_create.the_prediction_is(
+                self, example["objective_id"], example["prediction"])
+            prediction_compare.i_create_a_local_prediction(
+                self, example["input_data"])
+            prediction_compare.the_local_prediction_is(
+                self, example["prediction"])
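SupervisedModel, used in scenario 14, is the generic local wrapper that dispatches to the right predictor class from the resource type. A minimal sketch with a placeholder id:

    from bigml.supervised import SupervisedModel

    # accepts models, ensembles, logistic regressions, etc.; placeholder id
    local = SupervisedModel("model/000000000000000000000000")
    print(local.predict({"petal length": 6, "petal width": 2}, full=True))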
+    def test_scenario15(self):
+        """
+        Scenario: Successfully comparing predictions with text options:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a logistic regression model with objective "<objective_id>" and params "<model_conf>"
+            And I wait until the logistic regression model is ready less than <model_wait> secs
+            And I create a local logistic regression model
+            When I create a logistic regression prediction for "<input_data>"
+            Then the logistic regression prediction is "<prediction>"
+            And the logistic regression probability for the prediction is "<probability>"
+            And I create a local logistic regression prediction for "<input_data>"
+            Then the local logistic regression prediction is "<prediction>"
+            And the local logistic regression probability for the prediction is "<probability>"
+        """
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "model_conf", "input_data", "prediction", "probability",
+                   "objective_id"]
+        examples = [
+            ['data/iris.csv', '20', '20', '180',
+             '{"weight_field": "000000", "missing_numerics": false}',
+             '{"petal width": 1.5, "petal length": 2, "sepal width":1}',
+             'Iris-versicolor', '0.9547', '000004']]
+        show_doc(self.test_scenario15)
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(self, example["source_wait"],
+                                                 shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_logistic_model_with_objective_and_parms(
+                self, example["objective_id"], example["model_conf"])
+            model_create.the_logistic_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            prediction_compare.i_create_a_local_logistic_model(self)
+            prediction_create.i_create_a_logistic_prediction(
+                self, example["input_data"])
+            prediction_create.the_logistic_prediction_is(
+                self, example["prediction"])
+            prediction_create.the_logistic_probability_is(
+                self, example["probability"])
+            prediction_compare.i_create_a_local_prediction(
+                self, example["input_data"])
+            prediction_compare.the_local_prediction_is(
+                self, example["prediction"])
+            prediction_compare.the_local_probability_is(
+                self, example["probability"])
+
+    def test_scenario16(self):
+        """
+        Scenario: Successfully comparing remote and local predictions
+                  with raw date input:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a model
+            And I wait until the model is ready less than <model_wait> secs
+            And I create a local model
+            When I create a prediction for "<input_data>"
+            Then the prediction for "<objective_id>" is "<prediction>"
+            And I create a local prediction for "<input_data>"
+            Then the local prediction is "<prediction>"
+        """
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "input_data", "objective_id", "prediction"]
+        examples = [
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "1910-05-08T19:10:23.106", "cat-0":"cat2"}',
+             '000002', -1.01482],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "1920-06-30T20:21:20.320", "cat-0":"cat1"}',
+             '000002', 0.78406],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "1932-01-30T19:24:11.440", "cat-0":"cat2"}',
+             '000002', -0.98757],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "1950-11-06T05:34:05.252", "cat-0":"cat1"}',
+             '000002', 0.27538],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "1969-7-14 17:36", "cat-0":"cat2"}',
+             '000002', -0.06256],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "2001-01-05T23:04:04.693", "cat-0":"cat2"}',
+             '000002', 0.9832],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "2011-04-01T00:16:45.747", "cat-0":"cat2"}',
+             '000002', -0.5977],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}',
+             '000002', -0.06256],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "Mon Jul 14 17:36 +0000 1969", "cat-0":"cat1"}',
+             '000002', -0.06256]]
+        show_doc(self.test_scenario16)
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(self, example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_model(self, shared=example["data"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"], shared=example["data"])
+            prediction_compare.i_create_a_local_model(self, pre_model=True)
+            prediction_create.i_create_a_prediction(
+                self, example["input_data"])
+            prediction_create.the_prediction_is(
+                self, example["objective_id"], example["prediction"])
+            prediction_compare.i_create_a_local_prediction(
+                self, example["input_data"],
+                pre_model=self.bigml["local_pipeline"])
+            prediction_compare.the_local_prediction_is(
+                self, example["prediction"])
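Scenarios 16 and 17 feed raw datetime strings: the backend expands datetime fields into year/month/day/... subfields, which is why the local side needs the pre_model pipeline before predicting. The remote half needs no preprocessing; a sketch with a placeholder model id:

    from bigml.api import BigML

    api = BigML()
    # raw date strings are valid remote inputs; expansion happens server-side
    prediction = api.create_prediction(
        "model/000000000000000000000000",
        {"time-1": "1969-7-14 17:36", "cat-0": "cat2"})
    api.ok(prediction)
    print(prediction["object"]["output"])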
["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "prediction", "probability"] + examples = [ + ['data/dates2.csv', '20', '20', '45', + '{"time-1": "1910-05-08T19:10:23.106", "target-1":0.722}', + 'cat0', 0.75024], + ['data/dates2.csv', '20', '20', '45', + '{"time-1": "1920-06-30T20:21:20.320", "target-1":0.12}', + 'cat0', 0.75821], + ['data/dates2.csv', '20', '20', '45', + '{"time-1": "1932-01-30T19:24:11.440", "target-1":0.32}', + 'cat0', 0.71498], + ['data/dates2.csv', '20', '20', '45', + '{"time-1": "1950-11-06T05:34:05.252", "target-1":0.124}', + 'cat0', 0.775], + ['data/dates2.csv', '20', '20', '45', + '{"time-1": "1969-7-14 17:36", "target-1":0.784}', + 'cat0', 0.73663], + ['data/dates2.csv', '20', '20', '45', + '{"time-1": "2001-01-05T23:04:04.693", "target-1":0.451}', + 'cat0', 0.6822], + ['data/dates2.csv', '20', '20', '45', + '{"time-1": "2011-04-01T00:16:45.747", "target-1":0.42}', + 'cat0', 0.71107], + ['data/dates2.csv', '20', '20', '45', + '{"time-1": "1969-W29-1T17:36:39Z", "target-1":0.67}', + 'cat0', 0.73663], + ['data/dates2.csv', '20', '20', '45', + '{"time-1": "Mon Jul 14 17:36 +0000 1969", "target-1":0.005}', + 'cat0', 0.73663]] + show_doc(self.test_scenario17) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model( + self, shared=example["data"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_logistic_model(self, + pre_model=True) + prediction_create.i_create_a_logistic_prediction( + self, example["input_data"]) + prediction_create.the_logistic_prediction_is( + self, example["prediction"]) + prediction_create.the_logistic_probability_is( + self, example["probability"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"], + pre_model=self.bigml["local_pipeline"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_probability_is( + self, example["probability"]) + + def test_scenario18(self): + """ + Scenario: Successfully comparing predictions with proportional missing strategy for missing_splits models: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a weighted model with missing splits + And I wait until the model is ready less than secs + And I create a local model + When I create a proportional missing strategy prediction for "" + Then the prediction for "" is "" + And the confidence for the prediction is "" + And I create a proportional missing strategy local prediction for "" + Then the local prediction is "" + And the local prediction's confidence is "" + And the highest local prediction's confidence is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "confidence"] + examples = [ + ['data/missings_cat.csv', '10', '10', '10', '{"x2": 4}', + '000002', 'positive', '0.25241'] +] + 
show_doc(self.test_scenario18) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_weighted_model_with_missing_splits(self) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_model(self) + prediction_create.i_create_a_proportional_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_create.the_confidence_is( + self, example["confidence"]) + prediction_compare.i_create_a_proportional_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) + prediction_compare.the_highest_local_prediction_confidence_is( + self, example["input_data"], example["confidence"]) diff --git a/bigml/tests/test_06_batch_predictions.py b/bigml/tests/test_06_batch_predictions.py index 875ec5ce..89266f8b 100644 --- a/bigml/tests/test_06_batch_predictions.py +++ b/bigml/tests/test_06_batch_predictions.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,247 +20,300 @@ """ Creating batch predictions """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_model_steps as model_create -import create_ensemble_steps as ensemble_create -import create_cluster_steps as cluster_create -import create_anomaly_steps as anomaly_create -import create_batch_prediction_steps as batch_pred_create -import create_prediction_steps as prediction_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_ensemble_steps as ensemble_create +from . import create_cluster_steps as cluster_create +from . import create_anomaly_steps as anomaly_create +from . import create_batch_prediction_steps as batch_pred_create +from . 
import create_prediction_steps as prediction_create -class TestBatchPrediction(object): +class TestBatchPrediction: + """Testing Batch Prediction""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a batch prediction: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - When I create a batch prediction for the dataset with the model - And I wait until the batch prediction is ready less than secs - And I download the created predictions file to "" - Then the batch prediction file is like "" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | local_file | predictions_file | - | ../data/iris.csv | 30 | 30 | 50 | 50 | ./tmp/batch_predictions.csv |./data/batch_predictions.csv | - + Scenario: Successfully creating a batch prediction: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + When I create a batch prediction for the dataset with the model + And I wait until the batch prediction is ready less than secs + And I download the created predictions file to "" + Then the batch prediction file is like "" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "batch_wait", "local_file", "predictions_file"] examples = [ - ['data/iris.csv', '30', '30', '50', '50', 'tmp/batch_predictions.csv', 'data/batch_predictions.csv']] + ['data/iris.csv', '30', '30', '50', '50', + 'tmp/batch_predictions.csv', 'data/batch_predictions.csv']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) batch_pred_create.i_create_a_batch_prediction(self) - batch_pred_create.the_batch_prediction_is_finished_in_less_than(self, example[4]) - batch_pred_create.i_download_predictions_file(self, 
example[5]) - batch_pred_create.i_check_predictions(self, example[6]) + batch_pred_create.the_batch_prediction_is_finished_in_less_than( + self, example["batch_wait"]) + batch_pred_create.i_download_predictions_file( + self, example["local_file"]) + batch_pred_create.i_check_predictions( + self, example["predictions_file"]) def test_scenario2(self): """ - Scenario: Successfully creating a batch prediction for an ensemble: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create an ensemble of models and tlp - And I wait until the ensemble is ready less than secs - When I create a batch prediction for the dataset with the ensemble and "" - And I wait until the batch prediction is ready less than secs - And I download the created predictions file to "" - Then the batch prediction file is like "" - - Examples: - | data | time_1 | time_2 | number_of_models | tlp | time_3 | time_4 | local_file | predictions_file | params - | ../data/iris.csv | 30 | 30 | 5 | 1 | 80 | 50 | ./tmp/batch_predictions.csv | ./data/batch_predictions_e.csv | {"combiner": 0} - - + Scenario: Successfully creating a batch prediction for an ensemble: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an ensemble of models + And I wait until the ensemble is ready less than secs + When I create a batch prediction for the dataset with the ensemble and "" + And I wait until the batch prediction is ready less than secs + And I download the created predictions file to "" + Then the batch prediction file is like "" """ - print self.test_scenario2.__doc__ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "number_of_models", + "model_wait", "batch_wait", "local_file", + "predictions_file", "batch_conf"] examples = [ - ['data/iris.csv', '30', '30', '5', '1', '180', '150', 'tmp/batch_predictions.csv', 'data/batch_predictions_e_c0.csv', {"combiner":0}], - ['data/iris.csv', '30', '30', '5', '1', '180', '150', 'tmp/batch_predictions.csv', 'data/batch_predictions_e_c1.csv', {"combiner":1, "confidence": True}], - ['data/iris.csv', '30', '30', '5', '1', '180', '150', 'tmp/batch_predictions.csv', 'data/batch_predictions_e_c2.csv', {"combiner":2, "confidence": True}], - ['data/iris.csv', '30', '30', '5', '1', '180', '150', 'tmp/batch_predictions.csv', 'data/batch_predictions_e_o_k_v.csv', {"operating_kind": "votes", "confidence": True}], - ['data/iris.csv', '30', '30', '5', '1', '180', '150', 'tmp/batch_predictions.csv', 'data/batch_predictions_e_o_k_p.csv', {"operating_kind": "probability", "probability": True}], - ['data/iris.csv', '30', '30', '5', '1', '180', '150', 'tmp/batch_predictions.csv', 'data/batch_predictions_e_o_k_c.csv', {"operating_kind": "confidence", "confidence": True}]] + ['data/iris.csv', '30', '30', '5', '180', '150', + 'tmp/batch_predictions.csv', 'data/batch_predictions_e_c0.csv', + {"combiner":0}], + ['data/iris.csv', '30', '30', '5', '180', '150', + 'tmp/batch_predictions.csv', 'data/batch_predictions_e_c1.csv', + {"combiner":1, "confidence": True}], + ['data/iris.csv', '30', '30', '5', '180', '150', + 'tmp/batch_predictions.csv', 'data/batch_predictions_e_c2.csv', + {"combiner":2, "confidence": True}], + ['data/iris.csv', '30', '30', '5', '180', '150', + 'tmp/batch_predictions.csv', 
'data/batch_predictions_e_o_k_v.csv', + {"operating_kind": "votes", "confidence": True}], + ['data/iris.csv', '30', '30', '5', '180', '150', + 'tmp/batch_predictions.csv', 'data/batch_predictions_e_o_k_p.csv', + {"operating_kind": "probability", "probability": True}], + ['data/iris.csv', '30', '30', '5', '180', '150', + 'tmp/batch_predictions.csv', 'data/batch_predictions_e_o_k_c.csv', + {"operating_kind": "confidence", "confidence": True}]] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - ensemble_create.i_create_an_ensemble(self, example[3], example[4]) - ensemble_create.the_ensemble_is_finished_in_less_than(self, example[5]) - batch_pred_create.i_create_a_batch_prediction_ensemble(self, example[9]) - batch_pred_create.the_batch_prediction_is_finished_in_less_than(self, example[6]) - batch_pred_create.i_download_predictions_file(self, example[7]) - batch_pred_create.i_check_predictions(self, example[8]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_shared = "%s_%s" % (example["data"], + example["number_of_models"]) + ensemble_create.i_create_an_ensemble( + self, example["number_of_models"], + shared=ensemble_shared) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"], shared=ensemble_shared) + batch_pred_create.i_create_a_batch_prediction_ensemble( + self, example["batch_conf"]) + batch_pred_create.the_batch_prediction_is_finished_in_less_than( + self, example["batch_wait"]) + batch_pred_create.i_download_predictions_file( + self, example["local_file"]) + batch_pred_create.i_check_predictions( + self, example["predictions_file"]) def test_scenario3(self): """ - Scenario: Successfully creating a batch centroid from a cluster: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a cluster - And I wait until the cluster is ready less than secs - When I create a batch centroid for the dataset - And I check the batch centroid is ok - And I wait until the batch centroid is ready less than secs - And I download the created centroid file to "" - Then the batch centroid file is like "" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | local_file | predictions_file | - | ../data/diabetes.csv | 50 | 50 | 50 | 50 | ./tmp/batch_predictions.csv |./data/batch_predictions_c.csv | - - + Scenario: Successfully creating a batch centroid from a cluster: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster + And I wait until the cluster is ready less than secs + When I create a batch centroid for the dataset + And I check the batch centroid is ok + And I wait until the batch centroid is ready less than secs + And I download the created centroid file to "" 
+ Then the batch centroid file is like "" """ - print self.test_scenario3.__doc__ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "batch_wait", "local_file", "predictions_file"] examples = [ - ['data/diabetes.csv', '50', '50', '50', '50', 'tmp/batch_predictions.csv', 'data/batch_predictions_c.csv']] + ['data/diabetes.csv', '50', '50', '50', '50', + 'tmp/batch_predictions.csv', 'data/batch_predictions_c.csv']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) cluster_create.i_create_a_cluster(self) - cluster_create.the_cluster_is_finished_in_less_than(self, example[3]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"]) batch_pred_create.i_create_a_batch_prediction_with_cluster(self) - batch_pred_create.the_batch_centroid_is_finished_in_less_than(self, example[4]) - batch_pred_create.i_download_centroid_file(self, example[5]) - batch_pred_create.i_check_predictions(self, example[6]) + batch_pred_create.the_batch_centroid_is_finished_in_less_than( + self, example["batch_wait"]) + batch_pred_create.i_download_centroid_file( + self, example["local_file"]) + batch_pred_create.i_check_predictions( + self, example["predictions_file"]) def test_scenario4(self): """ - - Scenario: Successfully creating a source from a batch prediction: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - When I create a batch prediction for the dataset with the model - And I wait until the batch prediction is ready less than secs - Then I create a source from the batch prediction - And I wait until the source is ready less than secs - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | - | ../data/iris.csv | 30 | 30 | 50 | 50 | + Scenario: Successfully creating a source from a batch prediction: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + When I create a batch prediction for the dataset with the model + And I wait until the batch prediction is ready less than secs + Then I create a source from the batch prediction + And I wait until the source is ready less than secs """ - print self.test_scenario4.__doc__ + show_doc(self.test_scenario4) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "batch_wait"] examples = [ ['data/diabetes.csv', '30', '30', '50', '50']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, 
example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) batch_pred_create.i_create_a_batch_prediction(self) - batch_pred_create.the_batch_prediction_is_finished_in_less_than(self, example[4]) + batch_pred_create.the_batch_prediction_is_finished_in_less_than( + self, example["batch_wait"]) batch_pred_create.i_create_a_source_from_batch_prediction(self) - source_create.the_source_is_finished(self, example[1]) + source_create.the_source_is_finished(self, example["source_wait"]) def test_scenario5(self): """ - Scenario: Successfully creating a batch anomaly score from an anomaly detector: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create an anomaly detector - And I wait until the anomaly detector is ready less than secs - When I create a batch anomaly score - And I check the batch anomaly score is ok - And I wait until the batch anomaly score is ready less than secs - And I download the created anomaly score file to "" - Then the batch anomaly score file is like "" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | local_file | predictions_file | - | ../data/tiny_kdd.csv | 30 | 30 | 50 | 50 | ./tmp/batch_predictions.csv |./data/batch_predictions_a.csv | - + Scenario: Successfully creating a batch anomaly score from an anomaly detector: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an anomaly detector + And I wait until the anomaly detector is ready less than secs + When I create a batch anomaly score + And I check the batch anomaly score is ok + And I wait until the batch anomaly score is ready less than secs + And I download the created anomaly score file to "" + Then the batch anomaly score file is like "" """ - print self.test_scenario5.__doc__ + show_doc(self.test_scenario5) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "batch_wait", "local_file", "predictions_file"] examples = [ - ['data/tiny_kdd.csv', '30', '30', '50', '50', 'tmp/batch_predictions.csv', 'data/batch_predictions_a.csv']] + ['data/tiny_kdd.csv', '30', '30', '50', '50', + 'tmp/batch_predictions.csv', 'data/batch_predictions_a.csv']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - anomaly_create.i_create_an_anomaly(self) - anomaly_create.the_anomaly_is_finished_in_less_than(self, 
example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + anomaly_create.i_create_an_anomaly(self, shared=example["data"]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) batch_pred_create.i_create_a_batch_prediction_with_anomaly(self) - batch_pred_create.the_batch_anomaly_score_is_finished_in_less_than(self, example[4]) - batch_pred_create.i_download_anomaly_score_file(self, example[5]) - batch_pred_create.i_check_predictions(self, example[6]) + batch_pred_create.the_batch_anomaly_score_is_finished_in_less_than( + self, example["batch_wait"]) + batch_pred_create.i_download_anomaly_score_file( + self, example["local_file"]) + batch_pred_create.i_check_predictions( + self, example["predictions_file"]) def test_scenario6(self): """ - Scenario: Successfully creating a batch prediction for a logistic regression: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a logistic regression - And I wait until the logistic regression is ready less than secs - When I create a batch prediction for the dataset with the logistic regression - And I wait until the batch prediction is ready less than secs - And I download the created predictions file to "" - Then the batch prediction file is like "" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | local_file | predictions_file | - | ../data/iris.csv | 30 | 30 | 80 | 50 | ./tmp/batch_predictions.csv | ./data/batch_predictions_lr.csv | - + Scenario: Successfully creating a batch prediction for a logistic regression: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression + And I wait until the logistic regression is ready less than secs + When I create a batch prediction for the dataset with the logistic regression + And I wait until the batch prediction is ready less than secs + And I download the created predictions file to "" + Then the batch prediction file is like "" """ - print self.test_scenario6.__doc__ + show_doc(self.test_scenario6) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "batch_wait", "local_file", "predictions_file"] examples = [ - ['data/iris.csv', '30', '30', '80', '50', 'tmp/batch_predictions.csv', 'data/batch_predictions_lr.csv']] + ['data/iris.csv', '30', '30', '80', '50', + 'tmp/batch_predictions.csv', 'data/batch_predictions_lr.csv']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_logistic_model(self) - model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + 
source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model( + self, shared=example["data"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) batch_pred_create.i_create_a_batch_prediction_logistic_model(self) - batch_pred_create.the_batch_prediction_is_finished_in_less_than(self, example[4]) - batch_pred_create.i_download_predictions_file(self, example[5]) - batch_pred_create.i_check_predictions(self, example[6]) + batch_pred_create.the_batch_prediction_is_finished_in_less_than( + self, example["batch_wait"]) + batch_pred_create.i_download_predictions_file( + self, example["local_file"]) + batch_pred_create.i_check_predictions( + self, example["predictions_file"]) diff --git a/bigml/tests/test_07_multimodel_batch_predictions.py b/bigml/tests/test_07_multimodel_batch_predictions.py index 19c2c034..a19ea4ca 100644 --- a/bigml/tests/test_07_multimodel_batch_predictions.py +++ b/bigml/tests/test_07_multimodel_batch_predictions.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,69 +20,75 @@ """ Creating Multimodel batch predictions """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_model_steps as model_create -import compare_predictions_steps as compare_pred +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . 
import compare_predictions_steps as compare_pred -class TestMultimodelBatchPrediction(object): +class TestMultimodelBatchPrediction: + """Test MultiModel batch predictions""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a batch prediction from a multi model: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I retrieve a list of remote models tagged with "" - And I create a local multi model - When I create a batch prediction for "" and save it in "" - And I combine the votes in "" - Then the plurality combined predictions are "" - And the confidence weighted predictions are "" - - Examples: - | data | time_1 | time_2 | time_3 | params | tag | data_input | path | predictions | - | ../data/iris.csv | 10 | 10 | 10 | {"tags":["mytag"]} | mytag | [{"petal width": 0.5}, {"petal length": 6, "petal width": 2}, {"petal length": 4, "petal width": 1.5}] | ./tmp | ["Iris-setosa", "Iris-virginica", "Iris-versicolor"] | - + Scenario: Successfully creating a batch prediction from a multi model: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I retrieve a list of remote models tagged with "" + And I create a local multi model + When I create a batch prediction for "" and save it in "" + And I combine the votes in "" + Then the plurality combined predictions are "" + And the confidence weighted predictions are "" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "tags", "tag", "input_data", "path", "predictions"] examples = [ ['data/iris.csv', '10', '10', '10', '{"tags":["mytag"]}', 'mytag', '[{"petal width": 0.5}, {"petal length": 6, "petal width": 2}, {"petal length": 4, "petal width": 1.5}]', './tmp', '["Iris-setosa", "Iris-virginica", "Iris-versicolor"]']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model_with(self, example[4]) - model_create.the_model_is_finished_in_less_than(self, example[3]) - model_create.i_create_a_model_with(self, example[4]) - 
model_create.the_model_is_finished_in_less_than(self, example[3]) - model_create.i_create_a_model_with(self, example[4]) - model_create.the_model_is_finished_in_less_than(self, example[3]) - compare_pred.i_retrieve_a_list_of_remote_models(self, example[5]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model_with(self, example["tags"]) + model_create.the_model_is_finished_in_less_than(self, example["model_wait"]) + model_create.i_create_a_model_with(self, example["tags"]) + model_create.the_model_is_finished_in_less_than(self, example["model_wait"]) + model_create.i_create_a_model_with(self, example["tags"]) + model_create.the_model_is_finished_in_less_than(self, example["model_wait"]) + compare_pred.i_retrieve_a_list_of_remote_models(self, example["tag"]) compare_pred.i_create_a_local_multi_model(self) - compare_pred.i_create_a_batch_prediction(self, example[6], example[7]) - compare_pred.i_combine_the_votes(self, example[7]) - compare_pred.the_plurality_combined_prediction(self, example[8]) - compare_pred.the_confidence_weighted_prediction(self, example[8]) + compare_pred.i_create_a_batch_prediction(self, example["input_data"], example["path"]) + compare_pred.i_combine_the_votes(self, example["path"]) + compare_pred.the_plurality_combined_prediction(self, example["predictions"]) + compare_pred.the_confidence_weighted_prediction(self, example["predictions"]) diff --git a/bigml/tests/test_08_multimodel.py b/bigml/tests/test_08_multimodel.py index 20800b33..c9ac4d1b 100644 --- a/bigml/tests/test_08_multimodel.py +++ b/bigml/tests/test_08_multimodel.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,92 +20,106 @@ """ Creating model on lists of datasets """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_model_steps as model_create -import create_multimodel_steps as multimodel_create -import compare_predictions_steps as compare_pred +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_multimodel_steps as multimodel_create +from . 
import compare_predictions_steps as compare_pred -class TestMultimodel(object): +class TestMultimodel: + """Testing the MultiModel class methods""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a model from a dataset list: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I store the dataset id in a list - And I create a dataset - And I wait until the dataset is ready less than secs - And I store the dataset id in a list - Then I create a model from a dataset list - And I wait until the model is ready less than secs - And I check the model stems from the original dataset list - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | - | ../data/iris.csv | 10 | 10 | 10 | 10 + Scenario: Successfully creating a model from a dataset list: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I store the dataset id in a list + And I create a dataset + And I wait until the dataset is ready less than secs + And I store the dataset id in a list + Then I create a model from a dataset list + And I wait until the model is ready less than secs + And I check the model stems from the original dataset list """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait"] examples = [ - ['data/iris.csv', '10', '10', '10', '10']] + ['data/iris.csv', '10', '10', '10']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) multimodel_create.i_store_dataset_id(self) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[3]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) multimodel_create.i_store_dataset_id(self) model_create.i_create_a_model_from_dataset_list(self) - model_create.the_model_is_finished_in_less_than(self, example[4]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) multimodel_create.i_check_model_datasets_and_datasets_ids(self) def test_scenario2(self): """ - Scenario: Successfully creating a model from a dataset list and predicting with it using median: - Given I 
create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - And I create a local multi model - When I create a local multimodel batch prediction using median for - Then the local prediction is - - Examples: - | data | time_1 | time_2 | time_3 | input_data | prediction - | ../data/grades.csv | 10 | 10 | 10 | {'Tutorial': 99.47, 'Midterm': 53.12, 'TakeHome': 87.96} | 50 + Scenario: Successfully creating a model from a dataset list and predicting with it using median: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local multi model + When I create a local multimodel batch prediction using median for + Then the local prediction is """ - print self.test_scenario2.__doc__ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "prediction"] examples = [ - ['data/grades.csv', '30', '30', '30', '{"Tutorial": 99.47, "Midterm": 53.12, "TakeHome": 87.96}', 63.33]] + ['data/grades.csv', '30', '30', '30', + '{"Tutorial": 99.47, "Midterm": 53.12, "TakeHome": 87.96}', + 63.33]] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) world.list_of_models = [world.model] compare_pred.i_create_a_local_multi_model(self) - compare_pred.i_create_a_local_mm_median_batch_prediction(self, example[4]) - compare_pred.the_local_prediction_is(self, example[5]) + compare_pred.i_create_a_local_mm_median_batch_prediction( + self, example["input_data"]) + compare_pred.the_local_prediction_is(self, example["prediction"]) diff --git a/bigml/tests/test_09_ensemble_prediction.py b/bigml/tests/test_09_ensemble_prediction.py index fca8b80c..52b06872 100644 --- a/bigml/tests/test_09_ensemble_prediction.py +++ b/bigml/tests/test_09_ensemble_prediction.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -19,60 +20,77 @@ """ Creating ensembles predictions """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_ensemble_steps as ensemble_create -import create_prediction_steps as prediction_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_ensemble_steps as ensemble_create +from . import create_prediction_steps as prediction_create -class TestEnsemblePrediction(object): +class TestEnsemblePrediction: + """Testing Ensemble Predictions""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a prediction from an ensemble: - Given I create a data source uploading a "<data>" file - And I wait until the source is ready less than <time_1> secs - And I create a dataset - And I wait until the dataset is ready less than <time_2> secs - And I create an ensemble of <number_of_models> models and <tlp> tlp - And I wait until the ensemble is ready less than <time_3> secs - When I create an ensemble prediction for "<data_input>" - And I wait until the prediction is ready less than <time_4> secs - Then the prediction for "<objective>" is "<prediction>" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | number_of_models | tlp | data_input | objective | prediction | - | ../data/iris.csv | 10 | 10 | 50 | 20 | 5 | 1 | {"petal width": 0.5} | 000004 | Iris-versicolor | - | ../data/iris_sp_chars.csv | 10 | 10 | 50 | 20 | 5 | 1 | {"pétal&width\u0000": 0.5} | 000004 | Iris-versicolor | - | ../data/grades.csv | 10 | 10 | 150 | 20 | 10 | 1 | {"Assignment": 81.22, "Tutorial": 91.95, "Midterm": 79.38, "TakeHome": 105.93} | 000005 | 88.205575 | - | ../data/grades.csv | 10 | 10 | 150 | 20 | 10 | 1 | {"Assignment": 97.33, "Tutorial": 106.74, "Midterm": 76.88, "TakeHome": 108.89} | 000005 | 84.29401 | + Scenario: Successfully creating a prediction from an ensemble: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create an ensemble of <number_of_models> models + And I wait until the ensemble is ready less than <model_wait> secs + When I create an ensemble prediction for "<input_data>" + And I wait until the prediction is ready less than <prediction_wait> secs + Then the prediction for "<objective_id>" is "<prediction>" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "prediction_wait", "number_of_models", "input_data", + "objective_id", "prediction"] examples = [ - ['data/iris.csv', '30', '30', '50', '20', '5', '1', '{"petal width": 0.5}', '000004', 'Iris-versicolor'], - ['data/iris_sp_chars.csv', '30', '30', '50', '20', '5', '1', '{"pétal&width\u0000": 0.5}', '000004', 'Iris-versicolor'], - ['data/grades.csv', '30', '30', '150', '20', '10', '1', '{"Assignment": 81.22, "Tutorial": 91.95, "Midterm": 79.38, "TakeHome": 105.93}', '000005', '84.556'], - ['data/grades.csv', '30', '30', '150', 
'20', '10', '1', '{"Assignment": 97.33, "Tutorial": 106.74, "Midterm": 76.88, "TakeHome": 108.89}', '000005', '73.13558']] + ['data/iris.csv', '30', '30', '50', '20', '5', + '{"petal width": 0.5}', '000004', 'Iris-versicolor'], + ['data/iris_sp_chars.csv', '30', '30', '50', '20', '5', + '{"pétal&width\\u0000": 0.5}', '000004', 'Iris-versicolor'], + ['data/grades.csv', '30', '30', '150', '20', '10', + '{"Assignment": 81.22, "Tutorial": 91.95, "Midterm": 79.38,' + ' "TakeHome": 105.93}', '000005', '84.556'], + ['data/grades.csv', '30', '30', '150', '20', '10', + '{"Assignment": 97.33, "Tutorial": 106.74, "Midterm": 76.88,' + ' "TakeHome": 108.89}', '000005', '73.13558']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - ensemble_create.i_create_an_ensemble(self, example[5], example[6]) - ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3]) - prediction_create.i_create_an_ensemble_prediction(self, example[7]) - prediction_create.the_prediction_is_finished_in_less_than(self, example[4]) - prediction_create.the_prediction_is(self, example[8], example[9]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_shared = "%s_%s" % (example["data"], + example["number_of_models"]) + ensemble_create.i_create_an_ensemble( + self, example["number_of_models"], shared=ensemble_shared) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"], shared=ensemble_shared) + prediction_create.i_create_an_ensemble_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is_finished_in_less_than( + self, example["prediction_wait"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) diff --git a/bigml/tests/test_10_local_ensemble_prediction.py b/bigml/tests/test_10_local_ensemble_prediction.py index 23cb79f6..2e35f1b0 100644 --- a/bigml/tests/test_10_local_ensemble_prediction.py +++ b/bigml/tests/test_10_local_ensemble_prediction.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,261 +20,267 @@ """ Creating local ensemble predictions """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_model_steps as model_create -import create_ensemble_steps as ensemble_create -import create_prediction_steps as prediction_create -import compare_predictions_steps as compare_pred +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . 
import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_ensemble_steps as ensemble_create +from . import create_prediction_steps as prediction_create +from . import compare_predictions_steps as compare_pred -class TestEnsemblePrediction(object): - def setup(self): +class TestEnsemblePrediction: + """Testing local ensemble prediction""" + + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a local prediction from an Ensemble: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create an ensemble of models and tlp - And I wait until the ensemble is ready less than secs - And I create a local Ensemble - When I create a local ensemble prediction with confidence for "" - Then the local prediction is "" - And the local prediction's confidence is "" - And the local probabilities are "" - - Examples: - | data | time_1 | time_2 | time_3 | number_of_models | tlp | data_input |prediction | confidence - | ../data/iris.csv | 10 | 10 | 50 | 5 | 1 | {"petal width": 0.5} | Iris-versicolor | 0.3687 | [0.3403, 0.4150, 0.2447] + Scenario: Successfully creating a local prediction from an Ensemble: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an ensemble of models + And I wait until the ensemble is ready less than secs + And I create a local Ensemble + When I create a local ensemble prediction with probabilities for "" + Then the local prediction is "" + And the local prediction's confidence is "" + And the local probabilities are "" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "number_of_models", "input_data", + "prediction", "confidence", "probabilities"] examples = [ - ['data/iris.csv', '10', '10', '50', '5', '1', '{"petal width": 0.5}', 'Iris-versicolor', '0.415', '["0.3403", "0.4150", "0.2447"]' ]] + ['data/iris.csv', '10', '10', '50', '5', + '{"petal width": 0.5}', 'Iris-versicolor', '0.415', + '["0.3403", "0.4150", "0.2447"]' ]] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - ensemble_create.i_create_an_ensemble(self, example[4], example[5]) - ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + 
dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_shared = "%s_%s" % (example["data"], + example["number_of_models"]) + ensemble_create.i_create_an_ensemble( + self, example["number_of_models"], shared=ensemble_shared) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"], shared=ensemble_shared) ensemble_create.create_local_ensemble(self) - prediction_create.create_local_ensemble_prediction_with_confidence(self, example[6]) - compare_pred.the_local_prediction_is(self, example[7]) - compare_pred.the_local_prediction_confidence_is(self, example[8]) - compare_pred.the_local_probabilities_are(self, example[9]) + prediction_create.create_local_ensemble_prediction_probabilities( + self, example["input_data"]) + compare_pred.the_local_prediction_is(self, example["prediction"]) + compare_pred.the_local_prediction_confidence_is( + self, example["confidence"]) + compare_pred.the_local_probabilities_are( + self, example["probabilities"]) def test_scenario2(self): """ - - Scenario: Successfully obtaining field importance from an Ensemble: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - When I create a local Ensemble with the last models - Then the field importance text is - - Examples: - | data | time_1 | time_2 |parms1 | time_3 |parms2 | time_4 |parms3| time_5 |number_of_models |field_importance - | ../data/iris.csv | 10 | 10 |{"input_fields": ["000000", "000001","000003", "000004"]} |20 |{"input_fields": ["000000", "000001","000002", "000004"]} | 20 |{"input_fields": ["000000", "000001","000002", "000003", "000004"]} | 20 | 3 |[["000002", 0.5269933333333333], ["000003", 0.38936], ["000000", 0.04662333333333333], ["000001", 0.037026666666666666]] + Scenario: Successfully obtaining field importance from an Ensemble: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + When I create a local Ensemble with the last models + Then the field importance text is """ - print self.test_scenario2.__doc__ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "model_conf1", "model_conf2", "model_conf3", + "number_of_models", "field_importance"] examples = [ - ['data/iris.csv', '10', '10', '{"input_fields": ["000000", "000001","000003", "000004"]}', '20', '{"input_fields": ["000000", "000001","000002", "000004"]}', '20', '{"input_fields": ["000000", "000001","000002", "000003", "000004"]}', '20', '3', '[["000002", 0.5269933333333333], ["000003", 0.38936], ["000000", 0.04662333333333333], ["000001", 0.037026666666666666]]']] + ['data/iris.csv', '10', '10', '20', + '{"input_fields": ["000000", "000001","000003", "000004"]}', + '{"input_fields": ["000000", "000001","000002", "000004"]}', + '{"input_fields": 
["000000", "000001","000002", "000003",' + ' "000004"]}', '3', + '[["000002", 0.5269933333333333], ["000003", 0.38936],' + ' ["000000", 0.04662333333333333],' + '["000001", 0.037026666666666666]]']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model_with(self, example[3]) - model_create.the_model_is_finished_in_less_than(self, example[4]) - model_create.i_create_a_model_with(self, example[5]) - model_create.the_model_is_finished_in_less_than(self, example[6]) - model_create.i_create_a_model_with(self, example[7]) - model_create.the_model_is_finished_in_less_than(self, example[8]) - ensemble_create.create_local_ensemble_with_list(self, example[9]) - ensemble_create.field_importance_print(self, example[10]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model_with(self, example["model_conf1"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with(self, example["model_conf2"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with(self, example["model_conf3"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + ensemble_create.create_local_ensemble_with_list( + self, example["number_of_models"]) + ensemble_create.field_importance_print( + self, example["field_importance"]) def test_scenario3(self): """ - - Scenario: Successfully creating a local prediction from an Ensemble adding confidence: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create an ensemble of models and tlp - And I wait until the ensemble is ready less than secs - And I create a local Ensemble - When I create a local ensemble prediction for "" in JSON adding confidence - Then the local prediction is "" - And the local prediction's confidence is "" - - Examples: - | data | time_1 | time_2 | time_3 | number_of_models | tlp | data_input |prediction | confidence - | ../data/iris.csv | 10 | 10 | 50 | 5 | 1 | {"petal width": 0.5} | Iris-versicolor | 0.3687 + Scenario: Successfully creating a local prediction from an Ensemble adding confidence: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an ensemble of models + And I wait until the ensemble is ready less than secs + And I create a local Ensemble + When I create a local ensemble prediction for "" in JSON adding confidence + Then the local prediction is "" + And the local prediction's confidence is "" """ - print self.test_scenario3.__doc__ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + 
"number_of_models", "input_data", "prediction", + "confidence"] examples = [ - ['data/iris.csv', '10', '10', '50', '5', '1', '{"petal width": 0.5}', 'Iris-versicolor', '0.415']] + ['data/iris.csv', '10', '10', '50', '5', + '{"petal width": 0.5}', 'Iris-versicolor', '0.415']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - ensemble_create.i_create_an_ensemble(self, example[4], example[5]) - ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_shared = "%s_%s" % (example["data"], + example["number_of_models"]) + ensemble_create.i_create_an_ensemble( + self, example["number_of_models"], shared=ensemble_shared) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"], shared=ensemble_shared) ensemble_create.create_local_ensemble(self) - prediction_create.create_local_ensemble_prediction_add_confidence(self, example[6]) - compare_pred.the_local_prediction_is(self, example[7]) - compare_pred.the_local_prediction_confidence_is(self, example[8]) + prediction_create.create_local_ensemble_prediction_add_confidence( + self, example["input_data"]) + compare_pred.the_local_prediction_is(self, example["prediction"]) + compare_pred.the_local_prediction_confidence_is( + self, example["confidence"]) def test_scenario4(self): """ - Scenario: Successfully obtaining field importance from an Ensemble created from local models: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - When I create a local Ensemble with the last local models - Then the field importance text is - - Examples: - | data | time_1 | time_2 |parms1 | time_3 |parms2 | time_4 |parms3| time_5 |number_of_models |field_importance - | ../data/iris.csv | 10 | 10 |{"input_fields": ["000000", "000001","000003", "000004"]} |20 |{"input_fields": ["000000", "000001","000002", "000004"]} | 20 |{"input_fields": ["000000", "000001","000002", "000003", "000004"]} | 20 | 3 |[["000002", 0.5269933333333333], ["000003", 0.38936], ["000000", 0.04662333333333333], ["000001", 0.037026666666666666]] + Scenario: Successfully obtaining field importance from an Ensemble created from local models: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create 
a model with "" + And I wait until the model is ready less than secs + When I create a local Ensemble with the last local models + Then the field importance text is """ - print self.test_scenario4.__doc__ + show_doc(self.test_scenario4) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "model_conf1", "model_conf2", "model_conf3", + "number_of_models", "field_importance"] examples = [ - ['data/iris.csv', '10', '10', '{"input_fields": ["000000", "000001","000003", "000004"]}', '20', '{"input_fields": ["000000", "000001","000002", "000004"]}', '20', '{"input_fields": ["000000", "000001","000002", "000003", "000004"]}', '20', '3', '[["000002", 0.5269933333333333], ["000003", 0.38936], ["000000", 0.04662333333333333], ["000001", 0.037026666666666666]]']] + ['data/iris.csv', '10', '10', '30', + '{"input_fields": ["000000", "000001","000003", "000004"]}', + '{"input_fields": ["000000", "000001","000002", "000004"]}', + '{"input_fields": ["000000", "000001","000002", "000003",' + ' "000004"]}', '3', + '[["000002", 0.5269933333333333], ["000003", 0.38936],' + ' ["000000", 0.04662333333333333], ' + '["000001", 0.037026666666666666]]']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model_with(self, example[3]) - model_create.the_model_is_finished_in_less_than(self, example[4]) - model_create.i_create_a_model_with(self, example[5]) - model_create.the_model_is_finished_in_less_than(self, example[6]) - model_create.i_create_a_model_with(self, example[7]) - model_create.the_model_is_finished_in_less_than(self, example[8]) - ensemble_create.create_local_ensemble_with_list_of_local_models(self, example[9]) - ensemble_create.field_importance_print(self, example[10]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model_with(self, example["model_conf1"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with(self, example["model_conf2"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with(self, example["model_conf3"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + ensemble_create.create_local_ensemble_with_list_of_local_models( + self, example["number_of_models"]) + ensemble_create.field_importance_print( + self, example["field_importance"]) def test_scenario5(self): """ - Scenario: Successfully creating a local prediction from an Ensemble: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create an ensemble of models and tlp - And I wait until the ensemble is ready less than secs - And I create a local Ensemble - When I create a local ensemble prediction using median with confidence for "" - Then the local 
prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | number_of_models | tlp | data_input |prediction | - | ../data/grades.csv | 10 | 10 | 50 | 2 | 1 | {} | 67.5 | - """ - print self.test_scenario5.__doc__ - examples = [ - ['data/grades.csv', '30', '30', '50', '2', '1', '{}', 69.0934]] - for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - ensemble_create.i_create_an_ensemble(self, example[4], example[5]) - ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3]) - ensemble_create.create_local_ensemble(self) - prediction_create.create_local_ensemble_prediction_using_median_with_confidence(self, example[6]) - compare_pred.the_local_prediction_is(self, example[7]) - - def test_scenario6(self): - """ - Scenario: Successfully comparing predictions with raw date input: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create an ensemble - And I wait until the ensemble is ready less than secs - And I create a local ensemble - When I create a prediction for "" - Then the prediction for "" is "" - And I create a local prediction for "" - Then the local prediction is "" - Examples: - |data|time_1|time_2|time_3|data_input|objective|prediction| + Scenario: Successfully creating a local prediction from an Ensemble: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an ensemble of models + And I wait until the ensemble is ready less than secs + And I create a local Ensemble + When I create a local ensemble prediction using median with confidence for "" + Then the local prediction is "" """ + show_doc(self.test_scenario5) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "number_of_models", "input_data", "prediction"] examples = [ - ['data/dates2.csv', 10, 10, 50, - '{"time-1": "1910-05-08T19:10:23.106", "cat-0":"cat2"}', - '000002', -0.11052], - ['data/dates2.csv', 10, 10, 50, - '{"time-1": "1920-06-30T20:21:20.320", "cat-0":"cat1"}', - '000002', 0.79179], - ['data/dates2.csv', 10, 10, 50, - '{"time-1": "1932-01-30T19:24:11.450", "cat-0":"cat2"}', - '000002', -1.00834], - ['data/dates2.csv', 10, 10, 50, - '{"time-1": "1950-11-06T05:34:05.252", "cat-0":"cat1"}', - '000002', -0.14442], - ['data/dates2.csv', 10, 10, 50, - '{"time-1": "1969-7-14 17:36", "cat-0":"cat2"}', - '000002', -0.05469], - ['data/dates2.csv', 10, 10, 50, - '{"time-1": "2001-01-05T23:04:04.693", "cat-0":"cat2"}', - '000002', -0.23387], - ['data/dates2.csv', 10, 10, 50, - '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}', - '000002', -0.05469], - ['data/dates2.csv', 10, 10, 50, - '{"time-1": "Mon Jul 14 17:36 +0000 1969", "cat-0":"cat1"}', - '000002', -0.05469]] - print self.test_scenario6.__doc__ + ['data/grades.csv', '30', '30', '50', '2', '{}', 69.0934]] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - 
ensemble_create.i_create_an_ensemble(self) - ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_shared = "%s_%s" % (example["data"], + example["number_of_models"]) + ensemble_create.i_create_an_ensemble( + self, example["number_of_models"], shared=ensemble_shared) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"], shared=ensemble_shared) ensemble_create.create_local_ensemble(self) - prediction_create.i_create_an_ensemble_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_create.create_local_ensemble_prediction_using_median_with_confidence(self, example[4]) - compare_pred.the_local_prediction_is(self, example[6]) + prediction_create.create_local_ensemble_prediction_using_median_with_confidence( + self, example["input_data"]) + compare_pred.the_local_prediction_is(self, example["prediction"]) diff --git a/bigml/tests/test_11_multimodel_prediction.py b/bigml/tests/test_11_multimodel_prediction.py index 77b44288..23021c1d 100644 --- a/bigml/tests/test_11_multimodel_prediction.py +++ b/bigml/tests/test_11_multimodel_prediction.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,113 +16,134 @@ # License for the specific language governing permissions and limitations # under the License. - """ Creating multimodel predictions """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_model_steps as model_create -import create_ensemble_steps as ensemble_create -import create_prediction_steps as prediction_create -import compare_predictions_steps as compare_pred +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_ensemble_steps as ensemble_create +from . import create_prediction_steps as prediction_create +from . 
import compare_predictions_steps as compare_pred -class TestMultimodelPrediction(object): +class TestMultimodelPrediction: + """Test MultiModel methods""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a prediction from a multi model: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I retrieve a list of remote models tagged with "" - And I create a local multi model - When I create a local prediction for "" - Then the prediction for "" is "" - - Examples: - | data | time_1 | time_2 | time_3 | params | tag | data_input | prediction | - | ../data/iris.csv | 10 | 10 | 10 | {"tags":["mytag"]} | mytag | {"petal width": 0.5} | Iris-setosa | + Scenario: Successfully creating a prediction from a multi model: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I retrieve a list of remote models tagged with "" + And I create a local multi model + When I create a local prediction for "" + Then the prediction for "" is "" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "tags", "tag", "input_data", "prediction"] examples = [ - ['data/iris.csv', '10', '10', '10', '{"tags":["mytag"]}', 'mytag', '{"petal width": 0.5}', 'Iris-setosa']] + ['data/iris.csv', '10', '10', '10', '{"tags":["mytag"]}', + 'mytag', '{"petal width": 0.5}', 'Iris-setosa']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model_with(self, example[4]) - model_create.the_model_is_finished_in_less_than(self, example[3]) - model_create.i_create_a_model_with(self, example[4]) - model_create.the_model_is_finished_in_less_than(self, example[3]) - model_create.i_create_a_model_with(self, example[4]) - model_create.the_model_is_finished_in_less_than(self, example[3]) - compare_pred.i_retrieve_a_list_of_remote_models(self, example[5]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + 
source_create.the_source_is_finished(self, example["source_wait"], + shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model_with(self, example["tags"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with(self, example["tags"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with(self, example["tags"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + compare_pred.i_retrieve_a_list_of_remote_models( + self, example["tag"]) compare_pred.i_create_a_local_multi_model(self) - compare_pred.i_create_a_local_prediction(self, example[6]) - compare_pred.the_local_prediction_is(self, example[7]) + compare_pred.i_create_a_local_prediction( + self, example["input_data"]) + compare_pred.the_local_prediction_is(self, example["prediction"]) def test_scenario2(self): """ - - Scenario: Successfully creating a local batch prediction from a multi model: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I retrieve a list of remote models tagged with "" - And I create a local multi model - When I create a batch multimodel prediction for "" - Then the predictions are "" - - Examples: - | data | time_1 | time_2 | time_3 | params | tag | data_inputs | predictions | - | ../data/iris.csv | 10 | 10 | 10 | {"tags":["mytag"]} | mytag | [{"petal width": 0.5}, {"petal length": 6, "petal width": 2}] | ["Iris-setosa", "Iris-virginica"] | + Scenario: Successfully creating a local batch prediction from a multi model: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I retrieve a list of remote models tagged with "" + And I create a local multi model + When I create a batch multimodel prediction for "" + Then the predictions are "" """ - print self.test_scenario2.__doc__ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "tags", "tag", "input_data", "predictions"] examples = [ - ['data/iris.csv', '10', '10', '10', '{"tags":["mytag"]}', 'mytag', '[{"petal width": 0.5}, {"petal length": 6, "petal width": 2}]', '["Iris-setosa", "Iris-virginica"]']] + ['data/iris.csv', '10', '10', '10', '{"tags":["mytag"]}', + 'mytag', '[{"petal width": 0.5}, {"petal length": 6, ' + '"petal width": 2}]', '["Iris-setosa", "Iris-virginica"]']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - 
dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model_with(self, example[4]) - model_create.the_model_is_finished_in_less_than(self, example[3]) - model_create.i_create_a_model_with(self, example[4]) - model_create.the_model_is_finished_in_less_than(self, example[3]) - model_create.i_create_a_model_with(self, example[4]) - model_create.the_model_is_finished_in_less_than(self, example[3]) - compare_pred.i_retrieve_a_list_of_remote_models(self, example[5]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model_with(self, example["tags"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with(self, example["tags"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with(self, example["tags"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + compare_pred.i_retrieve_a_list_of_remote_models( + self, example["tag"]) compare_pred.i_create_a_local_multi_model(self) - compare_pred.i_create_a_batch_prediction_from_a_multi_model(self, example[6]) - compare_pred.the_batch_mm_predictions_are(self, example[7]) + compare_pred.i_create_a_batch_prediction_from_a_multi_model( + self, example["input_data"]) + compare_pred.the_batch_mm_predictions_are( + self, example["predictions"]) diff --git a/bigml/tests/test_12_public_model_prediction.py b/bigml/tests/test_12_public_model_prediction.py index fb31b9c7..cbfe2e36 100644 --- a/bigml/tests/test_12_public_model_prediction.py +++ b/bigml/tests/test_12_public_model_prediction.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,102 +20,71 @@ """ Creating public model predictions """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_model_steps as model_create -import create_prediction_steps as prediction_create -import compare_predictions_steps as compare_pred +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_prediction_steps as prediction_create +from . 
import compare_predictions_steps as compare_pred -class TestPublicModelPrediction(object): +class TestPublicModelPrediction: + """Testing published models""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a prediction using a public model: - Given I create a data source uploading a "<data>" file - And I wait until the source is ready less than <time_1> secs - And I create a dataset - And I wait until the dataset is ready less than <time_2> secs - And I create a model - And I wait until the model is ready less than <time_3> secs - And I make the model public - And I wait until the model is ready less than <time_3> secs - And I check the model status using the model's public url - When I create a prediction for "<data_input>" - Then the prediction for "<objective>" is "<prediction>" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | - | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | 000004 | Iris-setosa | + Scenario: Successfully creating a prediction using a public model: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create a model + And I wait until the model is ready less than <model_wait> secs + And I make the model public + And I wait until the model is ready less than <model_wait> secs + And I check the model status using the model's public url + When I create a prediction for "<input_data>" + Then the prediction for "<objective_id>" is "<prediction>" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction"] examples = [ - ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']] + ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', + '000004', 'Iris-setosa']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) model_create.make_the_model_public(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) model_create.model_from_public_url(self) - prediction_create.i_create_a_prediction(self, example[4]) - 
prediction_create.the_prediction_is(self, example[5], example[6]) - - def test_scenario2(self): - """ - Scenario: Successfully creating a prediction using a shared model: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - And I make the model shared - And I wait until the model is ready less than secs - And I get the model sharing info - And I check the model status using the model's shared url - And I check the model status using the model's shared key - And I create a local model - When I create a local prediction for "" - Then the local prediction is "" - - - Examples: - | data | time_1 | time_2 | time_3 | data_input | prediction | - | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | Iris-setosa | - """ - print self.test_scenario2.__doc__ - examples = [ - ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', 'Iris-setosa']] - for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) - model_create.make_the_model_shared(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) - model_create.get_sharing_info(self) - model_create.model_from_shared_url(self) - model_create.model_from_shared_key(self) - compare_pred.i_create_a_local_model(self) - compare_pred.i_create_a_local_prediction(self, example[4]) - compare_pred.the_local_prediction_is(self, example[5]) + prediction_create.i_create_a_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) diff --git a/bigml/tests/test_13_public_dataset.py b/bigml/tests/test_13_public_dataset.py index 75802d53..94657661 100644 --- a/bigml/tests/test_13_public_dataset.py +++ b/bigml/tests/test_13_public_dataset.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,50 +20,57 @@ """ Creating public dataset """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . 
import create_dataset_steps as dataset_create -class TestPublicDataset(object): +class TestPublicDataset: + """Testing published datasets """ - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating and reading a public dataset: - Given I create a data source uploading a "<data>" file - And I wait until the source is ready less than <time_1> secs - And I create a dataset - And I wait until the dataset is ready less than <time_2> secs - And I make the dataset public - And I wait until the dataset is ready less than <time_3> secs - When I get the dataset status using the dataset's public url - Then the dataset's status is FINISHED - - Examples: - | data | time_1 | time_2 | time_3 | - | ../data/iris.csv | 10 | 10 | 10 | + Scenario: Successfully creating and reading a public dataset: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I make the dataset public + And I wait until the dataset is ready less than <dataset_wait> secs + When I get the dataset status using the dataset's public url + Then the dataset's status is FINISHED """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait"] examples = [ - ['data/iris.csv', '10', '10', '10']] + ['data/iris.csv', '10', '10']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file_from_stdin(self, example[0]) - source_create.the_source_is_finished(self, example[1]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file_from_stdin( + self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) dataset_create.make_the_dataset_public(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[3]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) dataset_create.build_local_dataset_from_public_url(self) dataset_create.dataset_status_finished(self) diff --git a/bigml/tests/test_14_create_evaluations.py b/bigml/tests/test_14_create_evaluations.py index 6fc6dff3..093dc638 100644 --- a/bigml/tests/test_14_create_evaluations.py +++ b/bigml/tests/test_14_create_evaluations.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -19,160 +20,223 @@ """ Creating evaluation """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_model_steps as model_create -import create_ensemble_steps as ensemble_create -import create_evaluation_steps as evaluation_create - -class TestEvaluation(object): - - def setup(self): +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_ensemble_steps as ensemble_create +from . import create_evaluation_steps as evaluation_create + +class TestEvaluation: + """Testing Evaluation methods""" + + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario1: Successfully creating an evaluation: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - When I create an evaluation for the model with the dataset - And I wait until the evaluation is ready less than secs - Then the measured "" is - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | measure | value | - | ../data/iris.csv | 30 | 30 | 30 | 30 | average_phi | 1 | + Scenario1: Successfully creating an evaluation: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + When I create an evaluation for the model with the dataset + And I wait until the evaluation is ready less than secs + Then the measured "" is """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "evaluation_wait", "metric", "value"] examples = [ ['data/iris.csv', '50', '50', '50', '50', 'average_phi', '1']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + 
model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) evaluation_create.i_create_an_evaluation(self) - evaluation_create.the_evaluation_is_finished_in_less_than(self, example[4]) - evaluation_create.the_measured_measure_is_value(self, example[5], example[6]) + evaluation_create.the_evaluation_is_finished_in_less_than( + self, example["evaluation_wait"]) + evaluation_create.the_measured_measure_is_value( + self, example["metric"], example["value"]) def test_scenario2(self): """ - - Scenario2: Successfully creating an evaluation for an ensemble: - Given I create a data source uploading a "<data>" file - And I wait until the source is ready less than <time_1> secs - And I create a dataset - And I wait until the dataset is ready less than <time_2> secs - And I create an ensemble of <number_of_models> models and <tlp> tlp - And I wait until the ensemble is ready less than <time_3> secs - When I create an evaluation for the ensemble with the dataset and "<params>" - And I wait until the evaluation is ready less than <time_4> secs - Then the measured "<measure>" is <value> - - Examples: - | data | time_1 | time_2 | number_of_models | tlp | time_3 | time_4 | measure | value | params - | ../data/iris.csv | 30 | 30 | 5 | 1 | 50 | 30 | average_phi | 0.98029 | {"combiner": 0} - + Scenario2: Successfully creating an evaluation for an ensemble: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create an ensemble of <number_of_models> models + And I wait until the ensemble is ready less than <model_wait> secs + When I create an evaluation for the ensemble with the dataset and "<evaluation_conf>" + And I wait until the evaluation is ready less than <evaluation_wait> secs + Then the measured "<metric>" is <value> """ - print self.test_scenario2.__doc__ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "evaluation_wait", "number_of_models", + "metric", "value", "evaluation_conf"] examples = [ - ['data/iris.csv', '50', '50', '5', '1', '80', '80', 'average_phi', '0.98029', {"combiner": 0}], - ['data/iris.csv', '50', '50', '5', '1', '80', '80', 'average_phi', '0.95061', {"combiner": 1}], - ['data/iris.csv', '50', '50', '5', '1', '80', '80', 'average_phi', '0.98029', {"combiner": 2}], - ['data/iris.csv', '50', '50', '5', '1', '80', '80', 'average_phi', '0.98029', {"operating_kind": "votes"}], - ['data/iris.csv', '50', '50', '5', '1', '80', '80', 'average_phi', '0.97064', {"operating_kind": "probability"}], - ['data/iris.csv', '50', '50', '5', '1', '80', '80', 'average_phi', '0.95061', {"operating_kind": "confidence"}]] + ['data/iris.csv', '50', '50', '80', '80', '5', 'average_phi', + '0.98029', {"combiner": 0}], + ['data/iris.csv', '50', '50', '80', '80', '5', 'average_phi', + '0.95061', {"combiner": 1}], + ['data/iris.csv', '50', '50', '80', '80', '5', 'average_phi', + '0.98029', {"combiner": 2}], + ['data/iris.csv', '50', '50', '80', '80', '5', 'average_phi', + '0.98029', {"operating_kind": "votes"}], + ['data/iris.csv', '50', '50', '80', '80', '5', 'average_phi', + '0.97064', {"operating_kind": "probability"}], + ['data/iris.csv', '50', '50', '80', '80', '5', 'average_phi', + '0.95061', {"operating_kind": "confidence"}]] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - 
ensemble_create.i_create_an_ensemble(self, example[3], example[4]) - ensemble_create.the_ensemble_is_finished_in_less_than(self, example[5]) - evaluation_create.i_create_an_evaluation_ensemble(self, example[9]) - evaluation_create.the_evaluation_is_finished_in_less_than(self, example[6]) - evaluation_create.the_measured_measure_is_value(self, example[7], example[8]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_shared = "%s_%s" % (example["data"], + example["number_of_models"]) + ensemble_create.i_create_an_ensemble( + self, example["number_of_models"], shared=ensemble_shared) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"], shared=ensemble_shared) + evaluation_create.i_create_an_evaluation_ensemble( + self, example["evaluation_conf"]) + evaluation_create.the_evaluation_is_finished_in_less_than( + self, example["evaluation_wait"]) + evaluation_create.the_measured_measure_is_value( + self, example["metric"], example["value"]) def test_scenario3(self): """ - - Scenario3: Successfully creating an evaluation for a logistic regression: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a logistic regression - And I wait until the logistic regression is ready less than secs - When I create an evaluation for the logistic regression with the dataset - And I wait until the evaluation is ready less than secs - Then the measured "" is - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | measure | value | - | ../data/iris.csv | 30 | 30 | 50 | 30 | average_phi | 0.94107 | + Scenario3: Successfully creating an evaluation for a logistic regression: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression + And I wait until the logistic regression is ready less than secs + When I create an evaluation for the logistic regression with the dataset + And I wait until the evaluation is ready less than secs + Then the measured "" is """ - print self.test_scenario3.__doc__ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "evaluation_wait", "metric", "value"] examples = [ - ['data/iris.csv', '50', '50', '800', '80', 'average_phi', '0.89054']] + ['data/iris.csv', '50', '50', '800', '80', 'average_phi', + '0.89054']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_logistic_model(self) - model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) - evaluation_create.i_create_an_evaluation_logistic(self) - evaluation_create.the_evaluation_is_finished_in_less_than(self, example[4]) - evaluation_create.the_measured_measure_is_value(self, 
example[5], example[6]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model( + self, shared=example["data"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + evaluation_create.i_create_an_evaluation_logistic( + self) + evaluation_create.the_evaluation_is_finished_in_less_than( + self, example["evaluation_wait"]) + evaluation_create.the_measured_measure_is_value( + self, example["metric"], example["value"]) def test_scenario4(self): """ - - Scenario4: Successfully creating an evaluation for a deepnet: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a deepnet - And I wait until the deepnet is ready less than secs - When I create an evaluation for the deepnet with the dataset - And I wait until the evaluation is ready less than secs - Then the measured "" is - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | measure | value | - | ../data/iris.csv | 30 | 30 | 50 | 30 | average_phi | 0.95007 | + Scenario4: Successfully creating an evaluation for a deepnet: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a deepnet + And I wait until the deepnet is ready less than secs + When I create an evaluation for the deepnet with the dataset + And I wait until the evaluation is ready less than secs + Then the measured "" is + """ + show_doc(self.test_scenario4) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "evaluation_wait", "metric", "value"] + examples = [ + ['data/iris.csv', '50', '50', '800', '80', 'average_phi', + '0.98029']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_deepnet(self, shared=example["data"]) + model_create.the_deepnet_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + evaluation_create.i_create_an_evaluation_deepnet( + self) + evaluation_create.the_evaluation_is_finished_in_less_than( + self, example["evaluation_wait"]) + evaluation_create.the_measured_measure_is_value( + self, example["metric"], example["value"]) + + def test_scenario5(self): + """ + Scenario5: Successfully instantiating Evaluation: + Given a stored evaluation "" file + When I create an Evaluation for the JSON + Then the measured "" is """ - print self.test_scenario4.__doc__ + show_doc(self.test_scenario5) + headers = ["data", "metric", "value"] examples = [ - ['data/iris.csv', '50', '50', '800', '80', 'average_phi', 
'0.95007']] + ['data/classification_evaluation.json', 'phi', + 0.64837], + ['data/classification_evaluation.json', 'accuracy', + 0.91791], + ['data/classification_evaluation.json', 'precision', + 0.86639], + ['data/regression_evaluation.json', 'r_squared', + 0.9288]] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_deepnet(self) - model_create.the_deepnet_is_finished_in_less_than(self, example[3]) - evaluation_create.i_create_an_evaluation_deepnet(self) - evaluation_create.the_evaluation_is_finished_in_less_than(self, example[4]) - evaluation_create.the_measured_measure_is_value(self, example[5], example[6]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + evaluation_create.i_create_a_local_evaluation( + self, example["data"]) + evaluation_create.the_local_metric_is_value( + self, example["metric"], example["value"]) diff --git a/bigml/tests/test_15_download.py b/bigml/tests/test_15_download.py index b3b65b5b..415257e2 100644 --- a/bigml/tests/test_15_download.py +++ b/bigml/tests/test_15_download.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,82 +20,90 @@ """ Downloading dataset """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_model_steps as model_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . 
import create_model_steps as model_create -class TestDownload(object): +class TestDownload: + """Testing downloads""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - - Scenario: Successfully exporting a dataset: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I download the dataset file to "" - Then file "" is like file "" - - Examples: - | data | time_1 | time_2 | local_file | - | ../data/iris.csv | 30 | 30 | ./tmp/exported_iris.csv | + Scenario: Successfully exporting a dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I download the dataset file to "" + Then file "" is like file "" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "exported_file"] examples = [ ['data/iris.csv', '30', '30', 'tmp/exported_iris.csv']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - dataset_create.i_export_a_dataset(self, example[3]) - dataset_create.files_equal(self, example[3], example[0]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + dataset_create.i_export_a_dataset(self, example["exported_file"]) + dataset_create.files_equal( + self, example["exported_file"], example["data"]) def test_scenario2(self): """ - Scenario: Successfully creating a model and exporting it: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - And I export the <"pmml"> model to file "" - Then I check the model is stored in "" file in <"pmml"> - - Examples: - | data | time_1 | time_2 | time_3 | expected_file | pmml - | data/iris.csv | 10 | 10 | 10 | tmp/model/iris.json | false - | data/iris_sp_chars.csv | 10 | 10 | 10 | tmp/model/iris_sp_chars.pmml | true - + Scenario: Successfully creating a model and exporting it: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And 
I export the <"pmml"> model to file "" + Then I check the model is stored in "" file in <"pmml"> """ - print self.test_scenario2.__doc__ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "exported_file", "pmml"] examples = [ ['data/iris.csv', '30', '30', '30', 'tmp/model/iris.json', False], ['data/iris_sp_chars.csv', '30', '30', '30', 'tmp/model/iris_sp_chars.pmml', True]] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) - model_create.i_export_model(self, example[5], example[4]) - model_create.i_check_model_stored(self, example[4], example[5]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + model_create.i_export_model( + self, example["pmml"], example["exported_file"]) + model_create.i_check_model_stored( + self, example["exported_file"], example["pmml"]) diff --git a/bigml/tests/test_16_sample_dataset.py b/bigml/tests/test_16_sample_dataset.py index 97c9ae39..186b76ef 100644 --- a/bigml/tests/test_16_sample_dataset.py +++ b/bigml/tests/test_16_sample_dataset.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,53 +20,93 @@ """ Creating sample dataset """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_sample_steps as sample_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . 
import create_sample_steps as sample_create -class TestSampleDataset(object): - def setup(self): +class TestSampleDataset: + """Test for Sample methods""" + + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a sample from a dataset: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a sample from a dataset - And I wait until the sample is ready less than secs - And I update the sample name to "" - When I wait until the sample is ready less than secs - Then the sample name is "" + Scenario: Successfully creating a sample from a dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a sample from a dataset + And I wait until the sample is ready less than secs + And I update the sample name to "" + When I wait until the sample is ready less than secs + Then the sample name is "" + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "sample_wait", + "sample_name"] + examples = [ + ['data/iris.csv', '10', '10', '10', 'my new sample name']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + sample_create.i_create_a_sample_from_dataset(self) + sample_create.the_sample_is_finished_in_less_than( + self, example["sample_wait"]) + sample_create.i_update_sample_name(self, example["sample_name"]) + sample_create.the_sample_is_finished_in_less_than( + self, example["sample_wait"]) + sample_create.i_check_sample_name(self, example["sample_name"]) - Examples: - | data | time_1 | time_2 | time_3 | time_4 | sample_name | - | ../data/iris.csv | 10 | 10 | 10 | 10 | my new sample name | + def test_scenario2(self): + """ + Scenario: Successfully cloning dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I clone the last dataset + And I wait until the dataset is ready less than secs + Then the new dataset is as the origin dataset """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait"] examples = [ - ['data/iris.csv', '10', '10', '10', '10', 'my new sample name']] + ['data/iris.csv', '30', '30']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) + example = dict(zip(headers, example)) + 
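The sample steps above and the cloning steps completed in the loop below reduce to a handful of API calls. A sketch under the same client assumptions (`clone_dataset` is a feature of recent bindings releases, so treat that call as such an assumption):

    from bigml.api import BigML

    api = BigML()
    source = api.create_source("data/iris.csv")
    api.ok(source)
    dataset = api.create_dataset(source)
    api.ok(dataset)
    # Scenario1: create a sample from the dataset and rename it
    sample = api.create_sample(dataset)
    api.ok(sample)
    sample = api.update_sample(sample, {"name": "my new sample name"})
    api.ok(sample)
    # Scenario2: clone the dataset; the clone's JSON records the
    # original dataset as its origin
    cloned_dataset = api.clone_dataset(dataset)
    api.ok(cloned_dataset)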
show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + source = world.source["resource"] + source_create.clone_source(self, source) + source_create.the_source_is_finished(self, example["source_wait"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - sample_create.i_create_a_sample_from_dataset(self) - sample_create.the_sample_is_finished_in_less_than(self, example[3]) - sample_create.i_update_sample_name(self, example[5]) - sample_create.the_sample_is_finished_in_less_than(self, example[4]) - sample_create.i_check_sample_name(self, example[5]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset = world.dataset["resource"] + dataset_create.clone_dataset(self, dataset) + dataset_create.the_cloned_dataset_is(self, dataset) diff --git a/bigml/tests/test_17_split_dataset.py b/bigml/tests/test_17_split_dataset.py index 2eb4f80a..c570ea12 100644 --- a/bigml/tests/test_17_split_dataset.py +++ b/bigml/tests/test_17_split_dataset.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,52 +20,58 @@ """ Splitting dataset """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . 
import create_dataset_steps as dataset_create -class TestSplitDataset(object): +class TestSplitDataset: + """Test dataset split""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a split dataset: - Given I create a data source with "" uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a dataset extracting a sample - And I wait until the dataset is ready less than secs - When I compare the datasets' instances - Then the proportion of instances between datasets is - - Examples: - | data | time_1 | time_2 | time_3 | rate | - | ../data/iris.csv | 10 | 10 | 10 | 0.8 | + Scenario: Successfully creating a split dataset: + Given I create a data source with "" uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a dataset extracting a sample + And I wait until the dataset is ready less than secs + When I compare the datasets' instances + Then the proportion of instances between datasets is """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "rate", "source_conf"] examples = [ ['data/iris.csv', '10', '10', '10', '0.8', '{"category": 12}']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file_with_args(self, example[0], example[5]) - source_create.the_source_is_finished(self, example[1]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file_with_args( + self, example["data"], example["source_conf"]) + source_create.the_source_is_finished( + self, example["source_wait"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, - example[2]) - dataset_create.i_create_a_split_dataset(self, example[4]) - dataset_create.the_dataset_is_finished_in_less_than(self, - example[3]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.i_create_a_split_dataset(self, example["rate"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) dataset_create.i_compare_datasets_instances(self) - dataset_create.proportion_datasets_instances(self, example[4]) + dataset_create.proportion_datasets_instances(self, example["rate"]) diff --git a/bigml/tests/test_18_create_anomaly.py b/bigml/tests/test_18_create_anomaly.py index a204c1a7..b38adfa6 100644 --- a/bigml/tests/test_18_create_anomaly.py +++ b/bigml/tests/test_18_create_anomaly.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except 
in compliance with the License. You may obtain @@ -15,112 +16,118 @@ # License for the specific language governing permissions and limitations # under the License. - """ Creating anomaly detector """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_anomaly_steps as anomaly_create -import create_multimodel_steps as mm_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_anomaly_steps as anomaly_create +from . import create_multimodel_steps as mm_create -class TestAnomaly(object): +class TestAnomaly: + """Test anomaly detector methods""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ - world.dataset_ids = [] + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ - world.dataset_ids = [] + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - - Scenario: Successfully creating an anomaly detector from a dataset and a dataset list: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - Then I create an anomaly detector from a dataset - And I wait until the anomaly detector is ready less than secs - And I check the anomaly detector stems from the original dataset - And I store the dataset id in a list - And I create a dataset - And I wait until the dataset is ready less than secs - And I store the dataset id in a list - Then I create an anomaly detector from a dataset list - And I wait until the anomaly detector is ready less than secs - And I check the anomaly detector stems from the original dataset list - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | - | ../data/tiny_kdd.csv | 40 | 40 | 80 | 100 + Scenario: Successfully creating an anomaly detector from a dataset and a dataset list: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + Then I create an anomaly detector from a dataset + And I wait until the anomaly detector is ready less than secs + And I check the anomaly detector stems from the original dataset + And I store the dataset id in a list + And I create a dataset + And I wait until the dataset is ready less than secs + And I store the dataset id in a list + Then I create an anomaly detector from a dataset list + And I wait until the anomaly detector is ready less than secs + And I check the anomaly detector stems from the original dataset list """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait"] examples = [ - ['data/tiny_kdd.csv', '40', '40', '40', '100']] + ['data/tiny_kdd.csv', '40', '40', '100']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) -
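Both anomaly detectors in this scenario end up in `create_anomaly`, which accepts either a single dataset or a list of dataset ids, like the other model-creation calls. A sketch under the same client assumptions:

    from bigml.api import BigML

    api = BigML()
    source = api.create_source("data/tiny_kdd.csv")
    api.ok(source)
    dataset_1 = api.create_dataset(source)
    api.ok(dataset_1)
    dataset_2 = api.create_dataset(source)
    api.ok(dataset_2)
    # Anomaly detector from a single dataset
    anomaly = api.create_anomaly(dataset_1)
    api.ok(anomaly)
    # Anomaly detector from a dataset list
    anomaly_multi = api.create_anomaly(
        [dataset_1["resource"], dataset_2["resource"]])
    api.ok(anomaly_multi)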
dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, - example[2]) - anomaly_create.i_create_an_anomaly_from_dataset(self) - anomaly_create.the_anomaly_is_finished_in_less_than(self, - example[4]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset( + self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + anomaly_create.i_create_an_anomaly_from_dataset( + self, shared=example["data"]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) anomaly_create.i_check_anomaly_dataset_and_datasets_ids(self) mm_create.i_store_dataset_id(self) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, - example[3]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) mm_create.i_store_dataset_id(self) anomaly_create.i_create_an_anomaly_from_dataset_list(self) - anomaly_create.the_anomaly_is_finished_in_less_than(self, - example[4]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"]) anomaly_create.i_check_anomaly_datasets_and_datasets_ids(self) def test_scenario2(self): """ + Scenario: Successfully creating an anomaly detector from a dataset and generating the anomalous dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + Then I create an anomaly detector of anomalies from a dataset + And I wait until the anomaly detector is ready less than secs + And I create a dataset with only the anomalies + And I wait until the dataset is ready less than secs + And I check that the dataset has rows - Scenario: Successfully creating an anomaly detector from a dataset and generating the anomalous dataset: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - Then I create an anomaly detector of anomalies from a dataset - And I wait until the anomaly detector is ready less than secs - And I create a dataset with only the anomalies - And I wait until the dataset is ready less than secs - And I check that the dataset has rows - - Examples: - | data | time_1 | time_2 | time_3 |time_4| rows| - | ../data/iris_anomalous.csv | 40 | 40 | 80 | 40 | 1 """ - print self.test_scenario2.__doc__ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", "rows"] examples = [ - ['data/iris_anomalous.csv', '40', '40', '80', '40', '1']] + ['data/iris_anomalous.csv', '40', '40', '80', '1']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, - example[2]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) +
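The `top_n` argument used by scenario2 caps how many anomalous rows the detector keeps, and those rows are what the anomalous dataset is built from. A sketch (same assumptions; the `top_anomalies` path and its `score`/`row` keys are taken from the anomaly detector's JSON schema, so treat them as such):

    from bigml.api import BigML

    api = BigML()
    source = api.create_source("data/iris_anomalous.csv")
    api.ok(source)
    dataset = api.create_dataset(source)
    api.ok(dataset)
    # Keep only the single most anomalous row, matching the example table
    anomaly = api.create_anomaly(dataset, {"top_n": 1})
    api.ok(anomaly)
    for top_anomaly in anomaly["object"]["model"]["top_anomalies"]:
        print(top_anomaly["score"], top_anomaly["row"])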
source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) anomaly_create.i_create_an_anomaly_with_top_n_from_dataset( - self, example[5]) - anomaly_create.the_anomaly_is_finished_in_less_than(self, - example[3]) + self, example["rows"]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"]) anomaly_create.create_dataset_with_anomalies(self) - dataset_create.the_dataset_is_finished_in_less_than(self, - example[4]) - anomaly_create.the_dataset_has_n_rows(self, example[5]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["model_wait"]) + anomaly_create.the_dataset_has_n_rows(self, example["rows"]) diff --git a/bigml/tests/test_19_missing_and_errors.py b/bigml/tests/test_19_missing_and_errors.py index f6023b52..22326c08 100644 --- a/bigml/tests/test_19_missing_and_errors.py +++ b/bigml/tests/test_19_missing_and_errors.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,123 +20,147 @@ """ Creating datasets with missing values and errors counters """ -from world import world, setup_module, teardown_module, show_doc -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import read_dataset_steps as dataset_read -import create_prediction_steps as prediction_create -import compare_predictions_steps as prediction_compare -import create_model_steps as model_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import read_dataset_steps as dataset_read +from . import create_prediction_steps as prediction_create +from . import compare_predictions_steps as prediction_compare +from . 
import create_model_steps as model_create -class TestMissingsAndErrors(object): +class TestMissingsAndErrors: + """Testing Missings and Errors retrieval""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully obtaining missing values counts: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I update the source with params "" - And I create a dataset - And I wait until the dataset is ready less than secs - When I ask for the missing values counts in the fields - Then the missing values counts dict is "" - - Examples: - | data | time_1 | params | time_2 |missing_values | - | ../data/iris_missing.csv | 30 | {"fields": {"000000": {"optype": "numeric"}}} |30 |{"000000": 1} | + Scenario: Successfully obtaining missing values counts: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + And I create a dataset + And I wait until the dataset is ready less than secs + When I ask for the missing values counts in the fields + Then the missing values counts dict is "" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "source_conf", "dataset_wait", + "missing_values"] examples = [ - ['data/iris_missing.csv', '30', '{"fields": {"000000": {"optype": "numeric"}}}', '30', '{"000000": 1}']] + ['data/iris_missing.csv', '30', + '{"fields": {"000000": {"optype": "numeric"}}}', '30', + '{"000000": 1}']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - source_create.i_update_source_with(self, example[2]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) + source_create.the_source_is_finished(self, example["source_wait"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, - example[3]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) dataset_read.i_get_the_missing_values(self) dataset_read.i_get_the_properties_values( - self, 'missing values count', example[4]) + self, example["missing_values"]) def test_scenario2(self): """ - Scenario: Successfully obtaining parsing
error counts: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + And I create a dataset + And I wait until the dataset is ready less than secs + When I ask for the error counts in the fields + Then the error counts dict is "" """ - print self.test_scenario2.__doc__ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "source_conf", + "dataset_wait", "error_values"] examples = [ - ['data/iris_missing.csv', '30', '{"fields": {"000000": {"optype": "numeric"}}}', '30', '{"000000": 1}']] + ['data/iris_missing.csv', '30', + '{"fields": {"000000": {"optype": "numeric"}}}', '30', + '{"000000": 1}']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - source_create.i_update_source_with(self, example[2]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, - example[3]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) dataset_read.i_get_the_errors_values(self) dataset_read.i_get_the_properties_values( - self, 'error counts', example[4]) + self, example["error_values"]) def test_scenario3(self): """ - Scenario: Successfully comparing predictions: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - And I create a local model - When I create a prediction for "" - Then the prediction for "" is "" - And I create a local prediction for "" - Then the local prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | - + Scenario: Successfully comparing predictions: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "input_data", "objective_id", "prediction"] examples = [ - ['data/iris_missing.csv', '30', '{"fields": {"000000": {"optype": "numeric"}}, "source_parser": {"missing_tokens": ["foo"]}}', '30', '{"sepal length": "foo", "petal length": 3}', '000004', 'Iris-versicolor'], - ['data/iris_missing.csv', '30', '{"fields": {"000000": {"optype": "numeric"}}, "source_parser": {"missing_tokens": ["foo"]}}', '30', '{"sepal length": "foo", "petal length": 5, "petal width": 1.5}', '000004', 'Iris-virginica']] + ['data/iris_missing.csv', '30', '30', '50', + '{"fields": {"000000": {"optype": "numeric"}}, ' + '"source_parser": {"missing_tokens": ["foo"]}}', + '{"sepal length": "foo", "petal length": 3}', + '000004', 'Iris-versicolor'], + ['data/iris_missing.csv', '30', '30', '50', + '{"fields": {"000000":
{"optype": "numeric"}}, ' + '"source_parser": {"missing_tokens": ["foo"]}}', + '{"sepal length": "foo", "petal length": 5, ' + '"petal width": 1.5}', '000004', 'Iris-virginica']] - show_doc(self.test_scenario3, examples) + show_doc(self.test_scenario3) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - source_create.i_update_source_with(self, example[2]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"]) + source_create.i_update_source_with( + self, example["source_conf"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[3]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_model(self) - prediction_create.i_create_a_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_compare.i_create_a_local_prediction(self, example[4]) - prediction_compare.the_local_prediction_is(self, example[6]) + prediction_create.i_create_a_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) diff --git a/bigml/tests/test_20_rename_duplicated_names.py b/bigml/tests/test_20_rename_duplicated_names.py index 57642fcf..ac2def75 100644 --- a/bigml/tests/test_20_rename_duplicated_names.py +++ b/bigml/tests/test_20_rename_duplicated_names.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,55 +20,67 @@ """ Renaming duplicated names in fields """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_model_steps as model_create -import compare_predictions_steps as compare_preds +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . 
import compare_predictions_steps as compare_preds -class TestDuplicatedFields(object): +class TestDuplicatedFields: + """Test working with different fields with identical names""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully changing duplicated field names: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset with "" - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - And I create a local model - Then "" field's name is changed to "" - - Examples: - | data | time_1 | time_2 | time_3 | options | field_id | new_name - | ../data/iris.csv | 20 | 20 | 30 | {"fields": {"000001": {"name": "species"}}} | 000001 | species1 - | ../data/iris.csv | 20 | 20 | 30 | {"fields": {"000001": {"name": "petal width"}}} | 000001 | petal width1 + Scenario: Successfully changing duplicated field names: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset with "" + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + Then "" field's name is changed to "" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "dataset_conf", "field_id", "new_name"] examples = [ - ['data/iris.csv', '20', '20', '30', '{"fields": {"000001": {"name": "species"}}}', '000001', 'species1'], - ['data/iris.csv', '20', '20', '30', '{"fields": {"000001": {"name": "petal width"}}}', '000003', 'petal width3']] + ['data/iris.csv', '20', '20', '30', + '{"fields": {"000001": {"name": "species"}}}', + '000001', 'species1'], + ['data/iris.csv', '20', '20', '30', + '{"fields": {"000001": {"name": "petal width"}}}', + '000003', 'petal width3']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset_with(self, example[4]) - dataset_create.the_dataset_is_finished_in_less_than(self, - example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset_with( + self, example["dataset_conf"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) compare_preds.i_create_a_local_model(self) - model_create.field_name_to_new_name(self, example[5], example[6]) + model_create.field_name_to_new_name( + self, example["field_id"], example["new_name"]) diff 
--git a/bigml/tests/test_21_projects.py b/bigml/tests/test_21_projects.py index 38c63282..b58f6d0a 100644 --- a/bigml/tests/test_21_projects.py +++ b/bigml/tests/test_21_projects.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,26 +20,31 @@ """ Testing projects REST api calls """ -from world import world, setup_module, teardown_module -import create_project_steps as create -import delete_project_steps as delete +from .world import world, setup_module, teardown_module +from . import create_project_steps as create +from . import delete_project_steps as delete -class Test_projects(object): +class TestProjects: + """Testing project methods""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): + """Creating and updating project""" name = "my project" new_name = "my new project" create.i_create_project(self, name) diff --git a/bigml/tests/test_22_source_args.py b/bigml/tests/test_22_source_args.py index 2903512b..b66edc9e 100644 --- a/bigml/tests/test_22_source_args.py +++ b/bigml/tests/test_22_source_args.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import,no-member # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,43 +20,146 @@ """ Uploading source with structured args """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create +from bigml.api_handlers.resourcehandler import get_id -class TestUploadSource(object): +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . 
import create_dataset_steps as dataset_create - def setup(self): + +class TestUploadSource: + """Testing source uploads""" + + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ + Scenario: Successfully uploading source: + Given I create a data source uploading a "" file with args "" + And I wait until the source is ready less than secs + Then the source exists and has args "" + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "source_conf"] + examples = [ + ['data/iris.csv', '30', '{"tags": ["my tag", "my second tag"]}'], + ['data/iris.csv', '30', '{"name": "Testing unicode names: áé"}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file_with_args( + self, example["data"], example["source_conf"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.source_has_args(self, example["source_conf"]) - Scenario: Successfully uploading source: - Given I create a data source uploading a "" file with args "" - And I wait until the source is ready less than secs - Then the source exists and has args "" + def test_scenario2(self): + """ + Scenario: Successfully creating composite source: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a data source uploading a "" file + And I wait until the source is ready less than secs + Then I create a composite from the last two sources + And I wait until the source is ready less than secs + Then the composite exists and has the previous two sources + """ + show_doc(self.test_scenario2) + headers = ["data", "source_wait"] + examples = [ + ['data/iris.csv', '30']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + sources = [] + source_create.i_upload_a_file( + self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"]) + sources.append(get_id(world.source["resource"])) + source_create.i_upload_a_file( + self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"]) + sources.append(get_id(world.source["resource"])) + source_create.i_create_composite(self, sources) + source_create.the_source_is_finished(self, example["source_wait"]) + for source in sources: + world.sources.remove("source/%s" % source) + source_create.the_composite_contains(self, sources) - Examples: - | data | time_1 | args | - | ../data/iris.csv | 30 | {"tags": ["my tag", "my second tag"]} - | ../data/iris.csv | 30 | {"name": "Testing unicode names: áé"}]} + def test_scenario3(self): + """ + Scenario: Successfully cloning source: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I clone the last source + And I wait until the source is ready less than secs + Then the new source has the first one as origin + """ + show_doc(self.test_scenario3) + headers = ["data", "source_wait"] + examples = [ + ['data/iris.csv', '30']] + for example in examples: +
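The composite and cloning scenarios above, completed in the loop that follows, wrap two source-level calls: `create_source` with a list of source ids builds a composite, and `clone_source` copies an existing source. A sketch, assuming both calls as provided by recent bindings releases:

    from bigml.api import BigML

    api = BigML()
    source_1 = api.create_source("data/iris.csv")
    api.ok(source_1)
    source_2 = api.create_source("data/iris.csv")
    api.ok(source_2)
    # Composite source built from the two existing sources
    composite = api.create_source(
        [source_1["resource"], source_2["resource"]])
    api.ok(composite)
    # Cloned source; its JSON keeps a reference to the original
    cloned_source = api.clone_source(source_1)
    api.ok(cloned_source)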
example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + source = world.source["resource"] + source_create.clone_source(self, source) + source_create.the_source_is_finished( + self, example["source_wait"]) + source_create.the_cloned_source_origin_is(self, source) + def test_scenario4(self): """ - print self.test_scenario1.__doc__ + Scenario: Successfully adding annotations to composite source: + Given I create an annotated images data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + Then the new dataset has annotations in the field + """ + headers = ["data", "source_wait", "dataset_wait", "annotations_num", + "annotations_field"] examples = [ - ['data/iris.csv', '30', '{"tags": ["my tag", "my second tag"]}'], - ['data/iris.csv', '30', '{"name": "Testing unicode names: áé"}']] + ['data/images/metadata.json', '500', '500', '12', + '100002'], + ['data/images/metadata_compact.json', '500', '500', '3', + '100003'], + ['data/images/metadata_list.json', '500', '500', '3', + '100003']] + show_doc(self.test_scenario4) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file_with_args(self, example[0], example[2]) - source_create.the_source_is_finished(self, example[1]) - source_create.source_has_args(self, example[2]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_create_annotated_source( + self, + example["data"], + args={"image_analysis": {"enabled": False, + "extracted_features": []}}) + source_create.the_source_is_finished( + self, example["source_wait"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.check_annotations(self, + example["annotations_field"], + example["annotations_num"]) + diff --git a/bigml/tests/test_23_local_model_info.py b/bigml/tests/test_23_local_model_info.py index 1abdd233..8ee0ac97 100644 --- a/bigml/tests/test_23_local_model_info.py +++ b/bigml/tests/test_23_local_model_info.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,239 +20,279 @@ """ Testing local model information output methods """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_model_steps as model_create -import compare_predictions_steps as prediction_compare -import inspect_model_steps as inspect_model +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import compare_predictions_steps as prediction_compare +from .
import inspect_model_steps as inspect_model -class TestLocalModelOutputs(object): +class TestLocalModelOutputs: + """Testing local model code generators""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ Scenario: Successfully creating a model and translate the tree model into a set of IF-THEN rules: Given I create a data source uploading a "" file - And I wait until the source is ready less than secs + And I wait until the source is ready less than secs And I create a dataset - And I wait until the dataset is ready less than secs + And I wait until the dataset is ready less than secs And I create a model - And I wait until the model is ready less than secs + And I wait until the model is ready less than secs And I create a local model And I translate the tree into IF_THEN rules - Then I check the output is like "" expected file - - Examples: - | data | time_1 | time_2 | time_3 | expected_file | - | data/iris.csv | 10 | 10 | 10 | data/model/if_then_rules_iris.txt | - | data/iris_sp_chars.csv | 10 | 10 | 10 | data/model/if_then_rules_iris_sp_chars.txt | - | data/spam.csv | 20 | 20 | 30 | data/model/if_then_rules_spam.txt | - | data/grades.csv | 10 | 10 | 10 | data/model/if_then_rules_grades.txt | - | data/diabetes.csv | 20 | 20 | 30 | data/model/if_then_rules_diabetes.txt | - | data/iris_missing2.csv | 10 | 10 | 10 | data/model/if_then_rules_iris_missing2.txt | - | data/tiny_kdd.csv | 20 | 20 | 30 | data/model/if_then_rules_tiny_kdd.txt | - + Then I check the output is like "" expected file """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "output_file"] examples = [ - ['data/iris.csv', '30', '30', '30', 'data/model/if_then_rules_iris.txt'], - ['data/iris_sp_chars.csv', '30', '30', '30', 'data/model/if_then_rules_iris_sp_chars.txt'], - ['data/spam.csv', '30', '30', '30', 'data/model/if_then_rules_spam.txt'], - ['data/grades.csv', '30', '30', '30', 'data/model/if_then_rules_grades.txt'], - ['data/diabetes.csv', '30', '30', '30', 'data/model/if_then_rules_diabetes.txt'], - ['data/iris_missing2.csv', '30', '30', '30', 'data/model/if_then_rules_iris_missing2.txt'], - ['data/tiny_kdd.csv', '30', '30', '30', 'data/model/if_then_rules_tiny_kdd.txt']] + ['data/iris.csv', '30', '30', '30', + 'data/model/if_then_rules_iris.txt'], + ['data/iris_sp_chars.csv', '30', '30', '30', + 'data/model/if_then_rules_iris_sp_chars.txt'], + ['data/spam.csv', '30', '30', '30', + 'data/model/if_then_rules_spam.txt'], + ['data/grades.csv', '30', '30', '30', + 'data/model/if_then_rules_grades.txt'], + ['data/diabetes.csv', '30', '30', '30', + 'data/model/if_then_rules_diabetes.txt'], + ['data/iris_missing2.csv', '30', '30', '30', + 'data/model/if_then_rules_iris_missing2.txt'], + ['data/tiny_kdd.csv', '30', '30', '30', + 'data/model/if_then_rules_tiny_kdd.txt']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - 
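For context, a self-contained sketch of the pytest lifecycle hooks this migration adopts: pytest invokes setup_method before, and teardown_method after, every test method in the class, passing in the test function so its name can be recorded for the step helpers' logging.

class TestExample:
    def setup_method(self, method):
        # `method` is the test function about to run; storing its name
        # lets shared step code report which test produced a failure.
        self.bigml = {"method": method.__name__}

    def teardown_method(self):
        self.bigml = {}

    def test_something(self):
        assert self.bigml["method"] == "test_something"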
dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished(self, example["source_wait"], + shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than(self, + example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) prediction_compare.i_create_a_local_model(self) inspect_model.i_translate_the_tree_into_IF_THEN_rules(self) - inspect_model.i_check_if_the_output_is_like_expected_file(self, example[4]) + inspect_model.i_check_if_the_output_is_like_expected_file( + self, example["output_file"]) def test_scenario2(self): """ Scenario: Successfully creating a model with missing values and translate the tree model into a set of IF-THEN rules: Given I create a data source uploading a "" file - And I wait until the source is ready less than secs + And I wait until the source is ready less than secs And I create a dataset - And I wait until the dataset is ready less than secs + And I wait until the dataset is ready less than secs And I create a model - And I wait until the model is ready less than secs + And I wait until the model is ready less than secs And I create a local model And I translate the tree into IF_THEN rules - Then I check the output is like "" expected file - - Examples: - | data | time_1 | time_2 | time_3 | expected_file | - | data/iris_missing2.csv | 10 | 10 | 10 | data/model/if_then_rules_iris_missing2_MISSINGS.txt | - + Then I check the output is like "" expected file """ - print self.test_scenario2.__doc__ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "output_file"] examples = [ ['data/iris_missing2.csv', '10', '10', '30', 'data/model/if_then_rules_iris_missing2_MISSINGS.txt']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset( + self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than(self, + example["dataset_wait"], shared=example["data"]) model_create.i_create_a_model_with_missing_splits(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_model(self) inspect_model.i_translate_the_tree_into_IF_THEN_rules(self) - inspect_model.i_check_if_the_output_is_like_expected_file(self, example[4]) + inspect_model.i_check_if_the_output_is_like_expected_file( + self, example["output_file"]) def test_scenario3(self): """ Scenario: Successfully creating a model and translate the tree 
model into a set of IF-THEN rules: Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I update the source with "" waiting less than secs + And I wait until the source is ready less than secs + And I update the source with "" waiting less than secs And I create a dataset - And I wait until the dataset is ready less than secs + And I wait until the dataset is ready less than secs And I create a model - And I wait until the model is ready less than secs + And I wait until the model is ready less than secs And I create a local model And I translate the tree into IF_THEN rules - Then I check the output is like "" expected file - - Examples: - | data | time_1 | time_2 | time_3 | options | expected_file | - | data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_1.txt | - | data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}} | data/model/if_then_rules_spam_textanalysis_2.txt | - | data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_3.txt | - | data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_4.txt | - | data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_5.txt | - | data/spam.csv | 20 | 20 | 30 | {"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}} | data/model/if_then_rules_spam_textanalysis_6.txt | - + Then I check the output is like "" expected file """ - print self.test_scenario3.__doc__ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "output_file"] examples = [ - ['data/spam.csv', '30', '30', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}','data/model/if_then_rules_spam_textanalysis_1.txt'], - ['data/spam.csv', '30', '30', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}}', 'data/model/if_then_rules_spam_textanalysis_2.txt'], - ['data/spam.csv', '30', '30', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_3.txt'], - ['data/spam.csv', '30', '30', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_4.txt'], - ['data/spam.csv', '30', '30', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_5.txt'], - ['data/spam.csv', '30', 
'30', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', 'data/model/if_then_rules_spam_textanalysis_6.txt']] + ['data/spam.csv', '30', '30', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": true, "stem_words": true, "use_stopwords": ' + 'false, "language": "en"}}}}', + 'data/model/if_then_rules_spam_textanalysis_1.txt'], + ['data/spam.csv', '30', '30', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": true, "stem_words": true, ' + '"use_stopwords": false}}}}', + 'data/model/if_then_rules_spam_textanalysis_2.txt'], + ['data/spam.csv', '30', '30', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": false, ' + '"use_stopwords": false, "language": "en"}}}}', + 'data/model/if_then_rules_spam_textanalysis_3.txt'], + ['data/spam.csv', '30', '30', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": true, "use_stopwords": ' + 'true, "language": "en"}}}}', + 'data/model/if_then_rules_spam_textanalysis_4.txt'], + ['data/spam.csv', '30', '30', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "full_terms_only", "language": "en"}}}}', + 'data/model/if_then_rules_spam_textanalysis_5.txt'], + ['data/spam.csv', '30', '30', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": true, "stem_words": true, "use_stopwords": ' + 'false, "language": "en"}}}}', + 'data/model/if_then_rules_spam_textanalysis_6.txt']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - source_create.i_update_source_with(self, example[4]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + dataset_create.the_dataset_is_finished_in_less_than(self, + example["dataset_wait"]) model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_model(self) inspect_model.i_translate_the_tree_into_IF_THEN_rules(self) - inspect_model.i_check_if_the_output_is_like_expected_file(self, example[5]) - + inspect_model.i_check_if_the_output_is_like_expected_file( + self, example["output_file"]) def test_scenario4(self): """ Scenario: Successfully creating a model and check its data distribution: Given I create a data source uploading a "" file - And I wait until the source is ready less than secs + And I wait until the source is ready less than secs And I create a dataset - And I wait until the dataset is ready less than secs + And I wait until the dataset is ready less than secs And I create a model - And I wait until the model is ready less than secs + And I wait until the model is ready less than secs And I create a local model And I translate the tree into IF_THEN rules - Then I check the data distribution with "" file - - Examples: - | data | time_1 | 
time_2 | time_3 | expected_file | - | data/iris.csv | 10 | 10 | 10 | data/model/data_distribution_iris.txt | - | data/iris_sp_chars.csv | 10 | 10 | 10 | data/model/data_distribution_iris_sp_chars.txt | - | data/spam.csv | 20 | 20 | 30 | data/model/data_distribution_spam.txt | - | data/grades.csv | 10 | 10 | 10 | data/model/data_distribution_grades.txt | - | data/diabetes.csv | 20 | 20 | 30 | data/model/data_distribution_diabetes.txt | - | data/iris_missing2.csv | 10 | 10 | 10 | data/model/data_distribution_iris_missing2.txt | - | data/tiny_kdd.csv | 20 | 20 | 30 | data/model/data_distribution_tiny_kdd.txt | - + Then I check the data distribution with "" file """ - print self.test_scenario4.__doc__ + show_doc(self.test_scenario4) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "output_file"] examples = [ - ['data/iris.csv', '30', '30', '30', 'data/model/data_distribution_iris.txt'], - ['data/iris_sp_chars.csv', '30', '30', '30', 'data/model/data_distribution_iris_sp_chars.txt'], - ['data/spam.csv', '30', '30', '30', 'data/model/data_distribution_spam.txt'], - ['data/grades.csv', '30', '30', '30', 'data/model/data_distribution_grades.txt'], - ['data/diabetes.csv', '30', '30', '30', 'data/model/data_distribution_diabetes.txt'], - ['data/iris_missing2.csv', '30', '30', '30', 'data/model/data_distribution_iris_missing2.txt'], - ['data/tiny_kdd.csv', '30', '30', '30', 'data/model/data_distribution_tiny_kdd.txt']] + ['data/iris.csv', '30', '30', '30', + 'data/model/data_distribution_iris.txt'], + ['data/iris_sp_chars.csv', '30', '30', '30', + 'data/model/data_distribution_iris_sp_chars.txt'], + ['data/spam.csv', '30', '30', '30', + 'data/model/data_distribution_spam.txt'], + ['data/grades.csv', '30', '30', '30', + 'data/model/data_distribution_grades.txt'], + ['data/diabetes.csv', '30', '30', '30', + 'data/model/data_distribution_diabetes.txt'], + ['data/iris_missing2.csv', '30', '30', '30', + 'data/model/data_distribution_iris_missing2.txt'], + ['data/tiny_kdd.csv', '30', '30', '30', + 'data/model/data_distribution_tiny_kdd.txt']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than(self, + example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) prediction_compare.i_create_a_local_model(self) - inspect_model.i_check_the_data_distribution(self, example[4]) - + inspect_model.i_check_the_data_distribution( + self, example["output_file"]) def test_scenario5(self): """ Scenario: Successfully creating a model and check its predictions distribution: Given I create a data source uploading a "" file - And I wait until the source is ready less than secs + And I wait until the source is ready less than secs And I 
create a dataset - And I wait until the dataset is ready less than secs + And I wait until the dataset is ready less than secs And I create a model - And I wait until the model is ready less than secs + And I wait until the model is ready less than secs And I create a local model And I translate the tree into IF_THEN rules - Then I check the predictions distribution with "" file - - Examples: - | data | time_1 | time_2 | time_3 | expected_file | - | data/iris.csv | 10 | 10 | 10 | data/model/predictions_distribution_iris.txt | - | data/iris_sp_chars.csv | 10 | 10 | 10 | data/model/predictions_distribution_iris_sp_chars.txt | - | data/spam.csv | 20 | 20 | 30 | data/model/predictions_distribution_spam.txt | - | data/grades.csv | 10 | 10 | 10 | data/model/predictions_distribution_grades.txt | - | data/diabetes.csv | 20 | 20 | 30 | data/model/predictions_distribution_diabetes.txt | - | data/iris_missing2.csv | 10 | 10 | 10 | data/model/predictions_distribution_iris_missing2.txt | - | data/tiny_kdd.csv | 20 | 20 | 30 | data/model/predictions_distribution_tiny_kdd.txt | - + Then I check the predictions distribution with "" file """ - print self.test_scenario5.__doc__ + show_doc(self.test_scenario5) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "output_file"] examples = [ - ['data/iris.csv', '30', '30', '30', 'data/model/predictions_distribution_iris.txt'], - ['data/iris_sp_chars.csv', '30', '30', '30', 'data/model/predictions_distribution_iris_sp_chars.txt'], - ['data/spam.csv', '30', '30', '30', 'data/model/predictions_distribution_spam.txt'], - ['data/grades.csv', '30', '30', '30', 'data/model/predictions_distribution_grades.txt'], - ['data/diabetes.csv', '30', '30', '30', 'data/model/predictions_distribution_diabetes.txt'], - ['data/iris_missing2.csv', '30', '30', '30', 'data/model/predictions_distribution_iris_missing2.txt'], - ['data/tiny_kdd.csv', '30', '30', '30', 'data/model/predictions_distribution_tiny_kdd.txt']] + ['data/iris.csv', '30', '30', '30', + 'data/model/predictions_distribution_iris.txt'], + ['data/iris_sp_chars.csv', '30', '30', '30', + 'data/model/predictions_distribution_iris_sp_chars.txt'], + ['data/spam.csv', '30', '30', '30', + 'data/model/predictions_distribution_spam.txt'], + ['data/grades.csv', '30', '30', '30', + 'data/model/predictions_distribution_grades.txt'], + ['data/diabetes.csv', '30', '30', '30', + 'data/model/predictions_distribution_diabetes.txt'], + ['data/iris_missing2.csv', '30', '30', '30', + 'data/model/predictions_distribution_iris_missing2.txt'], + ['data/tiny_kdd.csv', '30', '30', '30', + 'data/model/predictions_distribution_tiny_kdd.txt']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than(self, + example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + 
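The shared= keyword threaded through these steps suggests a cache keyed by the uploaded file, so the source/dataset/model built for, say, data/iris.csv can be reused by later scenarios instead of re-created. A hypothetical sketch of that idea; _shared_resources and get_or_create are illustrative names only, not part of the bindings:

_shared_resources = {}

def get_or_create(kind, key, create_fn):
    # Return the cached resource for (kind, key), creating it on a miss.
    if key is not None and (kind, key) in _shared_resources:
        return _shared_resources[(kind, key)]
    resource = create_fn()  # only runs when nothing is cached yet
    if key is not None:
        _shared_resources[(kind, key)] = resource
    return resource

# e.g. get_or_create("dataset", "data/iris.csv", lambda: {"resource": "dataset/..."})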
model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) prediction_compare.i_create_a_local_model(self) - inspect_model.i_check_the_predictions_distribution(self, example[4]) + inspect_model.i_check_the_predictions_distribution( + self, example["output_file"]) def test_scenario6(self): @@ -266,34 +307,80 @@ def test_scenario6(self): And I create a local model And I translate the tree into IF_THEN rules Then I check the model summary with "" file + """ + show_doc(self.test_scenario6) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "output_file"] + examples = [ + ['data/iris.csv', '30', '30', '30', + 'data/model/summarize_iris.txt'], + ['data/iris_sp_chars.csv', '30', '30', '30', + 'data/model/summarize_iris_sp_chars.txt'], + ['data/spam.csv', '30', '30', '30', + 'data/model/summarize_spam.txt'], + ['data/grades.csv', '30', '30', '30', + 'data/model/summarize_grades.txt'], + ['data/diabetes.csv', '30', '30', '30', + 'data/model/summarize_diabetes.txt'], + ['data/iris_missing2.csv', '30', '30', '30', + 'data/model/summarize_iris_missing2.txt'], + ['data/tiny_kdd.csv', '30', '30', '30', + 'data/model/summarize_tiny_kdd.txt']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset( + self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than(self, + example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_model(self) + inspect_model.i_check_the_model_summary_with( + self, example["output_file"]) - Examples: - | data | time_1 | time_2 | time_3 | expected_file | - | data/iris.csv | 10 | 10 | 10 | data/model/summarize_iris.txt | - | data/iris_sp_chars.csv | 10 | 10 | 10 | data/model/summarize_iris_sp_chars.txt | - | data/spam.csv | 20 | 20 | 30 | data/model/summarize_spam.txt | - | data/grades.csv | 10 | 10 | 10 | data/model/summarize_grades.txt | - | data/diabetes.csv | 20 | 20 | 30 | data/model/summarize_diabetes.txt | - | data/iris_missing2.csv | 10 | 10 | 10 | data/model/summarize_iris_missing2.txt | - | data/tiny_kdd.csv | 20 | 20 | 30 | data/model/summarize_tiny_kdd.txt | - + def test_scenario7(self): """ - print self.test_scenario6.__doc__ + Scenario: Unit tests for output generators: + Given I read a model from "" file + And I create a local model + And I create a distribution, list fields and a tree CSV + Then I check distribution with "" file + Then I check list_fields with "" file + Then I check tree CSV with "" file + """ + + show_doc(self.test_scenario7) + headers = ["data", "distribution", "list_fields", "tree_csv"] examples = [ - ['data/iris.csv', '30', '30', '30', 'data/model/summarize_iris.txt'], - ['data/iris_sp_chars.csv', '30', '30', '30', 'data/model/summarize_iris_sp_chars.txt'], - ['data/spam.csv', '30', '30', '30', 'data/model/summarize_spam.txt'], - ['data/grades.csv', '30', '30', '30', 'data/model/summarize_grades.txt'], - ['data/diabetes.csv', '30', '30', '30', 'data/model/summarize_diabetes.txt'], - ['data/iris_missing2.csv', '30', '30', '30', 'data/model/summarize_iris_missing2.txt'], - ['data/tiny_kdd.csv', 
'30', '30', '30', 'data/model/summarize_tiny_kdd.txt']] + ['data/model/iris.json', + 'data/model/distribution_iris.txt', + 'data/model/list_fields.txt', + 'data/model/tree_csv.txt'], + ['data/model/regression.json', + 'data/model/rdistribution_iris.txt', + 'data/model/rlist_fields.txt', + 'data/model/rtree_csv.txt'], + ['data/model/w_iris.json', + 'data/model/wdistribution_iris.txt', + 'data/model/wlist_fields.txt', + 'data/model/wtree_csv.txt'], + ['data/model/w_regression.json', + 'data/model/wrdistribution_iris.txt', + 'data/model/wrlist_fields.txt', + 'data/model/wrtree_csv.txt']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + world.debug = True + model_create.i_read_model_file(self, example["data"]) prediction_compare.i_create_a_local_model(self) - inspect_model.i_check_the_model_summary_with(self, example[4]) + inspect_model.i_check_print_distribution( + self, example["distribution"]) + inspect_model.i_list_fields(self, example["list_fields"]) + inspect_model.i_create_tree_csv(self, example["tree_csv"]) diff --git a/bigml/tests/test_24_cluster_derived.py b/bigml/tests/test_24_cluster_derived.py index 26366cf1..5e565463 100644 --- a/bigml/tests/test_24_cluster_derived.py +++ b/bigml/tests/test_24_cluster_derived.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,110 +20,125 @@ """ Creating datasets and models associated to a cluster """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_model_steps as model_create -import create_cluster_steps as cluster_create -import compare_predictions_steps as prediction_compare - -class TestClusterDerived(object): - - def setup(self): +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_cluster_steps as cluster_create +from . 
import compare_predictions_steps as prediction_compare + +class TestClusterDerived: + """Testing resources derived from clusters""" + + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating datasets for first centroid of a cluster: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a cluster - And I wait until the cluster is ready less than secs - When I create a dataset associated to centroid "" - And I wait until the dataset is ready less than secs - Then the dataset is associated to the centroid "" of the cluster - - Examples: - | data | time_1 | time_2 | time_3 | centroid_id | time_4 | - | ../data/iris.csv | 10 | 10 | 40 | 000001 | 10 | - + Scenario: Successfully creating datasets for first centroid of a cluster: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster + And I wait until the cluster is ready less than secs + When I create a dataset associated to centroid "" + And I wait until the dataset is ready less than secs + Then the dataset is associated to the centroid "" of the cluster """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "centroid_id"] examples = [ - ['data/iris.csv', '10', '10', '40', '000001', '10']] + ['data/iris.csv', '10', '10', '40', '000001']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - cluster_create.i_create_a_cluster(self) - cluster_create.the_cluster_is_finished_in_less_than(self, example[3]) - dataset_create.i_create_a_dataset_from_cluster(self, example[4]) - dataset_create.the_dataset_is_finished_in_less_than(self, example[5]) - dataset_create.is_associated_to_centroid_id(self, example[4]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + cluster_create.i_create_a_cluster(self, shared=example["data"]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset_from_cluster( + self, example["centroid_id"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.is_associated_to_centroid_id( + self, example["centroid_id"]) def 
test_scenario2(self): """ - Scenario: Successfully creating models for first centroid of a cluster: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a cluster with options "" - And I wait until the cluster is ready less than secs - When I create a model associated to centroid "" - And I wait until the model is ready less than secs - Then the model is associated to the centroid "" of the cluster - - Examples: - | data | time_1 | time_2 | time_3 | centroid_id | time_4 | - | ../data/iris.csv | 10 | 10 | 40 | 000001 | 10 | - + Scenario: Successfully creating models for first centroid of a cluster: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster with options "" + And I wait until the cluster is ready less than secs + When I create a model associated to centroid "" + And I wait until the model is ready less than secs + Then the model is associated to the centroid "" of the cluster """ - print self.test_scenario2.__doc__ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "centroid_id", "model_conf"] examples = [ - ['data/iris.csv', '10', '10', '40', '000001', '10', '{"model_clusters": true}']] + ['data/iris.csv', '10', '10', '40', '000001', + '{"model_clusters": true}']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - cluster_create.i_create_a_cluster_with_options(self, example[6]) - cluster_create.the_cluster_is_finished_in_less_than(self, example[3]) - model_create.i_create_a_model_from_cluster(self, example[4]) - model_create.the_model_is_finished_in_less_than(self, example[5]) - model_create.is_associated_to_centroid_id(self, example[4]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + cluster_create.i_create_a_cluster_with_options( + self, example["model_conf"]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_from_cluster( + self, example["centroid_id"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.is_associated_to_centroid_id( + self, example["centroid_id"]) def test_scenario3(self): """ - Scenario: Successfully getting the closest point in a cluster: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a cluster - And I wait until the cluster is ready less than secs - And I create a local cluster - Then the data point in the cluster closest to "" is "" - - Examples: - | data | time_1 | time_2 | time_3 | reference | closest | - + Scenario: Successfully getting the 
closest point in a cluster: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster + And I wait until the cluster is ready less than secs + And I create a local cluster + Then the data point in the cluster closest to "" is "" """ - print self.test_scenario3.__doc__ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "reference", "closest"] examples = [ ['data/iris.csv', '10', '10', '40', '{"petal length": 1.4, "petal width": 0.2,' @@ -137,45 +153,55 @@ def test_scenario3(self): '{"distance": 0.0, "data":' ' {"Message": "mobile", "Type": "spam"}}']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - cluster_create.i_create_a_cluster(self) - cluster_create.the_cluster_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + cluster_create.i_create_a_cluster(self, shared=example["data"]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) prediction_compare.i_create_a_local_cluster(self) - cluster_create.closest_in_cluster(self, example[4], example[5]) + cluster_create.closest_in_cluster( + self, example["reference"], example["closest"]) def test_scenario4(self): """ - Scenario: Successfully getting the closest centroid in a cluster: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a cluster - And I wait until the cluster is ready less than secs - And I create a local cluster - Then the centroid in the cluster closest to "" is "" - - Examples: - | data | time_1 | time_2 | time_3 | reference | closest | - + Scenario: Successfully getting the closest centroid in a cluster: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster + And I wait until the cluster is ready less than secs + And I create a local cluster + Then the centroid in the cluster closest to "" is "" """ - print self.test_scenario4.__doc__ + show_doc(self.test_scenario4) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "reference", "closest"] examples = [ ['data/spam_4w.csv', '10', '10', '40', '{"Message": "free"}', '000005']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - cluster_create.i_create_a_cluster(self) - 
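A minimal local-cluster sketch, assuming (as the steps above suggest) that bigml.cluster.Cluster can wrap a finished cluster resource and compute the nearest centroid for new input data without further API round trips; the cluster id below is hypothetical:

from bigml.api import BigML
from bigml.cluster import Cluster

api = BigML()  # credentials taken from BIGML_USERNAME / BIGML_API_KEY
cluster = Cluster("cluster/5126965515526876630001b2", api=api)  # hypothetical id
# Returns the closest centroid's id, name and distance for the input,
# e.g. centroid_id "000005" in the nearby spam_4w.csv scenario.
print(cluster.centroid({"Message": "free"}))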
cluster_create.the_cluster_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + cluster_create.i_create_a_cluster(self, shared=example["data"]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) prediction_compare.i_create_a_local_cluster(self) - cluster_create.closest_centroid_in_cluster(self, example[4], example[5]) + cluster_create.closest_centroid_in_cluster( + self, example["reference"], example["closest"]) diff --git a/bigml/tests/test_25_correlation.py b/bigml/tests/test_25_correlation.py index f5a140d8..27f4c029 100644 --- a/bigml/tests/test_25_correlation.py +++ b/bigml/tests/test_25_correlation.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,53 +20,64 @@ """ Creating correlation """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_correlation_steps as correlation_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . 
import create_correlation_steps as correlation_create -class TestCorrelation(object): +class TestCorrelation: + """Test Correlation methods""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a correlation from a dataset: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a correlation from a dataset - And I wait until the correlation is ready less than secs - And I update the correlation name to "" - When I wait until the correlation is ready less than secs - Then the correlation name is "" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | correlation_name | - | ../data/iris.csv | 10 | 10 | 20 | 20 | my new correlation name | + Scenario: Successfully creating a correlation from a dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a correlation from a dataset + And I wait until the correlation is ready less than secs + And I update the correlation name to "" + When I wait until the correlation is ready less than secs + Then the correlation name is "" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "correlation_name"] examples = [ - ['data/iris.csv', '10', '10', '20', '20', 'my new correlation name']] + ['data/iris.csv', '10', '10', '20', 'my new correlation name']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) correlation_create.i_create_a_correlation_from_dataset(self) - correlation_create.the_correlation_is_finished_in_less_than(self, example[3]) - correlation_create.i_update_correlation_name(self, example[5]) - correlation_create.the_correlation_is_finished_in_less_than(self, example[4]) - correlation_create.i_check_correlation_name(self, example[5]) + correlation_create.the_correlation_is_finished_in_less_than( + self, example["model_wait"]) + correlation_create.i_update_correlation_name( + self, example["correlation_name"]) + correlation_create.the_correlation_is_finished_in_less_than( + self, example["model_wait"]) + correlation_create.i_check_correlation_name( + self, example["correlation_name"]) diff --git 
a/bigml/tests/test_26_statistical_test.py b/bigml/tests/test_26_statistical_test.py index 731392e0..b09ebd48 100644 --- a/bigml/tests/test_26_statistical_test.py +++ b/bigml/tests/test_26_statistical_test.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,53 +20,65 @@ """ Creating test """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_statistical_tst_steps as statistical_tst_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_statistical_tst_steps as statistical_tst_create -class TestStatisticalTest(object): +class TestStatisticalTest: + """Test Statistical Test methods""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating an statistical test from a dataset: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create an statistical test from a dataset - And I wait until the statistical test is ready less than secs - And I update the statistical test name to "" - When I wait until the statistical test is ready less than secs - Then the statistical test name is "" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | test_name | - | ../data/iris.csv | 10 | 10 | 20 | 20 | my new statistical test name | + Scenario: Successfully creating a statistical test from a dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a statistical test from a dataset + And I wait until the statistical test is ready less than secs + And I update the statistical test name to "" + When I wait until the statistical test is ready less than secs + Then the statistical test name is "" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "test_name"] examples = [ - ['data/iris.csv', '10', '10', '20', '20', 'my new statistical test name']] + ['data/iris.csv', '10', '10', '20', + 'my new statistical test name']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + example = 
dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) statistical_tst_create.i_create_a_tst_from_dataset(self) - statistical_tst_create.the_tst_is_finished_in_less_than(self, example[3]) - statistical_tst_create.i_update_tst_name(self, example[5]) - statistical_tst_create.the_tst_is_finished_in_less_than(self, example[4]) - statistical_tst_create.i_check_tst_name(self, example[5]) + statistical_tst_create.the_tst_is_finished_in_less_than( + self, example["model_wait"]) + statistical_tst_create.i_update_tst_name( + self, example["test_name"]) + statistical_tst_create.the_tst_is_finished_in_less_than( + self, example["model_wait"]) + statistical_tst_create.i_check_tst_name( + self, example["test_name"]) diff --git a/bigml/tests/test_27_fields.py b/bigml/tests/test_27_fields.py index b81aaa37..bd461f04 100644 --- a/bigml/tests/test_27_fields.py +++ b/bigml/tests/test_27_fields.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,100 +20,128 @@ """ Testing Fields object properties """ -from world import world, setup_module, teardown_module -import fields_steps -import create_source_steps as source_create -import create_dataset_steps as dataset_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import fields_steps +from . import create_source_steps as source_create +from . 
import create_dataset_steps as dataset_create -class TestFields(object): +class TestFields: + """Tests Fields class methods """ - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a Fields object: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a Fields object from the source with objective column "" - Then the object id is "" - - Examples: - | data | time_1 | objective_column | objective_id | - | ../data/iris.csv | 10 | 0 | 000000 | + Scenario: Successfully creating a Fields object: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a Fields object from the source with objective column "" + Then the object id is "" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "objective_column", "objective_id"] examples = [ ['data/iris.csv', '10', '0', '000000']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - fields_steps.create_fields(self, example[2]) - fields_steps.check_objective(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + fields_steps.create_fields(self, example["objective_column"]) + fields_steps.check_objective(self, example["objective_id"]) def test_scenario2(self): """ - Scenario: Successfully creating a Fields object and a summary fields file: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a Fields object from the dataset with objective column "" - And I export a summary fields file "" - Then I check that the file "" is like "" - - Examples: - | data | time_1 | objective_column | summary_file| expected_file | time_2 - | ../data/iris.csv | 10 | 0 | fields_summary.csv | data/fields/fields_summary.csv | 10 + Scenario: Successfully creating a Fields object and a summary fields file: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a Fields object from the dataset with objective column "" + And I export a summary fields file "" + Then I check that the file "" is like "" """ - print self.test_scenario2.__doc__ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "objective_column", + "summary_file", "expected_file"] examples = [ - ['data/iris.csv', '10', '0', 'fields_summary.csv', 'data/fields/fields_summary.csv', '10']] + ['data/iris.csv', '10', '10', '0', 'fields_summary.csv', + 'data/fields/fields_summary.csv']] for 
example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[5]) - fields_steps.create_fields_from_dataset(self, example[2]) - fields_steps.generate_summary(self, example[3]) - fields_steps.check_summary_like_expected(self, example[3], example[4]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + fields_steps.create_fields_from_dataset( + self, example["objective_column"]) + fields_steps.generate_summary(self, example["summary_file"]) + fields_steps.check_summary_like_expected( + self, example["summary_file"], example["expected_file"]) def test_scenario3(self): """ - Scenario: Successfully creating a Fields object and a modified fields structure from a file: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a Fields object from the dataset with objective column "" - And I import a summary fields file "" as a fields structure - Then I check the new field structure has field "" as "" - - Examples: - | data | time_1 | objective_column | summary_file| field_id | optype | time_2 - | ../data/iris.csv | 10 | 0 | fields_summary_modified.csv | 000000 | categorical | 10 + Scenario: Successfully creating a Fields object and a modified fields structure from a file: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a Fields object from the dataset with objective column "" + And I import a summary fields file "" as a fields structure + And I clone the source to open it + And I update the source with the file "" + And I update the dataset with the file "" + Then I check the new field structure has field "" as "" + And I check the source has field "" as "" """ - print self.test_scenario3.__doc__ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "objective_column", + "summary_file", "field_id", "optype"] examples = [ - ['data/iris.csv', '10', '0', 'data/fields/fields_summary_modified.csv', '000000', 'categorical', '10']] + ['data/iris.csv', '10', '10', '0', + 'data/fields/fields_summary_modified.csv', '000000', + 'categorical']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[6]) - fields_steps.create_fields_from_dataset(self, example[2]) - fields_steps.import_summary_file(self, example[3]) - fields_steps.check_field_type(self, example[4], example[5]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + 
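A short sketch of the Fields workflow these scenarios exercise, assuming the bindings expose summary_csv to export a per-field summary and new_fields_structure to read a (possibly hand-edited) summary back as an updatable structure, as the step names suggest; the dataset id is hypothetical:

from bigml.api import BigML
from bigml.fields import Fields

api = BigML()
dataset = api.get_dataset("dataset/5126965515526876630001b3")  # hypothetical id
fields = Fields(dataset)
fields.summary_csv("fields_summary.csv")  # export names, optypes and stats
# Read an edited summary back (assumed helper) and apply it, e.g. to
# flip field 000000 to "categorical" as the scenario checks.
changes = fields.new_fields_structure("fields_summary_modified.csv")
api.update_dataset(dataset, changes)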
source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + fields_steps.create_fields_from_dataset( + self, example["objective_column"]) + source_create.clone_source(self, world.source["resource"]) + source_create.the_source_is_finished(self, example["source_wait"]) + fields_steps.import_summary_file(self, example["summary_file"]) + fields_steps.update_with_summary_file( + self, world.source, example["summary_file"]) + fields_steps.update_with_summary_file( + self, world.dataset, example["summary_file"]) + fields_steps.check_field_type( + self, example["field_id"], example["optype"]) + fields_steps.check_resource_field_type( + self, world.source, example["field_id"], example["optype"]) diff --git a/bigml/tests/test_28_association.py b/bigml/tests/test_28_association.py index 2322e4ec..7e5bec63 100644 --- a/bigml/tests/test_28_association.py +++ b/bigml/tests/test_28_association.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,120 +20,147 @@ """ Creating association """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_association_steps as association_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . 
import create_association_steps as association_create -class TestAssociation(object): +class TestAssociation: + """Test for associations""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating associations from a dataset: - Given I create a data source uploading a "<data>" file - And I wait until the source is ready less than <time_1> secs - And I create a dataset - And I wait until the dataset is ready less than <time_2> secs - And I create associations from a dataset - And I wait until the association is ready less than <time_3> secs - And I update the association name to "<association_name>" - When I wait until the association is ready less than <time_4> secs - Then the association name is "<association_name>" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | association_name | - | ../data/iris.csv | 10 | 10 | 20 | 50 | my new association name | + Scenario: Successfully creating associations from a dataset: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create associations from a dataset + And I wait until the association is ready less than <model_wait> secs + And I update the association name to "<association_name>" + When I wait until the association is ready less than <model_wait> secs + Then the association name is "<association_name>" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "association_name"] examples = [ - ['data/iris.csv', '10', '10', '20', '50', 'my new association name']] + ['data/iris.csv', '10', '10', '50', 'my new association name']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) association_create.i_create_an_association_from_dataset(self) - association_create.the_association_is_finished_in_less_than(self, example[3]) - association_create.i_update_association_name(self, example[5]) - association_create.the_association_is_finished_in_less_than(self, example[4]) - association_create.i_check_association_name(self, example[5]) + association_create.the_association_is_finished_in_less_than( + self, example["model_wait"]) + association_create.i_update_association_name( + self, example["association_name"]) + association_create.the_association_is_finished_in_less_than( + self, example["model_wait"]) + association_create.i_check_association_name( + self, example["association_name"]) def test_scenario2(self): """ 
- Scenario: Successfully creating local association object: - Given I create a data source uploading a "<data>" file - And I wait until the source is ready less than <time_1> secs - And I create a dataset - And I wait until the dataset is ready less than <time_2> secs - And I create an association from a dataset - And I wait until the association is ready less than <time_3> secs - And I create a local association - When I get the rules for <"item_list"> - Then the first rule is "<JSON_rule>" - - Examples: - | data | time_1 | time_2 | time_3 | item_list | JSON_rule | - | ../data/tiny_mushrooms.csv | 10 | 20 | 50 | ["Edible"] | {'p_value': 2.08358e-17, 'confidence': 1, 'lift': 1.12613, 'lhs': [14], 'leverage': 0.07885, 'lhs_cover': [0.704, 176], 'rhs_cover': [0.888, 222], 'rhs': [1], 'support': [0.704, 176], 'rule_id': u'000038'} - + Scenario: Successfully creating local association object: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create an association from a dataset + And I wait until the association is ready less than <model_wait> secs + And I create a local association + When I get the rules for <"item_list"> + Then the first rule is "<JSON_rule>" """ - print self.test_scenario2.__doc__ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "item_list", "JSON_rule"] examples = [ - ['data/tiny_mushrooms.csv', '10', '20', '50', ["Edible"], {'p_value': 5.26971e-31, 'confidence': 1, 'rhs_cover': [0.488, 122], 'leverage': 0.24986, 'rhs': [19], 'rule_id': u'000002', 'lift': 2.04918, 'lhs': [0, 21, 16, 7], 'lhs_cover': [0.488, 122], 'support': [0.488, 122]}]] + ['data/tiny_mushrooms.csv', '10', '20', '50', ["Edible"], + {'p_value': 5.26971e-31, 'confidence': 1, + 'rhs_cover': [0.488, 122], 'leverage': 0.24986, + 'rhs': [19], 'rule_id': '000002', 'lift': 2.04918, + 'lhs': [0, 21, 16, 7], 'lhs_cover': [0.488, 122], + 'support': [0.488, 122]}]] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) association_create.i_create_an_association_from_dataset(self) - association_create.the_association_is_finished_in_less_than(self, example[3]) + association_create.the_association_is_finished_in_less_than( + self, example["model_wait"]) association_create.i_create_a_local_association(self) - association_create.i_get_rules_for_item_list(self, example[4]) - association_create.the_first_rule_is(self, example[5]) + association_create.i_get_rules_for_item_list( + self, example["item_list"]) + association_create.the_first_rule_is( + self, example["JSON_rule"]) def test_scenario3(self): """ - Scenario: Successfully creating local association object: - Given I create a data source uploading a "<data>" file - And I wait until the source is ready less than <time_1> secs - And I create a dataset - And I wait until the dataset is ready less than <time_2> secs - And I create an 
association with search strategy "<search_strategy>" from a dataset - And I wait until the association is ready less than <time_3> secs - And I create a local association - When I get the rules for <"item_list"> - Then the first rule is "<JSON_rule>" - - Examples: - | data | time_1 | time_2 | time_3 | item_list | JSON_rule | search_strategy - | ../data/tiny_mushrooms.csv | 10 | 20 | 50 | ["Edible"] | {'confidence': 1, 'leverage': 0.07885, 'lhs': [14], 'lhs_cover': [0.704, 176], 'lift': 1.12613, 'p_value': 2.08358e-17, 'rhs': [1], 'rhs_cover': [0.888, 222], 'rule_id': u'000038', 'support': [0.704, 176]} | lhs_cover - - + Scenario: Successfully creating local association object: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create an association with search strategy "<strategy>" from a dataset + And I wait until the association is ready less than <model_wait> secs + And I create a local association + When I get the rules for <"item_list"> + Then the first rule is "<JSON_rule>" """ - print self.test_scenario3.__doc__ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "item_list", "JSON_rule", "strategy"] examples = [ - ['data/tiny_mushrooms.csv', '10', '20', '50', ["Edible"], {'p_value': 2.08358e-17, 'confidence': 0.79279, 'rhs_cover': [0.704, 176], 'leverage': 0.07885, 'rhs': [11], 'rule_id': u'000007', 'lift': 1.12613, 'lhs': [0], 'lhs_cover': [0.888, 222], 'support': [0.704, 176]}, 'lhs_cover']] + ['data/tiny_mushrooms.csv', '10', '20', '50', ["Edible"], + {'p_value': 2.08358e-17, 'confidence': 0.79279, + 'rhs_cover': [0.704, 176], 'leverage': 0.07885, + 'rhs': [11], 'rule_id': '000007', 'lift': 1.12613, + 'lhs': [0], 'lhs_cover': [0.888, 222], + 'support': [0.704, 176]}, 'lhs_cover']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - association_create.i_create_an_association_with_strategy_from_dataset(self, example[6]) - association_create.the_association_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + association_create.i_create_an_association_with_strategy_from_dataset( + self, example["strategy"]) + association_create.the_association_is_finished_in_less_than( + self, example["model_wait"]) association_create.i_create_a_local_association(self) - association_create.i_get_rules_for_item_list(self, example[4]) - association_create.the_first_rule_is(self, example[5]) + association_create.i_get_rules_for_item_list( + self, example["item_list"]) + association_create.the_first_rule_is(self, example["JSON_rule"]) diff --git a/bigml/tests/test_29_script.py b/bigml/tests/test_29_script.py index 02036c01..eb5bc752 100644 --- a/bigml/tests/test_29_script.py +++ b/bigml/tests/test_29_script.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: 
disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,43 +20,50 @@ """ Creating and updating scripts """ -from world import world, setup_module, teardown_module -import create_script_steps as script_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_script_steps as script_create -class TestScript(object): +class TestScript: + """Testing script methods""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a whizzml script: - Given I create a whizzml script from a excerpt of code "<source_code>" - And I wait until the script is ready less than <time_1> secs - And I update the script with "<param>", "<param_value>" - And I wait until the script is ready less than <time_2> secs - Then the script code is "<source_code>" and the value of "<param>" is "<param_value>" - - Examples: - | source_code | time_1 | time_2 | param | param_value - | (+ 1 1) | 10 | 10 | name | my script + Scenario: Successfully creating a whizzml script: + Given I create a whizzml script from an excerpt of code "<source_code>" + And I wait until the script is ready less than <script_wait> secs + And I update the script with "<param>", "<param_value>" + And I wait until the script is ready less than <script_wait> secs + Then the script code is "<source_code>" and the value of "<param>" is "<param_value>" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["source_code", "script_wait", "param", "param_value"] examples = [ - ['(+ 1 1)', '10', '10', 'name', 'my script']] + ['(+ 1 1)', '30', 'name', 'my script']] for example in examples: - print "\nTesting with:\n", example - script_create.i_create_a_script(self, example[0]) - script_create.the_script_is_finished(self, example[1]) - script_create.i_update_a_script(self, example[3], example[4]) - script_create.the_script_is_finished(self, example[2]) - script_create.the_script_code_and_attributes(self, example[0], example[3], example[4]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + script_create.i_create_a_script(self, example["source_code"]) + script_create.the_script_is_finished(self, example["script_wait"]) + script_create.i_update_a_script( + self, example["param"], example["param_value"]) + script_create.the_script_is_finished(self, example["script_wait"]) + script_create.the_script_code_and_attributes( + self, example["source_code"], + example["param"], + example["param_value"]) diff --git a/bigml/tests/test_30_execution.py b/bigml/tests/test_30_execution.py index eaa2d912..e1864d5c 100644 --- a/bigml/tests/test_30_execution.py +++ b/bigml/tests/test_30_execution.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache 
License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,83 +20,139 @@ """ Creating and updating scripts """ -from world import world, setup_module, teardown_module -import create_script_steps as script_create -import create_execution_steps as execution_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_script_steps as script_create +from . import create_execution_steps as execution_create -class TestExecution(object): +class TestExecution: + """Testing local executions""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a whizzml script execution: - Given I create a whizzml script from a excerpt of code "<source_code>" - And I wait until the script is ready less than <time_1> secs - And I create a whizzml script execution from an existing script - And I wait until the execution is ready less than <time_2> secs - And I update the execution with "<param>", "<param_value>" - And I wait until the execution is ready less than <time_3> secs - Then the script id is correct, the value of "<param>" is "<param_value>" and the result is "<result>" - - Examples: - - | source_code | time_1 | time_2 | time_3 | param | param_value | result - | (+ 1 1) | 10 | 10 | 10 | name | my execution | 2 + Scenario: Successfully creating a whizzml script execution: + Given I create a whizzml script from an excerpt of code "<source_code>" + And I wait until the script is ready less than <script_wait> secs + And I create a whizzml script execution from an existing script + And I wait until the execution is ready less than <execution_wait> secs + And I update the execution with "<param>", "<param_value>" + And I wait until the execution is ready less than <execution_wait> secs + And I create a local execution + Then the script id is correct, the value of "<param>" is "<param_value>" and the result is "<result>" + And the local execution result is "<result>" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["source_code", "script_wait", "execution_wait", "param", + "param_value", "result"] examples = [ - ['(+ 1 1)', '10', '10', '10', 'name', 'my execution', 2]] + ['(+ 1 1)', '30', '30', 'name', 'my execution', 2]] for example in examples: - print "\nTesting with:\n", example - script_create.i_create_a_script(self, example[0]) - script_create.the_script_is_finished(self, example[1]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + script_create.i_create_a_script(self, example["source_code"]) + script_create.the_script_is_finished(self, example["script_wait"]) execution_create.i_create_an_execution(self) - execution_create.the_execution_is_finished(self, example[2]) - execution_create.i_update_an_execution(self, example[4], example[5]) - execution_create.the_execution_is_finished(self, example[3]) - execution_create.the_execution_and_attributes(self, example[4], example[5], example[6]) + execution_create.the_execution_is_finished( + self, example["execution_wait"]) + execution_create.i_update_an_execution( + self, example["param"], example["param_value"]) + execution_create.the_execution_is_finished( + self, 
example["execution_wait"]) + execution_create.create_local_execution(self) + execution_create.the_execution_and_attributes( + self, example["param"], example["param_value"], + example["result"]) + execution_create.the_local_execution_result_is( + self, example["result"]) def test_scenario2(self): """ - Scenario: Successfully creating a whizzml script execution from a list of scripts: - Given I create a whizzml script from a excerpt of code "<source_code>" - And I wait until the script is ready less than <time_1> secs - And I create a whizzml script from a excerpt of code "<source_code>" - And I wait until the script is ready less than <time_1> secs - And I create a whizzml script execution from the last two scripts - And I wait until the execution is ready less than <time_2> secs - And I update the execution with "<param>", "<param_value>" - And I wait until the execution is ready less than <time_3> secs - Then the script ids are correct, the value of "<param>" is "<param_value>" and the result is "<result>" - - Examples: + Scenario: Successfully creating a whizzml script execution from a list of scripts: + Given I create a whizzml script from an excerpt of code "<source_code>" + And I wait until the script is ready less than <script_wait> secs + And I create a whizzml script from an excerpt of code "<source_code>" + And I wait until the script is ready less than <script_wait> secs + And I create a whizzml script execution from the last two scripts + And I wait until the execution is ready less than <execution_wait> secs + And I update the execution with "<param>", "<param_value>" + And I wait until the execution is ready less than <execution_wait> secs + Then the script ids are correct, the value of "<param>" is "<param_value>" and the result is "<result>" + """ + show_doc(self.test_scenario2) + headers = ["source_code", "script_wait", "execution_wait", "param", + "param_value", "result"] + examples = [ + ['(+ 1 1)', '100', '100', 'name', 'my execution', [2, 2]]] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + script_create.i_create_a_script(self, example["source_code"]) + script_create.the_script_is_finished(self, example["script_wait"]) + script_create.i_create_a_script(self, example["source_code"]) + script_create.the_script_is_finished(self, example["script_wait"]) + execution_create.i_create_an_execution_from_list( + self, number_of_scripts=2) + execution_create.the_execution_is_finished( + self, example["execution_wait"]) + execution_create.i_update_an_execution( + self, example["param"], example["param_value"]) + execution_create.the_execution_is_finished( + self, example["execution_wait"]) + execution_create.the_execution_ids_and_attributes( + self, 2, example["param"], example["param_value"], + example["result"]) - | source_code | time_1 | time_2 | time_3 | param | param_value | result - | (+ 1 1) | 10 | 10 | 10 | name | my execution | [2, 2] + def test_scenario3(self): """ - print self.test_scenario2.__doc__ + Scenario: Successfully creating a whizzml script execution from a local or remote file: + Given I create a whizzml script from a local file or url "<source_code>" + And I wait until the script is ready less than <script_wait> secs + And I create a whizzml script execution from an existing script + And I wait until the execution is ready less than <execution_wait> secs + And I update the execution with "<param>", "<param_value>" + And I wait until the execution is ready less than <execution_wait> secs + Then the script id is correct, the value of "<param>" is "<param_value>" and the result is "<result>" + """ + show_doc(self.test_scenario3) + headers = ["source_code", "script_wait", "execution_wait", "param", + "param_value", "result"] 
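+ # the example rows exercise both script sources: a local .whizzml file and a remote gist URL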
examples = [ - ['(+ 1 1)', '10', '10', '10', 'name', 'my execution', [2, 2]]] + ['data/one_plus_one.whizzml', '50', '50', 'name', + 'my execution', 2], + ['https://gist.github.com/mmerce/49e0a69cab117b6a11fb490140326020', + '30', '30', 'name', 'my execution', 2]] for example in examples: - print "\nTesting with:\n", example - script_create.i_create_a_script(self, example[0]) - script_create.the_script_is_finished(self, example[1]) - script_create.i_create_a_script(self, example[0]) - script_create.the_script_is_finished(self, example[1]) - execution_create.i_create_an_execution_from_list(self, 2) - execution_create.the_execution_is_finished(self, example[2]) - execution_create.i_update_an_execution(self, example[4], example[5]) - execution_create.the_execution_is_finished(self, example[3]) - execution_create.the_execution_ids_and_attributes(self, 2, example[4], example[5], example[6]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + script_create.i_create_a_script_from_file_or_url( + self, example["source_code"]) + script_create.the_script_is_finished( + self, example["script_wait"]) + execution_create.i_create_an_execution(self) + execution_create.the_execution_is_finished( + self, example["execution_wait"]) + execution_create.i_update_an_execution( + self, example["param"], example["param_value"]) + execution_create.the_execution_is_finished( + self, example["execution_wait"]) + execution_create.the_execution_and_attributes( + self, example["param"], example["param_value"], + example["result"]) diff --git a/bigml/tests/test_31_library.py b/bigml/tests/test_31_library.py index b489058b..9de406c8 100644 --- a/bigml/tests/test_31_library.py +++ b/bigml/tests/test_31_library.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,43 +20,51 @@ """ Creating and updating scripts """ -from world import world, setup_module, teardown_module -import create_library_steps as library_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . 
import create_library_steps as library_create -class TestLibrary(object): +class TestLibrary: + """Testing Library methods""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a whizzml library: - Given I create a whizzml library from a excerpt of code "<source_code>" - And I wait until the library is ready less than <time_1> secs - And I update the library with "<param>", "<param_value>" - And I wait until the library is ready less than <time_2> secs - Then the library code is "<source_code>" and the value of "<param>" is "<param_value>" - - Examples: - | source_code | time_1 | time_2 | param | param_value - | (define (mu x) (+ x 1)) | 10 | 10 | name | my library + Scenario: Successfully creating a whizzml library: + Given I create a whizzml library from an excerpt of code "<source_code>" + And I wait until the library is ready less than <library_wait> secs + And I update the library with "<param>", "<param_value>" + And I wait until the library is ready less than <library_wait> secs + Then the library code is "<source_code>" and the value of "<param>" is "<param_value>" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["source_code", "library_wait", "param", "param_value"] examples = [ - ['(define (mu x) (+ x 1))', '10', '10', 'name', 'my library']] + ['(define (mu x) (+ x 1))', '10', 'name', 'my library']] for example in examples: - print "\nTesting with:\n", example - library_create.i_create_a_library(self, example[0]) - library_create.the_library_is_finished(self, example[1]) - library_create.i_update_a_library(self, example[3], example[4]) - library_create.the_library_is_finished(self, example[2]) - library_create.the_library_code_and_attributes(self, example[0], example[3], example[4]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + library_create.i_create_a_library(self, example["source_code"]) + library_create.the_library_is_finished( + self, example["library_wait"]) + library_create.i_update_a_library( + self, example["param"], example["param_value"]) + library_create.the_library_is_finished( + self, example["library_wait"]) + library_create.the_library_code_and_attributes( + self, example["source_code"], example["param"], + example["param_value"]) diff --git a/bigml/tests/test_32_topic_model_prediction.py b/bigml/tests/test_32_topic_model_prediction.py index 446d5709..fd26e407 100644 --- a/bigml/tests/test_32_topic_model_prediction.py +++ b/bigml/tests/test_32_topic_model_prediction.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2016-2019 BigML +# Copyright 2016-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -19,11 +20,14 @@ """ Creating a local Topic distribution from Topic Model """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_lda_steps as topic_create -import compute_lda_prediction_steps as lda_predict +import sys + +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_lda_steps as topic_create +from . import compute_lda_prediction_steps as lda_predict # This model is from the bigmlcom/streaming-lda; the associated test is @@ -42,7 +46,7 @@ [0, 1, 2, 0], [1, 2, 0, 0], [0, 0, 2, 0]], - "termset": [u"cycling", u"playing", u"shouldn't", u"uńąnimous court"], + "termset": ["cycling", "playing", "shouldn't", "uńąnimous court"], "options": {}, "topics": [{"name": "Topic 1", "id": "000000", @@ -76,39 +80,40 @@ } -class TestTopicModel(object): +class TestTopicModel: + """Test Topic Model Predictions""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario 1: Successfully creating a local Topic Distribution - Given I have a block of text and an LDA model - And I use the model to predict the topic distribution - Then the value of the distribution matches the expected distribution - - Examples: - | model | text | expected_distribution | - | {...} | "hello, world!" 
| [0.5, 0.3, 0.2] | + Scenario 1: Successfully creating a local Topic Distribution + Given I have a block of text and an LDA model + And I use the model to predict the topic distribution + Then the value of the distribution matches the expected distribution """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["model", "text", "expected_distribution"] examples = [ # This example is a replication of a test in bigmlcom/streaming-lda [ DUMMY_MODEL, - {"TEST TEXT": u"uńąnimous court 'UŃĄNIMOUS COURT' " - u"`play``the plays PLAYing SHOULDN'T CYCLE " - u"cycling shouldn't uńąnimous or court's"}, + {"TEST TEXT": "uńąnimous court 'UŃĄNIMOUS COURT' " + "`play``the plays PLAYing SHOULDN'T CYCLE " + "cycling shouldn't uńąnimous or court's"}, [ {"name": 'Topic 1', "probability": 0.1647366}, {"name": 'Topic 2', "probability": 0.1885310}, @@ -119,38 +124,45 @@ def test_scenario1(self): ] for ex in examples: - print "\nTesting with:\n", ex[1] - lda_predict.i_make_a_prediction(self, ex[0], ex[1], ex[2]) + ex = dict(zip(headers, ex)) + show_method(self, self.bigml["method"], ex) + lda_predict.i_make_a_prediction( + self, ex["model"], ex["text"], ex["expected_distribution"]) def test_scenario2(self): """ - Scenario 2: Successfully creating Topic Model from a dataset: - Given I create a data source uploading a "<data>" file - And I wait until the source is ready less than <time_1> secs - And I create a dataset - And I wait until the dataset is ready less than <time_2> secs - And I create topic model from a dataset - And I wait until the topic model is ready less than <time_3> secs - And I update the topic model name to "<topic_model_name>" - When I wait until the topic_model is ready less than <time_4> secs - Then the topic model name is "<topic_model_name>" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | topic_model_name | params - | ../data/spam.csv | 100 | 100 | 200 | 500 | my new topic model name | '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}' + Scenario 2: Successfully creating Topic Model from a dataset: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I update the source with params "<source_conf>" + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create topic model from a dataset + And I wait until the topic model is ready less than <model_wait> secs + And I update the topic model name to "<topic_model_name>" + When I wait until the topic_model is ready less than <model_wait> secs + Then the topic model name is "<topic_model_name>" """ - print self.test_scenario2.__doc__ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "topic_model_name", "source_conf"] examples = [ - ['data/spam.csv', '100', '100', '10000', '500', 'my new topic model name', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}']] + ['data/spam.csv', '100', '100', '100', 'my new topic model name', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - source_create.i_update_source_with(self, example[6]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"]) + 
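# wait for the source to finish before applying the text-analysis options in "source_conf" + 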
source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) topic_create.i_create_a_topic_model(self) - topic_create.the_topic_model_is_finished_in_less_than(self, example[3]) - topic_create.i_update_topic_model_name(self, example[5]) - topic_create.the_topic_model_is_finished_in_less_than(self, example[4]) - topic_create.i_check_topic_model_name(self, example[5]) + topic_create.the_topic_model_is_finished_in_less_than( + self, example["model_wait"]) + topic_create.i_update_topic_model_name( + self, example["topic_model_name"]) + topic_create.the_topic_model_is_finished_in_less_than( + self, example["model_wait"]) + topic_create.i_check_topic_model_name( + self, example["topic_model_name"]) diff --git a/bigml/tests/test_33_compare_predictions.py b/bigml/tests/test_33_compare_predictions.py index a79ba412..cf322c36 100644 --- a/bigml/tests/test_33_compare_predictions.py +++ b/bigml/tests/test_33_compare_predictions.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,55 +20,60 @@ """ Comparing remote and local predictions """ -from world import world, setup_module, teardown_module, show_doc -from bigml.util import PY3 -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_model_steps as model_create -import create_ensemble_steps as ensemble_create -import create_association_steps as association_create -import create_cluster_steps as cluster_create -import create_anomaly_steps as anomaly_create -import create_prediction_steps as prediction_create -import compare_predictions_steps as prediction_compare -import create_lda_steps as topic_create +import json +from .world import world, setup_module, teardown_module, show_doc, \ + show_method, res_filename +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_ensemble_steps as ensemble_create +from . import create_linear_steps as linear_create +from . import create_association_steps as association_create +from . import create_cluster_steps as cluster_create +from . import create_anomaly_steps as anomaly_create +from . import create_prediction_steps as prediction_create +from . import compare_predictions_steps as prediction_compare +from . 
import create_lda_steps as topic_create -class TestComparePrediction(object): - def setup(self): +class TestComparePrediction: + """Test local and remote predictions""" + + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully comparing centroids with or without text options: - Given I create a data source uploading a "<data>" file - And I wait until the source is ready less than <time_1> secs - And I update the source with params "<options>" - And I create a dataset - And I wait until the dataset is ready less than <time_2> secs - And I create a cluster - And I wait until the cluster is ready less than <time_3> secs - And I create a local cluster - When I create a centroid for "<data_input>" - Then the centroid is "<centroid>" with distance "<distance>" - And I create a local centroid for "<data_input>" - Then the local centroid is "<centroid>" with distance "<distance>" - - Examples headers: - | data | time_1 | time_2 | time_3 | options | data_input | centroid | distance | - + Scenario: Successfully comparing centroids with or without text options: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I update the source with params "<source_conf>" + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create a cluster + And I wait until the cluster is ready less than <model_wait> secs + And I create a local cluster + When I create a centroid for "<input_data>" + Then the centroid is "<centroid>" with distance "<distance>" + And I create a local centroid for "<input_data>" + Then the local centroid is "<centroid>" with distance "<distance>" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "input_data", "centroid", "distance"] examples = [ ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile call"}', 'Cluster 0', '0.25'], ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 0', '0.5'], @@ -80,498 +86,821 @@ def test_scenario1(self): ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Type": "", "Message": ""}', 'Cluster 6', '0.5'], ['data/diabetes.csv', '20', '20', '30', '{"fields": {}}', '{"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"}', 'Cluster 3', '0.5033378686559257'], ['data/diabetes.csv', '20', '20', '30', '{"fields": {}}', '{"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": true}', 'Cluster 3', '0.5033378686559257'], - ['data/iris_sp_chars.csv', '20', '20', '30', '{"fields": {}}', '{"pétal.length":1, "pétal&width\u0000": 2, "sépal.length":1, "sépal&width": 2, "spécies": "Iris-setosa"}', 'Cluster 7', '0.8752380218327035'], + ['data/iris_sp_chars.csv', '20', '20', '30', '{"fields": {}}', '{"pétal.length":1, "pétal&width\\u0000": 2, "sépal.length":1, "sépal&width": 2, "spécies": "Iris-setosa"}', 'Cluster 7', '0.8752380218327035'], ['data/movies.csv', '20', '20', '30', '{"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}}', '{"gender": "Female", "age_range": "18-24", "genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student", "zipcode": 59583, "rating": 3}', 'Cluster 3', '0.62852']] - show_doc(self.test_scenario1, examples) + show_doc(self.test_scenario1) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - source_create.i_update_source_with(self, example[4]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) cluster_create.i_create_a_cluster(self) - cluster_create.the_cluster_is_finished_in_less_than(self, example[3]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_cluster(self) - prediction_create.i_create_a_centroid(self, example[5]) - prediction_create.the_centroid_is_with_distance(self, example[6], example[7]) - prediction_compare.i_create_a_local_centroid(self, example[5]) - prediction_compare.the_local_centroid_is(self, example[6], example[7]) + prediction_create.i_create_a_centroid( + self, example["input_data"]) + prediction_create.the_centroid_is_with_distance( + self, example["centroid"], example["distance"]) + prediction_compare.i_create_a_local_centroid( + self, example["input_data"]) + prediction_compare.the_local_centroid_is( + self, example["centroid"], example["distance"]) def test_scenario2(self): """ - Scenario: Successfully comparing centroids with configuration options: - Given I create a data source uploading a "<data>" file - And I wait until the source is ready less than <time_1> secs - And I create a dataset - And I wait until the dataset is ready less than <time_2> secs - And I create a cluster with options "<options>" - And I wait until the cluster is ready less than <time_3> secs - And I create a local cluster - When I create a centroid for "<full_data_input>" - Then the centroid is "<centroid>" with distance "<distance>" - And I create a local centroid for "<data_input>" - Then the local centroid is "<centroid>" with distance "<distance>" - - Examples: - | data | time_1 | time_2 | time_3 | options | data_input | centroid | distance | full_data_input + Scenario: Successfully comparing centroids with configuration options: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create a cluster with options "<model_conf>" + And I wait until the cluster is ready less than <model_wait> secs + And I create a local cluster + When I create a centroid for "<input_data>" + Then the centroid is "<centroid>" with distance "<distance>" + And I create a local centroid for "<input_data_l>" + Then the local centroid is "<centroid>" with distance "<distance>" + And I create a local bigml model prediction for "<input_data_l>" + Then the local centroid 
is "<centroid>" with distance "<distance>" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "model_conf", "input_data_l", "centroid", "distance", + "input_data"] examples = [ - ['data/iris.csv', '30', '30', '30', '{"summary_fields": ["sepal width"]}', '{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"}', 'Cluster 2', '1.16436', '{"petal length": 1, "petal width": 1, "sepal length": 1, "species": "Iris-setosa"}'], - ['data/iris.csv', '20', '20', '30', '{"default_numeric_value": "zero"}', '{"petal length": 1}', 'Cluster 4', '1.41215', '{"petal length": 1, "petal width": 0, "sepal length": 0, "sepal width": 0, "species": ""}']] - show_doc(self.test_scenario2, examples) + ['data/iris.csv', '30', '30', '30', + '{"summary_fields": ["sepal width"]}', + '{"petal length": 1, "petal width": 1, "sepal length": 1, ' + '"species": "Iris-setosa"}', 'Cluster 2', '1.16436', + '{"petal length": 1, "petal width": 1, "sepal length": 1, ' + '"species": "Iris-setosa"}'], + ['data/iris.csv', '20', '20', '30', + '{"default_numeric_value": "zero"}', + '{"petal length": 1}', 'Cluster 4', '1.41215', + '{"petal length": 1, "petal width": 0, "sepal length": 0, ' + '"sepal width": 0, "species": ""}']] + show_doc(self.test_scenario2) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - cluster_create.i_create_a_cluster_with_options(self, example[4]) - cluster_create.the_cluster_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + cluster_create.i_create_a_cluster_with_options( + self, example["model_conf"]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_cluster(self) - prediction_create.i_create_a_centroid(self, example[8]) - prediction_create.the_centroid_is_with_distance(self, example[6], example[7]) - prediction_compare.i_create_a_local_centroid(self, example[5]) - prediction_compare.the_local_centroid_is(self, example[6], example[7]) + prediction_create.i_create_a_centroid( + self, example["input_data"]) + prediction_create.the_centroid_is_with_distance( + self, example["centroid"], example["distance"]) + prediction_compare.i_create_a_local_centroid( + self, example["input_data_l"]) + prediction_compare.the_local_centroid_is( + self, example["centroid"], example["distance"]) + prediction_compare.i_create_a_local_bigml_model(self, + model_type="cluster") + prediction_compare.i_create_a_local_bigml_model_prediction( + self, example["input_data_l"], prediction_type="centroid") + prediction_compare.the_local_centroid_is( + self, example["centroid"], example["distance"]) def test_scenario3(self): """ - Scenario: Successfully comparing scores from anomaly detectors: - Given I create a data source uploading a "<data>" file - And I wait until the source is ready less than <time_1> secs - And I create a dataset - And I wait until the dataset is ready less than <time_2> secs - And I 
create an anomaly detector - And I wait until the anomaly detector is ready less than <time_3> secs - And I create a local anomaly detector - When I create an anomaly score for "<data_input>" - Then the anomaly score is "<score>" - And I create a local anomaly score for "<data_input>" - Then the local anomaly score is "<score>" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | score | - + Scenario: Successfully comparing scores from anomaly detectors: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create an anomaly detector with params "<model_conf>" + And I wait until the anomaly detector is ready less than <model_wait> secs + And I create a local anomaly detector + When I create an anomaly score for "<input_data>" + Then the anomaly score is "<score>" + And I create a local anomaly score for "<input_data>" + Then the local anomaly score is "<score>" + And I create a local bigml model prediction for "<input_data>" + Then the local anomaly score is "<score>" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "score", "model_conf"] examples = [ - ['data/tiny_kdd.csv', '30', '30', '30', '{"000020": 255.0, "000004": 183.0, "000016": 4.0, "000024": 0.04, "000025": 0.01, "000026": 0.0, "000019": 0.25, "000017": 4.0, "000018": 0.25, "00001e": 0.0, "000005": 8654.0, "000009": "0", "000023": 0.01, "00001f": 123.0}', '0.69802']] - show_doc(self.test_scenario3, examples) + ['data/tiny_kdd.csv', '30', '30', '80', + '{"000020": 255.0, "000004": 183.0, "000016": 4.0, ' + '"000024": 0.04, "000025": 0.01, "000026": 0.0, "000019": 0.25, ' + '"000017": 4.0, "000018": 0.25, "00001e": 0.0, "000005": 8654.0, ' + '"000009": "0", "000023": 0.01, "00001f": 123.0}', '0.69802', + '{}'], + ['data/repeat_iris.csv', '30', '30', '80', + '{"sepal width":3.5, "petal width": 0.2, "sepal length": 5.1, ' + '"petal length": 1.4, "species": "Iris-setosa"}', '0.50', + '{"normalize_repeats": false}'], + ['data/repeat_iris.csv', '30', '30', '80', + '{"sepal width":3.5, "petal width": 0.2, "sepal length": 5.1, ' + '"petal length": 1.4, "species": "Iris-setosa"}', '0.36692', + '{"normalize_repeats": true}'], + ['data/repeat_iris.csv', '30', '30', '80', + '{"sepal width":3.2, "petal width": 1.5, "sepal length": 6.4, ' + '"petal length": 4.5, "species": "Iris-versicolor"}', '0.76131', + '{"normalize_repeats": true}']] + show_doc(self.test_scenario3) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - anomaly_create.i_create_an_anomaly(self) - anomaly_create.the_anomaly_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + anomaly_create.i_create_an_anomaly_with_params( + self, example["model_conf"]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_anomaly(self) - prediction_create.i_create_an_anomaly_score(self, example[4]) - 
prediction_create.the_anomaly_score_is(self, example[5]) - prediction_compare.i_create_a_local_anomaly_score(self, example[4]) - prediction_compare.the_local_anomaly_score_is(self, example[5]) - + prediction_create.i_create_an_anomaly_score( + self, example["input_data"]) + prediction_create.the_anomaly_score_is(self, example["score"]) + prediction_compare.i_create_a_local_anomaly_score( + self, example["input_data"]) + prediction_compare.the_local_anomaly_score_is( + self, example["score"]) + prediction_compare.i_create_a_local_bigml_model(self, + model_type="anomaly") + prediction_compare.i_create_a_local_bigml_model_prediction( + self, example["input_data"], prediction_type="anomaly_score") + prediction_compare.the_local_bigml_prediction_is( + self, float(example["score"]), prediction_type="anomaly_score", + key="score", precision=4) def test_scenario4(self): """ - Scenario: Successfully comparing topic distributions: - Given I create a data source uploading a "<data>" file - And I wait until the source is ready less than <time_1> secs - And I update the source with params "<options>" - And I create a dataset - And I wait until the dataset is ready less than <time_2> secs - And I create a topic model - And I wait until the topic model is ready less than <time_3> secs - And I create a local topic model - When I create a topic distribution for "<data_input>" - Then the topic distribution is "<topic_distribution>" - And I create a local topic distribution for "<data_input>" - Then the local topic distribution is "<topic_distribution>" - - Examples headers: - | data | time_1 | time_2 | time_3 | options | data_input | topic distribution | - + Scenario: Successfully comparing topic distributions: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I update the source with params "<source_conf>" + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create a topic model + And I wait until the topic model is ready less than <model_wait> secs + And I create a local topic model + When I create a topic distribution for "<input_data>" + Then the topic distribution is "<topic_distribution>" + And I create a local topic distribution for "<input_data>" + Then the local topic distribution is "<topic_distribution>" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "input_data", "topic_distribution"] examples = [ - ['data/spam.csv', '30', '30', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile call"}', '[0.51133, 0.00388, 0.00574, 0.00388, 0.00388, 0.00388, 0.00388, 0.00388, 0.00388, 0.00388, 0.00388, 0.44801]'], - ['data/spam.csv', '30', '30', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "Go until jurong point, crazy.. Available only in bugis n great world la e buffet... 
Cine there got amore wat..."}', '[0.39188, 0.00643, 0.00264, 0.00643, 0.08112, 0.00264, 0.37352, 0.0115, 0.00707, 0.00327, 0.00264, 0.11086]']] - show_doc(self.test_scenario4, examples) + ['data/spam.csv', '30', '30', '80', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": true, "stem_words": true, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Type": "ham", "Message": "Mobile call"}', + '[0.51133, 0.00388, 0.00574, 0.00388, 0.00388, 0.00388, ' + '0.00388, 0.00388, 0.00388, 0.00388, 0.00388, 0.44801]'], + ['data/spam.csv', '30', '30', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": true, "stem_words": true, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Type": "ham", "Message": "Go until jurong point, crazy.. ' + 'Available only in bugis n great world la e buffet... Cine ' + 'there got amore wat..."}', + '[0.39188, 0.00643, 0.00264, 0.00643, 0.08112, 0.00264, ' + '0.37352, 0.0115, 0.00707, 0.00327, 0.00264, 0.11086]']] + show_doc(self.test_scenario4) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - source_create.i_update_source_with(self, example[4]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) topic_create.i_create_a_topic_model(self) - topic_create.the_topic_model_is_finished_in_less_than(self, example[3]) + topic_create.the_topic_model_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_topic_model(self) - topic_create.i_create_a_topic_distribution(self, example[5]) - prediction_compare.the_topic_distribution_is(self, example[6]) - topic_create.i_create_a_local_topic_distribution(self, example[5]) - prediction_compare.the_local_topic_distribution_is(self, example[6]) + topic_create.i_create_a_topic_distribution( + self, example["input_data"]) + prediction_compare.the_topic_distribution_is( + self, example["topic_distribution"]) + topic_create.i_create_a_local_topic_distribution( + self, example["input_data"]) + prediction_compare.the_local_topic_distribution_is( + self, example["topic_distribution"]) + prediction_compare.i_create_a_local_bigml_model(self, + model_type="topic_model") + prediction_compare.i_create_a_local_bigml_model_prediction( + self, example["input_data"], + prediction_type="topic_distribution") + ref_distribution = dict( + zip([t["name"] for t in self.bigml["local_model"].topics], + json.loads(example["topic_distribution"]))) + prediction_compare.the_local_bigml_prediction_is( + self, ref_distribution, prediction_type="topic_distribution", + precision=4) def test_scenario5(self): """ - Scenario: Successfully comparing association sets: - Given I create a data source uploading a "<data>" file - And I wait until the source is ready less than <time_1> secs - And I update the source with params "<options>" - And I create a dataset - And I wait until the dataset is ready less than <time_2> secs - And I create a model - And I wait until the association is ready less than <time_3> secs - And I create a local association - When I 
create an association set for "<data_input>" - Then the association set is like the contents of "<association_set_file>" - And I create a local association set for "<data_input>" - Then the local association set is like the contents of "<association_set_file>" - + Scenario: Successfully comparing association sets: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I update the source with params "<source_conf>" + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create an association from a dataset + And I wait until the association is ready less than <model_wait> secs + And I create a local association + When I create an association set for "<input_data>" + Then the association set is like the contents of "<association_set_file>" + And I create a local association set for "<input_data>" + Then the local association set is like the contents of "<association_set_file>" + And I create a local bigml model prediction for "<input_data>" + Then the local bigml model prediction is like the contents of "<association_set_file>" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "association_set_file", "input_data"] examples = [ - ['data/groceries.csv', '20', '20', '30', '{"fields": {"00000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', 'data/associations/association_set.json', '{"field1": "cat food"}']] - show_doc(self.test_scenario5, examples) - + ['data/groceries.csv', '20', '20', '50', '{"fields": {"00000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', 'data/associations/association_set.json', '{"field1": "cat food"}']] + show_doc(self.test_scenario5) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - source_create.i_update_source_with(self, example[4]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) association_create.i_create_an_association_from_dataset(self) - association_create.the_association_is_finished_in_less_than(self, example[3]) + association_create.the_association_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_association(self) - prediction_create.i_create_an_association_set(self, example[6]) - prediction_compare.the_association_set_is_like_file(self, example[5]) - prediction_compare.i_create_a_local_association_set(self, example[6]) - prediction_compare.the_local_association_set_is_like_file(self, example[5]) + prediction_create.i_create_an_association_set( + self, example["input_data"]) + prediction_compare.the_association_set_is_like_file( + self, example["association_set_file"]) + prediction_compare.i_create_a_local_association_set( + self, example["input_data"]) + prediction_compare.the_local_association_set_is_like_file( + self, example["association_set_file"]) + prediction_compare.i_create_a_local_bigml_model(self, + model_type="association") + prediction_compare.i_create_a_local_bigml_model_prediction( + self, example["input_data"], prediction_type="rules") + with open(res_filename(example["association_set_file"])) as handler: + rules = {"rules": json.load(handler)} + prediction_compare.the_local_bigml_prediction_is( + 
self, rules, prediction_type="rules", precision=4) def test_scenario6(self): """ - Scenario: Successfully comparing predictions for ensembles: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create an ensemble with "" - And I wait until the ensemble is ready less than secs - And I create a local ensemble - When I create a prediction for "" - Then the prediction for "" is "" - And I create a local prediction for "" - Then the local prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | params - - ['data/iris.csv', '10', '10', '120', '{"petal width": 0.5}', '000004', 'Iris-versicolor', '{"number_of_models": 5}'], - ['data/iris.csv', '10', '10', '120', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica', '{"number_of_models": 5}'], - ['data/iris.csv', '10', '10', '120', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor', '{"number_of_models": 5}'], - ['data/grades.csv', '10', '10', '120', '{"Midterm": 20}', '000005', 46.261364, '{"number_of_models": 5}'], - ['data/iris.csv', '10', '10', '120', '{"petal width": 0.5}', '000004', 'Iris-setosa', '{"boosting": {"iterations": 5}, "number_of_models": 5}'], - ['data/iris.csv', '10', '10', '120', '{"petal length": 6, "petal width": 2}', '000004', 'Iris-virginica', '{"boosting": {"iterations": 5}, "number_of_models": 5}'], - ['data/iris.csv', '10', '10', '120', '{"petal length": 4, "petal width": 1.5}', '000004', 'Iris-versicolor', '{"boosting": {"iterations": 5}, "number_of_models": 5}'], - - + Scenario: Successfully comparing predictions for ensembles: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an ensemble with "" + And I wait until the ensemble is ready less than secs + And I create a local ensemble + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "model_conf"] examples = [ - ['data/iris_unbalanced.csv', '30', '30', '120', '{"petal width": 4}', '000004', 'Iris-virginica', '{"boosting": {"iterations": 5}, "number_of_models": 5}'], - ['data/grades.csv', '30', '30', '120', '{"Midterm": 20}', '000005', 61.61036, '{"boosting": {"iterations": 5}, "number_of_models": 5}']] - show_doc(self.test_scenario6, examples) - + ['data/iris_unbalanced.csv', '30', '30', '120', + '{"petal width": 4}', '000004', 'Iris-virginica', + '{"boosting": {"iterations": 5}, "number_of_models": 5}'], + ['data/grades.csv', '30', '30', '120', '{"Midterm": 20}', + '000005', 61.61036, + '{"boosting": {"iterations": 5}, "number_of_models": 5}']] + show_doc(self.test_scenario6) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - ensemble_create.i_create_an_ensemble_with_params(self, example[7]) - ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + 
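The scenario6 hunk below drives a create-source/dataset/ensemble flow through test steps; expressed directly against the bindings' public API it is roughly the following. A minimal sketch, assuming credentials are set in BIGML_USERNAME/BIGML_API_KEY; the input values simply mirror the first example row.

from bigml.api import BigML
from bigml.ensemble import Ensemble

api = BigML()  # picks up BIGML_USERNAME / BIGML_API_KEY from the environment

source = api.create_source("data/iris_unbalanced.csv")
api.ok(source)                        # poll until the source is finished
dataset = api.create_dataset(source)
api.ok(dataset)
ensemble = api.create_ensemble(
    dataset, {"boosting": {"iterations": 5}, "number_of_models": 5})
api.ok(ensemble)

remote = api.create_prediction(ensemble, {"petal width": 4})
api.ok(remote)                        # remote prediction resource

local_ensemble = Ensemble(ensemble, api=api)  # offline predictor
print(local_ensemble.predict({"petal width": 4}))  # should match the remote prediction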
source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_create.i_create_an_ensemble_with_params( + self, example["model_conf"]) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"]) ensemble_create.create_local_ensemble(self) - prediction_create.i_create_an_ensemble_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_compare.i_create_a_local_ensemble_prediction(self, example[4]) - prediction_compare.the_local_prediction_is(self, example[6]) - + prediction_create.i_create_an_ensemble_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_ensemble_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) def test_scenario7(self): """ - Scenario: Successfully comparing predictions for ensembles with proportional missing strategy: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create an esemble with "" - And I wait until the ensemble is ready less than secs - And I create a local ensemble - When I create a proportional missing strategy prediction for "" - Then the prediction for "" is "" - And the confidence for the prediction is "" - And I create a proportional missing strategy local prediction for "" - Then the local prediction is "" - And the local prediction's confidence is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | confidence | params - ['data/iris.csv', '10', '10', '50', '{}', '000004', 'Iris-virginica', '0.33784', '{"boosting": {"iterations": 5}}'], + Scenario: Successfully comparing predictions for ensembles with proportional missing strategy: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an ensemble with "" + And I wait until the ensemble is ready less than secs + And I create a local ensemble + When I create a proportional missing strategy prediction for "" with <"operating"> + Then the prediction for "" is "" + And the confidence for the prediction is "" + And I create a proportional missing strategy local prediction for "" with <"operating"> + Then the local prediction is "" + And the local prediction's confidence is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "confidence", + "model_conf", "operating"] + examples = [ + ['data/iris.csv', '30', '30', '80', '{}', '000004', 'Iris-virginica', '0.33784', '{"boosting": {"iterations": 5}}', {}], + ['data/iris.csv', '30', '30', '80', '{}', '000004', 'Iris-versicolor', '0.27261', '{"number_of_models": 5}', {"operating_kind": "confidence"}], + ['data/grades.csv', '30', '30', '50', '{}', '000005', '70.505792', '30.7161', '{"number_of_models": 5}', {}]] + show_doc(self.test_scenario7) + for example in examples: + example = dict(zip(headers, example)) + 
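The proportional missing strategy that scenario7 checks is exposed as a missing_strategy argument on the bindings' local predictors. A hedged sketch follows: the ensemble id is hypothetical, and the integer value 1 for proportional (0 meaning last-prediction) is an assumption about the bindings' constants.

from bigml.ensemble import Ensemble

ensemble_id = "ensemble/..."  # hypothetical id of a finished ensemble
local_ensemble = Ensemble(ensemble_id)

# full=True returns a dict with the prediction, confidence, etc.
result = local_ensemble.predict(
    {}, missing_strategy=1, full=True)  # 1 = proportional (assumed)
print(result["prediction"], result.get("confidence"))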
show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_create.i_create_an_ensemble_with_params( + self, example["model_conf"]) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"]) + ensemble_create.create_local_ensemble(self) + prediction_create.i_create_an_ensemble_proportional_prediction( + self, example["input_data"], example["operating"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_create.the_confidence_is(self, example["confidence"]) + prediction_create.create_local_ensemble_proportional_prediction_with_confidence( + self, example["input_data"], example["operating"]) + prediction_compare.the_local_ensemble_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) + + def test_scenario7b(self): + """ + Scenario: Successfully comparing predictions for ensembles with proportional missing strategy: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an ensemble with "" + And I wait until the ensemble is ready less than secs + And I create a local ensemble + When I create a proportional missing strategy prediction for "" with <"operating"> + Then the prediction for "" is "" + And the confidence for the prediction is "" + And I create a proportional missing strategy local prediction for "" with <"operating"> + Then the local prediction is "" + And the local prediction's confidence is "" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "confidence", + "model_conf", "operating"] examples = [ - ['data/iris.csv', '30', '30', '50', '{}', '000004', 'Iris-virginica', '0.33784', '{"boosting": {"iterations": 5}}', {}], - ['data/iris.csv', '30', '30', '50', '{}', '000004', 'Iris-versicolor', '0.27261', '{"number_of_models": 5"}', {"operating_kind": "confidence"}], - ['data/grades.csv', '30', '30', '50', '{}', '000005', '70.505792', '30.7161', '{"number_of_models": 5}', {}], - ['data/grades.csv', '30', '30', '50', '{"Midterm": 20}', '000005', '54.82214', '25.89672', '{"number_of_models": 5}', {"operating_kind": "confidence"}], - ['data/grades.csv', '30', '30', '50', '{"Midterm": 20}', '000005', '45.4573', '29.58403', '{"number_of_models": 5}', {}], - ['data/grades.csv', '30', '30', '50', '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}', '000005', '42.814', '31.51804', '{"number_of_models": 5}', {}]] - show_doc(self.test_scenario7, examples) - + ['data/grades.csv', '30', '30', '80', + '{"Midterm": 20}', '000005', '54.82214', '25.89672', + '{"number_of_models": 5}', {"operating_kind": "confidence"}], + ['data/grades.csv', '30', '30', '80', '{"Midterm": 20}', + '000005', '45.4573', '29.58403', '{"number_of_models": 5}', {}], + ['data/grades.csv', '30', '30', '80', + '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}', '000005', + '42.814', '31.51804', '{"number_of_models": 5}', {}]] + show_doc(self.test_scenario7b) for example in examples: - print "\nTesting with:\n", example - 
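Scenario7b layers operating kinds on top of the missing strategy: its first example combines votes by confidence instead of plurality. A sketch under the same assumptions as the previous one (hypothetical id, assumed constant value for the proportional strategy):

from bigml.ensemble import Ensemble

local_ensemble = Ensemble("ensemble/...")  # hypothetical id

result = local_ensemble.predict(
    {"Midterm": 20},
    operating_kind="confidence",  # combine votes by confidence
    missing_strategy=1,           # proportional (assumed constant)
    full=True)
print(result)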
source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - ensemble_create.i_create_an_ensemble_with_params(self, example[8]) - ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + ensemble_create.i_create_an_ensemble_with_params( + self, example["model_conf"]) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"]) ensemble_create.create_local_ensemble(self) - prediction_create.i_create_an_ensemble_proportional_prediction(self, example[4], example[9]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_create.the_confidence_is(self, example[7]) - prediction_create.create_local_ensemble_proportional_prediction_with_confidence(self, example[4], example[9]) - prediction_compare.the_local_ensemble_prediction_is(self, example[6]) - prediction_compare.the_local_prediction_confidence_is(self, example[7]) + prediction_create.i_create_an_ensemble_proportional_prediction( + self, example["input_data"], example["operating"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_create.the_confidence_is(self, example["confidence"]) + prediction_create.create_local_ensemble_proportional_prediction_with_confidence( + self, example["input_data"], example["operating"]) + prediction_compare.the_local_ensemble_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) def test_scenario8(self): """ - Scenario: Successfully comparing predictions for ensembles: - Given I create a local ensemble predictor from "" - And I create a local prediction for "" - Then the local prediction is "" - - Examples: - | directory | data_input | prediction - + Scenario: Successfully comparing predictions for ensembles: + Given I create a local ensemble predictor from "" + And I create a local prediction for "" + Then the local prediction is "" """ + headers = ["directory", "input_data", "prediction"] examples = [ ['bigml/tests/my_ensemble', '{"petal width": 4}', 68.1258030739]] - show_doc(self.test_scenario6, examples) - + show_doc(self.test_scenario8) for example in examples: - print "\nTesting with:\n", example - ensemble_create.create_local_ensemble_predictor(self, example[0]) - prediction_compare.i_create_a_local_ensemble_prediction(self, example[1]) - prediction_compare.the_local_ensemble_prediction_is(self, example[2]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + ensemble_create.create_local_ensemble_predictor( + self, example["directory"]) + prediction_compare.i_create_a_local_ensemble_prediction( + self, example["input_data"]) + prediction_compare.the_local_ensemble_prediction_is( + self, example["prediction"]) def test_scenario9(self): """ - Scenario: Successfully comparing predictions for ensembles with proportional missing strategy in a supervised model: - Given I create a data source uploading a "" file - And I wait until the source is ready less than 
secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create an esemble with "" - And I wait until the ensemble is ready less than secs - And I create a local ensemble - When I create a proportional missing strategy prediction for "" - Then the prediction for "" is "" - And the confidence for the prediction is "" - And I create a proportional missing strategy local prediction for "" - Then the local prediction is "" - And the local prediction's confidence is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | confidence | params - ['data/iris.csv', '10', '10', '50', '{}', '000004', 'Iris-virginica', '0.33784', '{"boosting": {"iterations": 5}}'], - - + Scenario: Successfully comparing predictions for ensembles with proportional missing strategy in a supervised model: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an ensemble with "" + And I wait until the ensemble is ready less than secs + And I create a local ensemble + When I create a proportional missing strategy prediction for "" with <"operating"> + Then the prediction for "" is "" + And the confidence for the prediction is "" + And I create a proportional missing strategy local prediction for "" with <"operating"> + Then the local prediction is "" + And the local prediction's confidence is "" + And I create a local bigml model + Then the local prediction is "" + And the local prediction's confidence is "" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "confidence", + "model_conf", "operating"] examples = [ - ['data/iris.csv', '10', '10', '50', '{}', '000004', 'Iris-virginica', '0.33784', '{"boosting": {"iterations": 5}}', {}], - ['data/iris.csv', '10', '10', '50', '{}', '000004', 'Iris-versicolor', '0.27261', '{"number_of_models": 5"}', {"operating_kind": "confidence"}]] - show_doc(self.test_scenario7, examples) - + ['data/iris.csv', '10', '10', '80', '{}', '000004', 'Iris-virginica', '0.33784', '{"boosting": {"iterations": 5}}', {}], + ['data/iris.csv', '10', '10', '80', '{}', '000004', 'Iris-versicolor', '0.27261', '{"number_of_models": 5}', {"operating_kind": "confidence"}]] + show_doc(self.test_scenario9) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - ensemble_create.i_create_an_ensemble_with_params(self, example[8]) - ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_create.i_create_an_ensemble_with_params( + self, example["model_conf"]) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"]) ensemble_create.create_local_supervised_ensemble(self) - prediction_create.i_create_an_ensemble_proportional_prediction(self, example[4], example[9]) - prediction_create.the_prediction_is(self, 
example[5], example[6]) - prediction_create.the_confidence_is(self, example[7]) - prediction_create.create_local_ensemble_proportional_prediction_with_confidence(self, example[4], example[9]) - prediction_compare.the_local_ensemble_prediction_is(self, example[6]) - prediction_compare.the_local_prediction_confidence_is(self, example[7]) - + prediction_create.i_create_an_ensemble_proportional_prediction( + self, example["input_data"], example["operating"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_create.the_confidence_is(self, example["confidence"]) + prediction_create.create_local_ensemble_proportional_prediction_with_confidence( + self, example["input_data"], example["operating"]) + prediction_compare.the_local_ensemble_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) + ensemble_create.create_local_bigml_ensemble(self) + prediction_compare.the_local_ensemble_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) def test_scenario10(self): """ - Scenario: Successfully comparing predictions for fusions: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I retrieve a list of remote models tagged with "" - And I create a fusion from a list of models - And I wait until the fusion is ready less than secs - And I create a local fusion - When I create a prediction for "" - Then the prediction for "" is "" - And I create a local prediction for "" - Then the local prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | params| tag | data_input | objective | prediction | params - + Scenario: Successfully comparing predictions for fusions: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I retrieve a list of remote models tagged with "" + And I create a fusion from a list of models + And I wait until the fusion is ready less than secs + And I create a local fusion + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", "tag", + "input_data", "objective_id", "prediction"] examples = [ - ['data/iris_unbalanced.csv', '30', '30', '120', '120', 'my_fusion_tag', '{"petal width": 4}', '000004', 'Iris-virginica'], - ['data/grades.csv', '30', '30', '120', '120', 'my_fusion_tag_reg', '{"Midterm": 20}', '000005', 43.65286]] - show_doc(self.test_scenario10, examples) - + ['data/iris_unbalanced.csv', '30', '30', '120', + 'my_fusion_tag', '{"petal width": 4}', '000004', + 'Iris-virginica'], + ['data/grades.csv', '30', '30', '120', + 
'my_fusion_tag_reg', '{"Midterm": 20}', '000005', 43.65286]] + show_doc(self.test_scenario10) for example in examples: - print "\nTesting with:\n", example - tag = "%s_%s" % (example[5], PY3) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + tag = example["tag"] tag_args = '{"tags":["%s"]}' % tag - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) model_create.i_create_a_model_with(self, tag_args) - model_create.the_model_is_finished_in_less_than(self, example[3]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) model_create.i_create_a_model_with(self, tag_args) - model_create.the_model_is_finished_in_less_than(self, example[3]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) model_create.i_create_a_model_with(self, tag_args) - model_create.the_model_is_finished_in_less_than(self, example[3]) - prediction_compare.i_retrieve_a_list_of_remote_models(self, tag) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_retrieve_a_list_of_remote_models( + self, tag) model_create.i_create_a_fusion(self) - model_create.the_fusion_is_finished_in_less_than(self, example[4]) + model_create.the_fusion_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_fusion(self) - prediction_create.i_create_a_fusion_prediction(self, example[6]) - prediction_create.the_prediction_is(self, example[7], example[8]) - prediction_compare.i_create_a_local_prediction(self, example[6]) - prediction_compare.the_local_prediction_is(self, example[8]) - + prediction_create.i_create_a_fusion_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) def test_scenario11(self): """ - Scenario: Successfully comparing predictions in operating points for fusions: - Scenario: Successfully comparing predictions for fusions: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I retrieve a list of remote models tagged with "" - And I create a fusion from a list of models - And I wait until the fusion is ready less than secs - And I create a local fusion - When I create a prediction for "" in "" - Then the prediction for "" is "" - And I create a local fusion prediction for "" in "" - Then the local ensemble prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | params| tag | data_input | objective | prediction | params | operating_point - - + Scenario: Successfully 
comparing predictions in operating points for fusions: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I retrieve a list of remote models tagged with "" + And I create a fusion from a list of models + And I wait until the fusion is ready less than secs + And I create a local fusion + When I create a prediction for "" in "" + Then the prediction for "" is "" + And I create a local fusion prediction for "" in "" + Then the local ensemble prediction is "" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", "tag", + "input_data", "objective_id", "prediction", + "operating_point"] examples = [ - ['data/iris_unbalanced.csv', '30', '30', '120', '120', 'my_fusion_tag_11', '{"petal width": 4}', '000004', 'Iris-virginica', {"kind": "probability", "threshold": 0.1, "positive_class": "Iris-setosa"}], - ['data/iris_unbalanced.csv', '30', '30', '120', '120', 'my_fusion_tag_11_b', '{"petal width": 4}', '000004', 'Iris-virginica', {"kind": "probability", "threshold": 0.9, "positive_class": "Iris-setosa"}]] - show_doc(self.test_scenario11, examples) - + ['data/iris_unbalanced.csv', '30', '30', '120', + 'my_fusion_tag_11', '{"petal width": 4}', '000004', + 'Iris-virginica', + {"kind": "probability", "threshold": 0.1, + "positive_class": "Iris-setosa"}], + ['data/iris_unbalanced.csv', '30', '30', '120', + 'my_fusion_tag_11_b', '{"petal width": 4}', + '000004', 'Iris-virginica', + {"kind": "probability", "threshold": 0.9, + "positive_class": "Iris-setosa"}]] + show_doc(self.test_scenario11) for example in examples: - print "\nTesting with:\n", example - tag = "%s_%s" % (example[5], PY3) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + tag = example["tag"] tag_args = '{"tags":["%s"]}' % tag - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) model_create.i_create_a_model_with(self, tag_args) - model_create.the_model_is_finished_in_less_than(self, example[3]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) model_create.i_create_a_model_with(self, tag_args) - model_create.the_model_is_finished_in_less_than(self, example[3]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) model_create.i_create_a_model_with(self, tag_args) - model_create.the_model_is_finished_in_less_than(self, example[3]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_retrieve_a_list_of_remote_models(self, tag) model_create.i_create_a_fusion(self) - model_create.the_fusion_is_finished_in_less_than(self, example[4]) + 
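Scenario11's operating points translate to the operating_point argument of the local Fusion class in the bindings. A sketch, assuming three tagged models already exist; the tag is a placeholder, and the tags__in filter follows the bindings' list-query convention:

from bigml.api import BigML
from bigml.fusion import Fusion

api = BigML()
listing = api.list_models("tags__in=my_fusion_tag_11")
model_ids = [m["resource"] for m in listing["objects"]]

fusion = api.create_fusion(model_ids)
api.ok(fusion)

local_fusion = Fusion(fusion, api=api)
operating_point = {"kind": "probability", "threshold": 0.1,
                   "positive_class": "Iris-setosa"}
print(local_fusion.predict({"petal width": 4},
                           operating_point=operating_point))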
model_create.the_fusion_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_fusion(self) - prediction_create.i_create_a_fusion_prediction_op(self, example[6], example[9]) - prediction_create.the_prediction_is(self, example[7], example[8]) - prediction_compare.i_create_a_local_prediction_op(self, example[6], example[9]) - prediction_compare.the_local_prediction_is(self, example[8]) + prediction_create.i_create_a_fusion_prediction_op( + self, example["input_data"], example["operating_point"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction_op( + self, example["input_data"], example["operating_point"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) def test_scenario12(self): """ - Scenario: Successfully comparing predictions for fusions: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I retrieve a list of remote models tagged with "" - And I create a fusion from a list of models - And I wait until the fusion is ready less than secs - And I create a local fusion - When I create a prediction for "" - Then the prediction for "" is "" - And I create a local prediction for "" - Then the local prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | params| tag | data_input | objective | prediction | params - + Scenario: Successfully comparing predictions for fusions: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I retrieve a list of remote models tagged with "" + And I create a fusion from a list of models + And I wait until the fusion is ready less than secs + And I create a local fusion + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" """ - tag = "my_fusion_tag_12_%s" % PY3 - tag_reg = "my_fusion_tag_12_reg_%s" % PY3 + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "model_conf", "tag", "input_data", "objective_id", + "prediction"] + tag = "my_fusion_tag_12" + tag_reg = "my_fusion_tag_12_reg" examples = [ - ['data/iris_unbalanced.csv', '30', '30', '120', '120', '{"tags":["%s"], "sample_rate": 0.8, "seed": "bigml"}' % tag, tag, '{"petal width": 4}', '000004', 'Iris-virginica'], - ['data/grades.csv', '30', '30', '120', '120', '{"tags":["%s"], "sample_rate": 0.8, "seed": "bigml"}' % tag_reg, tag_reg, '{"Midterm": 20}', '000005', 44.37625]] - show_doc(self.test_scenario12, examples) - + ['data/iris_unbalanced.csv', '30', '30', '120', + '{"tags":["%s"], "sample_rate": 0.8, "seed": "bigml"}' % tag, tag, + '{"petal width": 4}', '000004', 'Iris-virginica'], + ['data/grades.csv', '30', '30', '120', + '{"tags":["%s"], "sample_rate": 0.8, 
"seed": "bigml"}' % tag_reg, + tag_reg, '{"Midterm": 20}', '000005', 44.37625]] + show_doc(self.test_scenario12) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model_with(self, example[5]) - model_create.the_model_is_finished_in_less_than(self, example[3]) - model_create.i_create_a_model_with(self, example[5]) - model_create.the_model_is_finished_in_less_than(self, example[3]) - model_create.i_create_a_model_with(self, example[5]) - model_create.the_model_is_finished_in_less_than(self, example[3]) - prediction_compare.i_retrieve_a_list_of_remote_models(self, example[6]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model_with( + self, example["model_conf"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with( + self, example["model_conf"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with( + self, example["model_conf"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_retrieve_a_list_of_remote_models( + self, example["tag"]) model_create.i_create_a_fusion_with_weights(self) - model_create.the_fusion_is_finished_in_less_than(self, example[4]) + model_create.the_fusion_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_fusion(self) - prediction_create.i_create_a_fusion_prediction(self, example[7]) - prediction_create.the_prediction_is(self, example[8], example[9]) - prediction_compare.i_create_a_local_prediction(self, example[7]) - prediction_compare.the_local_prediction_is(self, example[9]) + prediction_create.i_create_a_fusion_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario13(self): + """ + Scenario: Successfully comparing predictions for fusions: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than <"dataset_wait"> secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I retrieve a list of remote models tagged with "" + And I create a fusion from a list of models + And I wait until the fusion is ready less than secs + And I create a local fusion + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" + 
""" + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "tag", "input_data", "objective_id", "prediction"] + examples = [ + ['data/grades.csv', '30', '30', '120', 'my_fusion_tag_lreg', + '{"000000": 10, "000001": 10, "000002": 10, "000003": 10, ' + '"000004": 10}', '000005', 21.01712]] + show_doc(self.test_scenario13) + + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + tag = example["tag"] + tag_args = '{"tags":["%s"]}' % tag + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + linear_create.i_create_a_linear_regression_with_params( + self, tag_args) + linear_create.the_linear_regression_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_retrieve_a_list_of_remote_linear_regressions( + self, tag) + model_create.i_create_a_fusion(self) + model_create.the_fusion_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_fusion(self) + prediction_create.i_create_a_fusion_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario14(self): + """ + Scenario: Successfully comparing predictions for ensembles: + Given I load the full ensemble information from "" + And I create a local ensemble from the ensemble + models list + And I create a local prediction for "" + Then the local prediction is "" + """ + headers = ["directory", "input_data", "prediction"] + examples = [ + ['bigml/tests/mlflow_ensemble', '{"plasma glucose": 240}', 'true']] + show_doc(self.test_scenario14) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + model_list = ensemble_create.load_full_ensemble( + self, example["directory"]) + ensemble_create.create_local_ensemble_from_list( + self, model_list) + prediction_compare.i_create_a_local_ensemble_prediction( + self, example["input_data"]) + prediction_compare.the_local_ensemble_prediction_is( + self, example["prediction"]) diff --git a/bigml/tests/test_34_time_series.py b/bigml/tests/test_34_time_series.py index a8aa6d34..4b5fb472 100644 --- a/bigml/tests/test_34_time_series.py +++ b/bigml/tests/test_34_time_series.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2017-2019 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -19,61 +20,84 @@ """ Creating time series forecasts """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_time_series_steps as time_series_create -import create_forecast_steps as forecast_create +import json +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_time_series_steps as time_series_create +from . import create_forecast_steps as forecast_create +from . import compare_predictions_steps as prediction_compare -class TestTimeSeries(object): - def setup(self): +class TestTimeSeries: + """Testing Time Series methods""" + + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating forecasts from a dataset: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create time-series from a dataset - And I wait until the time series is ready less than secs - And I update the time series name to "" - When I wait until the time series is ready less than secs - Then the time series name is "" - And I create a forecast for "" - Then the forecasts are "" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | time_series_name |input_data | forecast_points - | ../data/grades.csv | 10 | 10 | 20 | 50 | my new time_series name | - {"000005": {"horizon": 5}], {}} + Scenario: Successfully creating forecasts from a dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create time-series from a dataset + And I wait until the time series is ready less than secs + And I update the time series name to "" + When I wait until the time series is ready less than secs + Then the time series name is "" + And I create a forecast for "" + Then the forecasts are "" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "time_series_name", "input_data", "forecast_points"] examples = [ - ['data/grades.csv', '30', '30', '50', '50', 'my new time series name', - '{"000005": {"horizon": 5}}', '{"000005": [{"point_forecast": [73.96192, 74.04106, 74.12029, 74.1996, 74.27899], "model": "M,M,N"}]}']] + ['data/grades.csv', '30', '30', '50', 'my new time series name', + '{"000005": {"horizon": 5}}', + '{"000005": [{"point_forecast": [73.96192, 74.04106, 74.12029, ' + '74.1996, 74.27899], "model": "M,M,N"}]}']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + example = 
dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) time_series_create.i_create_a_time_series(self) - time_series_create.the_time_series_is_finished_in_less_than(self, example[3]) - time_series_create.i_update_time_series_name(self, example[5]) - time_series_create.the_time_series_is_finished_in_less_than(self, example[4]) - time_series_create.i_check_time_series_name(self, example[5]) - forecast_create.i_create_a_forecast(self, example[6]) - forecast_create.the_forecast_is(self, example[7]) + time_series_create.the_time_series_is_finished_in_less_than( + self, example["model_wait"]) + time_series_create.i_update_time_series_name( + self, example["time_series_name"]) + time_series_create.the_time_series_is_finished_in_less_than( + self, example["model_wait"]) + time_series_create.i_check_time_series_name( + self, example["time_series_name"]) + forecast_create.i_create_a_forecast( + self, example["input_data"]) + forecast_create.the_forecast_is(self, example["forecast_points"]) + prediction_compare.i_create_a_local_bigml_model(self, + model_type="time_series") + prediction_compare.i_create_a_local_bigml_model_prediction( + self, example["input_data"], prediction_type="forecast") + forecast_points = json.loads(example["forecast_points"]) + prediction_compare.the_local_bigml_prediction_is( + self, {"forecast": forecast_points}, prediction_type="forecast") diff --git a/bigml/tests/test_35_b_compare_predictions.py b/bigml/tests/test_35_b_compare_predictions.py new file mode 100644 index 00000000..7b768ff6 --- /dev/null +++ b/bigml/tests/test_35_b_compare_predictions.py @@ -0,0 +1,118 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Comparing remote and local predictions + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_time_series_steps as time_series_create +from . import create_forecast_steps as forecast_create +from . import compare_forecasts_steps as forecast_compare +from . import create_pca_steps as pca_create +from . import create_projection_steps as projection_create +from . 
import compare_predictions_steps as compare_predictions + + +class TestComparePrediction: + """Testing local model predictions""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully comparing forecasts from time series: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a time series with "" + And I wait until the time series is ready less than secs + And I create a local time series + When I create a forecast for "" + Then the forecast is "" + And I create a local forecast for "" + Then the local forecast is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "forecast", "model_conf"] + examples = [ + ['data/grades.csv', '30', '30', '120', + '{"000005": {"horizon": 5}}', + '{"000005": [{"point_forecast": [73.96192, 74.04106, 74.12029, ' + '74.1996, 74.27899], "model": "M,M,N"}]}', + '{"objective_fields": ["000001", "000005"]}'], + ['data/grades.csv', '30', '30', '120', + '{"000005": {"horizon": 5, "ets_models": {"names": ["M,N,N"], ' + '"criterion": "aic", "limit": 3}}}', + '{"000005": [{"point_forecast": [68.39832, 68.39832, 68.39832, ' + '68.39832, 68.39832], "model": "M,N,N"}]}', + '{"objective_fields": ["000001", "000005"]}'], + ['data/grades.csv', '30', '30', '120', + '{"000005": {"horizon": 5, "ets_models": {"names": ["A,A,N"], ' + '"criterion": "aic", "limit": 3}}}', + '{"000005": [{"point_forecast": [72.46247, 72.56247, 72.66247, ' + '72.76247, 72.86247], "model": "A,A,N"}]}', + '{"objective_fields": ["000001", "000005"]}'], + ['data/grades.csv', '30', '30', '120', + '{"000005": {"horizon": 5}, "000001": {"horizon": 3, ' + '"ets_models": {"criterion": "aic", "limit": 2}}}', + '{"000005": [{"point_forecast": [73.96192, 74.04106, ' + '74.12029, 74.1996, 74.27899], "model": "M,M,N"}], ' + '"000001": [{"point_forecast": [55.51577, 89.69111, 82.04935],' + ' "model": "A,N,A"}, {"point_forecast": [56.67419, 91.89657, ' + '84.70017], "model": "A,A,A"}]}', + '{"objective_fields": ["000001", "000005"]}']] + show_doc(self.test_scenario1) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + time_series_create.i_create_a_time_series_with_params( + self, example["model_conf"]) + time_series_create.the_time_series_is_finished_in_less_than( + self, example["model_wait"]) + time_series_create.create_local_time_series(self) + forecast_create.i_create_a_forecast(self, example["input_data"]) + forecast_create.the_forecast_is(self, example["forecast"]) + forecast_compare.i_create_a_local_forecast( + self, example["input_data"]) + forecast_compare.the_local_forecast_is(self, example["forecast"]) diff --git a/bigml/tests/test_35_c_compare_predictions.py 
b/bigml/tests/test_35_c_compare_predictions.py new file mode 100644 index 00000000..0a39e66d --- /dev/null +++ b/bigml/tests/test_35_c_compare_predictions.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Comparing remote and local predictions + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_time_series_steps as time_series_create +from . import create_forecast_steps as forecast_create +from . import compare_forecasts_steps as forecast_compare +from . import create_pca_steps as pca_create +from . import create_projection_steps as projection_create +from . import compare_predictions_steps as compare_predictions + + +class TestComparePrediction: + """Test local and remote predictions""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario3(self): + """ + Scenario: Successfully comparing forecasts from time series with "M" seasonality + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a time series with "" + And I wait until the time series is ready less than secs + And I create a local time series + When I create a forecast for "" + Then the forecast is "" + And I create a local forecast for "" + Then the local forecast is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "forecast", "model_conf"] + examples = [ + ['data/grades.csv', '30', '30', '120', '{"000005": {"horizon": 5, "ets_models": {"names": ["M,N,M"], "criterion": "aic", "limit": 3}}}', '{"000005": [{"point_forecast": [68.99775, 72.76777, 66.5556, 70.90818, 70.92998], "model": "M,N,M"}]}', '{"objective_fields": ["000001", "000005"], "period": 12}'], + ['data/grades.csv', '30', '30', '120', '{"000005": {"horizon": 5, "ets_models": {"names": ["M,A,M"], "criterion": "aic", "limit": 3}}}', '{"000005": [{"point_forecast": [70.65993, 78.20652, 69.64806, 75.43716, 78.13556], "model": "M,A,M"}]}', '{"objective_fields": ["000001", "000005"], "period": 12}']] + show_doc(self.test_scenario3) + + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, 
example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + time_series_create.i_create_a_time_series_with_params( + self, example["model_conf"]) + time_series_create.the_time_series_is_finished_in_less_than( + self, example["model_wait"]) + time_series_create.create_local_time_series(self) + forecast_create.i_create_a_forecast(self, example["input_data"]) + forecast_create.the_forecast_is(self, example["forecast"]) + forecast_compare.i_create_a_local_forecast( + self, example["input_data"]) + forecast_compare.the_local_forecast_is( + self, example["forecast"]) + + def test_scenario3b(self): + """ + Scenario: Successfully comparing forecasts from time series with "M" seasonality + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a time series with "" + And I wait until the time series is ready less than secs + And I create a local time series + When I create a forecast for "" + Then the forecast is "" + And I create a local forecast for "" + Then the local forecast is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "forecast", "model_conf"] + examples = [ + ['data/grades.csv', '30', '30', '120', + '{"000005": {"horizon": 5, "ets_models": {"names": ["M,M,M"], ' + '"criterion": "aic", "limit": 3}}}', + '{"000005": [{"point_forecast": [71.75055, 80.67195, 70.81368, ' + '79.84999, 78.27634], "model": "M,M,M"}]}', + '{"objective_fields": ["000001", "000005"], "period": 12}']] + show_doc(self.test_scenario3) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + time_series_create.i_create_a_time_series_with_params( + self, example["model_conf"]) + time_series_create.the_time_series_is_finished_in_less_than( + self, example["model_wait"]) + time_series_create.create_local_time_series(self) + forecast_create.i_create_a_forecast(self, example["input_data"]) + forecast_create.the_forecast_is(self, example["forecast"]) + forecast_compare.i_create_a_local_forecast( + self, example["input_data"]) + forecast_compare.the_local_forecast_is(self, example["forecast"]) diff --git a/bigml/tests/test_35_compare_predictions.py b/bigml/tests/test_35_compare_predictions.py index d695d60d..248b9520 100644 --- a/bigml/tests/test_35_compare_predictions.py +++ b/bigml/tests/test_35_compare_predictions.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2017-2019 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -19,201 +20,111 @@ """ Comparing remote and local predictions """ -from world import world, setup_module, teardown_module, show_doc -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_model_steps as model_create -import create_time_series_steps as time_series_create -import create_forecast_steps as forecast_create -import compare_forecasts_steps as forecast_compare -import create_pca_steps as pca_create -import create_projection_steps as projection_create -import compare_predictions_steps as compare_predictions +import sys +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_time_series_steps as time_series_create +from . import create_forecast_steps as forecast_create +from . import compare_forecasts_steps as forecast_compare +from . import create_pca_steps as pca_create +from . import create_projection_steps as projection_create +from . import compare_predictions_steps as compare_predictions -class TestComparePrediction(object): - def setup(self): - """ - Debug information - """ - print "\n-------------------\nTests in: %s\n" % __name__ +class TestComparePrediction: + """Testing local and remote predictions""" - def teardown(self): + def setup_method(self, method): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ - - - def test_scenario1(self): - """ - Scenario: Successfully comparing forecasts from time series: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a time series with "" - And I wait until the time series is ready less than secs - And I create a local time series - When I create a forecast for "" - Then the forecast is "" - And I create a local forecast for "" - Then the local forecast is "" - - Examples: - | data | time_1 | time_2 | time_3 | input_data | forecasts | params - ['data/grades.csv', '10', '10', '120', '{"000005": {"horizon": 5, "ets_models": {"names": ["A,Ad,N"], "criterion": "aic", "limit": 3}}}', '{"000005": [{"point_forecast": [69.90959, 69.92755, 69.94514, 69.96236, 69.97922], "model": "A,Ad,N"}]}', '{"objective_fields": ["000001", "000005"]}'], - - - """ - examples = [ - ['data/grades.csv', '30', '30', '120', '{"000005": {"horizon": 5}}', '{"000005": [{"point_forecast": [73.96192, 74.04106, 74.12029, 74.1996, 74.27899], "model": "M,M,N"}]}', '{"objective_fields": ["000001", "000005"]}'], - ['data/grades.csv', '30', '30', '120', '{"000005": {"horizon": 5, "ets_models": {"names": ["M,N,N"], "criterion": "aic", "limit": 3}}}', '{"000005": [{"point_forecast": [68.39832, 68.39832, 68.39832, 68.39832, 68.39832], "model": "M,N,N"}]}', '{"objective_fields": ["000001", "000005"]}'], - ['data/grades.csv', '30', '30', '120', '{"000005": {"horizon": 5, "ets_models": {"names": ["A,A,N"], "criterion": "aic", "limit": 3}}}', '{"000005": [{"point_forecast": [72.46247, 72.56247, 72.66247, 72.76247, 72.86247], "model": "A,A,N"}]}', '{"objective_fields": ["000001", "000005"]}'], - ['data/grades.csv', '30', '30', '120', '{"000005": {"horizon": 5}, "000001": {"horizon": 3, "ets_models": {"criterion": "aic", "limit": 2}}}', '{"000005": [{"point_forecast": [73.96192, 74.04106, 74.12029, 74.1996, 74.27899], "model": 
"M,M,N"}], "000001": [{"point_forecast": [55.51577, 89.69111, 82.04935], "model": "A,N,A"}, {"point_forecast": [56.67419, 91.89657, 84.70017], "model": "A,A,A"}]}', '{"objective_fields": ["000001", "000005"]}']] - show_doc(self.test_scenario1, examples) - - for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - time_series_create.i_create_a_time_series_with_params(self, example[6]) - time_series_create.the_time_series_is_finished_in_less_than(self, example[3]) - time_series_create.create_local_time_series(self) - forecast_create.i_create_a_forecast(self, example[4]) - forecast_create.the_forecast_is(self, example[5]) - forecast_compare.i_create_a_local_forecast(self, example[4]) - forecast_compare.the_local_forecast_is(self, example[5]) - - - def test_scenario2(self): - """ - Scenario: Successfully comparing forecasts from time series with "A" seasonality - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a time series with "" - And I wait until the time series is ready less than secs - And I create a local time series - When I create a forecast for "" - Then the forecast is "" - And I create a local forecast for "" - Then the local forecast is "" - - Examples: - | data | time_1 | time_2 | time_3 | input_data | forecasts | params - ['data/grades.csv', '10', '10', '120', '{"000005": {"horizon": 5, "ets_models": {"names": ["A,Ad,A"], "criterion": "aic", "limit": 3}}}', '{"000005": [{"point_forecast":[66.16225, 72.17308, 66.65573, 73.09698, 70.51449], "model": "A,Ad,A"}]}', '{"objective_fields": ["000001", "000005"], "period": 12}'] - """ - examples = [ - - ['data/grades.csv', '30', '30', '120', '{"000005": {"horizon": 5}}', '{"000005": [{"point_forecast": [73.96192, 74.04106, 74.12029, 74.1996, 74.27899], "model": "M,M,N"}]}', '{"objective_fields": ["000001", "000005"], "period": 12}'], - ['data/grades.csv', '30', '30', '120', '{"000005": {"horizon": 5, "ets_models": {"names": ["M,N,A"], "criterion": "aic", "limit": 3}}}', '{"000005": [{"point_forecast": [67.43222, 68.24468, 64.14437, 67.5662, 67.79028], "model": "M,N,A"}]}', '{"objective_fields": ["000001", "000005"], "period": 12}'], - ['data/grades.csv', '30', '30', '120', '{"000005": {"horizon": 5, "ets_models": {"names": ["A,A,A"], "criterion": "aic", "limit": 3}}}', '{"000005": [{"point_forecast": [74.73553, 71.6163, 71.90264, 76.4249, 75.06982], "model": "A,A,A"}]}', '{"objective_fields": ["000001", "000005"], "period": 12}']] - show_doc(self.test_scenario2, examples) + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - time_series_create.i_create_a_time_series_with_params(self, example[6]) - time_series_create.the_time_series_is_finished_in_less_than(self, example[3]) - time_series_create.create_local_time_series(self) - forecast_create.i_create_a_forecast(self, example[4]) - forecast_create.the_forecast_is(self, 
example[5]) - forecast_compare.i_create_a_local_forecast(self, example[4]) - forecast_compare.the_local_forecast_is(self, example[5]) - - def test_scenario3(self): + def teardown_method(self): """ - Scenario: Successfully comparing forecasts from time series with "M" seasonality - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a time series with "" - And I wait until the time series is ready less than secs - And I create a local time series - When I create a forecast for "" - Then the forecast is "" - And I create a local forecast for "" - Then the local forecast is "" - - Examples: - | data | time_1 | time_2 | time_3 | input_data | forecasts | params - -, - ['data/grades.csv', '10', '10', '120', '{"000005": {"horizon": 5, "ets_models": {"names": ["M,Ad,M"], "criterion": "aic", "limit": 3}}}', '{"000005": [{"point_forecast": [73.75816, 74.60699, 66.71212, 72.49586, 71.76787], "model": "M,Ad,M"}]}', '{"objective_fields": ["000001", "000005"], "period": 12}'], - ['data/grades.csv', '10', '10', '120', '{"000005": {"horizon": 5, "ets_models": {"names": ["M,Md,M"], "criterion": "aic", "limit": 3}}}', '{"000005": [{"point_forecast": [74.3725, 75.02963, 67.15826, 73.19628, 71.66919], "model": "M,Md,M"}]}', '{"objective_fields": ["000001", "000005"], "period": 12}'] - + Debug information """ - examples = [ - ['data/grades.csv', '30', '30', '120', '{"000005": {"horizon": 5, "ets_models": {"names": ["M,N,M"], "criterion": "aic", "limit": 3}}}', '{"000005": [{"point_forecast": [68.99775, 72.76777, 66.5556, 70.90818, 70.92998], "model": "M,N,M"}]}', '{"objective_fields": ["000001", "000005"], "period": 12}'], - ['data/grades.csv', '30', '30', '120', '{"000005": {"horizon": 5, "ets_models": {"names": ["M,A,M"], "criterion": "aic", "limit": 3}}}', '{"000005": [{"point_forecast": [70.65993, 78.20652, 69.64806, 75.43716, 78.13556], "model": "M,A,M"}]}', '{"objective_fields": ["000001", "000005"], "period": 12}'], - ['data/grades.csv', '30', '30', '120', '{"000005": {"horizon": 5, "ets_models": {"names": ["M,M,M"], "criterion": "aic", "limit": 3}}}', '{"000005": [{"point_forecast": [71.75055, 80.67195, 70.81368, 79.84999, 78.27634], "model": "M,M,M"}]}', '{"objective_fields": ["000001", "000005"], "period": 12}']] - show_doc(self.test_scenario3, examples) - - for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - time_series_create.i_create_a_time_series_with_params(self, example[6]) - time_series_create.the_time_series_is_finished_in_less_than(self, example[3]) - time_series_create.create_local_time_series(self) - forecast_create.i_create_a_forecast(self, example[4]) - forecast_create.the_forecast_is(self, example[5]) - forecast_compare.i_create_a_local_forecast(self, example[4]) - forecast_compare.the_local_forecast_is(self, example[5]) - + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario4(self): """ Scenario: Successfully comparing forecasts from time series with trivial models Given I create a data source uploading a "" file - And I wait until the source is ready less than secs + And I wait until the source is ready less than secs And I create a dataset - And I wait until the 
dataset is ready less than secs - And I create a time series with "" - And I wait until the time series is ready less than secs + And I wait until the dataset is ready less than secs + And I create a time series with "" + And I wait until the time series is ready less than secs And I create a local time series When I create a forecast for "" - Then the forecast is "" - And I create a local forecast for "" - Then the local forecast is "" - - Examples: - | data | time_1 | time_2 | time_3 | input_data | forecasts | params - + Then the forecast is "" + And I create a local forecast for "" + Then the local forecast is "" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "forecast", "model_conf"] examples = [ - ['data/grades.csv', '10', '1000', '1000', '{"000005": {"horizon": 5, "ets_models": {"names": ["naive"]}}}', '{"000005": [{"point_forecast": [61.39, 61.39, 61.39, 61.39, 61.39], "model": "naive"}]}', '{"objective_fields": ["000001", "000005"], "period": 1}'], - ['data/grades.csv', '10', '1000', '1000', '{"000005": {"horizon": 5, "ets_models": {"names": ["naive"]}}}', '{"000005": [{"point_forecast": [78.89, 61.39, 78.89, 61.39, 78.89], "model": "naive"}]}', '{"objective_fields": ["000001", "000005"], "period": 2}'], - ['data/grades.csv', '10', '1000', '1000', '{"000005": {"horizon": 5, "ets_models": {"names": ["mean"]}}}', '{"000005": [{"point_forecast": [68.45974, 68.45974, 68.45974, 68.45974, 68.45974], "model": "mean"}]}', '{"objective_fields": ["000001", "000005"], "period": 1}'], - ['data/grades.csv', '10', '1000', '1000', '{"000005": {"horizon": 5, "ets_models": {"names": ["mean"]}}}', '{"000005": [{"point_forecast": [69.79553, 67.15821, 69.79553, 67.15821, 69.79553], "model": "mean"}]}', '{"objective_fields": ["000001", "000005"], "period": 2}'], - ['data/grades.csv', '10', '1000', '1000', '{"000005": {"horizon": 5, "ets_models": {"names": ["drift"]}}}', '{"000005": [{"point_forecast": [61.50545, 61.6209, 61.73635, 61.8518, 61.96725], "model": "drift"}]}', '{"objective_fields": ["000001", "000005"], "period": 1}'], - ['data/grades.csv', '10', '1000', '1000', '{"000005": {"horizon": 5, "ets_models": {"names": ["drift"]}}}', '{"000005": [{"point_forecast": [61.50545, 61.6209, 61.73635, 61.8518, 61.96725], "model": "drift"}]}', '{"objective_fields": ["000001", "000005"], "period": 2}']] - show_doc(self.test_scenario4, examples) - + ['data/grades.csv', '10', '100', '100', + '{"000005": {"horizon": 5, "ets_models": {"names": ["naive"]}}}', + '{"000005": [{"point_forecast": [61.39, 61.39, 61.39, 61.39, ' + '61.39], "model": "naive"}]}', + '{"objective_fields": ["000001", "000005"], "period": 1}'], + ['data/grades.csv', '10', '100', '100', + '{"000005": {"horizon": 5, "ets_models": {"names": ["naive"]}}}', + '{"000005": [{"point_forecast": [78.89, 61.39, 78.89, 61.39, ' + '78.89], "model": "naive"}]}', + '{"objective_fields": ["000001", "000005"], "period": 2}'], + ['data/grades.csv', '10', '100', '100', + '{"000005": {"horizon": 5, "ets_models": {"names": ["mean"]}}}', + '{"000005": [{"point_forecast": [68.45974, 68.45974, 68.45974, ' + '68.45974, 68.45974], "model": "mean"}]}', + '{"objective_fields": ["000001", "000005"], "period": 1}'], + ['data/grades.csv', '10', '100', '100', + '{"000005": {"horizon": 5, "ets_models": {"names": ["mean"]}}}', + '{"000005": [{"point_forecast": [69.79553, 67.15821, 69.79553, ' + '67.15821, 69.79553], "model": "mean"}]}', + '{"objective_fields": ["000001", "000005"], "period": 2}'], + ['data/grades.csv', '10', '100', 
'100', + '{"000005": {"horizon": 5, "ets_models": {"names": ["drift"]}}}', + '{"000005": [{"point_forecast": [61.50545, 61.6209, 61.73635, ' + '61.8518, 61.96725], "model": "drift"}]}', + '{"objective_fields": ["000001", "000005"], "period": 1}'], + ['data/grades.csv', '10', '100', '100', + '{"000005": {"horizon": 5, "ets_models": {"names": ["drift"]}}}', + '{"000005": [{"point_forecast": [61.50545, 61.6209, 61.73635, ' + '61.8518, 61.96725], "model": "drift"}]}', + '{"objective_fields": ["000001", "000005"], "period": 2}']] + show_doc(self.test_scenario4) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - time_series_create.i_create_a_time_series_with_params(self, example[6]) - time_series_create.the_time_series_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + time_series_create.i_create_a_time_series_with_params( + self, example["model_conf"]) + time_series_create.the_time_series_is_finished_in_less_than( + self, example["model_wait"]) time_series_create.create_local_time_series(self) - forecast_create.i_create_a_forecast(self, example[4]) - forecast_create.the_forecast_is(self, example[5]) - forecast_compare.i_create_a_local_forecast(self, example[4]) - forecast_compare.the_local_forecast_is(self, example[5]) + forecast_create.i_create_a_forecast( + self, example["input_data"]) + forecast_create.the_forecast_is( + self, example["forecast"]) + forecast_compare.i_create_a_local_forecast( + self, example["input_data"]) + forecast_compare.the_local_forecast_is( + self, example["forecast"]) def test_scenario5(self): @@ -228,153 +139,90 @@ def test_scenario5(self): And I create a local PCA When I create a projection for "" Then the projection is "" - And I create a local projection for "" + And I create a local projection for "" Then the local projection is "" - - Examples: - | data | time_1 | time_2 | time_3 | input_data | projection | params - - """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "projection", "model_conf"] examples = [ ['data/iris.csv', '30', '30', '120', '{}', - '{"PC2": 0, "PC3": 0, "PC1": 0, "PC6": 0, "PC4": 5e-05, "PC5": 0}', '{}'], + '{"PC2": 0, "PC3": 0, "PC1": 0, "PC6": 0, "PC4": 5e-05, ' + '"PC5": 0}', '{}'], ['data/iris.csv', '30', '30', '120', '{"petal length": 1}', - '{"PC2": 0.08708, "PC3": 0.20929, "PC1": 1.56084, "PC6": -1.34463, "PC4": 0.7295, "PC5": -1.00876}', '{}'], - ['data/iris.csv', '30', '30', '120', '{"species": "Iris-versicolor"}', - '{"PC2": 1.8602, "PC3": -2.00864, "PC1": -0.61116, "PC6": -0.66983, "PC4": -2.44618, "PC5": 0.43414}', '{}'], - ['data/iris.csv', '30', '30', '120', '{"petal length": 1, "sepal length": 0, "petal width": 0, "sepal width": 0, "species": "Iris-versicolor"}', - '{"PC2": 7.18009, "PC3": 6.51511, "PC1": 2.78155, "PC6": 0.21372, "PC4": -1.94865, "PC5": 0.57646}', '{}']] - show_doc(self.test_scenario5, examples) - + '{"PC2": 
0.08708, "PC3": 0.20929, "PC1": 1.56084, ' + '"PC6": -1.34463, "PC4": 0.7295, "PC5": -1.00876}', '{}']] + show_doc(self.test_scenario5) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - pca_create.i_create_a_pca_with_params(self, example[6]) - pca_create.the_pca_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + pca_create.i_create_a_pca_with_params( + self, example["model_conf"]) + pca_create.the_pca_is_finished_in_less_than( + self, example["model_wait"]) compare_predictions.create_local_pca(self) - projection_create.i_create_a_projection(self, example[4]) - projection_create.the_projection_is(self, example[5]) - compare_predictions.i_create_a_local_projection(self, example[4]) - compare_predictions.the_local_projection_is(self, example[5]) - + projection_create.i_create_a_projection( + self, example["input_data"]) + projection_create.the_projection_is( + self, example["projection"]) + compare_predictions.i_create_a_local_projection( + self, example["input_data"]) + compare_predictions.the_local_projection_is( + self, example["projection"]) - - def test_scenario6(self): + def test_scenario5_b(self): """ Scenario: Successfully comparing projections for PCAs: Given I create a data source uploading a "" file - And I wait until the source is ready less than secs + And I wait until the source is ready less than secs And I create a dataset - And I wait until the dataset is ready less than secs - And I create a PCA with "" - And I wait until the PCA is ready less than secs + And I wait until the dataset is ready less than secs + And I create a PCA with "" + And I wait until the PCA is ready less than secs And I create a local PCA When I create a projection for "" Then the projection is "" - And I create a local projection for "" + And I create a local projection for "" Then the local projection is "" - - Examples: - | data | time_1 | time_2 | time_3 | input_data | projection | params - - - """ - examples = [ - ['data/spam_tiny.csv', '30', '30', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all"}}}}', '{"Message": "early"}', '{}', '{"PC40": 0.00416, "PC38": 0.08267, "PC39": 0.00033, "PC18": 0.28094, "PC19": -0.15056, "PC14": 0.20643, "PC15": 0.23931, "PC16": 0.03251, "PC17": 0.02776, "PC10": 0.1424, "PC11": 0.4059, "PC12": -0.1238, "PC13": 0.15131, "PC43": 0.29617, "PC42": 1.0091, "PC41": 0, "PC25": 0.07164, "PC24": -0.29904, "PC27": -0.1331, "PC26": -0.18572, "PC21": 0.25616, "PC20": 0.30424, "PC23": -0.45775, "PC22": -0.3362, "PC47": -0.13757, "PC49": 0.01864, "PC48": 0.04742, "PC29": -0.16286, "PC28": 0.42207, "PC32": -0.05917, "PC46": -0.05018, "PC31": -0.13973, "PC45": -0.05015, "PC36": 0.03017, "PC44": 0, "PC37": -0.06093, "PC34": 0.25821, "PC35": -0.22194, "PC33": -0.23398, "PC8": 0.01159, "PC9": -0.16042, "PC2": -0.09202, "PC3": 0.14371, "PC1": 0.65114, "PC6": -0.43034, "PC7": -0.02563, 
"PC4": -0.04947, "PC5": -0.07796, "PC50": -0.00769, "PC30": 0.07813}'], - ['data/spam_tiny.csv', '30', '30', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "all"}}}}', '{"Message": "mobile call"}', '{}', '{"PC40": 0.31818, "PC38": 0.06912, "PC39": -0.14342, "PC18": 0.22382, "PC19": 0.18518, "PC14": 0.89231, "PC15": 0.05046, "PC16": -0.00241, "PC17": 0.54501, "PC10": -0.26463, "PC11": 0.30251, "PC12": 1.16327, "PC13": 0.16973, "PC43": 0.11952, "PC42": 1.05499, "PC41": 0.51263, "PC25": 0.02467, "PC24": -0.65128, "PC27": 0.48916, "PC26": -0.45228, "PC21": -0.44167, "PC20": 0.76896, "PC23": 0.29398, "PC22": 0.06425, "PC47": 0.70416, "PC49": -0.30313, "PC48": 0.12976, "PC29": -0.34, "PC28": 0.17406, "PC32": -0.06411, "PC46": 0.69257, "PC31": 0.07523, "PC45": -0.03461, "PC36": 0.29732, "PC44": 0.14516, "PC37": -0.19109, "PC34": 0.58399, "PC35": 0.37608, "PC33": -0.00378, "PC8": -0.88156, "PC9": 0.38233, "PC2": -0.56685, "PC3": 0.56321, "PC1": 0.49171, "PC6": -0.09854, "PC7": -1.24639, "PC4": 1.50134, "PC5": -0.03161, "PC50": 0.17349, "PC30": -1.29612}']] - show_doc(self.test_scenario6, examples) - for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - source_create.i_update_source_with(self, example[4]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - pca_create.i_create_a_pca_with_params(self, example[6]) - pca_create.the_pca_is_finished_in_less_than(self, example[3]) - projection_create.i_create_a_projection(self, example[5]) - projection_create.the_projection_is(self, example[7]) - compare_predictions.create_local_pca(self) - compare_predictions.i_create_a_local_projection(self, example[5]) - compare_predictions.the_local_projection_is(self, example[7]) - - def test_scenario7(self): - """ - Scenario: Successfully comparing remote and local predictions - with raw date input for PCAs: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a PC - And I wait until the PCA is ready less than secs - And I create a local PCA - When I create a projection for "" - Then the projection is "" - And I create a local projection for "" - Then the local projection is "" - - Examples: - | data | time_1 | time_2 | time_3 | input_data | projection - - """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "projection", "model_conf"] examples = [ - ['data/dates2.csv', '20', '30', '60', - '{"time-1":"1910-05-08T19:10:23.106","cat-0":"cat2","target-2":0.4}', - '{"PC8": -1.54293, "PC9": -0.94836, "PC2": 0.78176, "PC3": -0.62082,\ - "PC1": 0.89614, "PC10": 1.06575, "PC11": 1.3211, "PC4": 1.90088, \ - "PC5": 0.24197, "PC7": -0.37701, "PC6": 2.25007}'], - ['data/dates2.csv', '20', '30', '60', - '{"time-1":"1920-06-30T20:21:20.320","cat-0":"cat1","target-2":0.2}', - '{"PC8": 0.3148, "PC9": -0.61742, "PC2": 0.93411, "PC3": 1.80286,\ - "PC1": 0.36425, "PC10": 0.7364, "PC11": 2.25863, "PC4": -1.50319,\ - "PC5": 0.17088, "PC7": 0.51738, "PC6": 0.42403}'], - ['data/dates2.csv', '20', '30', '60', - '{"time-1":"1932-01-30T19:24:11.440","cat-0":"cat2","target-2":0.1}', - '{"PC8": -0.86728, "PC9": -1.85164, "PC2": 2.13206, "PC3": 0.58449,\ - "PC1": 0.28379, "PC10": 2.05465, "PC11": 0.44372, "PC4": 1.27236,\ - "PC5": 0.99468, "PC7": -0.32496, 
"PC6": 0.52217}'], - ['data/dates2.csv', '20', '30', '60', - '{"time-1":"1950-11-06T05:34:05.602","cat-0":"cat1" ,"target-2":0.9}', - '{"PC8": 2.49563, "PC9": -0.57774, "PC2": -0.76354, "PC3": 0.19215,\ - "PC1": 0.99197, "PC10": -1.21017, "PC11": 1.55778, "PC4": -0.24013,\ - "PC5": -0.38492, "PC7": 1.82607, "PC6": 0.3736}'], - ['data/dates2.csv', '20', '30', '60', - '{"time-1":"1969-7-14 17:36","cat-0":"cat2","target-2":0.9}', - '{"PC8": -0.41111, "PC9": -5.32959, "PC2": -1.25322, "PC3": 2.93113,\ - "PC1": 2.07444, "PC10": 4.8808, "PC11": 0.4185, "PC4": 3.13876,\ - "PC5": 3.70259, "PC7": 0.55665, "PC6": 5.16873}'], - ['data/dates2.csv', '20', '30', '60', - '{"time-1":"2001-01-05T23:04:04.693","cat-0":"cat2","target-2":0.01}', - '{"PC8": -1.10654, "PC9": -0.34137, "PC2": 1.73362, "PC3": -0.34799,\ - "PC1": 2.32583, "PC10": 0.94566, "PC11": 0.53787, "PC4": 2.77385,\ - "PC5": -0.1017, "PC7": 0.20156, "PC6": -0.44476}'], - ['data/dates2.csv', '20', '30', '60', - '{"time-1":"2011-04-01T00:16:45.747","cat-0":"cat2","target-2":0.32}', - '{"PC8": -0.514, "PC9": 0.38349, "PC2": -0.27037, "PC3": -1.82588,\ - "PC1": 1.05737, "PC10": 0.08607, "PC11": -0.97078, "PC4": 2.10426,\ - "PC5": 1.86843, "PC7": 1.55632, "PC6": 0.42395}'], - ['data/dates2.csv', '20', '30', '60', - '{"time-1":"1969-W29-1T17:36:39Z","cat-0":"cat1","target-2":0.87}', - '{"PC8": 2.05525, "PC9": 1.50754, "PC2": 6.27524, "PC3": 7.74224,\ - "PC1": 5.30354, "PC10": -6.40442, "PC11": 6.90365, "PC4": -1.44431,\ - "PC5": 2.16179, "PC7": 1.35718, "PC6": 5.02426}']] - show_doc(self.test_scenario7, examples) + ['data/iris.csv', '30', '30', '120', + '{"species": "Iris-versicolor"}', + '{"PC2": 1.8602, "PC3": -2.00864, "PC1": -0.61116, ' + '"PC6": -0.66983, "PC4": -2.44618, "PC5": 0.43414}', '{}'], + ['data/iris.csv', '30', '30', '120', + '{"petal length": 1, "sepal length": 0, "petal width": 0, ' + '"sepal width": 0, "species": "Iris-versicolor"}', + '{"PC2": 7.18009, "PC3": 6.51511, "PC1": 2.78155, ' + '"PC6": 0.21372, "PC4": -1.94865, "PC5": 0.57646}', '{}']] + show_doc(self.test_scenario5) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - pca_create.i_create_a_pca(self) - pca_create.the_pca_is_finished_in_less_than(self, example[3]) - projection_create.i_create_a_projection(self, example[4]) - projection_create.the_projection_is(self, example[5]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + pca_create.i_create_a_pca_with_params( + self, example["model_conf"]) + pca_create.the_pca_is_finished_in_less_than( + self, example["model_wait"]) compare_predictions.create_local_pca(self) - compare_predictions.i_create_a_local_projection(self, example[4]) - compare_predictions.the_local_projection_is(self, example[5]) + projection_create.i_create_a_projection( + self, example["input_data"]) + projection_create.the_projection_is(self, example["projection"]) + compare_predictions.i_create_a_local_projection( + self, 
example["input_data"]) + compare_predictions.the_local_projection_is( + self, example["projection"]) diff --git a/bigml/tests/test_35_d_compare_predictions.py b/bigml/tests/test_35_d_compare_predictions.py new file mode 100644 index 00000000..442ac2cf --- /dev/null +++ b/bigml/tests/test_35_d_compare_predictions.py @@ -0,0 +1,110 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Comparing remote and local predictions + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_time_series_steps as time_series_create +from . import create_forecast_steps as forecast_create +from . import compare_forecasts_steps as forecast_compare +from . import create_pca_steps as pca_create +from . import create_projection_steps as projection_create +from . import compare_predictions_steps as compare_predictions + + +class TestComparePrediction: + """Test local and remote predictions""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario2(self): + """ + Scenario: Successfully comparing forecasts from time series with "A" seasonality + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a time series with "" + And I wait until the time series is ready less than secs + And I create a local time series + When I create a forecast for "" + Then the forecast is "" + And I create a local forecast for "" + Then the local forecast is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "forecast", "model_conf"] + examples = [ + + ['data/grades.csv', '30', '30', '300', + '{"000005": {"horizon": 5}}', + '{"000005": [{"point_forecast": [73.96192, 74.04106, 74.12029, ' + '74.1996, 74.27899], "model": "M,M,N"}]}', + '{"objective_fields": ["000001", "000005"], "period": 12}'], + ['data/grades.csv', '30', '30', '300', + '{"000005": {"horizon": 5, "ets_models": {"names": ["M,N,A"], ' + '"criterion": "aic", "limit": 3}}}', + '{"000005": [{"point_forecast": [67.43222, 68.24468, ' + '64.14437, 67.5662, 67.79028], "model": "M,N,A"}]}', + '{"objective_fields": ["000001", "000005"], "period": 12}'], + ['data/grades.csv', '30', '30', '300', + '{"000005": {"horizon": 5, "ets_models": {"names": ["A,A,A"], ' + '"criterion": "aic", "limit": 3}}}', + '{"000005": 
[{"point_forecast": [74.73553, 71.6163, 71.90264, ' + '76.4249, 75.06982], "model": "A,A,A"}]}', + '{"objective_fields": ["000001", "000005"], "period": 12}']] + show_doc(self.test_scenario2) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + time_series_create.i_create_a_time_series_with_params( + self, example["model_conf"]) + time_series_create.the_time_series_is_finished_in_less_than( + self, example["model_wait"]) + time_series_create.create_local_time_series(self) + forecast_create.i_create_a_forecast(self, example["input_data"]) + forecast_create.the_forecast_is(self, example["forecast"]) + forecast_compare.i_create_a_local_forecast( + self, example["input_data"]) + forecast_compare.the_local_forecast_is(self, example["forecast"]) diff --git a/bigml/tests/test_35_e_compare_predictions.py b/bigml/tests/test_35_e_compare_predictions.py new file mode 100644 index 00000000..b998b1a4 --- /dev/null +++ b/bigml/tests/test_35_e_compare_predictions.py @@ -0,0 +1,224 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Comparing remote and local predictions + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_time_series_steps as time_series_create +from . import create_forecast_steps as forecast_create +from . import compare_forecasts_steps as forecast_compare +from . import create_pca_steps as pca_create +from . import create_projection_steps as projection_create +from . 
import compare_predictions_steps as compare_predictions + + +class TestComparePrediction: + """Test predictions""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario6(self): + """ + Scenario: Successfully comparing projections for PCAs: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a PCA with "" + And I wait until the PCA is ready less than secs + And I create a local PCA + When I create a projection for "" + Then the projection is "" + And I create a local projection for "" + Then the local projection is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "input_data", "model_conf", "projection"] + examples = [ + ['data/spam_tiny.csv', '30', '30', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "all"}}}}', '{"Message": "early"}', '{}', + '{"PC40": 0.00416, "PC38": 0.08267, "PC39": 0.00033, "PC18": 0.28094, ' + '"PC19": -0.15056, "PC14": 0.20643, "PC15": 0.23931, "PC16": 0.03251, ' + '"PC17": 0.02776, "PC10": 0.1424, "PC11": 0.4059, "PC12": -0.1238, ' + '"PC13": 0.15131, "PC43": 0.29617, "PC42": 1.0091, "PC41": 0, ' + '"PC25": 0.07164, "PC24": -0.29904, "PC27": -0.1331, "PC26": -0.18572, ' + '"PC21": 0.25616, "PC20": 0.30424, "PC23": -0.45775, "PC22": -0.3362, ' + '"PC47": -0.13757, "PC49": 0.01864, "PC48": 0.04742, "PC29": -0.16286, ' + '"PC28": 0.42207, "PC32": -0.05917, "PC46": -0.05018, "PC31": -0.13973, ' + '"PC45": -0.05015, "PC36": 0.03017, "PC44": 0, "PC37": -0.06093, ' + '"PC34": 0.25821, "PC35": -0.22194, "PC33": -0.23398, "PC8": 0.01159, ' + '"PC9": -0.16042, "PC2": -0.09202, "PC3": 0.14371, "PC1": 0.65114, ' + '"PC6": -0.43034, "PC7": -0.02563, "PC4": -0.04947, "PC5": -0.07796, ' + '"PC50": -0.00769, "PC30": 0.07813}'], + ['data/spam_tiny.csv', '30', '30', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "all"}}}}', '{"Message": "mobile call"}','{}', + '{"PC40": 0.31818, "PC38": 0.06912, "PC39": -0.14342, "PC18": 0.22382, ' + '"PC19": 0.18518, "PC14": 0.89231, "PC15": 0.05046, "PC16": -0.00241, ' + '"PC17": 0.54501, "PC10": -0.26463, "PC11": 0.30251, "PC12": 1.16327, ' + '"PC13": 0.16973, "PC43": 0.11952, "PC42": 1.05499, "PC41": 0.51263, ' + '"PC25": 0.02467, "PC24": -0.65128, "PC27": 0.48916, "PC26": -0.45228, ' + '"PC21": -0.44167, "PC20": 0.76896, "PC23": 0.29398, "PC22": 0.06425, ' + '"PC47": 0.70416, "PC49": -0.30313, "PC48": 0.12976, "PC29": -0.34, ' + '"PC28": 0.17406, "PC32": -0.06411, "PC46": 0.69257, "PC31": 0.07523, ' + '"PC45": -0.03461, "PC36": 0.29732, "PC44": 0.14516, "PC37": -0.19109, ' + '"PC34": 0.58399, "PC35": 0.37608, "PC33": -0.00378, "PC8": -0.88156, ' + '"PC9": 0.38233, "PC2": -0.56685, "PC3": 0.56321, "PC1": 0.49171, ' + '"PC6": -0.09854, "PC7": -1.24639, "PC4": 1.50134, "PC5": -0.03161, ' + '"PC50": 0.17349, "PC30": -1.29612}']] + show_doc(self.test_scenario6) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, 
example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + pca_create.i_create_a_pca_with_params(self, example["model_conf"]) + pca_create.the_pca_is_finished_in_less_than( + self, example["model_wait"]) + projection_create.i_create_a_projection( + self, example["input_data"]) + projection_create.the_projection_is(self, example["projection"]) + compare_predictions.create_local_pca(self) + compare_predictions.i_create_a_local_projection( + self, example["input_data"]) + compare_predictions.the_local_projection_is( + self, example["projection"]) + + def test_scenario7(self): + """ + Scenario: Successfully comparing remote and local predictions + with raw date input for PCAs: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a PCA + And I wait until the PCA is ready less than secs + And I create a local PCA + When I create a projection for "" + Then the projection is "" + And I create a local projection for "" + Then the local projection is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "projection"] + examples = [ + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1910-05-08T19:10:23.106","cat-0":"cat2",' + '"target-2":0.4}', + '{"PC8": -1.54293, "PC9": -0.94836, "PC2": 0.78176, ' + '"PC3": -0.62082, "PC1": 0.89614, "PC10": 1.06575, ' + '"PC11": 1.3211, "PC4": 1.90088, "PC5": 0.24197, ' + '"PC7": -0.37701, "PC6": 2.25007}'], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1920-06-30T20:21:20.320","cat-0":"cat1",' + '"target-2":0.2}', + '{"PC8": 0.3148, "PC9": -0.61742, "PC2": 0.93411, ' + '"PC3": 1.80286, "PC1": 0.36425, "PC10": 0.7364, ' + '"PC11": 2.25863, "PC4": -1.50319, "PC5": 0.17088, ' + '"PC7": 0.51738, "PC6": 0.42403}'], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1932-01-30T19:24:11.440","cat-0":"cat2",' + '"target-2":0.1}', + '{"PC8": -0.86728, "PC9": -1.85164, "PC2": 2.13206, ' + '"PC3": 0.58449, "PC1": 0.28379, "PC10": 2.05465, ' + '"PC11": 0.44372, "PC4": 1.27236, "PC5": 0.99468, ' + '"PC7": -0.32496, "PC6": 0.52217}'], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1950-11-06T05:34:05.602","cat-0":"cat1" ,' + '"target-2":0.9}', + '{"PC8": 2.49563, "PC9": -0.57774, "PC2": -0.76354, ' + '"PC3": 0.19215, "PC1": 0.99197, "PC10": -1.21017, ' + '"PC11": 1.55778, "PC4": -0.24013, "PC5": -0.38492, ' + '"PC7": 1.82607, "PC6": 0.3736}'], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1969-7-14 17:36","cat-0":"cat2","target-2":0.9}', + '{"PC8": -0.41111, "PC9": -5.32959, "PC2": -1.25322, ' + '"PC3": 2.93113, "PC1": 2.07444, "PC10": 4.8808, ' + '"PC11": 0.4185, "PC4": 3.13876, "PC5": 3.70259, ' + '"PC7": 0.55665, "PC6": 5.16873}'], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"2001-01-05T23:04:04.693","cat-0":"cat2",' + '"target-2":0.01}', + '{"PC8": -1.10654, "PC9": -0.34137, "PC2": 1.73362, ' + '"PC3": -0.34799, "PC1": 2.32583, "PC10": 0.94566, ' + '"PC11": 0.53787, "PC4": 2.77385, "PC5": -0.1017, ' + '"PC7": 0.20156, "PC6": -0.44476}'], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"2011-04-01T00:16:45.747","cat-0":"cat2",' + '"target-2":0.32}', + '{"PC8": -0.514, "PC9": 0.38349, "PC2": -0.27037, ' + '"PC3": -1.82588, "PC1": 1.05737, "PC10": 0.08607, ' + '"PC11": -0.97078, "PC4": 
2.10426, "PC5": 1.86843, ' + '"PC7": 1.55632, "PC6": 0.42395}'], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1969-W29-1T17:36:39Z","cat-0":"cat1",' + '"target-2":0.87}', + '{"PC8": 2.05525, "PC9": 1.50754, "PC2": 6.27524, ' + '"PC3": 7.74224, "PC1": 5.30354, "PC10": -6.40442, ' + '"PC11": 6.90365, "PC4": -1.44431, "PC5": 2.16179, ' + '"PC7": 1.35718, "PC6": 5.02426}']] + show_doc(self.test_scenario7) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + pca_create.i_create_a_pca(self, shared=example["data"]) + pca_create.the_pca_is_finished_in_less_than( + self, example["model_wait"]) + projection_create.i_create_a_projection( + self, example["input_data"]) + projection_create.the_projection_is( + self, example["projection"]) + compare_predictions.create_local_pca(self, pre_model=True) + compare_predictions.i_create_a_local_projection( + self, example["input_data"], + pre_model=self.bigml["local_pipeline"]) + compare_predictions.the_local_projection_is( + self, example["projection"]) diff --git a/bigml/tests/test_36_compare_predictions.py b/bigml/tests/test_36_compare_predictions.py index 677f2039..c8a76e3d 100644 --- a/bigml/tests/test_36_compare_predictions.py +++ b/bigml/tests/test_36_compare_predictions.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2017-2019 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,627 +20,616 @@ """ Comparing remote and local predictions """ -from world import world, setup_module, teardown_module, show_doc -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_model_steps as model_create -import create_ensemble_steps as ensemble_create -import create_linear_steps as linear_create -import create_prediction_steps as prediction_create -import compare_predictions_steps as prediction_compare +import json +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_anomaly_steps as anomaly_create +from . import create_model_steps as model_create +from . import create_ensemble_steps as ensemble_create +from . import create_linear_steps as linear_create +from . import create_prediction_steps as prediction_create +from . 
import compare_predictions_steps as prediction_compare -class TestComparePrediction(object): - def setup(self): +class TestComparePrediction: + """Test local and remote predictions""" + + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully comparing predictions for deepnets: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a deepnet with objective "" and "" - And I wait until the deepnet is ready less than secs - And I create a local deepnet - When I create a prediction for "" - Then the prediction for "" is "" - And I create a local prediction for "" - Then the local prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | params, - - + Scenario: Successfully comparing predictions for deepnets: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a deepnet with objective "" and "" + And I wait until the deepnet is ready less than secs + And I create a local deepnet + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "model_conf"] examples = [ - ['data/iris.csv', '30', '50', '30000', '{"petal width": 4}', '000004', 'Iris-virginica', '{}'], - ['data/iris.csv', '30', '50', '30000', '{"sepal length": 4.1, "sepal width": 2.4}', '000004', 'Iris-setosa', '{}'], - ['data/iris_missing2.csv', '30', '50', '30000', '{}', '000004', 'Iris-setosa', '{}'], - ['data/grades.csv', '30', '50', '30000', '{}', '000005', 42.15473, '{}'], - ['data/spam.csv', '30', '50', '30000', '{}', '000000', 'ham', '{}']] - show_doc(self.test_scenario1, examples) - + ['data/iris.csv', '30', '50', '60', '{"petal width": 4}', '000004', + 'Iris-virginica', '{}'], + ['data/iris.csv', '30', '50', '60', + '{"sepal length": 4.1, "sepal width": 2.4}', '000004', + 'Iris-versicolor', '{}'], + ['data/iris_missing2.csv', '30', '50', '60', '{}', '000004', + 'Iris-versicolor', '{}'], + ['data/grades.csv', '30', '50', '60', '{}', '000005', 47.04852, + '{}'], + ['data/spam.csv', '30', '50', '60', '{}', '000000', 'ham', '{}']] + show_doc(self.test_scenario1) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_deepnet_with_objective_and_params(self, example[5], example[7]) - model_create.the_deepnet_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, 
example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_deepnet_with_objective_and_params( + self, example["objective_id"], example["model_conf"]) + model_create.the_deepnet_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_deepnet(self) - prediction_create.i_create_a_deepnet_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_compare.i_create_a_local_deepnet_prediction(self, example[4]) - prediction_compare.the_local_prediction_is(self, example[6]) - + prediction_create.i_create_a_deepnet_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"], + precision=3) + prediction_compare.i_create_a_local_deepnet_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"], precision=3) def test_scenario2(self): """ - Scenario: Successfully comparing predictions in operating points for models: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - And I create a local model - When I create a prediction for "" in "" - Then the prediction for "" is "" - And I create a local prediction for "" in "" - Then the local prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | prediction | operating_point - - + Scenario: Successfully comparing predictions in operating points for models: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + When I create a prediction for "" in "" + Then the prediction for "" is "" + And I create a local prediction for "" in "" + Then the local prediction is "" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "prediction", "operating_point", + "objective_id"] examples = [ - ['data/iris.csv', '10', '50', '50', '{"petal width": 4}', 'Iris-setosa', {"kind": "probability", "threshold": 0.1, "positive_class": "Iris-setosa"}, "000004"], - ['data/iris.csv', '10', '50', '50', '{"petal width": 4}', 'Iris-versicolor', {"kind": "probability", "threshold": 0.9, "positive_class": "Iris-setosa"}, "000004"], - ['data/iris.csv', '10', '50', '50', '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-setosa', {"kind": "confidence", "threshold": 0.1, "positive_class": "Iris-setosa"}, "000004"], - ['data/iris.csv', '10', '50', '50', '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-versicolor', {"kind": "confidence", "threshold": 0.9, "positive_class": "Iris-setosa"}, "000004"]] - show_doc(self.test_scenario2, examples) - + ['data/iris.csv', '10', '50', '50', '{"petal width": 4}', + 'Iris-setosa', + {"kind": "probability", "threshold": 0.1, + "positive_class": "Iris-setosa"}, "000004"], + ['data/iris.csv', '10', '50', '50', '{"petal width": 4}', + 'Iris-versicolor', + {"kind": 
"probability", "threshold": 0.9, + "positive_class": "Iris-setosa"}, "000004"], + ['data/iris.csv', '10', '50', '50', + '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-setosa', + {"kind": "confidence", "threshold": 0.1, + "positive_class": "Iris-setosa"}, "000004"], + ['data/iris.csv', '10', '50', '50', + '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-versicolor', + {"kind": "confidence", "threshold": 0.9, + "positive_class": "Iris-setosa"}, "000004"]] + show_doc(self.test_scenario2) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) prediction_compare.i_create_a_local_model(self) - prediction_create.i_create_a_prediction_op(self, example[4], example[6]) - prediction_create.the_prediction_is(self, example[7], example[5]) - prediction_compare.i_create_a_local_prediction_op(self, example[4], example[6]) - prediction_compare.the_local_prediction_is(self, example[5]) - + prediction_create.i_create_a_prediction_op( + self, example["input_data"], example["operating_point"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction_op( + self, example["input_data"], example["operating_point"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) def test_scenario3(self): """ - Scenario: Successfully comparing predictions for deepnets with operating point: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a deepnet with objective "" and "" - And I wait until the deepnet is ready less than secs - And I create a local deepnet - When I create a prediction with operating point "" for "" - Then the prediction for "" is "" - And I create a local prediction with operating point "" for "" - Then the local prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | params | operating_point, - - + Scenario: Successfully comparing predictions for deepnets with operating point: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a deepnet with objective "" and "" + And I wait until the deepnet is ready less than secs + And I create a local deepnet + When I create a prediction with operating point "" for "" + Then the prediction for "" is "" + And I create a local prediction with operating point "" for "" + Then the local 
prediction is "" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "model_conf", + "operating_point"] examples = [ - ['data/iris.csv', '10', '50', '30000', '{"petal width": 4}', '000004', 'Iris-versicolor', '{}', {"kind": "probability", "threshold": 1, "positive_class": "Iris-virginica"}]] - show_doc(self.test_scenario3, examples) - + ['data/iris.csv', '10', '50', '60', '{"petal width": 4}', '000004', + 'Iris-setosa', '{}', {"kind": "probability", "threshold": 1, + "positive_class": "Iris-virginica"}]] + show_doc(self.test_scenario3) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_deepnet_with_objective_and_params(self, example[5], example[7]) - model_create.the_deepnet_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_deepnet_with_objective_and_params( + self, example["objective_id"], example["model_conf"]) + model_create.the_deepnet_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_deepnet(self) - prediction_create.i_create_a_deepnet_prediction_with_op(self, example[4], example[8]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_compare.i_create_a_local_deepnet_prediction_with_op(self, example[4], example[8]) - prediction_compare.the_local_prediction_is(self, example[6]) - + prediction_create.i_create_a_deepnet_prediction_with_op( + self, example["input_data"], example["operating_point"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_deepnet_prediction_with_op( + self, example["input_data"], example["operating_point"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) def test_scenario4(self): """ - Scenario: Successfully comparing predictions in operating points for ensembles: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create an ensemble - And I wait until the ensemble is ready less than secs - And I create a local ensemble - When I create a prediction for "" in "" - Then the prediction for "" is "" - And I create a local ensemble prediction for "" in "" - Then the local ensemble prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | prediction | operating_point - - + Scenario: Successfully comparing predictions in operating points for ensembles: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an ensemble + And I wait until the ensemble is ready less than secs + And I create a local ensemble + When I 
create a prediction for "" in "" + Then the prediction for "" is "" + And I create a local ensemble prediction for "" in "" + Then the local ensemble prediction is "" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "prediction", "operating_point", + "objective_id"] examples = [ - ['data/iris.csv', '10', '50', '50', '{"petal width": 4}', 'Iris-setosa', {"kind": "probability", "threshold": 0.1, "positive_class": "Iris-setosa"}, "000004"], - ['data/iris.csv', '10', '50', '50', '{"petal width": 4}', 'Iris-virginica', {"kind": "probability", "threshold": 0.9, "positive_class": "Iris-setosa"}, "000004"], - ['data/iris.csv', '10', '50', '50', '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-setosa', {"kind": "confidence", "threshold": 0.1, "positive_class": "Iris-setosa"}, "000004"], - ['data/iris.csv', '10', '50', '50', '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-versicolor', {"kind": "confidence", "threshold": 0.9, "positive_class": "Iris-setosa"}, "000004"]] - show_doc(self.test_scenario4, examples) - + ['data/iris.csv', '10', '50', '50', '{"petal width": 4}', + 'Iris-setosa', + {"kind": "probability", "threshold": 0.1, + "positive_class": "Iris-setosa"}, "000004"], + ['data/iris.csv', '10', '50', '50', '{"petal width": 4}', + 'Iris-virginica', + {"kind": "probability", "threshold": 0.9, + "positive_class": "Iris-setosa"}, "000004"], + ['data/iris.csv', '10', '50', '50', + '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-setosa', + {"kind": "confidence", "threshold": 0.1, + "positive_class": "Iris-setosa"}, "000004"], + ['data/iris.csv', '10', '50', '50', + '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-versicolor', + {"kind": "confidence", "threshold": 0.9, + "positive_class": "Iris-setosa"}, "000004"]] + show_doc(self.test_scenario4) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - ensemble_create.i_create_an_ensemble(self) - ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_create.i_create_an_ensemble(self, shared=example["data"]) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) ensemble_create.create_local_ensemble(self) - prediction_create.i_create_an_ensemble_prediction_op(self, example[4], example[6]) - prediction_create.the_prediction_is(self, example[7], example[5]) - prediction_compare.i_create_a_local_ensemble_prediction_op(self, example[4], example[6]) - prediction_compare.the_local_prediction_is(self, example[5]) - + prediction_create.i_create_an_ensemble_prediction_op( + self, example["input_data"], example["operating_point"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_ensemble_prediction_op( + self, example["input_data"], example["operating_point"]) + 
+ prediction_compare.the_local_prediction_is( + self, example["prediction"]) def test_scenario5(self): """ - Scenario: Successfully comparing predictions in operating kind for models: - Given I create a data source uploading a "&lt;data&gt;" file - And I wait until the source is ready less than &lt;time_1&gt; secs - And I create a dataset - And I wait until the dataset is ready less than &lt;time_2&gt; secs - And I create a model - And I wait until the model is ready less than &lt;time_3&gt; secs - And I create a local model - When I create a prediction for "&lt;data_input&gt;" in "&lt;operating_point&gt;" - Then the prediction for "&lt;objective&gt;" is "&lt;prediction&gt;" - And I create a local prediction for "&lt;data_input&gt;" in "&lt;operating_point&gt;" - Then the local prediction is "&lt;prediction&gt;" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | prediction | operating_point - - + Scenario: Successfully comparing predictions in operating kind for models: + Given I create a data source uploading a "&lt;data&gt;" file + And I wait until the source is ready less than &lt;source_wait&gt; secs + And I create a dataset + And I wait until the dataset is ready less than &lt;dataset_wait&gt; secs + And I create a model + And I wait until the model is ready less than &lt;model_wait&gt; secs + And I create a local model + When I create a prediction for "&lt;input_data&gt;" in "&lt;operating_kind&gt;" + Then the prediction for "&lt;objective_id&gt;" is "&lt;prediction&gt;" + And I create a local prediction for "&lt;input_data&gt;" in "&lt;operating_kind&gt;" + Then the local prediction is "&lt;prediction&gt;" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "prediction", "operating_kind", + "objective_id"] examples = [ - ['data/iris.csv', '10', '50', '50', '{"petal length": 2.46, "sepal length": 5}', 'Iris-versicolor', "probability", "000004"], - ['data/iris.csv', '10', '50', '50', '{"petal length": 2.46, "sepal length": 5}', 'Iris-versicolor', "confidence", "000004"], - ['data/iris.csv', '10', '50', '50', '{"petal length": 2}', 'Iris-setosa', "probability", "000004"], - ['data/iris.csv', '10', '50', '50', '{"petal length": 2}', 'Iris-setosa', "confidence", "000004"]] - show_doc(self.test_scenario5, examples) - + ['data/iris.csv', '10', '50', '50', + '{"petal length": 2.46, "sepal length": 5}', 'Iris-versicolor', + "probability", "000004"], + ['data/iris.csv', '10', '50', '50', + '{"petal length": 2.46, "sepal length": 5}', 'Iris-versicolor', + "confidence", "000004"], + ['data/iris.csv', '10', '50', '50', '{"petal length": 2}', + 'Iris-setosa', "probability", "000004"], + ['data/iris.csv', '10', '50', '50', '{"petal length": 2}', + 'Iris-setosa', "confidence", "000004"]] + show_doc(self.test_scenario5) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) prediction_compare.i_create_a_local_model(self) - prediction_create.i_create_a_prediction_op_kind(self, example[4], example[6]) -
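The operating-point dictionaries in these rows combine a "kind", a "threshold" and a "positive_class". The semantics can be sketched as follows; this is only an illustration of the rule the scenarios exercise, with made-up probabilities, not the bindings' actual implementation:

def predict_with_operating_point(scores, operating_point):
    # scores: class name -> probability (or confidence), matching "kind".
    positive = operating_point["positive_class"]
    if scores.get(positive, 0) >= operating_point["threshold"]:
        return positive
    # Below the threshold, the best of the remaining classes wins.
    rest = {k: v for k, v in scores.items() if k != positive}
    return max(rest, key=rest.get)

probs = {"Iris-setosa": 0.12, "Iris-versicolor": 0.18, "Iris-virginica": 0.70}
predict_with_operating_point(
    probs, {"kind": "probability", "threshold": 0.1,
            "positive_class": "Iris-setosa"})   # -> 'Iris-setosa'
predict_with_operating_point(
    probs, {"kind": "probability", "threshold": 0.9,
            "positive_class": "Iris-setosa"})   # -> 'Iris-virginica'

This is why the scenario 4 rows flip from Iris-setosa to Iris-virginica for the same input when the threshold moves from 0.1 to 0.9.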
prediction_create.the_prediction_is(self, example[7], example[5]) - prediction_compare.i_create_a_local_prediction_op_kind(self, example[4], example[6]) - prediction_compare.the_local_prediction_is(self, example[5]) - + prediction_create.i_create_a_prediction_op_kind( + self, example["input_data"], example["operating_kind"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction_op_kind( + self, example["input_data"], example["operating_kind"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) def test_scenario6(self): """ - Scenario: Successfully comparing predictions for deepnets with operating kind: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a deepnet with objective "" and "" - And I wait until the deepnet is ready less than secs - And I create a local deepnet - When I create a prediction with operating kind "" for "" - Then the prediction for "" is "" - And I create a local prediction with operating point "" for "" - Then the local prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | params | operating_point, - - + Scenario: Successfully comparing predictions for deepnets with operating kind: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a deepnet with objective "" and "" + And I wait until the deepnet is ready less than secs + And I create a local deepnet + When I create a prediction with operating kind "" for "" + Then the prediction for "" is "" + And I create a local prediction with operating point "" for "" + Then the local prediction is "" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "model_conf", + "operating_kind"] examples = [ - ['data/iris.csv', '10', '50', '30000', '{"petal length": 2.46}', '000004', 'Iris-setosa', '{}', "probability"], - ['data/iris.csv', '10', '50', '30000', '{"petal length": 2}', '000004', 'Iris-setosa', '{}', "probability"]] - show_doc(self.test_scenario6, examples) - + ['data/iris.csv', '10', '50', '60', '{"petal length": 2.46}', + '000004', 'Iris-setosa', '{}', "probability"], + ['data/iris.csv', '10', '50', '60', '{"petal length": 6}', + '000004', 'Iris-versicolor', '{}', "probability"]] + show_doc(self.test_scenario6) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_deepnet_with_objective_and_params(self, example[5], example[7]) - model_create.the_deepnet_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + 
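The step helpers also grew a shared= keyword in this refactoring, presumably caching finished resources keyed by the data file so that later scenarios built on the same CSV skip re-creating the source, dataset or model. A sketch of that caching idea under that assumption (_SHARED and get_or_create_source are illustrative names, not the bindings' API; create_source and ok are real BigML connection methods):

from bigml.api import BigML

_SHARED = {}

def get_or_create_source(api, data_file, shared=None):
    # Reuse a finished resource when a sharing key was given.
    if shared is not None and shared in _SHARED:
        return _SHARED[shared]
    source = api.create_source(data_file)
    api.ok(source)  # block until the source reaches the FINISHED state
    if shared is not None:
        _SHARED[shared] = source
    return source

api = BigML()  # credentials read from BIGML_USERNAME / BIGML_API_KEY
src1 = get_or_create_source(api, "data/iris.csv", shared="data/iris.csv")
src2 = get_or_create_source(api, "data/iris.csv", shared="data/iris.csv")
assert src1 is src2  # the second call is a cache hit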
model_create.i_create_a_deepnet_with_objective_and_params( + self, example["objective_id"], example["model_conf"]) + model_create.the_deepnet_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_deepnet(self) - prediction_create.i_create_a_deepnet_prediction_op_kind(self, example[4], example[8]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_compare.i_create_a_local_deepnet_prediction_op_kind(self, example[4], example[8]) - prediction_compare.the_local_prediction_is(self, example[6]) - + prediction_create.i_create_a_deepnet_prediction_op_kind( + self, example["input_data"], example["operating_kind"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_deepnet_prediction_op_kind( + self, example["input_data"], example["operating_kind"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) def test_scenario7(self): """ - Scenario: Successfully comparing predictions in operating points for ensembles: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create an ensemble - And I wait until the ensemble is ready less than secs - And I create a local ensemble - When I create a prediction for "" in "" - Then the prediction for "" is "" - And I create a local ensemble prediction for "" in "" - Then the local ensemble prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | prediction | operating_kind - - + Scenario: Successfully comparing predictions in operating points for ensembles: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an ensemble + And I wait until the ensemble is ready less than secs + And I create a local ensemble + When I create a prediction for "" in "" + Then the prediction for "" is "" + And I create a local ensemble prediction for "" in "" + Then the local ensemble prediction is "" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "prediction", "operating_kind", + "objective_id"] examples = [ - ['data/iris.csv', '10', '50', '50', '{"petal length": 2.46}', 'Iris-versicolor', "probability", "000004"], - ['data/iris.csv', '10', '50', '50', '{"petal length": 2}', 'Iris-setosa', "probability", "000004"], - ['data/iris.csv', '10', '50', '50', '{"petal length": 2.46}', 'Iris-versicolor', "confidence", "000004"], - ['data/iris.csv', '10', '50', '50', '{"petal length": 2}', 'Iris-setosa', "confidence", "000004"], - ['data/iris.csv', '10', '50', '50', '{"petal length": 2.46}', 'Iris-versicolor', "votes", "000004"], - ['data/iris.csv', '10', '50', '50', '{"petal length": 1}', 'Iris-setosa', "votes", "000004"]] - show_doc(self.test_scenario7, examples) - + ['data/iris.csv', '10', '50', '50', '{"petal length": 2.46}', + 'Iris-versicolor', "probability", "000004"], + ['data/iris.csv', '10', '50', '50', '{"petal length": 2}', + 'Iris-setosa', "probability", "000004"], + ['data/iris.csv', '10', '50', '50', '{"petal length": 2.46}', + 'Iris-versicolor', "confidence", "000004"], + ['data/iris.csv', '10', '50', '50', '{"petal length": 2}', + 'Iris-setosa', "confidence", "000004"], + ['data/iris.csv', '10', '50', '50', '{"petal length": 2.46}', + 'Iris-versicolor', "votes", 
"000004"], + ['data/iris.csv', '10', '50', '50', '{"petal length": 1}', + 'Iris-setosa', "votes", "000004"]] + show_doc(self.test_scenario7) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - ensemble_create.i_create_an_ensemble(self) - ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_create.i_create_an_ensemble(self, shared=example["data"]) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) ensemble_create.create_local_ensemble(self) - prediction_create.i_create_an_ensemble_prediction_op_kind(self, example[4], example[6]) - prediction_create.the_prediction_is(self, example[7], example[5]) - prediction_compare.i_create_a_local_ensemble_prediction_op_kind(self, example[4], example[6]) - prediction_compare.the_local_prediction_is(self, example[5]) + prediction_create.i_create_an_ensemble_prediction_op_kind( + self, example["input_data"], example["operating_kind"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_ensemble_prediction_op_kind( + self, example["input_data"], example["operating_kind"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) def test_scenario8(self): """ - Scenario: Successfully comparing predictions for logistic regressions with operating kind: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a logistic regression with objective "" - And I wait until the logistic regression is ready less than secs - And I create a local logistic regression - When I create a prediction with operating kind "" for "" - Then the prediction for "" is "" - And I create a local prediction with operating point "" for "" - Then the local prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | params | operating_point, - - + Scenario: Successfully comparing predictions for logistic regressions with operating kind: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression with objective "" + And I wait until the logistic regression is ready less than secs + And I create a local logistic regression + When I create a prediction with operating kind "" for "" + Then the prediction for "" is "" + And I create a local prediction with operating point "" for "" + Then the local prediction is "" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", + "operating_kind"] examples = [ - ['data/iris.csv', '10', '50', 
'30000', '{"petal length": 5}', '000004', 'Iris-versicolor', '{}', "probability"], - ['data/iris.csv', '10', '50', '30000', '{"petal length": 2}', '000004', 'Iris-setosa', '{}', "probability"]] - show_doc(self.test_scenario8, examples) - + ['data/iris.csv', '10', '50', '60', '{"petal length": 5}', + '000004', 'Iris-versicolor', "probability"], + ['data/iris.csv', '10', '50', '60', '{"petal length": 2}', + '000004', 'Iris-setosa', "probability"]] + show_doc(self.test_scenario8) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_logistic_model(self) - model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model( + self, shared=example["data"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_logistic_model(self) - prediction_create.i_create_a_logistic_prediction_with_op_kind(self, example[4], example[8]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_compare.i_create_a_local_logistic_prediction_op_kind(self, example[4], example[8]) - prediction_compare.the_local_prediction_is(self, example[6]) - + prediction_create.i_create_a_logistic_prediction_with_op_kind( + self, example["input_data"], example["operating_kind"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_logistic_prediction_op_kind( + self, example["input_data"], example["operating_kind"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) def test_scenario9(self): """ - Scenario: Successfully comparing predictions for logistic regressions with operating kind and supervised model: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a logistic regression with objective "" - And I wait until the logistic regression is ready less than secs - And I create a local supervised model - When I create a prediction with operating kind "" for "" - Then the prediction for "" is "" - And I create a local prediction with operating point "" for "" - Then the local prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | params | operating_point, - - + Scenario: Successfully comparing predictions for logistic regressions with operating kind and supervised model: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression with objective "" + And I wait until the logistic regression is ready less than secs + And I create a local 
supervised model + When I create a prediction with operating kind "" for "" + Then the prediction for "" is "" + And I create a local prediction with operating point "" for "" + Then the local prediction is "" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", + "operating_kind"] examples = [ - ['data/iris.csv', '10', '50', '30000', '{"petal length": 5}', '000004', 'Iris-versicolor', '{}', "probability"], - ['data/iris.csv', '10', '50', '30000', '{"petal length": 2}', '000004', 'Iris-setosa', '{}', "probability"]] - show_doc(self.test_scenario9, examples) - + ['data/iris.csv', '10', '50', '60', '{"petal length": 5}', + '000004', 'Iris-versicolor', "probability"], + ['data/iris.csv', '10', '50', '60', '{"petal length": 2}', + '000004', 'Iris-setosa', "probability"]] + show_doc(self.test_scenario9) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_logistic_model(self) - model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) - prediction_compare.i_create_a_local_supervised_model(self, model_type="logistic_regression") - prediction_create.i_create_a_logistic_prediction_with_op_kind(self, example[4], example[8]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_compare.i_create_a_local_logistic_prediction_op_kind(self, example[4], example[8]) - prediction_compare.the_local_prediction_is(self, example[6]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model( + self, shared=example["data"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_supervised_model( + self, model_type="logistic_regression") + prediction_create.i_create_a_logistic_prediction_with_op_kind( + self, example["input_data"], example["operating_kind"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_logistic_prediction_op_kind( + self, example["input_data"], example["operating_kind"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_compare.i_create_a_local_bigml_model( + self, model_type="logistic_regression") + prediction_compare.i_create_a_local_bigml_model_prediction( + self, example["input_data"], prediction_type="prediction", + operating_kind=example["operating_kind"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) def test_scenario10(self): """ - Scenario: Successfully comparing predictions for linear regression: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a linear regression with objective "" and "" - And I wait until 
the linear regression is ready less than &lt;time_3&gt; secs - And I create a local linear regression - When I create a prediction for "&lt;data_input&gt;" - Then the prediction for "&lt;objective&gt;" is "&lt;prediction&gt;" - And I create a local prediction for "&lt;data_input&gt;" - Then the local prediction is "&lt;prediction&gt;" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | params - + Scenario: Successfully comparing predictions for linear regression: + Given I create a data source uploading a "&lt;data&gt;" file + And I wait until the source is ready less than &lt;source_wait&gt; secs + And I create a dataset + And I wait until the dataset is ready less than &lt;dataset_wait&gt; secs + And I create a linear regression with objective "&lt;objective_id&gt;" and "&lt;model_conf&gt;" + And I wait until the linear regression is ready less than &lt;model_wait&gt; secs + And I create a local linear regression + When I create a prediction for "&lt;input_data&gt;" + Then the prediction for "&lt;objective_id&gt;" is "&lt;prediction&gt;" + And I create a local prediction for "&lt;input_data&gt;" + Then the local prediction is "&lt;prediction&gt;" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "model_conf"] examples = [ - ['data/grades.csv', '10', '50', '30000', '{"000000": 1, "000001": 1, "000002": 1}', '000005', 29.63024, '{"input_fields": ["000000", "000001", "000002"]}'], - ['data/iris.csv', '10', '50', '30000', '{"000000": 1, "000001": 1, "000004": "Iris-virginica"}', '000003', 1.21187, '{"input_fields": ["000000", "000001", "000004"]}'], - ['data/movies.csv', '10', '50', '30000', '{"000007": "Action"}', '000009', 4.33333, '{"input_fields": ["000007"]}'], - ['data/movies.csv', '10', '50', '30000', '{"000006": "1999"}', '000009', 3.28427, '{"input_fields": ["000006"], "bias": false}']] - show_doc(self.test_scenario10, examples) - + ['data/grades.csv', '10', '50', '60', + '{"000000": 1, "000001": 1, "000002": 1}', '000005', 29.63024, + '{"input_fields": ["000000", "000001", "000002"]}'], + ['data/iris.csv', '10', '50', '60', + '{"000000": 1, "000001": 1, "000004": "Iris-virginica"}', + '000003', 1.21187, + '{"input_fields": ["000000", "000001", "000004"]}'], + ['data/movies.csv', '10', '50', '60', '{"000007": "Action"}', + '000009', 4.33333, '{"input_fields": ["000007"]}'], + ['data/movies.csv', '10', '50', '60', '{"000006": "1999"}', + '000009', 3.28427, '{"input_fields": ["000006"], "bias": false}']] + show_doc(self.test_scenario10) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - linear_create.i_create_a_linear_regression_with_objective_and_params( \ - self, example[5], example[7]) - linear_create.the_linear_regression_is_finished_in_less_than( \ - self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + linear_create.i_create_a_linear_regression_with_objective_and_params( + self, example["objective_id"], example["model_conf"]) + linear_create.the_linear_regression_is_finished_in_less_than( + self, example["model_wait"]) prediction_compare.i_create_a_local_linear(self) - prediction_create.i_create_a_linear_prediction(self,
example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_compare.i_create_a_local_linear_prediction(self, example[4]) - prediction_compare.the_local_prediction_is(self, example[6]) + prediction_create.i_create_a_linear_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_linear_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) def test_scenario11(self): """ - Scenario: Successfully comparing remote and local predictions - with raw date input for linear regression: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a linear regression - And I wait until the linear regression is ready - less than secs - And I create a local linear regression - When I create a prediction for "" - Then the prediction for "" is "" - And I create a local prediction for "" - Then the local prediction is "" - - Examples: - |data|time_1|time_2|time_3|data_input|objective|prediction - + Scenario: Successfully comparing predictions for logistic regressions with operating point: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression with objective "" + And I wait until the logistic regression is ready less than secs + And I create a local logistic regression + When I create a prediction with operating point "" for "" + Then the prediction for "" is "" + And I create a local prediction with operating point "" for "" + Then the local prediction is "" """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "model_conf", + "operating_point"] examples = [ - ['data/dates2.csv', '20', '20', '25', - '{"time-1": "1910-05-08T19:10:23.106", "cat-0":"cat2"}', - '000002', -0.01284], - ['data/dates2.csv', '20', '20', '25', - '{"time-1": "1920-06-30T20:21:20.320", "cat-0":"cat1"}', - '000002', -0.09459], - ['data/dates2.csv', '20', '20', '25', - '{"time-1": "1932-01-30T19:24:11.440", "cat-0":"cat2"}', - '000002', -0.02259], - ['data/dates2.csv', '20', '20', '25', - '{"time-1": "1950-11-06T05:34:05.252", "cat-0":"cat1"}', - '000002', -0.06754], - ['data/dates2.csv', '20', '20', '25', - '{"time-1": "2001-01-05T23:04:04.693", "cat-0":"cat2"}', - '000002', 0.05204], - ['data/dates2.csv', '20', '20', '25', - '{"time-1": "2011-04-01T00:16:45.747", "cat-0":"cat2"}', - '000002', 0.05878]] - show_doc(self.test_scenario11, examples) - + ['data/iris.csv', '10', '50', '60', '{"petal width": 4}', '000004', + 'Iris-versicolor', '{"default_numeric_value": "mean"}', + {"kind": "probability", "threshold": 1, + "positive_class": "Iris-virginica"}]] + show_doc(self.test_scenario11) for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, - example[2]) - linear_create.i_create_a_linear_regression(self) - linear_create.the_linear_regression_is_finished_in_less_than(self, - example[3]) - prediction_compare.i_create_a_local_linear(self) - 
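Scenario 10 compares a remote linearregression prediction against the bindings' local LinearRegression class, which scores rows in-process from the downloaded model. A minimal usage sketch; the resource ID is a placeholder for a linear regression you own, and the expected value comes from the grades.csv row in the examples above:

from bigml.api import BigML
from bigml.linear import LinearRegression

api = BigML()  # BIGML_USERNAME / BIGML_API_KEY taken from the environment
local_model = LinearRegression(
    "linearregression/aaaaaaaaaaaaaaaaaaaaaaaa", api=api)  # placeholder ID
# For the grades.csv example row this should be close to 29.63024.
print(local_model.predict({"000000": 1, "000001": 1, "000002": 1}))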
prediction_create.i_create_a_linear_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_compare.i_create_a_local_linear_prediction(self, - example[4]) - prediction_compare.the_local_prediction_is(self, example[6]) - - def test_scenario12(self): - """ - Scenario: Successfully comparing remote and local predictions - with raw date input for deepnet: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a deepnet - And I wait until the deepnet is ready - less than secs - And I create a local deepnet - When I create a prediction for "" - Then the prediction for "" is "" - And I create a local prediction for "" - Then the local prediction is "" - - Examples: - |data|time_1|time_2|time_3|data_input|objective|prediction - - - - - - - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1910-05-08T19:10:23.106", "cat-0":"cat2"}', - '000002', 0.04082], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "2011-04-01T00:16:45.747", "cat-0":"cat2"}', - '000002', 0.02919], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}', - '000002', 0.0199], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}', - '000002', 0.0199], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}', - '000002', 0.0199], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}', - '000002', 0.0199], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1920-06-45T20:21:20.320", "cat-0":"cat1"}', - '000002', 0.0199], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "2001-01-05T23:04:04.693", "cat-0":"cat2"}', - '000002', 0.28517], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}', - '000002', -0.05673], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}', - '000002', -0.05673], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}', - '000002', -0.05673], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}', - '000002', -0.05673], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1932-01-30T19:24:11.440", "cat-0":"cat2"}', - '000002', 0.16183], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "Mon Jul 14 17:36 +0000 1969", "cat-0":"cat1"}', - '000002', 0.0199] - - """ - examples = [ - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1910-05-08T19:10:23.106", "cat-0":"cat2"}', - '000002', 0.04082], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "2011-04-01T00:16:45.747", "cat-0":"cat2"}', - '000002', 0.02919], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}', - '000002', 0.0199], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}', - '000002', 0.0199], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}', - '000002', 0.0199], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}', - '000002', 0.0199], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1920-06-45T20:21:20.320", "cat-0":"cat1"}', - '000002', 0.0199], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": 
"2001-01-05T23:04:04.693", "cat-0":"cat2"}', - '000002', 0.28517], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}', - '000002', -0.05673], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}', - '000002', -0.05673], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}', - '000002', -0.05673], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}', - '000002', -0.05673], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "1932-01-30T19:24:11.440", "cat-0":"cat2"}', - '000002', 0.16183], - ['data/dates2.csv', '20', '45', '60', - '{"time-1": "Mon Jul 14 17:36 +0000 1969", "cat-0":"cat1"}', - '000002', 0.0199] -] - show_doc(self.test_scenario12, examples) - - for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_no_suggest_deepnet(self) - model_create.the_deepnet_is_finished_in_less_than(self, example[3]) - prediction_compare.i_create_a_local_deepnet(self) - prediction_create.i_create_a_deepnet_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) - prediction_compare.i_create_a_local_deepnet_prediction(self, - example[4]) - prediction_compare.the_local_prediction_is(self, example[6]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_logistic_model(self) + prediction_create.i_create_a_logistic_prediction_with_op( + self, example["input_data"], example["operating_point"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction_op( + self, example["input_data"], example["operating_point"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) diff --git a/bigml/tests/test_37_configuration.py b/bigml/tests/test_37_configuration.py index 548435ea..1c4ba9ac 100644 --- a/bigml/tests/test_37_configuration.py +++ b/bigml/tests/test_37_configuration.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2015-2019 BigML +# Copyright 2015-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -19,33 +20,40 @@ """ Creating configuration """ -from world import world, setup_module, teardown_module -import create_configuration_steps as config_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_configuration_steps as config_create -class TestConfiguration(object): +class TestConfiguration: + """Test for Configuration methods""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating configuration: - Given I create a configuration from "" info - And I update the configuration name to "" - When I wait until the configuration is ready less than secs - Then the configuration name is "" - And the configuration contents are "" + Scenario: Successfully creating configuration: + Given I create a configuration from "" info + And I update the configuration name to "" + When I wait until the configuration is ready less than secs + Then the configuration name is "" + And the configuration contents are "" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["configurations", "configuration_wait", + "configuration_name"] examples = [ [{ "dataset": { @@ -53,9 +61,15 @@ def test_scenario1(self): } }, '10', {"name": 'my new configuration name'}]] for example in examples: - print "\nTesting with:\n", example - config_create.i_create_configuration(self, example[0]) - config_create.i_update_configuration(self, example[2]) - config_create.the_configuration_is_finished_in_less_than(self, example[1]) - config_create.i_check_configuration_name(self, example[2]) - config_create.i_check_configuration_conf(self, example[0]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + config_create.i_create_configuration( + self, example["configurations"]) + config_create.i_update_configuration( + self, example["configuration_name"]) + config_create.the_configuration_is_finished_in_less_than( + self, example["configuration_wait"]) + config_create.i_check_configuration_name( + self, example["configuration_name"]) + config_create.i_check_configuration_conf( + self, example["configurations"]) diff --git a/bigml/tests/test_38_organization.py b/bigml/tests/test_38_organization.py index 6f56cccf..4187a474 100644 --- a/bigml/tests/test_38_organization.py +++ b/bigml/tests/test_38_organization.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2018-2019 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -20,15 +21,16 @@ """ import os +import shutil from bigml.api import BigML -from world import world -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_model_steps as model_create -import create_prediction_steps as prediction_create +from .world import world, show_doc, show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_prediction_steps as prediction_create try: @@ -46,18 +48,19 @@ def setup_module(): # Project or Organization IDs world.bck_api = world.api - world.api = BigML(world.USERNAME, world.API_KEY, debug=world.debug, + world.api = BigML(world.username, world.api_key, debug=world.debug, organization=BIGML_ORGANIZATION) - print world.api.connection_info() + print(world.api.connection_info()) world.bck_project_id = world.project_id world.project_id = world.api.create_project( \ {"name": world.test_project_name})['resource'] - world.api = BigML(world.USERNAME, world.API_KEY, debug=world.debug, + world.api = BigML(world.username, world.api_key, debug=world.debug, project=world.project_id) - print world.api.connection_info() + print("New connection: ", world.api.connection_info()) world.clear() +#pylint: disable=locally-disabled,broad-except def teardown_module(): """Operations to be performed after each module @@ -69,64 +72,73 @@ def teardown_module(): if not world.debug: try: world.delete_resources() - except Exception, exc: - print exc - world.api = BigML(world.USERNAME, world.API_KEY, debug=world.debug, + except Exception as exc: + print(exc) + world.api = BigML(world.username, world.api_key, debug=world.debug, organization=BIGML_ORGANIZATION) project_stats = world.api.get_project( \ world.project_id)['object']['stats'] - for resource_type, value in project_stats.items(): + for resource_type, value in list(project_stats.items()): if value['count'] != 0: # assert False, ("Increment in %s: %s" % (resource_type, value)) - print "WARNING: Increment in %s: %s" % (resource_type, value) + print("WARNING: Increment in %s: %s" % (resource_type, value)) world.api.delete_project(world.project_id) world.project_id = world.bck_project_id world.api = world.bck_api - print world.api.connection_info() + print("New connection: ", world.api.connection_info()) -class TestOrgPrediction(object): +class TestOrgPrediction: + """Testing predictions for organization resources""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a prediction in an organization: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - When I create a prediction for "" - Then the prediction for "" is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | - | ../data/iris.csv | 10 | 10 | 10 | 
{"petal width": 0.5} | 000004 | Iris-setosa | - + Scenario: Successfully creating a prediction in an organization: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + When I create a prediction for "" + Then the prediction for "" is "" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction"] examples = [ - ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']] + ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', + '000004', 'Iris-setosa']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) - prediction_create.i_create_a_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_create.i_create_a_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) diff --git a/bigml/tests/test_38_project_connection.py b/bigml/tests/test_38_project_connection.py index 9d5b1789..7175d8a6 100644 --- a/bigml/tests/test_38_project_connection.py +++ b/bigml/tests/test_38_project_connection.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import,broad-except # -# Copyright 2018-2019 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -20,16 +21,17 @@ """ import os +import shutil from bigml.api import BigML -from world import world -from world import setup_module as general_setup_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_model_steps as model_create -import create_prediction_steps as prediction_create +from .world import world, eq_, show_method +from .world import setup_module as general_setup_module +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . 
import create_prediction_steps as prediction_create @@ -41,12 +43,11 @@ def setup_module(): general_setup_module() world.bck_api = world.api - world.api = BigML(world.USERNAME, world.API_KEY, debug=world.debug, + world.api = BigML(world.username, world.api_key, debug=world.debug, project=world.project_id) - print world.api.connection_info() + print(world.api.connection_info()) world.clear() - def teardown_module(): """Operations to be performed after each module @@ -58,66 +59,73 @@ if not world.debug: try: world.delete_resources() - except Exception, exc: - print exc + except Exception as exc: + print(exc) project_stats = world.api.get_project( \ world.project_id)['object']['stats'] - for resource_type, value in project_stats.items(): + for resource_type, value in list(project_stats.items()): if value['count'] != 0: # assert False, ("Increment in %s: %s" % (resource_type, value)) - print "WARNING: Increment in %s: %s" % (resource_type, value) + print("WARNING: Increment in %s: %s" % (resource_type, value)) world.api.delete_project(world.project_id) world.project_id = None world.api = world.bck_api - print world.api.connection_info() + print(world.api.connection_info()) -class TestProjPrediction(object): +class TestProjPrediction: + """Testing predictions in organization's project """ - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a prediction with a user's project connection: - Given I create a data source uploading a "&lt;data&gt;" file - And I wait until the source is ready less than &lt;time_1&gt; secs - And the source is in the project - And I create a dataset - And I wait until the dataset is ready less than &lt;time_2&gt; secs - And I create a model - And I wait until the model is ready less than &lt;time_3&gt; secs - When I create a prediction for "&lt;data_input&gt;" - Then the prediction for "&lt;objective&gt;" is "&lt;prediction&gt;" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | - | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | 000004 | Iris-setosa | - + Scenario: Successfully creating a prediction with a user's project connection: + Given I create a data source uploading a "&lt;data&gt;" file + And I wait until the source is ready less than &lt;source_wait&gt; secs + And the source is in the project + And I create a dataset + And I wait until the dataset is ready less than &lt;dataset_wait&gt; secs + And I create a model + And I wait until the model is ready less than &lt;model_wait&gt; secs + When I create a prediction for "&lt;input_data&gt;" + Then the prediction for "&lt;objective&gt;" is "&lt;prediction&gt;" """ - print self.test_scenario1.__doc__ + print(self.test_scenario1.__doc__) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective", "prediction"] examples = [ ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file_with_project_conn(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - assert world.source['project'] == world.project_id + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) +
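This test now checks resource/project linkage with eq_ instead of bare assert statements, so a failure reports both the actual and the expected value. A rough equivalent of the helper imported from .world; the real implementation may differ:

def eq_(value, expected, msg=None):
    assert value == expected, msg or "%s != %s" % (value, expected)

world_source = {"project": "project/abc123"}   # stand-in for world.source
eq_(world_source["project"], "project/abc123")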
source_create.i_upload_a_file_with_project_conn( + self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + eq_(world.source['project'], world.project_id) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - assert world.dataset['project'] == world.project_id + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + eq_(world.dataset['project'], world.project_id) model_create.i_create_a_model(self) - model_create.the_model_is_finished_in_less_than(self, example[3]) - assert world.model['project'] == world.project_id - prediction_create.i_create_a_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) - assert world.prediction['project'] == world.project_id + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + eq_(world.model['project'], world.project_id) + prediction_create.i_create_a_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective"], example["prediction"]) + eq_(world.prediction['project'], world.project_id) diff --git a/bigml/tests/test_39_optiml_fusion.py b/bigml/tests/test_39_optiml_fusion.py index a33d5679..0ff5992f 100644 --- a/bigml/tests/test_39_optiml_fusion.py +++ b/bigml/tests/test_39_optiml_fusion.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2018-2019 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,346 +20,432 @@ """ Creating optimls and fusions """ -from world import world, setup_module, teardown_module -import create_model_steps as model_create -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import compare_predictions_steps as compare_pred -import create_prediction_steps as prediction_create -import create_evaluation_steps as evaluation_create -import create_batch_prediction_steps as batch_pred_create +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_model_steps as model_create +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import compare_predictions_steps as compare_pred +from . import create_prediction_steps as prediction_create +from . import create_evaluation_steps as evaluation_create +from . 
import create_batch_prediction_steps as batch_pred_create -class TestOptimlFusion(object): +class TestOptimlFusion: + """Testing OptiML and Fusion methods""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario 1: Successfully creating an optiml from a dataset: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create an optiml from a dataset - And I wait until the optiml is ready less than secs - And I update the optiml name to "" - When I wait until the optiml is ready less than secs - Then the optiml name is "" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | optiml_name | - | ../data/iris.csv | 10 | 10 | 2000 | 20 | my new optiml name | + Scenario 1: Successfully creating an optiml from a dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an optiml from a dataset + And I wait until the optiml is ready less than secs + And I update the optiml name to "" + When I wait until the optiml is ready less than secs + Then the optiml name is "" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "optiml_name"] examples = [ - ['data/iris.csv', '10', '10', '10000', '20', 'my new optiml name']] + ['data/iris.csv', '10', '10', '300', 'my new optiml name']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) model_create.i_create_an_optiml_with_objective_and_params( \ self, parms='{"max_training_time": %s, "model_types": ' '["model", "logisticregression"]}' % \ - (int(float(example[3])/1000) - 1)) - model_create.the_optiml_is_finished_in_less_than(self, example[3]) - model_create.i_update_optiml_name(self, example[5]) - model_create.the_optiml_is_finished_in_less_than(self, example[4]) - model_create.i_check_optiml_name(self, example[5]) + (int(float(example["model_wait"])/10) - 1)) + model_create.the_optiml_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_update_optiml_name(self, example["optiml_name"]) + model_create.the_optiml_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_check_optiml_name(self, example["optiml_name"]) def 
test_scenario2(self): """ - Scenario 2: Successfully creating a fusion: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I retrieve a list of remote models tagged with "" - And I create a fusion from a list of models - And I wait until the fusion is ready less than secs - And I update the fusion name to "" - When I wait until the fusion is ready less than secs - And I create a prediction for "" - Then the fusion name is "" - And the prediction for "" is "" - And I create an evaluation for the fusion with the dataset - And I wait until the evaluation is ready less than secs - Then the measured "" is - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | fusion_name | data_input | objective | prediction - | ../data/iris.csv | 10 | 10 | 20 | 20 | my new fusion name | {"petal length": 1, "petal width": 1} | "000004" | "Iris-setosa" + Scenario 2: Successfully creating a fusion: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I retrieve a list of remote models tagged with "" + And I create a fusion from a list of models + And I wait until the fusion is ready less than secs + And I update the fusion name to "" + When I wait until the fusion is ready less than secs + And I create a prediction for "" + Then the fusion name is "" + And the prediction for "" is "" + And I create an evaluation for the fusion with the dataset + And I wait until the evaluation is ready less than secs + Then the measured "" is """ - print self.test_scenario2.__doc__ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "fusion_wait", "evaluation_wait", "fusion_name", + "model_conf", "tag", "input_data", "objective_id", + "prediction", "metric", "value"] examples = [ - ['data/iris.csv', '10', '10', '20', '20', 'my new fusion name', + ['data/iris.csv', '10', '10', '50', '50', '50', + 'my new fusion name', '{"tags":["my_fusion_2_tag"]}', 'my_fusion_2_tag', '{"petal width": 1.75, "petal length": 2.45}', "000004", "Iris-setosa", 'average_phi', '1.0']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_model_with(self, example[6]) - model_create.the_model_is_finished_in_less_than(self, example[3]) - model_create.i_create_a_model_with(self, example[6]) - model_create.the_model_is_finished_in_less_than(self, example[3]) - model_create.i_create_a_model_with(self, example[6]) - model_create.the_model_is_finished_in_less_than(self, example[3]) - compare_pred.i_retrieve_a_list_of_remote_models(self, example[7]) + example = dict(zip(headers, example)) + 
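Scenario 2 drives the whole fusion workflow: train three models sharing a tag, list them back by that tag, and fuse them into one predictor. Against the REST API the flow looks roughly like this; the dataset ID is a placeholder and error handling is omitted:

from bigml.api import BigML

api = BigML()
dataset = "dataset/aaaaaaaaaaaaaaaaaaaaaaaa"  # placeholder ID
for _ in range(3):
    model = api.create_model(dataset, {"tags": ["my_fusion_2_tag"]})
    api.ok(model)  # wait for each model to finish
models = api.list_models("tags__in=my_fusion_2_tag")
model_ids = [m["resource"] for m in models["objects"]]
fusion = api.create_fusion(model_ids)
api.ok(fusion)
prediction = api.create_prediction(
    fusion, {"petal width": 1.75, "petal length": 2.45})
api.ok(prediction)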
show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + model_create.i_create_a_model_with( + self, example["model_conf"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with( + self, example["model_conf"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with( + self, example["model_conf"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + compare_pred.i_retrieve_a_list_of_remote_models( + self, example["tag"]) model_create.i_create_a_fusion(self) - model_create.the_fusion_is_finished_in_less_than(self, example[3]) - model_create.i_update_fusion_name(self, example[5]) - model_create.the_fusion_is_finished_in_less_than(self, example[4]) - model_create.i_check_fusion_name(self, example[5]) - prediction_create.i_create_a_fusion_prediction(self, example[8]) - prediction_create.the_prediction_is(self, example[9], example[10]) + model_create.the_fusion_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_update_fusion_name(self, example["fusion_name"]) + model_create.the_fusion_is_finished_in_less_than( + self, example["fusion_wait"]) + model_create.i_check_fusion_name(self, example["fusion_name"]) + prediction_create.i_create_a_fusion_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) evaluation_create.i_create_an_evaluation_fusion(self) - evaluation_create.the_evaluation_is_finished_in_less_than(self, example[3]) - evaluation_create.the_measured_measure_is_value(self, example[11], example[12]) + evaluation_create.the_evaluation_is_finished_in_less_than( + self, example["evaluation_wait"]) + evaluation_create.the_measured_measure_is_value( + self, example["metric"], example["value"]) def test_scenario3(self): """ - Scenario 3: Successfully creating a fusion: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I retrieve a list of remote models tagged with "" - And I create a fusion from a list of models - And I wait until the fusion is ready less than secs - When I create a batch prediction for the dataset with the fusion - And I wait until the batch prediction is ready less than secs - And I download the created predictions file to "" - Then the batch prediction file is like "" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | tag | local_file | predictions_file | - | ../data/iris.csv | 10 | 10 | 20 | 20 | mytag | ./tmp/batch_predictions.csv | ./data/batch_predictions_fs.csv | + Scenario 3: Successfully creating a fusion: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And 
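A note for readers without the step modules at hand: scenario 2 exercises the plain remote create/wait cycle for a fusion and its evaluation. A hedged sketch against the bindings' public client (the `model_ids` and `dataset` variables are assumed to exist; the step helpers' internals are not shown in this diff):

    from bigml.api import BigML

    api = BigML()  # reads BIGML_USERNAME / BIGML_API_KEY from the environment
    fusion = api.create_fusion(model_ids)   # model_ids: list of model resource ids
    api.ok(fusion)                          # block until the fusion is finished
    evaluation = api.create_evaluation(fusion, dataset)
    api.ok(evaluation)
    # The scenario's 'average_phi'/'1.0' pair suggests the measure is then
    # read out of the finished evaluation's result.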
     def test_scenario3(self):
         """
-            Scenario 3: Successfully creating a fusion:
-                Given I create a data source uploading a "<data>" file
-                And I wait until the source is ready less than <time_1> secs
-                And I create a dataset
-                And I wait until the dataset is ready less than <time_2> secs
-                And I create a model with "<params>"
-                And I wait until the model is ready less than <time_3> secs
-                And I create a model with "<params>"
-                And I wait until the model is ready less than <time_3> secs
-                And I create a model with "<params>"
-                And I wait until the model is ready less than <time_3> secs
-                And I retrieve a list of remote models tagged with "<tag>"
-                And I create a fusion from a list of models
-                And I wait until the fusion is ready less than <time_3> secs
-                When I create a batch prediction for the dataset with the fusion
-                And I wait until the batch prediction is ready less than <time_4> secs
-                And I download the created predictions file to "<local_file>"
-                Then the batch prediction file is like "<predictions_file>"
-
-                Examples:
-                | data | time_1 | time_2 | time_3 | time_4 | tag | local_file | predictions_file |
-                | ../data/iris.csv | 10 | 10 | 20 | 20 | mytag | ./tmp/batch_predictions.csv | ./data/batch_predictions_fs.csv |
+        Scenario 3: Successfully creating a fusion:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a model with "<model_conf>"
+            And I wait until the model is ready less than <model_wait> secs
+            And I create a model with "<model_conf>"
+            And I wait until the model is ready less than <model_wait> secs
+            And I create a model with "<model_conf>"
+            And I wait until the model is ready less than <model_wait> secs
+            And I retrieve a list of remote models tagged with "<tag>"
+            And I create a fusion from a list of models
+            And I wait until the fusion is ready less than <fusion_wait> secs
+            When I create a batch prediction for the dataset with the fusion
+            And I wait until the batch prediction is ready less than <batch_wait> secs
+            And I download the created predictions file to "<local_file>"
+            Then the batch prediction file is like "<predictions_file>"
         """
-        print self.test_scenario3.__doc__
+        show_doc(self.test_scenario3)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "fusion_wait", "batch_wait", "model_conf", "tag",
+                   "local_file", "predictions_file"]
         examples = [
-            ['data/iris.csv', '10', '10', '20', '20',
+            ['data/iris.csv', '10', '10', '30', '30', '30',
              '{"tags":["my_fusion_3_tag"]}', 'my_fusion_3_tag',
              'tmp/batch_predictions.csv', 'data/batch_predictions_fs.csv']]
         for example in examples:
-            print "\nTesting with:\n", example
-            source_create.i_upload_a_file(self, example[0])
-            source_create.the_source_is_finished(self, example[1])
-            dataset_create.i_create_a_dataset(self)
-            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
-            model_create.i_create_a_model_with(self, example[5])
-            model_create.the_model_is_finished_in_less_than(self, example[3])
-            model_create.i_create_a_model_with(self, example[5])
-            model_create.the_model_is_finished_in_less_than(self, example[3])
-            model_create.i_create_a_model_with(self, example[5])
-            model_create.the_model_is_finished_in_less_than(self, example[3])
-            compare_pred.i_retrieve_a_list_of_remote_models(self, example[6])
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_model_with(
+                self, example["model_conf"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            model_create.i_create_a_model_with(
+                self, example["model_conf"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            model_create.i_create_a_model_with(
+                self, example["model_conf"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            compare_pred.i_retrieve_a_list_of_remote_models(
+                self, example["tag"])
             model_create.i_create_a_fusion(self)
-            model_create.the_fusion_is_finished_in_less_than(self, example[3])
+            model_create.the_fusion_is_finished_in_less_than(
+                self, example["fusion_wait"])
             batch_pred_create.i_create_a_batch_prediction_fusion(self)
-            batch_pred_create.the_batch_prediction_is_finished_in_less_than(self, example[4])
-            batch_pred_create.i_download_predictions_file(self, example[7])
-            batch_pred_create.i_check_predictions(self, example[8])
-
+            batch_pred_create.the_batch_prediction_is_finished_in_less_than(
+                self, example["batch_wait"])
+            batch_pred_create.i_download_predictions_file(
+                self, example["local_file"])
+            batch_pred_create.i_check_predictions(
+                self, example["predictions_file"])
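The batch-prediction steps above wrap what is, at the API level, a create / wait / download cycle. A rough equivalent with the bindings' client (the `fusion` and `dataset` variables are assumed to exist; the filename mirrors the scenario's `local_file`):

    # Score a whole dataset with the fusion, then fetch the CSV locally.
    batch = api.create_batch_prediction(fusion, dataset)
    api.ok(batch)
    api.download_batch_prediction(batch, filename="tmp/batch_predictions.csv")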
self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + compare_pred.i_retrieve_a_list_of_remote_logistic_regressions( + self, example["tag"]) model_create.i_create_a_fusion(self) - model_create.the_fusion_is_finished_in_less_than(self, example[3]) + model_create.the_fusion_is_finished_in_less_than( + self, example["fusion_wait"]) compare_pred.i_create_a_local_fusion(self) - prediction_create.i_create_a_fusion_prediction(self, example[7]) - prediction_create.the_prediction_is(self, example[8], example[9]) - prediction_create.the_fusion_probability_is(self, example[10]) - compare_pred.i_create_a_local_prediction(self, example[7]) - compare_pred.the_local_prediction_is(self, example[9]) - compare_pred.the_local_probability_is(self, example[10]) - + prediction_create.i_create_a_fusion_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_create.the_fusion_probability_is( + self, example["probability"]) + compare_pred.i_create_a_local_prediction( + self, example["input_data"]) + compare_pred.the_local_prediction_is( + self, example["prediction"]) + compare_pred.the_local_probability_is( + self, example["probability"]) + compare_pred.the_local_confidence_is( + self, example["confidence"]) def test_scenario5(self): """ - Scenario 5: Successfully creating a fusion: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a logistic regression with "" - And I wait until the logistic regression is ready less than secs - And I create a logistic regression with "" - And I wait until the logistic regression is ready less than secs - And I retrieve a list of remote logistic regression tagged with "" - And I create a fusion from a list of models - And I wait until the fusion is ready less than secs - When I create a prediction for "" - Then the prediction for "" is "" - And the fusion probability for the prediction is "" - And I create a local fusion prediction for "" - Then the local fusion prediction is "" - And the local fusion probability for the prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | data_input | objective | prediction - | ../data/iris.csv | 10 | 10 | 20 | 20 | {"petal length": 1, "petal width": 1} | "000004" | "Iris-setosa" + Scenario 5: Successfully creating a fusion: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait 
until the model is ready less than secs + And I create a logistic regression with "" + And I wait until the logistic regression is ready less than secs + And I create a logistic regression with "" + And I wait until the logistic regression is ready less than secs + And I retrieve a list of remote logistic regression tagged with "" + And I create a fusion from a list of models + And I wait until the fusion is ready less than secs + When I create a prediction for "" + Then the prediction for "" is "" + And the fusion probability for the prediction is "" + And I create a local fusion prediction for "" + Then the local fusion prediction is "" + And the local fusion probability for the prediction is "" """ - print self.test_scenario5.__doc__ + show_doc(self.test_scenario5) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "fusion_wait", "model_conf1", "model_conf2", "tag", + "input_data", "objective_id", "prediction", "probability"] examples = [ - ['data/iris.csv', '10', '10', '20', '20', + ['data/iris.csv', '10', '10', '30', '30', '{"tags":["my_fusion_5_tag"], "missing_numerics": true}', + '{"tags":["my_fusion_5_tag"], "missing_numerics": false, ' + '"balance_fields": false }', 'my_fusion_5_tag', '{"petal width": 1.75, "petal length": 2.45}', "000004", "Iris-setosa", - '0.4727', - '{"tags":["my_fusion_5_tag"], "missing_numerics": false, "balance_fields": false }']] + '0.4726']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[5]) - model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) - model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[11]) - model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) - compare_pred.i_retrieve_a_list_of_remote_logistic_regressions(self, example[6]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf1"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf2"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + compare_pred.i_retrieve_a_list_of_remote_logistic_regressions( + self, example["tag"]) model_create.i_create_a_fusion(self) - model_create.the_fusion_is_finished_in_less_than(self, example[3]) + model_create.the_fusion_is_finished_in_less_than( + self, example["fusion_wait"]) compare_pred.i_create_a_local_fusion(self) - prediction_create.i_create_a_fusion_prediction(self, example[7]) - prediction_create.the_prediction_is(self, example[8], example[9]) - prediction_create.the_fusion_probability_is(self, example[10]) - compare_pred.i_create_a_local_prediction(self, example[7]) - compare_pred.the_local_prediction_is(self, example[9]) - 
compare_pred.the_local_probability_is(self, example[10]) - + prediction_create.i_create_a_fusion_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_create.the_fusion_probability_is( + self, example["probability"]) + compare_pred.i_create_a_local_prediction( + self, example["input_data"]) + compare_pred.the_local_prediction_is( + self, example["prediction"]) + compare_pred.the_local_probability_is( + self, example["probability"]) def test_scenario6(self): """ - Scenario 6: Successfully creating a fusion: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model with "" - And I wait until the model is ready less than secs - And I create a logistic regression with "" - And I wait until the logistic regression is ready less than secs - And I create a logistic regression with "" - And I wait until the logistic regression is ready less than secs - And I retrieve a list of remote logistic regression tagged with "" - And I create a fusion from a list of models and weights - And I wait until the fusion is ready less than secs - When I create a prediction for "" - Then the prediction for "" is "" - And the fusion probability for the prediction is "" - And I create a local fusion prediction for "" - Then the local fusion prediction is "" - And the local fusion probability for the prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | data_input | objective | prediction - | ../data/iris.csv | 10 | 10 | 20 | 20 | {"petal length": 1, "petal width": 1} | "000004" | "Iris-setosa" + Scenario 6: Successfully creating a fusion: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a logistic regression with "" + And I wait until the logistic regression is ready less than secs + And I create a logistic regression with "" + And I wait until the logistic regression is ready less than secs + And I retrieve a list of remote logistic regression tagged with "" + And I create a fusion from a list of models and weights "" + And I wait until the fusion is ready less than secs + When I create a prediction for "" + Then the prediction for "" is "" + And the fusion probability for the prediction is "" + And I create a local fusion prediction for "" + Then the local fusion prediction is "" + And the local fusion probability for the prediction is "" """ - print self.test_scenario6.__doc__ + show_doc(self.test_scenario6) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "fusion_wait", "model_conf1", "model_conf2", "tag", + "input_data", "objective_id", + "prediction", "probability", "fusion_weights"] examples = [ - ['data/iris.csv', '10', '10', '20', '20', + ['data/iris.csv', '10', '10', '30', '30', '{"tags":["my_fusion_6_tag"], "missing_numerics": true}', + '{"tags":["my_fusion_6_tag"], "missing_numerics": false, ' + '"balance_fields": false }', 'my_fusion_6_tag', '{"petal width": 1.75, "petal length": 2.45}', "000004", "Iris-setosa", - '0.4727', - '{"tags":["my_fusion_6_tag"], "missing_numerics": false, "balance_fields": false }', '[1, 2]']] + '0.4726', '[1, 2]']] for example in examples: - print "\nTesting 
with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[5]) - model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) - model_create.i_create_a_logistic_model_with_objective_and_parms(self, example[8], example[11]) - model_create.the_logistic_model_is_finished_in_less_than(self, example[3]) - compare_pred.i_retrieve_a_list_of_remote_logistic_regressions(self, example[6]) - model_create.i_create_a_fusion_with_weights(self, example[12]) - model_create.the_fusion_is_finished_in_less_than(self, example[3]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf1"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf2"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + compare_pred.i_retrieve_a_list_of_remote_logistic_regressions( + self, example["tag"]) + model_create.i_create_a_fusion_with_weights( + self, example["fusion_weights"]) + model_create.the_fusion_is_finished_in_less_than( + self, example["fusion_wait"]) compare_pred.i_create_a_local_fusion(self) - prediction_create.i_create_a_fusion_prediction(self, example[7]) - prediction_create.the_prediction_is(self, example[8], example[9]) - prediction_create.the_fusion_probability_is(self, example[10]) - compare_pred.i_create_a_local_prediction(self, example[7]) - compare_pred.the_local_prediction_is(self, example[9]) - compare_pred.the_local_probability_is(self, example[10]) + prediction_create.i_create_a_fusion_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_create.the_fusion_probability_is( + self, example["probability"]) + compare_pred.i_create_a_local_prediction( + self, example["input_data"]) + compare_pred.the_local_prediction_is(self, example["prediction"]) + compare_pred.the_local_probability_is(self, example["probability"]) diff --git a/bigml/tests/test_40_local_from_file.py b/bigml/tests/test_40_local_from_file.py index 1ab003a8..c8311285 100644 --- a/bigml/tests/test_40_local_from_file.py +++ b/bigml/tests/test_40_local_from_file.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2018-2019 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
diff --git a/bigml/tests/test_40_local_from_file.py b/bigml/tests/test_40_local_from_file.py
index 1ab003a8..c8311285 100644
--- a/bigml/tests/test_40_local_from_file.py
+++ b/bigml/tests/test_40_local_from_file.py
@@ -1,7 +1,8 @@
 # -*- coding: utf-8 -*-
-#!/usr/bin/env python
+#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init
+#pylint: disable=locally-disabled,unused-import
 #
-# Copyright 2018-2019 BigML
+# Copyright 2018-2025 BigML
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may
 # not use this file except in compliance with the License. You may obtain
@@ -19,396 +20,536 @@
 """ Creating tests for building local models from files
 """
-from world import world, setup_module, teardown_module
-import create_model_steps as model_create
-import create_linear_steps as linear_create
-import create_source_steps as source_create
-import create_dataset_steps as dataset_create
-import create_ensemble_steps as ensemble_create
-import create_anomaly_steps as anomaly_create
-import create_time_series_steps as timeseries_create
-import create_association_steps as association_create
-import create_cluster_steps as cluster_create
-import create_lda_steps as topic_create
-import compare_predictions_steps as prediction_compare
-from bigml.util import PY3
+from .world import world, setup_module, teardown_module, show_doc, \
+    show_method
+from . import create_model_steps as model_create
+from . import create_linear_steps as linear_create
+from . import create_source_steps as source_create
+from . import create_dataset_steps as dataset_create
+from . import create_ensemble_steps as ensemble_create
+from . import create_anomaly_steps as anomaly_create
+from . import create_time_series_steps as timeseries_create
+from . import create_association_steps as association_create
+from . import create_cluster_steps as cluster_create
+from . import create_lda_steps as topic_create
+from . import compare_predictions_steps as prediction_compare

-class TestLocalFromFile(object):
-    def setup(self):
+class TestLocalFromFile:
+    """Testing locally generated code"""
+
+    def setup_method(self, method):
         """
             Debug information
         """
-        print "\n-------------------\nTests in: %s\n" % __name__
+        self.bigml = {}
+        self.bigml["method"] = method.__name__
+        print("\n-------------------\nTests in: %s\n" % __name__)

-    def teardown(self):
+    def teardown_method(self):
         """
             Debug information
         """
-        print "\nEnd of tests in: %s\n-------------------\n" % __name__
+        print("\nEnd of tests in: %s\n-------------------\n" % __name__)
+        self.bigml = {}

     def test_scenario1(self):
         """
-            Scenario 1: Successfully creating a local model from an exported file:
-                Given I create a data source uploading a "<data>" file
-                And I wait until the source is ready less than <time_1> secs
-                And I create a dataset
-                And I wait until the dataset is ready less than <time_2> secs
-                And I create a model
-                And I wait until the model is ready less than <time_3> secs
-                And I export the "<pmml>" model to "<exported_file>"
-                When I create a local model from the file "<exported_file>"
-                Then the model ID and the local model ID match
-                Examples:
-                | data | time_1 | time_2 | time_3 | pmml | exported_file
-                | ../data/iris.csv | 10 | 10 | 10 | False | ./tmp/model.json
+        Scenario 1: Successfully creating a local model from an exported file:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a model with params "<model_conf>"
+            And I wait until the model is ready less than <model_wait> secs
+            And I export the "<pmml>" model to "<exported_file>"
+            When I create a local model from the file "<exported_file>"
+            Then the model ID and the local model ID match
+            And the prediction for "<input_data>" is "<prediction>"
+            And the number of leaves is "<leaves#>"
         """
-        print self.test_scenario1.__doc__
+        show_doc(self.test_scenario1)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "pmml", "exported_file", "input_data", "prediction",
+                   "model_conf", 'leaves#']
         examples = [
-            ['data/iris.csv', '10', '10', '10', False, './tmp/model.json']]
+            ['data/iris.csv', '10', '10', '10', False,
+             './tmp/model.json', {}, "Iris-setosa", '{}', 9],
+            ['data/iris.csv', '10', '10', '10', False,
+             './tmp/model_dft.json', {}, "Iris-versicolor",
+             '{"default_numeric_value": "mean"}', 9]]
         for example in examples:
-            print "\nTesting with:\n", example
-            source_create.i_upload_a_file(self, example[0])
-            source_create.the_source_is_finished(self, example[1])
-            dataset_create.i_create_a_dataset(self)
-            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
-            model_create.i_create_a_model(self)
-            model_create.the_model_is_finished_in_less_than(self, example[3])
-            model_create.i_export_model(self, example[4], example[5])
-            model_create.i_create_local_model_from_file(self, example[5])
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_model_with(self, example["model_conf"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            model_create.i_export_model(
+                self, example["pmml"], example["exported_file"])
+            model_create.i_create_local_model_from_file(
+                self, example["exported_file"])
             model_create.check_model_id_local_id(self)
-
+            model_create.local_model_prediction_is(
+                self, example["input_data"], example["prediction"])
+            model_create.check_leaves_number(self, example["leaves#"])

     def test_scenario2(self):
         """
-            Scenario 2: Successfully creating a local ensemble from an exported file:
-                Given I create a data source uploading a "<data>" file
-                And I wait until the source is ready less than <time_1> secs
-                And I create a dataset
-                And I wait until the dataset is ready less than <time_2> secs
-                And I create an ensemble
-                And I wait until the ensemble is ready less than <time_3> secs
-                And I export the ensemble to "<exported_file>"
-                When I create a local ensemble from the file "<exported_file>"
-                Then the ensemble ID and the local ensemble ID match
-                Examples:
-                | data | time_1 | time_2 | time_3 | exported_file
-                | ../data/iris.csv | 10 | 10 | 50 | ./tmp/ensemble.json
+        Scenario 2: Successfully creating a local ensemble from an exported file:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create an ensemble with "<model_conf>"
+            And I wait until the ensemble is ready less than <model_wait> secs
+            And I export the ensemble to "<exported_file>"
+            When I create a local ensemble from the file "<exported_file>"
+            Then the ensemble ID and the local ensemble ID match
+            And the prediction for "<input_data>" is "<prediction>"
         """
-        print self.test_scenario2.__doc__
+        show_doc(self.test_scenario2)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "exported_file", "input_data", "prediction",
+                   "model_conf"]
         examples = [
-            ['data/iris.csv', '10', '10', '50', './tmp/ensemble.json']]
+            ['data/iris.csv', '10', '10', '50', './tmp/ensemble.json',
+             {}, {'probability': 0.35714, 'prediction': 'Iris-versicolor'},
+             '{}'],
+            ['data/iris.csv', '10', '10', '50', './tmp/ensemble_dft.json',
+             {}, {'probability': 0.98209, 'prediction': 'Iris-versicolor'},
+             '{"default_numeric_value": "mean"}']]
         for example in examples:
-            print "\nTesting with:\n", example
-            source_create.i_upload_a_file(self, example[0])
-            source_create.the_source_is_finished(self, example[1])
-            dataset_create.i_create_a_dataset(self)
-            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
-            ensemble_create.i_create_an_ensemble(self)
-            ensemble_create.the_ensemble_is_finished_in_less_than(self, example[3])
-            ensemble_create.i_export_ensemble(self, example[4])
-            ensemble_create.i_create_local_ensemble_from_file(self, example[4])
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            ensemble_create.i_create_an_ensemble_with_params(
+                self, example["model_conf"])
+            ensemble_create.the_ensemble_is_finished_in_less_than(
+                self, example["model_wait"])
+            ensemble_create.i_export_ensemble(self, example["exported_file"])
+            ensemble_create.i_create_local_ensemble_from_file(
+                self, example["exported_file"])
             ensemble_create.check_ensemble_id_local_id(self)
-
+            model_create.local_ensemble_prediction_is(
+                self, example["input_data"], example["prediction"])

     def test_scenario3(self):
         """
-            Scenario 3: Successfully creating a local logistic regression from an exported file:
-                Given I create a data source uploading a "<data>" file
-                And I wait until the source is ready less than <time_1> secs
-                And I create a dataset
-                And I wait until the dataset is ready less than <time_2> secs
-                And I create a logistic regression
-                And I wait until the logistic regression is ready less than <time_3> secs
-                And I export the logistic regression to "<exported_file>"
-                When I create a local logistic regression from the file "<exported_file>"
-                Then the logistic regression ID and the local logistic regression ID match
-                Examples:
-                | data | time_1 | time_2 | time_3 | exported_file
-                | ../data/iris.csv | 10 | 10 | 50 | ./tmp/logistic.json
+        Scenario 3: Successfully creating a local logistic regression from an exported file:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a logistic regression with "<model_conf>"
+            And I wait until the logistic regression is ready less than <model_wait> secs
+            And I export the logistic regression to "<exported_file>"
+            When I create a local logistic regression from the file "<exported_file>"
+            Then the logistic regression ID and the local logistic regression ID match
+            And the prediction for "<input_data>" is "<prediction>"
         """
-        print self.test_scenario3.__doc__
+        show_doc(self.test_scenario3)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "exported_file", "input_data", "prediction",
+                   "model_conf"]
         examples = [
-            ['data/iris.csv', '10', '10', '50', './tmp/logistic.json']]
+            ['data/iris.csv', '10', '10', '50', './tmp/logistic.json', {},
+             'Iris-versicolor', '{}'],
+            ['data/iris.csv', '10', '10', '50', './tmp/logistic_dft.json', {},
+             'Iris-virginica', '{"default_numeric_value": "maximum"}']]
         for example in examples:
-            print "\nTesting with:\n", example
-            source_create.i_upload_a_file(self, example[0])
-            source_create.the_source_is_finished(self, example[1])
-            dataset_create.i_create_a_dataset(self)
-            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
-            model_create.i_create_a_logistic_model(self)
-            model_create.the_logistic_model_is_finished_in_less_than(self, example[3])
-            model_create.i_export_logistic_regression(self, example[4])
-            model_create.i_create_local_logistic_regression_from_file(self, example[4])
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_logistic_model_with_objective_and_parms(
+                self, parms=example["model_conf"])
+            model_create.the_logistic_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            model_create.i_export_logistic_regression(
+                self, example["exported_file"])
+            model_create.i_create_local_logistic_regression_from_file(
+                self, example["exported_file"])
             model_create.check_logistic_regression_id_local_id(self)
-
+            model_create.local_logistic_prediction_is(
+                self, example["input_data"], example["prediction"])

     def test_scenario4(self):
         """
-            Scenario 4: Successfully creating a local deepnet from an exported file:
-                Given I create a data source uploading a "<data>" file
-                And I wait until the source is ready less than <time_1> secs
-                And I create a dataset
-                And I wait until the dataset is ready less than <time_2> secs
-                And I create a deepnet
-                And I wait until the deepnet is ready less than <time_3> secs
-                And I export the deepnet to "<exported_file>"
-                When I create a local deepnet from the file "<exported_file>"
-                Then the deepnet ID and the local deepnet ID match
-                Examples:
-                | data | time_1 | time_2 | time_3 | exported_file
-                | ../data/iris.csv | 10 | 10 | 50 | ./tmp/deepnet.json
+        Scenario 4: Successfully creating a local deepnet from an exported file:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a deepnet with "<model_conf>"
+            And I wait until the deepnet is ready less than <model_wait> secs
+            And I export the deepnet to "<exported_file>"
+            When I create a local deepnet from the file "<exported_file>"
+            Then the deepnet ID and the local deepnet ID match
+            And the prediction for "<input_data>" is "<prediction>"
         """
-        print self.test_scenario4.__doc__
+        show_doc(self.test_scenario4)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "exported_file", "input_data", "prediction",
+                   "model_conf"]
         examples = [
-            ['data/iris.csv', '10', '10', '500', './tmp/deepnet.json']]
+            ['data/iris.csv', '10', '10', '500', './tmp/deepnet.json', {},
+             'Iris-versicolor', '{}'],
+            ['data/iris.csv', '10', '10', '500', './tmp/deepnet_dft.json', {},
+             'Iris-versicolor', '{"default_numeric_value": "maximum"}']]
         for example in examples:
-            print "\nTesting with:\n", example
-            source_create.i_upload_a_file(self, example[0])
-            source_create.the_source_is_finished(self, example[1])
-            dataset_create.i_create_a_dataset(self)
-            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
-            model_create.i_create_a_deepnet(self)
-            model_create.the_deepnet_is_finished_in_less_than(self, example[3])
-            model_create.i_export_deepnet(self, example[4])
-            model_create.i_create_local_deepnet_from_file(self, example[4])
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_deepnet_with_objective_and_params(
+                self, parms=example["model_conf"])
+            model_create.the_deepnet_is_finished_in_less_than(
+                self, example["model_wait"])
+            model_create.i_export_deepnet(self, example["exported_file"])
+            model_create.i_create_local_deepnet_from_file(
+                self, example["exported_file"])
             model_create.check_deepnet_id_local_id(self)
-
+            model_create.local_deepnet_prediction_is(
+                self, example["input_data"], example["prediction"])

     def test_scenario5(self):
         """
-            Scenario 5: Successfully creating a local cluster from an exported file:
-                Given I create a data source uploading a "<data>" file
-                And I wait until the source is ready less than <time_1> secs
-                And I create a dataset
-                And I wait until the dataset is ready less than <time_2> secs
-                And I create a cluster
-                And I wait until the cluster is ready less than <time_3> secs
-                And I export the cluster to "<exported_file>"
-                When I create a local cluster from the file "<exported_file>"
-                Then the cluster ID and the local cluster ID match
-                Examples:
-                | data | time_1 | time_2 | time_3 | exported_file
-                | ../data/iris.csv | 10 | 10 | 50 | ./tmp/cluster.json
+        Scenario 5: Successfully creating a local cluster from an exported file:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a cluster with "<model_conf>"
+            And I wait until the cluster is ready less than <model_wait> secs
+            And I export the cluster to "<exported_file>"
+            When I create a local cluster from the file "<exported_file>"
+            Then the cluster ID and the local cluster ID match
+            And the prediction for "<input_data>" is "<prediction>"
         """
-        print self.test_scenario5.__doc__
+        show_doc(self.test_scenario5)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "exported_file", "input_data", "prediction",
+                   "model_conf"]
         examples = [
-            ['data/iris.csv', '10', '10', '500', './tmp/cluster.json']]
+            ['data/iris.csv', '10', '10', '500', './tmp/cluster.json',
+             {"petal length": 2, "petal width": 2, "sepal length": 2,
+              "sepal width": 2, "species": "Iris-setosa"},
+             {'centroid_id': '000007', 'centroid_name': 'Cluster 7',
+              'distance': 0.7340597799442431}, '{}'],
+            ['data/iris.csv', '10', '10', '500', './tmp/cluster_dft.json', {},
+             {'centroid_id': '000005', 'centroid_name': 'Cluster 5',
+              'distance': 0.502695797586787},
+             '{"default_numeric_value": "maximum"}']]
         for example in examples:
-            print "\nTesting with:\n", example
-            source_create.i_upload_a_file(self, example[0])
-            source_create.the_source_is_finished(self, example[1])
-            dataset_create.i_create_a_dataset(self)
-            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
-            cluster_create.i_create_a_cluster(self)
-            cluster_create.the_cluster_is_finished_in_less_than(self, example[3])
-            cluster_create.i_export_cluster(self, example[4])
-            cluster_create.i_create_local_cluster_from_file(self, example[4])
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            cluster_create.i_create_a_cluster_with_options(
+                self, example["model_conf"])
+            cluster_create.the_cluster_is_finished_in_less_than(
+                self, example["model_wait"])
+            cluster_create.i_export_cluster(self, example["exported_file"])
+            cluster_create.i_create_local_cluster_from_file(
+                self, example["exported_file"])
             cluster_create.check_cluster_id_local_id(self)
-
+            model_create.local_cluster_prediction_is(
+                self, example["input_data"], example["prediction"])

     def test_scenario6(self):
         """
-            Scenario 6: Successfully creating a local anomaly from an exported file:
-                Given I create a data source uploading a "<data>" file
-                And I wait until the source is ready less than <time_1> secs
-                And I create a dataset
-                And I wait until the dataset is ready less than <time_2> secs
-                And I create an anomaly
-                And I wait until the anomaly is ready less than <time_3> secs
-                And I export the anomaly to "<exported_file>"
-                When I create a local anomaly from the file "<exported_file>"
-                Then the anomaly ID and the local anomaly ID match
-                Examples:
-                | data | time_1 | time_2 | time_3 | exported_file
-                | ../data/iris.csv | 10 | 10 | 50 | ./tmp/anomaly.json
+        Scenario 6: Successfully creating a local anomaly from an exported file:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create an anomaly with "<model_conf>"
+            And I wait until the anomaly is ready less than <model_wait> secs
+            And I export the anomaly to "<exported_file>"
+            When I create a local anomaly from the file "<exported_file>"
+            Then the anomaly ID and the local anomaly ID match
+            And the prediction for "<input_data>" is "<prediction>"
         """
-        print self.test_scenario6.__doc__
+        show_doc(self.test_scenario6)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "exported_file", "input_data", "prediction",
+                   "model_conf"]
         examples = [
-            ['data/iris.csv', '10', '10', '500', './tmp/anomaly.json']]
+            ['data/iris.csv', '10', '10', '500', './tmp/anomaly.json',
+             {"petal length": 2, "petal width": 2, "sepal length": 2,
+              "sepal width": 2, "species": "Iris-setosa"},
+             0.64387, '{}'],
+            ['data/iris.csv', '10', '10', '500',
+             './tmp/anomaly_dft.json', {}, 0.77699,
+             '{"default_numeric_value": "maximum"}']]
         for example in examples:
-            print "\nTesting with:\n", example
-            source_create.i_upload_a_file(self, example[0])
-            source_create.the_source_is_finished(self, example[1])
-            dataset_create.i_create_a_dataset(self)
-            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
-            anomaly_create.i_create_an_anomaly(self)
-            anomaly_create.the_anomaly_is_finished_in_less_than(self, example[3])
-            anomaly_create.i_export_anomaly(self, example[4])
-            anomaly_create.i_create_local_anomaly_from_file(self, example[4])
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            anomaly_create.i_create_an_anomaly_with_params(
+                self, example["model_conf"])
+            anomaly_create.the_anomaly_is_finished_in_less_than(
+                self, example["model_wait"])
+            anomaly_create.i_export_anomaly(self, example["exported_file"])
+            anomaly_create.i_create_local_anomaly_from_file(
+                self, example["exported_file"])
            anomaly_create.check_anomaly_id_local_id(self)
+            model_create.local_anomaly_prediction_is(
+                self, example["input_data"], example["prediction"])
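Across these scenarios the local-side assertion is always the same move: export the remote resource to JSON, rebuild it offline, and check that the offline object matches the remote one. The bindings document instantiating local predictor classes from exported files; a sketch for the tree-model case (file path reused from scenario 1, and the analogous constructors for ensembles, clusters, anomalies, etc. are assumed to behave the same way):

    from bigml.model import Model

    # Rebuild the predictor from the exported JSON; no API calls needed.
    local_model = Model("./tmp/model.json")
    prediction = local_model.predict({"petal length": 2, "petal width": 2})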
     def test_scenario7(self):
         """
-            Scenario 7: Successfully creating a local association from an exported file:
-                Given I create a data source uploading a "<data>" file
-                And I wait until the source is ready less than <time_1> secs
-                And I create a dataset
-                And I wait until the dataset is ready less than <time_2> secs
-                And I create an association
-                And I wait until the association is ready less than <time_3> secs
-                And I export the association to "<exported_file>"
-                When I create a local association from the file "<exported_file>"
-                Then the association ID and the local association ID match
-                Examples:
-                | data | time_1 | time_2 | time_3 | exported_file
-                | ../data/iris.csv | 10 | 10 | 50 | ./tmp/association.json
+        Scenario 7: Successfully creating a local association from an exported file:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create an association with "<model_conf>"
+            And I wait until the association is ready less than <model_wait> secs
+            And I export the association to "<exported_file>"
+            When I create a local association from the file "<exported_file>"
+            Then the association ID and the local association ID match
+            And the prediction for "<input_data>" is "<prediction>"
         """
-        print self.test_scenario7.__doc__
+        show_doc(self.test_scenario7)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "exported_file", "input_data", "prediction", "model_conf"]
         examples = [
-            ['data/iris.csv', '10', '10', '500', './tmp/association.json']]
+            ['data/iris.csv', '10', '10', '500', './tmp/association.json', {},
+             [], '{}'],
+            ['data/iris.csv', '10', '10', '500', './tmp/association_dft.json',
+             {}, [{'score': 0.12, 'rules': ['00000d'], 'item': {
+                 'complement': False, 'count': 50, 'field_id': '000004',
+                 'name': 'Iris-versicolor'}}],
+             '{"default_numeric_value": "mean"}']]
         for example in examples:
-            print "\nTesting with:\n", example
-            source_create.i_upload_a_file(self, example[0])
-            source_create.the_source_is_finished(self, example[1])
-            dataset_create.i_create_a_dataset(self)
-            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
-            association_create.i_create_an_association_from_dataset(self)
-            association_create.the_association_is_finished_in_less_than(self, example[3])
-            association_create.i_export_association(self, example[4])
-            association_create.i_create_local_association_from_file(self, example[4])
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            association_create.i_create_an_association_from_dataset_with_params(
+                self, example["model_conf"])
+            association_create.the_association_is_finished_in_less_than(
+                self, example["model_wait"])
+            association_create.i_export_association(
+                self, example["exported_file"])
+            association_create.i_create_local_association_from_file(
+                self, example["exported_file"])
             association_create.check_association_id_local_id(self)
+            model_create.local_association_prediction_is(
+                self, example["input_data"], example["prediction"])

     def test_scenario8(self):
         """
-            Scenario 8: Successfully creating a local topic model from an exported file:
-                Given I create a data source uploading a "<data>" file
-                And I wait until the source is ready less than <time_1> secs
-                And I create a dataset
-                And I wait until the dataset is ready less than <time_2> secs
-                And I create a topic model
-                And I wait until the topic model is ready less than <time_3> secs
-                And I export the topic model to "<exported_file>"
-                When I create a local topic model from the file "<exported_file>"
-                Then the topic model ID and the local topic model ID match
-                Examples:
-                | data | time_1 | time_2 | time_3 | exported_file
-                | ../data/iris.csv | 10 | 10 | 50 | ./tmp/topic_model.json
+        Scenario 8: Successfully creating a local topic model from an exported file:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a topic model
+            And I wait until the topic model is ready less than <model_wait> secs
+            And I export the topic model to "<exported_file>"
+            When I create a local topic model from the file "<exported_file>"
+            Then the topic model ID and the local topic model ID match
         """
-        print self.test_scenario8.__doc__
+        show_doc(self.test_scenario8)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "exported_file", "source_conf"]
         examples = [
             ['data/spam.csv', '10', '10', '500', './tmp/topic_model.json',
              '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}']]
         for example in examples:
-            print "\nTesting with:\n", example
-            source_create.i_upload_a_file(self, example[0])
-            source_create.the_source_is_finished(self, example[1])
-            source_create.i_update_source_with(self, example[5])
-            source_create.the_source_is_finished(self, example[1])
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"])
+            source_create.i_update_source_with(self, example["source_conf"])
+            source_create.the_source_is_finished(self, example["source_wait"])
             dataset_create.i_create_a_dataset(self)
-            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"])
             topic_create.i_create_a_topic_model(self)
-            topic_create.the_topic_model_is_finished_in_less_than(self, example[3])
-            topic_create.i_export_topic_model(self, example[4])
-            topic_create.i_create_local_topic_model_from_file(self, example[4])
+            topic_create.the_topic_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            topic_create.i_export_topic_model(
+                self, example["exported_file"])
+            topic_create.i_create_local_topic_model_from_file(
+                self, example["exported_file"])
             topic_create.check_topic_model_id_local_id(self)

     def test_scenario9(self):
         """
-            Scenario 9: Successfully creating a local association from an exported file:
-                Given I create a data source uploading a "<data>" file
-                And I wait until the source is ready less than <time_1> secs
-                And I create a dataset
-                And I wait until the dataset is ready less than <time_2> secs
-                And I create a time series
-                And I wait until the time series is ready less than <time_3> secs
-                And I export the time series to "<exported_file>"
-                When I create a local time series from the file "<exported_file>"
-                Then the time series ID and the local time series ID match
-                Examples:
-                | data | time_1 | time_2 | time_3 | exported_file
-                | ../data/iris.csv | 10 | 10 | 50 | ./tmp/time_series.json
+        Scenario 9: Successfully creating a local time series from an exported file:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a time series with "<model_conf>"
+            And I wait until the time series is ready less than <model_wait> secs
+            And I export the time series to "<exported_file>"
+            When I create a local time series from the file "<exported_file>"
+            Then the time series ID and the local time series ID match
         """
-        print self.test_scenario9.__doc__
+        show_doc(self.test_scenario9)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "exported_file"]
         examples = [
             ['data/iris.csv', '10', '10', '500', './tmp/time_series.json']]
         for example in examples:
-            print "\nTesting with:\n", example
-            source_create.i_upload_a_file(self, example[0])
-            source_create.the_source_is_finished(self, example[1])
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(self, example["data"])
+            source_create.the_source_is_finished(self, example["source_wait"])
             dataset_create.i_create_a_dataset(self)
-            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"])
             timeseries_create.i_create_a_time_series(self)
-            timeseries_create.the_time_series_is_finished_in_less_than(self, example[3])
-            timeseries_create.i_export_time_series(self, example[4])
-            timeseries_create.i_create_local_time_series_from_file(self, example[4])
+            timeseries_create.the_time_series_is_finished_in_less_than(
+                self, example["model_wait"])
+            timeseries_create.i_export_time_series(
+                self, example["exported_file"])
+            timeseries_create.i_create_local_time_series_from_file(
+                self, example["exported_file"])
             timeseries_create.check_time_series_id_local_id(self)
-
     def test_scenario10(self):
         """
-            Scenario 10: Successfully creating a local fusion from an exported file:
-                Given I create a data source uploading a "<data>" file
-                And I wait until the source is ready less than <time_1> secs
-                And I create a dataset
-                And I wait until the dataset is ready less than <time_2> secs
-                And I create a model with "<params>"
-                And I wait until the model is ready less than <time_3> secs
-                And I create a model with "<params>"
-                And I wait until the model is ready less than <time_3> secs
-                And I create a model with "<params>"
-                And I wait until the model is ready less than <time_3> secs
-                And I retrieve a list of remote models tagged with "<tag>"
-                And I create a fusion from a list of models
-                And I wait until the fusion is ready less than <time_3> secs
-                And I export the fusion to "<exported_file>"
-                When I create a local fusion from the file "<exported_file>"
-                Then the fusion ID and the local fusion ID match
-                Examples:
-                | data | time_1 | time_2 | time_3 | exported_file | params | tag
-                | ../data/iris.csv | 10 | 10 | 50 | ./tmp/fusion.json
+        Scenario 10: Successfully creating a local fusion from an exported file:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a model with "<model_conf>"
+            And I wait until the model is ready less than <model_wait> secs
+            And I create a model with "<model_conf>"
+            And I wait until the model is ready less than <model_wait> secs
+            And I create a model with "<model_conf>"
+            And I wait until the model is ready less than <model_wait> secs
+            And I retrieve a list of remote models tagged with "<tag>"
+            And I create a fusion from a list of models
+            And I wait until the fusion is ready less than <model_wait> secs
+            And I export the fusion to "<exported_file>"
+            When I create a local fusion from the file "<exported_file>"
+            Then the fusion ID and the local fusion ID match
         """
-        print self.test_scenario10.__doc__
+        show_doc(self.test_scenario10)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "exported_file", "tag"]
         examples = [
-            ['data/iris.csv', '10', '10', '50', './tmp/fusion.json', 'my_fusion_tag']]
+            ['data/iris.csv', '10', '10', '50', './tmp/fusion.json',
+             'my_fusion_tag']]
         for example in examples:
-            print "\nTesting with:\n", example
-            tag = "%s_%s" % (example[5], PY3)
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            tag = example["tag"]
             tag_args = '{"tags":["%s"]}' % tag
-            source_create.i_upload_a_file(self, example[0])
-            source_create.the_source_is_finished(self, example[1])
-            dataset_create.i_create_a_dataset(self)
-            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
             model_create.i_create_a_model_with(self, tag_args)
-            model_create.the_model_is_finished_in_less_than(self, example[3])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
             model_create.i_create_a_model_with(self, tag_args)
-            model_create.the_model_is_finished_in_less_than(self, example[3])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
             model_create.i_create_a_model_with(self, tag_args)
-            model_create.the_model_is_finished_in_less_than(self, example[3])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
             prediction_compare.i_retrieve_a_list_of_remote_models(self, tag)
             model_create.i_create_a_fusion(self)
-            model_create.the_fusion_is_finished_in_less_than(self, example[3])
-            model_create.i_export_fusion(self, example[4])
-            model_create.i_create_local_fusion_from_file(self, example[4])
+            model_create.the_fusion_is_finished_in_less_than(
+                self, example["model_wait"])
+            model_create.i_export_fusion(self, example["exported_file"])
+            model_create.i_create_local_fusion_from_file(
+                self, example["exported_file"])
             model_create.check_fusion_id_local_id(self)
-
     def test_scenario11(self):
         """
-            Scenario 11: Successfully creating a local linear regression from an exported file:
-                Given I create a data source uploading a "<data>" file
-                And I wait until the source is ready less than <time_1> secs
-                And I create a dataset
-                And I wait until the dataset is ready less than <time_2> secs
-                And I create a linear regression
-                And I wait until the linear regression is ready less than <time_3> secs
-                And I export the linear regression to "<exported_file>"
-                When I create a local linear regression from the file "<exported_file>"
-                Then the linear regression ID and the local linear regression ID match
-                Examples:
-                | data | time_1 | time_2 | time_3 | exported_file
-                | ../data/grades.csv | 10 | 10 | 50 | ./tmp/linear.json
+        Scenario 11: Successfully creating a local linear regression from an exported file:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a linear regression with "<model_conf>"
+            And I wait until the linear regression is ready less than <model_wait> secs
+            And I export the linear regression to "<exported_file>"
+            When I create a local linear regression from the file "<exported_file>"
+            Then the linear regression ID and the local linear regression ID match
+            And the prediction for "<input_data>" is "<prediction>"
         """
-        print self.test_scenario11.__doc__
+        show_doc(self.test_scenario11)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "exported_file", "input_data", "prediction", "model_conf"]
         examples = [
-            ['data/grades.csv', '20', '20', '50', './tmp/linear.json']]
+            ['data/grades.csv', '20', '20', '50', './tmp/linear.json',
+             {"Prefix": 5, "Assignment": 57.14, "Tutorial": 34.09,
+              "Midterm": 64, "TakeHome": 40, "Final": 50}, 54.69551,
+             '{}'],
+            ['data/grades.csv', '20', '20', '50', './tmp/linear_dft.json', {},
+             100.33246, '{"default_numeric_value": "maximum"}']]
         for example in examples:
-            print "\nTesting with:\n", example
-            source_create.i_upload_a_file(self, example[0])
-            source_create.the_source_is_finished(self, example[1])
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
             dataset_create.i_create_a_dataset(self)
-            dataset_create.the_dataset_is_finished_in_less_than(self, example[2])
-            linear_create.i_create_a_linear_regression_from_dataset(self)
-            linear_create.the_linear_regression_is_finished_in_less_than(self, example[3])
-            model_create.i_export_linear_regression(self, example[4])
-            model_create.i_create_local_linear_regression_from_file(self, example[4])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"])
+            linear_create.i_create_a_linear_regression_with_objective_and_params(
+                self, params=example["model_conf"])
+            linear_create.the_linear_regression_is_finished_in_less_than(
+                self, example["model_wait"])
+            model_create.i_export_linear_regression(
+                self, example["exported_file"])
+            model_create.i_create_local_linear_regression_from_file(
+                self, example["exported_file"])
             model_create.check_linear_regression_id_local_id(self)
+            model_create.local_linear_prediction_is(
+                self, example["input_data"], example["prediction"])
import create_dataset_steps as dataset_create -class TestMultiDataset(object): +class TestMultiDataset: + """Test datasets and multidatasets""" - def setup(self): + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a sampled multi-dataset: - Given I create a data source with "" uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a multi-dataset with sample rates - And I wait until the multi-dataset is ready less than secs - When I compare the datasets' instances - Then the proportion of instances between datasets is - - Examples: - | data | time_1 | time_2 | time_3 | rate |rates - | ../data/iris.csv | 10 | 10 | 10 | 0.5 |[0.2, 0.3] + Scenario: Successfully creating a sampled multi-dataset: + Given I create a data source with "" uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a multi-dataset with sample rates + And I wait until the multi-dataset is ready less than secs + When I compare the datasets' instances + Then the proportion of instances between datasets is """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "rate", + "rates"] examples = [ - ['data/iris.csv', '10', '10', '10', '0.5', '[0.2, 0.3]']] + ['data/iris.csv', '50', '50', '0.5', '[0.2, 0.3]']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file_with_args(self, example[0], '{}') - source_create.the_source_is_finished(self, example[1]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file_with_args( + self, example["data"], '{}') + source_create.the_source_is_finished( + self, example["source_wait"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, - example[2]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, - example[2]) - dataset_create.i_create_a_multidataset(self, example[5]) - dataset_create.the_dataset_is_finished_in_less_than(self, - example[3]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.i_create_a_multidataset( + self, example["rates"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) dataset_create.i_compare_datasets_instances(self) - dataset_create.proportion_datasets_instances(self, example[4]) + dataset_create.proportion_datasets_instances( + self, example["rate"]) def test_scenario2(self): """ - Scenario: Successfully creating a single dataset multi-dataset: - Given I create a data source 
with "" uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a multi-dataset with sample rates - And I wait until the multi-dataset is ready less than secs - When I compare the datasets' instances - Then the proportion of instances between datasets is - - Examples: - | data | time_1 | time_2 | time_3 | rate |rates - | ../data/iris.csv | 10 | 10 | 10 | 0.2 |[0.2] + Scenario: Successfully creating a single dataset multi-dataset: + Given I create a data source with "" uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a multi-dataset with sample rates + And I wait until the multi-dataset is ready less than secs + When I compare the datasets' instances + Then the proportion of instances between datasets is """ - print self.test_scenario2.__doc__ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "rate", + "rates"] examples = [ - ['data/iris.csv', '10', '10', '10', '0.2', '[0.2]']] + ['data/iris.csv', '50', '50', '0.2', '[0.2]']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file_with_args(self, example[0], '{}') - source_create.the_source_is_finished(self, example[1]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file_with_args( + self, example["data"], '{}') + source_create.the_source_is_finished( + self, example["source_wait"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, - example[2]) - dataset_create.i_create_a_multidataset(self, example[5]) - dataset_create.the_dataset_is_finished_in_less_than(self, - example[3]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.i_create_a_multidataset( + self, example["rates"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) dataset_create.i_compare_datasets_instances(self) - dataset_create.proportion_datasets_instances(self, example[4]) + dataset_create.proportion_datasets_instances( + self, example["rate"]) def test_scenario3(self): """ - Scenario: Successfully creating a sampled multi-dataset with sample: - Given I create a data source with "" uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a multi-dataset with same dataset and the first sample rate - And I wait until the multi-dataset is ready less than secs - When I compare the datasets' instances - Then the proportion of instances between datasets is - - Examples: - | data | time_1 | time_2 | time_3 | rate |rates - | ../data/iris.csv | 10 | 10 | 10 | 1.3 |[1, 0.3] + Scenario: Successfully creating a sampled multi-dataset with sample: + Given I create a data source with "" uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a multi-dataset with same dataset and the first sample rate + And I wait until the multi-dataset is ready less than secs + When I compare the datasets' instances + Then the proportion of instances between datasets is """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", 
"dataset_wait", "rate", + "rates"] examples = [ - ['data/iris.csv', '10', '10', '10', '1.3', '[1, 0.3]']] + ['data/iris.csv', '50', '50', '1.3', '[1, 0.3]']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file_with_args(self, example[0], '{}') - source_create.the_source_is_finished(self, example[1]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file_with_args( + self, example["data"], '{}') + source_create.the_source_is_finished( + self, example["source_wait"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, - example[2]) - dataset_create.i_create_a_multidataset_mixed_format(self, example[5]) - dataset_create.the_dataset_is_finished_in_less_than(self, - example[3]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.i_create_a_multidataset_mixed_format( + self, example["rates"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) dataset_create.i_compare_datasets_instances(self) - dataset_create.proportion_datasets_instances(self, example[4]) + dataset_create.proportion_datasets_instances( + self, example["rate"]) diff --git a/bigml/tests/test_42_pca.py b/bigml/tests/test_42_pca.py index 090fb26a..706305bf 100644 --- a/bigml/tests/test_42_pca.py +++ b/bigml/tests/test_42_pca.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2018-2019 BigML +# Copyright 2018-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,125 +20,144 @@ """ Creating PCA """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_pca_steps as pca_create -import create_projection_steps as projection_create -import create_batch_projection_steps as batch_proj_create - -class TestPCA(object): - - def setup(self): +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_pca_steps as pca_create +from . import create_projection_steps as projection_create +from . 
import create_batch_projection_steps as batch_proj_create + +class TestPCA: + """Testing PCA methods""" + + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a PCA from a dataset: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a PCA from a dataset - And I wait until the PCA is ready less than secs - And I update the PCA name to "" - When I wait until the PCA is ready less than secs - Then the PCA name is "" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | pca_name | - | ../data/iris.csv | 10 | 10 | 20 | 20 | my new pca name | + Scenario: Successfully creating a PCA from a dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a PCA from a dataset + And I wait until the PCA is ready less than secs + And I update the PCA name to "" + When I wait until the PCA is ready less than secs + Then the PCA name is "" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "pca_name"] examples = [ - ['data/iris.csv', '10', '10', '20', '20', 'my new pca name']] + ['data/iris.csv', '10', '10', '40', 'my new pca name']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) pca_create.i_create_a_pca_from_dataset(self) - pca_create.the_pca_is_finished_in_less_than(self, example[3]) - pca_create.i_update_pca_name(self, example[5]) - pca_create.the_pca_is_finished_in_less_than(self, example[4]) - pca_create.i_check_pca_name(self, example[5]) + pca_create.the_pca_is_finished_in_less_than( + self, example["model_wait"]) + pca_create.i_update_pca_name(self, example["pca_name"]) + pca_create.the_pca_is_finished_in_less_than( + self, example["model_wait"]) + pca_create.i_check_pca_name(self, example["pca_name"]) - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) def test_scenario2(self): """ - Scenario: Successfully creating a projection: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a pca - And I wait until the pca is 
ready less than secs - When I create a projection for "" - Then the projection is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | projection | - | ../data/iris.csv | 10 | 10 | 10 | {"petal width": 0.5} | '{"PC-0": 0.46547, "PC-1": 0.13724, "PC-2": -0.01666, "PC-3": 3.28995, "PC-4": 4.60383, "PC-5": 2.22108}' | + Scenario: Successfully creating a projection: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a pca + And I wait until the pca is ready less than secs + When I create a projection for "" + Then the projection is "" """ - print self.test_scenario2.__doc__ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "projection"] examples = [ - ['data/iris.csv', '30', '30', '30', '{"petal width": 0.5}', '{"PC2": 0.1593, "PC3": -0.01286, "PC1": 0.91648, "PC6": 0.27284, "PC4": 1.29255, "PC5": 0.75196}']] + ['data/iris.csv', '30', '30', '50', '{"petal width": 0.5}', + '{"PC2": 0.1593, "PC3": -0.01286, "PC1": 0.91648, ' + '"PC6": 0.27284, "PC4": 1.29255, "PC5": 0.75196}']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) pca_create.i_create_a_pca(self) - pca_create.the_pca_is_finished_in_less_than(self, example[3]) - projection_create.i_create_a_projection(self, example[4]) - projection_create.the_projection_is(self, example[5]) - - print "\nEnd of tests in: %s\n-------------------\n" % __name__ - + pca_create.the_pca_is_finished_in_less_than( + self, example["model_wait"]) + projection_create.i_create_a_projection( + self, example["input_data"]) + projection_create.the_projection_is( + self, example["projection"]) def test_scenario3(self): """ - Scenario: Successfully creating a batch projection: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a pca - And I wait until the pca is ready less than secs - When I create a batch projection for the dataset with the pca - And I wait until the batch projection is ready less than secs - And I download the created projections file to "" - Then the batch projection file is like "" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | local_file | predictions_file | - | ../data/iris.csv | 30 | 30 | 50 | 50 | ./tmp/batch_projections.csv |./data/batch_projections.csv | - + Scenario: Successfully creating a batch projection: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a pca + And I wait until the pca is ready less than secs + When I create a batch projection for the dataset with the pca + And I wait until the batch 
projection is ready less than secs + And I download the created projections file to "" + Then the batch projection file is like "" """ - print self.test_scenario3.__doc__ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "batch_wait", "local_file", "projections_file"] examples = [ - ['data/iris.csv', '30', '30', '50', '50', 'tmp/batch_projections.csv', 'data/batch_projections.csv']] + ['data/iris.csv', '30', '30', '50', '50', + 'tmp/batch_projections.csv', 'data/batch_projections.csv']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) pca_create.i_create_a_pca(self) - pca_create.the_pca_is_finished_in_less_than(self, example[3]) + pca_create.the_pca_is_finished_in_less_than( + self, example["model_wait"]) batch_proj_create.i_create_a_batch_projection(self) - batch_proj_create.the_batch_projection_is_finished_in_less_than(self, example[4]) - batch_proj_create.i_download_projections_file(self, example[5]) - batch_proj_create.i_check_projections(self, example[6]) + batch_proj_create.the_batch_projection_is_finished_in_less_than( + self, example["batch_wait"]) + batch_proj_create.i_download_projections_file( + self, example["local_file"]) + batch_proj_create.i_check_projections( + self, example["projections_file"]) diff --git a/bigml/tests/test_43_linear.py b/bigml/tests/test_43_linear.py index 2de5148c..a9a20ecb 100644 --- a/bigml/tests/test_43_linear.py +++ b/bigml/tests/test_43_linear.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import # -# Copyright 2019 BigML +# Copyright 2019-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -19,127 +20,154 @@ """ Creating Linear Regression """ -from world import world, setup_module, teardown_module -import create_source_steps as source_create -import create_dataset_steps as dataset_create -import create_linear_steps as linear_create -import create_prediction_steps as prediction_create -import create_batch_prediction_steps as batch_pred_create - -class TestLinearRegression(object): - - def setup(self): +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_linear_steps as linear_create +from . import create_prediction_steps as prediction_create +from . 
import create_batch_prediction_steps as batch_pred_create + +class TestLinearRegression: + """Testing Linear Regression methods""" + + def setup_method(self, method): """ Debug information """ - print "\n-------------------\nTests in: %s\n" % __name__ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) - def teardown(self): + def teardown_method(self): """ Debug information """ - print "\nEnd of tests in: %s\n-------------------\n" % __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} def test_scenario1(self): """ - Scenario: Successfully creating a linear regression from a dataset: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a linear regression from a dataset - And I wait until the linear regression is ready less than secs - And I update the linear regression name to "" - When I wait until the linear regression is ready less than secs - Then the linear regression name is "" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | linear_name | - | ../data/iris.csv | 10 | 10 | 20 | 20 | my new linear regression name | + Scenario: Successfully creating a linear regression from a dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a linear regression from a dataset + And I wait until the linear regression is ready less than secs + And I update the linear regression name to "" + When I wait until the linear regression is ready less than secs + Then the linear regression name is "" """ - print self.test_scenario1.__doc__ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "linear_name"] examples = [ - ['data/grades.csv', '100', '100', '200', '200', 'my new linear regression name']] + ['data/grades.csv', '100', '100', '200', 'my new linear regression name']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) linear_create.i_create_a_linear_regression_from_dataset(self) - linear_create.the_linear_regression_is_finished_in_less_than(self, example[3]) - linear_create.i_update_linear_regression_name(self, example[5]) - linear_create.the_linear_regression_is_finished_in_less_than(self, example[4]) - linear_create.i_check_linear_name(self, example[5]) + linear_create.the_linear_regression_is_finished_in_less_than( + self, example["model_wait"]) + linear_create.i_update_linear_regression_name( + self, example["linear_name"]) + linear_create.the_linear_regression_is_finished_in_less_than( + self, example["model_wait"]) + linear_create.i_check_linear_name(self, example["linear_name"]) - print "\nEnd of tests in: %s\n-------------------\n" 
% __name__ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) def test_scenario2(self): """ - Scenario: Successfully creating a prediction from linear regression: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a pca - And I wait until the linear regression is ready less than secs - When I create a prediction for "" - Then the prediction is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input |objective | prediction | - + Scenario: Successfully creating a prediction from linear regression: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a linear regression with objective "" and params "" + And I wait until the linear regression is ready less than secs + When I create a prediction for "" + Then the prediction is "" """ - print self.test_scenario2.__doc__ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "model_conf"] examples = [ - ['data/grades.csv', '30', '30', '30', '{"000000": 0.5, "000001": 1, "000002": 1, "000003": 1}', "000005", '2.27312', '{}'], - ['data/grades.csv', '30', '30', '30', '{"000000": 0.5, "000001": 1, "000002": 1, "000003": 1}', "000005", '8.19619', '{"bias": false}'], - ['data/dates.csv', '30', '30', '30', '{"test-num1": 23, "test-num2" : 54, "test-date.day-of-month":2, "test-date.month":12, "test-date.day-of-week": 2, "test-date.year": 2012}', "000003", '48.27679', '{"bias": false}']] + ['data/grades.csv', '30', '30', '50', + '{"000000": 0.5, "000001": 1, "000002": 1, "000003": 1}', + "000005", '2.27312', '{}'], + ['data/grades.csv', '30', '30', '50', + '{"000000": 0.5, "000001": 1, "000002": 1, "000003": 1}', + "000005", '8.19619', '{"bias": false}'], + ['data/dates.csv', '30', '30', '30', + '{"test-num1": 23, "test-num2" : 54, "test-date.day-of-month":2, ' + '"test-date.month":12, "test-date.day-of-week": 2, ' + '"test-date.year": 2012}', "000003", '48.27679', + '{"bias": false}']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"]) dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) - linear_create.i_create_a_linear_regression_with_objective_and_params(self, example[5], example[7]) - linear_create.the_linear_regression_is_finished_in_less_than(self, example[3]) - prediction_create.i_create_a_linear_prediction(self, example[4]) - prediction_create.the_prediction_is(self, example[5], example[6]) - - print "\nEnd of tests in: %s\n-------------------\n" % __name__ - + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + linear_create.i_create_a_linear_regression_with_objective_and_params( + self, example["objective_id"], example["model_conf"]) + linear_create.the_linear_regression_is_finished_in_less_than( + self, example["model_wait"]) + prediction_create.i_create_a_linear_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"])
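# A condensed sketch of the kind of client calls the steps above wrap,
# assuming the public bigml.api client from these bindings; the dataset id
# is a hypothetical placeholder and the arguments come from the examples:
from bigml.api import BigML

api = BigML()  # credentials read from BIGML_USERNAME / BIGML_API_KEY
dataset = "dataset/5af06df94e17277501000000"  # hypothetical id
linear_regression = api.create_linear_regression(
    dataset, {"objective_field": "000005", "bias": False})
api.ok(linear_regression)  # poll until the resource is finished
prediction = api.create_prediction(
    linear_regression,
    {"000000": 0.5, "000001": 1, "000002": 1, "000003": 1})
api.ok(prediction)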
def test_scenario3(self): """ - Scenario: Successfully creating a batch prediction from a linear regression: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a linear regression - And I wait until the linear regression is ready less than secs - When I create a batch prediction for the dataset with the linear regression - And I wait until the batch predictin is ready less than secs - And I download the created predictions file to "" - Then the batch prediction file is like "" - - Examples: - | data | time_1 | time_2 | time_3 | time_4 | local_file | predictions_file | - - + Scenario: Successfully creating a batch prediction from a linear regression: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a linear regression + And I wait until the linear regression is ready less than secs + When I create a batch prediction for the dataset with the linear regression + And I wait until the batch prediction is ready less than secs + And I download the created predictions file to "" + Then the batch prediction file is like "" """ - print self.test_scenario3.__doc__ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "batch_wait", "local_file", "predictions_file"] examples = [ - ['data/grades.csv', '30', '30', '50', '50', 'tmp/batch_predictions.csv', 'data/batch_predictions_linear.csv']] + ['data/grades.csv', '30', '30', '50', '50', + 'tmp/batch_predictions.csv', 'data/batch_predictions_linear.csv']] for example in examples: - print "\nTesting with:\n", example - source_create.i_upload_a_file(self, example[0]) - source_create.the_source_is_finished(self, example[1]) - dataset_create.i_create_a_dataset(self) - dataset_create.the_dataset_is_finished_in_less_than(self, example[2]) + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) linear_create.i_create_a_linear_regression_from_dataset(self) - linear_create.the_linear_regression_is_finished_in_less_than(self, example[3]) + linear_create.the_linear_regression_is_finished_in_less_than( + self, example["model_wait"]) batch_pred_create.i_create_a_linear_batch_prediction(self) - batch_pred_create.the_batch_prediction_is_finished_in_less_than(self, example[4]) - batch_pred_create.i_download_predictions_file(self, example[5]) - batch_pred_create.i_check_predictions(self, example[6]) + batch_pred_create.the_batch_prediction_is_finished_in_less_than( + self, example["batch_wait"]) + batch_pred_create.i_download_predictions_file( + self, example["local_file"]) + batch_pred_create.i_check_predictions( + self, example["predictions_file"]) diff --git a/bigml/tests/test_44_compare_predictions.py b/bigml/tests/test_44_compare_predictions.py new file mode 100644 index 00000000..c50a6350 --- /dev/null +++ b/bigml/tests/test_44_compare_predictions.py @@
-0,0 +1,442 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +""" Comparing remote and local predictions + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_association_steps as association_create +from . import create_cluster_steps as cluster_create +from . import create_anomaly_steps as anomaly_create +from . import create_prediction_steps as prediction_create +from . import compare_predictions_steps as prediction_compare + + +class TestComparePrediction: + """Test local and remote predictions""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully comparing remote and local predictions + with raw date input for anomaly detectors + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an anomaly detector + And I wait until the anomaly detector is ready less + than secs + And I create a local anomaly detector + And I enable the pre-modeling pipeline + When I create an anomaly score for "" + Then the anomaly score is "" + And I create a local anomaly score for "" + Then the local anomaly score is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "score"] + examples = [ + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1910-05-08T19:10:23.106","cat-0":"cat2","target-2":0.4}', + 0.52477], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1920-06-30T20:21:20.320","cat-0":"cat1","target-2":0.2}', + 0.50654]] + show_doc(self.test_scenario1, examples) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + anomaly_create.i_create_an_anomaly(self) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_anomaly(self, pre_model=True) + prediction_create.i_create_an_anomaly_score( + self, example["input_data"]) + 
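# The local half of this comparison reduces to something like the following,
# assuming the bigml.anomaly module from these bindings; the anomaly id is
# illustrative, and the raw date field is expanded beforehand by the
# pre-modeling pipeline that the steps build:
#
#     from bigml.anomaly import Anomaly
#
#     local_anomaly = Anomaly("anomaly/5af06df94e17277501000005")
#     local_anomaly.anomaly_score(
#         {"time-1": "1910-05-08T19:10:23.106", "cat-0": "cat2",
#          "target-2": 0.4})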
prediction_create.the_anomaly_score_is( + self, example["score"]) + prediction_compare.i_create_a_local_anomaly_score( + self, example["input_data"], pre_model=self.bigml["local_pipeline"]) + prediction_compare.the_local_anomaly_score_is( + self, example["score"]) + + def test_scenario1b(self): + """ + Scenario: Successfully comparing remote and local predictions + with raw date input for anomaly detectors + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an anomaly detector + And I wait until the anomaly detector is ready less + than secs + And I create a local anomaly detector + When I create an anomaly score for "" + Then the anomaly score is "" + And I create a local anomaly score for "" + Then the local anomaly score is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "score"] + examples = [ + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1932-01-30T19:24:11.440","cat-0":"cat2","target-2":0.1}', + 0.54343], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1950-11-06T05:34:05.602","cat-0":"cat1" ,"target-2":0.9}', + 0.5202]] + show_doc(self.test_scenario1b) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + anomaly_create.i_create_an_anomaly(self, shared=example["data"]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_anomaly(self, pre_model=True) + prediction_create.i_create_an_anomaly_score( + self, example["input_data"]) + prediction_create.the_anomaly_score_is( + self, example["score"]) + prediction_compare.i_create_a_local_anomaly_score( + self, example["input_data"], pre_model=self.bigml["local_pipeline"]) + prediction_compare.the_local_anomaly_score_is( + self, example["score"]) + + + def test_scenario1b_a(self): + """ + Scenario: Successfully comparing remote and local predictions + with raw date input for anomaly detectors + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an anomaly detector + And I wait until the anomaly detector is ready less + than secs + And I create a local anomaly detector + When I create an anomaly score for "" + Then the anomaly score is "" + And I create a local anomaly score for "" + Then the local anomaly score is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "score"] + examples = [ + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1969-7-14 17:36","cat-0":"cat2","target-2":0.9}', + 0.93639]] + show_doc(self.test_scenario1b_a) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + 
dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + anomaly_create.i_create_an_anomaly(self, shared=example["data"]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_anomaly(self, pre_model=True) + prediction_create.i_create_an_anomaly_score( + self, example["input_data"]) + prediction_create.the_anomaly_score_is( + self, example["score"]) + prediction_compare.i_create_a_local_anomaly_score( + self, example["input_data"], pre_model=self.bigml["local_pipeline"]) + prediction_compare.the_local_anomaly_score_is( + self, example["score"]) + + def test_scenario1c(self): + """ + Scenario: Successfully comparing remote and local predictions + with raw date input for anomaly detectors + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an anomaly detector + And I wait until the anomaly detector is ready less + than secs + And I create a local anomaly detector + When I create an anomaly score for "" + Then the anomaly score is "" + And I create a local anomaly score for "" + Then the local anomaly score is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "score"] + examples = [ + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"2001-01-05T23:04:04.693","cat-0":"cat2","target-2":0.01}', + 0.54911], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"2011-04-01T00:16:45.747","cat-0":"cat2","target-2":0.32}', + 0.52477]] + show_doc(self.test_scenario1c) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + anomaly_create.i_create_an_anomaly(self, shared=example["data"]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_anomaly(self, pre_model=True) + prediction_create.i_create_an_anomaly_score( + self, example["input_data"]) + prediction_create.the_anomaly_score_is(self, example["score"]) + prediction_compare.i_create_a_local_anomaly_score( + self, example["input_data"], pre_model=self.bigml["local_pipeline"]) + prediction_compare.the_local_anomaly_score_is( + self, example["score"]) + + def test_scenario1c_a(self): + """ + Scenario: Successfully comparing remote and local predictions + with raw date input for anomaly detectors + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an anomaly detector + And I wait until the anomaly detector is ready less + than secs + And I create a local anomaly detector + When I create an anomaly score for "" + Then the anomaly score is "" + And I create a local anomaly score for "" + Then the local anomaly score is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + 
"input_data", "score"] + examples = [ + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1969-W29-1T17:36:39Z","cat-0":"cat1","target-2":0.87}', + 0.93678], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"Mon Jul 14 17:36 +0000 1969","cat-0":"cat1","target-2":0}', + 0.93717]] + show_doc(self.test_scenario1c_a) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + anomaly_create.i_create_an_anomaly(self, shared=example["data"]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_anomaly(self, pre_model=True) + prediction_create.i_create_an_anomaly_score( + self, example["input_data"]) + prediction_create.the_anomaly_score_is( + self, example["score"]) + prediction_compare.i_create_a_local_anomaly_score( + self, example["input_data"], pre_model=self.bigml["local_pipeline"]) + prediction_compare.the_local_anomaly_score_is( + self, example["score"]) + + def test_scenario2(self): + """ + Scenario: Successfully comparing remote and local predictions + with raw date input for cluster + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster + And I wait until the cluster is ready less than secs + And I create a local cluster + When I create a centroid for "" + Then the centroid is "" with distance "" + And I create a local centroid for "" + Then the local centroid is "" with + distance "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "centroid", "distance"] + examples = [ + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1910-05-08T19:10:23.106","cat-0":"cat2","target-2":0.4}', + "Cluster 2", 0.92112], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1920-06-30T20:21:20.320","cat-0":"cat1","target-2":0.2}', + "Cluster 3", 0.77389]] + show_doc(self.test_scenario2) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + cluster_create.i_create_a_cluster(self, shared=example["data"]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_cluster(self, pre_model=True) + prediction_create.i_create_a_centroid( + self, example["input_data"]) + prediction_create.the_centroid_is_with_distance( + self, example["centroid"], example["distance"]) + prediction_compare.i_create_a_local_centroid( + self, example["input_data"], pre_model=self.bigml["local_pipeline"]) + prediction_compare.the_local_centroid_is( + self, example["centroid"], example["distance"]) + + def test_scenario2_a(self): + """ + Scenario: Successfully comparing remote and 
local predictions + with raw date input for cluster + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster + And I wait until the cluster is ready less than secs + And I create a local cluster + When I create a centroid for "" + Then the centroid is "" with distance "" + And I create a local centroid for "" + Then the local centroid is "" with + distance "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "centroid", "distance"] + examples = [ + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1932-01-30T19:24:11.440","cat-0":"cat2","target-2":0.1}', + "Cluster 0", 0.87855], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1950-11-06T05:34:05.602","cat-0":"cat1" ,"target-2":0.9}', + "Cluster 6", 0.83506]] + show_doc(self.test_scenario2_a) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + cluster_create.i_create_a_cluster(self, shared=example["data"]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_cluster(self, pre_model=True) + prediction_create.i_create_a_centroid( + self, example["input_data"]) + prediction_create.the_centroid_is_with_distance( + self, example["centroid"], example["distance"]) + prediction_compare.i_create_a_local_centroid( + self, example["input_data"], pre_model=self.bigml["local_pipeline"]) + prediction_compare.the_local_centroid_is( + self, example["centroid"], example["distance"]) + + def test_scenario3(self): + """ + Scenario: Successfully comparing association sets: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an association + And I wait until the association is ready less than secs + And I create a local association + When I create an association set for "" + Then the association set is like the contents of + "" + And I create a local association set for "" + Then the local association set is like the contents of + "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "association_set_file"] + examples = [['data/dates2.csv', '20', '30', '80', '{"target-2": -1}', + 'data/associations/association_set2.json']] + show_doc(self.test_scenario3) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + association_create.i_create_an_association_from_dataset( + self, shared=example["data"]) + association_create.the_association_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_association( + self, pre_model=True) + prediction_create.i_create_an_association_set( + self, example["input_data"]) + prediction_compare.the_association_set_is_like_file( + self, example["association_set_file"]) + prediction_compare.i_create_a_local_association_set( + self, example["input_data"], pre_model=self.bigml["local_pipeline"]) + prediction_compare.the_local_association_set_is_like_file( + self, example["association_set_file"])
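# The matching local call for the association scenario, a minimal sketch
# assuming the bigml.association module from these bindings; the
# association id is an illustrative placeholder:
from bigml.association import Association

local_association = Association("association/5af06df94e17277501000008")
# returns the rules whose antecedents match the given input items
rules = local_association.association_set({"target-2": -1})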
diff --git a/bigml/tests/test_45_external_connector.py b/bigml/tests/test_45_external_connector.py new file mode 100644 index 00000000..deac2c94 --- /dev/null +++ b/bigml/tests/test_45_external_connector.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2018-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating external connectors + +""" +import json + +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_external_steps as connector_create + +class TestExternalConnector: + """Testing external connector creation""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating an external connector: + Given I create an external connector from environment vars + And I wait until the external connector is ready less than secs + And I update the external connector with args + And the external connector has arguments + """ + show_doc(self.test_scenario1) + headers = ["conn_wait", "args"] + examples = [ + ['20', '{"name": "my connector name"}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + connector_create.i_create_external_connector(self) + connector_create.the_external_connector_is_finished( + self, example["conn_wait"]) + connector_create.i_update_external_connector_with( + self, example["args"]) + connector_create.the_external_connector_is_finished( + self, example["conn_wait"]) + connector_create.external_connector_has_args( + example["args"])
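# A minimal sketch of what the external connector steps exercise, assuming
# that create_external_connector falls back to the BIGML_EXTERNAL_CONN_*
# environment variables when no explicit connection info is passed, as the
# scenario wording suggests:
from bigml.api import BigML

api = BigML()
connector = api.create_external_connector(None)
api.ok(connector)  # wait until the connector is finished
api.update_external_connector(connector, {"name": "my connector name"})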
diff --git a/bigml/tests/test_46_model_cloning.py b/bigml/tests/test_46_model_cloning.py new file mode 100644 index 00000000..70c32743 --- /dev/null +++ b/bigml/tests/test_46_model_cloning.py @@ -0,0 +1,426 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating clones for models + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_ensemble_steps as ensemble_create +from . import create_linear_steps as linear_create +from . import create_cluster_steps as cluster_create +from . import create_lda_steps as topic_create +from . import create_anomaly_steps as anomaly_create +from . import create_association_steps as association_create +from . import create_time_series_steps as time_create +from . import create_pca_steps as pca_create + + +class TestCloning: + """Testing cloned resources creation""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating a clone from a model: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I clone the model + Then the origin model is the previous model + And I share and clone the shared model + Then the origin model is the previous model + + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait"] + examples = [ + ['data/iris.csv', '10', '10', '10']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model = world.model["resource"] + model_create.make_the_model_shared(self, cloneable=True) + model_hash = "shared/model/%s" % world.model["shared_hash"] + model_create.clone_model(self, model) + model_create.the_cloned_model_is(self, model) + model_create.clone_model(self, model_hash) + model_create.the_cloned_model_is(self, model) + + def test_scenario2(self): + """ + Scenario: Successfully creating a clone from an ensemble: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an ensemble + And I wait until the ensemble is ready less than secs + And I clone the ensemble + Then
the origin ensemble is the previous ensemble + """ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait"] + examples = [ + ['data/iris.csv', '10', '10', '30']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_create.i_create_an_ensemble(self, shared=example["data"]) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + ensemble = world.ensemble["resource"] + ensemble_create.clone_ensemble(self, ensemble) + ensemble_create.the_cloned_ensemble_is(self, ensemble) + + def test_scenario3(self): + """ + Scenario: Successfully creating a clone from a deepnet: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a quick deepnet + And I wait until the deepnet is ready less than secs + Then the origin deepnet is the previous deepnet + """ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "model_wait"] + examples = [ + ['data/iris.csv', '10', '10', '100']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_quick_deepnet(self) + model_create.the_deepnet_is_finished_in_less_than( + self, example["model_wait"]) + deepnet = world.deepnet["resource"] + model_create.clone_deepnet(self, deepnet) + model_create.the_cloned_deepnet_is(self, deepnet) + + def test_scenario4(self): + """ + Scenario: Successfully creating a clone from a logistic regression: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression + And I wait until the logistic regression is ready less than secs + Then the origin logistic regression is the previous logistic regression + """ + show_doc(self.test_scenario4) + headers = ["data", "source_wait", "dataset_wait", "model_wait"] + examples = [ + ['data/iris.csv', '10', '10', '30']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model(self, shared=example["data"]) + model_create.the_logistic_model_is_finished_in_less_than( + 
self, example["model_wait"], shared=example["data"]) + logistic_regression = world.logistic_regression["resource"] + model_create.clone_logistic_regression(self, logistic_regression) + model_create.the_cloned_logistic_regression_is( + self, logistic_regression) + + def test_scenario5(self): + """ + Scenario: Successfully creating a clone from a linear regression: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a linear regression + And I wait until the linear regression is ready less than secs + Then the origin linear regression is the previous linear regression + """ + show_doc(self.test_scenario5) + headers = ["data", "source_wait", "dataset_wait", "model_wait"] + examples = [ + ['data/iris.csv', '10', '10', '30']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + linear_create.i_create_a_linear_regression_from_dataset( + self, shared=example["data"]) + linear_create.the_linear_regression_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + linear_regression = world.linear_regression["resource"] + linear_create.clone_linear_regression(self, linear_regression) + linear_create.the_cloned_linear_regression_is( + self, linear_regression) + + def test_scenario6(self): + """ + Scenario: Successfully creating a clone from a cluster: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster + And I wait until the cluster is ready less than secs + Then the origin cluster is the previous cluster + """ + show_doc(self.test_scenario6) + headers = ["data", "source_wait", "dataset_wait", "model_wait"] + examples = [ + ['data/iris.csv', '10', '10', '30']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + cluster_create.i_create_a_cluster(self, shared=example["data"]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + cluster = world.cluster["resource"] + cluster_create.clone_cluster(self, cluster) + cluster_create.the_cloned_cluster_is( + self, cluster) + + def test_scenario7(self): + """ + Scenario: Successfully creating a clone from a topic model: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a topic model + And I wait until the topic model is ready less than secs + Then the origin topic model is the previous topic model + """ + 
+        show_doc(self.test_scenario7)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "source_conf"]
+        examples = [
+            ['data/spam.csv', '10', '10', '100', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"])
+            source_create.i_update_source_with(
+                self, example["source_conf"])
+            dataset_create.i_create_a_dataset(self)
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"])
+            topic_create.i_create_a_topic_model(self)
+            topic_create.the_topic_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            topic_model = world.topic_model["resource"]
+            topic_create.clone_topic_model(self, topic_model)
+            topic_create.the_cloned_topic_model_is(
+                self, topic_model)
+
+
+    def test_scenario8(self):
+        """
+        Scenario: Successfully creating a clone from an anomaly detector:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create an anomaly detector
+            And I wait until the anomaly detector is ready less than <model_wait> secs
+            Then the origin anomaly detector is the previous anomaly detector
+        """
+        show_doc(self.test_scenario8)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait"]
+        examples = [
+            ['data/iris.csv', '10', '10', '100']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            anomaly_create.i_create_an_anomaly(self, shared=example["data"])
+            anomaly_create.the_anomaly_is_finished_in_less_than(
+                self, example["model_wait"], shared=example["data"])
+            anomaly = world.anomaly["resource"]
+            anomaly_create.clone_anomaly(self, anomaly)
+            anomaly_create.the_cloned_anomaly_is(
+                self, anomaly)
+
+    def test_scenario9(self):
+        """
+        Scenario: Successfully creating a clone from an association:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create an association
+            And I wait until the association is ready less than <model_wait> secs
+            Then the origin association is the previous association
+        """
+        show_doc(self.test_scenario9)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait"]
+        examples = [
+            ['data/iris.csv', '10', '10', '100']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            association_create.i_create_an_association_from_dataset(
+                self, shared=example["data"])
+            association_create.the_association_is_finished_in_less_than(
+                self, example["model_wait"], shared=example["data"])
+            association = world.association["resource"]
+            association_create.clone_association(self, association)
+            association_create.the_cloned_association_is(
+                self, association)
+
+    def test_scenario10(self):
+        """
+        Scenario: Successfully creating a clone from a time series:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a time series
+            And I wait until the time series is ready less than <model_wait> secs
+            Then the origin time series is the previous time series
+        """
+        show_doc(self.test_scenario10)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait"]
+        examples = [
+            ['data/iris.csv', '10', '10', '100']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            time_create.i_create_a_time_series(self)
+            time_create.the_time_series_is_finished_in_less_than(
+                self, example["model_wait"])
+            time_series = world.time_series["resource"]
+            time_create.clone_time_series(self, time_series)
+            time_create.the_cloned_time_series_is(
+                self, time_series)
+
+    def test_scenario11(self):
+        """
+        Scenario: Successfully creating a clone from a pca:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a pca
+            And I wait until the pca is ready less than <model_wait> secs
+            Then the origin pca is the previous pca
+        """
+        show_doc(self.test_scenario11)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait"]
+        examples = [
+            ['data/iris.csv', '10', '10', '100']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            pca_create.i_create_a_pca(self, shared=example["data"])
+            pca_create.the_pca_is_finished_in_less_than(
+                self, example["model_wait"], shared=example["data"])
+            pca = world.pca["resource"]
+            pca_create.clone_pca(self, pca)
+            pca_create.the_cloned_pca_is(self, pca)
diff --git a/bigml/tests/test_47_webhooks.py b/bigml/tests/test_47_webhooks.py
new file mode 100644
index 00000000..3206f0ef
--- /dev/null
+++ b/bigml/tests/test_47_webhooks.py
@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init
+#pylint: disable=locally-disabled,unused-import,invalid-name
+#
+# Copyright 2022-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+""" Checking webhooks secrets
+
+"""
+import json
+
+from collections import OrderedDict
+from bigml.webhooks import check_signature
+
+from .world import world, setup_module, teardown_module, show_doc, \
+    show_method, ok_
+
+
+BIGML_SECRET = 'mysecret'
+
+BIGML_REQUEST_MOCKUP = {
+    "body": {
+        'event': 'finished',
+        'message': 'The source has been created',
+        'resource': 'source/627eceb1d432eb7338001d4b',
+        'timestamp': '2022-05-13 21:33:39 GMT'
+    },
+    "META": {
+        'HTTP_X_BIGML_SIGNATURE':
+            "sha1=af38d979e8582d678653a8059ca0821daeedebbd"
+    }
+}
+
+
+class RequestMockup:
+    """Test for webhooks with secrets"""
+
+    def __init__(self, request_dict):
+        self.body = json.dumps(request_dict["body"], sort_keys=True)
+        self.meta = request_dict["META"]
+
+
+class TestWebhook:
+    """Testing webhooks"""
+
+    def setup_method(self, method):
+        """
+        Debug information
+        """
+        self.bigml = {}
+        self.bigml["method"] = method.__name__
+        print("\n-------------------\nTests in: %s\n" % __name__)
+
+    def teardown_method(self):
+        """
+        Debug information
+        """
+        print("\nEnd of tests in: %s\n-------------------\n" % __name__)
+        self.bigml = {}
+
+    def test_scenario1(self):
+        """
+        Scenario: Testing webhook secret signature
+        """
+        show_doc(self.test_scenario1)
+        ok_(check_signature(RequestMockup(BIGML_REQUEST_MOCKUP),
+                            BIGML_SECRET))
diff --git a/bigml/tests/test_48_local_dataset.py b/bigml/tests/test_48_local_dataset.py
new file mode 100644
index 00000000..eabd52f1
--- /dev/null
+++ b/bigml/tests/test_48_local_dataset.py
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init
+#pylint: disable=locally-disabled,unused-import
+#
+# Copyright 2022-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+""" Testing local dataset transformations
+
+"""
+from .world import world, setup_module, teardown_module, show_doc, \
+    show_method
+from . import compare_dataset_steps as dataset_compare
+
+
+class TestLocalDataset:
+    """Testing Local class for datasets"""
+
+    def setup_method(self, method):
+        """
+        Debug information
+        """
+        self.bigml = {}
+        self.bigml["method"] = method.__name__
+        print("\n-------------------\nTests in: %s\n" % __name__)
+
+    def teardown_method(self):
+        """
+        Debug information
+        """
+        print("\nEnd of tests in: %s\n-------------------\n" % __name__)
+        self.bigml = {}
+
+    def test_scenario1(self):
+        """
+        Scenario 1: Successfully creating a transformation from a local dataset in a json file:
+            Given I create a local dataset from a "<dataset_file>" file
+            Then the transformed data for "<input_data>" is "<output_data>"
+        """
+        show_doc(self.test_scenario1)
+        headers = ["dataset_file", "input_data", "output_data"]
+        examples = [
+            ['bigml/tests/my_dataset/my_flatline_ds.json',
+             '{"plasma glucose": 120, "age": 30, "bmi": 46}',
+             '{"plasma glucose": 120, "age": 30, "glucose half": 60}']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            dataset_compare.i_create_a_local_dataset_from_file(
+                self, example["dataset_file"])
+            dataset_compare.the_transformed_data_is(
+                self, example["input_data"], example["output_data"])
diff --git a/bigml/tests/test_49_local_pipeline.py b/bigml/tests/test_49_local_pipeline.py
new file mode 100644
index 00000000..651a87a3
--- /dev/null
+++ b/bigml/tests/test_49_local_pipeline.py
@@ -0,0 +1,382 @@
+# -*- coding: utf-8 -*-
+#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init
+#pylint: disable=locally-disabled,unused-import
+#
+# Copyright 2022-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+""" Testing local pipeline transformations
+
+"""
+import os
+import json
+
+from .world import world, setup_module, teardown_module, show_doc, \
+    show_method
+from . import compare_pipeline_steps as pipeline_compare
+from . import create_source_steps as source_create
+from . import create_dataset_steps as dataset_create
+from . import create_anomaly_steps as anomaly_create
+from . import create_model_steps as model_create
+from . import create_ensemble_steps as ensemble_create
+from . import create_linear_steps as linear_create
+from . import create_prediction_steps as prediction_create
+from . import compare_predictions_steps as prediction_compare
+
+
+class TestLocalPipeline:
+    """Testing local Pipeline methods"""
+
+    def setup_method(self, method):
+        """
+        Debug information
+        """
+        self.bigml = {}
+        self.bigml["method"] = method.__name__
+        print("\n-------------------\nTests in: %s\n" % __name__)
+
+    def teardown_method(self):
+        """
+        Debug information
+        """
+        print("\nEnd of tests in: %s\n-------------------\n" % __name__)
+        self.bigml = {}
+
+    def test_scenario1(self):
+        """
+        Scenario 1: Successfully creating a local pipeline from a model and anomaly detector:
+            Given I expand the zip file "<pipeline_file>" that contains "<models_list>"
+            And I create a local pipeline for "<models_list>" named "<name>"
+            Then the transformed data for "<input_data>" is "<output_data>"
+        """
+        show_doc(self.test_scenario1)
+        headers = ["pipeline_file", "models_list", "name", "input_data",
+                   "output_data"]
+        examples = [
+            ['bigml/tests/pipeline3.zip',
+             '["anomaly/631a6a968f679a2d2d000319",'
+             ' "model/631a6a6f8f679a2d31000445"]',
+             "pipeline3",
+             '{"plasma glucose": 120, "age": 30, "bmi": 46}',
+             '{"plasma glucose": 120, "age": 30, "glucose half": 60,'
+             ' "age_range": "2nd third", "bmi": 46,'
+             ' "score": 0.85456,'
+             ' "prediction": "false", "probability": 0.6586746586746587}']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            pipeline_compare.i_expand_file_with_models_list(
+                self, example["pipeline_file"], example["models_list"])
+            pipeline_compare.i_create_a_local_pipeline_from_models_list(
+                self, example["models_list"], example["name"],
+                storage=os.path.splitext(example["pipeline_file"])[0])
+            pipeline_compare.the_pipeline_transformed_data_is(
+                self, example["input_data"], example["output_data"])
+
+    def test_scenario2(self):
+        """
+        Scenario 2: Successfully creating a local pipeline from two BMLPipelines
+            Given I expand the zip file "<pipeline_file>" that contains "<models_list>"
+            And I create a local pipeline for "<model1>" named "<name1>"
+            And I create a local pipeline for "<model2>" named "<name2>"
+            And I create a local pipeline "<name>" for both pipelines
+            Then the transformed data for "<input_data>" is "<output_data>"
+        """
+        show_doc(self.test_scenario2)
+        headers = ["pipeline_file", "models_list", "model1", "name1",
+                   "model2", "name2", "name", "input_data", "output_data"]
+        examples = [
+            ['bigml/tests/pipeline3.zip',
+             '["anomaly/631a6a968f679a2d2d000319",'
+             ' "model/631a6a6f8f679a2d31000445"]',
+             '["model/631a6a6f8f679a2d31000445"]',
+             "pipeline1",
+             '["anomaly/631a6a968f679a2d2d000319"]',
+             "pipeline2",
+             "pipeline3",
+             '{"plasma glucose": 120, "age": 30, "bmi": 46}',
+             '{"plasma glucose": 120, "age": 30, "glucose half": 60,'
+             ' "age_range": "2nd third", "bmi": 46,'
+             ' "score": 0.85456,'
+             ' "prediction": "false", "probability": 0.6586746586746587}']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            pipeline_compare.i_expand_file_with_models_list(
+                self, example["pipeline_file"], example["models_list"])
+            pipe1 = pipeline_compare.i_create_a_local_pipeline_from_models_list(
+                self, example["model1"], example["name1"],
+                storage=os.path.splitext(example["pipeline_file"])[0])
+            pipe2 = pipeline_compare.i_create_a_local_pipeline_from_models_list(
+                self, example["model2"], example["name2"],
+                storage=os.path.splitext(example["pipeline_file"])[0])
+            pipeline_compare.i_create_composed_pipeline(self, [pipe1, pipe2],
+                                                        example["name"])
+            pipeline_compare.the_pipeline_transformed_data_is(
+                self, example["input_data"], example["output_data"])
+
+    def test_scenario3(self):
+        """
+        Scenario: Successfully comparing remote and local predictions
+                  with raw date input for linear regression:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a linear regression
+            And I wait until the linear regression is ready
+                less than <model_wait> secs
+            And I create a local pipeline for the linear regression named "<name>"
+            When I create a prediction for "<input_data>"
+            Then the prediction for "<objective_id>" is "<prediction>"
+            And the prediction in the transformed data for "<input_data>" is "<prediction>"
+        """
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "input_data", "objective_id", "prediction", "name"]
+        examples = [
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "1910-05-08T19:10:23.106", "cat-0":"cat2"}',
+             '000002', -0.01284, "pipeline1"],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "1920-06-30T20:21:20.320", "cat-0":"cat1"}',
+             '000002', -0.09459, "pipeline2"],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "1932-01-30T19:24:11.440", "cat-0":"cat2"}',
+             '000002', -0.02259, "pipeline3"],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "1950-11-06T05:34:05.252", "cat-0":"cat1"}',
+             '000002', -0.06754, "pipeline4"],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "2001-01-05T23:04:04.693", "cat-0":"cat2"}',
+             '000002', 0.05204, "pipeline5"],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "2011-04-01T00:16:45.747", "cat-0":"cat2"}',
+             '000002', 0.05878, "pipeline6"]]
+        show_doc(self.test_scenario3)
+
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            linear_create.i_create_a_linear_regression(
+                self, shared=example["data"])
+            linear_create.the_linear_regression_is_finished_in_less_than(
+                self, example["model_wait"], shared=example["data"])
+            pipeline_compare.i_create_a_local_pipeline_from_models_list(
+                self, [world.linear_regression["resource"]], example["name"])
+            prediction_create.i_create_a_linear_prediction(
+                self, example["input_data"])
+            prediction_create.the_prediction_is(
+                self, example["objective_id"], example["prediction"])
+            pipeline_compare.the_pipeline_result_key_is(
+                self, example["input_data"], "prediction",
+                example["prediction"])
+
+    def test_scenario4(self):
+        """
+        Scenario: Successfully comparing remote and local predictions
+                  with raw date input for deepnet:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a deepnet
+            And I wait until the deepnet is ready less than <model_wait> secs
+            And I create a local pipeline for the deepnet named "<name>"
+            When I create a prediction for "<input_data>"
+            Then the prediction for "<objective_id>" is "<prediction>"
+            And the prediction in the transformed data for "<input_data>" is "<prediction>"
+        """
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "input_data", "objective_id", "prediction", "name"]
+        examples = [
+            ['data/dates2.csv', '20', '45', '160',
+             '{"time-1": "1910-05-08T19:10:23.106", "cat-0":"cat2"}',
+             '000002', -0.4264, "pipeline1"],
+            ['data/dates2.csv', '20', '45', '160',
+             '{"time-1": "2011-04-01T00:16:45.747", "cat-0":"cat2"}',
+             '000002', 0.11985, "pipeline2"],
+            ['data/dates2.csv', '20', '45', '160',
+             '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}',
+             '000002', -0.08211, "pipeline3"],
+            ['data/dates2.csv', '20', '45', '160',
+             '{"time-1": "1920-06-45T20:21:20.320", "cat-0":"cat1"}',
+             '000002', -0.08211, "pipeline4"],
+            ['data/dates2.csv', '20', '45', '160',
+             '{"time-1": "2001-01-05T23:04:04.693", "cat-0":"cat2"}',
+             '000002', 0.00388, "pipeline5"],
+            ['data/dates2.csv', '20', '45', '160',
+             '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}',
+             '000002', -0.04976, "pipeline6"],
+            ['data/dates2.csv', '20', '45', '160',
+             '{"time-1": "1932-01-30T19:24:11.440", "cat-0":"cat2"}',
+             '000002', -0.36264, "pipeline7"],
+            ['data/dates2.csv', '20', '45', '160',
+             '{"time-1": "Mon Jul 14 17:36 +0000 1969", "cat-0":"cat1"}',
+             '000002', -0.08211, "pipeline8"]]
+        show_doc(self.test_scenario4)
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"])
+            deepnet_shared = "%s_no_sug" % example["data"]
+            model_create.i_create_a_no_suggest_deepnet(
+                self, shared=deepnet_shared)
+            model_create.the_deepnet_is_finished_in_less_than(
+                self, example["model_wait"], shared=deepnet_shared)
+            pipeline_compare.i_create_a_local_pipeline_from_models_list(
+                self, [world.deepnet["resource"]], example["name"])
+            prediction_create.i_create_a_deepnet_prediction(
+                self, example["input_data"])
+            prediction_create.the_prediction_is(
+                self, example["objective_id"], example["prediction"],
+                precision=4)
+            pipeline_compare.the_pipeline_result_key_is(
+                self, example["input_data"], "prediction",
+                example["prediction"], precision=4)
+
+    def test_scenario5(self):
+        """
+        Scenario: Successfully comparing remote and local predictions
+                  with raw input for deepnets with images:
+            Given I create an annotated images data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a deepnet with params <model_conf>
+            And I wait until the deepnet is ready
+                less than <model_wait> secs
+            And I create a local pipeline for the deepnet named "<name>"
+            When I create a prediction for "<input_data>"
+            Then the prediction for "<objective_id>" is "<prediction>"
+            And the prediction in the transformed data for "<input_data>" is "<prediction>"
+            And the probability in the transformed data for "<input_data>" is "<probability>"
+        """
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "input_data", "objective_id", "model_conf", "image_fields",
+                   "name"]
+        examples = [
+            ['data/images/metadata.json', '500', '500', '600',
+             '{"image_id": "data/fruits1e.jpg", "label":"f1"}',
+             '100003', {"objective_field": "100003",
+                        "number_of_hidden_layers": 1,
+                        "suggest_structure": False,
+                        "missing_numerics": True,
+                        "max_training_time": 100,
+                        "hidden_layers": [{
+                            "activation_function": "tanh",
+                            "number_of_nodes": 10}]},
+             ['image_id'], "pipeline1"]]
+        show_doc(self.test_scenario5)
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
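            # Editor's note (annotation, not part of the patch): the source
            # below is created with "image_analysis" disabled, so no per-image
            # features are extracted and the deepnet trains on the raw images
            # themselves. A sketch of the roughly equivalent direct call,
            # assuming the standard api.create_source creator:
            #     api.create_source("data/images/metadata.json",
            #                       {"image_analysis": {
            #                           "enabled": False,
            #                           "extracted_features": []}})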
+            source_create.i_create_annotated_source(
+                self,
+                example["data"],
+                args={"image_analysis": {"enabled": False,
+                                         "extracted_features": []}})
+            source_create.the_source_is_finished(
+                self, example["source_wait"])
+            dataset_create.i_create_a_dataset(self)
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"])
+            model_create.i_create_a_deepnet_with_objective_and_params(
+                self, example["objective_id"],
+                json.dumps(example["model_conf"]))
+            model_create.the_deepnet_is_finished_in_less_than(
+                self, example["model_wait"])
+            pipeline_compare.i_create_a_local_pipeline_from_models_list(
+                self, [world.deepnet["resource"]], example["name"])
+            prediction_create.i_create_a_deepnet_prediction(
+                self, example["input_data"], example["image_fields"])
+            prediction = world.prediction["output"]
+            probability = world.prediction["probability"]
+            pipeline_compare.i_create_a_local_pipeline_from_models_list(
+                self, [world.deepnet["resource"]], example["name"])
+            pipeline_compare.the_pipeline_result_key_is(
+                self, example["input_data"], "prediction", prediction,
+                precision=4)
+            pipeline_compare.the_pipeline_result_key_is(
+                self, example["input_data"], "probability", probability,
+                precision=2)
+
+    def test_scenario6(self):
+        """
+        Scenario: Successfully comparing remote and local anomaly scores
+                  with raw input for dataset with images:
+            Given I create an annotated images data source uploading a "<data>" file and
+                <extracted_features>
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create an anomaly detector
+            And I wait until the anomaly is ready
+                less than <anomaly_wait> secs
+            And I create a local pipeline for the anomaly detector named "<name>"
+            When I create an anomaly score for "<input_data>"
+            Then the anomaly score is "<score>"
+            And the anomaly score in the transformed data for "<input_data>" is "<score>"
+        """
+        headers = ["data", "extracted_features", "source_wait", "dataset_wait",
+                   "anomaly_wait", "input_data", "score", "name"]
+        examples = [
+            ['data/images/fruits_hist.zip',
+             ["dimensions", "average_pixels", "level_histogram",
+              "histogram_of_gradients", ["wavelet_subbands", 5],
+              ["pretrained_cnn", "mobilenetv2"]],
+             '500', '500', '600',
+             '{"image_id": "data/fruits1e.jpg"}', 0.39902, "pipeline1"]]
+        show_doc(self.test_scenario6)
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file_with_args(
+                self,
+                example["data"],
+                args=json.dumps({"image_analysis": {
+                    "enabled": True,
+                    "extracted_features": example["extracted_features"]}}))
+            source_create.the_source_is_finished(
+                self, example["source_wait"])
+            dataset_create.i_create_a_dataset(self)
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"])
+            anomaly_create.i_create_an_anomaly(self)
+            anomaly_create.the_anomaly_is_finished_in_less_than(
+                self, example["anomaly_wait"])
+            pipeline_compare.i_create_a_local_pipeline_from_models_list(
+                self, [world.anomaly["resource"]], example["name"])
+            prediction_create.i_create_an_anomaly_score(
+                self, example["input_data"])
+            score = world.anomaly_score["score"]
+            prediction_create.the_anomaly_score_is(
+                self, world.anomaly_score["score"])
+            pipeline_compare.the_pipeline_result_key_is(
+                self, example["input_data"], "score", score)
diff --git a/bigml/tests/test_99_cleaning.py b/bigml/tests/test_99_cleaning.py
new file mode 100644
index 00000000..1f80e98e
--- /dev/null
+++ b/bigml/tests/test_99_cleaning.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init
+#pylint: disable=locally-disabled,unused-import,no-self-use
+#
+# Copyright 2018-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+""" Cleaning the resources created in the tests
+
+"""
+
+from .world import world, teardown_fn, setup_module, ok_
+
+
+class TestCleaningProject:
+    """Artifact to clean all the created resources after each test execution"""
+
+    def setup_method(self):
+        """
+        Debug information
+        """
+        print("\nFinal cleaning\n")
+
+    def test_final(self):
+        """Final empty test """
+        ok_(True)
+
+    def teardown_method(self):
+        """
+        Debug information
+        """
+        teardown_fn(force=True)
+        print("\nEnd of tests.")
diff --git a/bigml/tests/world.py b/bigml/tests/world.py
index 7e85fbca..f3c86ba2 100644
--- a/bigml/tests/world.py
+++ b/bigml/tests/world.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
-#!/usr/bin/env python
 #
-# Copyright 2015-2019 BigML
+# Copyright 2015-2025 BigML
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may
 # not use this file except in compliance with the License. You may obtain
@@ -20,20 +19,30 @@
 """
 import os
+import sys
+import re
 import shutil
 import time
-import pkg_resources
 import datetime
+import pprint
+import json
+import math
+import pytest
 
 from bigml.api import BigML
-from bigml.api import HTTP_OK, HTTP_NO_CONTENT, HTTP_UNAUTHORIZED
-from bigml.constants import IRREGULAR_PLURALS, RENAMED_RESOURCES
+from bigml.api import HTTP_NO_CONTENT, HTTP_NOT_FOUND
+from bigml.constants import IRREGULAR_PLURALS, RENAMED_RESOURCES, \
+    TINY_RESOURCE, ALL_FIELDS
+from bigml.api_handlers.externalconnectorhandler import get_env_connection_info
+from bigml.util import get_exponential_wait
+
 
 MAX_RETRIES = 10
 RESOURCE_TYPES = [
     'cluster',
     'fusion',
     'optiml',
+    'composite',
     'source',
     'dataset',
     'prediction',
@@ -65,7 +74,8 @@
     'linearregression',
     'script',
     'execution',
-    'library'
+    'library',
+    'externalconnector'
 ]
 
 irregular_plurals = {}
@@ -84,25 +94,111 @@ def show_doc(self, examples=None):
     """ Shows the name and documentation of the method passed as argument
 
     """
-    print "%s:\n%s" % (self.__name__, self.__doc__)
+    print("%s:\n%s" % (self.__name__, self.__doc__))
     if examples:
-        print " |%s" % \
+        print(" |%s" % \
             "\n |".join(["|".join([str(item) for item in example]) for
-                example in examples])
+                example in examples]))
+
+def show_method(self, method, example):
+    """Prints the test class and method of the current test"""
+    class_name = re.sub(".*'(.*)'.*", "\\1", str(self.__class__))
+    print("\nTesting %s %s with:\n" % (class_name, method), example)
+
+
+def float_round(value, precision=5):
+    """Rounding if float"""
+    if isinstance(value, float):
+        return round(value, precision)
+    return value
+
+
+def flatten_shared():
+    """Returns the list of IDs stored in the world.shared structure """
+    ids_list = []
+    for _, value in world.shared.items():
+        for _, resource in value.items():
+            ids_list.append(resource["resource"])
+    return ids_list
-class World(object):
+def sort_dict(item):
+    """
+    Sort nested dict
+    """
+    if isinstance(item, list):
+        return [sort_dict(v) for v in item]
+    if not isinstance(item, dict):
+        return item
+    return {k: sort_dict(v) for k, v in sorted(item.items())}
+
+
+def eq_(*args, msg=None, precision=None):
+    """Wrapper to assert equality. If precision is set, the values are
+    rounded first."""
+    new_args = list(args)
+    if isinstance(args[0], dict):
+        assert isinstance(args[1], dict)
+        for index, arg in enumerate(new_args):
+            new_args[index] = list(dict(sorted(arg.items())).values())
+    if precision is not None:
+        if isinstance(new_args[0], list):
+            if msg is None:
+                msg = "Comparing: %s" % new_args
+            assert all(len(new_args[0]) == len(b) for b in new_args[1:]), msg
+            pairs = zip(new_args[0], new_args[1])
+            if msg is None:
+                msg = "Comparing: %s" % new_args
+            assert all(float_round(a, precision) == float_round(b, precision)
+                       for a, b in pairs), msg
+        else:
+            for index, arg in enumerate(new_args):
+                new_args[index] = float_round(arg, precision)
+            if msg is None:
+                msg = "Comparing: %s" % new_args
+            assert all(new_args[0] == b for b in new_args[1:]), msg
+    else:
+        if isinstance(new_args[0], (dict, list)):
+            for index, arg in enumerate(new_args):
+                new_args[index] = sort_dict(new_args[index])
+        if msg is None:
+            msg = "expected: %s, got: %s" % (new_args[0], new_args[1])
+        assert new_args[0] == new_args[1], msg
+
+
+def ok_(value, msg=None):
+    """Wrapper to assert."""
+    if msg is None:
+        assert value
+    else:
+        assert value, msg
+
+
+def approx_(number_a, number_b, msg=None, precision=5):
+    """Wrapper for pytest approx function"""
+    epsilon = math.pow(0.1, precision)
+    if msg is None:
+        msg = "%s != %s" % (repr(number_a), repr(number_b))
+    assert number_a == pytest.approx(number_b, abs=epsilon), msg
+
+
+class World:
+    """Object to store common test resources"""
 
     def __init__(self):
-        self.USERNAME = None
-        self.API_KEY = None
+        self.username = None
+        self.api_key = None
         self.api = None
         self.debug = False
         try:
             self.debug = bool(os.environ.get('BIGML_DEBUG', 0))
         except ValueError:
             pass
+        self.short_debug = False
+        try:
+            self.short_debug = bool(os.environ.get('BIGML_SHORT_DEBUG', 0))
+        except ValueError:
+            pass
         self.clear()
         self.dataset_ids = []
         self.fields_properties_dict = {}
@@ -112,18 +208,37 @@ def __init__(self):
         self.project_id = None
         self.print_connection_info()
         self.delta = int(os.environ.get('BIGML_DELTA', '1'))
-
+        self.errors = []
+        self.shared = {}
 
     def print_connection_info(self):
-        self.USERNAME = os.environ.get('BIGML_USERNAME')
-        self.API_KEY = os.environ.get('BIGML_API_KEY')
-        if self.USERNAME is None or self.API_KEY is None:
+        """Prints the variables used for the connection authentication"""
+        self.username = os.environ.get('BIGML_USERNAME')
+        self.api_key = os.environ.get('BIGML_API_KEY')
+        self.external_conn = get_env_connection_info()
+
+        if self.username is None or self.api_key is None:
             assert False, ("Tests use the BIGML_USERNAME and BIGML_API_KEY"
                            " environment variables to authenticate the"
                            " connection, but they seem to be unset. Please,"
                            " set them before testing.")
-        self.api = BigML(self.USERNAME, self.API_KEY, debug=self.debug)
-        print self.api.connection_info()
+        self.api = BigML(self.username, self.api_key, debug=self.debug,
+                         short_debug=self.short_debug,
+                         storage=(None if not (self.debug or self.short_debug)
+                                  else "./debug_storage"))
+        print("----------------------------------------------------------")
+        print(self.api.connection_info())
+        print(self.external_connection_info())
+        print("----------------------------------------------------------")
+
+    def external_connection_info(self):
+        """Printable string: The information used to connect to an external
+        data source
+
+        """
+        info = "External data connection config:\n%s" % \
+            pprint.pformat(self.external_conn, indent=4)
+        return info
 
     def clear(self):
         """Clears the stored resources' ids
@@ -134,70 +249,139 @@ def clear(self):
             setattr(self, RENAMED_RESOURCES.get(resource_type,
                                                 resource_type), None)
 
-    def delete_resources(self):
-        """Deletes the created objects
+    def _delete_resources(self, object_list, resource_type):
+        """Deletes resources grouped by type"""
+        if object_list:
+            print("Deleting %s %s" % (len(object_list),
+                                      plural(resource_type)))
+            kwargs = {}
+            if resource_type == "composite":
+                resource_type = "source"
+                kwargs = {"query_string": "delete_all=true"}
+            delete_method = self.api.deleters[resource_type]
+            for obj_id in object_list:
+                counter = 0
+                print("Deleting %s" % obj_id)
+                result = delete_method(obj_id, **kwargs)
+                while (result['code'] not in [HTTP_NO_CONTENT,
+                                              HTTP_NOT_FOUND] and
+                       counter < MAX_RETRIES):
+                    print("Delete failed for %s. Retrying" % obj_id)
+                    time.sleep(3 * self.delta)
+                    counter += 1
+                    result = delete_method(obj_id, **kwargs)
+                if counter == MAX_RETRIES:
+                    print("Retries to delete the created resources are"
+                          " exhausted. Failed to delete.")
 
-        """
+    def delete_resources(self):
+        """Deletes the created objects"""
+        keepers = flatten_shared()
+        for resource_type in RESOURCE_TYPES:
+            object_list = getattr(self, plural(resource_type))
+            object_list.reverse()
+            object_list = [obj for obj in object_list if obj not in keepers]
+            self._delete_resources(object_list, resource_type)
+        if world.errors:
+            print("Failed resources: \n\n")
+            for resource in world.errors:
+                print(json.dumps(resource["status"], indent=4))
+
+    def store_resources(self):
+        """Stores the created objects """
         for resource_type in RESOURCE_TYPES:
             object_list = set(getattr(self, plural(resource_type)))
             if object_list:
-                print "Deleting %s %s" % (len(object_list),
-                                          plural(resource_type))
-                delete_method = self.api.deleters[resource_type]
+                print("Storing %s %s" % (len(object_list),
+                                         plural(resource_type)))
+                if resource_type == "composite":
+                    resource_type = "source"
+                store_method = self.api.getters[resource_type]
                 for obj_id in object_list:
-                    counter = 0
-                    result = delete_method(obj_id)
-                    while (result['code'] != HTTP_NO_CONTENT and
-                           counter < MAX_RETRIES):
-                        print "Delete failed for %s. Retrying" % obj_id
-                        time.sleep(3 * self.delta)
-                        counter += 1
-                        result = delete_method(obj_id)
-                    if counter == MAX_RETRIES:
-                        print ("Retries to delete the created resources are"
-                               " exhausted. Failed to delete.")
+                    result = store_method(obj_id)
+                    self.api.ok(result)
+
+    def get_minimal_resource(self, resource_id):
+        """Retrieving resource info in a minimal way to get status info"""
+        return self.api.get_resource(
+            resource_id, query_string=TINY_RESOURCE)
+
+    def get_maximal_resource(self, resource_id):
+        """Retrieving all resource info for local handling"""
+        return self.api.get_resource(
+            resource_id, query_string=ALL_FIELDS)
 
 world = World()
 
-def res_filename(file):
-    return pkg_resources.resource_filename('bigml', "../../../%s" % file)
+def res_filename(filename):
+    """Returns path to a data filename"""
+    directory = os.path.dirname(sys.modules['bigml'].__file__)
+    return os.path.join(os.path.dirname(directory), filename)
+
 
 def setup_module():
     """Operations to be performed before each module
 
     """
     if world.project_id is None:
-        world.project_id = world.api.create_project( \
-            {"name": world.test_project_name})['resource']
+        if "project" not in world.shared:
+            world.shared["project"] = {}
+            world.shared["project"]["common"] = world.api.create_project( \
+                {"name": world.test_project_name})
+        world.project_id = world.shared["project"]["common"]['resource']
+        print("Creating common project: ", world.project_id)
     world.clear()
 
+
 def teardown_module():
     """Operations to be performed after each module
 
     """
+    print("Teardown module ---------------------------")
+    teardown_fn(force=False)
+
 
-    if os.path.exists('./tmp'):
-        shutil.rmtree('./tmp')
+def teardown_fn(force=False):
+    """Operations to be performed after a certain point """
+    if not world.debug and not world.short_debug:
+        if os.path.exists('./tmp'):
+            shutil.rmtree('./tmp')
 
-    if not world.debug:
         world.delete_resources()
-        project_stats = world.api.get_project( \
-            world.project_id)['object']['stats']
-        for resource_type, value in project_stats.items():
-            if value['count'] != 0:
-                # assert False, ("Increment in %s: %s" % (resource_type, value))
-                print "WARNING: Increment in %s: %s" % (resource_type, value)
-        world.api.delete_project(world.project_id)
-        world.project_id = None
+        if force:
+            world.api.delete_project(world.project_id)
+            del world.shared["project"]
+            world.project_id = None
+        else:
+            world.store_resources()
 
-def teardown_class():
-    """Operations to be performed after each class
+def logged_wait(start, delta, count, res_description, progress=0, status=None):
+    """Comparing the elapsed time to the expected delta and waiting for
+    the next sleep period.
 
     """
-    world.dataset_ids = []
-    world.local_ensemble = None
-    world.local_model = None
-    world.local_deepnet = None
+    if status is not None:
+        progress = status.get("progress", 0)
+        status_code = status.get("code")
+    progress_damping = progress if progress > 0.8 \
+        else 0  # damping when almost finished
+    wait_time = min(get_exponential_wait(
+        ((1.0 - progress_damping) * delta / 100.0) + 0.5, count), delta)
+    message = ""
+    if status is not None:
+        message = " (status: %s, progress: %s)" % (
+            status_code,
+            progress)
+    print("Waiting for %s%s %s secs." % (
        res_description,
        message,
        wait_time))
+    time.sleep(wait_time)
+    elapsed = (datetime.datetime.utcnow() - start).seconds
+    if elapsed > delta / 2.0:
+        print("%s seconds waiting for %s" % \
+            (elapsed, res_description))
+    ok_(elapsed < delta)
diff --git a/bigml/timeseries.py b/bigml/timeseries.py
index 69dbee99..62c6b2f5 100644
--- a/bigml/timeseries.py
+++ b/bigml/timeseries.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
-#!/usr/bin/env python
 #
-# Copyright 2017-2019 BigML
+# Copyright 2017-2025 BigML
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may
 # not use this file except in compliance with the License. You may obtain
@@ -21,7 +20,7 @@
 embedded into your application without needing to send requests to
 BigML.io.
 
-This module cannot only save you a few credits, but also enormously
+This module can help you enormously
 to reduce the latency for each prediction and let you use your
 time series models offline.
 
@@ -42,14 +41,15 @@
 import logging
 import re
 import sys
-import StringIO
+import io
 import pprint
 
 from bigml.api import FINISHED
-from bigml.api import get_status, get_api_connection
-from bigml.util import utf8
+from bigml.api import get_status, get_api_connection, get_time_series_id
+from bigml.util import utf8, use_cache, load
 from bigml.basemodel import get_resource_dict, extract_objective
 from bigml.modelfields import ModelFields
+from bigml.constants import DECIMALS
 from bigml.tssubmodels import SUBMODELS
 from bigml.tsoutconstants import SUBMODELS_CODE, TRIVIAL_MODEL, \
     SEASONAL_CODE, FORECAST_FUNCTION, USAGE_DOC
@@ -79,7 +79,8 @@ def compute_forecasts(submodels, horizon):
         forecasts.append( \
             {"model": name,
-             "point_forecast": SUBMODELS[trend](*args)})
+             "point_forecast": [round(value, DECIMALS) for value in
+                                SUBMODELS[trend](*args)]})
     return forecasts
 
@@ -134,10 +135,19 @@ class TimeSeries(ModelFields):
 
     """
 
-    def __init__(self, time_series, api=None):
+    def __init__(self, time_series, api=None, cache_get=None):
+
+        if use_cache(cache_get):
+            # using a cache to store the model attributes
+            self.__dict__ = load(get_time_series_id(time_series), cache_get)
+            return
+
         self.resource_id = None
+        self.name = None
+        self.description = None
+        self.dataset_id = None
         self.input_fields = []
+        self.default_numeric_value = None
         self.objective_fields = []
         self.all_numeric_objectives = False
         self.period = 1
@@ -149,23 +159,28 @@
         self.time_range = {}
         self.field_parameters = {}
         self._forecast = {}
-        self.api = get_api_connection(api)
+        api = get_api_connection(api)
         self.resource_id, time_series = get_resource_dict( \
-            time_series, "timeseries", api=self.api)
+            time_series, "timeseries", api=api)
         if 'object' in time_series and \
             isinstance(time_series['object'], dict):
             time_series = time_series['object']
         try:
+            self.dataset_id = time_series.get('dataset')
+            self.name = time_series.get("name")
+            self.description = time_series.get("description")
             self.input_fields = time_series.get("input_fields", [])
+            self.default_numeric_value = time_series.get( \
+                "default_numeric_value")
             self._forecast = time_series.get("forecast")
            self.objective_fields = time_series.get(
                 "objective_fields", [])
             objective_field = time_series['objective_field'] if \
                 time_series.get('objective_field') else \
                 time_series['objective_fields']
-        except KeyError:
+        except (AttributeError, KeyError):
            raise ValueError("Failed to find the time series expected "
                             "JSON structure. Check your arguments.")
         if 'time_series' in time_series and \
@@ -178,7 +193,7 @@ def __init__(self, time_series, api=None):
         if not self.input_fields:
             self.input_fields = [ \
                 field_id for field_id, _ in
-                sorted(self.fields.items(),
+                sorted(list(self.fields.items()),
                        key=lambda x: x[1].get("column_number"))]
         self.all_numeric_objectives = time_series_info.get( \
             'all_numeric_objectives')
@@ -212,7 +227,7 @@ def forecast(self, input_data=None):
 
         """
         if not input_data:
             forecasts = {}
-            for field_id, value in self._forecast.items():
+            for field_id, value in list(self._forecast.items()):
                 forecasts[field_id] = []
                 for forecast in value:
                     local_forecast = {}
@@ -225,14 +240,13 @@ def forecast(self, input_data=None):
 
         # Checks and cleans input_data leaving only the fields used as
         # objective fields in the model
-        new_data = self.filter_objectives( \
+        norm_input_data = self.filter_objectives( \
             input_data)
-        input_data = new_data
 
         # filter submodels: filtering the submodels in the time-series
         # model to be used in the prediction
         filtered_submodels = {}
-        for field_id, field_input in input_data.items():
+        for field_id, field_input in list(norm_input_data.items()):
             filter_info = field_input.get("ets_models", {})
             if not filter_info:
                 filter_info = DEFAULT_SUBMODEL
@@ -240,12 +254,21 @@ def forecast(self, input_data=None):
                 self.ets_models[field_id], filter_info)
 
         forecasts = {}
-        for field_id, submodels in filtered_submodels.items():
+        for field_id, submodels in list(filtered_submodels.items()):
             forecasts[field_id] = compute_forecasts(submodels, \
-                input_data[field_id]["horizon"])
+                norm_input_data[field_id]["horizon"])
 
         return forecasts
 
+    def predict(self, input_data, full=False):
+        """Method to homogenize the local models interface for all BigML
+        models. It returns the forecast method result.
+        """
+        forecast = self.forecast(input_data)
+        if full:
+            return {"forecast": forecast}
+        return forecast
+
     def filter_objectives(self, input_data,
                           full=False):
         """Filters the keys given in input_data checking against the
@@ -258,8 +281,7 @@ def filter_objectives(self, input_data,
         unused_fields = []
         new_input = {}
         if isinstance(input_data, dict):
-
-            for key, value in input_data.items():
+            for key, value in list(input_data.items()):
                 if key not in self.fields:
                     key = self.inverted_fields.get(key, key)
                 if key in self.input_fields:
@@ -268,7 +290,7 @@ def filter_objectives(self, input_data,
                     unused_fields.append(key)
 
             # raise error if no horizon is provided
-            for key, value in input_data.items():
+            for key, value in list(input_data.items()):
                 value = self.normalize(value)
                 if not isinstance(value, dict):
                     raise ValueError( \
@@ -280,7 +302,7 @@ def filter_objectives(self, input_data,
                         "Each field in input data must contain at "
                         "least a \"horizon\" attribute.")
                 if any(key not in SUBMODEL_KEYS for key in \
-                        value.get("ets_models", {}).keys()):
+                        list(value.get("ets_models", {}).keys())):
                     raise ValueError( \
                         "Only %s allowed as keys in each fields submodel"
                         " filter." % ", ".join(SUBMODEL_KEYS))
% ", ".join(SUBMODEL_KEYS)) @@ -288,23 +310,22 @@ def filter_objectives(self, input_data, result = (new_input, unused_fields) if full else \ new_input return result - else: - LOGGER.error("Failed to read input data in the expected" - " {field:value} format.") - return ({}, []) if full else {} + LOGGER.error("Failed to read input data in the expected" + " {field:value} format.") + return ({}, []) if full else {} def python(self, out=sys.stdout): """Generates the code in python that creates the forecasts """ - attributes = [u"l", u"b", u"s", u"phi", u"value", u"slope"] + attributes = ["l", "b", "s", "phi", "value", "slope"] components = {} model_components = {} model_names = [] out.write(utf8(USAGE_DOC % (self.resource_id, self.fields[self.objective_id]["name"]))) - output = [u"COMPONENTS = \\"] - for field_id, models in self.ets_models.items(): + output = ["COMPONENTS = \\"] + for field_id, models in list(self.ets_models.items()): for model in models: final_state = model.get("final_state", {}) attrs = {} @@ -319,12 +340,12 @@ def python(self, out=sys.stdout): field_name = self.fields[field_id]["name"] if field_name not in components: components[field_name] = model_components - partial_output = StringIO.StringIO() + partial_output = io.StringIO() pprint.pprint(components, stream=partial_output) for line in partial_output.getvalue().split("\n"): - output.append(u"%s%s" % (INDENT, line)) + output.append("%s%s" % (INDENT, line)) - out.write(utf8(u"\n".join(output))) + out.write(utf8("\n".join(output))) model_names = list(set(model_names)) if any(name in model_names for name in ["naive", "mean"]): @@ -339,8 +360,8 @@ def python(self, out=sys.stdout): for trend in trends: models_function.append("\"%s\": _%s_forecast" % (trend, trend)) out.write(utf8(SUBMODELS_CODE[trend])) - out.write(utf8(u"\n\nMODELS = \\\n")) + out.write(utf8("\n\nMODELS = \\\n")) out.write(utf8("%s%s%s" % \ - (u" {", u",\n ".join(models_function), u"}"))) + (" {", ",\n ".join(models_function), "}"))) out.write(utf8(FORECAST_FUNCTION)) diff --git a/bigml/topicmodel.py b/bigml/topicmodel.py index 7a3874e0..abc87b5f 100644 --- a/bigml/topicmodel.py +++ b/bigml/topicmodel.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2016-2019 BigML +# Copyright 2016-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. 
You may obtain @@ -47,9 +46,12 @@ from bigml.api import FINISHED -from bigml.api import get_status, get_api_connection +from bigml.api import get_status, get_api_connection, get_topic_model_id from bigml.basemodel import get_resource_dict from bigml.modelfields import ModelFields +from bigml.util import use_cache, load, dump, dumps, get_data_format, \ + get_formatted_data, format_data, get_data_transformations +from bigml.constants import OUT_NEW_FIELDS, OUT_NEW_HEADERS, INTERNAL LOGGER = logging.getLogger('BigML') @@ -60,23 +62,35 @@ SAMPLES_PER_TOPIC = 128 CODE_TO_NAME = { - "da": u'danish', - "nl": u'dutch', - "en": u'english', - "fi": u'finnish', - "fr": u'french', - "de": u'german', - "hu": u'hungarian', - "it": u'italian', - "nn": u'norwegian', - "pt": u'portuguese', - "ro": u'romanian', - "ru": u'russian', - "es": u'spanish', - "sv": u'swedish', - "tr": u'turkish' + "da": 'danish', + "nl": 'dutch', + "en": 'english', + "fi": 'finnish', + "fr": 'french', + "de": 'german', + "hu": 'hungarian', + "it": 'italian', + "nn": 'norwegian', + "pt": 'portuguese', + "ro": 'romanian', + "ru": 'russian', + "es": 'spanish', + "sv": 'swedish', + "tr": 'turkish' } + +def distribution_to_dict(distribution): + """Returns a dictionary as topic_name: probability for the + topic distribution. + """ + prediction_dict = {} + for topic_info in distribution: + prediction_dict.update({topic_info["name"]: + topic_info["probability"]}) + return prediction_dict + + class TopicModel(ModelFields): """ A lightweight wrapper around a Topic Model. @@ -84,11 +98,22 @@ class TopicModel(ModelFields): to generate topic distributions for input documents locally. """ + #pylint: disable=locally-disabled,c-extension-no-member,invalid-name + def __init__(self, topic_model, api=None, cache_get=None): - def __init__(self, topic_model, api=None): + self.lang = None + self.stemmer = None + if use_cache(cache_get): + # using a cache to store the model attributes + self.__dict__ = load(get_topic_model_id(topic_model), cache_get) + if self.lang in CODE_TO_NAME: + self.stemmer = Stemmer.Stemmer(CODE_TO_NAME[self.lang]) + return self.resource_id = None - self.stemmer = None + self.name = None + self.description = None + self.parent_id = None self.seed = None self.case_sensitive = False self.bigrams = False @@ -97,13 +122,20 @@ def __init__(self, topic_model, api=None): self.phi = None self.term_to_index = None self.topics = [] - self.api = get_api_connection(api) + api = get_api_connection(api) self.resource_id, topic_model = get_resource_dict( \ - topic_model, "topicmodel", api=self.api) + topic_model, "topicmodel", api=api) if 'object' in topic_model and isinstance(topic_model['object'], dict): topic_model = topic_model['object'] + try: + self.parent_id = topic_model.get('dataset') + self.name = topic_model.get("name") + self.description = topic_model.get("description") + except AttributeError: + raise ValueError("Failed to find the expected " + "JSON structure. 
Check your arguments.") if 'topic_model' in topic_model \ and isinstance(topic_model['topic_model'], dict): @@ -114,9 +146,9 @@ def __init__(self, topic_model, api=None): self.topics = model['topics'] if 'language' in model and model['language'] is not None: - lang = model['language'] - if lang in CODE_TO_NAME: - self.stemmer = Stemmer.Stemmer(CODE_TO_NAME[lang]) + self.lang = model['language'] + if self.lang in CODE_TO_NAME: + self.stemmer = Stemmer.Stemmer(CODE_TO_NAME[self.lang]) self.term_to_index = {self.stem(term): index for index, term in enumerate(model['termset'])} @@ -164,14 +196,14 @@ def distribution(self, input_data): # Checks and cleans input_data leaving the fields used in the model input_data = self.filter_input_data(input_data) - return self.distribution_for_text("\n\n".join(input_data.values())) + return self.distribution_for_text("\n\n".join(list(input_data.values()))) def distribution_for_text(self, text): """Returns the topic distribution of the given `text`, which can either be a string or a list of strings """ - if isinstance(text, (str, unicode)): + if isinstance(text, str): astr = text else: # List of strings @@ -189,8 +221,7 @@ def stem(self, term): """ if not self.stemmer: return term - else: - return self.stemmer.stemWord(term) + return self.stemmer.stemWord(term) def append_bigram(self, out_terms, first, second): """Takes two terms and appends the index of their concatenation to the @@ -216,7 +247,7 @@ def tokenize(self, astr): space_was_sep = False saw_char = False - text = unicode(astr) + text = str(astr) index = 0 length = len(text) @@ -264,7 +295,7 @@ def next_char(text, index): last_term = term_out - if char == " " or char == "\n": + if char in [" ", "\n"]: space_was_sep = True tstem = self.stem(term_out) @@ -368,3 +399,77 @@ def infer(self, list_of_indices): return [(sample_counts[k] + self.alpha) / normalizer for k in range(self.ntopics)] + + def predict(self, input_data, full=False): + """Method to homogeneize the local models interface for all BigML + models. It returns the distribution method result. + """ + distribution = self.distribution(input_data) + if full: + return distribution_to_dict(distribution) + return distribution + + def batch_predict(self, input_data_list, outputs=None, **kwargs): + """Creates a batch prediction for a list of inputs using the local + supervised model. Allows to define some output settings to + decide the fields to be added to the input_data (prediction, + probability, etc.) and the name that we want to assign to these new + fields. The outputs argument accepts a dictionary with keys + "output_fields", to contain a list of the prediction properties to add + (["prediction", "probability"] by default) and "output_headers", to + contain a list of the headers to be used when adding them (identical + to "output_fields" list, by default). 
+ + :param input_data_list: List of input data to be predicted + :type input_data_list: list or Pandas' dataframe + :param dict outputs: properties that define the headers and fields to + be added to the input data + :return: the list of input data plus the predicted values + :rtype: list or Pandas' dataframe depending on the input type in + input_data_list + + """ + if outputs is None: + outputs = {} + new_fields = outputs.get(OUT_NEW_FIELDS, [topic["name"] for topic + in self.topics]) + new_headers = outputs.get(OUT_NEW_HEADERS, new_fields) + if len(new_fields) > len(new_headers): + new_headers.extend(new_fields[len(new_headers):]) + else: + new_headers = new_headers[0: len(new_fields)] + data_format = get_data_format(input_data_list) + inner_data_list = get_formatted_data(input_data_list, INTERNAL) + for index, input_data in enumerate(inner_data_list): + prediction = self.distribution(input_data) + prediction_dict = distribution_to_dict(prediction) + for ikey, key in enumerate(new_fields): + inner_data_list[index][new_headers[ikey]] = prediction_dict[ + key] + if data_format != INTERNAL: + return format_data(inner_data_list, out_format=data_format) + return inner_data_list + + def data_transformations(self): + """Returns the pipeline transformations previous to the modeling + step as a pipeline, so that they can be used in local predictions. + We avoid setting it in a Mixin to maintain the current dump function. + """ + return get_data_transformations(self.resource_id, self.parent_id) + + def dump(self, output=None, cache_set=None): + """Uses msgpack to serialize the resource object + If cache_set is filled with a cache set method, the method is called + + """ + self_vars = vars(self).copy() + del self_vars["stemmer"] + dump(self_vars, output=output, cache_set=cache_set) + + def dumps(self): + """Uses msgpack to serialize the resource object to a string + + """ + self_vars = vars(self).copy() + del self_vars["stemmer"] + return dumps(self_vars) diff --git a/bigml/tree.py b/bigml/tree.py deleted file mode 100644 index 6609ddaf..00000000 --- a/bigml/tree.py +++ /dev/null @@ -1,974 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2013-2019 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -"""Tree structure for the BigML local Model - -This module defines an auxiliary Tree structure that is used in the local Model -to make predictions locally or embedded into your application without needing -to send requests to BigML.io. 
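Two quick usage sketches for the additions above: batch predictions with custom output headers, and the msgpack dump/reload cycle. Ids and topic names are hypothetical; fs_cache_set and fs_cache_get are the file-system cache helpers this patch adds to bigml/util.py further down:

    input_data_list = [{"message": "Mobile call"},
                       {"message": "Contract offer"}]
    outputs = {"output_fields": ["Topic 00"],
               "output_headers": ["topic 00 probability"]}
    local_topic_model.batch_predict(input_data_list, outputs=outputs)
    # every row gains a "topic 00 probability" key with that topic's score

    from bigml.util import fs_cache_set, fs_cache_get
    local_topic_model.dump(cache_set=fs_cache_set("./storage"))
    # later: rebuilt from the dump; the stemmer is recreated from `lang`
    local_topic_model = TopicModel("topicmodel/5026965515526876630001b2",
                                   cache_get=fs_cache_get("./storage"))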
- -""" -import keyword -import numbers -import math - -try: - from scipy import stats -except ImportError: - pass - -from bigml.predicate import Predicate -from bigml.prediction import Prediction -from bigml.predicate import TM_TOKENS, TM_FULL_TERM, TM_ALL -from bigml.util import sort_fields, slugify, split, utf8, PRECISION -from bigml.multivote import ws_confidence, merge_distributions, merge_bins -from bigml.multivote import BINS_LIMIT -from bigml.tree_utils import tableau_string, filter_nodes, missing_branch, \ - none_value, one_branch -from bigml.tree_utils import PYTHON_OPERATOR, MAX_ARGS_LENGTH, INDENT, \ - TERM_OPTIONS, ITEM_OPTIONS - -MISSING_OPERATOR = { - "=": "is", - "!=": "is not" -} - -T_MISSING_OPERATOR = { - "=": "ISNULL(", - "!=": "NOT ISNULL(" -} - -LAST_PREDICTION = 0 -PROPORTIONAL = 1 - -DISTRIBUTION_GROUPS = ['bins', 'counts', 'categories'] - - -def get_instances(distribution): - """Returns the total number of instances in a distribution - - """ - return sum(x[1] for x in distribution) if distribution else 0 - - -def mean(distribution): - """Computes the mean of a distribution in the [[point, instances]] syntax - - """ - addition = 0.0 - count = 0.0 - for point, instances in distribution: - addition += point * instances - count += instances - if count > 0: - return addition / count - return float('nan') - - -def unbiased_sample_variance(distribution, distribution_mean=None): - """Computes the standard deviation of a distribution in the - [[point, instances]] syntax - - """ - addition = 0.0 - count = 0.0 - if (distribution_mean is None or not - isinstance(distribution_mean, numbers.Number)): - distribution_mean = mean(distribution) - for point, instances in distribution: - addition += ((point - distribution_mean) ** 2) * instances - count += instances - if count > 1: - return addition / (count - 1) - return float('nan') - - -def regression_error(distribution_variance, population, r_z=1.96): - """Computes the variance error - - """ - if population > 0: - chi_distribution = stats.chi2(population) - ppf = chi_distribution.ppf(1 - math.erf(r_z / math.sqrt(2))) - if ppf != 0: - error = distribution_variance * (population - 1) / ppf - error = error * ((math.sqrt(population) + r_z) ** 2) - return math.sqrt(error / population) - return float('nan') - - -def extract_distribution(summary): - """Extracts the distribution info from the objective_summary structure - in any of its grouping units: bins, counts or categories - - """ - for group in DISTRIBUTION_GROUPS: - if group in summary: - return group, summary.get(group) - - -def dist_median(distribution, count): - """Returns the median value for a distribution - - """ - counter = 0 - previous_value = None - for value, instances in distribution: - counter += instances - if counter > count / 2.0: - if (not count % 2 and (counter - 1) == (count / 2) and - previous_value is not None): - return (value + previous_value) / 2.0 - return value - previous_value = value - return None - - -class Tree(object): - """A tree-like predictive model. 
- - """ - def __init__(self, tree, fields, objective_field=None, - root_distribution=None, parent_id=None, ids_map=None, - subtree=True, tree_info=None): - - self.fields = fields - self.objective_id = objective_field - self.output = tree['output'] - - if tree['predicate'] is True: - self.predicate = True - else: - self.predicate = Predicate( - tree['predicate']['operator'], - tree['predicate']['field'], - tree['predicate']['value'], - tree['predicate'].get('term', None)) - if 'id' in tree: - self.id = tree['id'] - self.parent_id = parent_id - if isinstance(ids_map, dict): - ids_map[self.id] = self - else: - self.id = None - - children = [] - if 'children' in tree: - for child in tree['children']: - children.append(self.__class__( \ - child, - self.fields, - objective_field=objective_field, - parent_id=self.id, - ids_map=ids_map, - subtree=subtree, - tree_info=tree_info)) - - self.children = children - self.regression = self.is_regression() - tree_info['regression'] = (self.regression and - tree_info.get('regression', True)) - self.count = tree['count'] - self.confidence = tree.get('confidence', None) - self.distribution = None - self.max = None - self.min = None - self.weighted = False - summary = None - if 'distribution' in tree: - self.distribution = tree['distribution'] - elif 'objective_summary' in tree: - summary = tree['objective_summary'] - (self.distribution_unit, - self.distribution) = extract_distribution(summary) - if 'weighted_objective_summary' in tree: - summary = tree['weighted_objective_summary'] - (self.weighted_distribution_unit, - self.weighted_distribution) = extract_distribution(summary) - self.weight = tree['weight'] - self.weighted = True - else: - summary = root_distribution - (self.distribution_unit, - self.distribution) = extract_distribution(summary) - if self.regression: - tree_info['max_bins'] = max(tree_info.get('max_bins', 0), - len(self.distribution)) - self.median = None - if summary: - self.median = summary.get('median') - if not self.median: - self.median = dist_median(self.distribution, self.count) - self.max = summary.get('maximum') or \ - max([value for [value, _] in self.distribution]) - self.min = summary.get('minimum') or \ - min([value for [value, _] in self.distribution]) - self.impurity = None - if not self.regression and self.distribution is not None: - self.impurity = self.gini_impurity() - - def gini_impurity(self): - """Returns the gini impurity score associated to the distribution - in the node - - """ - purity = 0.0 - if self.distribution is None: - return None - for _, instances in self.distribution: - purity += math.pow(instances / float(self.count), 2) - return 1.0 - purity - - def list_fields(self, out): - """Lists a description of the model's fields. 
- - """ - out.write(utf8(u'<%-32s : %s>\n' % ( - self.fields[self.objective_id]['name'], - self.fields[self.objective_id]['optype']))) - out.flush() - - for field in [(val['name'], val['optype']) for key, val in - sort_fields(self.fields) - if key != self.objective_id]: - out.write(utf8(u'[%-32s : %s]\n' % (field[0], field[1]))) - out.flush() - return self.fields - - def is_regression(self): - """Checks if the subtree structure can be a regression - - """ - def is_classification(node): - """Checks if the node's value is a category - - """ - return isinstance(node.output, basestring) - - classification = is_classification(self) - if classification: - return False - if not self.children: - return True - else: - return not any([is_classification(child) - for child in self.children]) - - def get_leaves(self, path=None, filter_function=None): - """Returns a list that includes all the leaves of the tree. - - """ - leaves = [] - if path is None: - path = [] - if not isinstance(self.predicate, bool): - path.append(self.predicate.to_lisp_rule(self.fields)) - - if self.children: - for child in self.children: - leaves += child.get_leaves(path=path[:], - filter_function=filter_function) - else: - leaf = { - 'id': self.id, - 'confidence': self.confidence, - 'count': self.count, - 'distribution': self.distribution, - 'impurity': self.impurity, - 'output': self.output, - 'path': path} - if hasattr(self, 'weighted_distribution'): - leaf.update( \ - {"weighted_distribution": self.weighted_distribution, - "weight": self.weight}) - if (not hasattr(filter_function, '__call__') - or filter_function(leaf)): - leaves += [leaf] - return leaves - - def predict(self, input_data, path=None, missing_strategy=LAST_PREDICTION): - """Makes a prediction based on a number of field values. - - The input fields must be keyed by Id. There are two possible - strategies to predict when the value for the splitting field - is missing: - 0 - LAST_PREDICTION: the last issued prediction is returned. - 1 - PROPORTIONAL: as we cannot choose between the two branches - in the tree that stem from this split, we consider both. The - algorithm goes on until the final leaves are reached and - all their predictions are used to decide the final prediction. 
- """ - - if path is None: - path = [] - if missing_strategy == PROPORTIONAL: - (final_distribution, - d_min, - d_max, - last_node, - population, - parent_node) = self.predict_proportional(input_data, path=path) - - if self.regression: - # singular case: - # when the prediction is the one given in a 1-instance node - if len(final_distribution.items()) == 1: - prediction, instances = final_distribution.items()[0] - if instances == 1: - return Prediction( - last_node.output, - path, - last_node.confidence, - distribution=(last_node.distribution if not \ - self.weighted else \ - last_node.weighted_distribution), - count=instances, - median=last_node.median, - distribution_unit=last_node.distribution_unit, - children=last_node.children, - d_min=last_node.min, - d_max=last_node.max) - # when there's more instances, sort elements by their mean - distribution = [list(element) for element in - sorted(final_distribution.items(), - key=lambda x: x[0])] - distribution_unit = ('bins' if len(distribution) > BINS_LIMIT - else 'counts') - distribution = merge_bins(distribution, BINS_LIMIT) - total_instances = sum([instances - for _, instances in distribution]) - if len(distribution) == 1: - # where there's only one bin, there will be no error, but - # we use a correction derived from the parent's error - prediction = distribution[0][0] - if total_instances < 2: - total_instances = 1 - try: - # some strange models can have nodes with no confidence - confidence = round(parent_node.confidence / - math.sqrt(total_instances), - PRECISION) - except AttributeError: - confidence = None - else: - prediction = mean(distribution) - confidence = round(regression_error( - unbiased_sample_variance(distribution, prediction), - total_instances), PRECISION) - return Prediction( - prediction, - path, - confidence, - distribution=distribution, - count=total_instances, - median=dist_median(distribution, total_instances), - distribution_unit=distribution_unit, - children=last_node.children, - d_min=d_min, - d_max=d_max) - else: - distribution = [list(element) for element in - sorted(final_distribution.items(), - key=lambda x: (-x[1], x[0]))] - return Prediction( - distribution[0][0], - path, - ws_confidence(distribution[0][0], final_distribution, - ws_n=population), - distribution=distribution, - count=population, - median=None, - distribution_unit='categorical', - children=last_node.children) - - else: - if self.children: - for child in self.children: - if child.predicate.apply(input_data, self.fields): - path.append(child.predicate.to_rule(self.fields)) - return child.predict(input_data, path=path) - - if self.weighted: - output_distribution = self.weighted_distribution - output_unit = self.weighted_distribution_unit - else: - output_distribution = self.distribution - output_unit = self.distribution_unit - - return Prediction( - self.output, - path, - self.confidence, - distribution=output_distribution, - count=get_instances(output_distribution), - median=None if not self.regression else self.median, - distribution_unit=output_unit, - children=self.children, - d_min=None if not self.regression else self.min, - d_max=None if not self.regression else self.max) - - def predict_proportional(self, input_data, path=None, - missing_found=False, median=False, parent=None): - """Makes a prediction based on a number of field values averaging - the predictions of the leaves that fall in a subtree. - - Each time a splitting field has no value assigned, we consider - both branches of the split to be true, merging their - predictions. 
The function returns the merged distribution and the - last node reached by a unique path. - - """ - - if path is None: - path = [] - - final_distribution = {} - if not self.children: - distribution = self.distribution if not self.weighted else \ - self.weighted_distribution - return (merge_distributions({}, dict((x[0], x[1]) - for x in distribution)), - self.min, self.max, self, self.count, parent) - if one_branch(self.children, input_data) or \ - self.fields[split(self.children)]["optype"] in \ - ["text", "items"]: - for child in self.children: - if child.predicate.apply(input_data, self.fields): - new_rule = child.predicate.to_rule(self.fields) - if new_rule not in path and not missing_found: - path.append(new_rule) - return child.predict_proportional(input_data, path, - missing_found, median, - parent=self) - else: - # missing value found, the unique path stops - missing_found = True - minimums = [] - maximums = [] - population = 0 - for child in self.children: - (subtree_distribution, subtree_min, - subtree_max, _, subtree_pop, _) = \ - child.predict_proportional(input_data, path, - missing_found, median, - parent=self) - if subtree_min is not None: - minimums.append(subtree_min) - if subtree_max is not None: - maximums.append(subtree_max) - population += subtree_pop - final_distribution = merge_distributions( - final_distribution, subtree_distribution) - return (final_distribution, - min(minimums) if minimums else None, - max(maximums) if maximums else None, self, population, - self) - - def generate_rules(self, depth=0, ids_path=None, subtree=True): - """Translates a tree model into a set of IF-THEN rules. - - """ - rules = u"" - children = filter_nodes(self.children, ids=ids_path, - subtree=subtree) - if children: - for child in children: - rules += (u"%s IF %s %s\n" % - (INDENT * depth, - child.predicate.to_rule(self.fields, 'slug'), - "AND" if child.children else "THEN")) - rules += child.generate_rules(depth + 1, ids_path=ids_path, - subtree=subtree) - else: - rules += (u"%s %s = %s\n" % - (INDENT * depth, - (self.fields[self.objective_id]['slug'] - if self.objective_id else "Prediction"), - self.output)) - return rules - - def rules(self, out, ids_path=None, subtree=True): - """Prints out an IF-THEN rule version of the tree. - - """ - for field in [(key, val) for key, val in sort_fields(self.fields)]: - - slug = slugify(self.fields[field[0]]['name']) - self.fields[field[0]].update(slug=slug) - out.write(utf8(self.generate_rules(ids_path=ids_path, - subtree=subtree))) - out.flush() - - def python_body(self, depth=1, cmv=None, input_map=False, - ids_path=None, subtree=True): - """Translate the model into a set of "if" python statements. - - `depth` controls the size of indentation. As soon as a value is missing - that node is returned without further evaluation. - - """ - - def map_data(field, missing=False): - """Returns the subject of the condition in map format when - more than MAX_ARGS_LENGTH arguments are used. 
- """ - if input_map: - if missing: - return "data.get('%s')" % field - else: - return "data['%s']" % field - return field - if cmv is None: - cmv = [] - body = u"" - term_analysis_fields = [] - item_analysis_fields = [] - children = filter_nodes(self.children, ids=ids_path, - subtree=subtree) - if children: - field = split(children) - has_missing_branch = (missing_branch(children) or - none_value(children)) - # the missing is singled out as a special case only when there's - # no missing branch in the children list - if not has_missing_branch and \ - self.fields[field]["optype"] not in ["text", "items"] and \ - self.fields[field]['slug'] not in cmv: - body += (u"%sif (%s is None):\n" % - (INDENT * depth, - map_data(self.fields[field]['slug'], True))) - if self.fields[self.objective_id]['optype'] == 'numeric': - value = self.output - else: - value = repr(self.output) - body += (u"%sreturn %s\n" % - (INDENT * (depth + 1), - value)) - cmv.append(self.fields[field]['slug']) - - for child in children: - field = child.predicate.field - pre_condition = u"" - if has_missing_branch and child.predicate.value is not None: - negation = u"" if child.predicate.missing else u" not" - connection = u"or" if child.predicate.missing else u"and" - pre_condition = ( - u"%s is%s None %s " % ( - map_data(self.fields[field]['slug'], True), - negation, - connection)) - if not child.predicate.missing: - cmv.append(self.fields[field]['slug']) - optype = self.fields[field]['optype'] - if (optype == 'numeric' or optype == 'text' or - optype == 'items' - or child.predicate.value is None): - value = child.predicate.value - else: - value = repr(child.predicate.value) - if optype == 'text' or optype == 'items': - if optype == 'text': - term_analysis_fields.append((field, - child.predicate.term)) - matching_function = "term_matches" - else: - item_analysis_fields.append((field, - child.predicate.term)) - matching_function = "item_matches" - - body += ( - u"%sif (%s%s(%s, \"%s\", %s\"%s\") %s %s):" - u"\n" % - (INDENT * depth, pre_condition, matching_function, - map_data(self.fields[field]['slug'], - False), - self.fields[field]['slug'], - ('u' if isinstance(child.predicate.term, unicode) - else ''), - child.predicate.term.replace("\"", "\\\""), - PYTHON_OPERATOR[child.predicate.operator], - value)) - else: - operator = (MISSING_OPERATOR[child.predicate.operator] if - child.predicate.value is None else - PYTHON_OPERATOR[child.predicate.operator]) - if child.predicate.value is None: - cmv.append(self.fields[field]['slug']) - body += ( - u"%sif (%s%s %s %s):\n" % - (INDENT * depth, pre_condition, - map_data(self.fields[field]['slug'], - False), - operator, - value)) - next_level = child.python_body(depth + 1, cmv=cmv[:], - input_map=input_map, - ids_path=ids_path, - subtree=subtree) - body += next_level[0] - term_analysis_fields.extend(next_level[1]) - item_analysis_fields.extend(next_level[2]) - else: - if self.fields[self.objective_id]['optype'] == 'numeric': - value = self.output - else: - value = repr(self.output) - body = u"%sreturn %s\n" % (INDENT * depth, value) - - return body, term_analysis_fields, item_analysis_fields - - def python(self, out, docstring, input_map=False, - ids_path=None, subtree=True): - """Writes a python function that implements the model. 
- - """ - args = [] - parameters = sort_fields(self.fields) - if not input_map: - input_map = len(parameters) > MAX_ARGS_LENGTH - reserved_keywords = keyword.kwlist if not input_map else None - prefix = "_" if not input_map else "" - for field in [(key, val) for key, val in parameters]: - slug = slugify(self.fields[field[0]]['name'], - reserved_keywords=reserved_keywords, prefix=prefix) - self.fields[field[0]].update(slug=slug) - if not input_map: - if field[0] != self.objective_id: - args.append("%s=None" % (slug)) - if input_map: - args.append("data={}") - predictor_definition = (u"def predict_%s" % - self.fields[self.objective_id]['slug']) - depth = len(predictor_definition) + 1 - predictor = u"%s(%s):\n" % ( - predictor_definition, - (",\n" + " " * depth).join(args)) - predictor_doc = (INDENT + u"\"\"\" " + docstring + - u"\n" + INDENT + u"\"\"\"\n") - body, term_analysis_predicates, item_analysis_predicates = \ - self.python_body(input_map=input_map, - ids_path=ids_path, - subtree=subtree) - terms_body = u"" - if term_analysis_predicates or item_analysis_predicates: - terms_body = self.term_analysis_body(term_analysis_predicates, - item_analysis_predicates) - predictor += predictor_doc + terms_body + body - out.write(utf8(predictor)) - out.flush() - - def term_analysis_body(self, term_analysis_predicates, - item_analysis_predicates): - """ Writes auxiliary functions to handle the term and item - analysis fields - - """ - body = u"" - # static content - body += """ - import re - - tm_tokens = '%s' - tm_full_term = '%s' - tm_all = '%s' - -""" % (TM_TOKENS, TM_FULL_TERM, TM_ALL) - if term_analysis_predicates: - body += """ - def term_matches(text, field_name, term): - \"\"\" Counts the number of occurences of term and its variants in text - - \"\"\" - if text is None: - text = "" - forms_list = term_forms[field_name].get(term, [term]) - options = term_analysis[field_name] - token_mode = options.get('token_mode', tm_tokens) - case_sensitive = options.get('case_sensitive', False) - first_term = forms_list[0] - if token_mode == tm_full_term: - return full_term_match(text, first_term, case_sensitive) - else: - # In token_mode='all' we will match full terms using equals and - # tokens using contains - if token_mode == tm_all and len(forms_list) == 1: - pattern = re.compile(r'^.+\\b.+$', re.U) - if re.match(pattern, first_term): - return full_term_match(text, first_term, case_sensitive) - return term_matches_tokens(text, forms_list, case_sensitive) - - - def full_term_match(text, full_term, case_sensitive): - \"\"\"Counts the match for full terms according to the case_sensitive - option - - \"\"\" - if not case_sensitive: - text = text.lower() - full_term = full_term.lower() - return 1 if text == full_term else 0 - - def get_tokens_flags(case_sensitive): - \"\"\"Returns flags for regular expression matching depending on text - analysis options - - \"\"\" - flags = re.U - if not case_sensitive: - flags = (re.I | flags) - return flags - - - def term_matches_tokens(text, forms_list, case_sensitive): - \"\"\" Counts the number of occurences of the words in forms_list in - the text - - \"\"\" - flags = get_tokens_flags(case_sensitive) - expression = ur'(\\b|_)%s(\\b|_)' % '(\\\\b|_)|(\\\\b|_)'.join(forms_list) - pattern = re.compile(expression, flags=flags) - matches = re.findall(pattern, text) - return len(matches) - -""" - - term_analysis_options = set([predicate[0] for predicate in - term_analysis_predicates]) - term_analysis_predicates = set(term_analysis_predicates) - body += """ - 
term_analysis = {""" - for field_id in term_analysis_options: - field = self.fields[field_id] - body += """ - \"%s\": {""" % field['slug'] - for option in field['term_analysis']: - if option in TERM_OPTIONS: - body += """ - \"%s\": %s,""" % (option, repr(field['term_analysis'][option])) - body += """ - },""" - body += """ - }""" - term_forms = {} - fields = self.fields - for field_id, term in term_analysis_predicates: - alternatives = [] - field = fields[field_id] - if field['slug'] not in term_forms: - term_forms[field['slug']] = {} - all_forms = field['summary'].get('term_forms', {}) - if all_forms: - alternatives = all_forms.get(term, []) - if alternatives: - terms = [term] - terms.extend(all_forms.get(term, [])) - term_forms[field['slug']][term] = terms - body += """ - term_forms = {""" - for field in term_forms: - body += """ - \"%s\": {""" % field - for term in term_forms[field]: - body += """ - u\"%s\": %s,""" % (term, term_forms[field][term]) - body += """ - }, - """ - body += """ - } -""" - if item_analysis_predicates: - body += """ - def item_matches(text, field_name, item): - \"\"\" Counts the number of occurences of item in text - - \"\"\" - if text is None: - text = "" - options = item_analysis[field_name] - separator = options.get('separator', ' ') - regexp = options.get('separator_regexp') - if regexp is None: - regexp = r\"%s\" % separator - return count_items_matches(text, item, regexp) - - - def count_items_matches(text, item, regexp): - \"\"\" Counts the number of occurences of the item in the text - - \"\"\" - expression = r'(^|%s)%s($|%s)' % (regexp, item, regexp) - pattern = re.compile(expression, flags=re.U) - matches = re.findall(pattern, text) - return len(matches) -""" - - item_analysis_options = set([predicate[0] for predicate in - item_analysis_predicates]) - item_analysis_predicates = set(item_analysis_predicates) - body += """ - item_analysis = {""" - for field_id in item_analysis_options: - field = self.fields[field_id] - body += """ - \"%s\": {""" % field['slug'] - for option in field['item_analysis']: - if option in ITEM_OPTIONS: - body += """ - \"%s\": %s,""" % (option, repr(field['item_analysis'][option])) - body += """ - },""" - body += """ - } -""" - - return body - - def tableau_body(self, body=u"", conditions=None, cmv=None, - ids_path=None, subtree=True): - """Translate the model into a set of "if" statements in Tableau syntax - - `depth` controls the size of indentation. As soon as a value is missing - that node is returned without further evaluation. 
- - """ - - if cmv is None: - cmv = [] - if body: - alternate = u"ELSEIF" - else: - if conditions is None: - conditions = [] - alternate = u"IF" - - children = filter_nodes(self.children, ids=ids_path, - subtree=subtree) - if children: - field = split(children) - has_missing_branch = (missing_branch(children) or - none_value(children)) - # the missing is singled out as a special case only when there's - # no missing branch in the children list - if (not has_missing_branch and - self.fields[field]['name'] not in cmv): - conditions.append("ISNULL([%s])" % self.fields[field]['name']) - body += (u"%s %s THEN " % - (alternate, " AND ".join(conditions))) - if self.fields[self.objective_id]['optype'] == 'numeric': - value = self.output - else: - value = tableau_string(self.output) - body += (u"%s\n" % value) - cmv.append(self.fields[field]['name']) - alternate = u"ELSEIF" - del conditions[-1] - - for child in children: - pre_condition = u"" - post_condition = u"" - if has_missing_branch and child.predicate.value is not None: - negation = u"" if child.predicate.missing else u"NOT " - connection = u"OR" if child.predicate.missing else u"AND" - pre_condition = ( - u"(%sISNULL([%s]) %s " % ( - negation, self.fields[field]['name'], connection)) - if not child.predicate.missing: - cmv.append(self.fields[field]['name']) - post_condition = u")" - optype = self.fields[child.predicate.field]['optype'] - if child.predicate.value is None: - value = "" - elif optype == 'text' or optype == 'items': - return u"" - elif optype == 'numeric': - value = child.predicate.value - else: - value = repr(child.predicate.value) - - operator = (u"" if child.predicate.value is None else - PYTHON_OPERATOR[child.predicate.operator]) - if child.predicate.value is None: - pre_condition = ( - T_MISSING_OPERATOR[child.predicate.operator]) - post_condition = u")" - - conditions.append("%s[%s]%s%s%s" % ( - pre_condition, - self.fields[child.predicate.field]['name'], - operator, - value, - post_condition)) - body = child.tableau_body(body, conditions[:], cmv=cmv[:], - ids_path=ids_path, subtree=subtree) - del conditions[-1] - else: - if self.fields[self.objective_id]['optype'] == 'numeric': - value = self.output - else: - value = tableau_string(self.output) - body += ( - u"%s %s THEN" % (alternate, " AND ".join(conditions))) - body += u" %s\n" % value - - return body - - def tableau(self, out, ids_path=None, subtree=True): - """Writes a Tableau function that implements the model. 
- - """ - body = self.tableau_body(ids_path=ids_path, subtree=subtree) - if not body: - return False - out.write(utf8(body)) - out.flush() - return True - - def get_nodes_info(self, headers=None, leaves_only=False): - """Yields the information associated to each of the tree nodes - - """ - row = [] - if not self.regression: - category_dict = dict(self.distribution) - for header in headers: - if header == self.fields[self.objective_id]['name']: - row.append(self.output) - continue - if header in ['confidence', 'error']: - row.append(self.confidence) - continue - if header == 'impurity': - row.append(self.impurity) - continue - if self.regression and header.startswith('bin'): - for bin_value, bin_instances in self.distribution: - row.append(bin_value) - row.append(bin_instances) - break - if not self.regression: - row.append(category_dict.get(header)) - while len(row) < len(headers): - row.append(None) - if not leaves_only or not self.children: - yield row - - if self.children: - for child in self.children: - for row in child.get_nodes_info(headers, - leaves_only=leaves_only): - yield row diff --git a/bigml/tree_utils.py b/bigml/tree_utils.py index 7d0c0d4a..ed033dbf 100644 --- a/bigml/tree_utils.py +++ b/bigml/tree_utils.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2017-2019 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -25,10 +24,13 @@ import locale import sys -from urlparse import urlparse +from urllib.parse import urlparse +from functools import reduce from unidecode import unidecode -from bigml.util import split + +from bigml.predicate_utils.utils import LT, LE, EQ, NE, GE, GT, IN +from bigml.util import asciify DEFAULT_LOCALE = 'en_US.UTF-8' TM_TOKENS = 'tokens_only' @@ -41,20 +43,19 @@ MAX_ARGS_LENGTH = 10 -INDENT = u' ' +INDENT = ' ' # Map operator str to its corresponding python operator PYTHON_OPERATOR = { - "<": "<", - "<=": "<=", - "=": "==", - "!=": "!=", - "/=": "!=", - ">=": ">=", - ">": ">" + LT: "<", + LE: "<=", + EQ: "==", + NE: "!=", + GE: ">=", + GT: ">", + IN: "in" } - # reserved keywords CS_KEYWORDS = [ @@ -143,11 +144,38 @@ ] +def add_distribution(model): + """Adding the distribution attribute + + """ + summary = model.fields[model.objective_id]['summary'] + if 'bins' in summary: + distribution = summary['bins'] + elif 'counts' in summary: + distribution = summary['counts'] + elif 'categories' in summary: + distribution = summary['categories'] + else: + distribution = [] + model.distribution = distribution + + +def split(children): + """Returns the field that is used by the node to make a decision. 
+ + """ + field = {child.predicate.field for child in children} + + if len(field) == 1: + return field.pop() + return None + + def java_string(text): """Transforms string output for java, cs, and objective-c code """ - text = "%s" % text + text = f"{text}" return text.replace(""", "\"").replace("\"", "\\\"") @@ -163,7 +191,7 @@ def ruby_string(text): """ out = python_string(text) - if isinstance(text, unicode): + if isinstance(text, str): return out[1:] return out @@ -173,11 +201,11 @@ def sort_fields(fields): """ fathers = [(key, val) for key, val in - sorted(fields.items(), + sorted(list(fields.items()), key=lambda k: k[1]['column_number']) if not 'auto_generated' in val] children = [(key, val) for key, val in - sorted(fields.items(), + sorted(list(fields.items()), key=lambda k: k[1]['column_number']) if 'auto_generated' in val] children.reverse() @@ -199,14 +227,12 @@ def slugify(name, reserved_keywords=None, prefix=''): """Translates a field name into a variable name. """ - if len(name) == 0: - # case of empty name? - return name - - name = unidecode(name).lower() - name = re.sub(r'\W+', '_', name) - if name[0].isdigit(): - name = "field_" + name + name = asciify(name) + try: + if name[0].isdigit(): + name = "field_" + name + except IndexError: + name = "unnamed_field" if reserved_keywords: if name in reserved_keywords: name = prefix + name @@ -215,9 +241,9 @@ def slugify(name, reserved_keywords=None, prefix=''): def plural(text, num): """Pluralizer: adds "s" at the end of a string if a given number is > 1 - """ - return "%s%s" % (text, "s"[num == 1:]) + suffix = "s"[num == 1:] + return f"{text}{suffix}" def prefix_as_comment(comment_prefix, text): @@ -300,12 +326,12 @@ def docstring_comment(model): """Returns the docstring describing the model. 
""" - docstring = (u"Predictor for %s from %s" % ( - model.tree.fields[model.tree.objective_id]['name'], - model.resource_id)) - model.description = (unicode( \ + name = model.fields[model.objective_id]['name'] + resource_id = model.resource_id + docstring = f"Predictor for {name} from {resource_id}" + model.description = (str( \ model.description).strip() \ - or u'Predictive model by BigML - Machine Learning Made Easy') + or 'Predictive model by BigML - Machine Learning Made Easy') return docstring @@ -314,19 +340,20 @@ def java_class_definition(model): """ docstring = model.java_comment() - field_obj = model.tree.fields[model.tree.objective_id] + field_obj = model.fields[model.objective_id] if not 'CamelCase' in field_obj: field_obj['CamelCase'] = to_camel_java(field_obj['name'], False) + description = model.description.replace('\n', '\n * ') + field_camelcase = field_obj['CamelCase'] output = \ -u""" +f""" /** -* %s -* %s +* {docstring} +* {description} */ -public class %s { -""" % (docstring, - model.description.replace('\n', '\n * '), - field_obj['CamelCase']) +public class {field_camelcase} +""" + output += "{" return output @@ -339,30 +366,32 @@ def signature_name_vb(text, model): obj_field_for_name = to_camel_vb(text, False).replace("V_", "") obj_field_for_name = obj_field_for_name.title() header = "" + name = model.fields[model.objective_id]['name'] + resource_id = model.resource_id + description = model.description if model.description else \ + default_description if model: - header = u""" + header = f""" ' -' Predictor for %s from %s -' %s +' Predictor for {name} from {resource_id} +' {description} ' -""" % (model.tree.fields[model.tree.objective_id]['name'], - model.resource_id, - model.description if model.description else default_description) - return ("Predict{0}".format(obj_field_for_name), header) +""" + return (f"Predict{obj_field_for_name}", header) def localize(number): """Localizes `number` to show commas appropriately. """ - return locale.format("%d", number, grouping=True) + return locale.format_string("%d", number, grouping=True) def is_url(value): """Returns True if value is a valid URL. """ - url = isinstance(value, basestring) and urlparse(value) + url = isinstance(value, str) and urlparse(value) return url and url.scheme and url.netloc and url.path @@ -372,18 +401,19 @@ def print_distribution(distribution, out=sys.stdout): """ total = reduce(lambda x, y: x + y, [group[1] for group in distribution]) - output = u"" + output = "" for group in distribution: - output += u" %s: %.2f%% (%d instance%s)\n" % ( \ - group[0], - round(group[1] * 1.0 / total, 4) * 100, - group[1], - "" if group[1] == 1 else "s") + substr1 = group[0] + substr2 = round(group[1] * 1.0 / total, 4) * 100 + substr3 = group[1] + substr4 = "" if group[1] == 1 else "s" + output += (f" {substr1}: {substr2:.2f}% ({substr3} " + f"instance{substr4})\n") out.write(output) out.flush() -def filter_nodes(nodes_list, ids=None, subtree=True): +def old_filter_nodes(nodes_list, ids=None, subtree=True): """Filters the contents of a nodes_list. If any of the nodes is in the ids list, the rest of nodes are removed. If none is in the ids list we include or exclude the nodes depending on the subtree flag. 
@@ -406,14 +436,14 @@ def missing_branch(children): """Checks if the missing values are assigned to a special branch """ - return any([child.predicate.missing for child in children]) + return any(child.predicate.missing for child in children) def none_value(children): """Checks if the predicate has a None value """ - return any([child.predicate.value is None for child in children]) + return any(child.predicate.value is None for child in children) def one_branch(children, input_data): @@ -430,6 +460,6 @@ def tableau_string(text): """ value = repr(text) - if isinstance(text, unicode): + if isinstance(text, str): return value[1:] return value diff --git a/bigml/tsoutconstants.py b/bigml/tsoutconstants.py index 4287a649..7903a6f6 100644 --- a/bigml/tsoutconstants.py +++ b/bigml/tsoutconstants.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2017-2019 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -20,7 +19,7 @@ """ SUBMODELS_CODE = {"naive": \ -u""" +""" def _naive_forecast(components, horizon): \"\"\"Computing the forecast for the naive model @@ -30,7 +29,7 @@ def _naive_forecast(components, horizon): """, "mean": \ -u""" +""" def _mean_forecast(components, horizon): \"\"\"Computing the forecast for the mean model @@ -38,7 +37,7 @@ def _mean_forecast(components, horizon): return _trivial_forecast(submodel, horizon) """, "drift": \ -u""" +""" def _drift_forecast(components, horizon): \"\"\"Computing the forecast for the drift model @@ -50,7 +49,7 @@ def _drift_forecast(components, horizon): return points """, "N": \ -u""" +""" def _N_forecast(components, horizon, seasonality): \"\"\"Computing the forecast for the trend=N models @@ -68,7 +67,7 @@ def _N_forecast(components, horizon, seasonality): return points """, "A": \ -u""" +""" def _A_forecast(components, horizon, seasonality): \"\"\"Computing the forecast for the trend=A models @@ -87,7 +86,7 @@ def _A_forecast(components, horizon, seasonality): return points """, "Ad": \ -u""" +""" def _Ad_forecast(components, horizon, seasonality): \"\"\"Computing the forecast for the trend=Ad model @@ -112,7 +111,7 @@ def _Ad_forecast(components, horizon, seasonality): return points """, "M": \ -u""" +""" def _M_forecast(components, horizon, seasonality): \"\"\"Computing the forecast for the trend=M model @@ -131,7 +130,7 @@ def _M_forecast(components, horizon, seasonality): return points """, "Md": \ -u""" +""" def _Md_forecast(components, horizon, seasonality): \"\"\"Computing the forecast for the trend=Md model @@ -158,7 +157,7 @@ def _Md_forecast(components, horizon, seasonality): """} TRIVIAL_MODEL = \ -u""" +""" def _trivial_forecast(components, horizon): \"\"\"Computing the forecast for the trivial models @@ -203,7 +202,7 @@ def season_contribution(s_list, step): """ FORECAST_FUNCTION = \ -u""" +""" def forecast(field, model_name, horizon=50): \"\"\"Forecast using the user-given model type and horizon @@ -221,7 +220,7 @@ def forecast(field, model_name, horizon=50): """ USAGE_DOC = \ -u"""\"\"\"Local forecast for BigML's Time Series %s. +"""\"\"\"Local forecast for BigML's Time Series %s. 
Time Series Forecast by BigML - Machine Learning Made Easy diff --git a/bigml/tssubmodels.py b/bigml/tssubmodels.py index dc31f967..1e055af8 100644 --- a/bigml/tssubmodels.py +++ b/bigml/tssubmodels.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python +#pylint: disable=invalid-name # -# Copyright 2017-2019 BigML +# Copyright 2017-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -42,8 +42,7 @@ def season_contribution(s_list, step): period = len(s_list) index = abs(- period + 1 + step % period) return s_list[index] - else: - return 0 + return 0 def trivial_forecast(submodel, horizon): @@ -187,7 +186,7 @@ def Md_forecast(submodel, horizon, seasonality): return points -SUBMODELS = dict([\ - (name[0: -9].replace("_", ","), obj) for name, obj in +SUBMODELS = { + name[0: -9].replace("_", ","): obj for name, obj in inspect.getmembers(sys.modules[__name__]) - if inspect.isfunction(obj) and name.endswith('_forecast')]) + if inspect.isfunction(obj) and name.endswith('_forecast')} diff --git a/bigml/util.py b/bigml/util.py index d07bc5f3..df6b5d67 100644 --- a/bigml/util.py +++ b/bigml/util.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2012-2019 BigML +# Copyright 2012-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -29,14 +28,21 @@ import random import ast import datetime -from urlparse import urlparse +import logging -import unidecode +from urllib.parse import urlparse +from unidecode import unidecode +import msgpack import bigml.constants as c -PY3 = sys.version > '3' +try: + from pandas import DataFrame + PANDAS_READY = True +except ImportError: + PANDAS_READY = False + DEFAULT_LOCALE = 'en_US.UTF-8' WINDOWS_DEFAULT_LOCALE = 'English' LOCALE_SYNONYMS = { @@ -87,10 +93,10 @@ } PYTHON_TYPE_MAP = { - "categorical": [unicode, str], + "categorical": [str, str], "numeric": [int, float], - "text": [unicode, str], - "items": [unicode, str] + "text": [str, str], + "items": [str, str] } PREDICTIONS_FILE_SUFFIX = '_predictions.csv' @@ -106,6 +112,8 @@ DFT_STORAGE = "./storage" DFT_STORAGE_FILE = os.path.join(DFT_STORAGE, "BigML_%s.json") +DECIMAL_DIGITS = 5 + def python_map_type(value): """Maps a BigML type to equivalent Python types. @@ -113,8 +121,7 @@ def python_map_type(value): """ if value in PYTHON_TYPE_MAP: return PYTHON_TYPE_MAP[value] - else: - return [unicode, str] + return [str, str] def invert_dictionary(dictionary, field='name'): @@ -124,50 +131,30 @@ def invert_dictionary(dictionary, field='name'): It does not check whether new keys are duplicated though. """ - return dict([[value[field], key] - for key, value in dictionary.items()]) - - -def slugify(name, reserved_keywords=None, prefix=''): - """Translates a field name into a variable name. - - """ - name = unidecode.unidecode(name).lower() - name = re.sub(r'\W+', '_', name) - try: - if name[0].isdigit(): - name = "field_" + name - except IndexError: - name = "unnamed_field" - if reserved_keywords: - if name in reserved_keywords: - name = prefix + name - return name + return {value[field]: key for key, value in dictionary.items()} def localize(number): """Localizes `number` to show commas appropriately. 
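The SUBMODELS comprehension at the end of tssubmodels.py maps every *_forecast function in the module to its key by stripping the trailing "_forecast" (nine characters) and turning underscores into commas:

    "naive_forecast"[0:-9]                    # 'naive'
    "Md_forecast"[0:-9]                       # 'Md'
    # a hypothetical combined name would yield a comma-separated key:
    "A_Ad_forecast"[0:-9].replace("_", ",")   # 'A,Ad'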
""" - return locale.format("%d", number, grouping=True) + return locale.format_string("%d", number, grouping=True) def is_url(value): """Returns True if value is a valid URL. """ - url = isinstance(value, basestring) and urlparse(value) + url = isinstance(value, str) and urlparse(value) return url and url.scheme and url.netloc and url.path -def split(children): - """Returns the field that is used by the node to make a decision. +def is_in_progress(resource): + """Returns True if the resource has no error and has not finished yet """ - field = set([child.predicate.field for child in children]) - - if len(field) == 1: - return field.pop() + return resource.get("error") is None \ + and get_status(resource).get("code") != c.FINISHED def markdown_cleanup(text): @@ -206,36 +193,14 @@ def prefix_as_comment(comment_prefix, text): return text.replace('\n', '\n' + comment_prefix) -def sort_fields(fields): - """Sort fields by their column_number but put children after parents. +def utf8(bytes_str): + """Returns utf-8 string for bytes or string objects """ - fathers = [(key, val) for key, val in - sorted(fields.items(), key=lambda k: k[1]['column_number']) - if 'auto_generated' not in val] - children = [(key, val) for key, val in - sorted(fields.items(), key=lambda k: k[1]['column_number']) - if 'auto_generated' in val] - children.reverse() - fathers_keys = [father[0] for father in fathers] - for child in children: - try: - index = fathers_keys.index(child[1]['parent_ids'][0]) - except ValueError: - index = -1 - - if index >= 0: - fathers.insert(index + 1, child) - else: - fathers.append(child) - return fathers - - -def utf8(text): - """Returns text in utf-8 encoding - - """ - return text.encode("utf-8") + try: + return str(bytes_str, 'utf-8') + except TypeError: + return bytes_str def map_type(value): @@ -244,8 +209,7 @@ def map_type(value): """ if value in TYPE_MAP: return TYPE_MAP[value] - else: - return str + return str def locale_synonyms(main_locale, locale_alias): @@ -256,15 +220,13 @@ def locale_synonyms(main_locale, locale_alias): if language_code not in LOCALE_SYNONYMS: return False alternatives = LOCALE_SYNONYMS[language_code] - if isinstance(alternatives[0], basestring): - return main_locale in alternatives and locale_alias in alternatives - else: - result = False - for subgroup in alternatives: - if main_locale in subgroup: - result = locale_alias in subgroup - break - return result + if isinstance(alternatives[0], str): + return locale_alias in alternatives + result = False + for subgroup in alternatives: + result = locale_alias in subgroup + break + return result def bigml_locale(locale_alias): @@ -277,16 +239,15 @@ def bigml_locale(locale_alias): if language_code not in LOCALE_SYNONYMS: return None alternatives = LOCALE_SYNONYMS[language_code] - if isinstance(alternatives[0], basestring): + if isinstance(alternatives[0], str): return (alternatives[0] if locale_alias in alternatives else None) - else: - result = None - for subgroup in alternatives: - if locale_alias in subgroup: - result = subgroup[0] - break - return result + result = None + for subgroup in alternatives: + if locale_alias in subgroup: + result = subgroup[0] + break + return result def find_locale(data_locale=DEFAULT_LOCALE, verbose=False): @@ -334,9 +295,60 @@ def find_locale(data_locale=DEFAULT_LOCALE, verbose=False): new_locale = locale.setlocale(locale.LC_NUMERIC, '') if verbose and not locale_synonyms(data_locale, new_locale): - print ("WARNING: Unable to find %s locale, using %s instead. 
This " + print(("WARNING: Unable to find %s locale, using %s instead. This " "might alter numeric fields values.\n") % (data_locale, - new_locale) + new_locale)) + + +def asciify(name): + """Translating to ascii and underscores """ + + if len(name) == 0: + # case of empty name? + return name + + name = unidecode(name).lower() + name = re.sub(r'\W+', '_', name) + return name + + +def res_filename(storage_dir, resource_id, extension=None): + """Returns a filename from a resource id""" + basename = asciify(resource_id) + if extension is None: + extension = "" + basename = f"{basename}{extension}" + filename = os.path.join(storage_dir, basename) + return filename + + +def fs_cache_get(storage_dir, minimized=True): + """Returns a function that retrieves a minimized resource from the file + system + """ + extension = ".min" if minimized else "" + def cache_get(resource_id): + filename = res_filename(storage_dir, asciify(resource_id), extension) + if not os.path.exists(filename): + raise ValueError(f"Failed to find the dump file {filename}.") + with open(filename, "rb") as handler: + return handler.read() + + return cache_get + + +def fs_cache_set(storage_dir, minimized=True): + """Returns a function that stores a minimized resource in the file system """ + extension = ".min" if minimized else "" + check_dir(storage_dir) + + def cache_set(resource_id, msg): + filename = res_filename(storage_dir, asciify(resource_id), extension) + with open(filename, "wb") as handler: + handler.write(msg) + return filename + + return cache_set def get_predictions_file_name(model, path): @@ -345,10 +357,8 @@ def get_predictions_file_name(model, path): """ if isinstance(model, dict) and 'resource' in model: model = model['resource'] - return "%s%s%s_%s" % (path, - os.sep, - model.replace("/", "_"), - PREDICTIONS_FILE_SUFFIX) + filename = res_filename(path, model) + return f"{filename}_{PREDICTIONS_FILE_SUFFIX}" def clear_console_line(out=sys.stdout, length=PROGRESS_BAR_WIDTH): @@ -376,13 +386,9 @@ def console_log(message, out=sys.stdout, length=PROGRESS_BAR_WIDTH, :param reset: whether the line has to be reused and cursor reset to the beggining of it """ - if reset: clear_console_line(out=out, length=length) reset_console_line(out=out, length=length) - if (out == sys.stdout and sys.platform == "win32" and sys.stdout.isatty() - and not PY3): - message = message.decode('utf8').encode('850') out.write(message) if reset: reset_console_line(out=out, length=length) @@ -400,8 +406,8 @@ def strip_affixes(value, field): """Strips prefixes and suffixes if present """ - if not isinstance(value, unicode): - value = unicode(value, "utf-8") + if not isinstance(value, str): + value = str(value, "utf-8") if 'prefix' in field and value.startswith(field['prefix']): value = value[len(field['prefix']):] if 'suffix' in field and value.endswith(field['suffix']): @@ -413,7 +419,10 @@ def cast(input_data, fields): """Checks expected type in input data values, strips affixes and casts """ - for (key, value) in input_data.items(): + for (key, value) in list(input_data.items()): + # inputs not in fieldsor empty + if key not in fields or value is None: + continue # strings given as booleans if isinstance(value, bool) and \ fields[key]['optype'] == 'categorical' and \ @@ -430,31 +439,33 @@ def cast(input_data, fields): # converting boolean to the corresponding string input_data.update({key: booleans[str(value)]}) except ValueError: - raise ValueError(u"Mismatch input data type in field " - u"\"%s\" for value %s. 
String expected" % + raise ValueError("Mismatch input data type in field " + "\"%s\" for value %s. String expected" % (fields[key]['name'], value)) # numerics given as strings elif ( - (fields[key]['optype'] == 'numeric' and - isinstance(value, basestring)) or - (fields[key]['optype'] != 'numeric' and - not isinstance(value, basestring))): + (fields[key]['optype'] == NUMERIC and + isinstance(value, str)) or + (fields[key]['optype'] != NUMERIC and + not isinstance(value, str))): try: - if fields[key]['optype'] == 'numeric': + if fields[key]['optype'] == NUMERIC: value = strip_affixes(value, fields[key]) input_data.update({key: map_type(fields[key] ['optype'])(value)}) except ValueError: - raise ValueError(u"Mismatch input data type in field " - u"\"%s\" for value %s." % + raise ValueError("Mismatch input data type in field " + "\"%s\" for value %s." % (fields[key]['name'], value)) - elif (fields[key]['optype'] == 'numeric' and + elif (fields[key]['optype'] == NUMERIC and isinstance(value, bool)): - raise ValueError(u"Mismatch input data type in field " - u"\"%s\" for value %s. Numeric expected." % + raise ValueError("Mismatch input data type in field " + "\"%s\" for value %s. Numeric expected." % (fields[key]['name'], value)) + if fields[key]['optype'] == NUMERIC and isinstance(value, float): + input_data.update({key: round(value, DECIMAL_DIGITS)}) def check_dir(path): @@ -463,7 +474,7 @@ def check_dir(path): """ if os.path.exists(path): if not os.path.isdir(path): - raise ValueError(u"The given path is not a directory") + raise ValueError("The given path is not a directory") elif len(path) > 0: os.makedirs(path) return path @@ -536,11 +547,12 @@ def is_status_final(resource): """Try whether a resource is in a final state """ + status = {} try: status = get_status(resource) except ValueError: - status['code'] = None - return status['code'] in [c.FINISHED, c.FAULTY] + pass + return status.get('code') in [c.FINISHED, c.FAULTY] def save_json(resource, path): @@ -551,9 +563,10 @@ def save_json(resource, path): resource_json = json.dumps(resource) return save(resource_json, path) except ValueError: - print "The resource has an invalid JSON format" + print("The resource has an invalid JSON format") except IOError: - print "Failed writing resource to %s" % path + print("Failed writing resource to %s" % path) + return None def save(content, path): @@ -580,9 +593,8 @@ def plural(text, num): def get_exponential_wait(wait_time, retry_count): """Computes the exponential wait time used in next request using the base values provided by the user: - - wait_time: starting wait time - - retries: total number of retries - - retries_left: retries left + - wait_time: starting wait time (seconds) + - retries_count: number of retries """ delta = (retry_count ** 2) * wait_time / 2 @@ -594,7 +606,7 @@ def check_no_missing_numerics(input_data, fields, weight_field=None): """Checks whether some numeric fields are missing in the input data """ - for field_id, field in fields.items(): + for field_id, field in list(fields.items()): if (field['optype'] == NUMERIC and (weight_field is None or \ field_id != weight_field) and \ not field_id in input_data): @@ -602,6 +614,7 @@ def check_no_missing_numerics(input_data, fields, weight_field=None): " data must contain values for all numeric" " fields to get a prediction.") +#pylint: disable=locally-disabled,too-many-boolean-expressions def check_no_training_missings(input_data, fields, weight_field=None, objective_id=None): """Checks whether some input fields are missing in the 
@@ -632,3 +645,130 @@ def flatten(inner_array):
             new_array.append(element)
     return new_array
+
+
+def use_cache(cache_get):
+    """Checks whether the user has provided a cache get function to retrieve
+    local models.
+
+    """
+    return cache_get is not None and hasattr(cache_get, '__call__')
+
+
+def dump(local_attrs, output=None, cache_set=None):
+    """Uses msgpack to serialize the local resource object.
+    If cache_set is filled with a cache set method, the method is called.
+
+    """
+    if use_cache(cache_set):
+        dump_string = msgpack.dumps(local_attrs)
+        cache_set(local_attrs["resource_id"], dump_string)
+    else:
+        msgpack.pack(local_attrs, output)
+
+
+def dumps(local_attrs):
+    """Uses msgpack to serialize the local resource object to a string
+
+    """
+    return msgpack.dumps(local_attrs)
+
+
+def load(resource_id, cache_get):
+    """Uses msgpack to load the resource stored by ID
+
+    """
+    return msgpack.loads(cache_get(resource_id))
+
+
+def filter_by_extension(file_list, extension_list):
+    """Returns the files that match the given extensions
+
+    """
+    return [filename for filename in file_list if
+            os.path.splitext(filename)[1].replace(".", "").lower()
+            in extension_list]
+
+
+def infer_field_type(field, value):
+    """Returns a dictionary containing the name and optype of the objective
+    field as inferred from the corresponding value
+
+    """
+    if isinstance(value, str):
+        optype = "categorical"
+    elif isinstance(value, list):
+        optype = "regions"
+    else:
+        optype = "numeric"
+    return {"name": field, "optype": optype}
+
+
+def is_image(filename):
+    """Checks whether the file is an image, based on its extension"""
+    return os.path.splitext(filename)[1].replace(".", "").lower() \
+        in c.IMAGE_EXTENSIONS
+
+
+def get_data_format(input_data_list):
+    """Returns the format used in input_data_list: DataFrame or
+    list of dicts.
+
+    """
+    if PANDAS_READY and isinstance(input_data_list, DataFrame):
+        return c.DATAFRAME
+    if isinstance(input_data_list, list) and (len(input_data_list) == 0 or
+            isinstance(input_data_list[0], dict)):
+        return c.INTERNAL
+    raise ValueError("Data is expected to be provided as a list of "
+                     "dictionaries or Pandas' DataFrame.")
+
+
+#pylint: disable=locally-disabled,comparison-with-itself
+def format_data(input_data_list, out_format=None):
+    """Transforms the input data format to the one expected"""
+    if out_format == c.DATAFRAME:
+        input_data_list = DataFrame.from_dict(input_data_list)
+    elif out_format == c.INTERNAL:
+        input_data_list = input_data_list.to_dict('records')
+        # pandas nan, NaN, etc. outputs need to be changed to None
+        for row in input_data_list:
+            for key, value in row.items():
+                if value != value:
+                    row[key] = None
+    return input_data_list
+
+
+def get_formatted_data(input_data_list, out_format=None):
+    """Checks the type of data and transforms it if needed"""
+    current_format = get_data_format(input_data_list)
+    if current_format != out_format:
+        inner_data_list = format_data(input_data_list, out_format)
+    else:
+        inner_data_list = input_data_list.copy()
+    return inner_data_list
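Since dump() and load() above only call the user-supplied cache_set/cache_get callables, any key-value backend can cache serialized local models. A minimal round-trip sketch, assuming these helpers land in bigml.util as this hunk suggests (the dict-backed cache and the resource id are hypothetical stand-ins for a real cache backend and model):

    from bigml.util import use_cache, dump, load

    CACHE = {}                      # hypothetical in-memory cache backend
    cache_set = CACHE.__setitem__   # cache_set(resource_id, serialized_bytes)
    cache_get = CACHE.__getitem__   # cache_get(resource_id) -> serialized_bytes

    local_attrs = {"resource_id": "model/000000000000000000000000",
                   "input_fields": ["000000", "000001"]}
    assert use_cache(cache_set)             # any callable enables the cache path
    dump(local_attrs, cache_set=cache_set)  # msgpack-serializes under resource_id
    assert load(local_attrs["resource_id"], cache_get) == local_attrs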
+
+
+#pylint: disable=locally-disabled,import-outside-toplevel
+def get_data_transformations(resource_id, parent_id):
+    """Returns the pipeline that contains the transformations and derived
+    features created from the raw data to the actual resource.
+
+    """
+    if parent_id is None:
+        raise ValueError("Failed to find the dataset information "
+                         "needed to build the data transformations "
+                         "pipeline.")
+    from bigml.pipeline.pipeline import BMLPipeline
+    return BMLPipeline("dt-%s" % resource_id, [parent_id])
+
+
+def sensenet_logging():
+    """Removes warnings and unnecessary logging when using sensenet"""
+    logging.disable(logging.WARNING)
+    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+    os.environ["TF_USE_LEGACY_KERAS"] = "1"
+    import tensorflow as tf
+    tf.autograph.set_verbosity(0)
+    logging.getLogger("tensorflow").setLevel(logging.ERROR)
diff --git a/bigml/version.py b/bigml/version.py
index 50f690ce..68512901 100644
--- a/bigml/version.py
+++ b/bigml/version.py
@@ -1 +1 @@
-__version__ = '4.27.1'
+__version__ = '9.8.3'
diff --git a/bigml/webhooks.py b/bigml/webhooks.py
new file mode 100644
index 00000000..a1f762e5
--- /dev/null
+++ b/bigml/webhooks.py
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2022-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+"""Utilities for webhooks
+
+"""
+import json
+import hmac
+
+try:
+    from hashlib import sha1
+except ImportError:
+    import sha
+    sha1 = sha.sha
+
+
+SORTING_SEQUENCE = ["timestamp", "message", "resource", "event"]
+
+
+def dict_to_msg(obj):
+    """Builds a representation of the dict object in a specific key sequence"""
+    pair_list = []
+    for key in SORTING_SEQUENCE:
+        pair_list.append("'%s': '%s'" % (key, obj.get(key)))
+    return "{%s}" % ", ".join(pair_list)
+
+
+def compute_signature(msg, secret, encoding="utf-8"):
+    """Computes the signature used by BigML when issuing the webhook call"""
+    return hmac.new(
+        secret.encode(encoding),
+        msg=msg.encode(encoding),
+        digestmod=sha1
+    ).hexdigest()
+
+
+def check_signature(request, secret):
+    """Checks the signature when the webhook has been given one"""
+    sig_header = request.meta['HTTP_X_BIGML_SIGNATURE'].replace('sha1=', '')
+    payload = request.body
+    computed_sig = compute_signature(payload, secret)
+    if sig_header == computed_sig:
+        return True
+    # code for old version of the msg hash
+    payload = dict_to_msg(json.loads(payload))
+    computed_sig = compute_signature(payload, secret)
+    if sig_header == computed_sig:
+        return True
+    return False
diff --git a/data/associations/association_set.json b/data/associations/association_set.json
index ea830917..f11d8d02 100644
--- a/data/associations/association_set.json
+++ b/data/associations/association_set.json
@@ -1 +1 @@
-[{"rules": ["000003"], "item": {"count": 16, "complement": false, "field_id": "000000", "name": "hygiene articles"}, "score": 0.01609}]
\ No newline at end of file
+[{"item": {"complement": false, "count": 16, "field_id": "000000", "name": "hygiene articles"}, "rules": ["000002"], "score": 0.01609}]
diff --git a/data/classification_evaluation.json b/data/classification_evaluation.json
new file mode 100644
index 00000000..13a12431
--- /dev/null
+++ b/data/classification_evaluation.json
@@ -0,0 +1 @@
+{"code": 200, "resource": "evaluation/64b5b07f79c6023e9583c16f",
"location": "https://bigml.io/andromeda/evaluation/64b5b07f79c6023e9583c16f", "object": {"boosted_ensemble": false, "category": 0, "code": 200, "combiner": null, "configuration": null, "configuration_status": false, "created": "2023-07-17T21:19:59.247000", "creator": "mmartin", "dataset": "dataset/64b5b07a79c602298f37d884", "dataset_status": true, "datasets": [], "deepnet": "", "description": "", "ensemble": "", "evaluations": null, "excluded_fields": [], "fields_map": {"000001": "000001", "000003": "000003", "000004": "000004", "000005": "000005", "000006": "000006", "000007": "000007", "000009": "000009", "00000a": "00000a", "00000c": "00000c", "00000d": "00000d", "000010": "000010", "000011": "000011", "000012": "000012", "000013": "000013"}, "fusion": "", "input_fields": [], "linearregression": "", "locale": "en-US", "logisticregression": "", "max_rows": 134, "missing_strategy": 0, "model": "model/64b5b05079c602298f37d881", "model_status": true, "model_type": 0, "name": "Stdin input vs. Stdin input", "name_options": "512-node, pruned, deterministic order, operating kind=probability", "number_of_models": 1, "objective_field_descriptors": {"000013": {"column_number": 19, "datatype": "string", "name": "Churn", "optype": "categorical", "order": 19, "preferred": true, "term_analysis": {"enabled": true}}}, "objective_fields": ["000013"], "objective_fields_names": ["Churn"], "operating_kind": "probability", "optiml": null, "optiml_status": false, "out_of_bag": false, "performance": 0.81925, "private": true, "project": null, "range": null, "replacement": false, "resource": "evaluation/64b5b07f79c6023e9583c16f", "result": {"class_names": ["False", "True"], "mode": {"accuracy": 0.85075, "average_area_under_pr_curve": 0, "average_area_under_roc_curve": 0, "average_balanced_accuracy": 0.5, "average_f_measure": 0.45968, "average_kendalls_tau_b": 0, "average_ks_statistic": 0, "average_max_phi": 0, "average_phi": 0, "average_precision": 0.42537, "average_recall": 0.5, "average_spearmans_rho": 0, "confusion_matrix": [[114, 0], [20, 0]], "per_class_statistics": [{"accuracy": 0.85075, "balanced_accuracy": 0.5, "class_name": "False", "f_measure": 0.91935, "phi_coefficient": 0, "precision": 0.85075, "present_in_test_data": true, "recall": 1}, {"accuracy": 0.85075, "balanced_accuracy": 0.5, "class_name": "True", "f_measure": 0, "phi_coefficient": 0, "precision": 0, "present_in_test_data": true, "recall": 0}]}, "model": {"accuracy": 0.91791, "average_area_under_pr_curve": 0.90567, "average_area_under_roc_curve": 0.92588, "average_balanced_accuracy": 0.78684, "average_f_measure": 0.81925, "average_kendalls_tau_b": 0.46897, "average_ks_statistic": 0.76491, "average_max_phi": 0.76491, "average_phi": 0.64837, "average_precision": 0.86639, "average_recall": 0.78684, "average_spearmans_rho": 0.5368, "confusion_matrix": [[111, 3], [8, 12]], "per_class_statistics": [{"accuracy": 0.91791, "area_under_pr_curve": 0.9843, "area_under_roc_curve": 0.92588, "balanced_accuracy": 0.78684, "class_name": "False", "f_measure": 0.95279, "gain_curve": [[0, 0, 0.99933], [0.3209, 0.37719, 0.99838], [0.5, 0.57895, 0.99531], [0.52985, 0.60526, 0.99497], [0.6194, 0.71053, 0.99437], [0.67164, 0.76316, 0.99218], [0.69403, 0.78947, 0.98995], [0.79851, 0.90351, 0.98721], [0.81343, 0.92105, 0.98593], [0.82836, 0.9386, 0.98437], [0.85075, 0.96491, 0.97655], [0.85821, 0.96491, 0.9531], [0.87313, 0.96491, 0.92964], [0.88806, 0.97368, 0.42964], [0.89552, 0.98246, 0.28643], [0.91045, 1, 0.17186], [0.91791, 1, 0.14321], [0.92537, 1, 0.09548], 
[0.93284, 1, 0.06138], [0.96269, 1, 0.04296], [1, 1, null]], "kendalls_tau_b": 0.46897, "ks_statistic": [0.76491, 0.97655], "lift_curve": [[0, 0, 0.99933], [0.3209, 1.17544, 0.99838], [0.5, 1.15789, 0.99531], [0.52985, 1.14233, 0.99497], [0.6194, 1.14711, 0.99437], [0.67164, 1.13626, 0.99218], [0.69403, 1.13752, 0.98995], [0.79851, 1.1315, 0.98721], [0.81343, 1.1323, 0.98593], [0.82836, 1.13308, 0.98437], [0.85075, 1.1342, 0.97655], [0.85821, 1.12433, 0.9531], [0.87313, 1.10511, 0.92964], [0.88806, 1.09642, 0.42964], [0.89552, 1.09708, 0.28643], [0.91045, 1.09836, 0.17186], [0.91791, 1.08943, 0.14321], [0.92537, 1.08065, 0.09548], [0.93284, 1.072, 0.06138], [0.96269, 1.03876, 0.04296], [1, 1, null]], "max_phi": [0.76491, 0.97655], "negative_cdf": [[0, 0, 0.99933], [0.3209, 0, 0.99838], [0.5, 0.05, 0.99531], [0.52985, 0.1, 0.99497], [0.6194, 0.1, 0.99437], [0.67164, 0.15, 0.99218], [0.69403, 0.15, 0.98995], [0.79851, 0.2, 0.98721], [0.81343, 0.2, 0.98593], [0.82836, 0.2, 0.98437], [0.85075, 0.2, 0.97655], [0.85821, 0.25, 0.9531], [0.87313, 0.35, 0.92964], [0.88806, 0.4, 0.42964], [0.89552, 0.4, 0.28643], [0.91045, 0.4, 0.17186], [0.91791, 0.45, 0.14321], [0.92537, 0.5, 0.09548], [0.93284, 0.55, 0.06138], [0.96269, 0.75, 0.04296], [1, 1, null]], "per_threshold_confusion_matrices": [[[114, 20, 0, 0], null], [[114, 15, 5, 0], 0.04296], [[114, 11, 9, 0], 0.06138], [[114, 10, 10, 0], 0.09548], [[114, 9, 11, 0], 0.14321], [[114, 8, 12, 0], 0.17186], [[112, 8, 12, 2], 0.28643], [[111, 8, 12, 3], 0.42964], [[110, 7, 13, 4], 0.92964], [[110, 5, 15, 4], 0.9531], [[110, 4, 16, 4], 0.97655], [[107, 4, 16, 7], 0.98437], [[105, 4, 16, 9], 0.98593], [[103, 4, 16, 11], 0.98721], [[90, 3, 17, 24], 0.98995], [[87, 3, 17, 27], 0.99218], [[81, 2, 18, 33], 0.99437], [[69, 2, 18, 45], 0.99497], [[66, 1, 19, 48], 0.99531], [[43, 0, 20, 71], 0.99838], [[0, 0, 20, 114], 0.99933]], "phi_coefficient": 0.64837, "pr_curve": [[0, 1, 0.99933], [0.37719, 1, 0.99838], [0.57895, 0.98507, 0.99531], [0.60526, 0.97183, 0.99497], [0.71053, 0.9759, 0.99437], [0.76316, 0.96667, 0.99218], [0.78947, 0.96774, 0.98995], [0.90351, 0.96262, 0.98721], [0.92105, 0.9633, 0.98593], [0.9386, 0.96396, 0.98437], [0.96491, 0.96491, 0.97655], [0.96491, 0.95652, 0.9531], [0.96491, 0.94017, 0.92964], [0.97368, 0.93277, 0.42964], [0.98246, 0.93333, 0.28643], [1, 0.93443, 0.17186], [1, 0.92683, 0.14321], [1, 0.91935, 0.09548], [1, 0.912, 0.06138], [1, 0.88372, 0.04296], [1, 0.85075, null]], "precision": 0.93277, "present_in_test_data": true, "recall": 0.97368, "roc_curve": [[0, 0, 0.99933], [0, 0.37719, 0.99838], [0.05, 0.57895, 0.99531], [0.1, 0.60526, 0.99497], [0.1, 0.71053, 0.99437], [0.15, 0.76316, 0.99218], [0.15, 0.78947, 0.98995], [0.2, 0.90351, 0.98721], [0.2, 0.92105, 0.98593], [0.2, 0.9386, 0.98437], [0.2, 0.96491, 0.97655], [0.25, 0.96491, 0.9531], [0.35, 0.96491, 0.92964], [0.4, 0.97368, 0.42964], [0.4, 0.98246, 0.28643], [0.4, 1, 0.17186], [0.45, 1, 0.14321], [0.5, 1, 0.09548], [0.55, 1, 0.06138], [0.75, 1, 0.04296], [1, 1, null]], "spearmans_rho": 0.5368}, {"accuracy": 0.91791, "area_under_pr_curve": 0.82704, "area_under_roc_curve": 0.92588, "balanced_accuracy": 0.78684, "class_name": "True", "f_measure": 0.68571, "gain_curve": [[0, 0, 0.95704], [0.03731, 0.25, 0.93862], [0.06716, 0.45, 0.90452], [0.07463, 0.5, 0.85679], [0.08209, 0.55, 0.82814], [0.08955, 0.6, 0.71357], [0.10448, 0.6, 0.57036], [0.11194, 0.6, 0.07036], [0.12687, 0.65, 0.0469], [0.14179, 0.75, 0.02345], [0.14925, 0.8, 0.01563], [0.17164, 0.8, 0.01407], [0.18657, 0.8, 
0.01279], [0.20149, 0.8, 0.01005], [0.30597, 0.85, 0.00782], [0.32836, 0.85, 0.00563], [0.3806, 0.9, 0.00503], [0.47015, 0.9, 0.00469], [0.5, 0.95, 0.00162], [0.6791, 1, 0.00067], [1, 1, null]], "kendalls_tau_b": 0.46897, "ks_statistic": [0.76491, 0.01563], "lift_curve": [[0, 0, 0.95704], [0.03731, 6.7, 0.93862], [0.06716, 6.7, 0.90452], [0.07463, 6.7, 0.85679], [0.08209, 6.7, 0.82814], [0.08955, 6.7, 0.71357], [0.10448, 5.74286, 0.57036], [0.11194, 5.36, 0.07036], [0.12687, 5.12353, 0.0469], [0.14179, 5.28947, 0.02345], [0.14925, 5.36, 0.01563], [0.17164, 4.66087, 0.01407], [0.18657, 4.288, 0.01279], [0.20149, 3.97037, 0.01005], [0.30597, 2.77805, 0.00782], [0.32836, 2.58864, 0.00563], [0.3806, 2.36471, 0.00503], [0.47015, 1.91429, 0.00469], [0.5, 1.9, 0.00162], [0.6791, 1.47253, 0.00067], [1, 1, null]], "max_phi": [0.76491, 0.01563], "negative_cdf": [[0, 0, 0.95704], [0.03731, 0, 0.93862], [0.06716, 0, 0.90452], [0.07463, 0, 0.85679], [0.08209, 0, 0.82814], [0.08955, 0, 0.71357], [0.10448, 0.01754, 0.57036], [0.11194, 0.02632, 0.07036], [0.12687, 0.03509, 0.0469], [0.14179, 0.03509, 0.02345], [0.14925, 0.03509, 0.01563], [0.17164, 0.0614, 0.01407], [0.18657, 0.07895, 0.01279], [0.20149, 0.09649, 0.01005], [0.30597, 0.21053, 0.00782], [0.32836, 0.23684, 0.00563], [0.3806, 0.28947, 0.00503], [0.47015, 0.39474, 0.00469], [0.5, 0.42105, 0.00162], [0.6791, 0.62281, 0.00067], [1, 1, null]], "per_threshold_confusion_matrices": [[[20, 114, 0, 0], null], [[20, 71, 43, 0], 0.00067], [[19, 48, 66, 1], 0.00162], [[18, 45, 69, 2], 0.00469], [[18, 33, 81, 2], 0.00503], [[17, 27, 87, 3], 0.00563], [[17, 24, 90, 3], 0.00782], [[16, 11, 103, 4], 0.01005], [[16, 9, 105, 4], 0.01279], [[16, 7, 107, 4], 0.01407], [[16, 4, 110, 4], 0.01563], [[15, 4, 110, 5], 0.02345], [[13, 4, 110, 7], 0.0469], [[12, 3, 111, 8], 0.07036], [[12, 2, 112, 8], 0.57036], [[12, 0, 114, 8], 0.71357], [[11, 0, 114, 9], 0.82814], [[10, 0, 114, 10], 0.85679], [[9, 0, 114, 11], 0.90452], [[5, 0, 114, 15], 0.93862], [[0, 0, 114, 20], 0.95704]], "phi_coefficient": 0.64837, "pr_curve": [[0, 1, 0.95704], [0.25, 1, 0.93862], [0.45, 1, 0.90452], [0.5, 1, 0.85679], [0.55, 1, 0.82814], [0.6, 1, 0.71357], [0.6, 0.85714, 0.57036], [0.6, 0.8, 0.07036], [0.65, 0.76471, 0.0469], [0.75, 0.78947, 0.02345], [0.8, 0.8, 0.01563], [0.8, 0.69565, 0.01407], [0.8, 0.64, 0.01279], [0.8, 0.59259, 0.01005], [0.85, 0.41463, 0.00782], [0.85, 0.38636, 0.00563], [0.9, 0.35294, 0.00503], [0.9, 0.28571, 0.00469], [0.95, 0.28358, 0.00162], [1, 0.21978, 0.00067], [1, 0.14925, null]], "precision": 0.8, "present_in_test_data": true, "recall": 0.6, "roc_curve": [[0, 0, 0.95704], [0, 0.25, 0.93862], [0, 0.45, 0.90452], [0, 0.5, 0.85679], [0, 0.55, 0.82814], [0, 0.6, 0.71357], [0.01754, 0.6, 0.57036], [0.02632, 0.6, 0.07036], [0.03509, 0.65, 0.0469], [0.03509, 0.75, 0.02345], [0.03509, 0.8, 0.01563], [0.0614, 0.8, 0.01407], [0.07895, 0.8, 0.01279], [0.09649, 0.8, 0.01005], [0.21053, 0.85, 0.00782], [0.23684, 0.85, 0.00563], [0.28947, 0.9, 0.00503], [0.39474, 0.9, 0.00469], [0.42105, 0.95, 0.00162], [0.62281, 1, 0.00067], [1, 1, null]], "spearmans_rho": 0.5368}]}, "random": {"accuracy": 0.47761, "average_area_under_pr_curve": 0, "average_area_under_roc_curve": 0, "average_balanced_accuracy": 0.40439, "average_f_measure": 0.385, "average_kendalls_tau_b": 0, "average_ks_statistic": 0, "average_max_phi": 0, "average_phi": -0.13666, "average_precision": 0.45116, "average_recall": 0.40439, "average_spearmans_rho": 0, "confusion_matrix": [[58, 56], [14, 6]], 
"per_class_statistics": [{"accuracy": 0.47761, "balanced_accuracy": 0.40439, "class_name": "False", "f_measure": 0.62366, "phi_coefficient": -0.13666, "precision": 0.80556, "present_in_test_data": true, "recall": 0.50877}, {"accuracy": 0.47761, "balanced_accuracy": 0.40439, "class_name": "True", "f_measure": 0.14634, "phi_coefficient": -0.13666, "precision": 0.09677, "present_in_test_data": true, "recall": 0.3}]}}, "rows": 134, "sample_rate": 1.0, "sampled_rows": 134, "shared": false, "size": 11582, "status": {"code": 5, "elapsed": 3847, "message": "The evaluation has been created", "progress": 1}, "subscription": true, "tags": [], "timeseries": "", "type": 0, "updated": "2023-07-17T21:20:05.589000"}, "error": null} \ No newline at end of file diff --git a/data/fruits1e.jpg b/data/fruits1e.jpg new file mode 100644 index 00000000..99e7ffa4 Binary files /dev/null and b/data/fruits1e.jpg differ diff --git a/data/images/annotations.json b/data/images/annotations.json new file mode 100644 index 00000000..25ea91bd --- /dev/null +++ b/data/images/annotations.json @@ -0,0 +1 @@ +[{"file": "f1/fruits1f.png", "new_label": "True"}, {"file": "f1/fruits1.png", "new_label": "False"}, {"file": "f1/fruits1b.png", "new_label": "True"}, {"file": "f1/fruits1c.png", "new_label": "True"}, {"file": "f1/fruits1d.png", "new_label": "True"}, {"file": "f1/fruits1e.png", "new_label": "True"}, {"file": "f2/fruits2.png", "new_label": "False"}, {"file": "f2/fruits2f.png", "new_label": "False"}, {"file": "f2/fruits2d.png", "new_label": "False"}, {"file": "f2/fruits2e.png", "new_label": "False"}, {"file": "f2/fruits2b.png", "new_label": "False"}, {"file": "f2/fruits2c.png", "new_label": "False"}] diff --git a/data/images/annotations_compact.json b/data/images/annotations_compact.json new file mode 100644 index 00000000..294de440 --- /dev/null +++ b/data/images/annotations_compact.json @@ -0,0 +1,2 @@ +[{"file": "f1/fruits1f.png", "my_regions": "[[\"region1\" 0.2 0.2 0.4 0.4]]"}, + {"file": "f1/fruits1.png", "my_regions": "[[\"region2\" 0.3 0.3 0.5 0.5] [\"region1\" 0.6 0.6 0.8 0.8]]"}] diff --git a/data/images/annotations_list.json b/data/images/annotations_list.json new file mode 100644 index 00000000..ecfee3db --- /dev/null +++ b/data/images/annotations_list.json @@ -0,0 +1,2 @@ +[{"file": "f1/fruits1f.png", "my_regions": [{"label": "region1", "xmin": 0.2, "ymin": 0.2, "xmax": 0.4, "ymax": 0.4}]}, + {"file": "f1/fruits1.png", "my_regions": [{"label": "region2", "xmin": 0.2, "ymin": 0.2, "xmax": 0.4, "ymax": 0.4}, {"label": "region1", "xmin": 0.5, "ymin": 0.5, "xmax": 0.7, "ymax": 0.7}]}] diff --git a/data/images/cats/pexels-pixabay-33358.jpg b/data/images/cats/pexels-pixabay-33358.jpg new file mode 100644 index 00000000..0d0d8c73 Binary files /dev/null and b/data/images/cats/pexels-pixabay-33358.jpg differ diff --git a/data/images/fruits_hist.zip b/data/images/fruits_hist.zip new file mode 100644 index 00000000..14f21ac8 Binary files /dev/null and b/data/images/fruits_hist.zip differ diff --git a/data/images/fruits_name.zip b/data/images/fruits_name.zip new file mode 100644 index 00000000..e7d75cec Binary files /dev/null and b/data/images/fruits_name.zip differ diff --git a/data/images/metadata.json b/data/images/metadata.json new file mode 100644 index 00000000..ea5d1ba6 --- /dev/null +++ b/data/images/metadata.json @@ -0,0 +1,5 @@ +{"description": "Fruit images to test colour distributions", + "images_file": "./fruits_hist.zip", + "new_fields": [{"name": "new_label", "optype": "categorical"}], + "source_id": null, + 
"annotations": "./annotations.json"} diff --git a/data/images/metadata_compact.json b/data/images/metadata_compact.json new file mode 100644 index 00000000..45db412f --- /dev/null +++ b/data/images/metadata_compact.json @@ -0,0 +1,5 @@ +{"description": "Fruit images to test colour distributions with regions", + "images_file": "./fruits_hist.zip", + "new_fields": [{"name": "my_regions", "optype": "regions"}], + "source_id": null, + "annotations": "./annotations_compact.json"} diff --git a/data/images/metadata_list.json b/data/images/metadata_list.json new file mode 100644 index 00000000..1bf61c67 --- /dev/null +++ b/data/images/metadata_list.json @@ -0,0 +1,5 @@ +{"description": "Fruit images to test colour distributions with regions", + "images_file": "./fruits_hist.zip", + "new_fields": [{"name": "my_regions", "optype": "regions"}], + "source_id": null, + "annotations": "./annotations_list.json"} diff --git a/data/imgs_deepnet.zip b/data/imgs_deepnet.zip new file mode 100644 index 00000000..44f752b9 Binary files /dev/null and b/data/imgs_deepnet.zip differ diff --git a/data/missings_cat.csv b/data/missings_cat.csv new file mode 100644 index 00000000..d4ba0909 --- /dev/null +++ b/data/missings_cat.csv @@ -0,0 +1,8 @@ +x1,x2,y +1,3,positive +2,4,positive +7,0,positive +0,1,positive +3,5,negative +8,2,negative +0,1,negative diff --git a/data/missings_reg.csv b/data/missings_reg.csv new file mode 100644 index 00000000..def9c9a2 --- /dev/null +++ b/data/missings_reg.csv @@ -0,0 +1,8 @@ +x1,x2,y +1,3,1 +2,4,1 +7,0,1 +0,1,1 +3,5,2 +8,2,2 +0,1,2 diff --git a/data/model/distribution_iris.txt b/data/model/distribution_iris.txt new file mode 100644 index 00000000..464efae8 --- /dev/null +++ b/data/model/distribution_iris.txt @@ -0,0 +1,3 @@ + Iris-setosa: 33.33% (50 instances) + Iris-versicolor: 33.33% (50 instances) + Iris-virginica: 33.33% (50 instances) diff --git a/data/model/iris.json b/data/model/iris.json new file mode 100644 index 00000000..a7f4b9ff --- /dev/null +++ b/data/model/iris.json @@ -0,0 +1 @@ +{"code": 200, "resource": "model/5f44e5b70d052e69e6000ab0", "location": "https://bigml.io/andromeda/model/5f44e5b70d052e69e6000ab0", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-08-25T10:19:35.829000", "creator": "mmartin", "credits": 0, "credits_per_prediction": 0.0, "dataset": "dataset/5f29a563529963736c0116e9", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": false, "ensemble_id": "", "ensemble_index": 0, "excluded_fields": [], "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000002", "000003"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 54], ["Iris-virginica", 46]]}, "training": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 
7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "order": 3, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000002", 0.70392], ["000003", 0.29608]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000002": {"column_number": 2, "datatype": "double", "name": "petal length", 
"optype": "numeric", "preferred": true}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "preferred": true}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "preferred": true, "term_analysis": {"enabled": true}}}, "node_threshold": 5, "root": {"children": [{"children": [{"confidence": 0.88664, "count": 46, "id": 2, "objective_summary": {"categories": [["Iris-virginica", 45], ["Iris-versicolor", 1]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 1.75}}, {"confidence": 0.8009, "count": 54, "id": 3, "objective_summary": {"categories": [["Iris-versicolor", 49], ["Iris-virginica", 5]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.75}}], "confidence": 0.40383, "count": 100, "id": 1, "objective_summary": {"categories": [["Iris-versicolor", 50], ["Iris-virginica", 50]]}, "output": "Iris-versicolor", "predicate": {"field": "000002", "operator": ">", "value": 2.45}}, {"confidence": 0.92865, "count": 50, "id": 4, "objective_summary": {"categories": [["Iris-setosa", 50]]}, "output": "Iris-setosa", "predicate": {"field": "000002", "operator": "<=", "value": 2.45}}], "confidence": 0.26289, "count": 150, "id": 0, "objective_summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]]}, "output": "Iris-setosa", "predicate": true}}, "name": "classification", "name_options": "5-node, pruned, deterministic order", "node_threshold": 5, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000004", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["000004"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": null, "randomize": false, "range": [1, 150], "replacement": false, "resource": "model/5f44e5b70d052e69e6000ab0", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4608, "source": "source/5f29a560529963736c0116e6", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 773, "message": "The model has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-08-25T10:19:46.420000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/data/model/list_fields.txt b/data/model/list_fields.txt new file mode 100644 index 00000000..e58c097a --- /dev/null +++ b/data/model/list_fields.txt @@ -0,0 +1,3 @@ + +[petal length : numeric] +[petal width : numeric] diff --git a/data/model/rdistribution_iris.txt b/data/model/rdistribution_iris.txt new file mode 100644 index 00000000..41258bcf --- /dev/null +++ b/data/model/rdistribution_iris.txt @@ -0,0 +1,22 @@ + 0.1: 3.33% (5 instances) + 0.2: 19.33% (29 instances) + 0.3: 4.67% (7 instances) + 0.4: 4.67% (7 instances) + 0.5: 0.67% (1 instance) + 0.6: 0.67% (1 instance) + 1: 4.67% (7 instances) + 1.1: 2.00% (3 instances) + 1.2: 3.33% (5 instances) + 1.3: 8.67% (13 instances) + 1.4: 5.33% (8 instances) + 1.5: 8.00% (12 instances) + 1.6: 2.67% (4 instances) + 1.7: 1.33% (2 instances) + 1.8: 8.00% (12 instances) + 1.9: 3.33% (5 instances) + 2: 4.00% (6 instances) + 2.1: 4.00% (6 instances) + 2.2: 2.00% (3 instances) + 2.3: 5.33% (8 instances) + 2.4: 2.00% (3 instances) + 2.5: 
2.00% (3 instances) diff --git a/data/model/regression.json b/data/model/regression.json new file mode 100644 index 00000000..95dcc634 --- /dev/null +++ b/data/model/regression.json @@ -0,0 +1 @@ +{"code": 200, "resource": "model/5f44e603cb4f9665eb000562", "location": "https://bigml.io/andromeda/model/5f44e603cb4f9665eb000562", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-08-25T10:20:51.985000", "creator": "mmartin", "credits": 0, "credits_per_prediction": 0.0, "dataset": "dataset/5f29a563529963736c0116e9", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": false, "ensemble_id": "", "ensemble_index": 0, "excluded_fields": [], "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000002", "000004"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"counts": [[0.246, 50], [1.326, 50], [2.026, 50]]}, "training": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "maximum": 2.5, "median": 1.3, "minimum": 0.1}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 
2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "order": 3, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000004", 1]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "preferred": true}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "preferred": true, "term_analysis": {"enabled": true}}}, "node_threshold": 5, "root": {"children": [{"confidence": 0.1598, "count": 50, "id": 1, "objective_summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1]], "exact_histogram": {"populations": [5, 29, 7, 7, 1, 1], "start": 0.1, "width": 0.1}, "maximum": 0.6, "median": 0.2, "minimum": 0.1}, "output": 0.246, "predicate": {"field": "000004", "operator": "=", "value": "Iris-setosa"}}, {"children": [{"confidence": 0.41645, "count": 50, "id": 3, "objective_summary": {"counts": [[1.4, 1], [1.5, 2], [1.6, 1], [1.7, 1], [1.8, 11], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [3, 2, 16, 12, 11, 6], "start": 1.4, "width": 0.2}, "maximum": 2.5, "median": 2, "minimum": 1.4}, "output": 2.026, "predicate": {"field": "000004", "operator": "=", "value": "Iris-virginica"}}, {"confidence": 0.29985, "count": 50, "id": 4, "objective_summary": {"counts": [[1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 7], [1.5, 10], [1.6, 3], [1.7, 1], [1.8, 1]], "exact_histogram": {"populations": [7, 3, 5, 13, 7, 10, 3, 1, 1], "start": 1, "width": 0.1}, "maximum": 1.8, "median": 1.3, "minimum": 1}, "output": 1.326, "predicate": {"field": "000004", "operator": "!=", "value": "Iris-virginica"}}], "confidence": 0.5726, "count": 100, "id": 2, "objective_summary": {"counts": [[1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 
2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [7, 3, 5, 13, 8, 12, 4, 2, 12, 5, 6, 6, 3, 8, 3, 3], "start": 1, "width": 0.1}, "maximum": 2.5, "median": 1.6, "minimum": 1}, "output": 1.676, "predicate": {"field": "000004", "operator": "!=", "value": "Iris-setosa"}}], "confidence": 0.97442, "count": 150, "id": 0, "objective_summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "maximum": 2.5, "median": 1.3, "minimum": 0.1}, "output": 1.19933, "predicate": true}}, "name": "regression", "name_options": "5-node, pruned, deterministic order", "node_threshold": 5, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000003", "objective_field_name": "petal width", "objective_field_type": "numeric", "objective_fields": ["000003"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": null, "randomize": false, "range": [1, 150], "replacement": false, "resource": "model/5f44e603cb4f9665eb000562", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4608, "source": "source/5f29a560529963736c0116e6", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 809, "message": "The model has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-08-25T10:20:53.143000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/data/model/rlist_fields.txt b/data/model/rlist_fields.txt new file mode 100644 index 00000000..93d0b61f --- /dev/null +++ b/data/model/rlist_fields.txt @@ -0,0 +1,2 @@ + +[species : categorical] diff --git a/data/model/rtree_csv.txt b/data/model/rtree_csv.txt new file mode 100644 index 00000000..50693454 --- /dev/null +++ b/data/model/rtree_csv.txt @@ -0,0 +1 @@ +[["petal width", "error", "bin0_value", "bin0_instances", "bin1_value", "bin1_instances", "bin2_value", "bin2_instances", "bin3_value", "bin3_instances", "bin4_value", "bin4_instances", "bin5_value", "bin5_instances", "bin6_value", "bin6_instances", "bin7_value", "bin7_instances", "bin8_value", "bin8_instances", "bin9_value", "bin9_instances", "bin10_value", "bin10_instances", "bin11_value", "bin11_instances", "bin12_value", "bin12_instances", "bin13_value", "bin13_instances", "bin14_value", "bin14_instances", "bin15_value", "bin15_instances", "bin16_value", "bin16_instances", "bin17_value", "bin17_instances", "bin18_value", "bin18_instances", "bin19_value", "bin19_instances", "bin20_value", "bin20_instances", "bin21_value", "bin21_instances"], [1.19933, 0.97442, 0.1, 5, 0.2, 29, 0.3, 7, 0.4, 7, 0.5, 1, 0.6, 1, 1, 7, 1.1, 3, 1.2, 5, 1.3, 13, 1.4, 8, 1.5, 12, 1.6, 4, 1.7, 2, 1.8, 12, 1.9, 5, 2, 6, 2.1, 6, 2.2, 3, 2.3, 8, 2.4, 3, 2.5, 3], [0.246, 0.1598, 0.1, 5, 0.2, 29, 0.3, 7, 0.4, 7, 0.5, 1, 0.6, 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], [1.676, 0.5726, 1, 7, 1.1, 3, 1.2, 5, 1.3, 
13, 1.4, 8, 1.5, 12, 1.6, 4, 1.7, 2, 1.8, 12, 1.9, 5, 2, 6, 2.1, 6, 2.2, 3, 2.3, 8, 2.4, 3, 2.5, 3, null, null, null, null, null, null, null, null, null, null, null, null], [2.026, 0.41645, 1.4, 1, 1.5, 2, 1.6, 1, 1.7, 1, 1.8, 11, 1.9, 5, 2, 6, 2.1, 6, 2.2, 3, 2.3, 8, 2.4, 3, 2.5, 3, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], [1.326, 0.29985, 1, 7, 1.1, 3, 1.2, 5, 1.3, 13, 1.4, 7, 1.5, 10, 1.6, 3, 1.7, 1, 1.8, 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null]] \ No newline at end of file diff --git a/data/model/tree_csv.txt b/data/model/tree_csv.txt new file mode 100644 index 00000000..f3285ad0 --- /dev/null +++ b/data/model/tree_csv.txt @@ -0,0 +1 @@ +[["species", "confidence", "impurity", "Iris-setosa", "Iris-versicolor", "Iris-virginica"], ["Iris-setosa", 0.26289, 0.6666666666666667, 50, 50, 50], ["Iris-versicolor", 0.40383, 0.5, null, 50, 50], ["Iris-virginica", 0.88664, 0.04253308128544431, null, 1, 45], ["Iris-versicolor", 0.8009, 0.16803840877914955, null, 49, 5], ["Iris-setosa", 0.92865, 0.0, 50, null, null]] \ No newline at end of file diff --git a/data/model/w_iris.json b/data/model/w_iris.json new file mode 100644 index 00000000..cf39da14 --- /dev/null +++ b/data/model/w_iris.json @@ -0,0 +1 @@ +{"code": 200, "resource": "model/5f40433be84f94782b000774", "location": "https://bigml.io/andromeda/model/5f40433be84f94782b000774", "object": {"balance_objective": true, "boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-08-21T21:57:15.487000", "creator": "mmartin", "credits": 0, "credits_per_prediction": 0.0, "dataset": "dataset/5f29a563529963736c0116e9", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": false, "ensemble_id": "", "ensemble_index": 0, "excluded_fields": [], "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000002", "000003"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 54], ["Iris-virginica", 46]]}, "training": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, 
"000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "order": 3, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000002", 0.70392], ["000003", 0.29608]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "preferred": true}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "preferred": true}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "preferred": true, "term_analysis": {"enabled": true}}}, "node_threshold": 5, "root": {"children": [{"children": [{"confidence": 0.88664, "count": 46, "id": 2, "objective_summary": {"categories": [["Iris-virginica", 45], ["Iris-versicolor", 1]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 1.75}, 
"weight": 46, "weighted_objective_summary": {"categories": [["Iris-virginica", 45], ["Iris-versicolor", 1]]}}, {"confidence": 0.8009, "count": 54, "id": 3, "objective_summary": {"categories": [["Iris-versicolor", 49], ["Iris-virginica", 5]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.75}, "weight": 54, "weighted_objective_summary": {"categories": [["Iris-versicolor", 49], ["Iris-virginica", 5]]}}], "confidence": 0.40383, "count": 100, "id": 1, "objective_summary": {"categories": [["Iris-versicolor", 50], ["Iris-virginica", 50]]}, "output": "Iris-versicolor", "predicate": {"field": "000002", "operator": ">", "value": 2.45}, "weight": 100, "weighted_objective_summary": {"categories": [["Iris-versicolor", 50], ["Iris-virginica", 50]]}}, {"confidence": 0.92865, "count": 50, "id": 4, "objective_summary": {"categories": [["Iris-setosa", 50]]}, "output": "Iris-setosa", "predicate": {"field": "000002", "operator": "<=", "value": 2.45}, "weight": 50, "weighted_objective_summary": {"categories": [["Iris-setosa", 50]]}}], "confidence": 0.26289, "count": 150, "id": 0, "objective_summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]]}, "output": "Iris-setosa", "predicate": true, "weight": 150, "weighted_objective_summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]]}}}, "name": "XX", "name_options": "5-node, pruned, deterministic order, balanced", "node_threshold": 5, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000004", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["000004"], "objective_weights": [["Iris-setosa", 1], ["Iris-versicolor", 1], ["Iris-virginica", 1]], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": null, "randomize": false, "range": [1, 150], "replacement": false, "resource": "model/5f40433be84f94782b000774", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4608, "source": "source/5f29a560529963736c0116e6", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 762, "message": "The model has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-08-21T21:57:16.729000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/data/model/w_regression.json b/data/model/w_regression.json new file mode 100644 index 00000000..e969d8f7 --- /dev/null +++ b/data/model/w_regression.json @@ -0,0 +1 @@ +{"code": 200, "resource": "model/5f4031772fb31c3272000193", "location": "https://bigml.io/andromeda/model/5f4031772fb31c3272000193", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-08-21T20:41:27.095000", "creator": "mmartin", "credits": 0, "credits_per_prediction": 0.0, "dataset": "dataset/5f29a563529963736c0116e9", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": false, "ensemble_id": "", "ensemble_index": 0, "excluded_fields": [], "fields_meta": {"count": 5, 
"limit": -1, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000004"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"counts": [[0.25007, 50], [1.34282, 50], [2.03462, 50]]}, "training": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "maximum": 2.5, "median": 1.3, "minimum": 0.1}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "order": 3, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], 
[2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000004", 1]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "preferred": true}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "preferred": true, "term_analysis": {"enabled": true}}}, "node_threshold": 5, "root": {"children": [{"confidence": 0.15226, "count": 50, "id": 1, "objective_summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1]], "exact_histogram": {"populations": [5, 29, 7, 7, 1, 1], "start": 0.1, "width": 0.1}, "maximum": 0.6, "median": 0.2, "minimum": 0.1}, "output": 0.25007, "predicate": {"field": "000004", "operator": "=", "value": "Iris-setosa"}, "weight": 73.1, "weighted_objective_summary": {"counts": [[0.1, 6.9], [0.2, 41.9], [0.3, 10], [0.4, 11], [0.5, 1.7], [0.6, 1.6]], "exact_histogram": {"populations": [7, 42, 10, 11, 2, 2], "start": 0.1, "width": 0.1}, "maximum": 0.6, "median": 0.2, "minimum": 0.1}}, {"children": [{"confidence": 0.23853, "count": 50, "id": 3, "objective_summary": {"counts": [[1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 7], [1.5, 10], [1.6, 3], [1.7, 1], [1.8, 1]], "exact_histogram": {"populations": [7, 3, 5, 13, 7, 10, 3, 1, 1], "start": 1, "width": 0.1}, "maximum": 1.8, "median": 1.3, "minimum": 1}, "output": 1.34282, "predicate": {"field": "000004", "operator": "=", "value": "Iris-versicolor"}, "weight": 213, "weighted_objective_summary": {"counts": [[1, 25.4], [1.1, 10.7], [1.2, 21.2], [1.3, 54.3], [1.4, 31.5], [1.5, 45.8], [1.6, 14.3], [1.7, 5], [1.8, 4.8]], "exact_histogram": {"populations": [25, 11, 21, 54, 32, 46, 14, 5, 5], "start": 1, "width": 0.1}, "maximum": 1.8, "median": 1.3, "minimum": 1}}, {"confidence": 0.3259, "count": 50, "id": 4, "objective_summary": {"counts": [[1.4, 1], [1.5, 2], [1.6, 1], [1.7, 1], [1.8, 11], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [3, 2, 16, 12, 11, 6], "start": 1.4, "width": 0.2}, "maximum": 2.5, "median": 2, "minimum": 1.4}, "output": 2.03462, "predicate": {"field": "000004", "operator": "!=", "value": "Iris-versicolor"}, "weight": 277.6, "weighted_objective_summary": {"counts": [[1.4, 5.6], [1.5, 10.1], [1.6, 5.8], [1.7, 4.5], [1.8, 59.2], [1.9, 26.6], [2, 33.3], [2.1, 34.7], [2.2, 18.1], [2.3, 45.6], [2.4, 16.3], [2.5, 17.8]], "exact_histogram": {"populations": [16, 10, 86, 68, 64, 34], "start": 1.4, "width": 0.2}, "maximum": 2.5, "median": 2, "minimum": 1.4}}], "confidence": 0.48126, "count": 100, "id": 2, "objective_summary": {"counts": [[1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], 
[2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [7, 3, 5, 13, 8, 12, 4, 2, 12, 5, 6, 6, 3, 8, 3, 3], "start": 1, "width": 0.1}, "maximum": 2.5, "median": 1.6, "minimum": 1}, "output": 1.73426, "predicate": {"field": "000004", "operator": "!=", "value": "Iris-setosa"}, "weight": 490.6, "weighted_objective_summary": {"counts": [[1, 25.4], [1.1, 10.7], [1.2, 21.2], [1.3, 54.3], [1.4, 37.1], [1.5, 55.9], [1.6, 20.1], [1.7, 9.5], [1.8, 64], [1.9, 26.6], [2, 33.3], [2.1, 34.7], [2.2, 18.1], [2.3, 45.6], [2.4, 16.3], [2.5, 17.8]], "exact_histogram": {"populations": [25, 11, 21, 54, 37, 56, 20, 10, 64, 27, 33, 35, 18, 46, 16, 18], "start": 1, "width": 0.1}, "maximum": 2.5, "median": 1.8, "minimum": 1}}], "confidence": 0.72256, "count": 150, "id": 0, "objective_summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "maximum": 2.5, "median": 1.3, "minimum": 0.1}, "output": 1.5418, "predicate": true, "weight": 563.7, "weighted_objective_summary": {"counts": [[0.1, 6.9], [0.2, 41.9], [0.3, 10], [0.4, 11], [0.5, 1.7], [0.6, 1.6], [1, 25.4], [1.1, 10.7], [1.2, 21.2], [1.3, 54.3], [1.4, 37.1], [1.5, 55.9], [1.6, 20.1], [1.7, 9.5], [1.8, 64], [1.9, 26.6], [2, 33.3], [2.1, 34.7], [2.2, 18.1], [2.3, 45.6], [2.4, 16.3], [2.5, 17.8]], "exact_histogram": {"populations": [7, 52, 13, 2, 0, 36, 76, 93, 30, 91, 68, 64, 34], "start": 0, "width": 0.2}, "maximum": 2.5, "median": 1.6, "minimum": 0.1}}}, "name": "regression weighted", "name_options": "5-node, pruned, deterministic order, weight field: 000002", "node_threshold": 5, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000003", "objective_field_name": "petal width", "objective_field_type": "numeric", "objective_fields": ["000003"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": null, "randomize": false, "range": [1, 150], "replacement": false, "resource": "model/5f4031772fb31c3272000193", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4608, "source": "source/5f29a560529963736c0116e6", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 1026, "message": "The model has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-08-25T10:21:25.313000", "weight_field": "000002", "white_box": false}, "error": null} \ No newline at end of file diff --git a/data/model/wdistribution_iris.txt b/data/model/wdistribution_iris.txt new file mode 100644 index 00000000..464efae8 --- /dev/null +++ b/data/model/wdistribution_iris.txt @@ -0,0 +1,3 @@ + Iris-setosa: 33.33% (50 instances) + Iris-versicolor: 33.33% (50 instances) + Iris-virginica: 33.33% (50 instances) diff --git a/data/model/wlist_fields.txt b/data/model/wlist_fields.txt new file mode 100644 index 00000000..e58c097a --- /dev/null +++ b/data/model/wlist_fields.txt @@ -0,0 +1,3 @@ + +[petal length : numeric] +[petal width : numeric] diff --git a/data/model/wrdistribution_iris.txt b/data/model/wrdistribution_iris.txt new file mode 100644 index 00000000..41258bcf --- 
/dev/null +++ b/data/model/wrdistribution_iris.txt @@ -0,0 +1,22 @@ + 0.1: 3.33% (5 instances) + 0.2: 19.33% (29 instances) + 0.3: 4.67% (7 instances) + 0.4: 4.67% (7 instances) + 0.5: 0.67% (1 instance) + 0.6: 0.67% (1 instance) + 1: 4.67% (7 instances) + 1.1: 2.00% (3 instances) + 1.2: 3.33% (5 instances) + 1.3: 8.67% (13 instances) + 1.4: 5.33% (8 instances) + 1.5: 8.00% (12 instances) + 1.6: 2.67% (4 instances) + 1.7: 1.33% (2 instances) + 1.8: 8.00% (12 instances) + 1.9: 3.33% (5 instances) + 2: 4.00% (6 instances) + 2.1: 4.00% (6 instances) + 2.2: 2.00% (3 instances) + 2.3: 5.33% (8 instances) + 2.4: 2.00% (3 instances) + 2.5: 2.00% (3 instances) diff --git a/data/model/wrlist_fields.txt b/data/model/wrlist_fields.txt new file mode 100644 index 00000000..93d0b61f --- /dev/null +++ b/data/model/wrlist_fields.txt @@ -0,0 +1,2 @@ + +[species : categorical] diff --git a/data/model/wrtree_csv.txt b/data/model/wrtree_csv.txt new file mode 100644 index 00000000..ba98237c --- /dev/null +++ b/data/model/wrtree_csv.txt @@ -0,0 +1 @@ +[["petal width", "error", "bin0_value", "bin0_instances", "bin1_value", "bin1_instances", "bin2_value", "bin2_instances", "bin3_value", "bin3_instances", "bin4_value", "bin4_instances", "bin5_value", "bin5_instances", "bin6_value", "bin6_instances", "bin7_value", "bin7_instances", "bin8_value", "bin8_instances", "bin9_value", "bin9_instances", "bin10_value", "bin10_instances", "bin11_value", "bin11_instances", "bin12_value", "bin12_instances", "bin13_value", "bin13_instances", "bin14_value", "bin14_instances", "bin15_value", "bin15_instances", "bin16_value", "bin16_instances", "bin17_value", "bin17_instances", "bin18_value", "bin18_instances", "bin19_value", "bin19_instances", "bin20_value", "bin20_instances", "bin21_value", "bin21_instances"], [1.5418, 0.72256, 0.1, 5, 0.2, 29, 0.3, 7, 0.4, 7, 0.5, 1, 0.6, 1, 1, 7, 1.1, 3, 1.2, 5, 1.3, 13, 1.4, 8, 1.5, 12, 1.6, 4, 1.7, 2, 1.8, 12, 1.9, 5, 2, 6, 2.1, 6, 2.2, 3, 2.3, 8, 2.4, 3, 2.5, 3], [0.25007, 0.15226, 0.1, 5, 0.2, 29, 0.3, 7, 0.4, 7, 0.5, 1, 0.6, 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], [1.73426, 0.48126, 1, 7, 1.1, 3, 1.2, 5, 1.3, 13, 1.4, 8, 1.5, 12, 1.6, 4, 1.7, 2, 1.8, 12, 1.9, 5, 2, 6, 2.1, 6, 2.2, 3, 2.3, 8, 2.4, 3, 2.5, 3, null, null, null, null, null, null, null, null, null, null, null, null], [1.34282, 0.23853, 1, 7, 1.1, 3, 1.2, 5, 1.3, 13, 1.4, 7, 1.5, 10, 1.6, 3, 1.7, 1, 1.8, 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], [2.03462, 0.3259, 1.4, 1, 1.5, 2, 1.6, 1, 1.7, 1, 1.8, 11, 1.9, 5, 2, 6, 2.1, 6, 2.2, 3, 2.3, 8, 2.4, 3, 2.5, 3, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null]] \ No newline at end of file diff --git a/data/model/wtree_csv.txt b/data/model/wtree_csv.txt new file mode 100644 index 00000000..f3285ad0 --- /dev/null +++ b/data/model/wtree_csv.txt @@ -0,0 +1 @@ +[["species", "confidence", "impurity", "Iris-setosa", "Iris-versicolor", "Iris-virginica"], ["Iris-setosa", 0.26289, 0.6666666666666667, 50, 50, 50], ["Iris-versicolor", 0.40383, 0.5, null, 50, 50], ["Iris-virginica", 0.88664, 0.04253308128544431, null, 1, 45], ["Iris-versicolor", 0.8009, 0.16803840877914955, null, 49, 5], ["Iris-setosa", 0.92865, 0.0, 50, null, null]] \ No newline at end of 
file diff --git a/data/one_plus_one.whizzml b/data/one_plus_one.whizzml new file mode 100644 index 00000000..3ed6dc80 --- /dev/null +++ b/data/one_plus_one.whizzml @@ -0,0 +1 @@ +(+ 1 1) diff --git a/data/regression_evaluation.json b/data/regression_evaluation.json new file mode 100644 index 00000000..6bb17e30 --- /dev/null +++ b/data/regression_evaluation.json @@ -0,0 +1 @@ +{"code": 200, "resource": "evaluation/64adcb654a1a2c0c57cb8784", "location": "https://bigml.io/andromeda/evaluation/64adcb654a1a2c0c57cb8784", "object": {"boosted_ensemble": false, "category": 0, "code": 200, "combiner": null, "configuration": null, "configuration_status": false, "created": "2023-07-11T21:36:37.670000", "creator": "mmartin", "dataset": "dataset/64adcb5f79c60236c3593ef5", "dataset_status": true, "datasets": [], "deepnet": "", "description": "", "ensemble": "", "evaluations": null, "excluded_fields": [], "fields_map": {"000000": "000000", "000001": "000001", "000002": "000002", "000003": "000003", "000004": "000004", "000005": "000005", "000006": "000006", "000007": "000007"}, "fusion": "", "input_fields": [], "linearregression": "", "locale": "en-US", "logisticregression": "", "max_rows": 4128, "missing_strategy": 0, "model": "model/64ad258d79c60271f4826e23", "model_status": true, "model_type": 0, "name": "Stdin input vs. Stdin input", "name_options": "512-node, pruned, deterministic order, operating kind=probability", "number_of_models": 1, "objective_field_descriptors": {"000007": {"column_number": 7, "datatype": "double", "name": "Longitude", "optype": "numeric", "order": 7, "preferred": true}}, "objective_fields": ["000007"], "objective_fields_names": ["Longitude"], "operating_kind": "probability", "optiml": null, "optiml_status": false, "out_of_bag": false, "performance": 0.9288, "private": true, "project": null, "range": null, "replacement": false, "resource": "evaluation/64adcb654a1a2c0c57cb8784", "result": {"mean": {"mean_absolute_error": 1.83374, "mean_squared_error": 4.0345, "r_squared": 0}, "model": {"mean_absolute_error": 0.30921, "mean_squared_error": 0.28725, "r_squared": 0.9288}, "random": {"mean_absolute_error": 2.93722, "mean_squared_error": 12.60007, "r_squared": -2.12308}}, "rows": 4128, "sample_rate": 1.0, "sampled_rows": 4128, "shared": false, "size": 354722, "status": {"code": 5, "elapsed": 3590, "message": "The evaluation has been created", "progress": 1}, "subscription": false, "tags": [], "timeseries": "", "type": 1, "updated": "2023-07-11T21:36:43.498000"}, "error": null} \ No newline at end of file diff --git a/data/repeat_iris.csv b/data/repeat_iris.csv new file mode 100644 index 00000000..ef2fa68f --- /dev/null +++ b/data/repeat_iris.csv @@ -0,0 +1,14 @@ +sepal length,sepal width,petal length,petal width,species +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +6.4,3.2,4.5,1.5,Iris-versicolor diff --git a/docs/101_anomaly.rst b/docs/101_anomaly.rst index bb5b9620..03fc9c31 100644 --- a/docs/101_anomaly.rst +++ b/docs/101_anomaly.rst @@ -1,5 +1,8 @@ -BigML Bindings: 101 - Using an anomaly detector -=============================================== +.. 
toctree:: + :hidden: + +101 - Anomaly detector usage +============================ Following the schema described in the `prediction workflow `_ document, this is the code snippet that shows the minimal workflow to @@ -29,6 +32,28 @@ create an anomaly detector to produce a single anomaly score. # assigning an anomaly score to it anomaly_score = api.create_anomaly_score(anomaly, input_data) +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + +If you want to configure some of the attributes of your anomaly detector, +like the number of top anomalies retrieved, +you can use the second argument in the create call. + + +.. code-block:: python + + # step 5: creating an anomaly detector with a list of the 20 top anomalies + anomaly = api.create_anomaly(dataset, {"top_n": 20}) + # waiting for the anomaly detector to be finished + api.ok(anomaly) + +You can check all the available creation arguments in the `API documentation +`_. + If you want to assign scores to the original dataset (or a different dataset), you can do so by creating a `batch_anomaly_score` resource. In the example, we'll be assuming you already @@ -79,7 +104,7 @@ file to create the local `Anomaly` object: from bigml.api import BigML api = BigML() api.export("anomaly/5968ec46983efc21b000001b", - "filename": "my_anomaly.json") + filename="my_anomaly.json") # creating an anomaly object using the information in the file from bigml.anomaly import Anomaly local_anomaly = Anomaly("my_anomaly.json") diff --git a/docs/101_association.rst b/docs/101_association.rst index 331595b6..371456a2 100644 --- a/docs/101_association.rst +++ b/docs/101_association.rst @@ -1,5 +1,8 @@ -BigML Bindings: 101 - Using Association Discovery -================================================= +.. toctree:: + :hidden: + +101 - Association Discovery usage +================================= Following the schema described in the `prediction workflow `_ document, this is the code snippet that shows the minimal workflow to @@ -27,6 +30,13 @@ create an association and produce association sets. # creating a single association set association_set = api.create_association_set(association, input_data) +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + You can also create association sets locally using the `Association` class in the `association` module. A simple example of that is: @@ -46,7 +56,7 @@ file to create the local `Association` object: from bigml.api import BigML api = BigML() api.export("association/5968ec46983efc21b000001b", - "filename": "my_association.json") + filename="my_association.json") # creating the association from the file from bigml.association import Association local_association = Association("my_association.json") diff --git a/docs/101_cluster.rst b/docs/101_cluster.rst index 2ffa6d25..d4998463 100644 --- a/docs/101_cluster.rst +++ b/docs/101_cluster.rst @@ -1,5 +1,8 @@ -BigML Bindings: 101 - Using a Cluster -===================================== +..
toctree:: + :hidden: + +101 - Cluster Usage +=================== Following the schema described in the `prediction workflow `_ document, this is the code snippet that shows the minimal workflow to @@ -30,6 +33,13 @@ create a cluster and find the centroid associated with a single instance. # getting the associated centroid centroid = api.create_centroid(cluster, input_data) +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + If you want to find the centroids for many inputs at once, you can do so by creating a `batch_centroid` resource. You can create a `batch_centroid` using the same `dataset` that you used to build the `cluster` and this will produce a @@ -101,7 +111,7 @@ file to create the local `Cluster` object: from bigml.api import BigML api = BigML() api.export("cluster/5968ec46983efc21b000001b", - "filename": "my_cluster.json") + filename="my_cluster.json") # creating the cluster from the file from bigml.cluster import Cluster local_cluster = Cluster("my_cluster.json") diff --git a/docs/101_deepnet.rst b/docs/101_deepnet.rst index 6fb85e3f..c8f1d2c6 100644 --- a/docs/101_deepnet.rst +++ b/docs/101_deepnet.rst @@ -1,5 +1,8 @@ -BigML Bindings: 101 - Using a Deepnet Model -=========================================== +.. toctree:: + :hidden: + +101 - Deepnet usage +=================== Following the schema described in the `prediction workflow `_ document, this is the code snippet that shows the minimal workflow to @@ -27,6 +30,14 @@ create a deepnet and produce a single prediction. # creating a single prediction prediction = api.create_prediction(deepnet, input_data) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + If you want to create predictions for many new inputs, you can do so by creating a `batch_prediction` resource. First, you will need to upload to the platform @@ -84,7 +95,7 @@ file to create the local `Deepnet` object: from bigml.api import BigML api = BigML() api.export("deepnet/5968ec46983efc21b000001b", - "filename": "my_deepnet.json") + filename="my_deepnet.json") # creating the deepnet from the file from bigml.deepnet import Deepnet local_deepnet = Deepnet("my_deepnet.json") diff --git a/docs/101_ensemble.rst b/docs/101_ensemble.rst index d7e9dc2a..0ca3f747 100644 --- a/docs/101_ensemble.rst +++ b/docs/101_ensemble.rst @@ -1,5 +1,8 @@ -BigML Bindings: 101 - Using an Ensemble -======================================= +.. toctree:: + :hidden: + +101 - Ensemble usage +==================== Following the schema described in the `prediction workflow `_ document, this is the code snippet that shows the minimal workflow to @@ -52,6 +55,13 @@ created a `model` following the steps 0 to 5 in the previous snippet. api.download_batch_prediction(batch_prediction, filename='my_dir/my_predictions.csv') +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties.
+In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + The batch prediction output (as well as any of the resources created) can be configured using additional arguments in the corresponding create calls. For instance, to include all the information in the original dataset in the @@ -84,7 +94,7 @@ file to create the local `Ensemble` object: from bigml.api import BigML api = BigML() api.export("ensemble/5968ec46983efc21b000001b", - "filename": "my_ensemble.json") + filename="my_ensemble.json") # creating the ensemble from the file from bigml.ensemble import Ensemble local_ensemble = Ensemble("my_ensemble.json") diff --git a/docs/101_fusion.rst b/docs/101_fusion.rst index 69a33cc0..8b549759 100644 --- a/docs/101_fusion.rst +++ b/docs/101_fusion.rst @@ -1,5 +1,8 @@ -BigML Bindings: 101 - Using a Fusion Model -========================================== +.. toctree:: + :hidden: + +101 - Fusion usage +================== Following the schema described in the `prediction workflow `_ document, this is the code snippet that shows the minimal workflow to @@ -24,6 +27,13 @@ already been created) and produce a single prediction. # creating a single prediction prediction = api.create_prediction(fusion, input_data) +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + If you want to create predictions for many new inputs, you can do so by creating a `batch_prediction` resource. First, you will need to upload to the platform @@ -82,10 +92,10 @@ files to create the local `Fusion` object: from bigml.api import BigML api = BigML() api.export("fusion/5968ec46983efc21b000001b", - "filename": "my_dir/my_fusion.json") + filename="my_dir/my_fusion.json") # creating the local fusion from the file from bigml.fusion import Fusion - local_fusion = Model("my_dir/my_fusion.json") + local_fusion = Fusion("my_dir/my_fusion.json") # predicting for some input data local_fusion.predict({"petal length": 2.45, "sepal length": 2, "petal width": 1.75, "sepal width": 3}) diff --git a/docs/101_images_classification.rst b/docs/101_images_classification.rst new file mode 100644 index 00000000..dd6fc4eb --- /dev/null +++ b/docs/101_images_classification.rst @@ -0,0 +1,62 @@ +.. toctree:: + :hidden: + +101 - Images Classification +=========================== + +Following the schema described in the `prediction workflow `_ +document, this is the code snippet that shows the minimal workflow to +create a deepnet from an images dataset and produce a single prediction. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a source from the data in your local + # "data/images/fruits_hist.zip" file. The file contains two folders, each + # of which contains a collection of images. The folder name will be used + # as the label for each image it contains. + # The source is created disabling image analysis, as we want the deepnet + # model to take care of extracting the features. Unless said otherwise, + # the analysis would be enabled and features like the histogram of + # gradients would be extracted to become part of the resulting dataset.
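+ # (the 101 - Images Feature Extraction document in these same docs + # shows the opposite setting, where the analysis is enabled and the + # extracted features are chosen explicitly)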
+ source = api.create_source("data/images/fruits_hist.zip", + args={"image_analysis": {"enabled": False}}) + # waiting for the source to be finished. Results will be stored in `source` + # and the new ``image_id`` and ``label`` fields will be generated in the + # source + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating a deepnet + deepnet = api.create_deepnet(dataset) + # waiting for the deepnet to be finished + api.ok(deepnet) + # the new input data to predict for should contain the path to the + # new image to be used for testing + input_data = {"image_id": "data/images/f2/fruits2.png"} + # creating a single prediction: The image file is uploaded to BigML, + # a new source is created for it and its ID is used as the value + # for the ``image_id`` field in the input data to generate the prediction + prediction = api.create_prediction(deepnet, input_data) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + +You can also predict locally using the `Deepnet` +class in the `deepnet` module. A simple example of that is: + +.. code-block:: python + + from bigml.deepnet import Deepnet + local_deepnet = Deepnet("deepnet/5968ec46983efc21b000001c") + # predicting for some input data + input_data = {"image_id": "data/images/f2/fruits2.png"} + local_deepnet.predict(input_data) diff --git a/docs/101_images_feature_extraction.rst b/docs/101_images_feature_extraction.rst new file mode 100644 index 00000000..f649d650 --- /dev/null +++ b/docs/101_images_feature_extraction.rst @@ -0,0 +1,71 @@ +.. toctree:: + :hidden: + +101 - Images Feature Extraction +=============================== + +Following the schema described in the `prediction workflow `_ +document, this is the code snippet that shows the minimal workflow to +extract features from images and generate an enriched dataset that can be +used to train any kind of model. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a source from the data in your local + # "data/images/fruits_hist.zip" file. The file contains two folders, each + # of which contains a collection of images. The folder name will be used + # as the label for each image it contains. + # The source is created enabling image analysis and setting some of the + # available features (see the API documentation at + # https://bigml.com/api/sources?id=source-arguments + # for details). In particular, we extract the histogram of gradients and + # average pixels. + extracted_features = ["average_pixels", "histogram_of_gradients"] + source = api.create_source("data/images/fruits_hist.zip", + args={"image_analysis": {"enabled": True, + "extracted_features": extracted_features}}) + # waiting for the source to be finished. Results will be stored in `source` + # and the new extracted features will be generated.
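+ # (the image_analysis configuration is stored with the source and will + # be reused automatically when a new test image is uploaded for scoring)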
+ api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating an anomaly detector + anomaly = api.create_anomaly(dataset) + # waiting for the anomaly detector to be finished + api.ok(anomaly) + # the new input data to predict for should contain the path to the + # new image to be used for testing + input_data = {"image_id": "data/images/f2/fruits2.png"} + # creating a single anomaly score: The image file is uploaded to BigML, + # a new source is created for it using the same image_analysis + # used in the image field, and its ID is used as the value + # for the ``image_id`` field in the input data to generate the prediction + anomaly_score = api.create_anomaly_score(anomaly, input_data) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + +You can also create a local anomaly score using the `Anomaly` +class in the `anomaly` module. A simple example of that is: + +.. code-block:: python + + from bigml.anomaly import Anomaly + local_anomaly = Anomaly("anomaly/5968ec46983efc21b000001c") + # creating a pipeline to store the feature extraction transformations + feature_extraction_pipeline = local_anomaly.data_transformations() + # scoring for some input data. As pipelines transform lists of rows + # we build a list with the single input data and get the first + # element of the output list + input_data = feature_extraction_pipeline.transform( + [{"image_id": "data/images/f2/fruits2.png"}])[0] + local_anomaly.anomaly_score(input_data) diff --git a/docs/101_linear_regression.rst b/docs/101_linear_regression.rst index 514a8ab9..08f87889 100644 --- a/docs/101_linear_regression.rst +++ b/docs/101_linear_regression.rst @@ -1,5 +1,8 @@ -BigML Bindings: 101 - Using a Linear Regression -================================================= +.. toctree:: + :hidden: + +101 - Linear Regression usage +============================= Following the schema described in the `prediction workflow `_ document, this is the code snippet that shows the minimal workflow to @@ -28,6 +31,13 @@ create a linear regression model and produce a single prediction. # creating a single prediction prediction = api.create_prediction(linear_regression, input_data) +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + If you want to create predictions for many new inputs, you can do so by creating a `batch_prediction` resource.
First, you will need to upload to the platform @@ -91,7 +101,7 @@ file to create the local `LinearRegression` object: from bigml.api import BigML api = BigML() api.export("linearregression/5968ec46983efc21b000001b", - "filename": "my_linear_regression.json") + filename="my_linear_regression.json") # creating the linear regression from the file from bigml.linear import LinearRegression local_linear_regression = LinearRegression( \ diff --git a/docs/101_logistic_regression.rst b/docs/101_logistic_regression.rst index 23eea52a..8cda0471 100644 --- a/docs/101_logistic_regression.rst +++ b/docs/101_logistic_regression.rst @@ -1,5 +1,8 @@ -BigML Bindings: 101 - Using a Logistic Regression -================================================= +.. toctree:: + :hidden: + +101 - Logistic Regression usage +=============================== Following the schema described in the `prediction workflow `_ document, this is the code snippet that shows the minimal workflow to @@ -27,6 +30,13 @@ create a logistic regression model and produce a single prediction. # creating a single prediction prediction = api.create_prediction(logistic_regression, input_data) +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + If you want to create predictions for many new inputs, you can do so by creating a `batch_prediction` resource. First, you will need to upload to the platform @@ -89,7 +99,7 @@ file to create the local `LogisticRegression` object: from bigml.api import BigML api = BigML() api.export("logisticregression/5968ec46983efc21b000001b", - "filename": "my_logistic_regression.json") + filename="my_logistic_regression.json") # creating the logistic regression from the file from bigml.logistic import LogisticRegression local_logistic_regression = LogisticRegression( \ diff --git a/docs/101_model.rst b/docs/101_model.rst index 85ed3d8c..a7bf1915 100644 --- a/docs/101_model.rst +++ b/docs/101_model.rst @@ -1,5 +1,8 @@ -BigML Bindings: 101 - Using a Decision Tree Model -================================================= +.. toctree:: + :hidden: + +101 - Decision Tree usage +========================= Following the schema described in the `prediction workflow `_ document, this is the code snippet that shows the minimal workflow to @@ -27,6 +30,13 @@ create a decision tree model and produce a single prediction. # creating a single prediction prediction = api.create_prediction(model, input_data) +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + If you want to create predictions for many new inputs, you can do so by creating a `batch_prediction` resource.
First, you will need to upload to the platform @@ -84,7 +94,7 @@ file to create the local `Model` object: from bigml.api import BigML api = BigML() api.export("model/5968ec46983efc21b000001b", - "filename": "my_model.json") + filename="my_model.json") # creating the model from the file from bigml.model import Model local_model = Model("my_model.json") diff --git a/docs/101_object_detection.rst b/docs/101_object_detection.rst new file mode 100644 index 00000000..b851366d --- /dev/null +++ b/docs/101_object_detection.rst @@ -0,0 +1,52 @@ +.. toctree:: + :hidden: + +101 - Images Object Detection +============================= + +Following the schema described in the `prediction workflow `_ +document, this is the code snippet that shows the minimal workflow to +create a deepnet and produce a single prediction. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a source from the data in your local + # "data/images/cats.zip" file, which contains a collection of images + # and an "annotations.json" file with the corresponding annotations per + # image describing the regions labeled in the image + source = api.create_source("data/images/cats.zip") + # waiting for the source to be finished. Results will be stored in `source` + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating a deepnet + deepnet = api.create_deepnet(dataset) + # waiting for the deepnet to be finished + api.ok(deepnet) + # the new input data to predict for + input_data = "data/images/cats_test/pexels-pixabay-33358.jpg" + # creating a single prediction + prediction = api.create_prediction(deepnet, input_data) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + +You can also predict locally using the `Deepnet` +class in the `deepnet` module. A simple example of that is: + +.. code-block:: python + + from bigml.deepnet import Deepnet + local_deepnet = Deepnet("deepnet/5968ec46983efc21b000001c") + # predicting for some input data + input_data = "data/images/cats_test/pexels-pixabay-33358.jpg" + local_deepnet.predict(input_data) diff --git a/docs/101_optiml.rst b/docs/101_optiml.rst new file mode 100644 index 00000000..cd1f7d2e --- /dev/null +++ b/docs/101_optiml.rst @@ -0,0 +1,45 @@ +.. toctree:: + :hidden: + +101 - OptiML usage +================== + +Following the schema described in the `prediction workflow `_ +document, this is the code snippet that shows the minimal workflow to +create an OptiML. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a source from the data in your local "data/iris.csv" file + source = api.create_source("data/iris.csv") + # waiting for the source to be finished.
Results will be stored in `source` + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating an optiml + optiml = api.create_optiml(dataset) + # waiting for the optiml to be finished + api.ok(optiml) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + +If you want to configure some of the attributes of your optiml, like the +maximum training time, you can use the second argument in the create call. + +.. code-block:: python + + # step 5: creating an optiml with a maximum training time of 3600 seconds + optiml = api.create_optiml(dataset, {"max_training_time": 3600}) + # waiting for the optiml to be finished + api.ok(optiml) + +You can check all the available creation arguments in the `API documentation +`_. diff --git a/docs/101_pca.rst b/docs/101_pca.rst new file mode 100644 index 00000000..2138470a --- /dev/null +++ b/docs/101_pca.rst @@ -0,0 +1,135 @@ +.. toctree:: + :hidden: + +101 - PCA usage +=============== + +The PCA model is used to find the linear combinations of your original +features that best describe your data. In that sense, the goal of the model +is to provide a transformation that allows dimensionality reduction. +Following the schema described in the `prediction workflow `_ +document, this is the code snippet that shows the minimal workflow to +create a PCA model and produce a single projection. + + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + # check how to set your credentials in the Authentication section + api = BigML() + # step 1: creating a source from the data in your local "data/iris.csv" file + source = api.create_source("data/iris.csv") + # waiting for the source to be finished. Results will be stored in `source` + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating a PCA model + pca = api.create_pca(dataset) + # waiting for the PCA to be finished + api.ok(pca) + # the input data to project + input_data = {"petal length": 4, "sepal length": 2, "petal width": 1, + "sepal width": 3} + # getting the transformed components, the projection + projection = api.create_projection(pca, input_data) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + +If you want to configure some of the attributes of your PCA, +like selecting a default numeric value, you can use the second argument +in the create call. + + +.. code-block:: python + + # step 5: creating a PCA that uses the mean as the numeric value when missing + pca = api.create_pca(dataset, {"default_numeric_value": "mean"}) + # waiting for the PCA to be finished + api.ok(pca) + +You can check all the available creation arguments in the `API documentation +`_.
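+ +As with any other BigML resource, several creation arguments can be combined +in the same dictionary. A minimal sketch (the ``name`` argument is common to +all BigML resources; the values are just illustrative): + +.. code-block:: python + + # step 5: creating a named PCA with a default numeric value + pca = api.create_pca(dataset, {"name": "iris PCA", + "default_numeric_value": "mean"}) + # waiting for the PCA to be finished + api.ok(pca)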
+ +If you want to add the generated principal components to the original +dataset (or a different dataset), you can do so by creating +a `batch_projection` resource. In the example, we'll be assuming you already +created a `PCA` following the steps 0 to 5 in the previous snippet and +that you want to score the same data you used in the PCA model. + +.. code-block:: python + + test_dataset = dataset + # step 10: creating a batch projection + batch_projection = api.create_batch_projection(pca, test_dataset) + # waiting for the batch_projection to be finished + api.ok(batch_projection) + # downloading the results to your computer + api.download_batch_projection(batch_projection, + filename='my_dir/my_projection.csv') + +The batch projection output (as well as any of the resources created) +can be configured using additional arguments in the corresponding create calls. +For instance, to include all the information in the original dataset in the +output you would change `step 10` to: + +.. code-block:: python + + batch_projection = api.create_batch_projection(pca, test_dataset, + {"all_fields": True}) + +Check the `API documentation `_ to learn about the +available configuration options for any BigML resource. + +You can also project your data locally using the `PCA` +class in the `pca` module. A simple example of that is: + +.. code-block:: python + + from bigml.pca import PCA + local_pca = PCA("pca/6878ec46983efc21b000001b") + # Getting the projection of some input data + local_pca.projection({"petal length": 4, "sepal length": 2, + "petal width": 1, "sepal width": 3}) + +Or you could first store your PCA information in a file and use that +file to create the local `PCA` object: + +.. code-block:: python + + # downloading the PCA JSON to a local file + from bigml.api import BigML + api = BigML() + api.export("pca/6878ec46983efc21b000001b", + filename="my_pca.json") + # creating a PCA object using the information in the file + from bigml.pca import PCA + local_pca = PCA("my_pca.json") + # getting the projection for some input data + local_pca.projection({"petal length": 4, "sepal length": 2, + "petal width": 1, "sepal width": 3}) + +If you want to get the projection locally for all the rows in a CSV file +(first line should contain the field headers): + +.. code-block:: python + + import csv + from bigml.pca import PCA + local_pca = PCA("pca/68714c667811dd5057000ab5") + with open("test_data.csv") as test_handler: + reader = csv.DictReader(test_handler) + for input_data in reader: + # projecting all rows + print(local_pca.projection(input_data)) + +Every modeling resource in BigML has its corresponding local class. Check +the `Local resources `_ section of the +documentation to learn more about them. diff --git a/docs/101_scripting.rst b/docs/101_scripting.rst index 7834e30e..aa0f05a2 100644 --- a/docs/101_scripting.rst +++ b/docs/101_scripting.rst @@ -1,13 +1,22 @@ -BigML Bindings: 101 - Creating and executing scripts -==================================================== +.. toctree:: + :hidden: -The bindings offer methods to create and execute `WhizzML -_` scripts in the platform. +101 - Creating and executing scripts +==================================== + +The bindings offer methods to create and execute `WhizzML `_ +scripts in the platform. +WhizzML is the DSL that allows you to automate tasks in BigML.
These code snippets show examples to illustrate how to create and execute simple scripts: +Basic script, no inputs +----------------------- + +This is the code to create a simple script that creates a source from an +existing CSV file available at a remote URL: + .. code-block:: python from bigml.api import BigML @@ -25,10 +34,24 @@ simple scripts: # step 3: retrieving the result (e.g. "source/5ce6a55dc984177cf7000891") result = execution['object']['execution']['result'] +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + In this example, the `url` used is always the same, so no inputs are provided to the script. This is not a realistic situation, because usually scripts need user-provided inputs. The next example shows how to -add two variables, whose values will be provided as inputs: +add two variables, whose values will be provided as inputs. + +Basic script with inputs +------------------------ + +Scripts usually need some inputs to work. When defining the script, you need +to provide both the code and the description of the inputs that it will +accept. .. code-block:: python @@ -54,10 +77,53 @@ add two variables, whose values will be provided as inputs: # step 3: retrieving the result (e.g. 3) result = execution['object']['execution']['result'] +And of course, you will usually store your code, inputs and outputs in files. +The ``create_script`` method can receive as its first argument the path to a +file that contains the source code, and the rest of the arguments can be +retrieved from a JSON file using the standard tools available in Python. The +previous example could also be created from a file that contains the WhizzML +code and a metadata file that contains the inputs and outputs description as +JSON. + +.. code-block:: python + + import json + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a script from the code stored in `my_script.whizzml` + # and the inputs and outputs metadata stored in `metadata.json` + + with open('./metadata.json') as json_file: + metadata = json.load(json_file) + script = api.create_script("./my_script.whizzml", metadata) + # waiting for the script to be finished. + api.ok(script) + +Or load the files from a gist URL: + +.. code-block:: python + + import json + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a script from a gist + + gist_url = "https://gist.github.com/mmerce/49e0a69cab117b6a11fb490140326020" + script = api.create_script(gist_url) + # waiting for the script to be finished. + api.ok(script) + +Basic Execution +--------------- + In a full-fledged script, you will also produce some outputs that can be used in other scripts. This is an example of a script creating a dataset from a source that was generated from a remote URL. Both the URL and the source -name are provided by the user: +name are provided by the user. Once the script has been created, we +run it by creating an execution from it and providing the particular input +values that we want to apply it to. .. code-block:: python @@ -76,6 +142,7 @@ name are provided by the user: "type": "dataset"}]}) # waiting for the script to be finished.
api.ok(script) + # step 2: executing the script with some particular inputs execution = api.create_execution( \ script, @@ -85,3 +152,56 @@ name are provided by the user: api.ok(execution) # step 3: retrieving the result (e.g. "dataset/5cae5ad4b72c6609d9000356") result = execution['object']['execution']['result'] + + +You can also use the ``Execution`` class to easily access the results, +outputs and output resources of an existing execution. +Just instantiate the class with the execution resource or ID: + +.. code-block:: python + + from bigml.execution import Execution + execution = Execution("execution/5cae5ad4b72c6609d9000468") + print("The result of the execution is %s" % execution.result) + print(" and the output for variable 'my_variable': %s" % \ + execution.outputs["my_variable"]) + +Local and remote scripting +-------------------------- + +Any operation in BigML can be scripted by using the bindings locally +to call the API. However, the highest +efficiency, scalability and reproducibility will only come from using +WhizzML scripts in the platform to handle the Machine Learning workflow that +you need. Thus, in most situations, the bindings are used merely to +upload the data to the platform and create an execution that uses that data to +reproduce the same operations. Let's say that you have a WhizzML script that +generates a batch prediction based on an existing model. The only input +for the script will be the source ID that will be used to predict, and the +rest of the steps will be handled by the WhizzML script. Therefore, in order +to use that on new data you'll need to upload that data to the platform and +use the resulting ID as input. + + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: uploading local data to create a `source` + source = api.create_source("my_local_file") + # waiting for the source to be finished. + api.ok(source) + + # step 2: executing the script to do a batch prediction with the new + # source as input + script = "script/5cae5ad4b72c6609d9000235" + execution = api.create_execution( \ + script, + {"inputs": [["source", source["resource"]]]}) + # waiting for the workflow to be finished + api.ok(execution) + # step 3: retrieving the result (e.g. "dataset/5cae5ad4b72c6609d9000356") + result = execution['object']['execution']['result'] + # step 4: maybe storing the result as a CSV + api.download_dataset(result, "my_predictions.csv") diff --git a/docs/101_topic_model.rst b/docs/101_topic_model.rst index 044e88be..065dcd2e 100644 --- a/docs/101_topic_model.rst +++ b/docs/101_topic_model.rst @@ -1,5 +1,8 @@ -BigML Bindings: 101 - Using a Topic Model -========================================= +.. toctree:: + :hidden: + +101 - Topic Model usage +======================= Following the schema described in the `prediction workflow `_ document, this is the code snippet that shows the minimal workflow to @@ -27,6 +30,13 @@ create a topic model and produce a single topic distribution. # creating a single topic distribution topic_distribution = api.create_topic_distribution(topic_model, input_data) +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed.
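+ +For instance, the intermediate `api.ok` calls could be dropped, as each +`create` method waits for its origin resource to be finished. A minimal +sketch (the CSV path is just illustrative): + +.. code-block:: python + + # the intermediate api.ok calls can be omitted: each create call + # internally waits for the resource it builds on to be finished + source = api.create_source("data/my_text_data.csv") + dataset = api.create_dataset(source) + topic_model = api.create_topic_model(dataset) + # waiting explicitly before reading the topic model properties + api.ok(topic_model)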
+ + Remember that your dataset needs to have at least a text field to be able to create a topic model. If you want to create topic distributions for many new inputs, you can do so by @@ -87,7 +97,7 @@ file to create the local `TopicModel` object: from bigml.api import BigML api = BigML() api.export("topicmodel/5968ec46983efc21b000001b", - "filename": "my_topic_model.json") + filename="my_topic_model.json") # creating the topic model from the file from bigml.topicmodel import TopicModel local_topic_model = TopicModel("my_topic_model.json") diff --git a/docs/101_ts.rst b/docs/101_ts.rst index 4a0c1d5d..ff5388b0 100644 --- a/docs/101_ts.rst +++ b/docs/101_ts.rst @@ -1,5 +1,8 @@ -BigML Bindings: 101 - Using a Time Series -========================================= +.. toctree:: + :hidden: + +101 - Time Series usage +======================= Following the schema described in the `prediction workflow `_ document, this is the code snippet that shows the minimal workflow to @@ -26,6 +29,13 @@ create a time series and produce a forecast. input_data = {"000005": {"horizon": 10}} # 10-point forecast for field ID 000005 forecast = api.create_forecast(time_series, {"000005": {"horizon": 10}}) +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + To learn more about the arguments that can be set in the `forecast` and `timeseries` creation calls and the response properties, please have a look at the `API documentation `_ @@ -50,7 +60,7 @@ file to create the local `TimeSeries` object: from bigml.api import BigML api = BigML() api.export("timeseries/5968ec46983efc21b000001b", - "filename": "my_time_series.json") + filename="my_time_series.json") # creating the time series from the file from bigml.timeseries import TimeSeries local_time_series = TimeSeries("my_time_series.json") diff --git a/docs/api_sketch.rst b/docs/api_sketch.rst index 130c45b8..c5876449 100644 --- a/docs/api_sketch.rst +++ b/docs/api_sketch.rst @@ -1,3 +1,6 @@ +.. toctree:: + :hidden: + BigML Bindings: Modeling and prediction process =============================================== diff --git a/docs/conf.py b/docs/conf.py index 873256aa..ac951f6b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,7 +30,9 @@ # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = [] +extensions = [ + 'sphinx_rtd_theme' +] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -46,7 +48,7 @@ # General information about the project. project = u'BigML' -copyright = u'2011 - 2014, The BigML Team' +copyright = u'2011 - 2024, The BigML Team' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -56,7 +58,7 @@ # Read the version from bigml.__version__ without importing the package # (and thus attempting to import packages it depends on that may not be # installed yet). -init_py_path = os.path.join(project_path, 'bigml', '__init__.py') +init_py_path = os.path.join(project_path, 'bigml', 'version.py') version = re.search("__version__ = '([^']+)'", open(init_py_path).read()).group(1) # The full version, including alpha/beta/rc tags.
@@ -101,7 +103,8 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'default' +# a list of builtin themes. +html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the diff --git a/docs/creating_resources.rst b/docs/creating_resources.rst new file mode 100644 index 00000000..2bd85d80 --- /dev/null +++ b/docs/creating_resources.rst @@ -0,0 +1,1552 @@ +.. toctree:: + :hidden: + +Creating Resources +================== + +Newly-created resources are returned in a dictionary with the following +keys: + +- **code**: If the request is successful you will get a + ``bigml.api.HTTP_CREATED`` (201) status code. In asynchronous file-uploading + ``api.create_source`` calls, it will contain a ``bigml.api.HTTP_ACCEPTED`` + (202) status code. Otherwise, it will be + one of the standard HTTP error codes `detailed in the + documentation `_. +- **resource**: The identifier of the new resource. +- **location**: The location of the new resource. +- **object**: The resource itself, as computed by BigML. +- **error**: If an error occurs and the resource cannot be created, it + will contain an additional code and a description of the error. In + this case, **location** and **resource** will be ``None``. + +Resource creation is an asynchronous process, so the API will probably return +a response before the resource is totally processed, and you'll need to +repeatedly call the API to see the evolution of the resource, reflected +in its status (see the `Statuses <#statuses>`_ section). +The bindings provide methods to help you do that. Please check the `waiting +for resources <#waiting_for_resources>`_ section to learn more about them. + +Statuses +~~~~~~~~ + +Resource creation is almost always asynchronous (with few exceptions, +like **projects**, **predictions**, and similar prediction-like results for +Unsupervised Models like **anomaly scores**, **centroids**, etc.). +Therefore, when you create a new source, a new dataset or a new model, even +if you receive an immediate response from the BigML servers, the full creation +of the resource can take from a few seconds to a few days, depending on +the size of the resource and BigML's load. A resource is not fully +created until its status is ``bigml.api.FINISHED``, or ``bigml.api.FAULTY`` if +some error occurs (like the one you would get when trying to upload an empty +file, a .ppt or a .doc). See the `documentation on status +codes `_ for the complete listing of +potential states and their semantics. + +Depending on your application +you might need to import the following constants: + +.. code-block:: python + + from bigml.api import WAITING + from bigml.api import QUEUED + from bigml.api import STARTED + from bigml.api import IN_PROGRESS + from bigml.api import SUMMARIZED + from bigml.api import FINISHED + from bigml.api import UPLOADING + from bigml.api import FAULTY + from bigml.api import UNKNOWN + from bigml.api import RUNNABLE + +Usually, you will simply need to wait until the resource is +in the ``bigml.api.FINISHED`` state for further processing. If that's the case, +the easiest way is calling the ``api.ok`` method and passing as its first +argument the object that contains your resource: + +..
code-block:: python + + from bigml.api import BigML + api = BigML() # creates a connection to BigML's API + source = api.create_source('my_file.csv') # creates a source object + api.ok(source) # checks that the source is finished and updates ``source`` + +In this code, ``api.create_source`` will probably return a non-finished +``source`` object. Then, ``api.ok`` will query its status and update the +contents of the ``source`` variable with the retrieved information until it +reaches a ``bigml.api.FINISHED`` or ``bigml.api.FAULTY`` status. + + +Waiting for Resources +--------------------- + +As explained in the ``Create Resources`` section, the time needed to create +a completely finished resource can vary depending on many factors: the size +of the data to be used, the type of fields and the platform load, for +instance. In BigML, the API will answer any creation request shortly +after receiving the creation call that starts the process. +Resources in BigML are any-time, meaning that the result contains partial but +correct information at any point of its evolution, so getting the information +of a resource which is still in progress can be useful. However, usually +you'll want to wait till the process ends to retrieve and use the resource. +The ``api.ok`` method is the mechanism provided for that, as: + +- It waits efficiently between API calls. The sleep time is adapted to + the resource's progress, as given in its status. +- It adapts the parameters of the API call to minimize the amount of + information downloaded in each iteration while waiting for completion. +- It modifies the contents of the variable passed as an argument to store + there the value of the resource returned by the API when it reaches the + finished or faulty state. +- It allows error handling and retries. + +Most of the time, no errors happen and a correctly finished resource is +generated. In this case, and following the example in the previous section, +the ``api.ok(source)`` method would return ``True`` and the variable +``source`` contents would look like: + +.. code-block:: python + + {"code": 200, + "resource": "source/5e4ee08e440ca1324410ccbd", + "location": "https://bigml.io/andromeda/source/5e4ee08e440ca1324410ccbd", + "error": None, + "object": {"code": 200, "fields": {...}, + ... + "status": {"code": 5, + "elapsed": 854, + "message": "The source has been created", + "progress": 1}} + } + +Where the ``object`` attribute of the dictionary would contain the response +of the last ``get`` call to the API. + +Nonetheless, two kinds of problems can arise when using ``api.ok``, +and both will cause the method to return ``False``. Firstly, +the HTTP connection that it needs to reach the API might fail. That will +prevent the resource information from being retrieved and will be reflected +in the ``code`` and ``error`` first-level attributes of the new ``source`` +contents. + +.. code-block:: python + + {"code": 500, + "resource": "source/5e4ee08e440ca1324410ccbd", + "location": "https://bigml.io/andromeda/source/5e4ee08e440ca1324410ccbd", + "error": {"status": + {"code": 500, + "message": "The resource couldn't be retrieved", + "type": "transient"}}, + "object": {"code": 201, "fields": {...}, + ... + "status": {"code": 1, + "elapsed": 15, + "message": "The request has been queued and will be processed soon", + "progress": 0}} + } + +and as the call could not reach the API, the ``object`` attribute will not
+
+In this case, the cause was a transient error, and we can decide that transient
+error calls should be retried a certain number of times. Just
+set an ``error_retries`` argument: e.g. ``api.ok(source, error_retries=10)``.
+
+The second kind of error appears when the API can be correctly reached and
+it returns a faulty resource. There's also a variety of reasons for a resource
+to end in a ``bigml.api.FAULTY`` state, but an example would be trying to
+create a source by uploading an empty file, or some kind of non-supported
+file, like an .ipynb file. The API will accept the creation task and assign a
+new resource ID. Afterwards, it will realize that the uploaded contents are not
+correct, so the ``api.ok`` call will get a resource in a faulty status. Let's
+see what happens when trying to upload a zip file that does not contain images
+or a CSV-like file.
+
+.. code-block:: python
+
+    {"code": 200,
+     "resource": "source/5e4ee08e440ca1324410ccbd",
+     "location": "https://bigml.io/andromeda/source/5e4ee08e440ca1324410ccbd",
+     "error": None,
+     "object": {"code": 500, "fields": {...},
+                ...
+                "status": {"code": -1,
+                           "elapsed": 225,
+                           "error": -2020,
+                           "message": "Spreadsheet not parseable (please try to export to CSV): Encoding: application/zip",
+                           "progress": 0}}
+    }
+
+In this case, according to the outer ``code`` and ``error``
+attributes (associated with HTTP failures) everything went smoothly, which is
+correct because the ``api.ok`` method was able to connect to the API.
+However, the ``object`` attribute (that contains the API response)
+will show the error in its inner ``code`` attribute, and the
+``status`` information will also contain a message describing the cause
+of that error. As this particular error is not transient, no retrying will
+be done even if the ``error_retries`` argument is set.
+
+Based on what we've seen, a safe way to check that we have been able to
+completely create a resource in BigML is to check the return value of the
+``api.ok`` method.
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    source = api.create_source('my_file.csv')  # creates a source object
+    if api.ok(source):
+        # code that uses the finished source contents
+        show_fields(source)
+    else:
+        # code that handles the error
+        handle_error(source)
+
+An alternative that can also be used to check for errors is the
+``raise_on_error`` argument of the ``api.ok`` method, which will cause an
+error to be raised in both the HTTP-problem and faulty-resource scenarios.
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    from bigml.exceptions import FaultyResourceError
+    api = BigML()
+    source = api.create_source('my_file.csv')  # creates a source object
+    try:
+        api.ok(source, raise_on_error=True)
+    except FaultyResourceError:
+        # code that handles the faulty resource error
+        handle_faulty_error(source)
+    except Exception:
+        # code that handles the HTTP connection errors
+        handle_http_error(source)
+
+The ``api.ok`` method calls the API repeatedly, but it sleeps for some
+time between calls. The sleeping time is set by using an exponential function
+that generates a random number in a range whose upper limit
+increases with the number of retries. When the progress of the resource
+reaches 80%, the waiting times decrease by applying a progress damping
+factor. Parameters like the initial
+waiting time, the number of retries or the estimate of the maximum elapsed
+time can be provided to fit each particular case.
+
+
+.. code-block:: python
+
+    dataset = api.get_dataset("dataset/5e4ee08e440ca13244102dbd")
+    api.ok(dataset, wait_time=60, max_elapsed_estimate=300)
+    # if the first call response is not a finished resource, the
+    # method will sleep for 60 seconds and increase this sleep time
+    # boundary till the elapsed time goes over 5 minutes. When that
+    # happens and the resource is still not created, counters are
+    # initialized again and the sleep period will start from 60s,
+    # repeating the increasing process.
+
+Sometimes, it can be useful to report the progress of the resource. To that
+end, ``api.ok`` accepts a ``progress_cb`` callback function that will be called
+every time that the status is checked internally. The progress will be a
+decimal number in the [0, 1] range.
+
+.. code-block:: python
+
+    def progress_log(progress, resource):
+        """Logs the progress of a resource"""
+        resource_id = resource["resource"]
+        progress_percentage = int(progress * 100)
+        print(f"The progress of {resource_id} is {progress_percentage}%")
+
+    dataset = api.get_dataset("dataset/5e4ee08e440ca13244102dbd")
+    api.ok(dataset, progress_cb=progress_log)
+
+
+As explained previously, the ``api.ok`` method updates the contents of the
+variable that is given as first argument. If you prefer to wait
+for the resource without side effects on that variable, you can
+also use the ``check_resource`` function:
+
+.. code-block:: python
+
+    check_resource(resource, api.get_source)
+
+which will repeatedly query the API until the resource reaches a ``FINISHED``
+or ``FAULTY`` state. It can also be used with the ``wait_time`` (in seconds)
+and ``retries``
+arguments to control the polling:
+
+.. code-block:: python
+
+    check_resource(resource, api.get_source, wait_time=2, retries=20)
+
+The ``wait_time`` value is used as the seed for a wait
+interval that grows exponentially with the number of retries, up to the given
+``retries`` limit.
+
+However, in other scenarios you might need to control the complete
+evolution of the resource, not only its final states.
+In that case, you can query the status of any resource
+with the ``status`` method, which simply returns its value and does not
+update the contents of the associated variable:
+
+.. code-block:: python
+
+    api.status(source)
+    api.status(dataset)
+    api.status(model)
+    api.status(prediction)
+    api.status(evaluation)
+    api.status(ensemble)
+    api.status(batch_prediction)
+    api.status(cluster)
+    api.status(centroid)
+    api.status(batch_centroid)
+    api.status(anomaly)
+    api.status(anomaly_score)
+    api.status(batch_anomaly_score)
+    api.status(sample)
+    api.status(correlation)
+    api.status(statistical_test)
+    api.status(logistic_regression)
+    api.status(association)
+    api.status(association_set)
+    api.status(topic_model)
+    api.status(topic_distribution)
+    api.status(batch_topic_distribution)
+    api.status(time_series)
+    api.status(forecast)
+    api.status(optiml)
+    api.status(fusion)
+    api.status(pca)
+    api.status(projection)
+    api.status(batch_projection)
+    api.status(linear_regression)
+    api.status(script)
+    api.status(execution)
+    api.status(library)
+
+Remember that, consequently, you will need to retrieve the resources
+explicitly in your code to get the updated information.
+
+
+Projects
+~~~~~~~~
+
+A special kind of resource is ``project``. Projects are repositories
+for resources, intended to fulfill organizational purposes. Each project can
+contain any other kind of resource, but the project that a certain resource
+belongs to is determined by the project assigned to the ``source``
+it is generated from. Thus, when a source is created
+and assigned a certain ``project_id``, the rest of the resources generated from
+this source will remain in this project.
+
+The REST calls to manage the ``project`` resemble the ones used to manage the
+rest of the resources. When you create a ``project``:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+
+    project = api.create_project({'name': 'my first project'})
+
+the resulting resource is similar to the rest of resources, although shorter:
+
+.. code-block:: python
+
+    {'code': 201,
+     'resource': u'project/54a1bd0958a27e3c4c0002f0',
+     'location': 'http://bigml.io/andromeda/project/54a1bd0958a27e3c4c0002f0',
+     'object': {u'category': 0,
+                u'updated': u'2014-12-29T20:43:53.060045',
+                u'resource': u'project/54a1bd0958a27e3c4c0002f0',
+                u'name': u'my first project',
+                u'created': u'2014-12-29T20:43:53.060013',
+                u'tags': [],
+                u'private': True,
+                u'dev': None,
+                u'description': u''},
+     'error': None}
+
+and you can use its project id to get, update or delete it:
+
+.. code-block:: python
+
+    project = api.get_project('project/54a1bd0958a27e3c4c0002f0')
+    api.update_project(project['resource'],
+                       {'description': 'This is my first project'})
+
+    api.delete_project(project['resource'])
+
+**Important**: Deleting a non-empty project will also delete **all resources**
+assigned to it, so please be extra careful when using
+the ``api.delete_project`` call.
+
+Creating External Connectors
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To create an external connector to an existing database you need to use the
+``create_external_connector`` method. The only required parameter is the
+dictionary that contains the information needed to connect to the particular
+database/table. The attributes of the connection dictionary needed for the
+method to work will depend on the type of database used.
+
+For instance, you can create a connection to an ``Elasticsearch`` database
+hosted locally at port ``9200`` by calling:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+
+    external_connector = api.create_external_connector( \
+        {"hosts": ["localhost:9200"]}, {"source": "elasticsearch"})
+
+where the first argument contains the information about the host
+and ``source`` contains the type of database to connect to (allowed types are:
+``elasticsearch``, ``postgresql``, ``mysql``, ``sqlserver``). If no ``source``
+type is set, ``postgresql`` will be used as the default value.
+
+You can add other properties, like the name to be used for this external
+connector, to that second argument:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+
+    external_connector = api.create_external_connector( \
+        {"hosts": ["localhost:9200"]},
+        {"source": "elasticsearch",
+         "name": "My elasticsearch"})
+
+
+Creating Sources
+~~~~~~~~~~~~~~~~
+
+To create a source from a local data file, you can use the
+``create_source`` method. The only required parameter is the path to the
+data file (or a file-like object). You can use a second optional parameter
+to specify any of the
+options for source creation described in the `BigML API
+documentation `_.
+
+Here's a sample invocation:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+
+    source = api.create_source('./data/iris.csv',
+        {'name': 'my source', 'source_parser': {'missing_tokens': ['?']}})
+
+or you may want to create a source from a file in a remote location:
+
+.. code-block:: python
+
+    source = api.create_source('s3://bigml-public/csv/iris.csv',
+        {'name': 'my remote source', 'source_parser': {'missing_tokens': ['?']}})
+
+or maybe reading the content from stdin:
+
+.. code-block:: python
+
+    import sys
+    from io import StringIO
+
+    content = StringIO(sys.stdin.read())
+    source = api.create_source(content,
+        {'name': 'my stdin source', 'source_parser': {'missing_tokens': ['?']}})
+
+or from an existing external connector:
+
+.. code-block:: python
+
+    content = {"source": "postgresql",
+               "externalconnector_id": "5ea1d2f7c7736e160900001c",
+               "query": "select * from table_name"}
+    source = api.create_source(content,
+        {'name': 'my external source', 'source_parser': {'missing_tokens': ['?']}})
+
+or using data stored in a local Python variable. The following example
+shows the two accepted formats:
+
+.. code-block:: python
+
+    local = [['a', 'b', 'c'], [1, 2, 3], [4, 5, 6]]
+    local2 = [{'a': 1, 'b': 2, 'c': 3}, {'a': 4, 'b': 5, 'c': 6}]
+    source = api.create_source(local, {'name': 'inline source'})
+
+As already mentioned, source creation is asynchronous. In all these examples,
+the ``api.create_source`` call returns once the data is uploaded.
+Then ``source`` will contain a resource whose status code will be either
+``WAITING`` or ``QUEUED``.
+
+For local data files you can go one step further and use asynchronous
+uploading:
+
+.. code-block:: python
+
+    source = api.create_source('./data/iris.csv',
+        {'name': 'my source', 'source_parser': {'missing_tokens': ['?']}},
+        async_load=True)
+
+In this case, the call immediately fills ``source`` with a provisional
+resource structure like:
+
+.. code-block:: python
+
+    {'code': 202,
+     'resource': None,
+     'location': None,
+     'object': {'status':
+                   {'progress': 0.99,
+                    'message': 'The upload is in progress',
+                    'code': 6}},
+     'error': None}
+
+where the ``source['object']`` status is set to ``UPLOADING`` and its
+``progress`` is periodically updated with the current uploading
+progress ranging from 0 to 1. When the upload completes, this structure will be
+replaced by the real resource info as computed by BigML. Therefore, the
+source's status will eventually be (as it is in the synchronous upload case)
+``WAITING`` or ``QUEUED``.
+
+You can retrieve the updated status at any time using the corresponding get
+method. For example, to get the status of our source we would use:
+
+.. code-block:: python
+
+    api.status(source)
+
+Creating Composite Sources (Images)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+BigML offers support to use images or collections of CSVs
+in your Machine Learning models.
+Uploading images to BigML is as easy as uploading any other file. Each
+file will be ingested and a new source will be created from it. To build
+Machine Learning models one typically needs lots of images, which are
+usually uploaded in batches stored in
+``.zip`` or ``.tar`` files. BigML is able to ingest such a file, creating a
+``composite source`` from it and a ``component source`` for each file
+contained in the compressed file. Thus, a zip file containing two images
+can be uploaded to BigML by using the ``create_source`` method:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    composite_source = api.create_source("images_zip.zip")
+
+and that operation will create three sources: one per image plus the composite
+source that contains them.
+
+If you put together a bunch of image sources inside a composite,
+that composite will also have format "image". If you create a dataset
+from it, every row will correspond to one of the images in the composite
+and will have a column representing the image data and another one for its
+filename.
+Also, BigML will extract around two hundred features per image by default,
+representing its histogram of gradients, and you can choose several others or
+add labels to each image. Please check the complete `API documentation about
+composite sources `_ to
+learn how to create them, update their contents while they are ``open``
+(editable) and ``close`` them so that you can create datasets and models
+from them. Closing a source makes it immutable, but any source
+can be cloned into a new source open to modification.
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    closed_source = "source/526fc344035d071ea3031d72"
+    open_source = api.clone_source(closed_source)
+
+Images are usually associated to other information, like labels or numeric
+fields, which can be regarded as additional attributes related to that
+image. This associated information can be described as annotations for
+each of the images. These annotations can be
+provided as a JSON file that contains the properties associated to
+each image and the name of the image file, which is used as a foreign key.
+The meta information needed to create the structure of the composite source,
+such as the fields to be associated and their types,
+should also be included in the annotations file.
+This is an example of the expected structure of the annotations file:
+
+.. code-block:: json
+
+    {"description": "Fruit images to test colour distributions",
+     "images_file": "./fruits_hist.zip",
+     "new_fields": [{"name": "new_label", "optype": "categorical"}],
+     "source_id": null,
+     "annotations": [
+         {"file": "f1/fruits1f.png", "new_label": "True"},
+         {"file": "f1/fruits1.png", "new_label": "False"},
+         {"file": "f2/fruits2e.png", "new_label": "False"}]}
+
+The ``images_file`` attribute should contain the path to the zip-compressed
+images file and the ``annotations`` attribute the corresponding
+annotations. The ``new_fields`` attribute should be a list of the fields
+used as annotations for the images.
+
+Also, if you prefer to keep your annotations in a separate file, you
+can point to that file in the ``annotations`` attribute:
+
+.. code-block:: json
+
+    {"description": "Fruit images to test colour distributions",
+     "images_file": "./fruits_hist.zip",
+     "new_fields": [{"name": "new_label", "optype": "categorical"}],
+     "source_id": null,
+     "annotations": "./annotations_detail.json"}
+
+The created source will contain the fields associated to the
+uploaded images, plus an additional field named ``new_label`` with the
+values defined in this file.
+
+If a source has already been created from this collection of images,
+you can provide the ID of this source in the ``source_id`` attribute.
+If the annotations file contains the source ID information,
+the existing source will be updated to add the new annotations
+(if still open for editing) or will be cloned (if the source is
+closed for editing) and the new source will be updated. In both cases,
+images won't be uploaded again.
+
+
+Creating Datasets
+~~~~~~~~~~~~~~~~~
+
+Once you have created a source, you can create a dataset. The only
+required argument to create a dataset is a source id. You can add all
+the additional arguments accepted by BigML and documented in the
+`Datasets section of the Developer's
+documentation `_.
+
+For example, to create a dataset named "my dataset" with the first 1024
+bytes of a source, you can submit the following request:
+
+.. code-block:: python
+
+    dataset = api.create_dataset(source, {"name": "my dataset", "size": 1024})
+
+Upon success, the dataset creation job will be queued for execution, and
+you can follow its evolution using ``api.status(dataset)``.
+
+As for the rest of resources, the create method will return an incomplete
+object that can be updated by issuing the corresponding
+``api.get_dataset`` call until it reaches a ``FINISHED`` status.
+Then you can export the dataset data to a CSV file using:
+
+.. code-block:: python
+
+    api.download_dataset('dataset/526fc344035d071ea3031d75',
+                         filename='my_dir/my_dataset.csv')
+
+You can also extract samples from an existing dataset and generate a new one
+with them using the ``api.create_dataset`` method. The first argument should
+be the origin dataset and the rest of the arguments that set the range or the
+sampling rate should be passed as a dictionary. For instance, to create a new
+dataset extracting 80% of the instances from an existing one, you could use:
+
+.. code-block:: python
+
+    dataset = api.create_dataset(origin_dataset, {"sample_rate": 0.8})
+
+Similarly, if you want to split your source into training and test datasets,
+you can set the ``sample_rate`` as before to create the training dataset and
+use the ``out_of_bag`` option to assign the complementary subset of data to the
+test dataset. If you set the ``seed`` argument to a value of your choice, you
+will ensure a deterministic sampling, so that each time you execute this call
+you will get the same datasets as a result and they will be complementary:
+
+.. code-block:: python
+
+    origin_dataset = api.create_dataset(source)
+    train_dataset = api.create_dataset(
+        origin_dataset, {"name": "Dataset Name | Training",
+                         "sample_rate": 0.8, "seed": "my seed"})
+    test_dataset = api.create_dataset(
+        origin_dataset, {"name": "Dataset Name | Test",
+                         "sample_rate": 0.8, "seed": "my seed",
+                         "out_of_bag": True})
+
+Sometimes, like for time series evaluations, it's important that the data
+in your train and test datasets is ordered. In this case, the split
+cannot be done at random. You will need to start from an ordered dataset and
+decide the ranges devoted to training and testing using the ``range``
+attribute:
+
+.. code-block:: python
+
+    origin_dataset = api.create_dataset(source)
+    train_dataset = api.create_dataset(
+        origin_dataset, {"name": "Dataset Name | Training",
+                         "range": [1, 80]})
+    test_dataset = api.create_dataset(
+        origin_dataset, {"name": "Dataset Name | Test",
+                         "range": [81, 100]})
+
+
+It is also possible to generate a dataset from a list of datasets
+(multidataset):
+
+.. code-block:: python
+
+    dataset1 = api.create_dataset(source1)
+    dataset2 = api.create_dataset(source2)
+    multidataset = api.create_dataset([dataset1, dataset2])
+
+Clusters can also be used to generate datasets containing the instances
+grouped around each centroid. You will need the cluster id and the centroid id
+to reference the dataset to be created. For instance,
+
+.. code-block:: python
+
+    cluster = api.create_cluster(dataset)
+    cluster_dataset_1 = api.create_dataset(cluster,
+                                           args={'centroid': '000000'})
+
+would generate a new dataset containing the subset of instances in the cluster
+associated to the centroid id ``000000``.
+
+Existing datasets can also be cloned:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    dataset = "dataset/526fc344035d071ea3031d76"
+    cloned_dataset = api.clone_dataset(dataset)
+
+
+Creating Models
+~~~~~~~~~~~~~~~
+
+Once you have created a dataset you can create a model from it. If you don't
+select one, the model will use the last field of the dataset as its objective
+field. The only required argument to create a model is a dataset id.
+You can also
+include in the request all the additional arguments accepted by BigML
+and documented in the `Models section of the Developer's
+documentation `_.
+
+For example, to create a model only including the first two fields and
+the first 10 instances in the dataset, you can use the following
+invocation:
+
+.. code-block:: python
+
+    model = api.create_model(dataset, {
+        "name": "my model", "input_fields": ["000000", "000001"], "range": [1, 10]})
+
+Again, the model is scheduled for creation, and you can retrieve its
+status at any time by means of ``api.status(model)``.
+
+Models can also be created from lists of datasets. Just use the list of ids
+as the first argument in the API call:
+
+.. code-block:: python
+
+    model = api.create_model([dataset1, dataset2], {
+        "name": "my model", "input_fields": ["000000", "000001"], "range": [1, 10]})
+
+And they can also be generated as the result of a clustering procedure. When
+a cluster is created, a model that predicts if a certain instance belongs to
+a concrete centroid can be built by providing the cluster and centroid ids:
+
+.. code-block:: python
+
+    model = api.create_model(cluster, {
+        "name": "model for centroid 000001", "centroid": "000001"})
+
+If no centroid id is provided, the first one appearing in the cluster is used.
+
+Existing models can also be cloned:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    model = "model/526fc344035d071ea3031d76"
+    cloned_model = api.clone_model(model)
+
+
+Creating Clusters
+~~~~~~~~~~~~~~~~~
+
+If your dataset has no fields showing the objective information to
+predict for the training data, you can still build a cluster
+that will group similar data around
+some automatically chosen points (centroids). Again, the only required
+argument to create a cluster is the dataset id. You can also
+include in the request all the additional arguments accepted by BigML
+and documented in the `Clusters section of the Developer's
+documentation `_.
+
+Let's create a cluster from a given dataset:
+
+.. code-block:: python
+
+    cluster = api.create_cluster(dataset, {"name": "my cluster",
+                                           "k": 5})
+
+that will create a cluster with 5 centroids.
+
+Existing clusters can also be cloned:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    cluster = "cluster/526fc344035d071ea3031d76"
+    cloned_cluster = api.clone_cluster(cluster)
+
+
+Creating Anomaly Detectors
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If your problem is finding the anomalous data in your dataset, you can
+build an anomaly detector, which will use an isolation forest to single out
+the anomalous records. Again, the only required
+argument to create an anomaly detector is the dataset id. You can also
+include in the request all the additional arguments accepted by BigML
+and documented in the `Anomaly detectors section of the Developer's
+documentation `_.
+
+Let's create an anomaly detector from a given dataset:
+
+.. code-block:: python
+
+    anomaly = api.create_anomaly(dataset, {"name": "my anomaly"})
+
+that will create an anomaly resource with a ``top_anomalies`` block of the
+most anomalous points.
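+
+As a quick illustration (assuming the finished anomaly detector follows the
+standard response layout, where the top anomalies are listed under
+``object.model.top_anomalies``), you could print their scores:
+
+.. code-block:: python
+
+    api.ok(anomaly)  # waits until the anomaly detector is finished
+    for top_anomaly in anomaly["object"]["model"]["top_anomalies"]:
+        print(top_anomaly["score"])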
+
+Existing anomaly detectors can also be cloned:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    anomaly = "anomaly/526fc344035d071ea3031d76"
+    cloned_anomaly = api.clone_anomaly(anomaly)
+
+
+Creating Associations
+~~~~~~~~~~~~~~~~~~~~~
+
+To find relations between the field values you can create an association
+discovery resource. The only required argument to create an association
+is a dataset id.
+You can also
+include in the request all the additional arguments accepted by BigML
+and documented in the `Association section of the Developer's
+documentation `_.
+
+For example, to create an association only including the first two fields and
+the first 10 instances in the dataset, you can use the following
+invocation:
+
+.. code-block:: python
+
+    association = api.create_association(dataset, { \
+        "name": "my association", "input_fields": ["000000", "000001"], \
+        "range": [1, 10]})
+
+Again, the association is scheduled for creation, and you can retrieve its
+status at any time by means of ``api.status(association)``.
+
+Associations can also be created from lists of datasets. Just use the
+list of ids as the first argument in the API call:
+
+.. code-block:: python
+
+    association = api.create_association([dataset1, dataset2], { \
+        "name": "my association", "input_fields": ["000000", "000001"], \
+        "range": [1, 10]})
+
+Existing associations can also be cloned:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    association = "association/526fc344035d071ea3031d76"
+    cloned_association = api.clone_association(association)
+
+
+Creating Topic Models
+~~~~~~~~~~~~~~~~~~~~~
+
+To find out which topics your documents refer to, you can create a topic
+model.
+The only required argument to create a topic model
+is a dataset id.
+You can also
+include in the request all the additional arguments accepted by BigML
+and documented in the `Topic Model section of the Developer's
+documentation `_.
+
+For example, to create a topic model including exactly 32 topics
+you can use the following
+invocation:
+
+.. code-block:: python
+
+    topic_model = api.create_topic_model(dataset, { \
+        "name": "my topics", "number_of_topics": 32})
+
+Again, the topic model is scheduled for creation, and you can retrieve its
+status at any time by means of ``api.status(topic_model)``.
+
+Topic models can also be created from lists of datasets. Just use the
+list of ids as the first argument in the API call:
+
+.. code-block:: python
+
+    topic_model = api.create_topic_model([dataset1, dataset2], { \
+        "name": "my topics", "number_of_topics": 32})
+
+Existing topic models can also be cloned:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    topic_model = "topicmodel/526fc344035d071ea3031d76"
+    cloned_topic_model = api.clone_topic_model(topic_model)
+
+Creating Time Series
+~~~~~~~~~~~~~~~~~~~~
+
+To forecast the behaviour of any numeric variable that depends on its
+historical records you can use a time series.
+The only required argument to create a time series
+is a dataset id.
+You can also
+include in the request all the additional arguments accepted by BigML
+and documented in the `Time Series section of the Developer's
+documentation `_.
+
+For example, to create a time series including a forecast of 10 points
+for the numeric values you can use the following
+invocation:
+
+.. code-block:: python
+
+    time_series = api.create_time_series(dataset, { \
+        "name": "my time series", "horizon": 10})
+
+Again, the time series is scheduled for creation, and you can retrieve its
+status at any time by means of ``api.status(time_series)``.
+
+Time series can also be created from lists of datasets. Just use the
+list of ids as the first argument in the API call:
+
+.. code-block:: python
+
+    time_series = api.create_time_series([dataset1, dataset2], { \
+        "name": "my time series", "horizon": 10})
+
+Existing time series can also be cloned:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    time_series = "timeseries/526fc344035d071ea3031d76"
+    cloned_time_series = api.clone_time_series(time_series)
+
+
+Creating OptiML
+~~~~~~~~~~~~~~~
+
+To create an OptiML, the only required argument is a dataset id.
+You can also
+include in the request all the additional arguments accepted by BigML
+and documented in the `OptiML section of the Developer's
+documentation `_.
+
+For example, to create an OptiML which optimizes the accuracy of the model
+you can use the following method:
+
+.. code-block:: python
+
+    optiml = api.create_optiml(dataset, { \
+        "name": "my optiml", "metric": "accuracy"})
+
+The OptiML is then scheduled for creation, and you can retrieve its
+status at any time by means of ``api.status(optiml)``.
+
+
+Creating Fusions
+~~~~~~~~~~~~~~~~
+
+To create a Fusion, the only required argument is a list of models.
+You can also
+include in the request all the additional arguments accepted by BigML
+and documented in the `Fusion section of the Developer's
+documentation `_.
+
+For example, to create a Fusion you can use the following method:
+
+.. code-block:: python
+
+    fusion = api.create_fusion(["model/5af06df94e17277501000010",
+                                "model/5af06df84e17277502000019",
+                                "deepnet/5af06df84e17277502000016",
+                                "ensemble/5af06df74e1727750100000d"],
+                               {"name": "my fusion"})
+
+The Fusion is then scheduled for creation, and you can retrieve its
+status at any time by means of ``api.status(fusion)``.
+
+Fusions can also be created by assigning some weights to each model in the
+list. In this case, the argument for the create call will be a list of
+dictionaries that contain the ``id`` and ``weight`` keys:
+
+.. code-block:: python
+
+    fusion = api.create_fusion([{"id": "model/5af06df94e17277501000010",
+                                 "weight": 10},
+                                {"id": "model/5af06df84e17277502000019",
+                                 "weight": 20},
+                                {"id": "deepnet/5af06df84e17277502000016",
+                                 "weight": 5}],
+                               {"name": "my weighted fusion"})
+
+
+Creating Predictions
+~~~~~~~~~~~~~~~~~~~~
+
+You can now use the model resource identifier together with some input
+parameters to ask for predictions, using the ``create_prediction``
+method. You can also give the prediction a name:
+
+.. code-block:: python
+
+    prediction = api.create_prediction(model,
+                                       {"sepal length": 5,
+                                        "sepal width": 2.5},
+                                       {"name": "my prediction"})
+
+To see the prediction you can use ``pprint``:
+
+.. code-block:: python
+
+    api.pprint(prediction)
+
+Predictions can be created using any supervised model (model, ensemble,
+logistic regression, linear regression, deepnet and fusion) as the first
+argument.
+
+Creating Centroids
+~~~~~~~~~~~~~~~~~~
+
+To obtain the centroid associated to new input data, you
+can now use the ``create_centroid`` method. Give the method a cluster
+identifier and the input data to obtain the centroid.
+You can also give the centroid prediction a name:
+
+.. code-block:: python
+
+    centroid = api.create_centroid(cluster,
+                                   {"pregnancies": 0,
+                                    "plasma glucose": 118,
+                                    "blood pressure": 84,
+                                    "triceps skin thickness": 47,
+                                    "insulin": 230,
+                                    "bmi": 45.8,
+                                    "diabetes pedigree": 0.551,
+                                    "age": 31,
+                                    "diabetes": "true"},
+                                   {"name": "my centroid"})
+
+Creating Anomaly Scores
+~~~~~~~~~~~~~~~~~~~~~~~
+
+To obtain the anomaly score associated to new input data, you
+can now use the ``create_anomaly_score`` method. Give the method an anomaly
+detector identifier and the input data to obtain the score:
+
+.. code-block:: python
+
+    anomaly_score = api.create_anomaly_score(anomaly, {"src_bytes": 350},
+                                             args={"name": "my score"})
+
+Creating Association Sets
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Using the association resource, you can obtain the consequent items that its
+rules associate with your input data. These association sets can be obtained
+by calling the ``create_association_set`` method. The first argument is the
+association ID or object and the next one is the input data.
+
+.. code-block:: python
+
+    association_set = api.create_association_set( \
+        association, {"genres": "Action$Adventure"}, \
+        args={"name": "my association set"})
+
+
+Creating Topic Distributions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To obtain the topic distributions associated to new input data, you
+can now use the ``create_topic_distribution`` method. Give
+the method a topic model identifier and the input data to obtain the
+distribution:
+
+.. code-block:: python
+
+    topic_distribution = api.create_topic_distribution( \
+        topic_model,
+        {"Message": "The bubble exploded in 2007."},
+        args={"name": "my topic distribution"})
+
+
+Creating Forecasts
+~~~~~~~~~~~~~~~~~~
+
+To obtain the forecast associated to a numeric variable, you
+can now use the ``create_forecast`` method. Give
+the method a time series identifier and the input data to obtain the forecast:
+
+.. code-block:: python
+
+    forecast = api.create_forecast( \
+        time_series,
+        {"Final": {"horizon": 10}})
+
+
+Creating Projections
+~~~~~~~~~~~~~~~~~~~~
+
+You can now use the PCA resource identifier together with some input
+parameters to ask for the corresponding projections,
+using the ``create_projection``
+method. You can also give the projection a name:
+
+.. code-block:: python
+
+    projection = api.create_projection(pca,
+                                       {"sepal length": 5,
+                                        "sepal width": 2.5},
+                                       {"name": "my projection"})
+
+
+
+Creating Evaluations
+~~~~~~~~~~~~~~~~~~~~
+
+Once you have created a supervised learning model,
+you can measure its performance by running a
+dataset of test data through it and comparing its predictions to the objective
+field real values. Thus, the required arguments to create an evaluation are a
+model id and a dataset id. You can also
+include in the request all the additional arguments accepted by BigML
+and documented in the `Evaluations section of the Developer's
+documentation `_.
+
+For instance, to evaluate a previously created model using an existing dataset
+you can use the following call:
+
+.. code-block:: python
+
+    evaluation = api.create_evaluation(model, dataset, {
+        "name": "my evaluation"})
+
+Again, the evaluation is scheduled for creation and ``api.status(evaluation)``
+will show its state.
+
+Evaluations can also check the ensembles' performance. To evaluate an ensemble
+you can do exactly what we just did for the model case, using the ensemble
+object instead of the model as the first argument:
+
+.. code-block:: python
+
+    evaluation = api.create_evaluation(ensemble, dataset)
+
+Evaluations can be created using any supervised model (including time series)
+as the first argument.
+
+Creating ensembles
+~~~~~~~~~~~~~~~~~~
+
+To improve the performance of your predictions, you can create an ensemble
+of models and combine their individual predictions.
+The only required argument to create an ensemble is the dataset id:
+
+.. code-block:: python
+
+    ensemble = api.create_ensemble('dataset/5143a51a37203f2cf7000972')
+
+BigML offers three kinds of ensembles. Two of them are known as ``Decision
+Forests`` because they are built as collections of ``Decision trees``
+whose predictions
+are aggregated using different combiners (``plurality``,
+``confidence weighted``, ``probability weighted``) or setting a ``threshold``
+to issue the ensemble's
+prediction. All ``Decision Forests`` use bagging to sample the
+data used to build the underlying models.
+
+As an example of how to create a ``Decision Forest``
+with `20` models, you only need to provide the dataset ID that you want to
+build the ensemble from and the number of models:
+
+.. code-block:: python
+
+    args = {'number_of_models': 20}
+    ensemble = api.create_ensemble('dataset/5143a51a37203f2cf7000972', args)
+
+If no ``number_of_models`` is provided, the ensemble will contain 10 models.
+
+``Random Decision Forests`` also fall
+into the ``Decision Forest`` category,
+but they only use a subset of the fields, chosen
+at random at each split. To create this kind of ensemble, just use the
+``randomize`` option:
+
+.. code-block:: python
+
+    args = {'number_of_models': 20, 'randomize': True}
+    ensemble = api.create_ensemble('dataset/5143a51a37203f2cf7000972', args)
+
+The third kind of ensemble is ``Boosted Trees``. This type of ensemble uses
+quite a different algorithm. The trees used in the ensemble don't have the
+field you want to predict as their objective field, and their individual
+votes are not aggregated. Instead, the goal is to adjust the coefficients
+of a function that will be used to predict. The
+models' objective is, therefore, the gradient that minimizes the error
+of the predicting function (when comparing its output
+with the real values). The process starts with
+some initial values and computes these gradients. Each new step uses the
+previous fields plus the last computed gradient field as
+the initial state for the next iteration.
+Finally, it stops when the error is smaller than a certain threshold
+or the iterations reach a user-defined limit.
+In classification problems, every category in the ensemble's objective field
+is associated with a subset of the ``Boosted Trees``. The objective of
+each subset of trees
+is to adjust the function to the probability of belonging
+to this particular category.
+
+In order to build
+an ensemble of ``Boosted Trees`` you need to provide the ``boosting``
+attributes. You can learn about the existing attributes in the `ensembles'
+section of the API documentation `_,
+but a typical attribute to be set would
+be the maximum number of iterations:
+
+.. code-block:: python
+
+    args = {'boosting': {'iterations': 20}}
+    ensemble = api.create_ensemble('dataset/5143a51a37203f2cf7000972', args)
+
+Existing ensembles can also be cloned:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    ensemble = "ensemble/526fc344035d071ea3031d76"
+    cloned_ensemble = api.clone_ensemble(ensemble)
+
+
+Creating Linear Regressions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For regression problems, you can also choose linear regressions to model
+your data. Linear regressions expect the predicted value for the objective
+field to be computable as a linear combination of the predictors.
+
+As the rest of models, linear regressions can be created from a dataset by
+calling the corresponding create method:
+
+.. code-block:: python
+
+    linear_regression = api.create_linear_regression( \
+        'dataset/5143a51a37203f2cf7000972',
+        {"name": "my linear regression",
+         "objective_field": "my_objective_field"})
+
+In this example, we created a linear regression named
+``my linear regression`` and set the objective field to be
+``my_objective_field``. Other arguments, like ``bias``,
+can also be specified as attributes in the arguments dictionary at
+creation time.
+Particularly for categorical fields, there are three different available
+``field_codings`` options (``contrast``, ``other`` or the ``dummy``
+default coding). For a more detailed description of the
+``field_codings`` attribute and its syntax, please see the `Developers API
+Documentation
+`_.
+
+Existing linear regressions can also be cloned:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    linear_regression = "linearregression/526fc344035d071ea3031d76"
+    cloned_linear_regression = api.clone_linear_regression(linear_regression)
+
+
+Creating logistic regressions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For classification problems, you can also choose logistic regressions to model
+your data. Logistic regressions compute a probability associated to each class
+in the objective field. The probability is obtained using a logistic
+function, whose argument is a linear combination of the field values.
+
+As the rest of models, logistic regressions can be created from a dataset by
+calling the corresponding create method:
+
+.. code-block:: python
+
+    logistic_regression = api.create_logistic_regression( \
+        'dataset/5143a51a37203f2cf7000972',
+        {"name": "my logistic regression",
+         "objective_field": "my_objective_field"})
+
+In this example, we created a logistic regression named
+``my logistic regression`` and set the objective field to be
+``my_objective_field``. Other arguments, like ``bias``, ``missing_numerics``
+and ``c`` can also be specified as attributes in the arguments dictionary at
+creation time.
+Particularly for categorical fields, there are four different available
+``field_codings`` options (``dummy``, ``contrast``, ``other`` or the ``one-hot``
+default coding). For a more detailed description of the
+``field_codings`` attribute and its syntax, please see the `Developers API
+Documentation
+`_.
+
+Existing logistic regressions can also be cloned:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    logistic_regression = "logisticregression/526fc344035d071ea3031d76"
+    cloned_logistic_regression = api.clone_logistic_regression(
+        logistic_regression)
+
+Creating Deepnets
+~~~~~~~~~~~~~~~~~
+
+Deepnets can also solve classification and regression problems.
+Deepnets are an optimized version of Deep Neural Networks,
+a class of machine-learned models inspired by the neural
+circuitry of the human brain. In these classifiers, the input features
+are fed to a group of "nodes" called a "layer".
+Each node is essentially a function on the input that
+transforms the input features into another value or collection of values.
+Then the entire layer transforms an input vector into a new "intermediate"
+feature vector. This new vector is fed as input to another layer of nodes.
+This process continues layer by layer, until we reach the final "output"
+layer of nodes, where the output is the network’s prediction: an array
+of per-class probabilities for classification problems or a single,
+real value for regression problems.
+
+For classification problems, deepnet predictions compute a probability
+associated to each class in the objective field.
+As the rest of models, deepnets can be created from a dataset by
+calling the corresponding create method:
+
+.. code-block:: python
+
+    deepnet = api.create_deepnet( \
+        'dataset/5143a51a37203f2cf7000972',
+        {"name": "my deepnet",
+         "objective_field": "my_objective_field"})
+
+In this example, we created a deepnet named
+``my deepnet`` and set the objective field to be
+``my_objective_field``. Other arguments, like ``number_of_hidden_layers``,
+``learning_rate``
+and ``missing_numerics``, can also be specified as attributes
+in an arguments dictionary at
+creation time. For a more detailed description of the
+available attributes and their syntax, please see the `Developers API
+Documentation
+`_.
+
+Existing deepnets can also be cloned:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    deepnet = "deepnet/526fc344035d071ea3031d76"
+    cloned_deepnet = api.clone_deepnet(deepnet)
+
+
+Creating PCAs
+~~~~~~~~~~~~~
+
+In order to reduce the number of features used in the modeling steps,
+you can use a PCA (Principal Component Analysis) to find out the best
+combination of features that describe the variance of your data.
+As the rest of models, PCAs can be created from a dataset by
+calling the corresponding create method:
+
+.. code-block:: python
+
+    pca = api.create_pca( \
+        'dataset/5143a51a37203f2cf7000972',
+        {"name": "my PCA"})
+
+In this example, we created a PCA named
+``my PCA``. Other arguments, like ``standardized``,
+can also be specified as attributes in the arguments dictionary at
+creation time.
+Please see the `Developers API
+Documentation
+`_.
+
+Creating Batch Predictions
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+We have shown how to create predictions individually, but when the amount
+of predictions to make increases, this procedure is far from optimal. In this
+case, the more efficient way of predicting remotely is to create a dataset
+containing the input data you want your model to predict from and to give its
+id and the model's id to the ``create_batch_prediction`` API call:
+
+.. code-block:: python
+
+    batch_prediction = api.create_batch_prediction(model, dataset, {
+        "name": "my batch prediction", "all_fields": True,
+        "header": True,
+        "confidence": True})
+
+In this example, setting ``all_fields`` to true causes the input
+data to be included in the prediction output, ``header`` controls whether a
+headers line is included in the file or not and ``confidence`` set to true
+causes the confidence of the prediction to be appended. If none of these
+arguments is given, the resulting file will contain the name of the
+objective field as a header row followed by the predictions.
+
+As for the rest of resources, the create method will return an incomplete
+object that can be updated by issuing the corresponding
+``api.get_batch_prediction`` call until it reaches a ``FINISHED`` status.
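+
+For instance, a minimal sketch of waiting for a batch prediction to finish
+with ``api.ok`` before using it:
+
+.. code-block:: python
+
+    batch_prediction = api.create_batch_prediction(model, dataset)
+    api.ok(batch_prediction)  # polls until FINISHED or FAULTY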
+
+Then you can download the created predictions file using:
+
+.. code-block:: python
+
+    api.download_batch_prediction('batchprediction/526fc344035d071ea3031d70',
+                                  filename='my_dir/my_predictions.csv')
+
+that will copy the output predictions to the local file given in
+``filename``. If no ``filename`` is provided, the method returns a file-like
+object that can be read as a stream:
+
+.. code-block:: python
+
+    CHUNK_SIZE = 1024
+    response = api.download_batch_prediction(
+        'batchprediction/526fc344035d071ea3031d70')
+    chunk = response.read(CHUNK_SIZE)
+    if chunk:
+        print(chunk)
+
+The output of a batch prediction can also be transformed to a source object
+using the ``source_from_batch_prediction`` method in the api:
+
+.. code-block:: python
+
+    api.source_from_batch_prediction(
+        'batchprediction/526fc344035d071ea3031d70',
+        args={'name': 'my_batch_prediction_source'})
+
+This code will create a new source object, that can be used again as a
+starting point to generate datasets.
+
+
+Creating Batch Centroids
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+As described in the previous section, it is also possible to make centroids'
+predictions in batch. First you create a dataset
+containing the input data that you want your cluster to assign a centroid to.
+The ``create_batch_centroid`` call will need the id of the input
+data dataset and the
+cluster used to assign a centroid to each instance:
+
+.. code-block:: python
+
+    batch_centroid = api.create_batch_centroid(cluster, dataset, {
+        "name": "my batch centroid", "all_fields": True,
+        "header": True})
+
+Creating Batch Anomaly Scores
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Input data can also be assigned an anomaly score in batch. You train an
+anomaly detector with your training data and then build a dataset from your
+input data. The ``create_batch_anomaly_score`` call will need the id
+of the dataset and of the
+anomaly detector to assign an anomaly score to each input data instance:
+
+.. code-block:: python
+
+    batch_anomaly_score = api.create_batch_anomaly_score(anomaly, dataset, {
+        "name": "my batch anomaly score", "all_fields": True,
+        "header": True})
+
+Creating Batch Topic Distributions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Input data can also be assigned a topic distribution in batch. You train a
+topic model with your training data and then build a dataset from your
+input data. The ``create_batch_topic_distribution`` call will need the id
+of the dataset and of the
+topic model to assign a topic distribution to each input data instance:
+
+.. code-block:: python
+
+    batch_topic_distribution = api.create_batch_topic_distribution( \
+        topic_model, dataset, {
+            "name": "my batch topic distribution", "all_fields": True,
+            "header": True})
+
+Creating Batch Projections
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Input data can also be assigned a projection in batch. You train a
+PCA with your training data and then build a dataset from your
+input data. The ``create_batch_projection`` call will need the id
+of the input data dataset and of the
+PCA to compute the projection that corresponds to each input data instance:
+
+.. code-block:: python
+
+    batch_projection = api.create_batch_projection( \
+        pca, dataset, {
+            "name": "my batch pca", "all_fields": True,
+            "header": True})
+
+Cloning Resources
+~~~~~~~~~~~~~~~~~
+
+In the previous sections, you've been able to see that sources,
+datasets and models can be cloned using the corresponding
+``clone_[resource_type]`` method.
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    logistic_regression = "logisticregression/526fc344035d071ea3031d76"
+    cloned_logistic_regression = api.clone_logistic_regression(
+        logistic_regression)
+
+Usually, cloning is applied when someone
+shares a resource with us and we need to use it in our account. In that case,
+the link to the shared resource contains a shared hash, which is at the end
+of the URL. That shared ID can be used as input to clone it.
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    shared_deepnet = "shared/deepnet/s2KQBFQHMeIrbaTF5uncNsM8HKB"
+    cloned_deepnet = api.clone_deepnet(shared_deepnet)
+
+Sharing and cloning can be especially useful to users that belong to
+an ``Organization``. For privacy reasons, the projects created inside the
+``Organization`` are not visible from the private user account environment and
+vice versa. If those users create a resource in their private account and then
+want to share it in a project that belongs to the organization, they can
+create the corresponding secret link and use it to clone it in the
+organization's project. That will, of course, need the connection to be
+pointing to that specific project.
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    org_project = "project/526fc344035d071ea3031436"
+    # Creating a connection to the organization's project
+    api = BigML(project=org_project)
+    shared_model = "shared/model/s2KQBFQHMeIrbaTF5uncNsM8HKB"
+    cloned_model = api.clone_model(shared_model)
diff --git a/docs/deleting_resources.rst b/docs/deleting_resources.rst
new file mode 100644
index 00000000..56136fd9
--- /dev/null
+++ b/docs/deleting_resources.rst
@@ -0,0 +1,59 @@
+.. toctree::
+    :hidden:
+
+Deleting Resources
+==================
+
+Resources can be deleted individually using the corresponding method for
+each type of resource.
+
+.. code-block:: python
+
+    api.delete_source(source)
+    api.delete_dataset(dataset)
+    api.delete_model(model)
+    api.delete_prediction(prediction)
+    api.delete_evaluation(evaluation)
+    api.delete_ensemble(ensemble)
+    api.delete_batch_prediction(batch_prediction)
+    api.delete_cluster(cluster)
+    api.delete_centroid(centroid)
+    api.delete_batch_centroid(batch_centroid)
+    api.delete_anomaly(anomaly)
+    api.delete_anomaly_score(anomaly_score)
+    api.delete_batch_anomaly_score(batch_anomaly_score)
+    api.delete_sample(sample)
+    api.delete_correlation(correlation)
+    api.delete_statistical_test(statistical_test)
+    api.delete_logistic_regression(logistic_regression)
+    api.delete_linear_regression(linear_regression)
+    api.delete_association(association)
+    api.delete_association_set(association_set)
+    api.delete_topic_model(topic_model)
+    api.delete_topic_distribution(topic_distribution)
+    api.delete_batch_topic_distribution(batch_topic_distribution)
+    api.delete_time_series(time_series)
+    api.delete_forecast(forecast)
+    api.delete_fusion(fusion)
+    api.delete_pca(pca)
+    api.delete_deepnet(deepnet)
+    api.delete_projection(projection)
+    api.delete_batch_projection(batch_projection)
+    api.delete_project(project)
+    api.delete_script(script)
+    api.delete_library(library)
+    api.delete_execution(execution)
+    api.delete_external_connector(external_connector)
+
+
+Each of the calls above will return a dictionary with the following
+keys:
+
+- **code** If the request is successful, the code will be a
+  ``bigml.api.HTTP_NO_CONTENT`` (204) status code. Otherwise, it will be
+  one of the standard HTTP error codes. See the `documentation on
+  status codes `_ for more
+  info.
+- **error** If the request does not succeed, it will contain a
+  dictionary with an error code and a message. It will be ``None``
+  otherwise.
diff --git a/docs/index.rst b/docs/index.rst
index 268d453f..b2f20837 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -3,12 +3,12 @@ BigML Python Bindings
 `BigML `_ makes machine learning easy by taking care of the details
 required to add data-driven decisions and predictive
-power to your company. Unlike other machine learning services, BigML
+power to your applications. Unlike other machine learning services, BigML
 creates
 `beautiful predictive models `_ that
 can be easily understood and interacted with.
 These BigML Python bindings allow you to interact with BigML.io, the API
 for BigML. You can use it to easily create, retrieve, list, update, and
 delete BigML resources (i.e., sources, datasets, models and,
 predictions).
@@ -16,82 +16,144 @@ predictions).
 This module is licensed under the `Apache License, Version 2.0
 `_.
-Support
--------
-
-Please report problems and bugs to our `BigML.io issue
-tracker `_.
-
-Discussions about the different bindings take place in the general
-`BigML mailing list `_. Or join us
-in our `Campfire chatroom `_.
+.. toctree::
+   :maxdepth: 2
+   :hidden:
+   :caption: Basic Usage
+
+   quick_start
+   101_model
+   101_ensemble
+   101_deepnet
+   101_linear_regression
+   101_logistic_regression
+   101_optiml
+   101_fusion
+   101_ts
+   101_cluster
+   101_anomaly
+   101_topic_model
+   101_association
+   101_pca
+   101_scripting
+   101_images_classification
+   101_images_feature_extraction
+   101_object_detection
+
+
+.. toctree::
+   :maxdepth: 2
+   :hidden:
+   :caption: Resource Management
+
+   ml_resources
+   creating_resources
+   reading_resources
+   updating_resources
+   deleting_resources
+
+
+.. toctree::
+   :maxdepth: 2
+   :hidden:
+   :caption: Client and Server Automation
+
+   local_resources
+   whizzml_resources
 
 Requirements
 ------------
 
-Python 2.7 and Python 3 are currently supported by these bindings.
+Only ``Python 3`` versions are currently supported by these bindings.
+Support for Python 2.7.X ended in version ``4.32.3``.
 
 The basic third-party dependencies are the
 `requests `_,
-`poster `_,
-`unidecode `_ and
-`requests-toolbelt `_
-libraries. These
-libraries are automatically installed during the setup. Support for Google
-App Engine has been added as of version 3.0.0, using the `urlfetch` package
-instead of `requests`.
+`unidecode `_,
+`requests-toolbelt `_,
+`bigml-chronos `_,
+`msgpack `_,
+`numpy `_ and
+`scipy `_ libraries. These
+libraries are automatically installed during the basic setup.
+Support for Google App Engine has been added as of version 3.0.0,
+using the `urlfetch` package instead of `requests`.
 
 The bindings will also use ``simplejson`` if you happen to have it installed,
 but that is optional: we fall back to Python's built-in JSON libraries is
 ``simplejson`` is not found.
 
-Additional `numpy `_ and
-`scipy `_ libraries are needed in case you want to use
-local predictions for regression models (including the error information)
-using proportional missing strategy. As these are quite heavy libraries and
-they are not heavily used in these bindings,
-they are not included in the automatic installation
-dependencies. The test suite includes some tests that will need these
-libraries to be installed.
-
-Also in order to use local `Topic Model` predictions, you will need to install
-`pystemmer `_. Using the `pip install`
-command for this library can produce an error if your system lacks the
-correct developer tools to compile it. In Windows, the error message
-will include a link pointing to the needed Visual Studio version and in
-OSX you'll need to install the Xcode developer tools.
+`Node.js `_ is not installed by default, but will be
+needed for `Local Pipelines `_ to work
+when datasets containing newly added features are part of the transformation
+workflow.
+
+The bindings provide support to use the ``BigML`` platform to create, update,
+get and delete resources, but also to produce local predictions using the
+models created in ``BigML``. Most of them will be actionable with the basic
+installation, but some additional dependencies are needed to use local
+``Topic Models`` and Image Processing models. Please refer to the
+`Installation <#installation>`_ section for details.
+
+OS Requirements
+~~~~~~~~~~~~~~~
+
+The basic installation of the bindings can be used
+on Linux and Windows based Operating Systems.
+However, the extra options that allow working with
+image processing models (``[images]`` and ``[full]``) are only supported
+and tested on Linux-based Operating Systems.
+For image models, Windows OS is not recommended and cannot be supported out of
+the box, because the specific compiler versions or DLLs required are
+unavailable in general.
 
 Installation
 ------------
 
-To install the latest stable release with
-`pip `_
+To install the latest stable release with the basic dependencies, using
+`pip `_, please use:
 
 .. code-block:: bash
 
     $ pip install bigml
 
-You can also install the development version of the bindings directly
-from the Git repository
+Support for local Topic Distributions (Topic Models' predictions)
+and local predictions for datasets that include Images will only be
+available as extras, because the libraries used for that are not
+usually available in all Operating Systems. If you need to support those,
+please check the `Installation Extras <#installation-extras>`_ section.
+
+Installation Extras
+-------------------
+
+Local Topic Distributions support can be installed using:
 
 .. code-block:: bash
 
-    $ pip install -e git://github.com/bigmlcom/python.git#egg=bigml_python
+    pip install bigml[topics]
+
+Images local predictions support can be installed using:
 
-Importing the module
---------------------
+.. code-block:: bash
 
-To import the module:
+    pip install bigml[images]
 
-.. code-block:: python
+The full set of features can be installed using:
 
-    import bigml.api
+.. code-block:: bash
 
-Alternatively you can just import the BigML class:
+    pip install bigml[full]
 
-.. code-block:: python
 
-    from bigml.api import BigML
+WARNING: Mind that installing these extras can require some extra work, as
+explained in the `Requirements <#requirements>`_ section.
+
+You can also install the development version of the bindings directly
+from the Git repository
+
+.. code-block:: bash
+
+    $ pip install -e git://github.com/bigmlcom/python.git#egg=bigml_python
 
 Authentication
 --------------
@@ -264,198 +326,6 @@ created in this environment have been moved to a special project in the now
 unique ``Production Environment``, so this flag is no longer needed to work
 with them.
 
-Quick Start
------------
-
-Imagine that you want to use `this csv
-file `_ containing the `Iris
-flower dataset `_ to
-predict the species of a flower whose ``petal length`` is ``2.45`` and
-whose ``petal width`` is ``1.75``. A preview of the dataset is shown
-below. It has 4 numeric fields: ``sepal length``, ``sepal width``,
It has 4 numeric fields: ``sepal length``, ``sepal width``, -``petal length``, ``petal width`` and a categorical field: ``species``. -By default, BigML considers the last field in the dataset as the -objective field (i.e., the field that you want to generate predictions -for). - -:: - - sepal length,sepal width,petal length,petal width,species - 5.1,3.5,1.4,0.2,Iris-setosa - 4.9,3.0,1.4,0.2,Iris-setosa - 4.7,3.2,1.3,0.2,Iris-setosa - ... - 5.8,2.7,3.9,1.2,Iris-versicolor - 6.0,2.7,5.1,1.6,Iris-versicolor - 5.4,3.0,4.5,1.5,Iris-versicolor - ... - 6.8,3.0,5.5,2.1,Iris-virginica - 5.7,2.5,5.0,2.0,Iris-virginica - 5.8,2.8,5.1,2.4,Iris-virginica - -You can easily generate a prediction following these steps: - -.. code-block:: python - - from bigml.api import BigML - - api = BigML() - - source = api.create_source('./data/iris.csv') - dataset = api.create_dataset(source) - model = api.create_model(dataset) - prediction = api.create_prediction(model, \ - {"petal width": 1.75, "petal length": 2.45}) - -You can then print the prediction using the ``pprint`` method: - -.. code-block:: python - - >>> api.pprint(prediction) - species for {"petal width": 1.75, "petal length": 2.45} is Iris-setosa - -Certainly, any of the resources created in BigML can be configured using -several arguments described in the `API documentation `_. -Any of these configuration arguments can be added to the ``create`` method -as a dictionary in the last optional argument of the calls: - -.. code-block:: python - - from bigml.api import BigML - - api = BigML() - - source_args = {"name": "my source", - "source_parser": {"missing_tokens": ["NULL"]}} - source = api.create_source('./data/iris.csv', source_args) - dataset_args = {"name": "my dataset"} - dataset = api.create_dataset(source, dataset_args) - model_args = {"objective_field": "species"} - model = api.create_model(dataset, model_args) - prediction_args = {"name": "my prediction"} - prediction = api.create_prediction(model, \ - {"petal width": 1.75, "petal length": 2.45}, - prediction_args) - -The ``iris`` dataset has a small number of instances, and usually will be -instantly created, so the ``api.create_`` calls will probably return the -finished resources outright. As BigML's API is asynchronous, -in general you will need to ensure -that objects are finished before using them by using ``api.ok``. - -.. code-block:: python - - from bigml.api import BigML - - api = BigML() - - source = api.create_source('./data/iris.csv') - api.ok(source) - dataset = api.create_dataset(source) - api.ok(dataset) - model = api.create_model(dataset) - api.ok(model) - prediction = api.create_prediction(model, \ - {"petal width": 1.75, "petal length": 2.45}) - -Note that the prediction -call is not followed by the ``api.ok`` method. Predictions are so quick to be -generated that, unlike the -rest of resouces, will be generated synchronously as a finished object. - -The example assumes that your objective field (the one you want to predict) -is the last field in the dataset. If that's not he case, you can explicitly -set the name of this field in the creation call using the ``objective_field`` -argument: - - -.. 
code-block:: python - - from bigml.api import BigML - - api = BigML() - - source = api.create_source('./data/iris.csv') - api.ok(source) - dataset = api.create_dataset(source) - api.ok(dataset) - model = api.create_model(dataset, {"objective_field": "species"}) - api.ok(model) - prediction = api.create_prediction(model, \ - {'sepal length': 5, 'sepal width': 2.5}) - - -You can also generate an evaluation for the model by using: - -.. code-block:: python - - test_source = api.create_source('./data/test_iris.csv') - api.ok(test_source) - test_dataset = api.create_dataset(test_source) - api.ok(test_dataset) - evaluation = api.create_evaluation(model, test_dataset) - api.ok(evaluation) - -If you set the ``storage`` argument in the ``api`` instantiation: - -.. code-block:: python - - api = BigML(storage='./storage') - -all the generated, updated or retrieved resources will be automatically -saved to the chosen directory. - -Alternatively, you can use the ``export`` method to explicitly -download the JSON information -that describes any of your resources in BigML to a particular file: - -.. code-block:: python - - api.export('model/5acea49a08b07e14b9001068', - filename="my_dir/my_model.json") - -This example downloads the JSON for the model and stores it in -the ``my_dir/my_model.json`` file. - -In the case of models that can be represented in a `PMML` syntax, the -export method can be used to produce the corresponding `PMML` file. - -.. code-block:: python - - api.export('model/5acea49a08b07e14b9001068', - filename="my_dir/my_model.pmml", - pmml=True) - -You can also retrieve the last resource with some previously given tag: - -.. code-block:: python - - api.export_last("foo", - resource_type="ensemble", - filename="my_dir/my_ensemble.json") - -which selects the last ensemble that has a ``foo`` tag. This mechanism can -be specially useful when retrieving retrained models that have been created -with a shared unique keyword as tag. - -For a descriptive overview of the steps that you will usually need to -follow to model -your data and obtain predictions, please see the `basic Workflow sketch -`_ -document. You can also check other simple examples in the following documents: - -- `model 101 <101_model.html>`_ -- `logistic regression 101 <101_logistic_regression.html>`_ -- `linear regression 101 <101_linear_regression.html>`_ -- `ensemble 101 <101_ensemble.html>`_ -- `cluster 101 <101_cluster>`_ -- `anomaly detector 101 <101_anomaly.html>`_ -- `association 101 <101_association.html>`_ -- `topic model 101 <101_topic_model.html>`_ -- `deepnet 101 <101_deepnet.html>`_ -- `time series 101 <101_ts.html>`_ -- `fusion 101 <101_fusion.html>`_ -- `scripting 101 <101_scripting.html>`_ Fields Structure ---------------- @@ -485,3023 +355,235 @@ use ``get_fields``: u'optype': u'categorical'}} When the number of fields becomes very large, it can be useful to exclude or -filter them. This can be done using a query string expression, for instance: +paginate them. This can be done using a query string expression, for instance: .. code-block:: python - >>> source = api.get_source(source, "limit=10&order_by=name") + >>> source = api.get_source(source, "offset=0;limit=10&order_by=name") would include in the retrieved dictionary the first 10 fields sorted by name. +There's a limit to the number of fields that will be included by default in +a resource description. 
If your resource has more than ``1000`` fields,
+you can either paginate or force all the fields to be returned by using
+``limit=-1`` in the query string.

-To handle the field structure you can use the ``Fields`` class. See the
+To handle field structures you can use the ``Fields`` class. See the
`Fields`_ section.

-Dataset
--------
+ML Resources
+------------

-If you want to get some basic statistics for each field you can retrieve
-the ``fields`` from the dataset as follows to get a dictionary keyed by
-field id:
+You'll find a description of the basic resources available in BigML in
+`ML Resources `_.

-.. code-block:: python
+WhizzML Resources
+-----------------

-    >>> dataset = api.get_dataset(dataset)
-    >>> api.pprint(api.get_fields(dataset))
-    { u'000000': { u'column_number': 0,
-                   u'datatype': u'double',
-                   u'name': u'sepal length',
-                   u'optype': u'numeric',
-                   u'summary': { u'maximum': 7.9,
-                                 u'median': 5.77889,
-                                 u'minimum': 4.3,
-                                 u'missing_count': 0,
-                                 u'population': 150,
-                                 u'splits': [ 4.51526,
-                                              4.67252,
-                                              4.81113,
+You'll learn about the scripting resources available in BigML in
+`WhizzML Resources `_. WhizzML is our scripting
+language that will allow you to create any workflow.

-    [... snip ... ]
+Managing Resources
+------------------

-      u'000004': { u'column_number': 4,
-                   u'datatype': u'string',
-                   u'name': u'species',
-                   u'optype': u'categorical',
-                   u'summary': { u'categories': [ [ u'Iris-versicolor',
-                                                    50],
-                                                  [u'Iris-setosa', 50],
-                                                  [ u'Iris-virginica',
-                                                    50]],
-                                 u'missing_count': 0}}}
+You can learn how to create, update, retrieve, list and delete any resource in:

+- `Creating Resources `_
+- `Updating Resources `_
+- `Deleting Resources `_
+- `Reading, listing and filtering Resources `_

-The field filtering options are also available using a query string expression,
-for instance:
+Local Resources
+---------------

-.. code-block:: python
+You can learn how to download any of the models created in the BigML
+platform and use them in your local environment in
+`Local Resources `_.

-    >>> dataset = api.get_dataset(dataset, "limit=20")
+Fields
+------

-limits the number of fields that will be included in ``dataset`` to 20.
+Once you have a resource, you can use the ``Fields`` class to generate a
+representation that will allow you to easily list fields, get field ids, get a
+field id by name, column number, etc.

-Model
------
+.. code-block:: python

-One of the greatest things about BigML is that the models that it
-generates for you are fully white-boxed. To get the explicit tree-like
-predictive model for the example above:
+    from bigml.api import BigML
+    from bigml.fields import Fields
+    api = BigML()
+    source = api.get_source("source/5143a51a37203f2cf7000974")

-.. code-block:: python
+    fields = Fields(source)

-    >>> model = api.get_model(model)
-    >>> api.pprint(model['object']['model']['root'])
-    {u'children': [
-      {u'children': [
-        {u'children': [{u'count': 38,
-                        u'distribution': [[u'Iris-virginica', 38]],
-                        u'output': u'Iris-virginica',
-                        u'predicate': {u'field': u'000002',
-                                       u'operator': u'>',
-                                       u'value': 5.05}},
-                       u'children': [
-
-    [ ...
] - - {u'count': 50, - u'distribution': [[u'Iris-setosa', 50]], - u'output': u'Iris-setosa', - u'predicate': {u'field': u'000002', - u'operator': u'<=', - u'value': 2.45}}]}, - {u'count': 150, - u'distribution': [[u'Iris-virginica', 50], - [u'Iris-versicolor', 50], - [u'Iris-setosa', 50]], - u'output': u'Iris-virginica', - u'predicate': True}]}}} - -(Note that we have abbreviated the output in the snippet above for -readability: the full predictive model you'll get is going to contain -much more details). - -Again, filtering options are also available using a query string expression, -for instance: +you can also instantiate the Fields object from the fields dict itself: .. code-block:: python - >>> model = api.get_model(model, "limit=5") - -limits the number of fields that will be included in ``model`` to 5. + from bigml.api import BigML + from bigml.fields import Fields + api = BigML() + source = api.get_source("source/5143a51a37203f2cf7000974") -Evaluation ----------- + fields = Fields(source['object']['fields']) -The predictive performance of a model can be measured using many different -measures. In BigML these measures can be obtained by creating evaluations. To -create an evaluation you need the id of the model you are evaluating and the id -of the dataset that contains the data to be tested with. The result is shown -as: +The newly instantiated Fields object will give direct methods to retrieve +different fields properties: .. code-block:: python - >>> evaluation = api.get_evaluation(evaluation) - >>> api.pprint(evaluation['object']['result']) - { 'class_names': ['0', '1'], - 'mode': { 'accuracy': 0.9802, - 'average_f_measure': 0.495, - 'average_phi': 0, - 'average_precision': 0.5, - 'average_recall': 0.4901, - 'confusion_matrix': [[99, 0], [2, 0]], - 'per_class_statistics': [ { 'accuracy': 0.9801980198019802, - 'class_name': '0', - 'f_measure': 0.99, - 'phi_coefficient': 0, - 'precision': 1.0, - 'present_in_test_data': True, - 'recall': 0.9801980198019802}, - { 'accuracy': 0.9801980198019802, - 'class_name': '1', - 'f_measure': 0, - 'phi_coefficient': 0, - 'precision': 0.0, - 'present_in_test_data': True, - 'recall': 0}]}, - 'model': { 'accuracy': 0.9901, - 'average_f_measure': 0.89746, - 'average_phi': 0.81236, - 'average_precision': 0.99495, - 'average_recall': 0.83333, - 'confusion_matrix': [[98, 1], [0, 2]], - 'per_class_statistics': [ { 'accuracy': 0.9900990099009901, - 'class_name': '0', - 'f_measure': 0.9949238578680203, - 'phi_coefficient': 0.8123623944599232, - 'precision': 0.98989898989899, - 'present_in_test_data': True, - 'recall': 1.0}, - { 'accuracy': 0.9900990099009901, - 'class_name': '1', - 'f_measure': 0.8, - 'phi_coefficient': 0.8123623944599232, - 'precision': 1.0, - 'present_in_test_data': True, - 'recall': 0.6666666666666666}]}, - 'random': { 'accuracy': 0.50495, - 'average_f_measure': 0.36812, - 'average_phi': 0.13797, - 'average_precision': 0.74747, - 'average_recall': 0.51923, - 'confusion_matrix': [[49, 50], [0, 2]], - 'per_class_statistics': [ { 'accuracy': 0.504950495049505, - 'class_name': '0', - 'f_measure': 0.6621621621621622, - 'phi_coefficient': 0.1379728923974526, - 'precision': 0.494949494949495, - 'present_in_test_data': True, - 'recall': 1.0}, - { 'accuracy': 0.504950495049505, - 'class_name': '1', - 'f_measure': 0.07407407407407407, - 'phi_coefficient': 0.1379728923974526, - 'precision': 1.0, - 'present_in_test_data': True, - 'recall': 0.038461538461538464}]}} - -where two levels of detail are easily identified. 
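+    # assuming ``fields`` holds the Fields instance built from the iris
+    # source in the previous snippets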
For classifications, -the first level shows these keys: - -- **class_names**: A list with the names of all the categories for the objective field (i.e., all the classes) -- **mode**: A detailed result object. Measures of the performance of the classifier that predicts the mode class for all the instances in the dataset -- **model**: A detailed result object. -- **random**: A detailed result object. Measures the performance of the classifier that predicts a random class for all the instances in the dataset. - -and the detailed result objects include ``accuracy``, ``average_f_measure``, ``average_phi``, -``average_precision``, ``average_recall``, ``confusion_matrix`` -and ``per_class_statistics``. - -For regressions first level will contain these keys: - -- **mean**: A detailed result object. Measures the performance of the model that predicts the mean for all the instances in the dataset. -- **model**: A detailed result object. -- **random**: A detailed result object. Measures the performance of the model that predicts a random class for all the instances in the dataset. - -where the detailed result objects include ``mean_absolute_error``, -``mean_squared_error`` and ``r_squared`` (refer to -`developers documentation `_ for -more info on the meaning of these measures. - -Cluster -------- - -For unsupervised learning problems, the cluster is used to classify in a -limited number of groups your training data. The cluster structure is defined -by the centers of each group of data, named centroids, and the data enclosed -in the group. As for in the model's case, the cluster is a white-box resource -and can be retrieved as a JSON: + # Internal id of the 'sepal length' field + fields.field_id('sepal length') -.. code-block:: python + # Field name of field with column number 0 + fields.field_name(0) - >>> cluster = api.get_cluster(cluster) - >>> api.pprint(cluster['object']) - { 'balance_fields': True, - 'category': 0, - 'cluster_datasets': { '000000': '', '000001': '', '000002': ''}, - 'cluster_datasets_ids': { '000000': '53739b9ae4b0dad82b0a65e6', - '000001': '53739b9ae4b0dad82b0a65e7', - '000002': '53739b9ae4b0dad82b0a65e8'}, - 'cluster_seed': '2c249dda00fbf54ab4cdd850532a584f286af5b6', - 'clusters': { 'clusters': [ { 'center': { '000000': 58.5, - '000001': 26.8314, - '000002': 44.27907, - '000003': 14.37209}, - 'count': 56, - 'distance': { 'bins': [ [ 0.69602, - 2], - [ ... ] - [ 3.77052, - 1]], - 'maximum': 3.77052, - 'mean': 1.61711, - 'median': 1.52146, - 'minimum': 0.69237, - 'population': 56, - 'standard_deviation': 0.6161, - 'sum': 90.55805, - 'sum_squares': 167.31926, - 'variance': 0.37958}, - 'id': '000000', - 'name': 'Cluster 0'}, - { 'center': { '000000': 50.06, - '000001': 34.28, - '000002': 14.62, - '000003': 2.46}, - 'count': 50, - 'distance': { 'bins': [ [ 0.16917, - 1], - [ ... ] - [ 4.94699, - 1]], - 'maximum': 4.94699, - 'mean': 1.50725, - 'median': 1.3393, - 'minimum': 0.16917, - 'population': 50, - 'standard_deviation': 1.00994, - 'sum': 75.36252, - 'sum_squares': 163.56918, - 'variance': 1.01998}, - 'id': '000001', - 'name': 'Cluster 1'}, - { 'center': { '000000': 68.15625, - '000001': 31.25781, - '000002': 55.48438, - '000003': 19.96875}, - 'count': 44, - 'distance': { 'bins': [ [ 0.36825, - 1], - [ ... 
] - [ 3.87216, - 1]], - 'maximum': 3.87216, - 'mean': 1.67264, - 'median': 1.63705, - 'minimum': 0.36825, - 'population': 44, - 'standard_deviation': 0.78905, - 'sum': 73.59627, - 'sum_squares': 149.87194, - 'variance': 0.6226}, - 'id': '000002', - 'name': 'Cluster 2'}], - 'fields': { '000000': { 'column_number': 0, - 'datatype': 'int8', - 'name': 'sepal length', - 'optype': 'numeric', - 'order': 0, - 'preferred': True, - 'summary': { 'bins': [ [ 43.75, - 4], - [ ... ] - [ 79, - 1]], - 'maximum': 79, - 'mean': 58.43333, - 'median': 57.7889, - 'minimum': 43, - 'missing_count': 0, - 'population': 150, - 'splits': [ 45.15258, - 46.72525, - 72.04226, - 76.47461], - 'standard_deviation': 8.28066, - 'sum': 8765, - 'sum_squares': 522385, - 'variance': 68.56935}}, - [ ... ] - [ 25, - 3]], - 'maximum': 25, - 'mean': 11.99333, - 'median': 13.28483, - 'minimum': 1, - 'missing_count': 0, - 'population': 150, - 'standard_deviation': 7.62238, - 'sum': 1799, - 'sum_squares': 30233, - 'variance': 58.10063}}}}, - 'code': 202, - 'columns': 4, - 'created': '2014-05-14T16:36:40.993000', - 'credits': 0.017578125, - 'credits_per_prediction': 0.0, - 'dataset': 'dataset/53739b88c8db63122b000411', - 'dataset_field_types': { 'categorical': 1, - 'datetime': 0, - 'numeric': 4, - 'preferred': 5, - 'text': 0, - 'total': 5}, - 'dataset_status': True, - 'dataset_type': 0, - 'description': '', - 'excluded_fields': ['000004'], - 'field_scales': None, - 'fields_meta': { 'count': 4, - 'limit': 1000, - 'offset': 0, - 'query_total': 4, - 'total': 4}, - 'input_fields': ['000000', '000001', '000002', '000003'], - 'k': 3, - 'locale': 'es-ES', - 'max_columns': 5, - 'max_rows': 150, - 'name': 'my iris', - 'number_of_batchcentroids': 0, - 'number_of_centroids': 0, - 'number_of_public_centroids': 0, - 'out_of_bag': False, - 'price': 0.0, - 'private': True, - 'range': [1, 150], - 'replacement': False, - 'resource': 'cluster/53739b98d994972da7001de9', - 'rows': 150, - 'sample_rate': 1.0, - 'scales': { '000000': 0.22445382597655375, - '000001': 0.4264213814821549, - '000002': 0.10528680248949522, - '000003': 0.2438379900517961}, - 'shared': False, - 'size': 4608, - 'source': 'source/53739b24d994972da7001ddd', - 'source_status': True, - 'status': { 'code': 5, - 'elapsed': 1009, - 'message': 'The cluster has been created', - 'progress': 1.0}, - 'subscription': True, - 'tags': [], - 'updated': '2014-05-14T16:40:26.234728', - 'white_box': False} - -(Note that we have abbreviated the output in the snippet above for -readability: the full predictive cluster you'll get is going to contain -much more details). - -Anomaly detector ----------------- + # Column number of field name 'petal length' + fields.field_column_number('petal length') -For anomaly detection problems, BigML anomaly detector uses iforest as an -unsupervised kind of model that detects anomalous data in a dataset. The -information it returns encloses a `top_anomalies` block -that contains a list of the most anomalous -points. For each, we capture a `score` from 0 to 1. The closer to 1, -the more anomalous. We also capture the `row` which gives values for -each field in the order defined by `input_fields`. Similarly we give -a list of `importances` which match the `row` values. These -importances tell us which values contributed most to the anomaly -score. Thus, the structure of an anomaly detector is similar to: + # Statistics of values in field name 'petal length') + fields.stats('petal length') -.. 
code-block:: python +Depending on the resource type, Fields information will vary. ``Sources`` will +have only the name, label, description, type of field (``optype``) while +``dataset`` resources will have also the ``preferred`` (whether a field will is +selectable as predictor), ``missing_count``, ``errors`` and a summary of +the values found in each field. This is due to the fact that the ``source`` +object is built by inspecting the contents of a sample of the uploaded file, +while the ``dataset`` resource really reads all the uploaded information. Thus, +dataset's fields structure will always be more complete than source's. - { 'category': 0, - 'code': 200, - 'columns': 14, - 'constraints': False, - 'created': '2014-09-08T18:51:11.893000', - 'credits': 0.11653518676757812, - 'credits_per_prediction': 0.0, - 'dataset': 'dataset/540dfa9d9841fa5c88000765', - 'dataset_field_types': { 'categorical': 21, - 'datetime': 0, - 'numeric': 21, - 'preferred': 14, - 'text': 0, - 'total': 42}, - 'dataset_status': True, - 'dataset_type': 0, - 'description': '', - 'excluded_fields': [], - 'fields_meta': { 'count': 14, - 'limit': 1000, - 'offset': 0, - 'query_total': 14, - 'total': 14}, - 'forest_size': 128, - 'input_fields': [ '000004', - '000005', - '000009', - '000016', - '000017', - '000018', - '000019', - '00001e', - '00001f', - '000020', - '000023', - '000024', - '000025', - '000026'], - 'locale': 'en_US', - 'max_columns': 42, - 'max_rows': 200, - 'model': { 'fields': { '000004': { 'column_number': 4, - 'datatype': 'int16', - 'name': 'src_bytes', - 'optype': 'numeric', - 'order': 0, - 'preferred': True, - 'summary': { 'bins': [ [ 143, - 2], - ... - [ 370, - 2]], - 'maximum': 370, - 'mean': 248.235, - 'median': 234.57157, - 'minimum': 141, - 'missing_count': 0, - 'population': 200, - 'splits': [ 159.92462, - 173.73312, - 188, - ... - 339.55228], - 'standard_deviation': 49.39869, - 'sum': 49647, - 'sum_squares': 12809729, - 'variance': 2440.23093}}, - '000005': { 'column_number': 5, - 'datatype': 'int32', - 'name': 'dst_bytes', - 'optype': 'numeric', - 'order': 1, - 'preferred': True, - ... - 'sum': 1030851, - 'sum_squares': 22764504759, - 'variance': 87694652.45224}}, - '000009': { 'column_number': 9, - 'datatype': 'string', - 'name': 'hot', - 'optype': 'categorical', - 'order': 2, - 'preferred': True, - 'summary': { 'categories': [ [ '0', - 199], - [ '1', - 1]], - 'missing_count': 0}, - 'term_analysis': { 'enabled': True}}, - '000016': { 'column_number': 22, - 'datatype': 'int8', - 'name': 'count', - 'optype': 'numeric', - 'order': 3, - 'preferred': True, - ... - 'population': 200, - 'standard_deviation': 5.42421, - 'sum': 1351, - 'sum_squares': 14981, - 'variance': 29.42209}}, - '000017': { ... 
}}}, - 'kind': 'iforest', - 'mean_depth': 12.314174107142858, - 'top_anomalies': [ { 'importance': [ 0.06768, - 0.01667, - 0.00081, - 0.02437, - 0.04773, - 0.22197, - 0.18208, - 0.01868, - 0.11855, - 0.01983, - 0.01898, - 0.05306, - 0.20398, - 0.00562], - 'row': [ 183.0, - 8654.0, - '0', - 4.0, - 4.0, - 0.25, - 0.25, - 0.0, - 123.0, - 255.0, - 0.01, - 0.04, - 0.01, - 0.0], - 'score': 0.68782}, - { 'importance': [ 0.05645, - 0.02285, - 0.0015, - 0.05196, - 0.04435, - 0.0005, - 0.00056, - 0.18979, - 0.12402, - 0.23671, - 0.20723, - 0.05651, - 0.00144, - 0.00612], - 'row': [ 212.0, - 1940.0, - '0', - 1.0, - 2.0, - 0.0, - 0.0, - 1.0, - 1.0, - 69.0, - 1.0, - 0.04, - 0.0, - 0.0], - 'score': 0.6239}, - ...], - 'trees': [ { 'root': { 'children': [ { 'children': [ { 'children': [ { 'children': [ { 'children': - [ { 'population': 1, - 'predicates': [ { 'field': '00001f', - 'op': '>', - 'value': 35.54357}]}, - - ... - { 'population': 1, - 'predicates': [ { 'field': '00001f', - 'op': '<=', - 'value': 35.54357}]}], - 'population': 2, - 'predicates': [ { 'field': '000005', - 'op': '<=', - 'value': 1385.5166}]}], - 'population': 3, - 'predicates': [ { 'field': '000020', - 'op': '<=', - 'value': 65.14308}, - { 'field': '000019', - 'op': '=', - 'value': 0}]}], - 'population': 105, - 'predicates': [ { 'field': '000017', - 'op': '<=', - 'value': 13.21754}, - { 'field': '000009', - 'op': 'in', - 'value': [ '0']}]}], - 'population': 126, - 'predicates': [ True, - { 'field': '000018', - 'op': '=', - 'value': 0}]}, - 'training_mean_depth': 11.071428571428571}]}, - 'name': "tiny_kdd's dataset anomaly detector", - 'number_of_batchscores': 0, - 'number_of_public_predictions': 0, - 'number_of_scores': 0, - 'out_of_bag': False, - 'price': 0.0, - 'private': True, - 'project': None, - 'range': [1, 200], - 'replacement': False, - 'resource': 'anomaly/540dfa9f9841fa5c8800076a', - 'rows': 200, - 'sample_rate': 1.0, - 'sample_size': 126, - 'seed': 'BigML', - 'shared': False, - 'size': 30549, - 'source': 'source/540dfa979841fa5c7f000363', - 'source_status': True, - 'status': { 'code': 5, - 'elapsed': 32397, - 'message': 'The anomaly detector has been created', - 'progress': 1.0}, - 'subscription': False, - 'tags': [], - 'updated': '2014-09-08T23:54:28.647000', - 'white_box': False} - -Note that we have abbreviated the output in the snippet above for -readability: the full anomaly detector you'll get is going to contain -much more details). - -The `trees` list contains the actual isolation forest, and it can be quite -large usually. That's why, this part of the resource should only be included -in downloads when needed. If you are only interested in other properties, such -as `top_anomalies`, you'll improve performance by excluding it, using the -`excluded=trees` query string in the API call: +In both cases, you can extract the summarized information available using +the ``summary_csv`` method: .. code-block:: python - anomaly = api.get_anomaly('anomaly/540dfa9f9841fa5c8800076a', \ - query_string='excluded=trees') - -Each node in an isolation tree can have multiple predicates. -For the node to be a valid branch when evaluated with a data point, all of its -predicates must be true. - -Samples -------- + from bigml.api import BigML + from bigml.fields import Fields + api = BigML() + dataset = api.get_dataset("dataset/5143a51a37203f2cf7300974") -To provide quick access to your row data you can create a ``sample``. 
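+    # the dataset embeds the complete fields structure, including the
+    # per-field summaries used by ``summary_csv``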
Samples
-are in-memory objects that can be queried for subsets of data by limiting
-their size, the fields or the rows returned. The structure of a sample would
-be::

-Samples are not permanent objects. Once they are created, they will be
-available as long as GETs are requested within periods smaller than
-a pre-established TTL (Time to Live). The expiration timer of a sample is
-reset every time a new GET is received.

-If requested, a sample can also perform linear regression and compute
-Pearson's and Spearman's correlations for either one numeric field
-against all other numeric fields or between two specific numeric fields.
+    fields = Fields(dataset)
+    fields.summary_csv("my_fields_summary.csv")

-Correlations
-------------
+In this example, the information will be stored in the
+``my_fields_summary.csv`` file. For the typical ``iris.csv`` data file, the
+summary will read:

-A ``correlation`` resource contains a series of computations that reflect the
-degree of dependence between the field set as objective for your predictions
-and the rest of fields in your dataset. The dependence degree is obtained by
-comparing the distributions in every objective and non-objective field pair,
-as independent fields should have probabilistic
-independent distributions. Depending on the types of the fields to compare,
-the metrics used to compute the correlation degree will be:
+.. csv-table::
+   :header: "field column","field ID","field name","field label","field description","field type","preferred","missing count","errors","contents summary","errors summary"
+   :widths: 5, 10, 20, 5, 5, 10, 10, 5, 5, 100, 10

-- for numeric to numeric pairs:
-  `Pearson's `_
-  and `Spearman's correlation `_
-  coefficients.
-- for numeric to categorical pairs:
-  `One-way Analysis of Variance `_, with the
-  categorical field as the predictor variable.
-- for categorical to categorical pairs:
-  `contingency table (or two-way table) `_,
-  `Chi-square test of independence `_
-  , and `Cramer's V `_
-  and `Tschuprow's T `_ coefficients.
+   0,000000,sepal length,,,numeric,true,0,0,"[4.3, 7.9], mean: 5.84333",
+   1,000001,sepal width,,,numeric,false,0,0,"[2, 4.4], mean: 3.05733",
+   2,000002,petal length,,,numeric,true,0,0,"[1, 6.9], mean: 3.758",
+   3,000003,petal width,,,numeric,true,0,0,"[0.1, 2.5], mean: 1.19933",
+   4,000004,species,,,categorical,true,0,0,"3 categories: Iris-setosa (50), Iris-versicolor (50), Iris-virginica (50)",

-An example of the correlation resource JSON structure is:
+Another utility in the ``Fields`` object will help you update the updatable
+attributes of your source or dataset fields. For instance, if you
+need to update the type associated with one field in your dataset,
+you can change the ``field type``
+values in the previous file and use it to obtain the fields structure
+needed to update your source:

..
code-block:: python - >>> from bigml.api import BigML - >>> api = BigML() - >>> correlation = api.create_correlation('dataset/55b7a6749841fa2500000d41') - >>> api.ok(correlation) - >>> api.pprint(correlation['object']) - { u'category': 0, - u'clones': 0, - u'code': 200, - u'columns': 5, - u'correlations': { u'correlations': [ { u'name': u'one_way_anova', - u'result': { u'000000': { u'eta_square': 0.61871, - u'f_ratio': 119.2645, - u'p_value': 0, - u'significant': [ True, - True, - True]}, - u'000001': { u'eta_square': 0.40078, - u'f_ratio': 49.16004, - u'p_value': 0, - u'significant': [ True, - True, - True]}, - u'000002': { u'eta_square': 0.94137, - u'f_ratio': 1180.16118, - u'p_value': 0, - u'significant': [ True, - True, - True]}, - u'000003': { u'eta_square': 0.92888, - u'f_ratio': 960.00715, - u'p_value': 0, - u'significant': [ True, - True, - True]}}}], - u'fields': { u'000000': { u'column_number': 0, - u'datatype': u'double', - u'idx': 0, - u'name': u'sepal length', - u'optype': u'numeric', - u'order': 0, - u'preferred': True, - u'summary': { u'bins': [ [ 4.3, - 1], - [ 4.425, - 4], - ... - [ 7.9, - 1]], - u'kurtosis': -0.57357, - u'maximum': 7.9, - u'mean': 5.84333, - u'median': 5.8, - u'minimum': 4.3, - u'missing_count': 0, - u'population': 150, - u'skewness': 0.31175, - u'splits': [ 4.51526, - 4.67252, - 4.81113, - 4.89582, - 4.96139, - 5.01131, - ... - 6.92597, - 7.20423, - 7.64746], - u'standard_deviation': 0.82807, - u'sum': 876.5, - u'sum_squares': 5223.85, - u'variance': 0.68569}}, - u'000001': { u'column_number': 1, - u'datatype': u'double', - u'idx': 1, - u'name': u'sepal width', - u'optype': u'numeric', - u'order': 1, - u'preferred': True, - u'summary': { u'counts': [ [ 2, - 1], - [ 2.2, - ... - u'000004': { u'column_number': 4, - u'datatype': u'string', - u'idx': 4, - u'name': u'species', - u'optype': u'categorical', - u'order': 4, - u'preferred': True, - u'summary': { u'categories': [ [ u'Iris-setosa', - 50], - [ u'Iris-versicolor', - 50], - [ u'Iris-virginica', - 50]], - u'missing_count': 0}, - u'term_analysis': { u'enabled': True}}}, - u'significance_levels': [0.01, 0.05, 0.1]}, - u'created': u'2015-07-28T18:07:37.010000', - u'credits': 0.017581939697265625, - u'dataset': u'dataset/55b7a6749841fa2500000d41', - u'dataset_status': True, - u'dataset_type': 0, - u'description': u'', - u'excluded_fields': [], - u'fields_meta': { u'count': 5, - u'limit': 1000, - u'offset': 0, - u'query_total': 5, - u'total': 5}, - u'input_fields': [u'000000', u'000001', u'000002', u'000003'], - u'locale': u'en_US', - u'max_columns': 5, - u'max_rows': 150, - u'name': u"iris' dataset correlation", - u'objective_field_details': { u'column_number': 4, - u'datatype': u'string', - u'name': u'species', - u'optype': u'categorical', - u'order': 4}, - u'out_of_bag': False, - u'price': 0.0, - u'private': True, - u'project': None, - u'range': [1, 150], - u'replacement': False, - u'resource': u'correlation/55b7c4e99841fa24f20009bf', - u'rows': 150, - u'sample_rate': 1.0, - u'shared': False, - u'size': 4609, - u'source': u'source/55b7a6729841fa24f100036a', - u'source_status': True, - u'status': { u'code': 5, - u'elapsed': 274, - u'message': u'The correlation has been created', - u'progress': 1.0}, - u'subscription': True, - u'tags': [], - u'updated': u'2015-07-28T18:07:49.057000', - u'white_box': False} - -Note that the output in the snippet above has been abbreviated. As you see, the -``correlations`` attribute contains the information about each field -correlation to the objective field. 
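+    # assumes my_fields_summary.csv is the summary file generated above,
+    # edited to change the updatable attributes (e.g. the field type)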
-
-Statistical Tests
-----------------
+    from bigml.api import BigML
+    from bigml.fields import Fields
+    api = BigML()
+    source = api.get_source("source/5143a51a37203f2cf7000974")

-A ``statisticaltest`` resource contains a series of tests
-that compare the
-distribution of data in each numeric field of a dataset
-to certain canonical distributions,
-such as the
-`normal distribution `_
-or `Benford's law `_
-distribution. Statistical test are useful in tasks such as fraud, normality,
-or outlier detection.
-
-- Fraud Detection Tests:
-Benford: This statistical test performs a comparison of the distribution of
-first significant digits (FSDs) of each value of the field to the Benford's
-law distribution. Benford's law applies to numerical distributions spanning
-several orders of magnitude, such as the values found on financial balance
-sheets. It states that the frequency distribution of leading, or first
-significant digits (FSD) in such distributions is not uniform.
-On the contrary, lower digits like 1 and 2 occur disproportionately
-often as leading significant digits. The test compares the distribution
-in the field to Bendford's distribution using a Chi-square goodness-of-fit
-test, and Cho-Gaines d test. If a field has a dissimilar distribution,
-it may contain anomalous or fraudulent values.
-
-- Normality tests:
-These tests can be used to confirm the assumption that the data in each field
-of a dataset is distributed according to a normal distribution. The results
-are relevant because many statistical and machine learning techniques rely on
-this assumption.
-Anderson-Darling: The Anderson-Darling test computes a test statistic based on
-the difference between the observed cumulative distribution function (CDF) to
-that of a normal distribution. A significant result indicates that the
-assumption of normality is rejected.
-Jarque-Bera: The Jarque-Bera test computes a test statistic based on the third
-and fourth central moments (skewness and kurtosis) of the data. Again, a
-significant result indicates that the normality assumption is rejected.
-Z-score: For a given sample size, the maximum deviation from the mean that
-would expected in a sampling of a normal distribution can be computed based
-on the 68-95-99.7 rule. This test simply reports this expected deviation and
-the actual deviation observed in the data, as a sort of sanity check.
-
-- Outlier tests:
-Grubbs: When the values of a field are normally distributed, a few values may
-still deviate from the mean distribution. The outlier tests reports whether
-at least one value in each numeric field differs significantly from the mean
-using Grubb's test for outliers. If an outlier is found, then its value will
-be returned.
-
-The JSON structure for ``statisticaltest`` resources is similar to this one:
+    fields = Fields(source)
+    fields_update_info = fields.new_fields_structure("my_fields_summary.csv")
+    source = api.update_source(source, \
+        fields.filter_fields_update(fields_update_info))
+
+where ``filter_fields_update`` will make sure that only the attributes that
+can be updated in a source will be sent in the update request.
+For both sources and datasets, the updatable attributes are ``name``, ``label``
+and ``description``.
+In ``sources`` you can also update the type of the field (``optype``), and
+in ``datasets`` you can update the ``preferred`` attribute.

-.. code-block:: python

+In addition to that, you can also easily ``pair`` a list of values with
+field ids, which is very
+useful to make predictions.
- >>> statistical_test = api.create_statistical_test('dataset/55b7a6749841fa2500000d41') - >>> api.ok(statistical_test) - True - >>> api.pprint(statistical_test['object']) - { u'category': 0, - u'clones': 0, - u'code': 200, - u'columns': 5, - u'created': u'2015-07-28T18:16:40.582000', - u'credits': 0.017581939697265625, - u'dataset': u'dataset/55b7a6749841fa2500000d41', - u'dataset_status': True, - u'dataset_type': 0, - u'description': u'', - u'excluded_fields': [], - u'fields_meta': { u'count': 5, - u'limit': 1000, - u'offset': 0, - u'query_total': 5, - u'total': 5}, - u'input_fields': [u'000000', u'000001', u'000002', u'000003'], - u'locale': u'en_US', - u'max_columns': 5, - u'max_rows': 150, - u'name': u"iris' dataset test", - u'out_of_bag': False, - u'price': 0.0, - u'private': True, - u'project': None, - u'range': [1, 150], - u'replacement': False, - u'resource': u'statisticaltest/55b7c7089841fa25000010ad', - u'rows': 150, - u'sample_rate': 1.0, - u'shared': False, - u'size': 4609, - u'source': u'source/55b7a6729841fa24f100036a', - u'source_status': True, - u'status': { u'code': 5, - u'elapsed': 302, - u'message': u'The test has been created', - u'progress': 1.0}, - u'subscription': True, - u'tags': [], - u'statistical_tests': { u'ad_sample_size': 1024, - u'fields': { u'000000': { u'column_number': 0, - u'datatype': u'double', - u'idx': 0, - u'name': u'sepal length', - u'optype': u'numeric', - u'order': 0, - u'preferred': True, - u'summary': { u'bins': [ [ 4.3, - 1], - [ 4.425, - 4], - ... - [ 7.9, - 1]], - u'kurtosis': -0.57357, - u'maximum': 7.9, - u'mean': 5.84333, - u'median': 5.8, - u'minimum': 4.3, - u'missing_count': 0, - u'population': 150, - u'skewness': 0.31175, - u'splits': [ 4.51526, - 4.67252, - 4.81113, - 4.89582, - ... - 7.20423, - 7.64746], - u'standard_deviation': 0.82807, - u'sum': 876.5, - u'sum_squares': 5223.85, - u'variance': 0.68569}}, - ... 
- u'000004': { u'column_number': 4, - u'datatype': u'string', - u'idx': 4, - u'name': u'species', - u'optype': u'categorical', - u'order': 4, - u'preferred': True, - u'summary': { u'categories': [ [ u'Iris-setosa', - 50], - [ u'Iris-versicolor', - 50], - [ u'Iris-virginica', - 50]], - u'missing_count': 0}, - u'term_analysis': { u'enabled': True}}}, - u'fraud': [ { u'name': u'benford', - u'result': { u'000000': { u'chi_square': { u'chi_square_value': 506.39302, - u'p_value': 0, - u'significant': [ True, - True, - True]}, - u'cho_gaines': { u'd_statistic': 7.124311073683573, - u'significant': [ True, - True, - True]}, - u'distribution': [ 0, - 0, - 0, - 22, - 61, - 54, - 13, - 0, - 0], - u'negatives': 0, - u'zeros': 0}, - u'000001': { u'chi_square': { u'chi_square_value': 396.76556, - u'p_value': 0, - u'significant': [ True, - True, - True]}, - u'cho_gaines': { u'd_statistic': 7.503503138331123, - u'significant': [ True, - True, - True]}, - u'distribution': [ 0, - 57, - 89, - 4, - 0, - 0, - 0, - 0, - 0], - u'negatives': 0, - u'zeros': 0}, - u'000002': { u'chi_square': { u'chi_square_value': 154.20728, - u'p_value': 0, - u'significant': [ True, - True, - True]}, - u'cho_gaines': { u'd_statistic': 3.9229974017266054, - u'significant': [ True, - True, - True]}, - u'distribution': [ 50, - 0, - 11, - 43, - 35, - 11, - 0, - 0, - 0], - u'negatives': 0, - u'zeros': 0}, - u'000003': { u'chi_square': { u'chi_square_value': 111.4438, - u'p_value': 0, - u'significant': [ True, - True, - True]}, - u'cho_gaines': { u'd_statistic': 4.103257341299901, - u'significant': [ True, - True, - True]}, - u'distribution': [ 76, - 58, - 7, - 7, - 1, - 1, - 0, - 0, - 0], - u'negatives': 0, - u'zeros': 0}}}], - u'normality': [ { u'name': u'anderson_darling', - u'result': { u'000000': { u'p_value': 0.02252, - u'significant': [ False, - True, - True]}, - u'000001': { u'p_value': 0.02023, - u'significant': [ False, - True, - True]}, - u'000002': { u'p_value': 0, - u'significant': [ True, - True, - True]}, - u'000003': { u'p_value': 0, - u'significant': [ True, - True, - True]}}}, - { u'name': u'jarque_bera', - u'result': { u'000000': { u'p_value': 0.10615, - u'significant': [ False, - False, - False]}, - u'000001': { u'p_value': 0.25957, - u'significant': [ False, - False, - False]}, - u'000002': { u'p_value': 0.0009, - u'significant': [ True, - True, - True]}, - u'000003': { u'p_value': 0.00332, - u'significant': [ True, - True, - True]}}}, - { u'name': u'z_score', - u'result': { u'000000': { u'expected_max_z': 2.71305, - u'max_z': 2.48369}, - u'000001': { u'expected_max_z': 2.71305, - u'max_z': 3.08044}, - u'000002': { u'expected_max_z': 2.71305, - u'max_z': 1.77987}, - u'000003': { u'expected_max_z': 2.71305, - u'max_z': 1.70638}}}], - u'outliers': [ { u'name': u'grubbs', - u'result': { u'000000': { u'p_value': 1, - u'significant': [ False, - False, - False]}, - u'000001': { u'p_value': 0.26555, - u'significant': [ False, - False, - False]}, - u'000002': { u'p_value': 1, - u'significant': [ False, - False, - False]}, - u'000003': { u'p_value': 1, - u'significant': [ False, - False, - False]}}}], - u'significance_levels': [0.01, 0.05, 0.1]}, - u'updated': u'2015-07-28T18:17:11.829000', - u'white_box': False} - -Note that the output in the snippet above has been abbreviated. As you see, the -``statistical_tests`` attribute contains the ``fraud`, ``normality`` -and ``outliers`` -sections where the information for each field's distribution is stored. 
- -Logistic Regressions --------------------- - -A logistic regression is a supervised machine learning method for -solving classification problems. Each of the classes in the field -you want to predict, the objective field, is assigned a probability depending -on the values of the input fields. The probability is computed -as the value of a logistic function, -whose argument is a linear combination of the predictors' values. -You can create a logistic regression selecting which fields from your -dataset you want to use as input fields (or predictors) and which -categorical field you want to predict, the objective field. Then the -created logistic regression is defined by the set of coefficients in the -linear combination of the values. Categorical -and text fields need some prior work to be modelled using this method. They -are expanded as a set of new fields, one per category or term (respectively) -where the number of occurrences of the category or term is store. Thus, -the linear combination is made on the frequency of the categories or terms. - -The JSON structure for a logistic regression is: +For example, the following snippet may be useful to create local predictions +using a csv file as input: .. code-block:: python - >>> api.pprint(logistic_regression['object']) - { u'balance_objective': False, - u'category': 0, - u'code': 200, - u'columns': 5, - u'created': u'2015-10-09T16:11:08.444000', - u'credits': 0.017581939697265625, - u'credits_per_prediction': 0.0, - u'dataset': u'dataset/561304f537203f4c930001ca', - u'dataset_field_types': { u'categorical': 1, - u'datetime': 0, - u'effective_fields': 5, - u'numeric': 4, - u'preferred': 5, - u'text': 0, - u'total': 5}, - u'dataset_status': True, - u'description': u'', - u'excluded_fields': [], - u'fields_meta': { u'count': 5, - u'limit': 1000, - u'offset': 0, - u'query_total': 5, - u'total': 5}, - u'input_fields': [u'000000', u'000001', u'000002', u'000003'], - u'locale': u'en_US', - u'logistic_regression': { u'bias': 1, - u'c': 1, - u'coefficients': [ [ u'Iris-virginica', - [ -1.7074433493289376, - -1.533662474502423, - 2.47026986670851, - 2.5567582221085563, - -1.2158200612711925]], - [ u'Iris-setosa', - [ 0.41021712519841674, - 1.464162165246765, - -2.26003266131107, - -1.0210350909174153, - 0.26421852991732514]], - [ u'Iris-versicolor', - [ 0.42702327817072505, - -1.611817241669904, - 0.5763832839459982, - -1.4069842681625884, - 1.0946877732663143]]], - u'eps': 1e-05, - u'fields': { u'000000': { u'column_number': 0, - u'datatype': u'double', - u'name': u'sepal length', - u'optype': u'numeric', - u'order': 0, - u'preferred': True, - u'summary': { u'bins': [ [ 4.3, - 1], - [ 4.425, - 4], - [ 4.6, - 4], - ... - [ 7.9, - 1]], - u'kurtosis': -0.57357, - u'maximum': 7.9, - u'mean': 5.84333, - u'median': 5.8, - u'minimum': 4.3, - u'missing_count': 0, - u'population': 150, - u'skewness': 0.31175, - u'splits': [ 4.51526, - 4.67252, - 4.81113, - ... - 6.92597, - 7.20423, - 7.64746], - u'standard_deviation': 0.82807, - u'sum': 876.5, - u'sum_squares': 5223.85, - u'variance': 0.68569}}, - u'000001': { u'column_number': 1, - u'datatype': u'double', - u'name': u'sepal width', - u'optype': u'numeric', - u'order': 1, - u'preferred': True, - u'summary': { u'counts': [ [ 2, - 1], - [ 2.2, - 3], - ... 
- [ 4.2, - 1], - [ 4.4, - 1]], - u'kurtosis': 0.18098, - u'maximum': 4.4, - u'mean': 3.05733, - u'median': 3, - u'minimum': 2, - u'missing_count': 0, - u'population': 150, - u'skewness': 0.31577, - u'standard_deviation': 0.43587, - u'sum': 458.6, - u'sum_squares': 1430.4, - u'variance': 0.18998}}, - u'000002': { u'column_number': 2, - u'datatype': u'double', - u'name': u'petal length', - u'optype': u'numeric', - u'order': 2, - u'preferred': True, - u'summary': { u'bins': [ [ 1, - 1], - [ 1.16667, - 3], - ... - [ 6.6, - 1], - [ 6.7, - 2], - [ 6.9, - 1]], - u'kurtosis': -1.39554, - u'maximum': 6.9, - u'mean': 3.758, - u'median': 4.35, - u'minimum': 1, - u'missing_count': 0, - u'population': 150, - u'skewness': -0.27213, - u'splits': [ 1.25138, - 1.32426, - 1.37171, - ... - 6.02913, - 6.38125], - u'standard_deviation': 1.7653, - u'sum': 563.7, - u'sum_squares': 2582.71, - u'variance': 3.11628}}, - u'000003': { u'column_number': 3, - u'datatype': u'double', - u'name': u'petal width', - u'optype': u'numeric', - u'order': 3, - u'preferred': True, - u'summary': { u'counts': [ [ 0.1, - 5], - [ 0.2, - 29], - ... - [ 2.4, - 3], - [ 2.5, - 3]], - u'kurtosis': -1.33607, - u'maximum': 2.5, - u'mean': 1.19933, - u'median': 1.3, - u'minimum': 0.1, - u'missing_count': 0, - u'population': 150, - u'skewness': -0.10193, - u'standard_deviation': 0.76224, - u'sum': 179.9, - u'sum_squares': 302.33, - u'variance': 0.58101}}, - u'000004': { u'column_number': 4, - u'datatype': u'string', - u'name': u'species', - u'optype': u'categorical', - u'order': 4, - u'preferred': True, - u'summary': { u'categories': [ [ u'Iris-setosa', - 50], - [ u'Iris-versicolor', - 50], - [ u'Iris-virginica', - 50]], - u'missing_count': 0}, - u'term_analysis': { u'enabled': True}}}, - u'normalize': False, - u'regularization': u'l2'}, - u'max_columns': 5, - u'max_rows': 150, - u'name': u"iris' dataset's logistic regression", - u'number_of_batchpredictions': 0, - u'number_of_evaluations': 0, - u'number_of_predictions': 1, - u'objective_field': u'000004', - u'objective_field_name': u'species', - u'objective_field_type': u'categorical', - u'objective_fields': [u'000004'], - u'out_of_bag': False, - u'private': True, - u'project': u'project/561304c137203f4c9300016c', - u'range': [1, 150], - u'replacement': False, - u'resource': u'logisticregression/5617e71c37203f506a000001', - u'rows': 150, - u'sample_rate': 1.0, - u'shared': False, - u'size': 4609, - u'source': u'source/561304f437203f4c930001c3', - u'source_status': True, - u'status': { u'code': 5, - u'elapsed': 86, - u'message': u'The logistic regression has been created', - u'progress': 1.0}, - u'subscription': False, - u'tags': [u'species'], - u'updated': u'2015-10-09T16:14:02.336000', - u'white_box': False} - -Note that the output in the snippet above has been abbreviated. As you see, -the ``logistic_regression`` attribute stores the coefficients used in the -logistic function as well as the configuration parameters described in -the `developers section `_ . - - -Linear Regressions ------------------- - -A linear regression is a supervised machine learning method for -solving regression problems by computing the objective as a linear -combination of factors. The implementation is a multiple linear regression -that models the output as a linear combination of the predictors. -The coefficients are estimated doing a least-squares fit on the training data. 
-
-As a linear combination can only be done using numeric values, non-numeric
-fields need to be transformed to numeric ones following some rules:
-
-- Categorical fields will be encoded and each class appearance in input data
-  will convey a different contribution to the input vector.
-- Text and items fields will be expanded to several numeric predictors,
-  each one indicating the number of occurences for a specific term.
-  Text fields without term analysis are excluded from the model.
-
-Therefore, the initial input data is transformed into an input vector with one
-or may components per field. Also, if a field in the training data contains
-missing data, the components corresponding to that field will include an
-additional 1 or 0 value depending on whether the field is missing in the
-input data or not.
+    import csv
+
+    from bigml.model import Model
+
+    # test_set points to a local CSV file whose rows hold the input values
+    test_reader = csv.reader(open(test_set))
+    local_model = Model(model)
+    for row in test_reader:
+        input_data = fields.pair([float(val) for val in row], objective_field)
+        prediction = local_model.predict(input_data)

-The JSON structure for a linear regression is:
+If you are interfacing with numpy-based libraries, you'll probably want to
+generate or read the field values as a numpy array. The ``Fields`` object
+offers the ``.from_numpy`` and ``.to_numpy`` methods to that end. In both,
+categorical fields will be one-hot encoded automatically by assigning the
+indices of the categories as presented in the corresponding field summary.

.. code-block:: python

-    >>> api.pprint(linear_regression["object"])
-    { u'category': 0,
-      u'code': 200,
-      u'columns': 4,
-      u'composites': None,
-      u'configuration': None,
-      u'configuration_status': False,
-      u'created': u'2019-02-20T21:02:40.027000',
-      u'creator': u'merce',
-      u'credits': 0.0,
-      u'credits_per_prediction': 0.0,
-      u'dataset': u'dataset/5c6dc06a983efc18e2000084',
-      u'dataset_field_types': { u'categorical': 0,
-                                u'datetime': 0,
-                                u'items': 0,
-                                u'numeric': 6,
-                                u'preferred': 6,
-                                u'text': 0,
-                                u'total': 6},
-      u'dataset_status': True,
-      u'datasets': [],
-      u'default_numeric_value': None,
-      u'description': u'',
-      u'excluded_fields': [],
-      u'execution_id': None,
-      u'execution_status': None,
-      u'fields_maps': None,
-      u'fields_meta': { u'count': 4,
-                        u'limit': 1000,
-                        u'offset': 0,
-                        u'query_total': 4,
-                        u'total': 4},
-      u'fusions': None,
-      u'input_fields': [u'000000', u'000001', u'000002'],
-      u'linear_regression': { u'bias': True,
-                              u'coefficients': [ [-1.88196],
-                                                 [0.475633],
-                                                 [0.122468],
-                                                 [30.9141]],
-                              u'fields': { u'000000': { u'column_number': 0,
-                                                        u'datatype': u'int8',
-                                                        u'name': u'Prefix',
-                                                        u'optype': u'numeric',
-                                                        u'order': 0,
-                                                        u'preferred': True,
-                                                        u'summary': { u'counts': [ [ 4,
-                                                                                     1],
-
-    ...
- u'stats': { u'confidence_intervals': [ [ 5.63628], - [ 0.375062], - [ 0.348577], - [ 44.4112]], - u'mean_squared_error': 342.206, - u'number_of_parameters': 4, - u'number_of_samples': 77, - u'p_values': [ [0.512831], - [0.0129362], - [0.491069], - [0.172471]], - u'r_squared': 0.136672, - u'standard_errors': [ [ 2.87571], - [ 0.191361], - [ 0.177849], - [ 22.6592]], - u'sum_squared_errors': 24981, - u'xtx': [ [ 4242, - 48396.9, - 51273.97, - 568], - [ 48396.9, - 570177.6584, - 594274.3274, - 6550.52], - [ 51273.97, - 594274.3274, - 635452.7068, - 6894.24], - [ 568, - 6550.52, - 6894.24, - 77]], - u'z_scores': [ [-0.654436], - [2.48552], - [0.688609], - [1.36431]]}}, - u'locale': u'en_US', - u'max_columns': 6, - u'max_rows': 80, - u'name': u'grades', - u'name_options': u'bias', - u'number_of_batchpredictions': 0, - u'number_of_evaluations': 0, - u'number_of_predictions': 2, - u'number_of_public_predictions': 0, - u'objective_field': u'000005', - u'objective_field_name': u'Final', - u'objective_field_type': u'numeric', - u'objective_fields': [u'000005'], - u'operating_point': { }, - u'optiml': None, - u'optiml_status': False, - u'ordering': 0, - u'out_of_bag': False, - u'out_of_bags': None, - u'price': 0.0, - u'private': True, - u'project': u'project/5c6dc062983efc18d5000129', - u'range': None, - u'ranges': None, - u'replacement': False, - u'replacements': None, - u'resource': u'linearregression/5c6dc070983efc18e00001f1', - u'rows': 80, - u'sample_rate': 1.0, - u'sample_rates': None, - u'seed': None, - u'seeds': None, - u'shared': False, - u'size': 2691, - u'source': u'source/5c6dc064983efc18e00001ed', - u'source_status': True, - u'status': { u'code': 5, - u'elapsed': 62086, - u'message': u'The linear regression has been created', - u'progress': 1}, - u'subscription': True, - u'tags': [], - u'type': 0, - u'updated': u'2019-02-27T18:01:18.539000', - u'user_metadata': { }, - u'webhook': None, - u'weight_field': None, - u'white_box': False} - -Note that the output in the snippet above has been abbreviated. As you see, -the ``linear_regression`` attribute stores the coefficients used in the -linear function as well as the configuration parameters described in -the `developers section `_ . - - -Associations ------------- + from bigml.api import BigML + from bigml.fields import Fields + api = BigML() + model = api.get_model("model/5143a51a37203f2cf7000979") + fields = Fields(model) + # creating a numpy array for the following input data + np_inputs = fields.to_numpy({"petal length": 1}) + # creating an input data dictionary from a numpy array + input_data = fields.from_numpy(np_inputs) + +The numpy output of ``.to_numpy`` can be used in the +`ShapWrapper `_ object or other +functions that expect numpy arrays as inputs and the ``.from_numpy`` +output can be used in BigML local predictions as input. -Association Discovery is a popular method to find out relations among values -in high-dimensional datasets. - -A common case where association discovery is often used is -market basket analysis. This analysis seeks for customer shopping -patterns across large transactional -datasets. For instance, do customers who buy hamburgers and ketchup also -consume bread? - -Businesses use those insights to make decisions on promotions and product -placements. -Association Discovery can also be used for other purposes such as early -incident detection, web usage analysis, or software intrusion detection. 
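+
+As a sketch (assuming the same iris model as above), a hypothetical batch of
+rows stored in a numpy array can be scored locally by converting each row
+back into an input data dictionary:
+
+.. code-block:: python
+
+    import numpy as np
+
+    from bigml.api import BigML
+    from bigml.fields import Fields
+    from bigml.model import Model
+
+    api = BigML()
+    model = api.get_model("model/5143a51a37203f2cf7000979")
+    fields = Fields(model)
+    local_model = Model(model)
+
+    # two hypothetical input rows, encoded with ``to_numpy``
+    np_batch = np.array([fields.to_numpy({"petal length": 1}),
+                         fields.to_numpy({"petal length": 4.5})])
+    for np_row in np_batch:
+        # decode the numpy row and predict with the local model
+        input_data = fields.from_numpy(np_row)
+        print(local_model.predict(input_data))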
- -In BigML, the Association resource object can be built from any dataset, and -its results are a list of association rules between the items in the dataset. -In the example case, the corresponding -association rule would have hamburguers and ketchup as the items at the -left hand side of the association rule and bread would be the item at the -right hand side. Both sides in this association rule are related, -in the sense that observing -the items in the left hand side implies observing the items in the right hand -side. There are some metrics to ponder the quality of these association rules: - -- Support: the proportion of instances which contain an itemset. - -For an association rule, it means the number of instances in the dataset which -contain the rule's antecedent and rule's consequent together -over the total number of instances (N) in the dataset. - -It gives a measure of the importance of the rule. Association rules have -to satisfy a minimum support constraint (i.e., min_support). - -- Coverage: the support of the antedecent of an association rule. -It measures how often a rule can be applied. - -- Confidence or (strength): The probability of seeing the rule's consequent -under the condition that the instances also contain the rule's antecedent. -Confidence is computed using the support of the association rule over the -coverage. That is, the percentage of instances which contain the consequent -and antecedent together over the number of instances which only contain -the antecedent. - -Confidence is directed and gives different values for the association -rules Antecedent → Consequent and Consequent → Antecedent. Association -rules also need to satisfy a minimum confidence constraint -(i.e., min_confidence). - -- Leverage: the difference of the support of the association -rule (i.e., the antecedent and consequent appearing together) and what would -be expected if antecedent and consequent where statistically independent. -This is a value between -1 and 1. A positive value suggests a positive -relationship and a negative value suggests a negative relationship. -0 indicates independence. - -Lift: how many times more often antecedent and consequent occur together -than expected if they where statistically independent. -A value of 1 suggests that there is no relationship between the antecedent -and the consequent. Higher values suggest stronger positive relationships. -Lower values suggest stronger negative relationships (the presence of the -antecedent reduces the likelihood of the consequent) - -As to the items used in association rules, each type of field is parsed to -extract items for the rules as follows: - -- Categorical: each different value (class) will be considered a separate item. -- Text: each unique term will be considered a separate item. -- Items: each different item in the items summary will be considered. -- Numeric: Values will be converted into categorical by making a -segmentation of the values. -For example, a numeric field with values ranging from 0 to 600 split -into 3 segments: -segment 1 → [0, 200), segment 2 → [200, 400), segment 3 → [400, 600]. -You can refine the behavior of the transformation using -`discretization `_ -and `field_discretizations `_. - -The JSON structure for an association resource is: +If missing values are present, the ``Fields`` object can return a dict +with the ids of the fields that contain missing values and its count. The +following example: .. 
code-block:: python + from bigml.fields import Fields + from bigml.api import BigML + api = BigML() + dataset = api.get_dataset("dataset/5339d42337203f233e000015") - >>> api.pprint(association['object']) - { - "associations":{ - "complement":false, - "discretization":{ - "pretty":true, - "size":5, - "trim":0, - "type":"width" - }, - "items":[ - { - "complement":false, - "count":32, - "field_id":"000000", - "name":"Segment 1", - "bin_end":5, - "bin_start":null - }, - { - "complement":false, - "count":49, - "field_id":"000000", - "name":"Segment 3", - "bin_end":7, - "bin_start":6 - }, - { - "complement":false, - "count":12, - "field_id":"000000", - "name":"Segment 4", - "bin_end":null, - "bin_start":7 - }, - { - "complement":false, - "count":19, - "field_id":"000001", - "name":"Segment 1", - "bin_end":2.5, - "bin_start":null - }, - ... - { - "complement":false, - "count":50, - "field_id":"000004", - "name":"Iris-versicolor" - }, - { - "complement":false, - "count":50, - "field_id":"000004", - "name":"Iris-virginica" - } - ], - "max_k": 100, - "min_confidence":0, - "min_leverage":0, - "min_lift":1, - "min_support":0, - "rules":[ - { - "confidence":1, - "id":"000000", - "leverage":0.22222, - "lhs":[ - 13 - ], - "lhs_cover":[ - 0.33333, - 50 - ], - "lift":3, - "p_value":0.000000000, - "rhs":[ - 6 - ], - "rhs_cover":[ - 0.33333, - 50 - ], - "support":[ - 0.33333, - 50 - ] - }, - { - "confidence":1, - "id":"000001", - "leverage":0.22222, - "lhs":[ - 6 - ], - "lhs_cover":[ - 0.33333, - 50 - ], - "lift":3, - "p_value":0.000000000, - "rhs":[ - 13 - ], - "rhs_cover":[ - 0.33333, - 50 - ], - "support":[ - 0.33333, - 50 - ] - }, - ... - { - "confidence":0.26, - "id":"000029", - "leverage":0.05111, - "lhs":[ - 13 - ], - "lhs_cover":[ - 0.33333, - 50 - ], - "lift":2.4375, - "p_value":0.0000454342, - "rhs":[ - 5 - ], - "rhs_cover":[ - 0.10667, - 16 - ], - "support":[ - 0.08667, - 13 - ] - }, - { - "confidence":0.18, - "id":"00002a", - "leverage":0.04, - "lhs":[ - 15 - ], - "lhs_cover":[ - 0.33333, - 50 - ], - "lift":3, - "p_value":0.0000302052, - "rhs":[ - 9 - ], - "rhs_cover":[ - 0.06, - 9 - ], - "support":[ - 0.06, - 9 - ] - }, - { - "confidence":1, - "id":"00002b", - "leverage":0.04, - "lhs":[ - 9 - ], - "lhs_cover":[ - 0.06, - 9 - ], - "lift":3, - "p_value":0.0000302052, - "rhs":[ - 15 - ], - "rhs_cover":[ - 0.33333, - 50 - ], - "support":[ - 0.06, - 9 - ] - } - ], - "rules_summary":{ - "confidence":{ - "counts":[ - [ - 0.18, - 1 - ], - [ - 0.24, - 1 - ], - [ - 0.26, - 2 - ], - ... - [ - 0.97959, - 1 - ], - [ - 1, - 9 - ] - ], - "maximum":1, - "mean":0.70986, - "median":0.72864, - "minimum":0.18, - "population":44, - "standard_deviation":0.24324, - "sum":31.23367, - "sum_squares":24.71548, - "variance":0.05916 - }, - "k":44, - "leverage":{ - "counts":[ - [ - 0.04, - 2 - ], - [ - 0.05111, - 4 - ], - [ - 0.05316, - 2 - ], - ... - [ - 0.22222, - 2 - ] - ], - "maximum":0.22222, - "mean":0.10603, - "median":0.10156, - "minimum":0.04, - "population":44, - "standard_deviation":0.0536, - "sum":4.6651, - "sum_squares":0.61815, - "variance":0.00287 - }, - "lhs_cover":{ - "counts":[ - [ - 0.06, - 2 - ], - [ - 0.08, - 2 - ], - [ - 0.10667, - 4 - ], - [ - 0.12667, - 1 - ], - ... - [ - 0.5, - 4 - ] - ], - "maximum":0.5, - "mean":0.29894, - "median":0.33213, - "minimum":0.06, - "population":44, - "standard_deviation":0.13386, - "sum":13.15331, - "sum_squares":4.70252, - "variance":0.01792 - }, - "lift":{ - "counts":[ - [ - 1.40625, - 2 - ], - [ - 1.5067, - 2 - ], - ... 
- [ - 2.63158, - 4 - ], - [ - 3, - 10 - ], - [ - 4.93421, - 2 - ], - [ - 12.5, - 2 - ] - ], - "maximum":12.5, - "mean":2.91963, - "median":2.58068, - "minimum":1.40625, - "population":44, - "standard_deviation":2.24641, - "sum":128.46352, - "sum_squares":592.05855, - "variance":5.04635 - }, - "p_value":{ - "counts":[ - [ - 0.000000000, - 2 - ], - [ - 0.000000000, - 4 - ], - [ - 0.000000000, - 2 - ], - ... - [ - 0.0000910873, - 2 - ] - ], - "maximum":0.0000910873, - "mean":0.0000106114, - "median":0.00000000, - "minimum":0.000000000, - "population":44, - "standard_deviation":0.0000227364, - "sum":0.000466903, - "sum_squares":0.0000000, - "variance":0.000000001 - }, - "rhs_cover":{ - "counts":[ - [ - 0.06, - 2 - ], - [ - 0.08, - 2 - ], - ... - [ - 0.42667, - 2 - ], - [ - 0.46667, - 3 - ], - [ - 0.5, - 4 - ] - ], - "maximum":0.5, - "mean":0.29894, - "median":0.33213, - "minimum":0.06, - "population":44, - "standard_deviation":0.13386, - "sum":13.15331, - "sum_squares":4.70252, - "variance":0.01792 - }, - "support":{ - "counts":[ - [ - 0.06, - 4 - ], - [ - 0.06667, - 2 - ], - [ - 0.08, - 2 - ], - [ - 0.08667, - 4 - ], - [ - 0.10667, - 4 - ], - [ - 0.15333, - 2 - ], - [ - 0.18667, - 4 - ], - [ - 0.19333, - 2 - ], - [ - 0.20667, - 2 - ], - [ - 0.27333, - 2 - ], - [ - 0.28667, - 2 - ], - [ - 0.3, - 4 - ], - [ - 0.32, - 2 - ], - [ - 0.33333, - 6 - ], - [ - 0.37333, - 2 - ] - ], - "maximum":0.37333, - "mean":0.20152, - "median":0.19057, - "minimum":0.06, - "population":44, - "standard_deviation":0.10734, - "sum":8.86668, - "sum_squares":2.28221, - "variance":0.01152 - } - }, - "search_strategy":"leverage", - "significance_level":0.05 - }, - "category":0, - "clones":0, - "code":200, - "columns":5, - "created":"2015-11-05T08:06:08.184000", - "credits":0.017581939697265625, - "dataset":"dataset/562fae3f4e1727141d00004e", - "dataset_status":true, - "dataset_type":0, - "description":"", - "excluded_fields":[ ], - "fields_meta":{ - "count":5, - "limit":1000, - "offset":0, - "query_total":5, - "total":5 - }, - "input_fields":[ - "000000", - "000001", - "000002", - "000003", - "000004" - ], - "locale":"en_US", - "max_columns":5, - "max_rows":150, - "name":"iris' dataset's association", - "out_of_bag":false, - "price":0, - "private":true, - "project":null, - "range":[ - 1, - 150 - ], - "replacement":false, - "resource":"association/5621b70910cb86ae4c000000", - "rows":150, - "sample_rate":1, - "shared":false, - "size":4609, - "source":"source/562fae3a4e1727141d000048", - "source_status":true, - "status":{ - "code":5, - "elapsed":1072, - "message":"The association has been created", - "progress":1 - }, - "subscription":false, - "tags":[ ], - "updated":"2015-11-05T08:06:20.403000", - "white_box":false - } -Note that the output in the snippet above has been abbreviated. As you see, -the ``associations`` attribute stores items, rules and metrics extracted -from the datasets as well as the configuration parameters described in -the `developers section `_ . - - -Topic Models ------------- - -A topic model is an unsupervised machine learning method -for unveiling all the different topics -underlying a collection of documents. -BigML uses Latent Dirichlet Allocation (LDA), one of the most popular -probabilistic methods for topic modeling. -In BigML, each instance (i.e. each row in your dataset) will -be considered a document and the contents of all the text fields -given as inputs will be automatically concatenated and considered the -document bag of words. 
- -Topic model is based on the assumption that any document -exhibits a mixture of topics. Each topic is composed of a set of words -which are thematically related. The words from a given topic have different -probabilities for that topic. At the same time, each word can be attributable -to one or several topics. So for example the word "sea" may be found in -a topic related with sea transport but also in a topic related to holidays. -Topic model automatically discards stop words and high -frequency words. - -Topic model's main applications include browsing, organizing and understanding -large archives of documents. It can been applied for information retrieval, -collaborative filtering, assessing document similarity among others. -The topics found in the dataset can also be very useful new features -before applying other models like classification, clustering, or -anomaly detection. - -The JSON structure for a topic model is: - -.. code-block:: python + fields = Fields(dataset) + fields.missing_counts() - >>> api.pprint(topic['object']) - { u'category': 0, - u'code': 200, - u'columns': 1, - u'configuration': None, - u'configuration_status': False, - u'created': u'2016-11-23T23:47:54.703000', - u'credits': 0.0, - u'credits_per_prediction': 0.0, - u'dataset': u'dataset/58362aa0983efc45a0000005', - u'dataset_field_types': { u'categorical': 1, - u'datetime': 0, - u'effective_fields': 672, - u'items': 0, - u'numeric': 0, - u'preferred': 2, - u'text': 1, - u'total': 2}, - u'dataset_status': True, - u'dataset_type': 0, - u'description': u'', - u'excluded_fields': [], - u'fields_meta': { u'count': 1, - u'limit': 1000, - u'offset': 0, - u'query_total': 1, - u'total': 1}, - u'input_fields': [u'000001'], - u'locale': u'en_US', - u'max_columns': 2, - u'max_rows': 656, - u'name': u"spam dataset's Topic Model ", - u'number_of_batchtopicdistributions': 0, - u'number_of_public_topicdistributions': 0, - u'number_of_topicdistributions': 0, - u'ordering': 0, - u'out_of_bag': False, - u'price': 0.0, - u'private': True, - u'project': None, - u'range': [1, 656], - u'replacement': False, - u'resource': u'topicmodel/58362aaa983efc45a1000007', - u'rows': 656, - u'sample_rate': 1.0, - u'shared': False, - u'size': 54740, - u'source': u'source/58362a69983efc459f000001', - u'source_status': True, - u'status': { u'code': 5, - u'elapsed': 3222, - u'message': u'The topic model has been created', - u'progress': 1.0}, - u'subscription': True, - u'tags': [], - u'topic_model': { u'alpha': 4.166666666666667, - u'beta': 0.1, - u'bigrams': False, - u'case_sensitive': False, - u'fields': { u'000001': { u'column_number': 1, - u'datatype': u'string', - u'name': u'Message', - u'optype': u'text', - u'order': 0, - u'preferred': True, - u'summary': { u'average_length': 78.14787, - u'missing_count': 0, - u'tag_cloud': [ [ u'call', - 72], - [ u'ok', - 36], - [ u'gt', - 34], - ... - [ u'worse', - 2], - [ u'worth', - 2], - [ u'write', - 2], - [ u'yest', - 2], - [ u'yijue', - 2]], - u'term_forms': { }}, - u'term_analysis': { u'case_sensitive': False, - u'enabled': True, - u'language': u'en', - u'stem_words': False, - u'token_mode': u'all', - u'use_stopwords': False}}}, - u'hashed_seed': 62146850, - u'language': u'en', - u'number_of_topics': 12, - u'term_limit': 4096, - u'term_topic_assignments': [ [ 0, - 5, - 0, - 1, - 0, - 19, - 0, - 0, - 19, - 0, - 1, - 0], - [ 0, - 0, - 0, - 13, - 0, - 0, - 0, - 0, - 5, - 0, - 0, - 0], - ... 
- [ 0, - 7, - 27, - 0, - 112, - 0, - 0, - 0, - 0, - 0, - 14, - 2]], - u'termset': [ u'000', - u'03', - u'04', - u'06', - u'08000839402', - u'08712460324', - ... - - u'yes', - u'yest', - u'yesterday', - u'yijue', - u'yo', - u'yr', - u'yup', - u'\xfc'], - u'top_n_terms': 10, - u'topicmodel_seed': u'26c386d781963ca1ea5c90dab8a6b023b5e1d180', - u'topics': [ { u'id': u'000000', - u'name': u'Topic 00', - u'probability': 0.09375, - u'top_terms': [ [ u'im', - 0.04849], - [ u'hi', - 0.04717], - [ u'love', - 0.04585], - [ u'please', - 0.02867], - [ u'tomorrow', - 0.02867], - [ u'cos', - 0.02823], - [ u'sent', - 0.02647], - [ u'da', - 0.02383], - [ u'meet', - 0.02207], - [ u'dinner', - 0.01898]]}, - { u'id': u'000001', - u'name': u'Topic 01', - u'probability': 0.08215, - u'top_terms': [ [ u'lt', - 0.1015], - [ u'gt', - 0.1007], - [ u'wish', - 0.03958], - [ u'feel', - 0.0272], - [ u'shit', - 0.02361], - [ u'waiting', - 0.02281], - [ u'stuff', - 0.02001], - [ u'name', - 0.01921], - [ u'comp', - 0.01522], - [ u'forgot', - 0.01482]]}, - ... - { u'id': u'00000b', - u'name': u'Topic 11', - u'probability': 0.0826, - u'top_terms': [ [ u'call', - 0.15084], - [ u'min', - 0.05003], - [ u'msg', - 0.03185], - [ u'home', - 0.02648], - [ u'mind', - 0.02152], - [ u'lt', - 0.01987], - [ u'bring', - 0.01946], - [ u'camera', - 0.01905], - [ u'set', - 0.01905], - [ u'contact', - 0.01781]]}], - u'use_stopwords': False}, - u'updated': u'2016-11-23T23:48:03.336000', - u'white_box': False} - -Note that the output in the snippet above has been abbreviated. - - -The topic model returns a list of top terms for each topic found in the data. -Note that topics are not labeled, so you have to infer their meaning according -to the words they are composed of. - -Once you build the topic model you can calculate each topic probability -for a given document by using Topic Distribution. -This information can be useful to find documents similarities based -on their thematic. - -As you see, -the ``topic_model`` attribute stores the topics and termset and term to -topic assignment, -as well as the configuration parameters described in -the `developers section `_ . - - -Time Series ------------ - -A time series model is a supervised learning method to forecast the future -values of a field based on its previously observed values. -It is used to analyze time based data when historical patterns can explain -the future behavior such as stock prices, sales forecasting, -website traffic, production and inventory analysis, weather forecasting, etc. -A time series model needs to be trained with time series data, -i.e., a field containing a sequence of equally distributed data points in time. - -BigML implements exponential smoothing to train time series models. -Time series data is modeled as a level component and it can optionally -include a trend (damped or not damped) and a seasonality -components. You can learn more about how to include these components and their -use in the `API documentation page `_. - -You can create a time series model selecting one or several fields from -your dataset, that will be the ojective fields. The forecast will compute -their future values. - - -The JSON structure for a time series is: +would output: .. 
code-block:: python - >>> api.pprint(time_series['object']) - { u'category': 0, - u'clones': 0, - u'code': 200, - u'columns': 1, - u'configuration': None, - u'configuration_status': False, - u'created': u'2017-07-15T12:49:42.601000', - u'credits': 0.0, - u'dataset': u'dataset/5968ec42983efc21b0000016', - u'dataset_field_types': { u'categorical': 0, - u'datetime': 0, - u'effective_fields': 6, - u'items': 0, - u'numeric': 6, - u'preferred': 6, - u'text': 0, - u'total': 6}, - u'dataset_status': True, - u'dataset_type': 0, - u'description': u'', - u'fields_meta': { u'count': 1, - u'limit': 1000, - u'offset': 0, - u'query_total': 1, - u'total': 1}, - u'forecast': { u'000005': [ { u'lower_bound': [ 30.14111, - 30.14111, - ... - 30.14111], - u'model': u'A,N,N', - u'point_forecast': [ 68.53181, - 68.53181, - ... - 68.53181, - 68.53181], - u'time_range': { u'end': 129, - u'interval': 1, - u'interval_unit': u'milliseconds', - u'start': 80}, - u'upper_bound': [ 106.92251, - 106.92251, - ... - 106.92251, - 106.92251]}, - { u'lower_bound': [ 35.44118, - 35.5032, - ... - 35.28083], - u'model': u'A,Ad,N', - ... - 66.83537, - 66.9465], - u'time_range': { u'end': 129, - u'interval': 1, - u'interval_unit': u'milliseconds', - u'start': 80}}]}, - u'horizon': 50, - u'locale': u'en_US', - u'max_columns': 6, - u'max_rows': 80, - u'name': u'my_ts_data', - u'name_options': u'period=1, range=[1, 80]', - u'number_of_evaluations': 0, - u'number_of_forecasts': 0, - u'number_of_public_forecasts': 0, - u'objective_field': u'000005', - u'objective_field_name': u'Final', - u'objective_field_type': u'numeric', - u'objective_fields': [u'000005'], - u'objective_fields_names': [u'Final'], - u'price': 0.0, - u'private': True, - u'project': None, - u'range': [1, 80], - u'resource': u'timeseries/596a0f66983efc53f3000000', - u'rows': 80, - u'shared': False, - u'short_url': u'', - u'size': 2691, - u'source': u'source/5968ec3c983efc218c000006', - u'source_status': True, - u'status': { u'code': 5, - u'elapsed': 8358, - u'message': u'The time series has been created', - u'progress': 1.0}, - u'subscription': True, - u'tags': [], - u'time_series': { u'all_numeric_objectives': False, - u'datasets': { u'000005': u'dataset/596a0f70983efc53f3000003'}, - u'ets_models': { u'000005': [ { u'aic': 831.30903, - u'aicc': 831.84236, - u'alpha': 0.00012, - u'beta': 0, - u'bic': 840.83713, - u'final_state': { u'b': 0, - u'l': 68.53181, - u's': [ 0]}, - u'gamma': 0, - u'initial_state': { u'b': 0, - u'l': 68.53217, - u's': [ 0]}, - u'name': u'A,N,N', - u'period': 1, - u'phi': 1, - u'r_squared': -0.0187, - u'sigma': 19.19535}, - { u'aic': 834.43049, - ... - u'slope': 0.11113, - u'value': 61.39}]}, - u'fields': { u'000005': { u'column_number': 5, - u'datatype': u'double', - u'name': u'Final', - u'optype': u'numeric', - u'order': 0, - u'preferred': True, - u'summary': { u'bins': [ [ 28.06, - 1], - [ 34.44, - ... - [ 108.335, - 2]], - ... - u'sum_squares': 389814.3944, - u'variance': 380.73315}}}, - u'period': 1, - u'time_range': { u'end': 79, - u'interval': 1, - u'interval_unit': u'milliseconds', - u'start': 0}}, - u'type': 0, - u'updated': u'2017-07-15T12:49:52.549000', - u'white_box': False} - - -OptiMLs -------- - -An OptiML is the result of an automated optimization process to find the -best model (type and configuration) to solve a particular -classification or regression problem. - -The selection process automates the usual time-consuming task of trying -different models and parameters and evaluating their results to find the -best one. 
Using the OptiML, non-experts can build top-performing models. + {'000003': 1, '000000': 1, '000001': 1} -You can create an OptiML selecting the ojective field to be predicted, the -evaluation metric to be used to rank the models tested in the process and -a maximum time for the task to be run. +if there was a missing value in each of the fields whose ids are +``000003``, ``000000``, ``000001``. -The JSON structure for an OptiML is: +You can also obtain the counts of errors per field using the ``error_counts`` +method of the api: .. code-block:: python - >>> api.pprint(optiml["object"]) - { u'category': 0, - u'code': 200, - u'configuration': None, - u'configuration_status': False, - u'created': u'2018-05-17T20:23:00.060000', - u'creator': u'mmartin', - u'dataset': u'dataset/5afdb7009252732d930009e8', - u'dataset_status': True, - u'datasets': [ u'dataset/5afde6488bf7d551ee00081c', - u'dataset/5afde6488bf7d551fd00511f', - u'dataset/5afde6488bf7d551fe002e0f', - ... - u'dataset/5afde64d8bf7d551fd00512e'], - u'description': u'', - u'evaluations': [ u'evaluation/5afde65c8bf7d551fd00514c', - u'evaluation/5afde65c8bf7d551fd00514f', - ... - u'evaluation/5afde6628bf7d551fd005161'], - u'excluded_fields': [], - u'fields_meta': { u'count': 5, - u'limit': 1000, - u'offset': 0, - u'query_total': 5, - u'total': 5}, - u'input_fields': [u'000000', u'000001', u'000002', u'000003'], - u'model_count': { u'logisticregression': 1, u'model': 8, u'total': 9}, - u'models': [ u'model/5afde64e8bf7d551fd005131', - u'model/5afde64f8bf7d551fd005134', - u'model/5afde6518bf7d551fd005137', - u'model/5afde6538bf7d551fd00513a', - u'logisticregression/5afde6558bf7d551fd00513d', - ... - u'model/5afde65a8bf7d551fd005149'], - u'models_meta': { u'count': 9, u'limit': 1000, u'offset': 0, u'total': 9}, - u'name': u'iris', - u'name_options': u'9 total models (logisticregression: 1, model: 8), metric=max_phi, model candidates=18, max. training time=300', - u'objective_field': u'000004', - u'objective_field_details': { u'column_number': 4, - u'datatype': u'string', - u'name': u'species', - u'optype': u'categorical', - u'order': 4}, - u'objective_field_name': u'species', - u'objective_field_type': u'categorical', - u'objective_fields': [u'000004'], - u'optiml': { u'created_resources': { u'dataset': 10, - u'logisticregression': 11, - u'logisticregression_evaluation': 11, - u'model': 29, - u'model_evaluation': 29}, - u'datasets': [ { u'id': u'dataset/5afde6488bf7d551ee00081c', - u'name': u'iris', - u'name_options': u'120 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.8'}, - { u'id': u'dataset/5afde6488bf7d551fd00511f', - u'name': u'iris', - u'name_options': u'30 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.2, out of bag'}, - { u'id': u'dataset/5afde6488bf7d551fe002e0f', - u'name': u'iris', - u'name_options': u'120 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.8'}, - ... - { u'id': u'dataset/5afde64d8bf7d551fd00512e', - u'name': u'iris', - u'name_options': u'120 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.8'}], - u'fields': { u'000000': { u'column_number': 0, - u'datatype': u'double', - u'name': u'sepal length', - u'optype': u'numeric', - u'order': 0, - u'preferred': True, - u'summary': { u'bins': [ [ 4.3, - 1], - ... - [ 7.9, - 1]], - ... 
- u'sum': 179.9, - u'sum_squares': 302.33, - u'variance': 0.58101}}, - u'000004': { u'column_number': 4, - u'datatype': u'string', - u'name': u'species', - u'optype': u'categorical', - u'order': 4, - u'preferred': True, - u'summary': { u'categories': [ [ u'Iris-setosa', - 50], - [ u'Iris-versicolor', - 50], - [ u'Iris-virginica', - 50]], - u'missing_count': 0}, - u'term_analysis': { u'enabled': True}}}, - u'max_training_time': 300, - u'metric': u'max_phi', - u'model_types': [u'model', u'logisticregression'], - u'models': [ { u'evaluation': { u'id': u'evaluation/5afde65c8bf7d551fd00514c', - u'info': { u'accuracy': 0.96667, - u'average_area_under_pr_curve': 0.97867, - ... - u'per_class_statistics': [ { u'accuracy': 1, - u'area_under_pr_curve': 1, - ... - u'spearmans_rho': 0.82005}]}, - u'metric_value': 0.95356, - u'metric_variance': 0.00079, - u'name': u'iris vs. iris', - u'name_options': u'279-node, deterministic order, operating kind=probability'}, - u'evaluation_count': 3, - u'id': u'model/5afde64e8bf7d551fd005131', - u'importance': [ [ u'000002', - 0.70997], - [ u'000003', - 0.27289], - [ u'000000', - 0.0106], - [ u'000001', - 0.00654]], - u'kind': u'model', - u'name': u'iris', - u'name_options': u'279-node, deterministic order'}, - { u'evaluation': { u'id': u'evaluation/5afde65c8bf7d551fd00514f', - u'info': { u'accuracy': 0.93333, - - ... - [ u'000001', - 0.02133]], - u'kind': u'model', - u'name': u'iris', - u'name_options': u'12-node, randomize, deterministic order, balanced'}], - u'number_of_model_candidates': 18, - u'recent_evaluations': [ 0.90764, - 0.94952, - ... - 0.90427], - u'search_complete': True, - u'summary': { u'logisticregression': { u'best': u'logisticregression/5afde6558bf7d551fd00513d', - u'count': 1}, - u'model': { u'best': u'model/5afde64e8bf7d551fd005131', - u'count': 8}}}, - u'private': True, - u'project': None, - u'resource': u'optiml/5afde4a42a83475c1b0008a2', - u'shared': False, - u'size': 3686, - u'source': u'source/5afdb6fb9252732d930009e5', - u'source_status': True, - u'status': { u'code': 5, - u'elapsed': 448878.0, - u'message': u'The optiml has been created', - u'progress': 1}, - u'subscription': False, - u'tags': [], - u'test_dataset': None, - u'type': 0, - u'updated': u'2018-05-17T20:30:29.063000'} - - -Fusions -------- - -A Fusion is a special type of composed resource for which all -submodels satisfy the following constraints: they're all either -classifications or regressions over the same kind of data or -compatible fields, with the same objective field. Given those -properties, a fusion can be considered a supervised model, -and therefore one can predict with fusions and evaluate them. -Ensembles can be viewed as a kind of fusion subject to the additional -constraints that all its submodels are tree models that, moreover, -have been built from the same base input data, but sampled in particular ways. - -The model types allowed to be a submodel of a fusion are: -deepnet, ensemble, fusion, model, logistic regression and linear regression. - -The JSON structure for an Fusion is: - -.. 
code-block:: python + from bigml.api import BigML + api = BigML() + dataset = api.get_dataset("dataset/5339d42337203f233e000015") + api.error_counts(dataset) - >>> api.pprint(fusion["object"]) - { - "category": 0, - "code": 200, - "configuration": null, - "configuration_status": false, - "created": "2018-05-09T20:11:05.821000", - "credits_per_prediction": 0, - "description": "", - "fields_meta": { - "count": 5, - "limit": 1000, - "offset": 0, - "query_total": 5, - "total": 5 - }, - "fusion": { - "models": [ - { - "id": "ensemble/5af272eb4e1727d378000050", - "kind": "ensemble", - "name": "Iris ensemble", - "name_options": "boosted trees, 1999-node, 16-iteration, deterministic order, balanced" - }, - { - "id": "model/5af272fe4e1727d3780000d6", - "kind": "model", - "name": "Iris model", - "name_options": "1999-node, pruned, deterministic order, balanced" - }, - { - "id": "logisticregression/5af272ff4e1727d3780000d9", - "kind": "logisticregression", - "name": "Iris LR", - "name_options": "L2 regularized (c=1), bias, auto-scaled, missing values, eps=0.001" - } - ] - }, - "importance": { - "000000": 0.05847, - "000001": 0.03028, - "000002": 0.13582, - "000003": 0.4421 - }, - "model_count": { - "ensemble": 1, - "logisticregression": 1, - "model": 1, - "total": 3 - }, - "models": [ - "ensemble/5af272eb4e1727d378000050", - "model/5af272fe4e1727d3780000d6", - "logisticregression/5af272ff4e1727d3780000d9" - ], - "models_meta": { - "count": 3, - "limit": 1000, - "offset": 0, - "total": 3 - }, - "name": "iris", - "name_options": "3 total models (ensemble: 1, logisticregression: 1, model: 1)", - "number_of_batchpredictions": 0, - "number_of_evaluations": 0, - "number_of_predictions": 0, - "number_of_public_predictions": 0, - "objective_field": "000004", - "objective_field_details": { - "column_number": 4, - "datatype": "string", - "name": "species", - "optype": "categorical", - "order": 4 - }, - "objective_field_name": "species", - "objective_field_type": "categorical", - "objective_fields": [ - "000004" - ], - "private": true, - "project": null, - "resource":"fusion/59af8107b8aa0965d5b61138", - "shared": false, - "status": { - "code": 5, - "elapsed": 8420, - "message": "The fusion has been created", - "progress": 1 - }, - "subscription": false, - "tags": [], - "type": 0, - "updated": "2018-05-09T20:11:14.258000" - } - - -PCAs ----- - -A PCA (Principal Component Analysis) resource fits a number of orthogonal -projections (components) to maximally capture the variance in a dataset. This -is a dimensional reduction technique, as it can be used to reduce -the number of inputs for the modeling step. PCA models belong to the -unsupervised class of models (there is no objective field). - -The JSON structure for an PCA is: +The generated output is like the one in ``missing_counts``, that is, the error +counts per field: .. 
code-block:: python - - {'code': 200, - 'error': None, - 'location': 'https://strato.dev.bigml.io/andromeda/pca/5c002572983efc0ac5000003', - 'object': {u'category': 0, - u'code': 200, - u'columns': 2, - u'configuration': None, - u'configuration_status': False, - u'created': u'2018-11-29T17:44:18.359000', - u'creator': u'merce', - u'credits': 0.0, - u'credits_per_prediction': 0.0, - u'dataset': u'dataset/5c00256a983efc0acf000000', - u'dataset_field_types': {u'categorical': 1, - u'datetime': 0, - u'items': 0, - u'numeric': 0, - u'preferred': 2, - u'text': 1, - u'total': 2}, - u'dataset_status': True, - u'description': u'', - u'excluded_fields': [], - u'fields_meta': {u'count': 2, - u'limit': 1000, - u'offset': 0, - u'query_total': 2, - u'total': 2}, - u'input_fields': [u'000000', u'000001'], - u'locale': u'en-us', - u'max_columns': 2, - u'max_rows': 7, - u'name': u'spam 4 words', - u'name_options': u'standardized', - u'number_of_batchprojections': 2, - u'number_of_projections': 0, - u'number_of_public_projections': 0, - u'ordering': 0, - u'out_of_bag': False, - u'pca': {u'components': [[-0.64757, - 0.83392, - 0.1158, - 0.83481, - ... - -0.09426, - -0.08544, - -0.03457]], - u'cumulative_variance': [0.43667, - 0.74066, - 0.87902, - 0.98488, - 0.99561, - 1], - u'eigenvectors': [[-0.3894, - 0.50146, - 0.06963, - ... - -0.56542, - -0.5125, - -0.20734]], - u'fields': {u'000000': {u'column_number': 0, - u'datatype': u'string', - u'name': u'Type', - ... - u'token_mode': u'all', - u'use_stopwords': False}}}, - u'pca_seed': u'2c249dda00fbf54ab4cdd850532a584f286af5b6', - u'standardized': True, - u'text_stats': {u'000001': {u'means': [0.71429, - 0.71429, - 0.42857, - 0.28571], - u'standard_deviations': [0.75593, - 0.75593, - 0.53452, - 0.48795]}}, - u'variance': [0.43667, - 0.30399, - 0.13837, - 0.10585, - 0.01073, - 0.00439]}, - u'price': 0.0, - u'private': True, - u'project': None, - u'range': None, - u'replacement': False, - u'resource': u'pca/5c002572983efc0ac5000003', - u'rows': 7, - u'sample_rate': 1.0, - u'shared': False, - u'size': 127, - u'source': u'source/5c00255e983efc0acd00001b', - u'source_status': True, - u'status': {u'code': 5, - u'elapsed': 1571, - u'message': u'The pca has been created', - u'progress': 1}, - u'subscription': True, - u'tags': [], - u'type': 0, - u'updated': u'2018-11-29T18:13:19.714000', - u'white_box': False}, - 'resource': u'pca/5c002572983efc0ac5000003'} + {'000000': 1} Account and tasks @@ -3533,3999 +615,77 @@ automation and using WhizzML, BigML's Domain Specific Language for Machine Learning, provides them out of the box. Client-side approaches and/or general languages are definitely not the best fit for that. -WhizzML Resources ------------------ +Environment variables +--------------------- -WhizzML is a Domain Specific Language that allows the definition and -execution of ML-centric workflows. Its objective is allowing BigML -users to define their own composite tasks, using as building blocks -the basic resources provided by BigML itself. Using Whizzml they can be -glued together using a higher order, functional, Turing-complete language. -The WhizzML code can be stored and executed in BigML using three kinds of -resources: ``Scripts``, ``Libraries`` and ``Executions``. - -WhizzML ``Scripts`` can be executed in BigML's servers, that is, -in a controlled, fully-scalable environment which takes care of their -parallelization and fail-safe operation. Each execution uses an ``Execution`` -resource to store the arguments and results of the process. 
WhizzML -``Libraries`` store generic code to be shared of reused in other WhizzML -``Scripts``. - -Scripts ------- +The bindings will read some configuration values from environment variables. -In BigML a ``Script`` resource stores WhizzML source code, and the results of -its compilation. Once a WhizzML script is created, it's automatically compiled; -if compilation succeeds, the script can be run, that is, -used as the input for a WhizzML execution resource. +- ``BIGML_USERNAME``: The name of the user in BigML +- ``BIGML_API_KEY``: The API key for authentication in BigML -An example of a ``script`` that would create a ``source`` in BigML using the -contents of a remote file is: +For VPCs or on-site API installs, +other than the general public ``bigml.io`` domain: -.. code-block:: python +- ``BIGML_DOMAIN``: The domain of the BigML API endpoints +- ``BIGML_PROTOCOL``: ``http``/``https`` protocol +- ``BIGML_API_VERSION``: ``andromeda`` version name (empty string if using + PredictServer) +- ``BIGML_SSL_VERIFY``: (``0``/``1``) to set SSL verification - >>> from bigml.api import BigML - >>> api = BigML() - # creating a script directly from the source code. This script creates - # a source uploading data from an s3 repo. You could also create a - # a script by using as first argument the path to a .whizzml file which - # contains your source code. - >>> script = api.create_script( \ - "(create-source {\"remote\" \"s3://bigml-public/csv/iris.csv\"})") - >>> api.ok(script) # waiting for the script compilation to finish - >>> api.pprint(script['object']) - { u'approval_status': 0, - u'category': 0, - u'code': 200, - u'created': u'2016-05-18T16:54:05.666000', - u'description': u'', - u'imports': [], - u'inputs': None, - u'line_count': 1, - u'locale': u'en-US', - u'name': u'Script', - u'number_of_executions': 0, - u'outputs': None, - u'price': 0.0, - u'private': True, - u'project': None, - u'provider': None, - u'resource': u'script/573c9e2db85eee23cd000489', - u'shared': False, - u'size': 59, - u'source_code': u'(create-source {"remote" "s3://bigml-public/csv/iris.csv"})', - u'status': { u'code': 5, - u'elapsed': 4, - u'message': u'The script has been created', - u'progress': 1.0}, - u'subscription': True, - u'tags': [], - u'updated': u'2016-05-18T16:54:05.850000', - u'white_box': False} - -A ``script`` allows to define some variables as ``inputs``. In the previous -example, no input has been defined, but we could modify our code to -allow the user to set the remote file name as input: +If you are using a Predict Server (or a different API URL only for predictions): -.. code-block:: python +- ``BIGML_PREDICTION_DOMAIN``: The domain of the BigML API prediction endpoint +- ``BIGML_PREDICTION_PROTOCOL``: ``http``/``https`` for prediction domain +- ``BIGML_PREDICTION_SSL_VERIFY``: (``0``/``1``) to set SSL verification for + predictions - >>> from bigml.api import BigML - >>> api = BigML() - >>> script = api.create_script( \ - "(create-source {\"remote\" my_remote_data})", - {"inputs": [{"name": "my_remote_data", - "type": "string", - "default": "s3://bigml-public/csv/iris.csv", - "description": "Location of the remote data"}]}) -The ``script`` can also use a ``library`` resource (please, see the -``Libraries`` section below for more details) by including its id in the -``imports`` attribute. Other attributes can be checked at the -`API Developers documentation for Scripts `_. 
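+As an illustrative sketch (the values below are placeholders, not real
+credentials), these variables can also be set programmatically before the
+connection object is created:
+
+.. code-block:: python
+
+    import os
+    from bigml.api import BigML
+
+    # placeholder values; use your own credentials
+    os.environ["BIGML_USERNAME"] = "my_username"
+    os.environ["BIGML_API_KEY"] = "1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b"
+    os.environ["BIGML_DOMAIN"] = "bigml.mycompany.com"  # only for VPCs
+
+    api = BigML()  # picks up the variables above from the environment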
+For users working in an organization: +- ``BIGML_ORGANIZATION``: The ID of the organization -Executions ---------- -To execute in BigML a compiled WhizzML ``script`` you need to create an -``execution`` resource. It's also possible to execute a pipeline of -many compiled scripts in one request. +To use external data connectors: -Each ``execution`` is run under its associated user credentials and its -particular environment constrains. As ``scripts`` can be shared, -different users can execute the same ``script`` using different inputs. -Each particular execution will generate an ``execution`` resource in BigML. +- ``BIGML_EXTERNAL_CONN_HOST``: Host name or IP for the external database +- ``BIGML_EXTERNAL_CONN_PORT``: Port for the external database +- ``BIGML_EXTERNAL_CONN_DB``: Database name +- ``BIGML_EXTERNAL_CONN_USER``: Database user name +- ``BIGML_EXTERNAL_CONN_PWD``: Database user password +- ``BIGML_EXTERNAL_CONN_SOURCE``: Type of database: ``mysql``, ``postgresql``, + ``elasticsearch``, etc. (see details in the + `API documentation for external connectors `_) -As an example of an ``execution`` resource, let's create one for the first -script in the previous section. In this case, no inputs are required because -the ``script`` expects none: +Running the Tests +----------------- +The tests are run using `pytest `_. +You'll need to set up your authentication +via environment variables, as explained +in the authentication section. Also, some of the tests need other environment +variables, like ``BIGML_ORGANIZATION`` to test calls issued by Organization +members, and ``BIGML_EXTERNAL_CONN_HOST``, ``BIGML_EXTERNAL_CONN_PORT``, +``BIGML_EXTERNAL_CONN_DB``, ``BIGML_EXTERNAL_CONN_USER``, +``BIGML_EXTERNAL_CONN_PWD`` and ``BIGML_EXTERNAL_CONN_SOURCE`` +in order to test external data connectors. -.. code-block:: python +With that in place, you can run the test suite simply by issuing - >>> from bigml.api import BigML - >>> api = BigML() - >>> execution = api.create_execution('script/573c9e2db85eee23cd000489') - >>> api.ok(execution) # waiting for the execution to finish - >>> api.pprint(execution['object']) - { u'category': 0, - u'code': 200, - u'created': u'2016-05-18T16:58:01.613000', - u'creation_defaults': { }, - u'description': u'', - u'execution': { u'output_resources': [ { u'code': 1, - u'id': u'source/573c9f19b85eee23c600024a', - u'last_update': 1463590681854, - u'progress': 0.0, - u'state': u'queued', - u'task': u'Queuing job', - u'variable': u''}], - u'outputs': [], - u'result': u'source/573c9f19b85eee23c600024a', - u'results': [u'source/573c9f19b85eee23c600024a'], - u'sources': [[ u'script/573c9e2db85eee23cd000489', - u'']], - u'steps': 16}, - u'inputs': None, - u'locale': u'en-US', - u'name': u"Script's Execution", - u'project': None, - u'resource': u'execution/573c9f19b85eee23bd000125', - u'script': u'script/573c9e2db85eee23cd000489', - u'script_status': True, - u'shared': False, - u'status': { u'code': 5, - u'elapsed': 249, - u'elapsed_times': { u'in-progress': 247, - u'queued': 62, - u'started': 2}, - u'message': u'The execution has been created', - u'progress': 1.0}, - u'subscription': True, - u'tags': [], - u'updated': u'2016-05-18T16:58:02.035000'} - -As you can see, the execution resource contains information about the result -of the execution, the resources that have been generated while executing and -users can define some variables in the code to be exported as outputs. - -An ``execution`` receives inputs, the ones defined in the ``script`` chosen -to be executed, and generates a result.
It can also generate outputs. To -execute a ``script`` that expects some inputs, you will need to specify the -concrete values of those inputs, unless a default value has been assigned -for them in the script's inputs definition. Following the second example in -the previous section, we can execute the script that creates a source from a -URL pointing to a CSV file: +.. code-block:: bash -.. code-block:: python + $ pytest - >>> from bigml.api import BigML - >>> api = BigML() - >>> execution = api.create_execution( \ - script, - {"inputs": [["my_remote_data", - "https://static.bigml.com/csv/iris.csv"]]}) +Additionally, `Tox `_ can be used to +automatically run the test suite in virtual environments for all +supported Python versions. To install Tox: -For more details on executions, please refer to the -`Developers documentation for Executions `_. +.. code-block:: bash -Libraries ---------- + $ pip install tox -The ``library`` resource in BigML stores a special kind of compiled Whizzml -source code that only defines functions and constants. The ``library`` is -intended as an import for executable scripts. -Thus, a compiled library cannot be executed, just used as an -import in other ``libraries`` and ``scripts`` (which then have access -to all identifiers defined in the ``library``). +Then run the tests from the top-level project directory: -As an example, we build a ``library`` to store the definition of two functions: -``mu`` and ``g``. The first one adds one to the value set as argument and -the second one adds two variables and increments the result by one. +.. code-block:: bash - -.. code-block:: python - - >>> from bigml.api import BigML - >>> api = BigML() - >>> library = api.create_library( \ - "(define (mu x) (+ x 1)) (define (g z y) (mu (+ y z)))") - >>> api.ok(library) # waiting for the library compilation to finish - >>> api.pprint(library['object']) - { u'approval_status': 0, - u'category': 0, - u'code': 200, - u'created': u'2016-05-18T18:58:50.838000', - u'description': u'', - u'exports': [ { u'name': u'mu', u'signature': [u'x']}, - { u'name': u'g', u'signature': [u'z', u'y']}], - u'imports': [], - u'line_count': 1, - u'name': u'Library', - u'price': 0.0, - u'private': True, - u'project': None, - u'provider': None, - u'resource': u'library/573cbb6ab85eee23c300018e', - u'shared': False, - u'size': 53, - u'source_code': u'(define (mu x) (+ x 1)) (define (g z y) (mu (+ y z)))', - u'status': { u'code': 5, - u'elapsed': 2, - u'message': u'The library has been created', - u'progress': 1.0}, - u'subscription': True, - u'tags': [], - u'updated': u'2016-05-18T18:58:52.432000', - u'white_box': False} - -Libraries can be imported in scripts. The ``imports`` attribute of a ``script`` -can contain a list of ``library`` IDs whose defined functions -and constants will be ready to be used throughout the ``script``. Please, -refer to the `API Developers documentation for Libraries `_ -for more details. - -Creating Resources ------------------- - -Newly-created resources are returned in a dictionary with the following -keys: - -- **code**: If the request is successful you will get a - ``bigml.api.HTTP_CREATED`` (201) status code. In asynchronous file uploading - ``api.create_source`` calls, it will contain ``bigml.api.HTTP_ACCEPTED`` (202) - status code. Otherwise, it will be - one of the standard HTTP error codes `detailed in the - documentation `_. -- **resource**: The identifier of the new resource. -- **location**: The location of the new resource. 
-- **object**: The resource itself, as computed by BigML. -- **error**: If an error occurs and the resource cannot be created, it - will contain an additional code and a description of the error. In - this case, **location**, and **resource** will be ``None``. - -Statuses -~~~~~~~~ - -Please, bear in mind that resource creation is almost always -asynchronous (**predictions** are the only exception). Therefore, when -you create a new source, a new dataset or a new model, even if you -receive an immediate response from the BigML servers, the full creation -of the resource can take from a few seconds to a few days, depending on -the size of the resource and BigML's load. A resource is not fully -created until its status is ``bigml.api.FINISHED``. See the -`documentation on status -codes `_ for the listing of -potential states and their semantics. So depending on your application -you might need to import the following constants: - -.. code-block:: python - - from bigml.api import WAITING - from bigml.api import QUEUED - from bigml.api import STARTED - from bigml.api import IN_PROGRESS - from bigml.api import SUMMARIZED - from bigml.api import FINISHED - from bigml.api import UPLOADING - from bigml.api import FAULTY - from bigml.api import UNKNOWN - from bigml.api import RUNNABLE - -Usually, you will simply need to wait until the resource is -in the ``bigml.api.FINISHED`` state for further processing. If that's the case, -the easiest way is calling the ``api.ok`` method and passing as first argument -the object that contains your resource: - -.. code-block:: python - - from bigml.api import BigML - api = BigML() # creates a connection to BigML's API - source = api.create_source('my_file.csv') # creates a source object - api.ok(source) # checks that the source is finished and updates ``source`` - -In this code, ``api.create_source`` will probably return a non-finished -``source`` object. Then, ``api.ok`` will query its status and update the -contents of the ``source`` variable with the retrieved information until it -reaches a ``bigml.api.FINISHED`` or ``bigml.api.FAILED`` status. - -If you don't want the contents of the variable to be updated, you can -also use the ``check_resource`` function: - -.. code-block:: python - - check_resource(resource, api.get_source) - -that will constantly query the API until the resource gets to a FINISHED or -FAULTY state, or can also be used with ``wait_time`` (in seconds) -and ``retries`` -arguments to control the polling: - -.. code-block:: python - - check_resource(resource, api.get_source, wait_time=2, retries=20) - -The ``wait_time`` value is used as seed to a wait -interval that grows exponentially with the number of retries up to the given -``retries`` limit. - -However, in other scenarios you might need to control the complete -evolution of the resource, not only its final states. -There, you can query the status of any resource -with the ``status`` method, which simply returns its value and does not -update the contents of the associated variable: - -.. 
code-block:: python - - api.status(source) - api.status(dataset) - api.status(model) - api.status(prediction) - api.status(evaluation) - api.status(ensemble) - api.status(batch_prediction) - api.status(cluster) - api.status(centroid) - api.status(batch_centroid) - api.status(anomaly) - api.status(anomaly_score) - api.status(batch_anomaly_score) - api.status(sample) - api.status(correlation) - api.status(statistical_test) - api.status(logistic_regression) - api.status(association) - api.status(association_set) - api.status(topic_model) - api.status(topic_distribution) - api.status(batch_topic_distribution) - api.status(time_series) - api.status(forecast) - api.status(optiml) - api.status(fusion) - api.status(pca) - api.status(projection) - api.status(batch_projection) - api.status(linear_regression) - api.status(script) - api.status(execution) - api.status(library) - -Remember that, consequently, you will need to retrieve the resources -explicitly in your code to get the updated information. - - -Projects -~~~~~~~~ - -A special kind of resource is ``project``. Projects are repositories -for resources, intended to fulfill organizational purposes. Each project can -contain any other kind of resource, but the project that a certain resource -belongs to is determined by the one used in the ``source`` -they are generated from. Thus, when a source is created -and assigned a certain ``project_id``, the rest of resources generated from -this source will remain in this project. - -The REST calls to manage the ``project`` resemble the ones used to manage the -rest of resources. When you create a ``project``: - -.. code-block:: python - - from bigml.api import BigML - api = BigML() - - project = api.create_project({'name': 'my first project'}) - -the resulting resource is similar to the rest of resources, although shorter: - -.. code-block:: python - - {'code': 201, - 'resource': u'project/54a1bd0958a27e3c4c0002f0', - 'location': 'http://bigml.io/andromeda/project/54a1bd0958a27e3c4c0002f0', - 'object': {u'category': 0, - u'updated': u'2014-12-29T20:43:53.060045', - u'resource': u'project/54a1bd0958a27e3c4c0002f0', - u'name': u'my first project', - u'created': u'2014-12-29T20:43:53.060013', - u'tags': [], - u'private': True, - u'dev': None, - u'description': u''}, - 'error': None} - -and you can use its project id to get, update or delete it: - -.. code-block:: python - - project = api.get_project('project/54a1bd0958a27e3c4c0002f0') - api.update_project(project['resource'], - {'description': 'This is my first project'}) - - api.delete_project(project['resource']) - -**Important**: Deleting a non-empty project will also delete **all resources** -assigned to it, so please be extra-careful when using -the ``api.delete_project`` call. - - -Creating sources -~~~~~~~~~~~~~~~~ - -To create a source from a local data file, you can use the -``create_source`` method. The only required parameter is the path to the -data file (or file-like object). You can use a second optional parameter -to specify any of the -options for source creation described in the `BigML API -documentation `_. - -Here's a sample invocation: - -.. code-block:: python - - from bigml.api import BigML - api = BigML() - - source = api.create_source('./data/iris.csv', - {'name': 'my source', 'source_parser': {'missing_tokens': ['?']}}) - -or you may want to create a source from a file in a remote location: - -.. 
code-block:: python - - source = api.create_source('s3://bigml-public/csv/iris.csv', - {'name': 'my remote source', 'source_parser': {'missing_tokens': ['?']}}) - -or maybe reading the content from stdin: - -.. code-block:: python - - content = StringIO.StringIO(sys.stdin.read()) - source = api.create_source(content, - {'name': 'my stdin source', 'source_parser': {'missing_tokens': ['?']}}) - -or using data stored in a local python variable. The following example -shows the two accepted formats: - -.. code-block:: python - - local = [['a', 'b', 'c'], [1, 2, 3], [4, 5, 6]] - local2 = [{'a': 1, 'b': 2, 'c': 3}, {'a': 4, 'b': 5, 'c': 6}] - source = api.create_source(local, {'name': 'inline source'}) - -As already mentioned, source creation is asynchronous. In both these examples, -the ``api.create_source`` call returns once the file is uploaded. -Then ``source`` will contain a resource whose status code will be either -``WAITING`` or ``QUEUED``. - -For local data files you can go one step further and use asynchronous -uploading: - -.. code-block:: python - - source = api.create_source('./data/iris.csv', - {'name': 'my source', 'source_parser': {'missing_tokens': ['?']}}, - async_load=True) - -In this case, the call fills `source` immediately with a primary resource like: - -.. code-block:: python - - {'code': 202, - 'resource': None, - 'location': None, - 'object': {'status': - {'progress': 0.99, - 'message': 'The upload is in progress', - 'code': 6}}, - 'error': None} - -where the ``source['object']`` status is set to ``UPLOADING`` and its -``progress`` is periodically updated with the current uploading -progress ranging from 0 to 1. When upload completes, this structure will be -replaced by the real resource info as computed by BigML. Therefore source's -status will eventually be (as it is in the synchronous upload case) -``WAITING`` or ``QUEUED``. - -You can retrieve the updated status at any time using the corresponding get -method. For example, to get the status of our source we would use: - -.. code-block:: python - - api.status(source) - -Creating datasets -~~~~~~~~~~~~~~~~~ - -Once you have created a source, you can create a dataset. The only -required argument to create a dataset is a source id. You can add all -the additional arguments accepted by BigML and documented in the -`Datasets section of the Developer's -documentation `_. - -For example, to create a dataset named "my dataset" with the first 1024 -bytes of a source, you can submit the following request: - -.. code-block:: python - - dataset = api.create_dataset(source, {"name": "my dataset", "size": 1024}) - -Upon success, the dataset creation job will be queued for execution, and -you can follow its evolution using ``api.status(dataset)``. - -As for the rest of resources, the create method will return an incomplete -object, that can be updated by issuing the corresponding -``api.get_dataset`` call until it reaches a ``FINISHED`` status. -Then you can export the dataset data to a CSV file using: - -.. code-block:: python - - api.download_dataset('dataset/526fc344035d071ea3031d75', - filename='my_dir/my_dataset.csv') - -You can also extract samples from an existing dataset and generate a new one -with them using the ``api.create_dataset`` method. The first argument should -be the origin dataset and the rest of arguments that set the range or the -sampling rate should be passed as a dictionary. For instance, to create a new -dataset extracting the 80% of instances from an existing one, you could use: - -.. 
code-block:: python - - dataset = api.create_dataset(origin_dataset, {"sample_rate": 0.8}) - -Similarly, if you want to split your source into training and test datasets, -you can set the `sample_rate` as before to create the training dataset and -use the `out_of_bag` option to assign the complementary subset of data to the -test dataset. If you set the `seed` argument to a value of your choice, you -will ensure a deterministic sampling, so that each time you execute this call -you will get the same datasets as a result and they will be complementary: - -.. code-block:: python - - origin_dataset = api.create_dataset(source) - train_dataset = api.create_dataset( - origin_dataset, {"name": "Dataset Name | Training", - "sample_rate": 0.8, "seed": "my seed"}) - test_dataset = api.create_dataset( - origin_dataset, {"name": "Dataset Name | Test", - "sample_rate": 0.8, "seed": "my seed", - "out_of_bag": True}) - -Sometimes, like for time series evaluations, it's important that the data -in your train and test datasets is ordered. In this case, the split -cannot be done at random. You will need to start from an ordered dataset and -decide the ranges devoted to training and testing using the ``range`` -attribute: - -.. code-block:: python - - origin_dataset = api.create_dataset(source) - train_dataset = api.create_dataset( - origin_dataset, {"name": "Dataset Name | Training", - "range": [1, 80]}) - test_dataset = api.create_dataset( - origin_dataset, {"name": "Dataset Name | Test", - "range": [81, 100]}) - - -It is also possible to generate a dataset from a list of datasets -(multidataset): - -.. code-block:: python - - dataset1 = api.create_dataset(source1) - dataset2 = api.create_dataset(source2) - multidataset = api.create_dataset([dataset1, dataset2]) - -Clusters can also be used to generate datasets containing the instances -grouped around each centroid. You will need the cluster id and the centroid id -to reference the dataset to be created. For instance, - -.. code-block:: python - - cluster = api.create_cluster(dataset) - cluster_dataset_1 = api.create_dataset(cluster, - args={'centroid': '000000'}) - -would generate a new dataset containing the subset of instances in the cluster -associated to the centroid id ``000000``. - -Creating models -~~~~~~~~~~~~~~~ - -Once you have created a dataset you can create a model from it. If you don't -select one, the model will use the last field of the dataset as objective -field. The only required argument to create a model is a dataset id. -You can also -include in the request all the additional arguments accepted by BigML -and documented in the `Models section of the Developer's -documentation `_. - -For example, to create a model only including the first two fields and -the first 10 instances in the dataset, you can use the following -invocation: - -.. code-block:: python - - model = api.create_model(dataset, { - "name": "my model", "input_fields": ["000000", "000001"], "range": [1, 10]}) - -Again, the model is scheduled for creation, and you can retrieve its -status at any time by means of ``api.status(model)``. - -Models can also be created from lists of datasets. Just use the list of ids -as the first argument in the api call - -.. code-block:: python - - model = api.create_model([dataset1, dataset2], { - "name": "my model", "input_fields": ["000000", "000001"], "range": [1, 10]}) - -And they can also be generated as the result of a clustering procedure. 
When -a cluster is created, a model that predicts if a certain instance belongs to -a concrete centroid can be built by providing the cluster and centroid ids: - -.. code-block:: python - - model = api.create_model(cluster, { - "name": "model for centroid 000001", "centroid": "000001"}) - -if no centroid id is provided, the first one appearing in the cluster is used. - -Creating clusters -~~~~~~~~~~~~~~~~~ - -If your dataset has no fields showing the objective information to -predict for the training data, you can still build a cluster -that will group similar data around -some automatically chosen points (centroids). Again, the only required -argument to create a cluster is the dataset id. You can also -include in the request all the additional arguments accepted by BigML -and documented in the `Clusters section of the Developer's -documentation `_. - -Let's create a cluster from a given dataset: - -.. code-block:: python - - cluster = api.create_cluster(dataset, {"name": "my cluster", - "k": 5}) - -that will create a cluster with 5 centroids. - -Creating anomaly detectors -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If your problem is finding the anomalous data in your dataset, you can -build an anomaly detector, that will use iforest to single out the -anomalous records. Again, the only required -argument to create an anomaly detector is the dataset id. You can also -include in the request all the additional arguments accepted by BigML -and documented in the `Anomaly detectors section of the Developer's -documentation `_. - -Let's create an anomaly detector from a given dataset: - -.. code-block:: python - - anomaly = api.create_anomaly(dataset, {"name": "my anomaly"}) - -that will create an anomaly resource with a `top_anomalies` block of the -most anomalous points. - -Creating associations -~~~~~~~~~~~~~~~~~~~~~ - -To find relations between the field values you can create an association -discovery resource. The only required argument to create an association -is a dataset id. -You can also -include in the request all the additional arguments accepted by BigML -and documented in the `Association section of the Developer's -documentation `_. - -For example, to create an association only including the first two fields and -the first 10 instances in the dataset, you can use the following -invocation: - -.. code-block:: python - - association = api.create_association(dataset, { \ - "name": "my association", "input_fields": ["000000", "000001"], \ - "range": [1, 10]}) - -Again, the association is scheduled for creation, and you can retrieve its -status at any time by means of ``api.status(association)``. - -Associations can also be created from lists of datasets. Just use the -list of ids as the first argument in the api call - -.. code-block:: python - - association = api.create_association([dataset1, dataset2], { \ - "name": "my association", "input_fields": ["000000", "000001"], \ - "range": [1, 10]}) - - -Creating topic models -~~~~~~~~~~~~~~~~~~~~~ - -To find which topics do your documents refer to you can create a topic model. -The only required argument to create a topic model -is a dataset id. -You can also -include in the request all the additional arguments accepted by BigML -and documented in the `Topic Model section of the Developer's -documentation `_. - -For example, to create a topic model including exactly 32 topics -you can use the following -invocation: - -.. 

.. code-block:: python

    topic_model = api.create_topic_model(dataset, {
        "name": "my topics", "number_of_topics": 32})

Again, the topic model is scheduled for creation, and you can retrieve its
status at any time by means of ``api.status(topic_model)``.

Topic models can also be created from lists of datasets. Just use the
list of ids as the first argument in the api call:

.. code-block:: python

    topic_model = api.create_topic_model([dataset1, dataset2], {
        "name": "my topics", "number_of_topics": 32})


Creating time series
~~~~~~~~~~~~~~~~~~~~

To forecast the behaviour of any numeric variable that depends on its
historical records you can use a time series.
The only required argument to create a time series
is a dataset id.
You can also
include in the request all the additional arguments accepted by BigML
and documented in the `Time Series section of the Developer's
documentation `_.

For example, to create a time series including a forecast of 10 points
for the numeric values you can use the following
invocation:

.. code-block:: python

    time_series = api.create_time_series(dataset, {
        "name": "my time series", "horizon": 10})

Again, the time series is scheduled for creation, and you can retrieve its
status at any time by means of ``api.status(time_series)``.

Time series can also be created from lists of datasets. Just use the
list of ids as the first argument in the api call:

.. code-block:: python

    time_series = api.create_time_series([dataset1, dataset2], {
        "name": "my time series", "horizon": 10})


Creating OptiML
~~~~~~~~~~~~~~~

To create an OptiML, the only required argument is a dataset id.
You can also
include in the request all the additional arguments accepted by BigML
and documented in the `OptiML section of the Developer's
documentation `_.

For example, to create an OptiML that optimizes the accuracy of the models,
you can use the following method:

.. code-block:: python

    optiml = api.create_optiml(dataset, {
        "name": "my optiml", "metric": "accuracy"})

The OptiML is then scheduled for creation, and you can retrieve its
status at any time by means of ``api.status(optiml)``.


Creating Fusion
~~~~~~~~~~~~~~~

To create a Fusion, the only required argument is a list of models.
You can also
include in the request all the additional arguments accepted by BigML
and documented in the `Fusion section of the Developer's
documentation `_.

For example, to create a Fusion you can use this method:

.. code-block:: python

    fusion = api.create_fusion(["model/5af06df94e17277501000010",
                                "model/5af06df84e17277502000019",
                                "deepnet/5af06df84e17277502000016",
                                "ensemble/5af06df74e1727750100000d"],
                               {"name": "my fusion"})

The Fusion is then scheduled for creation, and you can retrieve its
status at any time by means of ``api.status(fusion)``.

Fusions can also be created by assigning some weights to each model in the
list. In this case, the argument for the create call will be a list of
dictionaries that contain the ``id`` and ``weight`` keys:

.. code-block:: python

    fusion = api.create_fusion([{"id": "model/5af06df94e17277501000010",
                                 "weight": 10},
                                {"id": "model/5af06df84e17277502000019",
                                 "weight": 20},
                                {"id": "deepnet/5af06df84e17277502000016",
                                 "weight": 5}],
                               {"name": "my weighted fusion"})


Creating predictions
~~~~~~~~~~~~~~~~~~~~

You can now use the model resource identifier together with some input
parameters to ask for predictions, using the ``create_prediction``
method. You can also give the prediction a name:

.. code-block:: python

    prediction = api.create_prediction(model,
                                       {"sepal length": 5,
                                        "sepal width": 2.5},
                                       {"name": "my prediction"})

To see the prediction you can use ``pprint``:

.. code-block:: python

    api.pprint(prediction)

Predictions can be created using any supervised model (model, ensemble,
logistic regression, linear regression, deepnet and fusion) as first
argument.

Creating centroids
~~~~~~~~~~~~~~~~~~

To obtain the centroid associated with new input data, you
can now use the ``create_centroid`` method. Give the method a cluster
identifier and the input data to obtain the centroid.
You can also give the centroid prediction a name:

.. code-block:: python

    centroid = api.create_centroid(cluster,
                                   {"pregnancies": 0,
                                    "plasma glucose": 118,
                                    "blood pressure": 84,
                                    "triceps skin thickness": 47,
                                    "insulin": 230,
                                    "bmi": 45.8,
                                    "diabetes pedigree": 0.551,
                                    "age": 31,
                                    "diabetes": "true"},
                                   {"name": "my centroid"})

Creating anomaly scores
~~~~~~~~~~~~~~~~~~~~~~~

To obtain the anomaly score associated with new input data, you
can now use the ``create_anomaly_score`` method. Give the method an anomaly
detector identifier and the input data to obtain the score:

.. code-block:: python

    anomaly_score = api.create_anomaly_score(anomaly, {"src_bytes": 350},
                                             args={"name": "my score"})

Creating association sets
~~~~~~~~~~~~~~~~~~~~~~~~~

Using the association resource, you can obtain the consequent items
associated by its rules to your input data. These association sets can be
obtained by calling the ``create_association_set`` method. The first
argument is the association ID or object and the next one is the input data.

.. code-block:: python

    association_set = api.create_association_set(
        association, {"genres": "Action$Adventure"},
        args={"name": "my association set"})


Creating topic distributions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

To obtain the topic distributions associated with new input data, you
can now use the ``create_topic_distribution`` method. Give
the method a topic model identifier and the input data to obtain the
distribution:

.. code-block:: python

    topic_distribution = api.create_topic_distribution(
        topic_model,
        {"Message": "The bubble exploded in 2007."},
        args={"name": "my topic distribution"})


Creating forecasts
~~~~~~~~~~~~~~~~~~

To obtain the forecast associated with a numeric variable, you
can now use the ``create_forecast`` method. Give
the method a time series identifier and the input data to obtain the
forecast:

.. code-block:: python

    forecast = api.create_forecast(
        time_series,
        {"Final": {"horizon": 10}})


Creating projections
~~~~~~~~~~~~~~~~~~~~

You can now use the PCA resource identifier together with some input
parameters to ask for the corresponding projections,
using the ``create_projection``
method. You can also give the projection a name:

.. code-block:: python

    projection = api.create_projection(pca,
                                       {"sepal length": 5,
                                        "sepal width": 2.5},
                                       {"name": "my projection"})


Creating evaluations
~~~~~~~~~~~~~~~~~~~~

Once you have created a supervised learning model,
you can measure its performance by running a
dataset of test data through it and comparing its predictions to the real
values of the objective field. Thus, the required arguments to create an
evaluation are a model id and a dataset id. You can also
include in the request all the additional arguments accepted by BigML
and documented in the `Evaluations section of the Developer's
documentation `_.

For instance, to evaluate a previously created model using an existing
dataset you can use the following call:

.. code-block:: python

    evaluation = api.create_evaluation(model, dataset, {
        "name": "my evaluation"})

Again, the evaluation is scheduled for creation and ``api.status(evaluation)``
will show its state.

Evaluations can also check the ensembles' performance. To evaluate an
ensemble you can do exactly what we just did for the model case, using the
ensemble object instead of the model as first argument:

.. code-block:: python

    evaluation = api.create_evaluation(ensemble, dataset)

Evaluations can be created using any supervised model (including time series)
as first argument.
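
Once the evaluation is finished, its ``result`` attribute holds the
performance measures. As a hedged sketch, reading a classification model's
accuracy (assuming the ``evaluation`` created above and the bindings'
``api.ok`` method, which waits until a resource is finished) could look
like:

.. code-block:: python

    api.ok(evaluation)  # blocks until the evaluation reaches a final status
    # the result block contains the measures for the model and for the
    # baseline classifiers
    print(evaluation['object']['result']['model']['accuracy'])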

Creating ensembles
~~~~~~~~~~~~~~~~~~

To improve the performance of your predictions, you can create an ensemble
of models and combine their individual predictions.
The only required argument to create an ensemble is the dataset id:

.. code-block:: python

    ensemble = api.create_ensemble('dataset/5143a51a37203f2cf7000972')

BigML offers three kinds of ensembles. Two of them are known as ``Decision
Forests`` because they are built as collections of ``Decision trees``
whose predictions
are aggregated using different combiners (``plurality``,
``confidence weighted``, ``probability weighted``) or setting a ``threshold``
to issue the ensemble's
prediction. All ``Decision Forests`` use bagging to sample the
data used to build the underlying models.

As an example of how to create a ``Decision Forest``
with ``20`` models, you only need to provide the dataset ID that you want to
build the ensemble from and the number of models:

.. code-block:: python

    args = {'number_of_models': 20}
    ensemble = api.create_ensemble('dataset/5143a51a37203f2cf7000972', args)

If no ``number_of_models`` is provided, the ensemble will contain 10 models.

``Random Decision Forests`` also fall
into the ``Decision Forest`` category,
but they only use a subset of the fields chosen
at random at each split. To create this kind of ensemble, just use the
``randomize`` option:

.. code-block:: python

    args = {'number_of_models': 20, 'randomize': True}
    ensemble = api.create_ensemble('dataset/5143a51a37203f2cf7000972', args)

The third kind of ensemble is ``Boosted Trees``. This type of ensemble uses
quite a different algorithm. The trees in the ensemble don't use the field
you want to predict as their objective field, and their individual votes are
not aggregated. Instead, the goal is adjusting the coefficients
of a function that will be used to predict. The
models' objective is, therefore, the gradient that minimizes the error
of the predicting function (when comparing its output
with the real values). The process starts with
some initial values and computes these gradients. The next step uses the
previous fields plus the last computed gradient field as
the new initial state for the next iteration.
The process stops when the error is smaller than a certain threshold
or when the number of iterations reaches a user-defined limit.
In classification problems, every category in the ensemble's objective field
would be associated with a subset of the ``Boosted Trees``. The objective of
each subset of trees
is adjusting the function to the probability of belonging
to this particular category.

In order to build
an ensemble of ``Boosted Trees`` you need to provide the ``boosting``
attributes. You can learn about the existing attributes in the `ensembles'
section of the API documentation `_,
but a typical attribute to be set would
be the maximum number of iterations:

.. code-block:: python

    args = {'boosting': {'iterations': 20}}
    ensemble = api.create_ensemble('dataset/5143a51a37203f2cf7000972', args)


Creating linear regressions
~~~~~~~~~~~~~~~~~~~~~~~~~~~

For regression problems, you can also choose linear regressions to model
your data. Linear regressions expect the predicted value for the objective
field to be computable as a linear combination of the predictors.

Like the rest of the models, linear regressions can be created from a
dataset by calling the corresponding create method:

.. code-block:: python

    linear_regression = api.create_linear_regression(
        'dataset/5143a51a37203f2cf7000972',
        {"name": "my linear regression",
         "objective_field": "my_objective_field"})

In this example, we created a linear regression named
``my linear regression`` and set the objective field to be
``my_objective_field``. Other arguments, like ``bias``,
can also be specified as attributes in the arguments dictionary at
creation time.
Particularly for categorical fields, there are three different available
``field_codings`` options (``contrast``, ``other`` or the ``dummy``
default coding). For a more detailed description of the
``field_codings`` attribute and its syntax, please see the `Developers API
Documentation
`_.



Creating logistic regressions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

For classification problems, you can also choose logistic regressions to
model your data. Logistic regressions compute a probability associated with
each class in the objective field. The probability is obtained using a
logistic function, whose argument is a linear combination of the field
values.

Like the rest of the models, logistic regressions can be created from a
dataset by calling the corresponding create method:

.. code-block:: python

    logistic_regression = api.create_logistic_regression(
        'dataset/5143a51a37203f2cf7000972',
        {"name": "my logistic regression",
         "objective_field": "my_objective_field"})

In this example, we created a logistic regression named
``my logistic regression`` and set the objective field to be
``my_objective_field``. Other arguments, like ``bias``, ``missing_numerics``
and ``c`` can also be specified as attributes in the arguments dictionary at
creation time.
Particularly for categorical fields, there are four different available
``field_codings`` options (``dummy``, ``contrast``, ``other`` or the
``one-hot`` default coding). For a more detailed description of the
``field_codings`` attribute and its syntax, please see the `Developers API
Documentation
`_.
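
As a hedged sketch of the ``field_codings`` syntax (the authoritative
reference is the API documentation; the categorical field name ``species``
and the ``Iris-setosa`` reference class below are just illustrative
assumptions):

.. code-block:: python

    logistic_regression = api.create_logistic_regression(
        'dataset/5143a51a37203f2cf7000972',
        {"name": "my logistic regression",
         # dummy-code the species field, using Iris-setosa as the
         # reference class
         "field_codings": [{"field": "species",
                            "coding": "dummy",
                            "dummy_class": "Iris-setosa"}]})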

Creating deepnets
~~~~~~~~~~~~~~~~~

Deepnets can also solve classification and regression problems.
Deepnets are an optimized version of Deep Neural Networks,
a class of machine-learned models inspired by the neural
circuitry of the human brain. In these classifiers, the input features
are fed to a group of "nodes" called a "layer".
Each node is essentially a function on the input that
transforms the input features into another value or collection of values.
Then the entire layer transforms an input vector into a new "intermediate"
feature vector. This new vector is fed as input to another layer of nodes.
This process continues layer by layer, until we reach the final "output"
layer of nodes, where the output is the network's prediction: an array
of per-class probabilities for classification problems or a single,
real value for regression problems.

For classification problems, deepnet predictions compute a probability
associated with each class in the objective field.
Like the rest of the models, deepnets can be created from a dataset by
calling the corresponding create method:

.. code-block:: python

    deepnet = api.create_deepnet(
        'dataset/5143a51a37203f2cf7000972',
        {"name": "my deepnet",
         "objective_field": "my_objective_field"})

In this example, we created a deepnet named
``my deepnet`` and set the objective field to be
``my_objective_field``. Other arguments, like ``number_of_hidden_layers``,
``learning_rate``
and ``missing_numerics`` can also be specified as attributes
in an arguments dictionary at
creation time. For a more detailed description of the
available attributes and their syntax, please see the `Developers API
Documentation
`_.

Creating PCAs
~~~~~~~~~~~~~

In order to reduce the number of features used in the modeling steps,
you can use a PCA (Principal Component Analysis) to find the
combination of features that best describes the variance of your data.
Like the rest of the models, PCAs can be created from a dataset by
calling the corresponding create method:

.. code-block:: python

    pca = api.create_pca(
        'dataset/5143a51a37203f2cf7000972',
        {"name": "my PCA"})

In this example, we created a PCA named
``my PCA``. Other arguments, like ``standardized``,
can also be specified as attributes in the arguments dictionary at
creation time. For more details, please see the `Developers API
Documentation
`_.


Creating batch predictions
~~~~~~~~~~~~~~~~~~~~~~~~~~

We have shown how to create predictions individually, but when the number
of predictions to make increases, this procedure is far from optimal. In
this case, the more efficient way of predicting remotely is to create a
dataset containing the input data you want your model to predict from and
to give its id and the one of the model to the ``create_batch_prediction``
api call:

.. code-block:: python

    batch_prediction = api.create_batch_prediction(model, dataset, {
        "name": "my batch prediction", "all_fields": True,
        "header": True,
        "confidence": True})

In this example, setting ``all_fields`` to ``True`` causes the input
data to be included in the prediction output, ``header`` controls whether a
header row is included in the file or not and ``confidence`` set to ``True``
causes the confidence of the prediction to be appended. If none of these
arguments is given, the resulting file will contain the name of the
objective field as a header row followed by the predictions.

As for the rest of resources, the create method will return an incomplete
object that can be updated by issuing the corresponding
``api.get_batch_prediction`` call until it reaches a ``FINISHED`` status.
Then you can download the created predictions file using:

.. code-block:: python

    api.download_batch_prediction('batchprediction/526fc344035d071ea3031d70',
                                  filename='my_dir/my_predictions.csv')

that will copy the output predictions to the local file given in
``filename``. If no ``filename`` is provided, the method returns a file-like
object that can be read as a stream:

.. code-block:: python

    CHUNK_SIZE = 1024
    response = api.download_batch_prediction(
        'batchprediction/526fc344035d071ea3031d70')
    chunk = response.read(CHUNK_SIZE)
    if chunk:
        print(chunk)

The output of a batch prediction can also be transformed to a source object
using the ``source_from_batch_prediction`` method in the api:

.. code-block:: python

    api.source_from_batch_prediction(
        'batchprediction/526fc344035d071ea3031d70',
        args={'name': 'my_batch_prediction_source'})

This code will create a new source object that can be used again as a
starting point to generate datasets.


Creating batch centroids
~~~~~~~~~~~~~~~~~~~~~~~~

As described in the previous section, it is also possible to make centroid
predictions in batch. First you create a dataset
containing the input data you want your cluster to relate to a centroid.
The ``create_batch_centroid`` call will need the id of the input
data dataset and the
cluster used to assign a centroid to each instance:

.. code-block:: python

    batch_centroid = api.create_batch_centroid(cluster, dataset, {
        "name": "my batch centroid", "all_fields": True,
        "header": True})

Creating batch anomaly scores
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Input data can also be assigned an anomaly score in batch. You train an
anomaly detector with your training data and then build a dataset from your
input data. The ``create_batch_anomaly_score`` call will need the id
of the dataset and of the
anomaly detector to assign an anomaly score to each input data instance:

.. code-block:: python

    batch_anomaly_score = api.create_batch_anomaly_score(anomaly, dataset, {
        "name": "my batch anomaly score", "all_fields": True,
        "header": True})

Creating batch topic distributions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Input data can also be assigned a topic distribution in batch. You train a
topic model with your training data and then build a dataset from your
input data. The ``create_batch_topic_distribution`` call will need the id
of the dataset and of the
topic model to assign a topic distribution to each input data instance:

.. code-block:: python

    batch_topic_distribution = api.create_batch_topic_distribution(
        topic_model, dataset, {
            "name": "my batch topic distribution", "all_fields": True,
            "header": True})

Creating batch projections
~~~~~~~~~~~~~~~~~~~~~~~~~~

Input data can also be assigned a projection in batch. You train a
PCA with your training data and then build a dataset from your
input data. The ``create_batch_projection`` call will need the id
of the input data dataset and of the
PCA to compute the projection that corresponds to each input data instance:

.. code-block:: python

    batch_projection = api.create_batch_projection(
        pca, dataset, {
            "name": "my batch pca", "all_fields": True,
            "header": True})


Reading Resources
-----------------

When retrieved individually, resources are returned as a dictionary
identical to the one you get when you create a new resource. However,
the status code will be ``bigml.api.HTTP_OK`` if the resource can be
retrieved without problems, or one of the HTTP standard error codes
otherwise.
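
For instance, a short sketch (the source id below is just a placeholder)
that checks the response code before reading the resource contents:

.. code-block:: python

    from bigml.api import BigML, HTTP_OK

    api = BigML()
    source = api.get_source('source/526fc344035d071ea3031d70')
    # make sure the retrieval succeeded before using the resource
    if source['code'] == HTTP_OK:
        print(source['object']['name'])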

Listing Resources
-----------------

You can list resources with the appropriate api method:

.. code-block:: python

    api.list_sources()
    api.list_datasets()
    api.list_models()
    api.list_predictions()
    api.list_evaluations()
    api.list_ensembles()
    api.list_batch_predictions()
    api.list_clusters()
    api.list_centroids()
    api.list_batch_centroids()
    api.list_anomalies()
    api.list_anomaly_scores()
    api.list_batch_anomaly_scores()
    api.list_projects()
    api.list_samples()
    api.list_correlations()
    api.list_statistical_tests()
    api.list_logistic_regressions()
    api.list_linear_regressions()
    api.list_associations()
    api.list_association_sets()
    api.list_topic_models()
    api.list_topic_distributions()
    api.list_batch_topic_distributions()
    api.list_time_series()
    api.list_deepnets()
    api.list_fusions()
    api.list_pcas()
    api.list_projections()
    api.list_batch_projections()
    api.list_forecasts()
    api.list_scripts()
    api.list_libraries()
    api.list_executions()

you will receive a dictionary with the following keys:

- **code**: If the request is successful you will get a
  ``bigml.api.HTTP_OK`` (200) status code. Otherwise, it will be one of
  the standard HTTP error codes. See `BigML documentation on status
  codes `_ for more info.
- **meta**: A dictionary including the following keys that can help you
  paginate listings:

  - **previous**: Path to get the previous page or ``None`` if there
    is no previous page.
  - **next**: Path to get the next page or ``None`` if there is no
    next page.
  - **offset**: How far off from the first entry in the resources is
    the first one listed in the resources key.
  - **limit**: Maximum number of resources that you will get listed in
    the resources key.
  - **total\_count**: The total number of resources in BigML.

- **objects**: A list of resources as returned by BigML.
- **error**: If an error occurs and the resources cannot be listed, it
  will contain an additional code and a description of the error. In
  this case, **meta** and **objects** will be ``None``.
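
These ``meta`` entries can be used to paginate long listings. A minimal
sketch that walks through all the pages of a dataset listing (the page size
of 100 is just an example) could be:

.. code-block:: python

    datasets = []
    offset = 0
    while True:
        listing = api.list_datasets("limit=100;offset=%s" % offset)
        datasets.extend(listing['objects'])
        # the meta information tells us whether there is a next page
        if listing['meta']['next'] is None:
            break
        offset += 100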

Filtering Resources
~~~~~~~~~~~~~~~~~~~

You can filter resources in listings using the syntax and fields labeled
as *filterable* in the `BigML
documentation `_ for each resource.

A few examples:

Ids of the first 5 sources created before April 1st, 2012
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

::

    [source['resource'] for source in
     api.list_sources("limit=5;created__lt=2012-04-1")['objects']]

Name of the first 10 datasets bigger than 1MB
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

::

    [dataset['name'] for dataset in
     api.list_datasets("limit=10;size__gt=1048576")['objects']]

Name of models with more than 5 fields (columns)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

::

    [model['name'] for model in api.list_models("columns__gt=5")['objects']]

Ids of predictions whose model has not been deleted
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

::

    [prediction['resource'] for prediction in
     api.list_predictions("model_status=true")['objects']]

Ordering Resources
~~~~~~~~~~~~~~~~~~

You can order resources in listings using the syntax and fields labeled
as *sortable* in the `BigML
documentation `_ for each resource.

A few examples:

Name of sources ordered by size
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

::

    [source['name'] for source in api.list_sources("order_by=size")['objects']]

Number of instances in datasets created before April 1st, 2012 ordered by size
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

::

    [dataset['rows'] for dataset in
     api.list_datasets("created__lt=2012-04-1;order_by=size")['objects']]

Model ids ordered by number of predictions (in descending order)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

::

    [model['resource'] for model in
     api.list_models("order_by=-number_of_predictions")['objects']]

Name of predictions ordered by name
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

::

    [prediction['name'] for prediction in
     api.list_predictions("order_by=name")['objects']]

Updating Resources
------------------

When you update a resource, it is returned in a dictionary exactly like
the one you get when you create a new one. However, the status code will
be ``bigml.api.HTTP_ACCEPTED`` if the resource can be updated without
problems or one of the HTTP standard error codes otherwise.

.. code-block:: python

    api.update_source(source, {"name": "new name"})
    api.update_dataset(dataset, {"name": "new name"})
    api.update_model(model, {"name": "new name"})
    api.update_prediction(prediction, {"name": "new name"})
    api.update_evaluation(evaluation, {"name": "new name"})
    api.update_ensemble(ensemble, {"name": "new name"})
    api.update_batch_prediction(batch_prediction, {"name": "new name"})
    api.update_cluster(cluster, {"name": "new name"})
    api.update_centroid(centroid, {"name": "new name"})
    api.update_batch_centroid(batch_centroid, {"name": "new name"})
    api.update_anomaly(anomaly, {"name": "new name"})
    api.update_anomaly_score(anomaly_score, {"name": "new name"})
    api.update_batch_anomaly_score(batch_anomaly_score, {"name": "new name"})
    api.update_project(project, {"name": "new name"})
    api.update_correlation(correlation, {"name": "new name"})
    api.update_statistical_test(statistical_test, {"name": "new name"})
    api.update_logistic_regression(logistic_regression, {"name": "new name"})
    api.update_linear_regression(linear_regression, {"name": "new name"})
    api.update_association(association, {"name": "new name"})
    api.update_association_set(association_set, {"name": "new name"})
    api.update_topic_model(topic_model, {"name": "new name"})
    api.update_topic_distribution(topic_distribution, {"name": "new name"})
    api.update_batch_topic_distribution(
        batch_topic_distribution, {"name": "new name"})
    api.update_time_series(
        time_series, {"name": "new name"})
    api.update_forecast(
        forecast, {"name": "new name"})
    api.update_deepnet(deepnet, {"name": "new name"})
    api.update_fusion(fusion, {"name": "new name"})
    api.update_pca(pca, {"name": "new name"})
    api.update_projection(projection, {"name": "new name"})
    api.update_batch_projection(batch_projection, {"name": "new name"})
    api.update_script(script, {"name": "new name"})
    api.update_library(library, {"name": "new name"})
    api.update_execution(execution, {"name": "new name"})

Updates can change resource general properties, such as the ``name`` or
``description`` attributes of a dataset, or specific properties, like
the ``missing_tokens`` (strings considered as missing values). As an example,
let's say that your source has a certain field whose contents are
numeric integers. BigML will assign a numeric type to the field, but you
might want it to be used as a categorical field. You could change
its type to ``categorical`` by calling:

.. code-block:: python

    api.update_source(source,
                      {"fields": {"000001": {"optype": "categorical"}}})

where ``000001`` is the field id that corresponds to the updated field.

Another commonly needed update is changing a field's ``preferred``
attribute,
so that it can be used in the modeling process:


.. code-block:: python

    api.update_dataset(dataset, {"fields": {"000001": {"preferred": True}}})

where you would be setting as ``preferred`` the field whose id is ``000001``.

You may also want to change the name of one of the clusters found in your
clustering:


.. code-block:: python

    api.update_cluster(cluster,
                       {"clusters": {"000001": {"name": "my cluster"}}})

which is changing the name of the cluster whose centroid id is ``000001`` to
``my cluster``. Or, similarly, changing the name of one detected topic:

.. code-block:: python

    api.update_topic_model(topic_model,
                           {"topics": {"000001": {"name": "my topic"}}})


You will find detailed information about
the updatable attributes of each resource in
`BigML developer's documentation `_.

Deleting Resources
------------------

Resources can be deleted individually using the corresponding method for
each type of resource.

.. code-block:: python

    api.delete_source(source)
    api.delete_dataset(dataset)
    api.delete_model(model)
    api.delete_prediction(prediction)
    api.delete_evaluation(evaluation)
    api.delete_ensemble(ensemble)
    api.delete_batch_prediction(batch_prediction)
    api.delete_cluster(cluster)
    api.delete_centroid(centroid)
    api.delete_batch_centroid(batch_centroid)
    api.delete_anomaly(anomaly)
    api.delete_anomaly_score(anomaly_score)
    api.delete_batch_anomaly_score(batch_anomaly_score)
    api.delete_sample(sample)
    api.delete_correlation(correlation)
    api.delete_statistical_test(statistical_test)
    api.delete_logistic_regression(logistic_regression)
    api.delete_linear_regression(linear_regression)
    api.delete_association(association)
    api.delete_association_set(association_set)
    api.delete_topic_model(topic_model)
    api.delete_topic_distribution(topic_distribution)
    api.delete_batch_topic_distribution(batch_topic_distribution)
    api.delete_time_series(time_series)
    api.delete_forecast(forecast)
    api.delete_fusion(fusion)
    api.delete_pca(pca)
    api.delete_deepnet(deepnet)
    api.delete_projection(projection)
    api.delete_batch_projection(batch_projection)
    api.delete_project(project)
    api.delete_script(script)
    api.delete_library(library)
    api.delete_execution(execution)

Each of the calls above will return a dictionary with the following
keys:

- **code** If the request is successful, the code will be a
  ``bigml.api.HTTP_NO_CONTENT`` (204) status code. Otherwise, it will be
  one of the standard HTTP error codes. See the `documentation on
  status codes `_ for more
  info.
- **error** If the request does not succeed, it will contain a
  dictionary with an error code and a message. It will be ``None``
  otherwise.

Public and shared resources
---------------------------

The previous examples use resources that were created by the same user
that asks for their retrieval or modification. If a user wants to share one
of her resources, she can make them public or share them. Declaring a
resource public means that anyone can see the resource. This can be applied
to datasets and models. To turn a dataset public, just update its
``private`` property:

.. code-block:: python

    api.update_dataset('dataset/5143a51a37203f2cf7000972',
                       {'private': False})

and any user will be able to download it using its id prepended by
``public``:

.. code-block:: python

    api.get_dataset('public/dataset/5143a51a37203f2cf7000972')

In the models' case, you can also choose if you want the model to be fully
downloadable or just accessible to make predictions. This is controlled with
the ``white_box`` property. If you want to publish your model completely,
just use:

.. code-block:: python

    api.update_model('model/5143a51a37203f2cf7000956',
                     {'private': False,
                      'white_box': True})

Both public models and datasets will be openly accessible to anyone,
registered or not, from the web
gallery.

Still, you may want to share your models with other users, but without
making them public for everyone. This can be achieved by setting the
``shared`` property:

.. code-block:: python

    api.update_model('model/5143a51a37203f2cf7000956', {'shared': True})

Shared models can be accessed using their share hash (property
``shared_hash``
in the original model):

.. code-block:: python

    api.get_model('shared/model/d53iw39euTdjsgesj7382ufhwnD')

or by using their original id with the creator user as username and a
specific sharing api_key you will find as property ``sharing_api_key`` in
the updated model:

.. code-block:: python

    api.get_model('model/5143a51a37203f2cf7000956', shared_username='creator',
                  shared_api_key='c972018dc5f2789e65c74ba3170fda31d02e00c3')

Only users with the share link or credentials information will be able to
access your shared models.


.. _local_resources:

Local Resources
---------------

All the resources in BigML can be downloaded and used locally with no
connection whatsoever to BigML's servers. This is especially important
for all supervised and unsupervised models, which can be used to generate
predictions on any programmable device. The next sections describe how to
do that for each type of resource, but as a general rule, resources can be
exported to a JSON file in your file system using the ``export`` method.

.. code-block:: python

    api.export('model/5143a51a37203f2cf7000956',
               filename='my_dir/my_model.json')

The contents of the generated file can be used just as the remote model
to generate predictions. As you'll see in the next section, the local
``Model``
object can be instantiated by giving the path to this file as first argument:

.. code-block:: python

    from bigml.model import Model
    local_model = Model("my_dir/my_model.json")
    local_model.predict({"petal length": 3, "petal width": 1})
    Iris-versicolor

These bindings define a particular class for each type of Machine Learning
model that is able to interpret the corresponding JSON and create
the local predictions. The classes can be instantiated using:

- The ID of the resource: In this case, the class looks for the JSON
  information of the resource first locally (expecting to find a file
  in the local storage directory --``./storage`` by default--
  whose name is the ID of the model after replacing ``/`` by ``_``)
  and also remotely if absent.

.. code-block:: python

    from bigml.model import Model
    from bigml.api import BigML

    local_model = Model('model/502fdbff15526876610002615')

- A dictionary containing the resource information. In this case, the class
  checks that this information belongs to a finished resource and
  contains the attributes needed to create predictions, like the fields
  structure. If any of these attributes is absent, the class retrieves the
  ID of the model and tries to download the correct JSON from the API to
  store it locally for further use.


.. code-block:: python

    from bigml.anomaly import Anomaly
    from bigml.api import BigML
    api = BigML()
    anomaly = api.get_anomaly('anomaly/502fdbff15526876610002615',
                              query_string='only_model=true;limit=-1')

    local_anomaly = Anomaly(anomaly)

- A path to the file that contains the JSON information for the resource.
  In this case, the
  file is read and the same checks mentioned above are done. If any of these
  checks fails, it tries to retrieve the correct JSON from the API to store
  it locally for further use.

.. code-block:: python

    from bigml.logistic import LogisticRegression
    local_logistic_regression = LogisticRegression('./my_logistic.json')

Internally, these classes need a connection object (``api = BigML()``) to:

- Know the local storage in your file system.
- Download the JSON of the resource if the information provided is not the
  full finished resource content.

Users can provide the connection as a second argument when instantiating the
class:

.. code-block:: python

    from bigml.cluster import Cluster
    from bigml.api import BigML

    local_cluster = Cluster('cluster/502fdbff15526876610002435',
                            api=BigML(my_username,
                                      my_api_key,
                                      storage="my_storage"))

If no connection is provided, a default connection will be
instantiated internally. This default connection will use ``./storage``
as default storage directory and the credentials used to connect to
the API when needed are retrieved from the ``BIGML_USERNAME`` and
``BIGML_API_KEY`` environment variables. If no credentials are found in your
environment, any attempt to download the information will raise an error
asking the user to set these variables.

Ensembles and composite objects, like Fusions, need more than one resource
to be downloaded and stored locally for the class to work. In this case,
the class needs all the component models,
so providing only a local file or a dictionary containing the
JSON for the resource is not enough for the ``Ensemble`` or ``Fusion``
objects to be fully instantiated. If you only provide that partial
information,
the class will use the internal API connection the first time
to download the components.
However, using the ``api.export`` method for ensembles or fusions
will download these component models for you
and will store them in the same directory as the file used to store
the ensemble or fusion information. After that, you can
instantiate the object using the path to the file where the ensemble
or fusion information was stored. The class will look internally for the
rest of the components in the same directory and find them, so no connection
to the API will be needed.

If you use a tag to label the resource, you can also ask for the last
resource that has the tag:

.. code-block:: python

    api.export_last('my_tag',
                    resource_type='ensemble',
                    filename='my_dir/my_ensemble.json')

and even for a resource inside a project:

.. code-block:: python

    api.export_last('my_tag',
                    resource_type='dataset',
                    project='project/5143a51a37203f2cf7000959',
                    filename='my_dir/my_dataset.json')
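
The exported file (and, for composite objects, its component files) can then
be used to instantiate the corresponding local class. For instance,
following the ensemble export above:

.. code-block:: python

    from bigml.ensemble import Ensemble

    # the component models were downloaded to the same directory by
    # api.export, so no API connection is needed here
    local_ensemble = Ensemble('my_dir/my_ensemble.json')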

Local Models
------------

You can instantiate a local version of a remote model.

.. code-block:: python

    from bigml.model import Model
    local_model = Model('model/502fdbff15526876610002615')

This will retrieve the remote model information, using an implicitly built
``BigML()`` connection object (see the `Authentication <#authentication>`_
section for more
details on how to set your credentials) and return a ``Model`` object
that will be stored in the ``./storage`` directory and
that you can use to make local predictions. If you want to use a
specific connection object for the remote retrieval or a different storage
directory, you can set it as the second parameter:

.. code-block:: python

    from bigml.model import Model
    from bigml.api import BigML

    local_model = Model('model/502fdbff15526876610002615',
                        api=BigML(my_username,
                                  my_api_key,
                                  storage="my_storage"))

or even use the remote model information previously retrieved to build the
local model object:

.. code-block:: python

    from bigml.model import Model
    from bigml.api import BigML
    api = BigML()
    model = api.get_model('model/502fdbff15526876610002615',
                          query_string='only_model=true;limit=-1')

    local_model = Model(model)

As you can see, the ``query_string`` used to retrieve the model has two
parts. They both act on the ``fields``
information that is added to the JSON response. First,
``only_model=true`` is used to restrict the fields described in the
``fields`` structure of the response to those used as
predictors in the model. Also,
``limit=-1`` avoids the pagination of fields that is used by default and
includes them all at once. These details are already taken care of in the
two previous examples, where the model ID is used as argument.

Any of these methods will return a ``Model`` object that you can use to make
local predictions, generate IF-THEN rules, Tableau rules
or a Python function that implements the model.

You can also build a local model from a model previously retrieved and
stored in a JSON file:

.. code-block:: python

    from bigml.model import Model
    local_model = Model('./my_model.json')


Local Predictions
-----------------

Once you have a local model, you can use it to generate predictions locally.

.. code-block:: python

    local_model.predict({"petal length": 3, "petal width": 1})
    Iris-versicolor

Local predictions have three clear advantages:

- Removing the dependency from BigML to make new predictions.

- No cost (i.e., you do not spend BigML credits).

- Extremely low latency to generate predictions for huge volumes of data.

The default output for local predictions is the prediction itself, but you
can also add other properties associated with the prediction, like its
confidence or probability, the distribution of values in the predicted node
(for decision tree models), and the number of instances supporting the
prediction. To obtain a
dictionary with the prediction and the available additional
properties use the ``full=True`` argument:

.. code-block:: python

    local_model.predict({"petal length": 3, "petal width": 1}, full=True)

that will return:

.. code-block:: python

    {'count': 47,
     'confidence': 0.92444,
     'probability': 0.9861111111111112,
     'prediction': u'Iris-versicolor',
     'distribution_unit': 'categories',
     'path': [u'petal length > 2.45',
              u'petal width <= 1.75',
              u'petal length <= 4.95',
              u'petal width <= 1.65'],
     'distribution': [[u'Iris-versicolor', 47]]}

Note that the ``path`` attribute for the ``proportional`` missing strategy
shows the path leading to a final unique node, that gives the prediction, or
to the first split where a missing value is found. Other optional
attributes are
``next``, which contains the field that determines the next split after
the prediction node, and ``distribution``, which adds the distribution
that leads to the prediction. For regression models, ``min`` and
``max`` will add the limit values for the data that supports the
prediction.

When your test data has missing values, you can choose between the ``last
prediction`` and ``proportional`` strategies to compute the
prediction.
The ``last prediction`` strategy is the one used by
default. To compute a prediction, the algorithm goes down the model's
decision tree and checks the condition it finds at each node (e.g.:
'sepal length' > 2). If the field checked is missing in your input
data you have two options: by default (``last prediction`` strategy)
the algorithm will stop and issue the last prediction it computed in
the previous node. If you choose the ``proportional`` strategy instead, the
algorithm will continue to go down the tree considering both branches
from that node on. Thus, it will store a list of possible predictions
from then on, one per valid node. In this case, the final prediction
will be the majority (for categorical models) or the average (for
regressions) of the stored list of predictions.

You can set this strategy by using the ``missing_strategy``
argument with code ``0`` to use ``last prediction`` and ``1`` for
``proportional``.

.. code-block:: python

    from bigml.model import LAST_PREDICTION, PROPORTIONAL
    # LAST_PREDICTION = 0; PROPORTIONAL = 1
    local_model.predict({"petal length": 3, "petal width": 1},
                        missing_strategy=PROPORTIONAL)

For classification models, it is sometimes useful to obtain a
probability or confidence prediction for each possible class of the
objective field. To do this, you can use the ``predict_probability``
and ``predict_confidence`` methods respectively. The former gives a
prediction based on the distribution of instances at the appropriate
leaf node, with a Laplace correction based on the root node
distribution. The latter returns a lower confidence bound on the leaf
node probability based on the Wilson score interval.

Each of these methods takes the ``missing_strategy``
argument that functions as it does in ``predict``, and one additional
argument, ``compact``. If ``compact`` is ``False`` (the default), the
output of these functions is a list of maps, each with the keys
``prediction`` and ``probability`` (or ``confidence``) mapped to the
class name and its associated probability (or confidence). Note that these
methods replace the functionality of the deprecated ``multiple`` parameter
of the ``predict`` method.

So, for example, the following:

.. code-block:: python

    local_model.predict_probability({"petal length": 3})

would result in

.. code-block:: python

    [{'prediction': u'Iris-setosa',
      'probability': 0.0033003300330033},
     {'prediction': u'Iris-versicolor',
      'probability': 0.4983498349834984},
     {'prediction': u'Iris-virginica',
      'probability': 0.4983498349834984}]

If ``compact`` is ``True``, only the probabilities themselves are
returned, as a list in class name order. Note that, for reference,
the attribute ``Model.class_names`` contains the class names in the
appropriate ordering.

To illustrate, the following:

.. code-block:: python

    local_model.predict_probability({"petal length": 3}, compact=True)

would result in

.. code-block:: python

    [0.0033003300330033, 0.4983498349834984, 0.4983498349834984]

The output of ``predict_confidence`` is the same, except that the
output maps are keyed with ``confidence`` instead of ``probability``.


For classifications, the prediction of a local model will be one of the
available categories in the objective field and an associated ``confidence``
or ``probability`` that is used to decide which is the predicted category.
If you prefer the model predictions to be based on either of them, you can
use the ``operating_kind`` argument in the ``predict`` method.
Here's an example
using predictions based on ``confidence``:

.. code-block:: python

    local_model.predict({"petal length": 3, "petal width": 1},
                        operating_kind="confidence")

Previous versions of the bindings had additional arguments in the ``predict``
method that were used to format the prediction attributes. The signature of
the method has been changed to accept only arguments that affect the
prediction itself (like ``missing_strategy``, ``operating_kind`` and
``operating_point``) and ``full``, which is a boolean that controls whether
the output is the prediction itself or a dictionary with all the available
properties associated with the prediction. Formatting can be achieved by
using the ``cast_prediction`` function:

.. code-block:: python

    def cast_prediction(full_prediction, to=None,
                        confidence=False, probability=False,
                        path=False, distribution=False,
                        count=False, next=False, d_min=False,
                        d_max=False, median=False,
                        unused_fields=False):

whose first argument is the prediction obtained with the ``full=True``
argument, the second one defines the type of output (``None`` to obtain
only the prediction output, ``"list"`` or ``"dict"``) and the rest of the
booleans cause the corresponding property to be included or not.

Operating point's predictions
-----------------------------

In classification problems,
Models, Ensembles and Logistic Regressions can be used at different
operating points, that is, associated with particular thresholds. Each
operating point is then defined by the kind of property you use as
threshold,
its value, and the class that is supposed to be predicted if the threshold
is reached.

Let's assume you decide that you have a binary problem, with classes
``True``
and ``False`` as possible outcomes. Imagine you want to be very sure to
predict the ``True`` outcome, so you don't want to predict that unless the
probability associated with it is over ``0.8``. You can achieve this with
any classification model by creating an operating point:

.. code-block:: python

    operating_point = {"kind": "probability",
                       "positive_class": "True",
                       "threshold": 0.8}

To predict using this restriction, you can use the ``operating_point``
parameter:

.. code-block:: python

    prediction = local_model.predict(inputData,
                                     operating_point=operating_point)

where ``inputData`` should contain the values for which you want to predict.
Local models allow two kinds of operating points: ``probability`` and
``confidence``. For both of them, the threshold can be set to any number
in the ``[0, 1]`` range.


Local Clusters
--------------

You can also instantiate a local version of a remote cluster.

.. code-block:: python

    from bigml.cluster import Cluster
    local_cluster = Cluster('cluster/502fdbff15526876610002435')

This will retrieve the remote cluster information, using an implicitly built
``BigML()`` connection object (see the `Authentication <#authentication>`_
section for more
details on how to set your credentials) and return a ``Cluster`` object
that will be stored in the ``./storage`` directory and
that you can use to make local centroid predictions. If you want to use a
specific connection object for the remote retrieval or a different storage
directory, you can set it as the second
parameter:

.. code-block:: python

    from bigml.cluster import Cluster
    from bigml.api import BigML

    local_cluster = Cluster('cluster/502fdbff15526876610002435',
                            api=BigML(my_username,
                                      my_api_key,
                                      storage="my_storage"))

or even use the remote cluster information previously retrieved to build the
local cluster object:

.. code-block:: python

    from bigml.cluster import Cluster
    from bigml.api import BigML
    api = BigML()
    cluster = api.get_cluster('cluster/502fdbff15526876610002435',
                              query_string='limit=-1')

    local_cluster = Cluster(cluster)

Note that in this example we used a ``limit=-1`` query string for the
cluster
retrieval. This ensures that all fields are retrieved by the get method in
the
same call (unlike in the standard calls where the number of fields returned
is
limited).

Local clusters also provide methods for the significant operations that
can be done using clusters: finding the centroid assigned to a certain data
point, sorting centroids according to their distance to a data point,
summarizing
the centroids' intra-distances and inter-distances, and also finding the
closest points to a given one. The `Local Centroids <#local-centroids>`_
and the
`Summary generation <#summary-generation>`_ sections will
explain these methods.

Local Centroids
---------------

Using the local cluster object, you can predict the centroid associated with
an input data set:

.. code-block:: python

    local_cluster.centroid({"pregnancies": 0, "plasma glucose": 118,
                            "blood pressure": 84,
                            "triceps skin thickness": 47,
                            "insulin": 230, "bmi": 45.8,
                            "diabetes pedigree": 0.551, "age": 31,
                            "diabetes": "true"})
    {'distance': 0.454110207355, 'centroid_name': 'Cluster 4',
     'centroid_id': '000004'}


You must keep in mind, though, that to obtain a centroid prediction, input
data must have values for all the numeric fields. No missing values for the
numeric fields are allowed unless you provided a ``default_numeric_value``
in the cluster construction configuration. If so, this value will be used to
fill the missing numeric fields.

As in the local model predictions, producing local centroids can be done
independently of BigML servers, so no cost or connection latencies are
involved.

Another interesting method in the cluster object is
``local_cluster.closests_in_cluster``, which, given a reference data point,
will provide the rest of the points that fall into the same cluster, sorted
in ascending order of their distance to this point. You can limit
the maximum number of points returned by setting the ``number_of_points``
argument to any positive integer.

.. code-block:: python

    local_cluster.closests_in_cluster(
        {"pregnancies": 0, "plasma glucose": 118,
         "blood pressure": 84, "triceps skin thickness": 47,
         "insulin": 230, "bmi": 45.8,
         "diabetes pedigree": 0.551, "age": 31,
         "diabetes": "true"}, number_of_points=2)

The response will be a dictionary with the centroid id of the cluster and
the list of closest points and their distances to the reference point.

.. code-block:: python

    {'closest': [
        {'distance': 0.06912270988567025,
         'data': {'plasma glucose': '115', 'blood pressure': '70',
                  'triceps skin thickness': '30', 'pregnancies': '1',
                  'bmi': '34.6', 'diabetes pedigree': '0.529',
                  'insulin': '96', 'age': '32', 'diabetes': 'true'}},
        {'distance': 0.10396456577958413,
         'data': {'plasma glucose': '167', 'blood pressure': '74',
                  'triceps skin thickness': '17', 'pregnancies': '1',
                  'bmi': '23.4',
                  'diabetes pedigree': '0.447', 'insulin': '144',
                  'age': '33',
                  'diabetes': 'true'}}],
     'reference': {'age': 31, 'bmi': 45.8, 'plasma glucose': 118,
                   'insulin': 230, 'blood pressure': 84,
                   'pregnancies': 0, 'triceps skin thickness': 47,
                   'diabetes pedigree': 0.551, 'diabetes': 'true'},
     'centroid_id': u'000000'}

No missing numeric values are allowed either in the reference data point.
If you want the data points to belong to a different cluster, you can
provide the ``centroid_id`` for the cluster as an additional argument.

Another utility method is ``local_cluster.sorted_centroids``, which, given
a reference data point, will provide the list of centroids sorted according
to their distance to it:

.. code-block:: python

    local_cluster.sorted_centroids(
        {'plasma glucose': '115', 'blood pressure': '70',
         'triceps skin thickness': '30', 'pregnancies': '1',
         'bmi': '34.6', 'diabetes pedigree': '0.529',
         'insulin': '96', 'age': '32', 'diabetes': 'true'})
    {'centroids': [{'distance': 0.31656890408929705,
                    'data': {u'000006': 0.34571, u'000007': 30.7619,
                             u'000000': 3.79592, u'000008': u'false'},
                    'centroid_id': u'000000'},
                   {'distance': 0.4424198506958207,
                    'data': {u'000006': 0.77087, u'000007': 45.50943,
                             u'000000': 5.90566, u'000008': u'true'},
                    'centroid_id': u'000001'}],
     'reference': {'age': '32', 'bmi': '34.6', 'plasma glucose': '115',
                   'insulin': '96', 'blood pressure': '70',
                   'pregnancies': '1', 'triceps skin thickness': '30',
                   'diabetes pedigree': '0.529', 'diabetes': 'true'}}



There is also ``points_in_cluster``, which returns the list of
data points assigned to a certain cluster, given its ``centroid_id``.

.. code-block:: python

    centroid_id = "000000"
    local_cluster.points_in_cluster(centroid_id)


Local Anomaly Detector
----------------------

You can also instantiate a local version of a remote anomaly detector.

.. code-block:: python

    from bigml.anomaly import Anomaly
    local_anomaly = Anomaly('anomaly/502fcbff15526876610002435')

This will retrieve the remote anomaly detector information, using an
implicitly
built ``BigML()`` connection object (see the
`Authentication <#authentication>`_ section for
more details on how to set your credentials) and return an ``Anomaly``
object
that will be stored in the ``./storage`` directory and
that you can use to compute local anomaly scores. If you want to use a
specific connection object for the remote retrieval or a different storage
directory, you can set it as the second
parameter:

.. code-block:: python

    from bigml.anomaly import Anomaly
    from bigml.api import BigML

    local_anomaly = Anomaly('anomaly/502fcbff15526876610002435',
                            api=BigML(my_username,
                                      my_api_key))

or even use the remote anomaly information retrieved previously to build the
local anomaly detector object:

.. code-block:: python

    from bigml.anomaly import Anomaly
    from bigml.api import BigML
    api = BigML()
    anomaly = api.get_anomaly('anomaly/502fcbff15526876610002435',
                              query_string='limit=-1')

    local_anomaly = Anomaly(anomaly)

Note that in this example we used a ``limit=-1`` query string for the
anomaly
retrieval. This ensures that all fields are retrieved by the get method in
the
same call (unlike in the standard calls where the number of fields returned
is
limited).

The anomaly detector object also has an ``anomalies_filter`` method
that will build the LISP filter you would need to filter the original
dataset and create a new one excluding
the top anomalies. By setting the ``include`` parameter to ``True``, you can
do the
inverse and create a dataset with only the most anomalous data points.
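
For instance, a hedged sketch that removes the top anomalies from the
original dataset (assuming the remote ``dataset`` the detector was built
from and the ``lisp_filter`` dataset argument documented in the API) could
be:

.. code-block:: python

    # LISP filter matching everything but the top anomalies
    lisp_filter = local_anomaly.anomalies_filter(include=False)
    filtered_dataset = api.create_dataset(dataset,
                                          {"lisp_filter": lisp_filter})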
 - -Local Anomaly Scores -------------------- - -Using the local anomaly detector object, you can compute the anomaly score associated with an input data set: - -.. code-block:: python - - local_anomaly.anomaly_score({"src_bytes": 350}) - 0.9268527808726705 - - -As in the local model predictions, producing local anomaly scores can be done independently of BigML servers, so no cost or connection latencies are involved. - -Local Logistic Regression ------------------------- - -You can also instantiate a local version of a remote logistic regression. - -.. code-block:: python - - from bigml.logistic import LogisticRegression - local_log_regression = LogisticRegression( - 'logisticregression/502fdbff15526876610042435') - -This will retrieve the remote logistic regression information, using an implicitly built ``BigML()`` connection object (see the `Authentication <#authentication>`_ section for more details on how to set your credentials) and return a ``LogisticRegression`` object, stored in the ``./storage`` directory, that you can use to make local predictions. If you want to use a specific connection object for the remote retrieval or a different storage directory, you can set it as the second parameter: - -.. code-block:: python - - from bigml.logistic import LogisticRegression - from bigml.api import BigML - - local_log_regression = LogisticRegression( - 'logisticregression/502fdbff15526876610602435', - api=BigML(my_username, my_api_key, storage="my_storage")) - -You can also reuse a remote logistic regression JSON structure as previously retrieved to build the local logistic regression object: - -.. code-block:: python - - from bigml.logistic import LogisticRegression - from bigml.api import BigML - api = BigML() - logistic_regression = api.get_logistic_regression( - 'logisticregression/502fdbff15526876610002435', - query_string='limit=-1') - - local_log_regression = LogisticRegression(logistic_regression) - -Note that in this example we used a ``limit=-1`` query string for the logistic regression retrieval. This ensures that all fields are retrieved by the get method in the same call (unlike in the standard calls, where the number of fields returned is limited). - -Local Logistic Regression Predictions ------------------------------------- - -Using the local logistic regression object, you can generate predictions for input data: - -.. code-block:: python - - local_log_regression.predict({"petal length": 2, "sepal length": 1.5, - "petal width": 0.5, "sepal width": 0.7}, - full=True) - {'distribution': [ - {'category': u'Iris-virginica', 'probability': 0.5041444478857267}, - {'category': u'Iris-versicolor', 'probability': 0.46926542042788333}, - {'category': u'Iris-setosa', 'probability': 0.02659013168639014}], - 'prediction': u'Iris-virginica', 'probability': 0.5041444478857267} - -As you can see, the prediction contains the predicted category and the associated probability. It also shows the distribution of probabilities for all the possible categories in the objective field. If you only need the predicted value, you can remove the ``full`` argument. - -You must keep in mind, though, that to obtain a logistic regression prediction, input data must have values for all the numeric fields. No missing values for the numeric fields are allowed. - -For consistency of interface with the ``Model`` class, logistic regressions again have a ``predict_probability`` method, which takes the same argument as ``Model.predict``: ``compact``. As stated above, missing values are not allowed, and so there is no ``missing_strategy`` argument. - -As with local Models, if ``compact`` is ``False`` (the default), the output is a list of maps, each with the keys ``prediction`` and ``probability`` mapped to the class name and its associated probability. - -So, for example - -.. code-block:: python - - local_log_regression.predict_probability({"petal length": 2, "sepal length": 1.5, - "petal width": 0.5, "sepal width": 0.7}) - - [{'category': u'Iris-setosa', 'probability': 0.02659013168639014}, - {'category': u'Iris-versicolor', 'probability': 0.46926542042788333}, - {'category': u'Iris-virginica', 'probability': 0.5041444478857267}] - -If ``compact`` is ``True``, only the probabilities themselves are returned, as a list in class name order, again, as is the case with local Models. - -Operating point predictions are also available for local logistic regressions and an example would be: - -.. code-block:: python - - operating_point = {"kind": "probability", - "positive_class": "True", - "threshold": 0.8} - local_log_regression.predict(input_data, operating_point=operating_point) - - -You can check the `Operating point's predictions <#operating-point's-predictions>`_ section to learn about operating points. For logistic regressions, the only available kind is ``probability``, which sets the threshold of probability to be reached for the prediction to be the positive class. - -Local Linear Regression ----------------------- - -You can also instantiate a local version of a remote linear regression. - -.. code-block:: python - - from bigml.linear import LinearRegression - local_linear_regression = LinearRegression( - 'linearregression/502fdbff15526876610042435') - -This will retrieve the remote linear regression information, using an implicitly built ``BigML()`` connection object (see the `Authentication <#authentication>`_ section for more details on how to set your credentials) and return a ``LinearRegression`` object, stored in the ``./storage`` directory, that you can use to make local predictions. If you want to use a specific connection object for the remote retrieval or a different storage directory, you can set it as the second parameter: - -.. 
code-block:: python - - from bigml.linear import LinearRegression - from bigml.api import BigML - - local_linear_regression = LinearRegression( - 'linearregression/502fdbff15526876610602435', - api=BigML(my_username, my_api_key, storage="my_storage")) - -You can also reuse a remote linear regression JSON structure as previously retrieved to build the local linear regression object: - -.. code-block:: python - - from bigml.linear import LinearRegression - from bigml.api import BigML - api = BigML() - linear_regression = api.get_linear_regression( - 'linearregression/502fdbff15526876610002435', - query_string='limit=-1') - - local_linear_regression = LinearRegression(linear_regression) - -Note that in this example we used a ``limit=-1`` query string for the linear regression retrieval. This ensures that all fields are retrieved by the get method in the same call (unlike in the standard calls, where the number of fields returned is limited). - -Local Linear Regression Predictions ----------------------------------- - -Using the local ``LinearRegression`` class, you can generate predictions for input data: - -.. code-block:: python - - local_linear_regression.predict({"petal length": 2, "sepal length": 1.5, - "species": "Iris-setosa", - "sepal width": 0.7}, - full=True) - {'confidence_bounds': { - 'prediction_interval': 0.43783924497784293, - 'confidence_interval': 0.2561542783257394}, - 'prediction': -0.6109005499999999, 'unused_fields': ['petal length']} - - -To obtain a linear regression prediction, input data can only have missing values in fields that already had some missing values in the training data. - -Using ``full=True`` in the ``predict`` method will cause the prediction to include ``confidence_bounds`` when available. Some linear regressions will not contain such information by construction. Also, in order to compute these bounds locally, you will need ``numpy`` and ``scipy`` in place. As they are quite heavy libraries, they aren't automatically installed as dependencies of these bindings. - -Local Deepnet ------------- - -You can also instantiate a local version of a remote Deepnet. - -.. code-block:: python - - from bigml.deepnet import Deepnet - local_deepnet = Deepnet( - 'deepnet/502fdbff15526876610022435') - -This will retrieve the remote deepnet information, using an implicitly built ``BigML()`` connection object (see the `Authentication <#authentication>`_ section for more details on how to set your credentials) and return a ``Deepnet`` object, stored in the ``./storage`` directory, that you can use to make local predictions. If you want to use a specific connection object for the remote retrieval or a different storage directory, you can set it as the second parameter: - -.. code-block:: python - - from bigml.deepnet import Deepnet - from bigml.api import BigML - - local_deepnet = Deepnet( - 'deepnet/502fdbff15526876610602435', - api=BigML(my_username, my_api_key, storage="my_storage")) - -You can also reuse a remote Deepnet JSON structure as previously retrieved to build the local Deepnet object: - -.. code-block:: python - - from bigml.deepnet import Deepnet - from bigml.api import BigML - api = BigML() - deepnet = api.get_deepnet( - 'deepnet/502fdbff15526876610002435', - query_string='limit=-1') - - local_deepnet = Deepnet(deepnet) - -Note that in this example we used a ``limit=-1`` query string for the deepnet retrieval. This ensures that all fields are retrieved by the get method in the same call (unlike in the standard calls, where the number of fields returned is limited).
 - -Local Deepnet Predictions ------------------------- - -Using the local deepnet object, you can generate predictions for input data: - -.. code-block:: python - - local_deepnet.predict({"petal length": 2, "sepal length": 1.5, - "petal width": 0.5, "sepal width": 0.7}, - full=True) - {'distribution': [ - {'category': u'Iris-virginica', 'probability': 0.5041444478857267}, - {'category': u'Iris-versicolor', 'probability': 0.46926542042788333}, - {'category': u'Iris-setosa', 'probability': 0.02659013168639014}], - 'prediction': u'Iris-virginica', 'probability': 0.5041444478857267} - -As you can see, the full prediction contains the predicted category and the associated probability. It also shows the distribution of probabilities for all the possible categories in the objective field. If you only need the predicted value, you can remove the ``full`` argument. - -To be consistent with the ``Model`` class interface, deepnets also have a ``predict_probability`` method, which takes the same argument as ``Model.predict``: ``compact``. - -As with local Models, if ``compact`` is ``False`` (the default), the output is a list of maps, each with the keys ``prediction`` and ``probability`` mapped to the class name and its associated probability. - -So, for example - -.. code-block:: python - - local_deepnet.predict_probability({"petal length": 2, "sepal length": 1.5, - "petal width": 0.5, "sepal width": 0.7}) - - [{'category': u'Iris-setosa', 'probability': 0.02659013168639014}, - {'category': u'Iris-versicolor', 'probability': 0.46926542042788333}, - {'category': u'Iris-virginica', 'probability': 0.5041444478857267}] - -If ``compact`` is ``True``, only the probabilities themselves are returned, as a list in class name order, again, as is the case with local Models. - -Operating point predictions are also available for local deepnets and an example would be: - -.. code-block:: python - - operating_point = {"kind": "probability", - "positive_class": "True", - "threshold": 0.8} - prediction = local_deepnet.predict(input_data, - operating_point=operating_point) - -Local Fusion ------------ - -You can also instantiate a local version of a remote Fusion. - -.. code-block:: python - - from bigml.fusion import Fusion - local_fusion = Fusion( - 'fusion/502fdbff15526876610022438') - -This will retrieve the remote fusion information, using an implicitly built ``BigML()`` connection object (see the `Authentication <#authentication>`_ section for more details on how to set your credentials) and return a ``Fusion`` object, stored in the ``./storage`` directory, that you can use to make local predictions. If you want to use a specific connection object for the remote retrieval or a different storage directory, you can set it as the second parameter: - -.. code-block:: python - - from bigml.fusion import Fusion - from bigml.api import BigML - - local_fusion = Fusion( - 'fusion/502fdbff15526876610602435', - api=BigML(my_username, my_api_key, storage="my_storage")) - -You can also reuse a remote Fusion JSON structure as previously retrieved to build the local Fusion object: - -.. 
code-block:: python - - from bigml.fusion import Fusion - from bigml.api import BigML - api = BigML() - fusion = api.get_fusion( - 'fusion/502fdbff15526876610002435', - query_string='limit=-1') - - local_fusion = Fusion(fusion) - -Note that in this example we used a ``limit=-1`` query string for the fusion retrieval. This ensures that all fields are retrieved by the get method in the same call (unlike in the standard calls, where the number of fields returned is limited). - -Local Fusion Predictions ------------------------ - -Using the local fusion object, you can generate predictions for input data: - -.. code-block:: python - - local_fusion.predict({"petal length": 2, "sepal length": 1.5, - "petal width": 0.5, "sepal width": 0.7}, - full=True) - {'prediction': u'Iris-setosa', 'probability': 0.45224} - - -As you can see, the full prediction contains the predicted category and the associated probability. If you only need the predicted value, you can remove the ``full`` argument. - -To be consistent with the ``Model`` class interface, fusions also have a ``predict_probability`` method, which takes the same argument as ``Model.predict``: ``compact``. - -As with local Models, if ``compact`` is ``False`` (the default), the output is a list of maps, each with the keys ``prediction`` and ``probability`` mapped to the class name and its associated probability. - -So, for example - -.. code-block:: python - - local_fusion.predict_probability({"petal length": 2, "sepal length": 1.5, - "petal width": 0.5, "sepal width": 0.7}) - - [{'category': u'Iris-setosa', 'probability': 0.45224}, - {'category': u'Iris-versicolor', 'probability': 0.2854}, - {'category': u'Iris-virginica', 'probability': 0.26236}] - - -If ``compact`` is ``True``, only the probabilities themselves are returned, as a list in class name order, again, as is the case with local Models. - -Operating point predictions are also available for local fusions, using ``probability`` as the threshold kind, and an example would be: - -.. code-block:: python - - operating_point = {"kind": "probability", - "positive_class": "True", - "threshold": 0.8} - prediction = local_fusion.predict(input_data, - operating_point=operating_point) - -Local Association ----------------- - -You can also instantiate a local version of a remote association resource. - -.. code-block:: python - - from bigml.association import Association - local_association = Association('association/502fdcff15526876610002435') - -This will retrieve the remote association information, using an implicitly built ``BigML()`` connection object (see the `Authentication <#authentication>`_ section for more details on how to set your credentials) and return an ``Association`` object, stored in the ``./storage`` directory, that you can use to extract the rules found in the original dataset. If you want to use a specific connection object for the remote retrieval or a different storage directory, you can set it as the second parameter: - -.. code-block:: python - - from bigml.association import Association - from bigml.api import BigML - - local_association = Association('association/502fdcff15526876610002435', - api=BigML(my_username, - my_api_key, - storage="my_storage")) - -or even use the remote association information retrieved previously to build the local association object: - -.. 
code-block:: python - - from bigml.association import Association - from bigml.api import BigML - api = BigML() - association = api.get_association('association/502fdcff15526876610002435', - query_string='limit=-1') - - local_association = Association(association) - -Note that in this example we used a ``limit=-1`` query string for the association retrieval. This ensures that all fields are retrieved by the get method in the same call (unlike in the standard calls, where the number of fields returned is limited). - -The created ``Association`` object has some methods to help retrieve the association rules found in the original data. The ``get_rules`` method will return the association rules. Arguments can be set to filter the rules returned according to their ``leverage``, ``strength``, ``support``, ``p_value``, a list of items involved in the rule, or a user-given filter function. - -.. code-block:: python - - from bigml.association import Association - local_association = Association('association/502fdcff15526876610002435') - local_association.get_rules(item_list=["Edible"], min_p_value=0.3) - -In this example, the only rules returned by the ``get_rules`` method will be the ones that mention ``Edible`` and whose ``p_value`` is greater than or equal to ``0.3``. - -The rules can also be stored in a CSV file using ``rules_CSV``: - - -.. code-block:: python - - from bigml.association import Association - local_association = Association('association/502fdcff15526876610002435') - local_association.rules_CSV(file_name='/tmp/my_rules.csv', - min_strength=0.1) - -This example will store the rules whose strength is greater than or equal to 0.1 in the ``/tmp/my_rules.csv`` file. - -You can also obtain the list of ``items`` parsed in the dataset using the ``get_items`` method. The results can be filtered by field name, by item names, or by a user-given function: - -.. code-block:: python - - from bigml.association import Association - local_association = Association('association/502fdcff15526876610002435') - local_association.get_items(field="Cap Color", - names=["Brown cap", "White cap", "Yellow cap"]) - -This will recover the ``Item`` objects found in the ``Cap Color`` field for the names in the list, with their properties as described in the `developers section `_. - - -Local Association Sets ---------------------- - -Using the local association object, you can compute the association sets related to input data: - -.. 
code-block:: python - - local_association.association_set( \ - {"gender": "Female", "genres": "Adventure$Action", \ - "timestamp": 993906291, "occupation": "K-12 student", - "zipcode": 59583, "rating": 3}) - [{'item': {'complement': False, - 'count': 70, - 'field_id': u'000002', - 'name': u'Under 18'}, - 'rules': ['000000'], - 'score': 0.0969181441561211}, - {'item': {'complement': False, - 'count': 216, - 'field_id': u'000007', - 'name': u'Drama'}, - 'score': 0.025050115102862636}, - {'item': {'complement': False, - 'count': 108, - 'field_id': u'000007', - 'name': u'Sci-Fi'}, - 'rules': ['000003'], - 'score': 0.02384578264599424}, - {'item': {'complement': False, - 'count': 40, - 'field_id': u'000002', - 'name': u'56+'}, - 'rules': ['000008', - '000020'], - 'score': 0.021845366022721312}, - {'item': {'complement': False, - 'count': 66, - 'field_id': u'000002', - 'name': u'45-49'}, - 'rules': ['00000e'], - 'score': 0.019657155185835006}] - -As in the local model predictions, producing local association sets can be done -independently of BigML servers, so no cost or connection latencies are -involved. - -Local Topic Model ------------------ - -You can also instantiate a local version of a remote topic model. - -.. code-block:: python - - from bigml.topicmodel import TopicModel - local_topic_model = TopicModel( - 'topicmodel/502fdbcf15526876210042435') - -This will retrieve the remote topic model information, -using an implicitly built -``BigML()`` connection object (see the `Authentication <#authentication>`_ -section for more -details on how to set your credentials) and return a ``TopicModel`` -object that will be stored in the ``./storage`` directory and -you can use to obtain local topic distributions. -If you want to use a -specific connection object for the remote retrieval or a different storage -directory, you can set it as second -parameter: - -.. code-block:: python - - from bigml.topicmodel import TopicModel - from bigml.api import BigML - - local_topic_model = TopicModel( - 'topicmodel/502fdbcf15526876210042435', - api=BigML(my_username, my_api_key, storage="my_storage")) - -You can also reuse a remote topic model JSON structure -as previously retrieved to build the -local topic model object: - -.. code-block:: python - - from bigml.topicmodel import TopicModel - from bigml.api import BigML - api = BigML() - topic_model = api.get_topic_model( - 'topicmodel/502fdbcf15526876210042435', - query_string='limit=-1') - - local_topic_model = TopicModel(topic_model) - -Note that in this example we used a ``limit=-1`` query string for the topic -model retrieval. This ensures that all fields are retrieved by the get -method in the -same call (unlike in the standard calls where the number of fields returned is -limited). - -Local Topic Distributions -------------------------- - -Using the local topic model object, you can predict the local topic -distribution for -an input data set: - -.. 
code-block:: python - - local_topic_model.distribution({"Message": "Our mobile phone is free"}) - [ { 'name': u'Topic 00', 'probability': 0.002627154266498529}, - { 'name': u'Topic 01', 'probability': 0.003257671290458176}, - { 'name': u'Topic 02', 'probability': 0.002627154266498529}, - { 'name': u'Topic 03', 'probability': 0.1968263976460698}, - { 'name': u'Topic 04', 'probability': 0.002627154266498529}, - { 'name': u'Topic 05', 'probability': 0.002627154266498529}, - { 'name': u'Topic 06', 'probability': 0.13692728036990331}, - { 'name': u'Topic 07', 'probability': 0.6419714165615805}, - { 'name': u'Topic 08', 'probability': 0.002627154266498529}, - { 'name': u'Topic 09', 'probability': 0.002627154266498529}, - { 'name': u'Topic 10', 'probability': 0.002627154266498529}, - { 'name': u'Topic 11', 'probability': 0.002627154266498529}] - - -As you can see, the topic distribution contains the name of the -possible topics in the model and the -associated probabilities. - -Local Time Series ------------------ - -You can also instantiate a local version of a remote time series. - -.. code-block:: python - - from bigml.timeseries import TimeSeries - local_time_series = TimeSeries( - 'timeseries/502fdbcf15526876210042435') - -This will create a series of models from -the remote time series information, -using an implicitly built -``BigML()`` connection object (see the `Authentication <#authentication>`_ -section for more -details on how to set your credentials) and return a ``TimeSeries`` -object that will be stored in the ``./storage`` directory and -you can use to obtain local forecasts. -If you want to use a -specific connection object for the remote retrieval or a different storage -directory, you can set it as second -parameter: - -.. code-block:: python - - from bigml.timeseries import TimeSeries - from bigml.api import BigML - - local_time_series = TimeSeries( \ - 'timeseries/502fdbcf15526876210042435', - api=BigML(my_username, my_api_key, storage="my_storage")) - -You can also reuse a remote time series JSON structure -as previously retrieved to build the -local time series object: - -.. code-block:: python - - from bigml.timeseries import TimeSeries - from bigml.api import BigML - api = BigML() - time_series = api.get_time_series( \ - 'timeseries/502fdbcf15526876210042435', - query_string='limit=-1') - - local_time_series = TimeSeries(time_series) - -Note that in this example we used a ``limit=-1`` query string for the time -series retrieval. This ensures that all fields are retrieved by the get -method in the -same call (unlike in the standard calls where the number of fields returned is -limited). - - -Local Forecasts ---------------- - -Using the local time series object, you can forecast any of the objective -field values: - -.. 
code-block:: python - - local_time_series.forecast({"Final": {"horizon": 5}, "Assignment": { \ - "horizon": 10, "ets_models": {"criterion": "aic", "limit": 2}}}) - {u'000005': [ - {'point_forecast': [68.53181, 68.53181, 68.53181, 68.53181, 68.53181], - 'model': u'A,N,N'}], - u'000001': [{'point_forecast': [54.776650000000004, 90.00943000000001, - 83.59285000000001, 85.72403000000001, - 72.87196, 93.85872, 84.80786, 84.65522, - 92.52545, 88.78403], - 'model': u'A,N,A'}, - {'point_forecast': [55.882820120000005, 90.5255466567616, - 83.44908577909621, 87.64524353046498, - 74.32914583152592, 95.12372848262932, - 86.69298716626228, 85.31630744944385, - 93.62385478607113, 89.06905451921818], - 'model': u'A,Ad,A'}]} - - -As you can see, the forecast contains the ID of the forecasted field, the computed points, and the name of the models meeting the criterion. For more details about the available parameters, please check the `API documentation `_. - - -Local PCAs ---------- - -The ``PCA`` class will create a local version of a remote PCA. - -.. code-block:: python - - from bigml.pca import PCA - local_pca = PCA( - 'pca/502fdbcf15526876210042435') - - -This will create an object that stores the remote information that defines the PCA, needed to project input data onto the new, dimensionally reduced components. The remote resource is automatically downloaded the first time the PCA is instantiated, using an implicitly built ``BigML()`` connection object (see the `Authentication <#authentication>`_ section for more details on how to set your credentials). The JSON that contains this information is stored in a ``./storage`` directory, which is the default choice. If you want to use a specific connection object to define the credentials for the authentication in BigML or the directory where the JSON information is stored, you can set it as the second parameter: - -.. code-block:: python - - from bigml.pca import PCA - from bigml.api import BigML - - local_pca = PCA( \ - 'pca/502fdbcf15526876210042435', - api=BigML(my_username, my_api_key, storage="my_storage")) - -You can also reuse a remote PCA JSON structure as previously retrieved to build the local PCA object: - -.. code-block:: python - - from bigml.pca import PCA - from bigml.api import BigML - api = BigML() - pca = api.get_pca( \ - 'pca/502fdbcf15526876210042435', - query_string='limit=-1') - - local_pca = PCA(pca) - -Note that in this example we used a ``limit=-1`` query string for the PCA retrieval. This ensures that all fields are retrieved by the get method in the same call (unlike in the standard calls, where the number of fields returned is limited). - - -Local Projections ----------------- - -Using the local PCA object, you can compute the projection of an input dataset onto the new components: - -.. code-block:: python - - local_pca.projection({"species": "Iris-versicolor"}) - [6.03852, 8.35456, 5.04432, 0.75338, 0.06787, 0.03018] - -You can use the ``max_components`` and ``variance_threshold`` arguments to limit the number of components generated. You can also use the ``full`` argument to produce a dictionary whose keys are the names of the generated components. - -.. 
code-block:: python - - local_pca.projection({"species": "Iris-versicolor"}, full=True) - {'PCA1': 6.03852, 'PCA2': 8.35456, 'PCA3': 5.04432, 'PCA4': 0.75338, - 'PCA5': 0.06787, 'PCA6': 0.03018} - -As in the local model predictions, producing local projections can be done independently of BigML servers, so no cost or connection latencies are involved.
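 - -As a minimal sketch of limiting the generated components (the argument names come from the description above; their exact semantics and the values used here should be taken as illustrative assumptions): - -.. code-block:: python - - # keep only the first two components - local_pca.projection({"species": "Iris-versicolor"}, max_components=2) - - # keep the components needed to reach a variance threshold - local_pca.projection({"species": "Iris-versicolor"}, - variance_threshold=0.9)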
 - -Multi Models ------------ - -Multi Models use a number of BigML remote models to build a local version that can be used to generate predictions locally. Predictions are generated by combining the outputs of each model. - -.. code-block:: python - - from bigml.api import BigML - from bigml.multimodel import MultiModel - - api = BigML() - - model = MultiModel([api.get_model(model['resource']) for model in - api.list_models(query_string="tags__in=my_tag") - ['objects']]) - - model.predict({"petal length": 3, "petal width": 1}) - -This will create a multi model using all the models that have been previously tagged with ``my_tag`` and predict by combining each model's prediction. The combination method used by default is ``plurality`` for categorical predictions and the mean value for numerical ones. You can also use ``confidence weighted``: - -.. code-block:: python - - model.predict({"petal length": 3, "petal width": 1}, method=1) - -which will weight each vote using the confidence/error given by the model to each prediction, or even ``probability weighted``: - -.. code-block:: python - - model.predict({"petal length": 3, "petal width": 1}, method=2) - -which weights each vote by using the probability associated with the training distribution at the prediction node. - -There's also a ``threshold`` method that uses an additional set of options: threshold and category. The category is predicted if and only if the number of predictions for that category is at least the threshold value. Otherwise, the prediction is the plurality of the remaining predicted values. - -An example of the ``threshold`` combination method would be: - -.. code-block:: python - - model.predict({'petal length': 0.9, 'petal width': 3.0}, method=3, - options={'threshold': 3, 'category': 'Iris-virginica'}) - - -When making predictions on a test set with a large number of models, ``batch_predict`` can be useful to log each model's predictions in separate files. It expects a list of input data values and the directory path to save the prediction files in. - -.. code-block:: python - - model.batch_predict([{"petal length": 3, "petal width": 1}, - {"petal length": 1, "petal width": 5.1}], - "data/predictions") - -The predictions generated for each model will be stored in an output file in `data/predictions` using the syntax `model_[id of the model]__predictions.csv`. For instance, when using `model/50c0de043b563519830001c2` to predict, the output file name will be `model_50c0de043b563519830001c2__predictions.csv`. An additional feature is that using ``reuse=True`` as argument will force the function to skip the creation of the file if it already exists. This can be helpful when repeatedly using the same models on the same test set. - -.. code-block:: python - - model.batch_predict([{"petal length": 3, "petal width": 1}, - {"petal length": 1, "petal width": 5.1}], - "data/predictions", reuse=True) - -Prediction files can be subsequently retrieved and converted into a votes list using ``batch_votes``: - -.. code-block:: python - - model.batch_votes("data/predictions") - -which will return a list of MultiVote objects. Each MultiVote contains a list of predictions (e.g. ``[{'prediction': u'Iris-versicolor', 'confidence': 0.34, 'order': 0}, {'prediction': u'Iris-setosa', 'confidence': 0.25, 'order': 1}]``). These votes can be further combined to issue a final prediction for each input data element using the method ``combine``: - -.. code-block:: python - - for multivote in model.batch_votes("data/predictions"): - prediction = multivote.combine() - -Again, the default method of combination is ``plurality`` for categorical predictions and the mean value for numerical ones. You can also use ``confidence weighted``: - -.. code-block:: python - - prediction = multivote.combine(1) - -or ``probability weighted``: - -.. code-block:: python - - prediction = multivote.combine(2) - -You can also get a confidence measure for the combined prediction: - -.. code-block:: python - - prediction = multivote.combine(0, with_confidence=True) - -For classification, the confidence associated with the combined prediction is derived by first selecting the models' predictions that voted for the resulting prediction and computing the weighted average of their individual confidences. Nevertheless, when ``probability weighted`` is used, the confidence is obtained by using each model's distribution at the prediction node to build a probability distribution and combining them. The confidence is then computed as the Wilson score interval of the combined distribution (using as the total number of instances the sum of the original instances in all the models' distributions at the prediction node). - -In regression, all the model predictions' confidences contribute to the weighted average confidence. - - -Local Ensembles --------------- - -Remote ensembles can also be used locally through the ``Ensemble`` class. The simplest way to access an existing ensemble and use it to predict locally is: - -.. code-block:: python - - from bigml.ensemble import Ensemble - ensemble = Ensemble('ensemble/5143a51a37203f2cf7020351') - ensemble.predict({"petal length": 3, "petal width": 1}) - -This is the simplest way to create a local ``Ensemble``. The ``Ensemble('ensemble/5143a51a37203f2cf7020351')`` constructor fetches all the related JSON files and stores them in a ``./storage`` directory. 
Subsequent calls to ``Ensemble('ensemble/5143a51a37203f2cf7020351')`` will retrieve the files from this local storage, so an internet connection is only needed the first time an ``Ensemble`` is built. - -However, that method can only be used to work with the ensembles in our own account in BigML. If we intend to use ensembles created under an ``Organization``, we need to provide the information about the ``project`` that the ensemble belongs to. You need to provide a connection object for that: - -.. code-block:: python - - from bigml.ensemble import Ensemble - from bigml.api import BigML - - # connection object that informs about the project ID and the - # directory where the ensemble will be stored for local use - - api = BigML(project="project/5143a51a37203f2cf7020001", - storage="my_storage_directory") - - ensemble = Ensemble('ensemble/5143a51a37203f2cf7020351', api=api) - ensemble.predict({"petal length": 3, "petal width": 1}) - -The local ensemble object can be used to manage the three types of ensembles: ``Decision Forests`` (bagging or random) and the ones using ``Boosted Trees``. Also, you can choose the storage directory or even avoid storing at all. The ``api`` connection object controls the storage strategy through the ``storage`` argument. - -.. code-block:: python - - from bigml.api import BigML - from bigml.ensemble import Ensemble - - # api connection using a user-selected storage - api = BigML(storage='./my_storage') - - # creating ensemble - ensemble = api.create_ensemble('dataset/5143a51a37203f2cf7000972') - - # Ensemble object to predict - ensemble = Ensemble(ensemble, api) - ensemble.predict({"petal length": 3, "petal width": 1}, - operating_kind="votes") - -In this example, we create a new ensemble and store its information in the ``./my_storage`` folder. Then this information is used to predict locally using the number of votes (one per model) backing each category. - -The ``operating_kind`` argument overrides the legacy ``method`` argument, which was previously used to define the combiner for the models' predictions. - -Similarly, local ensembles can also be created by giving a list of models to be combined to issue the final prediction (note: only random decision forests and bagging ensembles can be built using this method): - -.. code-block:: python - - from bigml.ensemble import Ensemble - ensemble = Ensemble(['model/50c0de043b563519830001c2', \ - 'model/50c0de043b5635198300031b']) - ensemble.predict({"petal length": 3, "petal width": 1}) - -or even a JSON file that contains the ensemble resource: - -.. code-block:: python - - from bigml.api import BigML - api = BigML() - api.export("ensemble/50c0de043b5635198300033c", - "my_directory/my_ensemble.json") - - from bigml.ensemble import Ensemble - local_ensemble = Ensemble("./my_directory/my_ensemble.json") - -Note: the ensemble JSON structure is not self-contained, meaning that it contains references to the models that the ensemble is built from, but not the information of the models themselves. To use an ensemble locally with no connection to the internet, you must make sure that not only a local copy of the ensemble JSON file is available on your computer, but also the JSON files corresponding to the models in it. The ``export`` method takes care of retrieving the information of every model in the ensemble and storing it in the same directory as the ensemble JSON file. 
The ``Ensemble`` class will also look for the model files in the same directory when a path to an ensemble file is used as argument. - -If you have no memory limitations, you can create the ensemble from a list of local model objects. Then the local model objects will always be in memory and will only be instantiated once, which increases performance for large ensembles: - -.. code-block:: python - - from bigml.model import Model - from bigml.ensemble import Ensemble - model_ids = ['model/50c0de043b563519830001c2', \ - 'model/50c0de043b5635198300031b'] - local_models = [Model(model_id) for model_id in model_ids] - local_ensemble = Ensemble(local_models) - -The ``Ensemble`` object can also be instantiated using local models previously stored on disk or in memory object caching systems. To retrieve these models, provide a list of model ids as the first argument and an extra argument named ``cache_get`` that should be a function receiving the model id to be retrieved and returning a local model object. - -.. code-block:: python - - import json - from bigml.model import Model - from bigml.ensemble import Ensemble - model_ids = ['model/50c0de043b563519830001c2', \ - 'model/50c0de043b5635198300031b'] - def cache_get(model_id): - """ Retrieves a JSON model structure and builds a local model object - - """ - model_file = model_id.replace("/", "_") - return Model(json.load(open(model_file))) - local_ensemble = Ensemble(model_ids, cache_get=cache_get) - - -Local Ensemble's Predictions ---------------------------- - -As in the local model's case, you can use the local ensemble to create new predictions for your test data, and set some arguments to configure the final output of the ``predict`` method. - -The predictions' structure will vary depending on the kind of ensemble used. For ``Decision Forests``, local predictions will just contain the ensemble's final prediction if no other argument is used. - -.. code-block:: python - - from bigml.ensemble import Ensemble - ensemble = Ensemble('ensemble/5143a51a37203f2cf7020351') - ensemble.predict({"petal length": 3, "petal width": 1}) - u'Iris-versicolor' - -The final prediction of an ensemble is determined by aggregating or selecting the predictions of the individual models therein. For classifications, the most probable class is returned if no special operating method is set. Using ``full=True`` you can see both the predicted output and the associated probability: - -.. code-block:: python - - from bigml.ensemble import Ensemble - ensemble = Ensemble('ensemble/5143a51a37203f2cf7020351') - ensemble.predict({"petal length": 3, "petal width": 1}, \ - full=True) - - {'prediction': u'Iris-versicolor', - 'probability': 0.98566} - -In general, the prediction in a classification will be one amongst the list of categories in the objective field. When each model in the ensemble is used to predict, each category has a confidence, a probability or a vote associated with its prediction. Then, through the collection of models in the ensemble, each category gets an averaged confidence, probability and number of votes. Thus, you can decide whether to operate the ensemble using the ``confidence``, the ``probability`` or the ``votes``, so that the predicted category is the one that scores highest in the chosen quantity. The criterion can be set using the ``operating_kind`` option (the default is ``probability``): - -.. 
code-block:: python - - ensemble.predict({"petal length": 3, "petal width": 1}, \ - operating_kind="votes") - -Regressions will generate a prediction and an associated error. However, ``Boosted Trees`` don't have an associated confidence measure, so only the prediction will be obtained in this case. - -For consistency of interface with the ``Model`` class, as well as between boosted and non-boosted ensembles, local Ensembles again have a ``predict_probability`` method. This takes the same optional arguments as ``Model.predict``: ``missing_strategy`` and ``compact``. As with local Models, if ``compact`` is ``False`` (the default), the output is a list of maps, each with the keys ``prediction`` and ``probability`` mapped to the class name and its associated probability. - -So, for example: - -.. code-block:: python - - ensemble.predict_probability({"petal length": 3, "petal width": 1}) - - [{'category': u'Iris-setosa', 'probability': 0.006733220044732548}, - {'category': u'Iris-versicolor', 'probability': 0.9824478534614787}, - {'category': u'Iris-virginica', 'probability': 0.0108189264937886}] - -If ``compact`` is ``True``, only the probabilities themselves are returned, as a list in class name order, again, as is the case with local Models. - -Operating point predictions are also available for local ensembles and an example would be: - -.. code-block:: python - - operating_point = {"kind": "probability", - "positive_class": "True", - "threshold": 0.8} - prediction = local_ensemble.predict(input_data, - operating_point=operating_point) - -You can check the `Operating point's predictions <#operating-point's-predictions>`_ section to learn about operating points. For ensembles, three kinds of operating points are available: ``votes``, ``probability`` and ``confidence``. ``Votes`` will use as threshold the number of models in the ensemble that vote for the positive class. The other two are already explained in the above mentioned section.
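 - -For instance, a minimal sketch of a ``votes`` operating point (``input_data`` stands for your input dictionary; the class name and threshold value are illustrative): - -.. code-block:: python - - # predict the positive class only when at least 6 models vote for it - operating_point = {"kind": "votes", - "positive_class": "Iris-versicolor", - "threshold": 6} - prediction = local_ensemble.predict(input_data, - operating_point=operating_point)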
 - -Local Ensemble Predictor ------------------------ - -Predictions can take longer when the ensemble is formed by a large number of models or when its models have a high number of nodes. In these cases, prediction speed can be increased and memory usage minimized by using the ``EnsemblePredictor`` object. The basic example to build it is: - -.. code-block:: python - - from bigml.ensemblepredictor import EnsemblePredictor - ensemble = EnsemblePredictor('ensemble/5143a51a37203f2cf7020351', - "./model_fns_directory") - ensemble.predict({"petal length": 3, "petal width": 1}, full=True) - {'prediction': u'Iris-versicolor', 'confidence': 0.91519} - -This constructor has two compulsory arguments: the ensemble ID (or the corresponding API response) and the path to a directory that contains one file per model in the ensemble. Each file stores the ``predict`` function needed to obtain the model's predictions. As in the ``Ensemble`` object, you can also add an ``api`` argument with the connection to be used to download the ensemble's JSON information. - -The functions stored in this directory are generated automatically the first time you instantiate the ensemble. Once they are generated, the functions are retrieved from the directory. - -Note that only the last prediction missing strategy is available for these predictions, and the available combiners are ``plurality``, ``confidence`` and ``distribution``; no ``operating_kind`` or ``operating_point`` options are provided at present. - -Local Supervised Model ---------------------- - -There's a general class that will allow you to predict using any supervised model resource, regardless of its particular type (model, ensemble, logistic regression or deepnet). - -The ``SupervisedModel`` object will retrieve the resource information and instantiate the corresponding local object, so that you can use its ``predict`` method to produce local predictions: - -.. code-block:: python - - from bigml.supervised import SupervisedModel - local_supervised_1 = SupervisedModel( \ - "logisticregression/5143a51a37203f2cf7020351") - local_supervised_2 = SupervisedModel( \ - "model/5143a51a37203f2cf7020351") - input_data = {"petal length": 3, "petal width": 1} - logistic_regression_prediction = local_supervised_1.predict(input_data) - model_prediction = local_supervised_2.predict(input_data) - - -Fields ------ - -Once you have a resource, you can use the ``Fields`` class to generate a representation that will allow you to easily list fields, get field ids, get a field id by its name or column number, etc. - -.. code-block:: python - - from bigml.api import BigML - from bigml.fields import Fields - api = BigML() - source = api.get_source("source/5143a51a37203f2cf7000974") - - fields = Fields(source) - -You can also instantiate the Fields object from the fields dict itself: - -.. code-block:: python - - from bigml.api import BigML - from bigml.fields import Fields - api = BigML() - source = api.get_source("source/5143a51a37203f2cf7000974") - - fields = Fields(source['object']['fields']) - -The newly instantiated Fields object will give you direct methods to retrieve different field properties: - -.. code-block:: python - - # Internal id of the 'sepal length' field - fields.field_id('sepal length') - - # Field name of the field with column number 0 - fields.field_name(0) - - # Column number of the field named 'petal length' - fields.field_column_number('petal length') - - # Statistics of the values in the 'petal length' field - fields.stats('petal length') - -Depending on the resource type, Fields information will vary. ``Sources`` will only have the name, label, description and type of field (``optype``), while ``dataset`` resources will also have the ``preferred`` attribute (whether a field is selectable as a predictor), ``missing_count``, ``errors`` and a summary of the values found in each field. This is due to the fact that the ``source`` object is built by inspecting the contents of a sample of the uploaded file, while the ``dataset`` resource really reads all the uploaded information. Thus, the dataset's fields structure will always be more complete than the source's. - -In both cases, you can extract the summarized information available using the ``summary_csv`` method: - -.. code-block:: python - - from bigml.api import BigML - from bigml.fields import Fields - api = BigML() - dataset = api.get_dataset("dataset/5143a51a37203f2cf7300974") - - fields = Fields(dataset) - fields.summary_csv("my_fields_summary.csv") - -In this example, the information will be stored in the ``my_fields_summary.csv`` file. For the typical ``iris.csv`` data file, the summary will read: - -.. 
csv-table:: - :header: "field column","field ID","field name","field label","field description","field type","preferred","missing count","errors","contents summary","errors summary" - :widths: 5, 10, 20, 5, 5, 10, 10, 5, 5, 100, 10 - - 0,000000,sepal length,,,numeric,true,0,0,"[4.3, 7.9], mean: 5.84333", - 1,000001,sepal width,,,numeric,false,0,0,"[2, 4.4], mean: 3.05733", - 2,000002,petal length,,,numeric,true,0,0,"[1, 6.9], mean: 3.758", - 3,000003,petal width,,,numeric,true,0,0,"[0.1, 2.5], mean: 1.19933", - 4,000004,species,,,categorical,true,0,0,"3 categories: Iris-setosa (50), Iris-versicolor (50), Iris-virginica (50)", - -Another utility in the ``Fields`` object will help you update the updatable attributes of your source or dataset fields. For instance, if you need to update the type associated with one field in your dataset, you can change the ``field type`` values in the previous file and use it to obtain the fields structure needed to update your source: - -.. code-block:: python - - from bigml.api import BigML - from bigml.fields import Fields - api = BigML() - source = api.get_source("source/5143a51a37203f2cf7000974") - - fields = Fields(source) - fields_update_info = fields.new_fields_structure("my_fields_summary.csv") - source = api.update_source(source, fields_update_info) - -For both sources and datasets, the updatable attributes are name, label and description. In ``sources`` you can also update the type of the field, and in ``datasets`` you can update the ``preferred`` attribute. - -In addition to that, you can also easily ``pair`` a list of values with field ids, which is very useful to make predictions. - -For example, the following snippet may be useful to create local predictions using a CSV file as input (here, ``test_set_path``, ``model`` and ``objective_field`` are assumed to be defined beforehand): - -.. code-block:: python - - import csv - from bigml.model import Model - - test_reader = csv.reader(open(test_set_path)) - local_model = Model(model) - for row in test_reader: - input_data = fields.pair([float(val) for val in row], objective_field) - prediction = local_model.predict(input_data) - -If missing values are present, the ``Fields`` object can return a dict with the ids of the fields that contain missing values and their counts. The following example: - -.. code-block:: python - - from bigml.fields import Fields - from bigml.api import BigML - api = BigML() - dataset = api.get_dataset("dataset/5339d42337203f233e000015") - - fields = Fields(dataset) - fields.missing_counts() - -would output: - -.. code-block:: python - - {'000003': 1, '000000': 1, '000001': 1} - -if there was a missing value in each of the fields whose ids are ``000003``, ``000000``, ``000001``. - -You can also obtain the counts of errors per field using the ``error_counts`` method of the api: - -.. code-block:: python - - from bigml.api import BigML - api = BigML() - dataset = api.get_dataset("dataset/5339d42337203f233e000015") - api.error_counts(dataset) - -The generated output is like the one in ``missing_counts``, that is, the error counts per field: - -.. code-block:: python - - {'000000': 1} - - -Rule Generation --------------- - -You can also use a local model to generate an IF-THEN rule set that can be very helpful to understand how the model works internally. - -.. 
code-block:: python - - local_model.rules() - IF petal_length > 2.45 AND - IF petal_width > 1.65 AND - IF petal_length > 5.05 THEN - species = Iris-virginica - IF petal_length <= 5.05 AND - IF sepal_width > 2.9 AND - IF sepal_length > 5.95 AND - IF petal_length > 4.95 THEN - species = Iris-versicolor - IF petal_length <= 4.95 THEN - species = Iris-virginica - IF sepal_length <= 5.95 THEN - species = Iris-versicolor - IF sepal_width <= 2.9 THEN - species = Iris-virginica - IF petal_width <= 1.65 AND - IF petal_length > 4.95 AND - IF sepal_length > 6.05 THEN - species = Iris-virginica - IF sepal_length <= 6.05 AND - IF sepal_width > 2.45 THEN - species = Iris-versicolor - IF sepal_width <= 2.45 THEN - species = Iris-virginica - IF petal_length <= 4.95 THEN - species = Iris-versicolor - IF petal_length <= 2.45 THEN - species = Iris-setosa - - -Python, Tableau and Hadoop-ready Generation ------------------------------------------- - -If you prefer, you can also generate a Python function that implements the model, which can be useful to make the model actionable right away, using ``local_model.python()``. - -.. code-block:: python - - local_model.python() - def predict_species(sepal_length=None, - sepal_width=None, - petal_length=None, - petal_width=None): - """ Predictor for species from model/50a8e2d9eabcb404d2000293 - - Predictive model by BigML - Machine Learning Made Easy - """ - if (petal_length is None): - return 'Iris-virginica' - if (petal_length <= 2.45): - return 'Iris-setosa' - if (petal_length > 2.45): - if (petal_width is None): - return 'Iris-virginica' - if (petal_width <= 1.65): - if (petal_length <= 4.95): - return 'Iris-versicolor' - if (petal_length > 4.95): - if (sepal_length is None): - return 'Iris-virginica' - if (sepal_length <= 6.05): - if (petal_width <= 1.55): - return 'Iris-virginica' - if (petal_width > 1.55): - return 'Iris-versicolor' - if (sepal_length > 6.05): - return 'Iris-virginica' - if (petal_width > 1.65): - if (petal_length <= 5.05): - if (sepal_width is None): - return 'Iris-virginica' - if (sepal_width <= 2.9): - return 'Iris-virginica' - if (sepal_width > 2.9): - if (sepal_length is None): - return 'Iris-virginica' - if (sepal_length <= 6.4): - if (sepal_length <= 5.95): - return 'Iris-versicolor' - if (sepal_length > 5.95): - return 'Iris-virginica' - if (sepal_length > 6.4): - return 'Iris-versicolor' - if (petal_length > 5.05): - return 'Iris-virginica' - -The ``local_model.python(hadoop=True)`` call will generate the code that you need for the Hadoop map-reduce engine to produce batch predictions using `Hadoop streaming `_. By saving the generated mapper and reducer functions in their corresponding files (say ``/home/hduser/hadoop_mapper.py`` and ``/home/hduser/hadoop_reducer.py``) you can start a Hadoop job to generate predictions, issuing the following Hadoop command in your system console: - -.. code-block:: bash - - bin/hadoop jar contrib/streaming/hadoop-*streaming*.jar \ - -file /home/hduser/hadoop_mapper.py -mapper hadoop_mapper.py \ - -file /home/hduser/hadoop_reducer.py -reducer hadoop_reducer.py \ - -input /home/hduser/hadoop/input.csv \ - -output /home/hduser/hadoop/output_dir - -assuming you are in the Hadoop home directory, your input file is in the corresponding dfs directory (``/home/hduser/hadoop/input.csv`` in this example) and the output will be placed at ``/home/hduser/hadoop/output_dir`` (inside the dfs directory). 
- -Tableau-ready rules are also available through ``local_model.tableau()`` for all the models except those that use text predictors. - -.. code-block:: python - - local_model.tableau() - IF ISNULL([petal width]) THEN 'Iris-virginica' - ELSEIF [petal width]>0.8 AND [petal width]>1.75 AND ISNULL([petal length]) THEN 'Iris-virginica' - ELSEIF [petal width]>0.8 AND [petal width]>1.75 AND [petal length]>4.85 THEN 'Iris-virginica' - ELSEIF [petal width]>0.8 AND [petal width]>1.75 AND [petal length]<=4.85 AND ISNULL([sepal width]) THEN 'Iris-virginica' - ELSEIF [petal width]>0.8 AND [petal width]>1.75 AND [petal length]<=4.85 AND [sepal width]>3.1 THEN 'Iris-versicolor' - ELSEIF [petal width]>0.8 AND [petal width]>1.75 AND [petal length]<=4.85 AND [sepal width]<=3.1 THEN 'Iris-virginica' - ELSEIF [petal width]>0.8 AND [petal width]<=1.75 AND ISNULL([petal length]) THEN 'Iris-versicolor' - ELSEIF [petal width]>0.8 AND [petal width]<=1.75 AND [petal length]>4.95 AND [petal width]>1.55 AND [petal length]>5.45 THEN 'Iris-virginica' - ELSEIF [petal width]>0.8 AND [petal width]<=1.75 AND [petal length]>4.95 AND [petal width]>1.55 AND [petal length]<=5.45 THEN 'Iris-versicolor' - ELSEIF [petal width]>0.8 AND [petal width]<=1.75 AND [petal length]>4.95 AND [petal width]<=1.55 THEN 'Iris-virginica' - ELSEIF [petal width]>0.8 AND [petal width]<=1.75 AND [petal length]<=4.95 AND [petal width]>1.65 THEN 'Iris-virginica' - ELSEIF [petal width]>0.8 AND [petal width]<=1.75 AND [petal length]<=4.95 AND [petal width]<=1.65 THEN 'Iris-versicolor' - ELSEIF [petal width]<=0.8 THEN 'Iris-setosa' - END - - -Summary generation ------------------ - -You can also print the model from the point of view of the classes it predicts with ``local_model.summarize()``. It shows a header section with the initial training data distribution per class (instances and percentage) and the final predicted distribution per class. - -Then each class distribution is detailed. First, a header section shows the percentage of the total data that belongs to the class (in the training set and in the predicted results) and the rules applicable to all the instances of that class (if any). Just after that, a detail section shows each of the leaves in which the class members are distributed. They are sorted in descending order by the percentage of predictions of the class that fall into that leaf, and also show the full rule chain that leads to it. - -:: - - Data distribution: - Iris-setosa: 33.33% (50 instances) - Iris-versicolor: 33.33% (50 instances) - Iris-virginica: 33.33% (50 instances) - - - Predicted distribution: - Iris-setosa: 33.33% (50 instances) - Iris-versicolor: 33.33% (50 instances) - Iris-virginica: 33.33% (50 instances) - - - Field importance: - 1. petal length: 53.16% - 2. petal width: 46.33% - 3. sepal length: 0.51% - 4. sepal width: 0.00%
 - - - Iris-setosa : (data 33.33% / prediction 33.33%) petal length <= 2.45 - · 100.00%: petal length <= 2.45 [Confidence: 92.86%] - - - Iris-versicolor : (data 33.33% / prediction 33.33%) petal length > 2.45 - · 94.00%: petal length > 2.45 and petal width <= 1.65 and petal length <= 4.95 [Confidence: 92.44%] - · 2.00%: petal length > 2.45 and petal width <= 1.65 and petal length > 4.95 and sepal length <= 6.05 and petal width > 1.55 [Confidence: 20.65%] - · 2.00%: petal length > 2.45 and petal width > 1.65 and petal length <= 5.05 and sepal width > 2.9 and sepal length > 6.4 [Confidence: 20.65%] - · 2.00%: petal length > 2.45 and petal width > 1.65 and petal length <= 5.05 and sepal width > 2.9 and sepal length <= 6.4 and sepal length <= 5.95 [Confidence: 20.65%] - - - Iris-virginica : (data 33.33% / prediction 33.33%) petal length > 2.45 - · 76.00%: petal length > 2.45 and petal width > 1.65 and petal length > 5.05 [Confidence: 90.82%] - · 12.00%: petal length > 2.45 and petal width > 1.65 and petal length <= 5.05 and sepal width <= 2.9 [Confidence: 60.97%] - · 6.00%: petal length > 2.45 and petal width <= 1.65 and petal length > 4.95 and sepal length > 6.05 [Confidence: 43.85%] - · 4.00%: petal length > 2.45 and petal width > 1.65 and petal length <= 5.05 and sepal width > 2.9 and sepal length <= 6.4 and sepal length > 5.95 [Confidence: 34.24%] - · 2.00%: petal length > 2.45 and petal width <= 1.65 and petal length > 4.95 and sepal length <= 6.05 and petal width <= 1.55 [Confidence: 20.65%] - - -You can also use ``local_model.get_data_distribution()`` and ``local_model.get_prediction_distribution()`` to obtain the training and prediction basic distribution information as lists (suitable for drawing histograms or any further processing). The tree nodes' information (prediction, confidence, impurity and distribution) can also be retrieved in a CSV format using the ``local_model.tree_CSV()`` method. The output can be sent to a file by providing a ``file_name`` argument, or used as a list. - -Local ensembles have a ``local_ensemble.summarize()`` method too; in this case the output shows only the data distribution (only available for ``Decision Forests``) and the field importance sections. - -For local clusters, the ``local_cluster.summarize()`` method also prints the data distribution, the training data statistics per cluster and the basic intercentroid distance statistics. There's also a ``local_cluster.statistics_CSV(file_name)`` method that stores, in CSV format, the values shown by the ``summarize()`` method. If no file name is provided, the function returns the rows that would have been stored in the file as a list.
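 - -For instance, a minimal sketch of these helpers (the file names are illustrative): - -.. code-block:: python - - # training and predicted distributions as lists - data_distribution = local_model.get_data_distribution() - prediction_distribution = local_model.get_prediction_distribution() - # per-node information as CSV - local_model.tree_CSV(file_name="/tmp/tree_nodes.csv") - # per-cluster statistics as CSV - local_cluster.statistics_CSV(file_name="/tmp/cluster_stats.csv")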
-.. code-block:: bash
-
-    $ tox
+    $ tox
 
 Building the Documentation
 --------------------------
@@ -7535,6 +695,7 @@ Install the tools required to build the documentation:
 
 .. code-block:: bash
 
     $ pip install sphinx
+    $ pip install sphinx-rtd-theme
 
 To build the HTML version of the documentation:
@@ -7545,6 +706,17 @@ To build the HTML version of the documentation:
 
 Then launch ``docs/_build/html/index.html`` in your browser.
 
+
+Support
+-------
+
+Please report problems and bugs to our `BigML.io issue
+tracker `_.
+
+Discussions about the different bindings take place in the general
+`BigML mailing list `_.
+
+
 Additional Information
 ----------------------
diff --git a/docs/local_resources.rst b/docs/local_resources.rst
new file mode 100644
index 00000000..8cd90ae9
--- /dev/null
+++ b/docs/local_resources.rst
@@ -0,0 +1,2970 @@
+.. toctree::
+   :hidden:
+
+Local Resources
+===============
+
+All the resources in BigML can be downloaded and used afterwards locally,
+with no connection whatsoever to BigML's servers. This is especially
+important for all Supervised and Unsupervised models, which can be used to
+generate predictions on any programmable device. The next sections describe
+how to do that for each type of resource, but as a general rule, resources
+can be exported to a JSON file in your file system using the ``export``
+method.
+
+.. code-block:: python
+
+    api.export('model/5143a51a37203f2cf7000956',
+               filename='my_dir/my_model.json')
+
+The contents of the generated file can be used just as the remote model
+to generate predictions. As you'll see in the next section, the local
+``Model`` object can be instantiated by giving the path to this file as its
+first argument:
+
+.. code-block:: python
+
+    from bigml.model import Model
+    local_model = Model("my_dir/my_model.json")
+    local_model.predict({"petal length": 3, "petal width": 1})
+    Iris-versicolor
+
+These bindings define a particular class for each type of Machine Learning
+model that is able to interpret the corresponding JSON and create
+the local predictions. The classes can be instantiated using:
+
+- The ID of the resource: In this case, the class looks for the JSON
+  information of the resource first locally (expecting to find a file
+  in the local storage directory --``./storage`` by default --
+  whose name is the ID of the model after replacing ``/`` by ``_``)
+  and also remotely if absent.
+
+.. code-block:: python
+
+    from bigml.model import Model
+    from bigml.api import BigML
+
+    local_model = Model('model/502fdbff15526876610002615')
+
+- A dictionary containing the resource information. In this case, the class
+  checks that this information belongs to a finished resource and
+  contains the attributes needed to create predictions, like the fields
+  structure. If any of these attributes is absent, it retrieves the ID of
+  the model and tries to download the correct JSON from the API to store it
+  locally for further use.
+
+
+.. code-block:: python
+
+    from bigml.anomaly import Anomaly
+    from bigml.api import BigML
+    api = BigML()
+    anomaly = api.get_anomaly('anomaly/502fdbff15526876610002615',
+                              query_string='only_model=true;limit=-1')
+
+    local_anomaly = Anomaly(anomaly)
+
+- A path to the file that contains the JSON information for the resource.
+  In this case, the
+  file is read and the same checks mentioned above are done. If any of these
+  checks fails, it tries to retrieve the correct JSON from the API to store
+  it locally for further use.
+
+.. code-block:: python
+
+    from bigml.logistic import LogisticRegression
+    local_logistic_regression = LogisticRegression('./my_logistic.json')
+
+Internally, these classes need a connection object
+(``api = BigML(storage="./storage")``) to:
+
+- Set the local storage in your file system.
+- Download the JSON of the resource if the information provided is not the
+  full finished resource content.
+
+Users can provide the connection as a second argument when instantiating the
+class, but if they do and want the resource to be available locally, the
+connection object must be created with a ``storage`` setting:
+
+.. code-block:: python
+
+    from bigml.cluster import Cluster
+    from bigml.api import BigML
+
+    local_cluster = Cluster('cluster/502fdbff15526876610002435',
+                            api=BigML(my_username,
+                                      my_api_key,
+                                      storage="my_storage"))
+
+If no connection is provided, a default connection will be
+instantiated internally. This default connection will use ``./storage``
+as its default storage directory, and the credentials used to connect to
+the API when needed are retrieved from the ``BIGML_USERNAME`` and
+``BIGML_API_KEY`` environment variables. If no credentials are found in your
+environment, any attempt to download the information will raise an error
+asking the user to set these variables.
+
+If a connection with no ``storage`` information is provided, then the models
+will never be stored in your local file system, and will be retrieved from
+BigML's API each time the local model is instantiated.
+
+Ensembles and composite objects, like Fusions, need more than one resource
+to be downloaded and stored locally for the class to work. In this case,
+the class needs all the component models,
+so providing only a local file or a dictionary containing the
+JSON for the resource is not enough for the ``Ensemble`` or ``Fusion``
+objects to be fully instantiated. If you only provide that partial
+information, the class will use the internal API connection the first time
+to download the components.
+However, using the ``api.export`` method for ensembles or fusions
+will download these component models for you
+and will store them in the same directory as the file used to store
+the ensemble or fusion information. After that, you can
+instantiate the object using the path to the file where the ensemble
+or fusion information was stored. The class will look internally for the
+rest of the components in the same directory, so no connection to
+the API will be needed.
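+
+For instance, a minimal sketch of this workflow for an ensemble (the IDs
+and file names are placeholders) could be:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    from bigml.ensemble import Ensemble
+
+    api = BigML()
+    # downloads the ensemble JSON and its component models
+    api.export('ensemble/5143a51a37203f2cf7020900',
+               filename='my_dir/my_ensemble.json')
+    # from this point on, no connection to the API is needed
+    local_ensemble = Ensemble('my_dir/my_ensemble.json')
+    local_ensemble.predict({"petal length": 3, "petal width": 1})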
+
+If you use a tag to label the resource, you can also ask for the last
+resource that has the tag:
+
+.. code-block:: python
+
+    api.export_last('my_tag',
+                    resource_type='ensemble',
+                    filename='my_dir/my_ensemble.json')
+
+and even for a resource inside a project:
+
+.. code-block:: python
+
+    api.export_last('my_tag',
+                    resource_type='dataset',
+                    project='project/5143a51a37203f2cf7000959',
+                    filename='my_dir/my_dataset.json')
+
+
+Local Datasets
+--------------
+
+You can instantiate a local version of a dataset so that you can reproduce
+its transformations to generate new fields using Flatline expressions.
+
+.. code-block:: python
+
+    from bigml.dataset import Dataset
+    local_dataset = Dataset('dataset/502fdbff15526876610003215')
+
+This will retrieve the remote dataset information, using an implicitly built
+``BigML()`` connection object (see the `Authentication <#authentication>`_
+section for more
+details on how to set your credentials) and return a ``Dataset`` object
+that will be stored in the ``./storage`` directory. If you want to use a
+specific connection object for the remote retrieval or a different storage
+directory, you can set it as a second parameter:
+
+.. code-block:: python
+
+    from bigml.dataset import Dataset
+    from bigml.api import BigML
+
+    local_dataset = Dataset('dataset/502fdbff15526876610003215',
+                            api=BigML(my_username,
+                                      my_api_key,
+                                      storage="my_storage"))
+
+or even use the remote dataset information previously retrieved to build the
+local dataset object:
+
+.. code-block:: python
+
+    from bigml.dataset import Dataset
+    from bigml.api import BigML
+    api = BigML()
+    dataset = api.get_dataset('dataset/502fdbff15526876610003215',
+                              query_string='limit=-1')
+
+    local_dataset = Dataset(dataset)
+
+As you can see, the ``query_string`` used to retrieve the dataset is
+``limit=-1``, which avoids the pagination of fields that is used by default
+and includes them all at once. These details are already taken care of in
+the two previous examples, where the dataset ID is used as argument.
+
+You can also build a local dataset from a dataset previously retrieved and
+stored in a JSON file:
+
+.. code-block:: python
+
+    from bigml.dataset import Dataset
+    local_dataset = Dataset('./my_dataset.json')
+
+Adding new properties to an existing dataset is achieved by
+defining some expressions based on the fields
+of a previously existing origin dataset. The expressions are written using
+the ``Flatline`` language. These transformations are
+stored in a ``new_fields`` attribute, and the
+``Dataset`` object will keep them, if available.
+That information can be used to reproduce the same transformations
+using new inputs. Of course, the fields in the input data to be transformed
+are expected to match the fields structure of the dataset that was
+used as origin to create the present one.
+
+
+.. code-block:: python
+
+    from bigml.dataset import Dataset
+    local_dataset = Dataset('./my_dataset.json')
+    # The dataset in my_dataset.json was created from a dataset whose fields
+    # were ``foo`` and ``baz``. The transformation that generated the new
+    # dataset added a new field ``qux`` whose value is ``baz`` divided by 2
+    input_data_list = [{"foo": "bar", "baz": 32}]
+    output_data_list = local_dataset.transform(input_data_list)
+    # output_data_list: [{"foo": "bar", "baz": 32, "qux": 16}]
+
+The ``Dataset`` object offers a method to download a sample of the rows
+that can be found in the dataset.
+
+
+.. code-block:: python
+
+    from bigml.dataset import Dataset
+    local_dataset = Dataset('dataset/502fdbff15526876610003215')
+    rows = local_dataset.get_sample(rows_number=50)
+
+The result will be a list of lists, which are the row values sorted as
+described in the fields structure of the dataset. Of course,
+this operation cannot be performed locally. BigML's API will be
+called behind the scenes to create a ``Sample`` object and retrieve the
+corresponding rows. Similarly, you can use the ``get_input_sample``
+method to get a sample of rows of the origin dataset (if available in
+BigML).
+
+.. code-block:: python
+
+    from bigml.dataset import Dataset
+    local_dataset = Dataset('dataset/502fdbff15526876610003215')
+    rows = local_dataset.get_input_sample(rows_number=50)
+    # these rows will represent the values available in the dataset
+    # that was used as origin to create dataset/502fdbff15526876610003215
+
+
+Local Models
+------------
+
+You can instantiate a local version of a remote model.
+
+.. code-block:: python
+
+    from bigml.model import Model
+    local_model = Model('model/502fdbff15526876610002615')
+
+This will retrieve the remote model information, using an implicitly built
+``BigML()`` connection object (see the `Authentication <#authentication>`_
+section for more
+details on how to set your credentials) and return a ``Model`` object
+that will be stored in the ``./storage`` directory and
+that you can use to make local predictions. If you want to use a
+specific connection object for the remote retrieval or a different storage
+directory, you can set it as a second parameter:
+
+.. code-block:: python
+
+    from bigml.model import Model
+    from bigml.api import BigML
+
+    local_model = Model('model/502fdbff15526876610002615',
+                        api=BigML(my_username,
+                                  my_api_key,
+                                  storage="my_storage"))
+
+or even use the remote model information previously retrieved to build the
+local model object:
+
+.. code-block:: python
+
+    from bigml.model import Model
+    from bigml.api import BigML
+    api = BigML()
+    model = api.get_model('model/502fdbff15526876610002615',
+                          query_string='only_model=true;limit=-1')
+
+    local_model = Model(model)
+
+As you can see, the ``query_string`` used to retrieve the model has two
+parts. They both act on the ``fields``
+information that is added to the JSON response. First,
+``only_model=true`` is used to restrict the fields described in the
+``fields`` structure of the response to those used as
+predictors in the model. Also,
+``limit=-1`` avoids the pagination of fields which is used by default and
+includes them all at once. These details are already taken care of in the
+two previous examples, where the model ID is used as argument.
+
+Any of these methods will return a ``Model`` object that you can use to make
+local predictions, generate IF-THEN rules, Tableau rules
+or a Python function that implements the model.
+
+You can also build a local model from a model previously retrieved and
+stored in a JSON file:
+
+.. code-block:: python
+
+    from bigml.model import Model
+    local_model = Model('./my_model.json')
+
+
+Local Predictions
+-----------------
+
+Once you have a local model, you can use it to generate predictions locally.
+
+.. code-block:: python
+
+    local_model.predict({"petal length": 3, "petal width": 1})
+    Iris-versicolor
+
+Local predictions have three clear advantages:
+
+- Removing the dependency on BigML to make new predictions.
+
+- No cost (i.e., you do not spend BigML credits).
+
+- Extremely low latency to generate predictions for huge volumes of data.
+
+The default output for local predictions is the prediction itself, but you
+can also add other properties associated with the prediction, like its
+confidence or probability, the distribution of values in the predicted node
+(for decision tree models), and the number of instances supporting the
+prediction. To obtain a
+dictionary with the prediction and the available additional
+properties, use the ``full=True`` argument:
+
+.. code-block:: python
+
+    local_model.predict({"petal length": 3, "petal width": 1}, full=True)
+
+that will return:
+
+.. code-block:: python
+
+    {'count': 47,
+     'confidence': 0.92444,
+     'probability': 0.9861111111111112,
+     'prediction': u'Iris-versicolor',
+     'distribution_unit': 'categories',
+     'path': [u'petal length > 2.45',
+              u'petal width <= 1.75',
+              u'petal length <= 4.95',
+              u'petal width <= 1.65'],
+     'distribution': [[u'Iris-versicolor', 47]]}
+
+Note that the ``path`` attribute for the ``proportional`` missing strategy
+shows the path leading to a final unique node, which gives the prediction,
+or to the first split where a missing value is found. Other optional
+attributes are
+``next``, which contains the field that determines the next split after
+the prediction node, and ``distribution``, which adds the distribution
+that leads to the prediction. For regression models, ``min`` and
+``max`` will add the limit values for the data that supports the
+prediction.
+
+When your test data has missing values, you can choose between the ``last
+prediction`` and ``proportional`` strategies to compute the
+prediction. The ``last prediction`` strategy is the one used by
+default. To compute a prediction, the algorithm goes down the model's
+decision tree and checks the condition it finds at each node (e.g.:
+'sepal length' > 2). If the field checked is missing in your input
+data, you have two options: by default (``last prediction`` strategy)
+the algorithm will stop and issue the last prediction it computed in
+the previous node. If you choose the ``proportional`` strategy instead, the
+algorithm will continue to go down the tree considering both branches
+from that node on. Thus, it will store a list of possible predictions
+from then on, one per valid node. In this case, the final prediction
+will be the majority (for categorical models) or the average (for
+regressions) of the stored list of predictions.
+
+You can set this strategy by using the ``missing_strategy``
+argument with code ``0`` to use ``last prediction`` and ``1`` for
+``proportional``.
+
+.. code-block:: python
+
+    from bigml.model import LAST_PREDICTION, PROPORTIONAL
+    # LAST_PREDICTION = 0; PROPORTIONAL = 1
+    local_model.predict({"petal length": 3, "petal width": 1},
+                        missing_strategy=PROPORTIONAL)
+
+For classification models, it is sometimes useful to obtain a
+probability or confidence prediction for each possible class of the
+objective field. To do this, you can use the ``predict_probability``
+and ``predict_confidence`` methods respectively. The former gives a
+prediction based on the distribution of instances at the appropriate
+leaf node, with a Laplace correction based on the root node
+distribution. The latter returns a lower confidence bound on the leaf
+node probability based on the Wilson score interval.
+
+Each of these methods takes the ``missing_strategy``
+argument, which functions as it does in ``predict``, and one additional
+argument, ``compact``. If ``compact`` is ``False`` (the default), the
+output of these functions is a list of maps, each with the keys
+``prediction`` and ``probability`` (or ``confidence``) mapped to the
+class name and its associated probability (or confidence). Note that these
+methods replace the functionality of the deprecated ``multiple`` parameter
+of the ``predict`` method.
+
+So, for example, the following:
+
+.. code-block:: python
+
+    local_model.predict_probability({"petal length": 3})
+
+would result in
+
+.. code-block:: python
+
+    [{'prediction': u'Iris-setosa',
+      'probability': 0.0033003300330033},
+     {'prediction': u'Iris-versicolor',
+      'probability': 0.4983498349834984},
+     {'prediction': u'Iris-virginica',
+      'probability': 0.4983498349834984}]
+
+If ``compact`` is ``True``, only the probabilities themselves are
+returned, as a list in class name order. Note that, for reference,
+the attribute ``Model.class_names`` contains the class names in the
+appropriate ordering.
+
+To illustrate, the following:
+
+.. code-block:: python
+
+    local_model.predict_probability({"petal length": 3}, compact=True)
+
+would result in
+
+.. code-block:: python
+
+    [0.0033003300330033, 0.4983498349834984, 0.4983498349834984]
+
+The output of ``predict_confidence`` is the same, except that the
+output maps are keyed with ``confidence`` instead of ``probability``.
+
+
+For classifications, the prediction of a local model will be one of the
+available categories in the objective field and an associated ``confidence``
+or ``probability`` that is used to decide which is the predicted category.
+If you prefer the model predictions to be decided using one of them, you can
+use the ``operating_kind`` argument in the ``predict`` method.
+Here's the example
+to use predictions based on ``confidence``:
+
+.. code-block:: python
+
+    local_model.predict({"petal length": 3, "petal width": 1},
+                        operating_kind="confidence")
+
+Previous versions of the bindings had additional arguments in the ``predict``
+method that were used to format the prediction attributes. The signature of
+the method has been changed to accept only arguments that affect the
+prediction itself (like ``missing_strategy``, ``operating_kind`` and
+``operating_point``) and ``full``, which is a boolean that controls whether
+the output is the prediction itself or a dictionary with all the available
+properties associated with the prediction. Formatting can be achieved by
+using the ``cast_prediction`` function:
+
+.. code-block:: python
+
+    def cast_prediction(full_prediction, to=None,
+                        confidence=False, probability=False,
+                        path=False, distribution=False,
+                        count=False, next=False, d_min=False,
+                        d_max=False, median=False,
+                        unused_fields=False):
+
+whose first argument is the prediction obtained with the ``full=True``
+argument, the second one defines the type of output (``None`` to obtain
+the prediction output only, "list" or "dict") and the rest are booleans
+that cause the corresponding property to be included or not.
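+
+For instance, a minimal usage sketch (assuming ``cast_prediction`` can be
+imported from the ``bigml.model`` module) could be:
+
+.. code-block:: python
+
+    from bigml.model import cast_prediction
+
+    full_prediction = local_model.predict(
+        {"petal length": 3, "petal width": 1}, full=True)
+    # keep only the prediction and its confidence, output as a dictionary
+    cast_prediction(full_prediction, to="dict", confidence=True)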
+
+Operating point's predictions
+-----------------------------
+
+In classification problems,
+Models, Ensembles and Logistic Regressions can be used at different
+operating points, that is, associated with particular thresholds. Each
+operating point is then defined by the kind of property you use as
+threshold, its value and the class that is supposed to be predicted if the
+threshold is reached.
+
+Let's assume you have a binary problem, with classes ``True``
+and ``False`` as possible outcomes. Imagine you want to be very sure to
+predict the `True` outcome, so you don't want to predict that unless the
+probability associated with it is over ``0.8``. You can achieve this with
+any classification model by creating an operating point:
+
+.. code-block:: python
+
+    operating_point = {"kind": "probability",
+                       "positive_class": "True",
+                       "threshold": 0.8}
+
+To predict using this restriction, you can use the ``operating_point``
+parameter:
+
+.. code-block:: python
+
+    prediction = local_model.predict(input_data,
+                                     operating_point=operating_point)
+
+where ``input_data`` should contain the values for which you want to
+predict. Local models allow two kinds of operating points: ``probability``
+and ``confidence``. For both of them, the threshold can be set to any number
+in the ``[0, 1]`` range.
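+
+For instance, a minimal sketch of a ``confidence``-based operating point
+(the threshold value is just illustrative) could be:
+
+.. code-block:: python
+
+    operating_point = {"kind": "confidence",
+                       "positive_class": "True",
+                       "threshold": 0.7}
+    prediction = local_model.predict(input_data,
+                                     operating_point=operating_point)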
+
+
+Local feature generation for predictions
+----------------------------------------
+
+All kinds of local models (ensembles, clusters, etc.) offer a
+prediction-like method that receives the input data to be used as test data
+and produces the prediction output (prediction, centroid, etc.). However,
+one of BigML's capabilities is automatic feature extraction from date-time
+or image fields. Also, the Flatline language allows the user to create
+new features from the raw data to be used in modelling. Thus, your model
+might use features that have been derived from the original raw data and
+should be replicated at prediction time.
+
+``Local pipelines`` are objects that will store all the
+feature extraction and transformations used to produce the dataset that was
+used for training (see `Local Pipelines <#local-pipelines>`_).
+These objects provide a ``.transform`` method that can be
+applied to the raw input data to reproduce the same transformations that
+generated the model's training data from the raw training
+data. Every local model class offers a ``.data_transformations`` method that
+generates a ``BMLPipeline`` object, storing these transformations.
+The user can apply them before calling the corresponding prediction method.
+
+.. code-block:: python
+
+    from bigml.model import Model
+    local_model = Model('model/502fdbff15526876610002435')
+    local_pipeline = local_model.data_transformations()
+    # the pipeline transform method is applied to lists of dictionaries
+    # (one row per dictionary).
+    # For a single prediction, a list of one input is sent to be
+    # transformed and the result will be a list, whose
+    # first element is used as transformed input data
+    input_data = local_pipeline.transform(
+        [{"petal length": 4.4, "sepal width": 3.2}])[0]
+    prediction = local_model.predict(input_data)
+
+
+Local Clusters
+--------------
+
+You can also instantiate a local version of a remote cluster.
+
+.. code-block:: python
+
+    from bigml.cluster import Cluster
+    local_cluster = Cluster('cluster/502fdbff15526876610002435')
+
+This will retrieve the remote cluster information, using an implicitly built
+``BigML()`` connection object (see the `Authentication <#authentication>`_
+section for more
+details on how to set your credentials) and return a ``Cluster`` object
+that will be stored in the ``./storage`` directory and
+that you can use to make local centroid predictions. If you want to use a
+specific connection object for the remote retrieval or a different storage
+directory, you can set it as a second
+parameter:
+
+.. code-block:: python
+
+    from bigml.cluster import Cluster
+    from bigml.api import BigML
+
+    local_cluster = Cluster('cluster/502fdbff15526876610002435',
+                            api=BigML(my_username,
+                                      my_api_key,
+                                      storage="my_storage"))
+
+or even use the remote cluster information previously retrieved to build the
+local cluster object:
+
+.. code-block:: python
+
+    from bigml.cluster import Cluster
+    from bigml.api import BigML
+    api = BigML()
+    cluster = api.get_cluster('cluster/502fdbff15526876610002435',
+                              query_string='limit=-1')
+
+    local_cluster = Cluster(cluster)
+
+Note that in this example we used a ``limit=-1`` query string for the
+cluster retrieval. This ensures that all fields are retrieved by the get
+method in the same call (unlike in the standard calls where the number of
+fields returned is limited).
+
+Local clusters also provide methods for the main operations that
+can be done using clusters: finding the centroid assigned to a certain data
+point, sorting centroids according to their distance to a data point,
+summarizing
+the centroids' intra-distances and inter-distances and also finding the
+closest points to a given one. The `Local Centroids <#local-centroids>`_
+and the
+`Summary generation <#summary-generation>`_ sections will
+explain these methods.
+
+Local Centroids
+---------------
+
+Using the local cluster object, you can predict the centroid associated with
+an input data set:
+
+.. code-block:: python
+
+    local_cluster.centroid({"pregnancies": 0, "plasma glucose": 118,
+                            "blood pressure": 84,
+                            "triceps skin thickness": 47,
+                            "insulin": 230, "bmi": 45.8,
+                            "diabetes pedigree": 0.551, "age": 31,
+                            "diabetes": "true"})
+    {'distance': 0.454110207355, 'centroid_name': 'Cluster 4',
+     'centroid_id': '000004'}
+
+
+You must keep in mind, though, that to obtain a centroid prediction, input
+data must have values for all the numeric fields. No missing values for the
+numeric fields are allowed unless you provided a ``default_numeric_value``
+in the cluster construction configuration. If so, this value will be used to
+fill the missing numeric fields.
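+
+For instance, a minimal sketch of creating a cluster configured this way
+(the dataset ID is a placeholder, and ``"mean"`` is just one of the allowed
+specifications) could be:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    # missing numeric inputs will be filled with each field's mean value
+    # when computing centroids
+    cluster = api.create_cluster('dataset/502fdbff15526876610003215',
+                                 {"default_numeric_value": "mean"})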
+
+As in the local model predictions, producing local centroids can be done
+independently of BigML servers, so no cost or connection latencies are
+involved.
+
+Another interesting method in the cluster object is
+``local_cluster.closests_in_cluster``, which given a reference data point
+will provide the rest of the points that fall into the same cluster, sorted
+in ascending order according to their distance to this point. You can limit
+the maximum number of points returned by setting the ``number_of_points``
+argument to any positive integer.
+
+.. code-block:: python
+
+    local_cluster.closests_in_cluster( \
+        {"pregnancies": 0, "plasma glucose": 118,
+         "blood pressure": 84, "triceps skin thickness": 47,
+         "insulin": 230, "bmi": 45.8,
+         "diabetes pedigree": 0.551, "age": 31,
+         "diabetes": "true"}, number_of_points=2)
+
+The response will be a dictionary with the centroid ID of the cluster and
+the list of closest points and their distances to the reference point.
+
+.. code-block:: python
+
+    {'closest': [ \
+        {'distance': 0.06912270988567025,
+         'data': {'plasma glucose': '115', 'blood pressure': '70',
+                  'triceps skin thickness': '30', 'pregnancies': '1',
+                  'bmi': '34.6', 'diabetes pedigree': '0.529',
+                  'insulin': '96', 'age': '32', 'diabetes': 'true'}},
+        {'distance': 0.10396456577958413,
+         'data': {'plasma glucose': '167', 'blood pressure': '74',
+                  'triceps skin thickness': '17', 'pregnancies': '1',
+                  'bmi': '23.4', 'diabetes pedigree': '0.447',
+                  'insulin': '144', 'age': '33', 'diabetes': 'true'}}],
+     'reference': {'age': 31, 'bmi': 45.8, 'plasma glucose': 118,
+                   'insulin': 230, 'blood pressure': 84,
+                   'pregnancies': 0, 'triceps skin thickness': 47,
+                   'diabetes pedigree': 0.551, 'diabetes': 'true'},
+     'centroid_id': u'000000'}
+
+Missing numeric values are not allowed in the reference data point either.
+If you want the data points to belong to a different cluster, you can
+provide the ``centroid_id`` for the cluster as an additional argument.
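+
+For instance, a minimal sketch using the ``centroid_id`` argument (the
+centroid ID value is just a placeholder) could be:
+
+.. code-block:: python
+
+    reference_point = {"pregnancies": 0, "plasma glucose": 118,
+                       "blood pressure": 84, "triceps skin thickness": 47,
+                       "insulin": 230, "bmi": 45.8,
+                       "diabetes pedigree": 0.551, "age": 31,
+                       "diabetes": "true"}
+    # closest points within cluster "000004", instead of the cluster
+    # that the reference point is assigned to
+    local_cluster.closests_in_cluster(reference_point,
+                                      number_of_points=2,
+                                      centroid_id="000004")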
+
+Other utility methods are ``local_cluster.sorted_centroids``, which given
+a reference data point will provide the list of centroids sorted according
+to their distance to it:
+
+.. code-block:: python
+
+    local_cluster.sorted_centroids( \
+        {'plasma glucose': '115', 'blood pressure': '70',
+         'triceps skin thickness': '30', 'pregnancies': '1',
+         'bmi': '34.6', 'diabetes pedigree': '0.529',
+         'insulin': '96', 'age': '32', 'diabetes': 'true'})
+    {'centroids': [{'distance': 0.31656890408929705,
+                    'data': {u'000006': 0.34571, u'000007': 30.7619,
+                             u'000000': 3.79592, u'000008': u'false'},
+                    'centroid_id': u'000000'},
+                   {'distance': 0.4424198506958207,
+                    'data': {u'000006': 0.77087, u'000007': 45.50943,
+                             u'000000': 5.90566, u'000008': u'true'},
+                    'centroid_id': u'000001'}],
+     'reference': {'age': '32', 'bmi': '34.6', 'plasma glucose': '115',
+                   'insulin': '96', 'blood pressure': '70',
+                   'pregnancies': '1', 'triceps skin thickness': '30',
+                   'diabetes pedigree': '0.529', 'diabetes': 'true'}}
+
+
+
+or ``points_in_cluster``, which returns the list of
+data points assigned to a certain cluster, given its ``centroid_id``.
+
+.. code-block:: python
+
+    centroid_id = "000000"
+    local_cluster.points_in_cluster(centroid_id)
+
+
+Local Anomaly Detector
+----------------------
+
+You can also instantiate a local version of a remote anomaly.
+
+.. code-block:: python
+
+    from bigml.anomaly import Anomaly
+    local_anomaly = Anomaly('anomaly/502fcbff15526876610002435')
+
+This will retrieve the remote anomaly detector information, using an
+implicitly built ``BigML()`` connection object (see the
+`Authentication <#authentication>`_ section for
+more details on how to set your credentials) and return an ``Anomaly``
+object that will be stored in the ``./storage`` directory and
+that you can use to compute local anomaly scores. If you want to use a
+specific connection object for the remote retrieval or a different storage
+directory, you can set it as a second
+parameter:
+
+.. code-block:: python
+
+    from bigml.anomaly import Anomaly
+    from bigml.api import BigML
+
+    local_anomaly = Anomaly('anomaly/502fcbff15526876610002435',
+                            api=BigML(my_username,
+                                      my_api_key,
+                                      storage="my_storage_dir"))
+
+or even use the remote anomaly information retrieved previously to build the
+local anomaly detector object:
+
+.. code-block:: python
+
+    from bigml.anomaly import Anomaly
+    from bigml.api import BigML
+    api = BigML()
+    anomaly = api.get_anomaly('anomaly/502fcbff15526876610002435',
+                              query_string='limit=-1')
+
+    local_anomaly = Anomaly(anomaly)
+
+Note that in this example we used a ``limit=-1`` query string for the
+anomaly retrieval. This ensures that all fields are retrieved by the get
+method in the same call (unlike in the standard calls where the number of
+fields returned is limited).
+
+The anomaly detector object also has an ``anomalies_filter`` method
+that builds the LISP filter you would need to filter the original
+dataset and create a new one that excludes
+the top anomalies. By setting the ``include`` parameter to ``True``, you can
+do the inverse and create a dataset with only the most anomalous data
+points.
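+
+For instance, a minimal sketch of this workflow (the dataset ID is a
+placeholder) could be:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    from bigml.anomaly import Anomaly
+
+    api = BigML()
+    local_anomaly = Anomaly('anomaly/502fcbff15526876610002435')
+    # LISP filter that excludes the top anomalies
+    lisp_filter = local_anomaly.anomalies_filter(include=False)
+    # new dataset built from the original one, excluding the top anomalies
+    dataset = api.create_dataset('dataset/502fdbff15526876610003215',
+                                 {"lisp_filter": lisp_filter})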
+
+
+Local Anomaly Scores
+--------------------
+
+Using the local anomaly detector object, you can predict the anomaly score
+associated with an input data set:
+
+.. code-block:: python
+
+    local_anomaly.anomaly_score({"src_bytes": 350})
+    0.9268527808726705
+
+
+As in the local model predictions, producing local anomaly scores can be
+done independently of BigML servers, so no cost or connection latencies are
+involved.
+
+Local Anomaly caching
+---------------------
+
+Anomalies can become quite large objects, so their memory footprint can be
+heavy. If your usual scenario is using many of them
+constantly and in no particular order, the best strategy is setting up a
+cache system to store them. The local anomaly class provides helpers to
+interact with that cache. Here's an example using ``Redis``.
+
+.. code-block:: python
+
+    from bigml.anomaly import Anomaly
+    import redis
+    r = redis.Redis()
+    # First build as you would any core Anomaly object:
+    anomaly = Anomaly('anomaly/5126965515526876630001b2')
+    # Store a serialized version in Redis
+    anomaly.dump(cache_set=r.set)
+    # (retrieve the external rep from its convenient place)
+    # Speedy Build from external rep
+    anomaly = Anomaly('anomaly/5126965515526876630001b2', cache_get=r.get)
+    # Get scores same as always:
+    anomaly.anomaly_score({"src_bytes": 350})
+
+
+Local Logistic Regression
+-------------------------
+
+You can also instantiate a local version of a remote logistic regression.
+
+.. code-block:: python
+
+    from bigml.logistic import LogisticRegression
+    local_log_regression = LogisticRegression(
+        'logisticregression/502fdbff15526876610042435')
+
+This will retrieve the remote logistic regression information,
+using an implicitly built
+``BigML()`` connection object (see the `Authentication <#authentication>`_
+section for more
+details on how to set your credentials) and return a ``LogisticRegression``
+object that will be stored in the ``./storage`` directory and
+that you can use to make local predictions. If you want to use a
+specific connection object for the remote retrieval or a different storage
+directory, you can set it as a second
+parameter:
+
+.. code-block:: python
+
+    from bigml.logistic import LogisticRegression
+    from bigml.api import BigML
+
+    local_log_regression = LogisticRegression(
+        'logisticregression/502fdbff15526876610602435',
+        api=BigML(my_username, my_api_key, storage="my_storage"))
+
+You can also reuse a remote logistic regression JSON structure
+as previously retrieved to build the
+local logistic regression object:
+
+.. code-block:: python
+
+    from bigml.logistic import LogisticRegression
+    from bigml.api import BigML
+    api = BigML()
+    logistic_regression = api.get_logistic_regression(
+        'logisticregression/502fdbff15526876610002435',
+        query_string='limit=-1')
+
+    local_log_regression = LogisticRegression(logistic_regression)
+
+Note that in this example we used a ``limit=-1`` query string for the
+logistic regression retrieval. This ensures that all fields are
+retrieved by the get method in the same call (unlike in the standard
+calls where the number of fields returned is limited).
+
+Local Logistic Regression Predictions
+-------------------------------------
+
+Using the local logistic regression object, you can generate predictions
+for input data:
+
+.. code-block:: python
+
+    local_log_regression.predict({"petal length": 2, "sepal length": 1.5,
+                                  "petal width": 0.5, "sepal width": 0.7},
+                                 full=True)
+    {'distribution': [
+        {'category': u'Iris-virginica', 'probability': 0.5041444478857267},
+        {'category': u'Iris-versicolor', 'probability': 0.46926542042788333},
+        {'category': u'Iris-setosa', 'probability': 0.02659013168639014}],
+     'prediction': u'Iris-virginica', 'probability': 0.5041444478857267}
+
+As you can see, the prediction contains the predicted category and the
+associated probability. It also shows the distribution of probabilities for
+all the possible categories in the objective field. If you only need the
+predicted value, you can remove the ``full`` argument.
+
+You must keep in mind, though, that to obtain a logistic regression
+prediction, input data
+must have values for all the numeric fields. No missing values for the
+numeric fields are allowed.
+
+For consistency of interface with the ``Model`` class, logistic
+regressions again have a ``predict_probability`` method, which takes
+the same argument as ``Model.predict``:
+``compact``. As stated above, missing values are not allowed, and so
+there is no ``missing_strategy`` argument.
+
+As with local Models, if ``compact`` is ``False`` (the default), the
+output is a list of maps, each with the keys ``prediction`` and
+``probability`` mapped to the class name and its associated
+probability.
+
+So, for example:
+
+.. code-block:: python
+
+    local_log_regression.predict_probability(
+        {"petal length": 2, "sepal length": 1.5,
+         "petal width": 0.5, "sepal width": 0.7})
+
+    [{'category': u'Iris-setosa', 'probability': 0.02659013168639014},
+     {'category': u'Iris-versicolor', 'probability': 0.46926542042788333},
+     {'category': u'Iris-virginica', 'probability': 0.5041444478857267}]
+
+If ``compact`` is ``True``, only the probabilities themselves are
+returned, as a list in class name order, again, as is the case with
+local Models.
+
+Operating point predictions are also available for local logistic
+regressions. An example would be:
+
+.. code-block:: python
+
+    operating_point = {"kind": "probability",
+                       "positive_class": "True",
+                       "threshold": 0.8}
+    local_log_regression.predict(input_data,
+                                 operating_point=operating_point)
+
+You can check the
+`Operating point's predictions <#operating-point's-predictions>`_ section
+to learn about
+operating points. For logistic regressions, the only available kind is
+``probability``, which sets the threshold of probability to be reached for
+the prediction to be the positive class.
+
+Local Linear Regression
+-----------------------
+
+You can also instantiate a local version of a remote linear regression:
+
+.. code-block:: python
+
+    from bigml.linear import LinearRegression
+    local_linear_regression = LinearRegression(
+        'linearregression/502fdbff15526876610042435')
+
+This will retrieve the remote linear regression information,
+using an implicitly built
+``BigML()`` connection object (see the `Authentication <#authentication>`_
+section for more
+details on how to set your credentials) and return a ``LinearRegression``
+object that will be stored in the ``./storage`` directory and
+that you can use to make local predictions. If you want to use a
+specific connection object for the remote retrieval or a different storage
+directory, you can set it as a second
+parameter:
+
+.. code-block:: python
+
+    from bigml.linear import LinearRegression
+    from bigml.api import BigML
+
+    local_linear_regression = LinearRegression(
+        'linearregression/502fdbff15526876610602435',
+        api=BigML(my_username, my_api_key, storage="my_storage"))
+
+You can also reuse a remote linear regression JSON structure
+as previously retrieved to build the
+local linear regression object:
+
+.. code-block:: python
+
+    from bigml.linear import LinearRegression
+    from bigml.api import BigML
+    api = BigML()
+    linear_regression = api.get_linear_regression(
+        'linearregression/502fdbff15526876610002435',
+        query_string='limit=-1')
+
+    local_linear_regression = LinearRegression(linear_regression)
+
+Note that in this example we used a ``limit=-1`` query string for the
+linear regression retrieval. This ensures that all fields are
+retrieved by the get method in the same call (unlike in the standard
+calls where the number of fields returned is limited).
+
+Local Linear Regression Predictions
+-----------------------------------
+
+Using the local ``LinearRegression`` class, you can generate predictions
+for input data:
+
+.. code-block:: python
+
+    local_linear_regression.predict({"petal length": 2, "sepal length": 1.5,
+                                     "species": "Iris-setosa",
+                                     "sepal width": 0.7},
+                                    full=True)
+    {'confidence_bounds': {
+        'prediction_interval': 0.43783924497784293,
+        'confidence_interval': 0.2561542783257394},
+     'prediction': -0.6109005499999999, 'unused_fields': ['petal length']}
+
+
+To obtain a linear regression prediction, input data can only have missing
+values for fields that already had some missing values in the training data.
+
+The ``full=True`` in the predict method will cause the prediction to include
+``confidence bounds`` when available. Some linear regressions will not
+contain such information by construction. Also, in order to compute these
+bounds locally, you will need ``numpy`` and ``scipy`` in place.
+As they are quite heavy libraries, they aren't automatically installed as
+dependencies of these bindings.
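+
+For instance, you can install both libraries yourself to enable the
+confidence bounds:
+
+.. code-block:: bash
+
+    $ pip install numpy scipy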
+
+Local Deepnet
+-------------
+
+You can also instantiate a local version of a remote Deepnet.
+
+.. code-block:: python
+
+    from bigml.deepnet import Deepnet
+    local_deepnet = Deepnet(
+        'deepnet/502fdbff15526876610022435')
+
+This will retrieve the remote deepnet information,
+using an implicitly built
+``BigML()`` connection object (see the `Authentication <#authentication>`_
+section for more
+details on how to set your credentials) and return a ``Deepnet``
+object that will be stored in the ``./storage`` directory and
+that you can use to make local predictions. If you want to use a
+specific connection object for the remote retrieval or a different storage
+directory, you can set it as a second
+parameter:
+
+.. code-block:: python
+
+    from bigml.deepnet import Deepnet
+    from bigml.api import BigML
+
+    local_deepnet = Deepnet(
+        'deepnet/502fdbff15526876610602435',
+        api=BigML(my_username, my_api_key, storage="my_storage"))
+
+You can also reuse a remote Deepnet JSON structure
+as previously retrieved to build the
+local Deepnet object:
+
+.. code-block:: python
+
+    from bigml.deepnet import Deepnet
+    from bigml.api import BigML
+    api = BigML()
+    deepnet = api.get_deepnet(
+        'deepnet/502fdbff15526876610002435',
+        query_string='limit=-1')
+
+    local_deepnet = Deepnet(deepnet)
+
+Note that in this example we used a ``limit=-1`` query string for the
+deepnet retrieval. This ensures that all fields are
+retrieved by the get method in the same call (unlike in the standard
+calls where the number of fields returned is limited).
+
+Local Deepnet Predictions
+-------------------------
+
+Using the local deepnet object, you can generate predictions
+for input data:
+
+.. code-block:: python
+
+    local_deepnet.predict({"petal length": 2, "sepal length": 1.5,
+                           "petal width": 0.5, "sepal width": 0.7},
+                          full=True)
+    {'distribution': [
+        {'category': u'Iris-virginica', 'probability': 0.5041444478857267},
+        {'category': u'Iris-versicolor', 'probability': 0.46926542042788333},
+        {'category': u'Iris-setosa', 'probability': 0.02659013168639014}],
+     'prediction': u'Iris-virginica', 'probability': 0.5041444478857267}
+
+As you can see, the full prediction contains the predicted category and the
+associated probability. It also shows the distribution of probabilities for
+all the possible categories in the objective field. If you only need the
+predicted value, you can remove the ``full`` argument.
+
+To be consistent with the ``Model`` class interface, deepnets
+also have a ``predict_probability`` method, which takes
+the same argument as ``Model.predict``:
+``compact``.
+
+As with local Models, if ``compact`` is ``False`` (the default), the
+output is a list of maps, each with the keys ``prediction`` and
+``probability`` mapped to the class name and its associated
+probability.
+
+So, for example:
+
+.. code-block:: python
+
+    local_deepnet.predict_probability(
+        {"petal length": 2, "sepal length": 1.5,
+         "petal width": 0.5, "sepal width": 0.7})
+
+    [{'category': u'Iris-setosa', 'probability': 0.02659013168639014},
+     {'category': u'Iris-versicolor', 'probability': 0.46926542042788333},
+     {'category': u'Iris-virginica', 'probability': 0.5041444478857267}]
+
+If ``compact`` is ``True``, only the probabilities themselves are
+returned, as a list in class name order, again, as is the case with
+local Models.
+
+Operating point predictions are also available for local deepnets and an
+example of it would be:
+
+.. code-block:: python
+
+    operating_point = {"kind": "probability",
+                       "positive_class": "True",
+                       "threshold": 0.8}
+    prediction = local_deepnet.predict(input_data,
+                                       operating_point=operating_point)
+
+
+Local Deepnets for image supervised learning and object detection
+------------------------------------------------------------------
+
+Deepnets include Convolutional Neural Networks, so they can
+be used to do classification, regression and object detection based on
+images. For image classification and regression, the local Deepnets will
+just need some image as input data when doing predictions. The image file
+should be provided in input data as the contents of the corresponding image
+field.
+
+.. code-block:: python
+
+    input_data = {"000002": "my_image.jpg"}
+    prediction = local_deepnet.predict(input_data)
+
+For object detection, as predictions are only based on one image, the input
+to be provided is the plain image file itself.
+
+.. code-block:: python
+
+    prediction = local_deepnet.predict("my_image.jpg")
+
+Also, object detection Deepnets allow some parameters to be set
+at creation time. They slightly modify the operation of the ``Deepnet``, so
+they are provided as ``operation_settings``.
+
+.. code-block:: python
+
+    from bigml.deepnet import Deepnet
+    local_deepnet = Deepnet(
+        "deepnet/62a85964128d1c55610003cd",
+        operation_settings={"region_score_threshold": 0.6})
+    prediction = local_deepnet.predict("my_image.jpg")
+
+The allowed operation settings are ``region_score_threshold``, which sets
+the minimum accepted score in the predictions, and ``max_objects``, which
+limits the number of regions returned.
+The prediction will contain a list of dictionaries that contain the
+label, score and box description of the found regions. Each box object is
+an array that contains the ``xmin``, ``ymin``, ``xmax`` and ``ymax``
+coordinates:
+
+.. code-block:: python
+
+    {'prediction': [{'box': [0.67742, 0.30469, 0.79472, 0.37109],
+                     'label': 'eye',
+                     'score': 0.83528},
+                    {'box': [0.3783, 0.27734, 0.50147, 0.35938],
+                     'label': 'eye',
+                     'score': 0.79117},
+                    {'box': [0.67742, 0.77344, 0.739, 0.81445],
+                     'label': 'eye',
+                     'score': 0.45094}]}
+
+**Note**: Local predictions for deepnets built on image datasets can differ
+slightly from the predictions obtained by using BigML's API create
+prediction call. When uploaded to BigML, images are standardized to a
+particular resolution and compressed using the JPEG algorithm, while local
+predictions maintain the original image information. That can cause minor
+variations in regression predictions or the probability associated with
+classification predictions. Object detection predictions can also differ
+slightly, especially if low ``region_score_threshold`` values are used.
+
+If anything, the local value will always be slightly more accurate, but if
+you need to find results as close as possible to the ones produced in remote
+predictions, you can use the ``remote_preprocess`` function in the
+``deepnet`` module.
+
+.. code-block:: python
+
+    from bigml.deepnet import Deepnet, remote_preprocess
+
+    ld = Deepnet("deepnet/62a85964128d1c55610003cd")
+    ld.predict(remote_preprocess("./data/images/cats/pexels-pixabay-33358.jpg"))
+
+
+Local Fusion
+------------
+
+You can also instantiate a local version of a remote Fusion.
+
+.. code-block:: python
+
+    from bigml.fusion import Fusion
+    local_fusion = Fusion(
+        'fusion/502fdbff15526876610022438')
+
+This will retrieve the remote fusion information,
+using an implicitly built
+``BigML()`` connection object (see the `Authentication <#authentication>`_
+section for more
+details on how to set your credentials) and return a ``Fusion``
+object that will be stored in the ``./storage`` directory and
+that you can use to make local predictions. If you want to use a
+specific connection object for the remote retrieval or a different storage
+directory, you can set it as a second
+parameter:
+
+.. code-block:: python
+
+    from bigml.fusion import Fusion
+    from bigml.api import BigML
+
+    local_fusion = Fusion(
+        'fusion/502fdbff15526876610602435',
+        api=BigML(my_username, my_api_key, storage="my_storage"))
+
+You can also reuse a remote Fusion JSON structure
+as previously retrieved to build the
+local Fusion object:
+
+.. code-block:: python
+
+    from bigml.fusion import Fusion
+    from bigml.api import BigML
+    api = BigML()
+    fusion = api.get_fusion(
+        'fusion/502fdbff15526876610002435',
+        query_string='limit=-1')
+
+    local_fusion = Fusion(fusion)
+
+Note that in this example we used a ``limit=-1`` query string for the
+fusion retrieval. This ensures that all fields are
+retrieved by the get method in the same call (unlike in the standard
+calls where the number of fields returned is limited).
+
+Local Fusion Predictions
+------------------------
+
+Using the local fusion object, you can generate predictions
+for input data:
+
+.. code-block:: python
+
+    local_fusion.predict({"petal length": 2, "sepal length": 1.5,
+                          "petal width": 0.5, "sepal width": 0.7},
+                         full=True)
+    {'prediction': u'Iris-setosa', 'probability': 0.45224}
+
+
+As you can see, the full prediction contains the predicted category and the
+associated probability. If you only need the
+predicted value, you can remove the ``full`` argument.
+
+To be consistent with the ``Model`` class interface, fusions
+also have a ``predict_probability`` method, which takes
+the same argument as ``Model.predict``:
+``compact``.
+
+As with local Models, if ``compact`` is ``False`` (the default), the
+output is a list of maps, each with the keys ``prediction`` and
+``probability`` mapped to the class name and its associated
+probability.
+
+So, for example:
+
+.. code-block:: python
+
+    local_fusion.predict_probability(
+        {"petal length": 2, "sepal length": 1.5,
+         "petal width": 0.5, "sepal width": 0.7})
+
+    [{'category': u'Iris-setosa', 'probability': 0.45224},
+     {'category': u'Iris-versicolor', 'probability': 0.2854},
+     {'category': u'Iris-virginica', 'probability': 0.26236}]
+
+
+If ``compact`` is ``True``, only the probabilities themselves are
+returned, as a list in class name order, again, as is the case with
+local Models.
+
+Operating point predictions are also available for local fusions, with
+``probability`` as the only available threshold kind. An
+example would be:
+
+.. code-block:: python
+
+    operating_point = {"kind": "probability",
+                       "positive_class": "True",
+                       "threshold": 0.8}
+    prediction = local_fusion.predict(input_data,
+                                      operating_point=operating_point)
+
+Local Association
+-----------------
+
+You can also instantiate a local version of a remote association resource.
+
+.. code-block:: python
+
+    from bigml.association import Association
+    local_association = Association('association/502fdcff15526876610002435')
+
+This will retrieve the remote association information, using an implicitly
+built
+``BigML()`` connection object (see the `Authentication <#authentication>`_
+section for more
+details on how to set your credentials) and return an ``Association`` object
+that will be stored in the ``./storage`` directory and
+that you can use to extract the rules found in the original dataset.
+If you want to use a
+specific connection object for the remote retrieval or a different storage
+directory, you can set it as a second
+parameter:
+
+.. code-block:: python
+
+    from bigml.association import Association
+    from bigml.api import BigML
+
+    local_association = Association('association/502fdcff15526876610002435',
+                                    api=BigML(my_username,
+                                              my_api_key,
+                                              storage="my_storage"))
+
+or even use the remote association information retrieved previously
+to build the
+local association object:
+
+.. code-block:: python
+
+    from bigml.association import Association
+    from bigml.api import BigML
+    api = BigML()
+    association = api.get_association('association/502fdcff15526876610002435',
+                                      query_string='limit=-1')
+
+    local_association = Association(association)
+
+Note that in this example we used a ``limit=-1`` query string for the
+association retrieval. This ensures that all fields are retrieved by the get
+method in the
+same call (unlike in the standard calls where the number of fields returned
+is limited).
+
+The created ``Association`` object has some methods to help retrieve the
+association rules found in the original data. The ``get_rules`` method will
+return the association rules. Arguments can be set to filter the rules
+returned according to their ``leverage``, ``strength``, ``support``,
+``p_value``, a list of items involved in the rule or a user-given filter
+function.
+
+.. code-block:: python
+
+    from bigml.association import Association
+    local_association = Association('association/502fdcff15526876610002435')
+    local_association.get_rules(item_list=["Edible"], min_p_value=0.3)
+
+In this example, the only rules that will be returned by the ``get_rules``
+method will be the ones that mention ``Edible`` and whose ``p_value``
+is greater than or equal to ``0.3``.
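+
+A user-given filter function receives each rule and keeps it when the
+function returns ``True``. For instance, a minimal sketch (assuming the rule
+objects expose a ``lift`` attribute) could be:
+
+.. code-block:: python
+
+    # keep only the rules whose lift is greater than 2
+    local_association.get_rules(filter_function=lambda rule: rule.lift > 2)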
+
+The rules can also be stored in a CSV file using ``rules_CSV``:
+
+
+.. code-block:: python
+
+    from bigml.association import Association
+    local_association = Association('association/502fdcff15526876610002435')
+    local_association.rules_CSV(file_name='/tmp/my_rules.csv',
+                                min_strength=0.1)
+
+This example will store the rules whose strength is greater than or equal
+to ``0.1`` in the ``/tmp/my_rules.csv`` file.
+
+You can also obtain the list of ``items`` parsed in the dataset using the
+``get_items`` method. The results can be filtered by field name, by
+item names and by a user-given function:
+
+.. code-block:: python
+
+    from bigml.association import Association
+    local_association = Association('association/502fdcff15526876610002435')
+    local_association.get_items(field="Cap Color",
+                                names=["Brown cap", "White cap",
+                                       "Yellow cap"])
+
+This will recover the ``Item`` objects found in the ``Cap Color`` field for
+the names in the list, with their properties as described in the
+`developers section `_.
+
+
+Local Association Sets
+----------------------
+
+Using the local association object, you can predict the association sets
+related to an input data set:
+
+.. code-block:: python
+
+    local_association.association_set( \
+        {"gender": "Female", "genres": "Adventure$Action", \
+         "timestamp": 993906291, "occupation": "K-12 student",
+         "zipcode": 59583, "rating": 3})
+    [{'item': {'complement': False,
+               'count': 70,
+               'field_id': u'000002',
+               'name': u'Under 18'},
+      'rules': ['000000'],
+      'score': 0.0969181441561211},
+     {'item': {'complement': False,
+               'count': 216,
+               'field_id': u'000007',
+               'name': u'Drama'},
+      'score': 0.025050115102862636},
+     {'item': {'complement': False,
+               'count': 108,
+               'field_id': u'000007',
+               'name': u'Sci-Fi'},
+      'rules': ['000003'],
+      'score': 0.02384578264599424},
+     {'item': {'complement': False,
+               'count': 40,
+               'field_id': u'000002',
+               'name': u'56+'},
+      'rules': ['000008',
+                '000020'],
+      'score': 0.021845366022721312},
+     {'item': {'complement': False,
+               'count': 66,
+               'field_id': u'000002',
+               'name': u'45-49'},
+      'rules': ['00000e'],
+      'score': 0.019657155185835006}]
+
+As in the local model predictions, producing local association sets can be
+done independently of BigML servers, so no cost or connection latencies are
+involved.
+
+Local Topic Model
+-----------------
+
+You can also instantiate a local version of a remote topic model.
+
+.. code-block:: python
+
+    from bigml.topicmodel import TopicModel
+    local_topic_model = TopicModel(
+        'topicmodel/502fdbcf15526876210042435')
+
+This will retrieve the remote topic model information,
+using an implicitly built
+``BigML()`` connection object (see the `Authentication <#authentication>`_
+section for more
+details on how to set your credentials) and return a ``TopicModel``
+object that will be stored in the ``./storage`` directory and
+that you can use to obtain local topic distributions.
+If you want to use a
+specific connection object for the remote retrieval or a different storage
+directory, you can set it as a second
+parameter:
+
+.. code-block:: python
+
+    from bigml.topicmodel import TopicModel
+    from bigml.api import BigML
+
+    local_topic_model = TopicModel(
+        'topicmodel/502fdbcf15526876210042435',
+        api=BigML(my_username, my_api_key, storage="my_storage"))
+
+You can also reuse a remote topic model JSON structure
+as previously retrieved to build the
+local topic model object:
+
+.. code-block:: python
+
+    from bigml.topicmodel import TopicModel
+    from bigml.api import BigML
+    api = BigML()
+    topic_model = api.get_topic_model(
+        'topicmodel/502fdbcf15526876210042435',
+        query_string='limit=-1')
+
+    local_topic_model = TopicModel(topic_model)
+
+Note that in this example we used a ``limit=-1`` query string for the topic
+model retrieval. This ensures that all fields are retrieved by the get
+method in the
+same call (unlike in the standard calls where the number of fields returned
+is limited).
+
+Local Topic Distributions
+-------------------------
+
+Using the local topic model object, you can predict the local topic
+distribution for
+an input data set:
+
+.. code-block:: python
+
+    local_topic_model.distribution({"Message": "Our mobile phone is free"})
+    [{'name': u'Topic 00', 'probability': 0.002627154266498529},
+     {'name': u'Topic 01', 'probability': 0.003257671290458176},
+     {'name': u'Topic 02', 'probability': 0.002627154266498529},
+     {'name': u'Topic 03', 'probability': 0.1968263976460698},
+     {'name': u'Topic 04', 'probability': 0.002627154266498529},
+     {'name': u'Topic 05', 'probability': 0.002627154266498529},
+     {'name': u'Topic 06', 'probability': 0.13692728036990331},
+     {'name': u'Topic 07', 'probability': 0.6419714165615805},
+     {'name': u'Topic 08', 'probability': 0.002627154266498529},
+     {'name': u'Topic 09', 'probability': 0.002627154266498529},
+     {'name': u'Topic 10', 'probability': 0.002627154266498529},
+     {'name': u'Topic 11', 'probability': 0.002627154266498529}]
+
+
+As you can see, the topic distribution contains the names of the
+possible topics in the model and their
+associated probabilities.
+
+Local Time Series
+-----------------
+
+You can also instantiate a local version of a remote time series.
+
+.. code-block:: python
+
+    from bigml.timeseries import TimeSeries
+    local_time_series = TimeSeries(
+        'timeseries/502fdbcf15526876210042435')
+
+This will create a series of models from
+the remote time series information,
+using an implicitly built
+``BigML()`` connection object (see the `Authentication <#authentication>`_
+section for more
+details on how to set your credentials) and return a ``TimeSeries``
+object that will be stored in the ``./storage`` directory and
+that you can use to obtain local forecasts.
+If you want to use a
+specific connection object for the remote retrieval or a different storage
+directory, you can set it as a second
+parameter:
+
+.. code-block:: python
+
+    from bigml.timeseries import TimeSeries
+    from bigml.api import BigML
+
+    local_time_series = TimeSeries( \
+        'timeseries/502fdbcf15526876210042435',
+        api=BigML(my_username, my_api_key, storage="my_storage"))
+
+You can also reuse a remote time series JSON structure
+as previously retrieved to build the
+local time series object:
+
+.. code-block:: python
+
+    from bigml.timeseries import TimeSeries
+    from bigml.api import BigML
+    api = BigML()
+    time_series = api.get_time_series( \
+        'timeseries/502fdbcf15526876210042435',
+        query_string='limit=-1')
+
+    local_time_series = TimeSeries(time_series)
+
+Note that in this example we used a ``limit=-1`` query string for the time
+series retrieval. This ensures that all fields are retrieved by the get
+method in the
+same call (unlike in the standard calls where the number of fields returned
+is limited).
+
+
+Local Forecasts
+---------------
+
+Using the local time series object, you can forecast any of the objective
+field values:
+
code-block:: python
+
+    local_time_series.forecast({"Final": {"horizon": 5}, "Assignment": { \
+        "horizon": 10, "ets_models": {"criterion": "aic", "limit": 2}}})
+    {u'000005': [
+        {'point_forecast': [68.53181, 68.53181, 68.53181, 68.53181, 68.53181],
+         'model': u'A,N,N'}],
+     u'000001': [{'point_forecast': [54.776650000000004, 90.00943000000001,
+                                     83.59285000000001, 85.72403000000001,
+                                     72.87196, 93.85872, 84.80786, 84.65522,
+                                     92.52545, 88.78403],
+                  'model': u'A,N,A'},
+                 {'point_forecast': [55.882820120000005, 90.5255466567616,
+                                     83.44908577909621, 87.64524353046498,
+                                     74.32914583152592, 95.12372848262932,
+                                     86.69298716626228, 85.31630744944385,
+                                     93.62385478607113, 89.06905451921818],
+                  'model': u'A,Ad,A'}]}
+
+
+As you can see, the forecast contains the ID of the forecasted field, the
+computed points and the name of the models meeting the criterion.
+For more details about the available parameters, please check the `API
+documentation `_.
+
+
+Local PCAs
+----------
+
+The ``PCA`` class will create a local version of a remote PCA.
+
+.. code-block:: python
+
+    from bigml.pca import PCA
+    local_pca = PCA(
+        'pca/502fdbcf15526876210042435')
+
+
+This will create an object that stores the remote information that defines
+the PCA, needed to generate
+projections to the new dimensionally reduced components. The remote resource
+is automatically downloaded the first time the PCA is instantiated by
+using an implicitly built
+``BigML()`` connection object (see the
+`Authentication <#authentication>`_ section for more
+details on how to set your credentials). The JSON that contains this
+information is stored in a ``./storage`` directory, which is the default
+choice. If you want to use a
+specific connection object to define the credentials for the authentication
+in BigML or the directory where the JSON information is stored,
+you can set it as the second parameter:
+
+.. code-block:: python
+
+    from bigml.pca import PCA
+    from bigml.api import BigML
+
+    local_pca = PCA( \
+        'pca/502fdbcf15526876210042435',
+        api=BigML(my_username, my_api_key, storage="my_storage"))
+
+You can also reuse a remote PCA JSON structure
+as previously retrieved to build the
+local PCA object:
+
+.. code-block:: python
+
+    from bigml.pca import PCA
+    from bigml.api import BigML
+    api = BigML()
+    pca = api.get_pca( \
+        'pca/502fdbcf15526876210042435',
+        query_string='limit=-1')
+
+    local_pca = PCA(pca)
+
+Note that in this example we used a ``limit=-1`` query string for the PCA
+retrieval. This ensures that all fields are retrieved by the get
+method in the
+same call (unlike in the standard calls where the number of fields returned is
+limited).
+
+
+Local Projections
+-----------------
+
+Using the local PCA object, you can compute the projection of
+an input dataset into the new components:
+
+.. code-block:: python
+
+    local_pca.projection({"species": "Iris-versicolor"})
+    [6.03852, 8.35456, 5.04432, 0.75338, 0.06787, 0.03018]
+
+You can use the ``max_components`` and ``variance_threshold`` arguments
+to limit the number of components generated. You can also use the ``full``
+argument to produce a dictionary whose keys are the names of the generated
+components.
+
+.. code-block:: python
+
+    local_pca.projection({"species": "Iris-versicolor"}, full=True)
+    {'PCA1': 6.03852, 'PCA2': 8.35456, 'PCA3': 5.04432, 'PCA4': 0.75338,
+     'PCA5': 0.06787, 'PCA6': 0.03018}
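+
+For instance, a minimal sketch of limiting the output to the first two
+components with the ``max_components`` argument (the values shown are
+illustrative) would be:
+
+.. code-block:: python
+
+    # keeping only the two first components of the projection
+    local_pca.projection({"species": "Iris-versicolor"}, max_components=2)
+    [6.03852, 8.35456]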
+
+As in the local model predictions, producing local projections can be done
+independently of BigML servers, so no cost or connection latencies are
+involved.
+
+
+Multi Models
+------------
+
+Multi Models use a number of BigML remote models to build a local version
+that can be used to generate predictions locally. Predictions are generated
+by combining the outputs of each model.
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    from bigml.multimodel import MultiModel
+
+    api = BigML()
+
+    model = MultiModel([api.get_model(model['resource']) for model in
+                        api.list_models(query_string="tags__in=my_tag")
+                        ['objects']])
+
+    model.predict({"petal length": 3, "petal width": 1})
+
+This will create a multi model using all the models that have been previously
+tagged with ``my_tag`` and predict by combining each model's prediction.
+The combination method used by default is ``plurality`` for categorical
+predictions and mean value for numerical ones. You can also use ``confidence
+weighted``:
+
+.. code-block:: python
+
+    model.predict({"petal length": 3, "petal width": 1}, method=1)
+
+that will weight each vote using the confidence/error given by the model
+to each prediction, or even ``probability weighted``:
+
+.. code-block:: python
+
+    model.predict({"petal length": 3, "petal width": 1}, method=2)
+
+that weights each vote by using the probability associated to the training
+distribution at the prediction node.
+
+There's also a ``threshold`` method that uses an additional set of options:
+threshold and category. The category is predicted if and only if
+the number of predictions for that category is at least the threshold value.
+Otherwise, the prediction is plurality for the rest of predicted values.
+
+An example of the ``threshold`` combination method would be:
+
+.. code-block:: python
+
+    model.predict({'petal length': 0.9, 'petal width': 3.0}, method=3,
+                  options={'threshold': 3, 'category': 'Iris-virginica'})
+
+
+When making predictions on a test set with a large number of models,
+``batch_predict`` can be useful to log each model's predictions in
+separate files. It expects a list of input data values and the directory path
+to save the prediction files in.
+
+.. code-block:: python
+
+    model.batch_predict([{"petal length": 3, "petal width": 1},
+                         {"petal length": 1, "petal width": 5.1}],
+                        "data/predictions")
+
+The predictions generated for each model will be stored in an output
+file in `data/predictions` using the syntax
+`model_[id of the model]__predictions.csv`. For instance, when using
+`model/50c0de043b563519830001c2` to predict, the output file name will be
+`model_50c0de043b563519830001c2__predictions.csv`. An additional feature is
+that using ``reuse=True`` as argument will force the function to skip the
+creation of the file if it already exists. This can be
+helpful when repeatedly using the same models on the same test set.
+
+.. code-block:: python
+
+    model.batch_predict([{"petal length": 3, "petal width": 1},
+                         {"petal length": 1, "petal width": 5.1}],
+                        "data/predictions", reuse=True)
+
+Prediction files can be subsequently retrieved and converted into a votes list
+using ``batch_votes``:
+
+.. code-block:: python
+
+    model.batch_votes("data/predictions")
+
+which will return a list of MultiVote objects. Each MultiVote contains a list
+of predictions (e.g. ``[{'prediction': u'Iris-versicolor', 'confidence': 0.34,
+'order': 0}, {'prediction': u'Iris-setosa', 'confidence': 0.25,
+'order': 1}]``).
+These votes can be further combined to issue a final
+prediction for each input data element using the method ``combine``:
+
+.. code-block:: python
+
+    for multivote in model.batch_votes("data/predictions"):
+        prediction = multivote.combine()
+
+Again, the default method of combination is ``plurality`` for categorical
+predictions and mean value for numerical ones. You can also use ``confidence
+weighted``:
+
+.. code-block:: python
+
+    prediction = multivote.combine(1)
+
+or ``probability weighted``:
+
+.. code-block:: python
+
+    prediction = multivote.combine(2)
+
+You can also get a confidence measure for the combined prediction:
+
+.. code-block:: python
+
+    prediction = multivote.combine(0, with_confidence=True)
+
+For classification, the confidence associated to the combined prediction
+is derived by first selecting the model's predictions that voted for the
+resulting prediction and computing the weighted average of their individual
+confidence. Nevertheless, when ``probability weighted`` is used,
+the confidence is obtained by using each model's distribution at the
+prediction node to build a probability distribution and combining them.
+The confidence is then computed as the Wilson score interval of the
+combined distribution (using as the total number of instances the sum of the
+original instances in each model's distribution at the prediction node).
+
+In regression, each model's prediction confidence contributes
+to the weighted average confidence.
+
+
+Local Ensembles
+---------------
+
+Remote ensembles can also be used locally through the ``Ensemble``
+class. The simplest way to access an existing ensemble and use it to
+predict locally is:
+
+.. code-block:: python
+
+    from bigml.ensemble import Ensemble
+    ensemble = Ensemble('ensemble/5143a51a37203f2cf7020351')
+    ensemble.predict({"petal length": 3, "petal width": 1})
+
+This is the simplest way to create a local Ensemble. The
+``Ensemble('ensemble/5143a51a37203f2cf7020351')`` constructor fetches
+all the related JSON files and stores them in the ``./storage`` directory.
+Subsequent calls to ``Ensemble('ensemble/50c0de043b5635198300033c')`` will
+retrieve the files from this local storage, so an internet connection will
+only be needed the first time an ``Ensemble`` is built.
+
+However, that method can only be used to work with the ensembles in our
+account in BigML. If we intend to use ensembles created under an
+``Organization``, then
+we need to provide the information about the ``project`` that the ensemble
+belongs to. You need to provide a connection object for that:
+
+.. code-block:: python
+
+    from bigml.ensemble import Ensemble
+    from bigml.api import BigML
+
+    # connection object that informs about the project ID and the
+    # directory where the ensemble will be stored for local use
+
+    api = BigML(project="project/5143a51a37203f2cf7020001",
+                storage="my_storage_directory")
+
+    ensemble = Ensemble('ensemble/5143a51a37203f2cf7020351', api=api)
+    ensemble.predict({"petal length": 3, "petal width": 1})
+
+The local ensemble object can be used to manage the
+three types of ensembles: ``Decision Forests`` (bagging or random) and
+the ones using ``Boosted Trees``. Also, you can choose
+the storage directory or even avoid storing at all. The ``api`` connection
+object controls the storage strategy through the ``storage`` argument.
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    from bigml.ensemble import Ensemble
+
+    # api connection using a user-selected storage
+    api = BigML(storage='./my_storage')
+
+    # creating ensemble
+    ensemble = api.create_ensemble('dataset/5143a51a37203f2cf7000972')
+
+    # Ensemble object to predict
+    ensemble = Ensemble(ensemble, api)
+    ensemble.predict({"petal length": 3, "petal width": 1},
+                     operating_kind="votes")
+
+In this example, we create
+a new ensemble and store its information in the ``./my_storage``
+folder. Then this information is used to predict locally using the number of
+votes (one per model) backing each category.
+
+The ``operating_kind`` argument overrides the legacy ``method`` argument, which
+was previously used to define the combiner for the models' predictions.
+
+Similarly, local ensembles can also be created by giving a list of models to be
+combined to issue the final prediction (note: only random decision forests and
+bagging ensembles can be built using this method):
+
+.. code-block:: python
+
+    from bigml.ensemble import Ensemble
+    ensemble = Ensemble(['model/50c0de043b563519830001c2', \
+                         'model/50c0de043b5635198300031b'])
+    ensemble.predict({"petal length": 3, "petal width": 1})
+
+or even a JSON file that contains the ensemble resource:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    api.export("ensemble/50c0de043b5635198300033c",
+               "my_directory/my_ensemble.json")
+
+    from bigml.ensemble import Ensemble
+    local_ensemble = Ensemble("./my_directory/my_ensemble.json")
+
+Note: the ensemble JSON structure is not self-contained, meaning that it
+contains references to the models that the ensemble is built from, but not the
+information of the models themselves.
+To use an ensemble locally with no connection to
+the internet, you must make sure that not only a local copy of the ensemble
+JSON file is available in your computer, but also the JSON files corresponding
+to the models in it. The ``export`` method takes care of retrieving the
+information of every model in the ensemble and storing it in the same directory
+as the ensemble JSON file.
+The ``Ensemble`` class will also look for the
+model files in the same directory when using a path to an ensemble file as
+argument.
+
+If you have no memory limitations you can create the ensemble
+from a list of local model
+objects. Then, local model objects will always be in memory and
+will only be instantiated once. This will increase
+performance for large ensembles:
+
+.. code-block:: python
+
+    from bigml.model import Model
+    from bigml.ensemble import Ensemble
+    model_ids = ['model/50c0de043b563519830001c2', \
+                 'model/50c0de043b5635198300031b']
+    local_models = [Model(model_id) for model_id in model_ids]
+    local_ensemble = Ensemble(local_models)
+
+Local Ensemble caching
+----------------------
+
+Ensembles can become quite large objects and demand large memory resources.
+If you routinely use many of them
+in no particular order, the best strategy is setting up a cache
+system to store them. The local ensemble class provides helpers to
+interact with that cache. Here's an example using ``Redis``.
+
+.. code-block:: python
+
+    from bigml.ensemble import Ensemble
+    import redis
+    r = redis.Redis()
+    # First build as you would any core Ensemble object:
+    local_ensemble = Ensemble('ensemble/5126965515526876630001b2')
+    # Store a serialized version in Redis
+    local_ensemble.dump(cache_set=r.set)
+    # (retrieve the external rep from its convenient place)
+    # Speedy Build from external rep
+    local_ensemble = Ensemble('ensemble/5126965515526876630001b2', \
+                              cache_get=r.get)
+    # Get scores same as always:
+    local_ensemble.predict({"src_bytes": 350})
+
+
+Local Ensemble's Predictions
+----------------------------
+
+As in the local model's case, you can use the local ensemble to create
+new predictions for your test data, and set some arguments to configure
+the final output of the ``predict`` method.
+
+The predictions' structure will vary depending on the kind of
+ensemble used. For ``Decision Forests``, local predictions will just contain
+the ensemble's final prediction if no other argument is used.
+
+.. code-block:: python
+
+    from bigml.ensemble import Ensemble
+    ensemble = Ensemble('ensemble/5143a51a37203f2cf7020351')
+    ensemble.predict({"petal length": 3, "petal width": 1})
+    u'Iris-versicolor'
+
+The final prediction of an ensemble is determined
+by aggregating or selecting the predictions of the individual models therein.
+For classifications, the most probable class is returned if no special
+operating method is set. Using ``full=True`` you can see both the predicted
+output and the associated probability:
+
+.. code-block:: python
+
+    from bigml.ensemble import Ensemble
+    ensemble = Ensemble('ensemble/5143a51a37203f2cf7020351')
+    ensemble.predict({"petal length": 3, "petal width": 1}, \
+                     full=True)
+
+    {'prediction': u'Iris-versicolor',
+     'probability': 0.98566}
+
+In general, the prediction in a classification
+will be one amongst the list of categories in the objective
+field. When each model in the ensemble
+is used to predict, each category has a confidence, a
+probability or a vote associated to this prediction.
+Then, through the collection
+of models in the
+ensemble, each category gets an averaged confidence, probability and number of
+votes. Thus you can decide whether to operate the ensemble using the
+``confidence``, the ``probability`` or the ``votes`` so that the predicted
+category is the one that scores higher in any of these quantities. The
+criterion can be set using the ``operating_kind`` option (default is set to
+``probability``):
+
+.. code-block:: python
+
+    ensemble.predict({"petal length": 3, "petal width": 1}, \
+                     operating_kind="votes")
+
+Regressions will generate a prediction and an associated error. However,
+``Boosted Trees`` don't have an associated confidence measure, so
+only the prediction will be obtained in this case.
+
+For consistency of interface with the ``Model`` class, as well as
+between boosted and non-boosted ensembles, local Ensembles again have
+a ``predict_probability`` method. This takes the same optional
+arguments as ``Model.predict``: ``missing_strategy`` and
+``compact``. As with local Models, if ``compact`` is ``False`` (the default),
+the output is a list of maps, each with the keys ``category`` and
+``probability`` mapped to the class name and its associated
+probability.
+
+So, for example:
+
+.. code-block:: python
+
+    ensemble.predict_probability({"petal length": 3, "petal width": 1})
+
+    [{'category': u'Iris-setosa', 'probability': 0.006733220044732548},
+     {'category': u'Iris-versicolor', 'probability': 0.9824478534614787},
+     {'category': u'Iris-virginica', 'probability': 0.0108189264937886}]
+
+If ``compact`` is ``True``, only the probabilities themselves are
+returned, as a list in class name order, again, as is the case with
+local Models.
+
+Operating point predictions are also available for local ensembles and an
+example would be:
+
+.. code-block:: python
+
+    operating_point = {"kind": "probability",
+                       "positive_class": "True",
+                       "threshold": 0.8}
+    prediction = local_ensemble.predict(input_data,
+                                        operating_point=operating_point)
+
+You can check the
+`Operating point's predictions <#operating-point's-predictions>`_ section
+to learn about
+operating points. For ensembles, three kinds of operating points are available:
+``votes``, ``probability`` and ``confidence``. ``Votes`` will use as threshold
+the number of models in the ensemble that vote for the positive class.
+The other two are already explained in the above mentioned section.
+
+Local Ensemble Predictor
+------------------------
+
+Predictions can take longer when the ensemble is formed by a large number of
+models or when its models have a high number of nodes. In these cases,
+predictions' speed can be increased and memory usage minimized by using the
+``EnsemblePredictor`` object. The basic example to build it is:
+
+.. code-block:: python
+
+    from bigml.ensemblepredictor import EnsemblePredictor
+    ensemble = EnsemblePredictor('ensemble/5143a51a37203f2cf7020351',
+                                 "./model_fns_directory")
+    ensemble.predict({"petal length": 3, "petal width": 1}, full=True)
+    {'prediction': u'Iris-versicolor', 'confidence': 0.91519}
+
+This constructor has two compulsory arguments: the ensemble ID (or the
+corresponding API response) and the path to a directory that contains a file
+for each of the ensemble's models. Each file stores the ``predict`` function
+needed to obtain the model's predictions. As in the ``Ensemble`` object, you
+can also add an ``api`` argument with the connection to be used to download
+the ensemble's JSON information.
+
+The functions stored in this directory are generated automatically the first
+time you instantiate the ensemble. Once they are generated, the functions are
+retrieved from the directory.
+
+Note that only the last prediction strategy for missings is available for
+these predictions, and the available combiners are ``plurality``,
+``confidence`` and ``distribution``, but no ``operating_kind`` or
+``operating_point`` options are provided at present.
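+
+For instance, a minimal sketch of passing a specific connection (the
+credentials and storage directory are placeholders) would be:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    from bigml.ensemblepredictor import EnsemblePredictor
+
+    # connection used to download the ensemble's JSON information
+    api = BigML(my_username, my_api_key, storage="my_storage")
+    ensemble = EnsemblePredictor('ensemble/5143a51a37203f2cf7020351',
+                                 "./model_fns_directory", api=api)
+    ensemble.predict({"petal length": 3, "petal width": 1})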
+
+Local Supervised Model
+----------------------
+
+There's a general class that will allow you to predict using any supervised
+model resource, regardless of its particular type (model, ensemble,
+logistic regression, linear regression or deepnet).
+
+The ``SupervisedModel`` object will retrieve the resource information and
+instantiate the corresponding local object, so that you can use its
+``predict`` method to produce local predictions:
+
+.. code-block:: python
+
+    from bigml.supervised import SupervisedModel
+    local_supervised_1 = SupervisedModel( \
+        "logisticregression/5143a51a37203f2cf7020351")
+    local_supervised_2 = SupervisedModel( \
+        "model/5143a51a37203f2cf7020351")
+    input_data = {"petal length": 3, "petal width": 1}
+    logistic_regression_prediction = local_supervised_1.predict(input_data)
+    model_prediction = local_supervised_2.predict(input_data)
+
+
+Local BigML Model
+-----------------
+
+Following the approach of the local SupervisedModel class, the ``LocalModel``
+class will allow you to predict using any BigML model resource,
+either supervised or unsupervised.
+This class provides two methods, ``predict`` and ``batch_predict``, that
+abstract away the nature of the prediction result
+(real predictions, centroids, anomaly scores, etc.), its parameters and its
+format.
+The ``predict`` method can be used on any type of
+model and delegates to the specific method of each local model class.
+Therefore, it will be the programmer's responsibility to provide
+only the parameters accepted in the low level
+method, and the response will be a dictionary whose contents will vary
+depending on the type of prediction. Similarly, the ``batch_predict`` method
+accepts a list of inputs and adds the prediction information to each
+element of the list.
+
+The ``LocalModel`` object will retrieve the resource information and
+instantiate the corresponding local object, so that you can use its
+``predict`` method to produce local predictions:
+
+.. code-block:: python
+
+    from bigml.local_model import LocalModel
+    local_model_1 = LocalModel( \
+        "logisticregression/5143a51a37203f2cf7020351")
+    local_model_2 = LocalModel( \
+        "anomaly/5143a51a37203f2cf7020351")
+    input_data = {"petal length": 3, "petal width": 1}
+    logistic_regression_prediction = local_model_1.predict(input_data)
+    # {"prediction": "Iris-setosa", "probability": 0.56}
+    anomaly_prediction = local_model_2.predict(input_data)
+    # {"score": 0.84}
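+
+As a minimal sketch of the ``batch_predict`` method (the input rows and the
+scores shown in the comments are illustrative), each element of the list
+gets the prediction information added to it:
+
+.. code-block:: python
+
+    from bigml.local_model import LocalModel
+
+    local_model = LocalModel("anomaly/5143a51a37203f2cf7020351")
+    rows = [{"petal length": 3, "petal width": 1},
+            {"petal length": 1, "petal width": 5.1}]
+    local_model.batch_predict(rows)
+    # e.g. [{"petal length": 3, "petal width": 1, "score": 0.84},
+    #       {"petal length": 1, "petal width": 5.1, "score": 0.61}]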
+
+Local Pipelines
+---------------
+
+More often than not, the Machine Learning solution to a problem entails
+using data transformations and different models that produce some predictions
+or scores. All of them are useful information that contributes to the final
+Machine Learning based decision. Usually, the training workflow becomes
+a sequence of functions, each of which adds new fields to our data: engineered
+features, scores, predictions, etc. Of course, once the training sequence
+is determined, the same steps will need to be reproduced to create
+batch predictions for a new list of test input data rows.
+The ``BMLPipeline`` class offers the tools to extract that sequence from
+the existing BigML objects and create the prediction pipeline.
+
+The first obvious goal that we may have is reproducing the same feature
+extraction and transformations that were used when training our data to create
+our model. That is achieved by using a ``BMLPipeline`` object built
+on the training dataset. Note that, if your datasets contain features derived
+from the original fields in your data, ``Nodejs`` must be installed
+beforehand for the transformations to work locally.
+
+.. code-block:: python
+
+    from bigml.pipeline.pipeline import BMLPipeline
+    local_pipeline = BMLPipeline("my transformations pipeline",
+                                 ["dataset/5143a55637203f2cf7020351"])
+
+Starting from ``dataset/5143a55637203f2cf7020351``
+and tracing the previous datasets up to the original source built from
+our data, the pipeline will store all the steps that were done
+to transform it. Perhaps new year, month and day features were
+automatically extracted from our date-time fields, or even
+the features corresponding to the histogram of gradients were
+obtained from an image field (if your dataset had one of those).
+Also, if transformations were defined using ``Flatline`` to
+generate new fields, they will be detected and stored as a transformation
+step. They are all retrieved and ready to be applied to a
+list of dictionaries representing your rows' information using the
+``.transform`` method.
+
+.. code-block:: python
+
+    local_pipeline.transform([{"plasma glucose": 130, "bmi": 3},
+                              {"age": 26, "plasma glucose": 70}])
+
+
+As a more powerful example, let's think about an entire workflow where
+models have been built on a dataset adding a new field with a
+simple feature engineering transformation, like the ratio of two fields.
+Suppose a model has been created from the new dataset.
+Also, an anomaly detector has been created from the same dataset
+to check whether the new input data is too different from the original
+examples used to train the model.
+If the score is low, the model is still valid, so we accept its prediction.
+If the score is too high, the model predictions might be inaccurate, and we
+should not rely on them. Therefore, in order to take a decision on what to do
+for new input data, we will need not only the values of the fields of that
+new test case but also the prediction (plus the associated probability)
+and anomaly score that the trained model and anomaly detector provide for it.
+
+To solve the problem, the process will be: on receiving new data,
+the transformation to generate the ratio between the raw input fields
+should be applied and a new ``ratio`` field should be added.
+After that, both the prediction and the anomaly score should be computed
+and they also should be added to the initial data as new fields.
+The ``BMLPipeline`` class will help us do that.
+
+First, we instantiate the ``BMLPipeline`` object by providing the models
+that we want it to use and a name for it:
+
+.. code-block:: python
+
+    from bigml.pipeline.pipeline import BMLPipeline
+    local_pipeline = BMLPipeline("my new pipeline",
+                                 ["model/5143a51a37203f2cf7020351",
+                                  "anomaly/5143a51a37203f2cf7027551"])
+
+This code will retrieve all the datasets previous to the model and anomaly
+detector construction and will store any transformation that they contain.
+It creates a sequence starting on the first dataset that was created to
+summarize the uploaded data, adding the datasets that store transformations,
+and finally the model and anomaly detector.
+Every transformation that was
+done when training those models will be reflected as a new step in the
+``BMLPipeline``, and every model that was added to the list will also be
+added as an additional transformation step: the model will transform
+our data by adding its prediction and associated probability, and the
+anomaly detector will transform the input by adding the computed
+anomaly score. The result is obtained using the ``BMLPipeline`` object, which
+offers a ``.transform`` method that accepts a list of input data dictionaries
+or a DataFrame. For every row, it will execute the stored transformations
+and generate the model's prediction and the anomaly's score.
+All of them will be added to the original input data.
+
+.. code-block:: python
+
+    local_pipeline.transform([{"plasma glucose": 130, "bmi": 3},
+                              {"age": 26, "plasma glucose": 70}])
+    """That could produce a result such as
+       [{"plasma glucose": 130, "bmi": 3, "prediction": "True",
+         "probability": 0.578, "score": 0.753},
+        {"age": 26, "plasma glucose": 70, "prediction": "False",
+         "probability": 0.573, "score": 0.54}]
+    """
+
+As for the rest of local resources, you can pass additional arguments to define
+the API connection info and/or a ``cache_get`` function to be used when
+resources are stored in memory caches.
+
+.. code-block:: python
+
+    from bigml.pipeline.pipeline import BMLPipeline
+    local_pipeline = BMLPipeline("my new pipeline",
+                                 ["model/5143a51a37203f2cf7020351",
+                                  "anomaly/5143a51a37203f2cf7027551"],
+                                 api=BigML("my user", "my api",
+                                           storage="my_storage"))
+
+If no API connection is passed, or if the one given has no
+``api.storage`` value, we use the default ``./storage`` directory
+followed by the name of the pipeline as the storage folder for the
+JSON of the resources used in the pipeline.
+In this case, four resources will be stored: the dataset created from
+the uploaded data, the dataset generated when we added the ratio
+field, the model and the anomaly detector. The ``BMLPipeline`` object
+offers an ``.export`` method that can compress the entire directory to
+a ``.zip`` file whose name is the name of the ``BMLPipeline``
+(conveniently encoded) and will be placed in the ``output_directory``
+given by the user:
+
+.. code-block:: python
+
+    from bigml.pipeline.pipeline import BMLPipeline
+    local_pipeline = BMLPipeline("my new pipeline",
+                                 ["model/5143a51a37203f2cf7020351",
+                                  "anomaly/5143a51a37203f2cf7027551"],
+                                 api=BigML("my user", "my api",
+                                           storage="my_storage"))
+    local_pipeline.export(output_directory="my_export_dir")
+
+In this example, we will find a ``my_export_dir/my_new_pipeline.zip`` file
+in the current directory. The file contains a ``my new pipeline`` folder where
+the four JSONs for the two datasets and two models are stored.
+
+The ``BMLPipeline`` also provides methods to ``dump`` and ``load`` the
+data transformers it contains, in order to save them in a cache or in the file
+system. As an example, we can create a ``BMLPipeline``, dump its contents to
+a file system folder and build a second pipeline from them. The name of
+the pipeline will be used as reference to know which object to load.
+
+
+.. code-block:: python
+
+    from bigml.pipeline.pipeline import BMLPipeline
+    local_pipeline = BMLPipeline("pipeline1",
+                                 "model/5143a51a37203f2cf7020351")
+    local_pipeline.dump("./pipeline1_storage")
+    # the `pipeline1_storage` folder is created and all the objects
+    # used in the pipeline are stored there, one file each
+    new_pipeline = BMLPipeline.load("pipeline1", "./pipeline1_storage")
+    # a new pipeline has been built with the same properties and steps
+    # that local_pipeline had
+
+
+If using a cache system, the same methods described in the
+`local caching <#local-caching>`_ section are available.
+
+.. code-block:: python
+
+    from bigml.pipeline.pipeline import BMLPipeline
+    local_pipeline = BMLPipeline("pipeline1",
+                                 "model/631a6a6f8f679a2d31000445")
+    import redis
+    r = redis.Redis()
+    local_pipeline.dump(cache_set=r.set)
+    new_pipeline = BMLPipeline("pipeline1", cache_get=r.get)
+    # the new_pipeline has been recovered from Redis
+
+
+Sometimes, you may want to apply pre-existing transformations
+to your original data before uploading it to BigML. In that case, you can use
+the more general ``Pipeline`` class to store any sequence of transformations
+made outside of BigML. As both ``Pipeline`` and ``BMLPipeline`` offer the
+``.transform`` method, they are also data transformers, meaning that they
+can be used as steps of a more general ``Pipeline`` as well.
+Thus, combining pre-existing transformations
+based on scikit-learn or Pandas with the transformations and models generated
+in BigML is totally possible. For that, we will use the
+``SKDataTransformer`` and ``DFDataTransformer`` classes, which provide a
+``.transform`` method too.
+
+As an example of use, we'll create a ``Pipeline`` based on an existing
+scikit-learn pipeline.
+
+.. code-block:: python
+
+    import pandas as pd
+
+    from sklearn.tree import DecisionTreeClassifier
+    from sklearn.preprocessing import StandardScaler
+    from sklearn.model_selection import train_test_split
+    from sklearn.pipeline import Pipeline as SKPipeline
+
+    # Building a prediction pipeline using a scikit-learn
+    # scaler and decision tree and adding the prediction
+    # to the initial dataframe
+
+    from bigml.pipeline.transformer import Pipeline, SKDataTransformer
+    from bigml.constants import OUT_NEW_HEADERS
+
+    # pre-existing code to build the scikit pipeline
+    df = pd.read_csv("data/diabetes.csv")
+    X = df.drop('diabetes', axis=1)
+    y = df['diabetes']
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y,
+                                                        random_state=0)
+
+    pipe = SKPipeline([('scaler', StandardScaler()),
+                       ('DTC', DecisionTreeClassifier())])
+    pipe.fit(X_train, y_train)
+    # end of pre-existing code
+
+    pipeline = Pipeline(
+        "skpipeline",  # pipeline name
+        steps=[SKDataTransformer(pipe,
+                                 "skDTC",
+                                 output={OUT_NEW_HEADERS: ["sk_prediction"]})])
+    # the `pipe` scikit pipeline is wrapped as a SKDataTransformer to offer
+    # a `.transform` method
+    pipeline.transform(X_test)
+
+This new pipeline can be combined with a ``BMLPipeline`` and will accumulate
+the insights of both.
+
+.. code-block:: python
+
+    from bigml.pipeline.pipeline import BMLPipeline
+
+    bml_pipeline = BMLPipeline("bml_pipeline",
+                               "anomaly/631a6a6f8f679a2d31000445")
+    extended_pipeline = Pipeline("extended",
+                                 steps=[pipeline, bml_pipeline])
+    extended_pipeline.transform([{"plasma glucose": 80}])
+
+The same can be done for a Pandas' pipe sequence:
+
+.. code-block:: python
+
+    # based on https://www.kdnuggets.com/2021/01/cleaner-data-analysis-pandas-pipes.html
+
+    import pandas as pd
+    import numpy as np
+
+    from bigml.pipeline.transformer import DFDataTransformer, Pipeline
+
+    marketing = pd.read_csv("./data/DirectMarketing.csv")
+
+    # code to define the transformations
+
+    def drop_missing(df):
+        thresh = len(df) * 0.6
+        df.dropna(axis=1, thresh=thresh, inplace=True)
+        return df
+
+    def remove_outliers(df, column_name):
+        low = np.quantile(df[column_name], 0.05)
+        high = np.quantile(df[column_name], 0.95)
+        return df[df[column_name].between(low, high, inclusive=True)]
+
+    def copy_df(df):
+        return df.copy()
+
+    pipeline = Pipeline("pandas_pipeline",
+                        steps=[DFDataTransformer([copy_df,
+                                                  drop_missing,
+                                                  (remove_outliers,
+                                                   ['Salary'])])])
+    # the list of functions is wrapped as a DFDataTransformer to offer
+    # a `.transform` method that generates the output using Pandas' `.pipe`
+    marketing_clean = pipeline.transform(marketing)
+
+where again, the pipeline could be combined with any ``BMLPipeline`` to
+produce a more general transformation sequence.
+
+Of course, new classes could be built to support other transformation tools
+and libraries. A new data transformer can be created by deriving the
+``DataTransformer`` class and customizing its ``.data_transform`` method
+to cover the particulars of the functions to be used in the generation of
+new fields.
+
+Local Evaluations
+-----------------
+
+You can instantiate a local version of an evaluation that will contain the
+main evaluation metrics.
+
+.. code-block:: python
+
+    from bigml.evaluation import Evaluation
+    local_evaluation = Evaluation('evaluation/502fdbff15526876610003215')
+
+This will retrieve the remote evaluation information, using an implicitly built
+``BigML()`` connection object (see the `Authentication <#authentication>`_
+section for more
+details on how to set your credentials) and return an ``Evaluation`` object
+that will be stored in the ``./storage`` directory. If you want to use a
+specific connection object for the remote retrieval or a different storage
+directory, you can set it as second parameter:
+
+.. code-block:: python
+
+    from bigml.evaluation import Evaluation
+    from bigml.api import BigML
+
+    local_evaluation = Evaluation('evaluation/502fdbff15526876610003215',
+                                  api=BigML(my_username,
+                                            my_api_key,
+                                            storage="my_storage"))
+
+or even use the remote evaluation information previously retrieved to build the
+local evaluation object:
+
+.. code-block:: python
+
+    from bigml.evaluation import Evaluation
+    from bigml.api import BigML
+    api = BigML()
+    evaluation = api.get_evaluation('evaluation/502fdbff15526876610003215')
+
+    local_evaluation = Evaluation(evaluation)
+
+You can also build a local evaluation from a previously retrieved and
+stored evaluation JSON file:
+
+.. code-block:: python
+
+    from bigml.evaluation import Evaluation
+    local_evaluation = Evaluation('./my_evaluation.json')
+
+The Evaluation attributes depend on whether it belongs to a regression or a
+classification. Regression evaluations will contain ``r_squared``,
+``mean_absolute_error`` and ``mean_squared_error``. Classification evaluations
+will contain ``accuracy``, ``precision``, ``recall``, ``phi`` and ``f_measure``
+besides the ``confusion_matrix`` and a ``full`` attribute that will contain
+the entire set of metrics as downloaded from the API.
+
+.. code-block:: python
+
+    from bigml.evaluation import Evaluation
+    local_evaluation = Evaluation('evaluation/502fdbff15526876610003215')
+    local_evaluation.full  # entire model evaluation metrics
+    if local_evaluation.regression:
+        local_evaluation.r_squared  # r-squared metric value
+    else:
+        local_evaluation.confusion_matrix  # confusion matrix
+        local_evaluation.accuracy
+
+
+Local batch predictions
+-----------------------
+
+As explained in the ``101s`` provided in the
+`Quick Start `_ section, batch predictions for a
+list of inputs can be obtained by iterating the single predictions discussed
+in each different local model. However, we've also provided a
+homogeneous ``batch_predict`` method in the following local objects:
+
+- SupervisedModel
+- Anomaly
+- Cluster
+- PCA
+- TopicModel
+
+which can receive the following parameters:
+
+- **input_data_list**: This can be a list of input data rows, each expressed
+  as a dictionary containing ``field_name: field_value`` pairs, or
+  a Pandas' DataFrame.
+- **outputs**: This is a dictionary that can contain ``output_fields``
+  and/or ``output_headers`` information: the list of prediction keys to be
+  added to the inputs and the list of headers to be used
+  as keys in the output, respectively. E.g., for a supervised learning
+  model, the default if no information is provided would
+  be equivalent to ``{"output_fields": ["prediction",
+  "probability"], "output_headers": ["prediction",
+  "probability"]}`` and both the prediction and the
+  associated probability would be added to the input data.
+- **\*\*kwargs**: Any other parameters allowed in the ``.predict`` method
+  can be added to the batch prediction too. For instance,
+  we could add the operating kind to a supervised model
+  batch prediction using ``operating_kind="probability"`` as
+  argument.
+
+
+Let's write some examples. If we are reading data from a CSV, we can use the
+``csv`` library and pass the list of inputs as an array to an anomaly detector.
+
+.. code-block:: python
+
+    import csv
+
+    from bigml.anomaly import Anomaly
+
+    input_data_list = []
+    with open("my_input_data.csv") as handler:
+        reader = csv.DictReader(handler)
+        for row_dict in reader:
+            input_data_list.append(row_dict)
+
+    local_anomaly = Anomaly("anomaly/5143a51a37203f2cf7027551")
+    scored_data_list = local_anomaly.batch_predict(input_data_list)
+
+Or, if we are using a Pandas' ``DataFrame`` instead to read the data, we could
+also use the DataFrame directly as the input argument:
+
+.. code-block:: python
+
+    import pandas as pd
+
+    from bigml.anomaly import Anomaly
+    dataframe = pd.read_csv("my_input_data.csv")
+
+    local_anomaly = Anomaly("anomaly/5143a51a37203f2cf7027551")
+    scored_dataframe = local_anomaly.batch_predict(dataframe)
+
+Now, let's add some complexity and use a supervised model. We'd like to
+add both the predicted value and the associated probability, but we'd like
+to use an ``operating point`` when predicting. The operating point requires
+specifying a positive class, the kind of metric to compare (probability or
+confidence) and the threshold to use. We also want the prediction to
+be added to the input data using the key ``sm_prediction``. In this case, the
+code would be similar to:
+
+.. code-block:: python
+
+    import pandas as pd
+
+    from bigml.supervised import SupervisedModel
+    dataframe = pd.read_csv("my_input_data.csv")
+
+    local_supervised = SupervisedModel("ensemble/5143a51a37203f2cf7027551")
+    operating_point = {"positive_class": "yes",
+                       "kind": "probability",
+                       "threshold": 0.7}
+    predicted_dataframe = local_supervised.batch_predict(
+        dataframe,
+        outputs={"output_headers": ["sm_prediction", "probability"]},
+        operating_point=operating_point)
+
+and the result would be like the one below:
+
+.. code-block:: python
+
+    >>> predicted_dataframe
+         pregnancies  plasma glucose  ... sm_prediction  probability
+    0              6             148  ...          true      0.95917
+    1              1              85  ...         false      0.99538
+    2              8             183  ...          true      0.93701
+    3              1              89  ...         false      0.99452
+    4              0             137  ...          true      0.90622
+    ..           ...             ...  ...           ...          ...
+    195            1             117  ...         false      0.90906
+    196            5             123  ...         false      0.97179
+    197            2             120  ...         false      0.99300
+    198            1             106  ...         false      0.99452
+    199            2             155  ...         false      0.51737
+
+    [200 rows x 11 columns]
+
+
+Local Shap Wrapper
+------------------
+
+The Shap library accepts customized predict functions as long as they provide
+a particular input/output interface that uses numpy arrays. The previously
+described local models can be used to generate such a predict function.
+The ``ShapWrapper`` class has been created to help users connect the
+Shap library to BigML supervised models and provides the ``.predict`` and
+``.predict_proba`` functions especially built to be used with that library.
+
+.. code-block:: python
+
+    import shap
+
+    from bigml.shapwrapper import ShapWrapper
+    shap_wrapper = ShapWrapper("model/5143a51a37203f2cf7027551")
+    # computing the Explainer on the X_test numpy array
+    explainer = shap.Explainer(shap_wrapper.predict,
+                               X_test, algorithm='partition',
+                               feature_names=shap_wrapper.x_headers)
+    shap_values = explainer(X_test)
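+
+For classification models, ``.predict_proba`` can be used in the same
+fashion. As a minimal sketch (``X_background`` and ``X_test`` are assumed
+to be numpy arrays of input rows), a kernel-based explainer could be fed
+the probabilities instead:
+
+.. code-block:: python
+
+    import shap
+
+    from bigml.shapwrapper import ShapWrapper
+
+    shap_wrapper = ShapWrapper("model/5143a51a37203f2cf7027551")
+    # the background sample is used by the explainer as reference data
+    explainer = shap.KernelExplainer(shap_wrapper.predict_proba,
+                                     X_background)
+    shap_values = explainer.shap_values(X_test)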
+
+
+Local predictions with shared models
+------------------------------------
+
+BigML's resources are private to the owner of the account where they were
+created. However, owners can decide to share their resources with other
+BigML users by creating
+`Secret links `_
+to them. Users that receive the link will be able to inspect the
+resource and can also download it. This is especially important in the case
+of models, as they will be able to generate local predictions from them.
+
+The ``Secret link`` URLs leading to shared resources end in a shared ID
+(starting with the string ``shared/`` followed by the type of resource and
+the particular sharing key). In order to use them locally, use this
+string as the first argument for the local model constructor. For instance,
+let's say that someone shares with you the link to a shared ensemble
+``https://bigml.com/shared/ensemble/qbXem5XoEiVKcq8MPmwjHnXunFj``.
+
+You could use that in local predictions by instantiating the corresponding
+``Ensemble`` object.
+
+.. code-block:: python
+
+    from bigml.ensemble import Ensemble
+    local_ensemble = Ensemble("shared/ensemble/qbXem5XoEiVKcq8MPmwjHnXunFj")
+
+And the new ``local_ensemble`` would be ready to predict using the ``.predict``
+method, as discussed in the `Local Ensembles <#Local-Ensembles>`_ section.
+
+
+Local caching
+-------------
+
+All local models can use an external cache system to manage memory storage and
+recovery. The ``get`` and ``set`` functions of the cache manager should be
+passed to the constructor or ``dump`` function. Here's an example of how to
+cache a linear regression:
+
+.. code-block:: python
+
+    from bigml.linear import LinearRegression
+    # First build as you would any core LinearRegression object:
+    lm = LinearRegression("linearregression/5e827ff85299630d22007198")
+    lm.predict({"petal length": 4, "sepal length": 4, "petal width": 4, \
+                "sepal width": 4, "species": "Iris-setosa"}, full=True)
+    import redis
+    r = redis.Redis()
+    # Store a serialized version in Redis
+    lm.dump(cache_set=r.set)
+    # (retrieve the external rep from its convenient place)
+    # Speedy Build from external rep
+    lm = LinearRegression("linearregression/5e827ff85299630d22007198", \
+                          cache_get=r.get)
+    # Get predictions same as always:
+    lm.predict({"petal length": 4, "sepal length": 4, "petal width": 4, \
+                "sepal width": 4, "species": "Iris-setosa"}, full=True)
+
+
+Rule Generation
+---------------
+
+You can also use a local model to generate an IF-THEN rule set that can be very
+helpful to understand how the model works internally.
+
+.. code-block:: python
+
+    local_model.rules()
+    IF petal_length > 2.45 AND
+        IF petal_width > 1.65 AND
+            IF petal_length > 5.05 THEN
+                species = Iris-virginica
+            IF petal_length <= 5.05 AND
+                IF sepal_width > 2.9 AND
+                    IF sepal_length > 5.95 AND
+                        IF petal_length > 4.95 THEN
+                            species = Iris-versicolor
+                        IF petal_length <= 4.95 THEN
+                            species = Iris-virginica
+                    IF sepal_length <= 5.95 THEN
+                        species = Iris-versicolor
+                IF sepal_width <= 2.9 THEN
+                    species = Iris-virginica
+        IF petal_width <= 1.65 AND
+            IF petal_length > 4.95 AND
+                IF sepal_length > 6.05 THEN
+                    species = Iris-virginica
+                IF sepal_length <= 6.05 AND
+                    IF sepal_width > 2.45 THEN
+                        species = Iris-versicolor
+                    IF sepal_width <= 2.45 THEN
+                        species = Iris-virginica
+            IF petal_length <= 4.95 THEN
+                species = Iris-versicolor
+    IF petal_length <= 2.45 THEN
+        species = Iris-setosa
+
+
+Python, Tableau and Hadoop-ready Generation
+-------------------------------------------
+
+If you prefer, you can also generate a Python function that implements the
+model and that can be useful to make the model actionable right away with
+``local_model.python()``.
+
+.. code-block:: python
+
+    local_model.python()
+    def predict_species(sepal_length=None,
+                        sepal_width=None,
+                        petal_length=None,
+                        petal_width=None):
+        """ Predictor for species from model/50a8e2d9eabcb404d2000293
+
+            Predictive model by BigML - Machine Learning Made Easy
+        """
+        if (petal_length is None):
+            return 'Iris-virginica'
+        if (petal_length <= 2.45):
+            return 'Iris-setosa'
+        if (petal_length > 2.45):
+            if (petal_width is None):
+                return 'Iris-virginica'
+            if (petal_width <= 1.65):
+                if (petal_length <= 4.95):
+                    return 'Iris-versicolor'
+                if (petal_length > 4.95):
+                    if (sepal_length is None):
+                        return 'Iris-virginica'
+                    if (sepal_length <= 6.05):
+                        if (petal_width <= 1.55):
+                            return 'Iris-virginica'
+                        if (petal_width > 1.55):
+                            return 'Iris-versicolor'
+                    if (sepal_length > 6.05):
+                        return 'Iris-virginica'
+            if (petal_width > 1.65):
+                if (petal_length <= 5.05):
+                    if (sepal_width is None):
+                        return 'Iris-virginica'
+                    if (sepal_width <= 2.9):
+                        return 'Iris-virginica'
+                    if (sepal_width > 2.9):
+                        if (sepal_length is None):
+                            return 'Iris-virginica'
+                        if (sepal_length <= 6.4):
+                            if (sepal_length <= 5.95):
+                                return 'Iris-versicolor'
+                            if (sepal_length > 5.95):
+                                return 'Iris-virginica'
+                        if (sepal_length > 6.4):
+                            return 'Iris-versicolor'
+                if (petal_length > 5.05):
+                    return 'Iris-virginica'
+
+The ``local_model.python(hadoop=True)`` call will generate the code that you
+need for the Hadoop map-reduce engine to produce batch predictions using
+`Hadoop streaming `_.
+Saving the generated mapper and reducer functions in their corresponding files
+(let's say ``/home/hduser/hadoop_mapper.py`` and
+``/home/hduser/hadoop_reducer.py``), you can start a Hadoop job
+to generate predictions by issuing
+the following Hadoop command in your system console:
+
+.. code-block:: bash
+
+    bin/hadoop jar contrib/streaming/hadoop-*streaming*.jar \
+        -file /home/hduser/hadoop_mapper.py -mapper hadoop_mapper.py \
+        -file /home/hduser/hadoop_reducer.py -reducer hadoop_reducer.py \
+        -input /home/hduser/hadoop/input.csv \
+        -output /home/hduser/hadoop/output_dir
+
+assuming you are in the Hadoop home directory, your input file is in the
+corresponding dfs directory
+(``/home/hduser/hadoop/input.csv`` in this example) and the output will
+be placed at ``/home/hduser/hadoop/output_dir`` (inside the dfs directory).
+
+Tableau-ready rules are also available through ``local_model.tableau()`` for
+all the models except those that use text predictors.
+
+.. code-block:: python
+
+    local_model.tableau()
+    IF ISNULL([petal width]) THEN 'Iris-virginica'
+    ELSEIF [petal width]>0.8 AND [petal width]>1.75 AND ISNULL([petal length]) THEN 'Iris-virginica'
+    ELSEIF [petal width]>0.8 AND [petal width]>1.75 AND [petal length]>4.85 THEN 'Iris-virginica'
+    ELSEIF [petal width]>0.8 AND [petal width]>1.75 AND [petal length]<=4.85 AND ISNULL([sepal width]) THEN 'Iris-virginica'
+    ELSEIF [petal width]>0.8 AND [petal width]>1.75 AND [petal length]<=4.85 AND [sepal width]>3.1 THEN 'Iris-versicolor'
+    ELSEIF [petal width]>0.8 AND [petal width]>1.75 AND [petal length]<=4.85 AND [sepal width]<=3.1 THEN 'Iris-virginica'
+    ELSEIF [petal width]>0.8 AND [petal width]<=1.75 AND ISNULL([petal length]) THEN 'Iris-versicolor'
+    ELSEIF [petal width]>0.8 AND [petal width]<=1.75 AND [petal length]>4.95 AND [petal width]>1.55 AND [petal length]>5.45 THEN 'Iris-virginica'
+    ELSEIF [petal width]>0.8 AND [petal width]<=1.75 AND [petal length]>4.95 AND [petal width]>1.55 AND [petal length]<=5.45 THEN 'Iris-versicolor'
+    ELSEIF [petal width]>0.8 AND [petal width]<=1.75 AND [petal length]>4.95 AND [petal width]<=1.55 THEN 'Iris-virginica'
+    ELSEIF [petal width]>0.8 AND [petal width]<=1.75 AND [petal length]<=4.95 AND [petal width]>1.65 THEN 'Iris-virginica'
+    ELSEIF [petal width]>0.8 AND [petal width]<=1.75 AND [petal length]<=4.95 AND [petal width]<=1.65 THEN 'Iris-versicolor'
+    ELSEIF [petal width]<=0.8 THEN 'Iris-setosa'
+    END
+
+
+Summary generation
+------------------
+
+You can also print the model from the point of view of the classes it predicts
+with ``local_model.summarize()``.
+It shows a header section with the training data initial distribution per
+class (instances and percentage) and the final predicted distribution per
+class.
+
+Then each class distribution is detailed. First, a header section
+shows the percentage of the total data that belongs to the class (in the
+training set and in the predicted results) and the rules applicable to
+all the instances of that class (if any). Just after that, a detail section
+shows each of the leaves in which the class members are distributed.
+They are sorted in descending
+order by the percentage of predictions of the class that fall into that leaf
+and also show the full rule chain that leads to it.
+
+::
+
+    Data distribution:
+        Iris-setosa: 33.33% (50 instances)
+        Iris-versicolor: 33.33% (50 instances)
+        Iris-virginica: 33.33% (50 instances)
+
+
+    Predicted distribution:
+        Iris-setosa: 33.33% (50 instances)
+        Iris-versicolor: 33.33% (50 instances)
+        Iris-virginica: 33.33% (50 instances)
+
+
+    Field importance:
+        1. petal length: 53.16%
+        2. petal width: 46.33%
+        3. sepal length: 0.51%
+        4. sepal width: 0.00%
+
+
+    Iris-setosa : (data 33.33% / prediction 33.33%) petal length <= 2.45
+        · 100.00%: petal length <= 2.45 [Confidence: 92.86%]
+
+
+    Iris-versicolor : (data 33.33% / prediction 33.33%) petal length > 2.45
+        · 94.00%: petal length > 2.45 and petal width <= 1.65 and petal length <= 4.95 [Confidence: 92.44%]
+        · 2.00%: petal length > 2.45 and petal width <= 1.65 and petal length > 4.95 and sepal length <= 6.05 and petal width > 1.55 [Confidence: 20.65%]
+        · 2.00%: petal length > 2.45 and petal width > 1.65 and petal length <= 5.05 and sepal width > 2.9 and sepal length > 6.4 [Confidence: 20.65%]
+        · 2.00%: petal length > 2.45 and petal width > 1.65 and petal length <= 5.05 and sepal width > 2.9 and sepal length <= 6.4 and sepal length <= 5.95 [Confidence: 20.65%]
+
+
+    Iris-virginica : (data 33.33% / prediction 33.33%) petal length > 2.45
+        · 76.00%: petal length > 2.45 and petal width > 1.65 and petal length > 5.05 [Confidence: 90.82%]
+        · 12.00%: petal length > 2.45 and petal width > 1.65 and petal length <= 5.05 and sepal width <= 2.9 [Confidence: 60.97%]
+        · 6.00%: petal length > 2.45 and petal width <= 1.65 and petal length > 4.95 and sepal length > 6.05 [Confidence: 43.85%]
+        · 4.00%: petal length > 2.45 and petal width > 1.65 and petal length <= 5.05 and sepal width > 2.9 and sepal length <= 6.4 and sepal length > 5.95 [Confidence: 34.24%]
+        · 2.00%: petal length > 2.45 and petal width <= 1.65 and petal length > 4.95 and sepal length <= 6.05 and petal width <= 1.55 [Confidence: 20.65%]
+
+
+You can also use ``local_model.get_data_distribution()`` and
+``local_model.get_prediction_distribution()`` to obtain the training and
+prediction basic distribution
+information as a list (suitable to draw histograms or any further processing).
+The tree nodes' information (prediction, confidence, impurity and distribution)
+can also be retrieved in a CSV format using the method
+``local_model.tree_CSV()``. The output can be sent to a file by providing a
+``file_name`` argument or used as a list.
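+
+As a minimal sketch (the file path is just an example):
+
+.. code-block:: python
+
+    # writing the nodes' information to a CSV file
+    local_model.tree_CSV(file_name="./nodes_info.csv")
+    # or keeping it as a list of rows for further processing
+    rows = local_model.tree_CSV()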
+
+Local ensembles have a ``local_ensemble.summarize()`` method too; in this
+case, the output shows only the data distribution (only available in
+``Decision Forests``) and field importance sections.
+
+For local clusters, the ``local_cluster.summarize()`` method also prints the
+data distribution, the training data statistics per cluster and the basic
+intercentroid distance statistics. There's also a
+``local_cluster.statistics_CSV(file_name)`` method that stores in a CSV format
+the values shown by the ``summarize()`` method. If no file name is provided,
+the function returns the rows that would have been stored in the file as
+a list.
diff --git a/docs/ml_resources.rst b/docs/ml_resources.rst
new file mode 100644
index 00000000..45ba0020
--- /dev/null
+++ b/docs/ml_resources.rst
@@ -0,0 +1,3880 @@
+.. toctree::
+   :hidden:
+
+ML Resources
+============
+
+This section describes the resources available in the BigML API. When retrieved
+with the corresponding bindings ``get_[resource_type]`` method, they will
+have some common attributes, like:
+
+- ``resource``, which contains their ID
+- ``category``, which can be set to any of the categories defined in the
+  API documentation
+- ``creator``, which refers to the creator's username
+
+to name a few.
+
+Besides, every resource type will have different properties as required
+by its nature, which can be checked in the
+`API documentation
+`_. Here's a list of the different
+resource types and their associated structures and properties.
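+
+For instance, a quick sketch of reading those common attributes (reusing the
+source retrieved in the section below, and assuming an ``api = BigML()``
+connection as in the rest of the examples) could be:
+
+.. code-block:: python
+
+    >>> source = api.get_source("source/5e30b685e476845dd901df83")
+    >>> source["resource"]
+    'source/5e30b685e476845dd901df83'
+    >>> source["object"]["category"]
+    0
+    >>> source["object"]["creator"]
+    'mmartin'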
+
+Data Ingestion and Preparation
+------------------------------
+
+External Connectors
+~~~~~~~~~~~~~~~~~~~
+
+The ``Externalconnector`` object is an abstract resource that helps
+you create ``Sources`` from several external data sources
+like relational databases or ElasticSearch engines. This is not strictly
+a Machine Learning resource, but a helper to connect your data repositories
+to BigML.
+
+.. code-block:: python
+
+    >>> external_connector = api.get_external_connector( \
+            "externalconnector/5e30b685e476845dd901df83")
+
+You can check the external connector properties at the `API documentation
+`_.
+
+Source
+~~~~~~
+
+The ``Source`` is the first resource that you build in BigML when uploading
+a file. BigML infers the structure of the file, whether it has headers or not,
+the column separator or the field types and names, and stores the results in
+the ``Source`` information:
+
+.. code-block:: python
+
+    >>> source = api.get_source("source/5e30b685e476845dd901df83")
+    >>> api.pprint(source["object"])
+    { 'category': 0,
+      'charset': 'UTF-8',
+      'code': 200,
+      'configuration': None,
+      'configuration_status': False,
+      'content_type': 'text/plain;UTF-8',
+      'created': '2020-01-28T22:32:37.290000',
+      'creator': 'mmartin',
+      'credits': 0,
+      'description': '',
+      'disable_datetime': False,
+      'field_types': { 'categorical': 0,
+                       'datetime': 0,
+                       'items': 0,
+                       'numeric': 4,
+                       'text': 1,
+                       'total': 5},
+      'fields': { '000000': { 'column_number': 0,
+                              'name': 'sepal length',
+                              'optype': 'numeric',
+                              'order': 0},
+                  '000001': { 'column_number': 1,
+                              'name': 'sepal width',
+                              'optype': 'numeric',
+                              'order': 1},
+                  '000002': { 'column_number': 2,
+                              'name': 'petal length',
+                              'optype': 'numeric',
+                              'order': 2},
+                  '000003': { 'column_number': 3,
+                              'name': 'petal width',
+                              'optype': 'numeric',
+                              'order': 3},
+                  '000004': { 'column_number': 4,
+                              'name': 'species',
+                              'optype': 'text',
+                              'order': 4,
+                              'term_analysis': { 'enabled': True}}},
+      'fields_meta': { 'count': 5,
+                       'image': 0,
+                       'limit': 1000,
+                       'offset': 0,
+                       'query_total': 5,
+                       'total': 5},
+      ...
+    }
+
+You can check the source properties at the `API documentation
+`_.
+
+Dataset
+~~~~~~~
+
+If you want to get some basic statistics for each field, you can retrieve
+the ``fields`` from the dataset as follows to get a dictionary keyed by
+field id:
+
+.. code-block:: python
+
+    >>> dataset = api.get_dataset(dataset)
+    >>> api.pprint(api.get_fields(dataset))
+    { '000000': { 'column_number': 0,
+                  'datatype': 'double',
+                  'name': 'sepal length',
+                  'optype': 'numeric',
+                  'summary': { 'maximum': 7.9,
+                               'median': 5.77889,
+                               'minimum': 4.3,
+                               'missing_count': 0,
+                               'population': 150,
+                               'splits': [ 4.51526,
+                                           4.67252,
+                                           4.81113,
+
+    [... snip ... ]
+
+
+      '000004': { 'column_number': 4,
+                  'datatype': 'string',
+                  'name': 'species',
+                  'optype': 'categorical',
+                  'summary': { 'categories': [ [ 'Iris-versicolor',
+                                                 50],
+                                               [ 'Iris-setosa',
+                                                 50],
+                                               [ 'Iris-virginica',
+                                                 50]],
+                               'missing_count': 0}}}
+
+
+The field filtering options are also available using a query string expression,
+for instance:
+
+.. code-block:: python
+
+    >>> dataset = api.get_dataset(dataset, "limit=20")
+
+limits the number of fields that will be included in ``dataset`` to 20.
+
+You can check the dataset properties at the `API documentation
+`_.
+
+Samples
+~~~~~~~
+
+To provide quick access to your row data you can create a ``sample``. Samples
+are in-memory objects that can be queried for subsets of data by limiting
+their size, the fields or the rows returned.
The structure of a sample would +be: + + +.. code-block:: python + + >>> from bigml.api import BigML + >>> api = BigML() + >>> sample = api.create_sample('dataset/55b7a6749841fa2500000d41', + {"max_rows": 150}) + >>> api.ok(sample) + >>> api.pprint(sample['object']) + { + "category": 0, + "code": 201, + "columns": 0, + "configuration": null, + "configuration_status": false, + "created": "2021-03-02T14:32:59.603699", + "creator": "alfred", + "dataset": "dataset/603e20a91f386f43db000004", + "dataset_status": true, + "description": "", + "excluded_fields": [], + "fields_meta": { + "count": 0, + "limit": 1000, + "offset": 0, + "total": 0 + }, + "input_fields": [ + "000000", + "000001", + "000002", + "000003", + "000004" + ], + "locale": "en_US", + "max_columns": 0, + "max_rows": 150, + "name": "iris", + "name_options": "", + "private": true, + "project": null, + "resource": "sample/603e4c9b1f386fdea6000000", + "rows": 0, + "seed": "d1dc0a2819344a079af521507b7e7ea8", + "shared": false, + "size": 4608, + "status": { + "code": 1, + "message": "The sample creation request has been queued and will be processed soon", + "progress": 0 + }, + "subscription": true, + "tags": [], + "type": 0, + "updated": "2021-03-02T14:32:59.603751" + } + + +Samples are not permanent objects. Once they are created, they will be +available as long as GETs are requested within periods smaller than +a pre-established TTL (Time to Live). The expiration timer of a sample is +reset every time a new GET is received. + +If requested, a sample can also perform linear regression and compute +Pearson's and Spearman's correlations for either one numeric field +against all other numeric fields or between two specific numeric fields. + +You can check the sample properties at the `API documentation +`_. + +Correlations +~~~~~~~~~~~~ + +A ``correlation`` resource contains a series of computations that reflect the +degree of dependence between the field set as objective for your predictions +and the rest of fields in your dataset. The dependence degree is obtained by +comparing the distributions in every objective and non-objective field pair, +as independent fields should have probabilistic +independent distributions. Depending on the types of the fields to compare, +the metrics used to compute the correlation degree will be: + +- for numeric to numeric pairs: + `Pearson's `_ + and `Spearman's correlation `_ + coefficients. +- for numeric to categorical pairs: + `One-way Analysis of Variance `_, with the + categorical field as the predictor variable. +- for categorical to categorical pairs: + `contingency table (or two-way table) `_, + `Chi-square test of independence `_ + , and `Cramer's V `_ + and `Tschuprow's T `_ coefficients. + +An example of the correlation resource JSON structure is: + +.. 
code-block:: python + + >>> from bigml.api import BigML + >>> api = BigML() + >>> correlation = api.create_correlation('dataset/55b7a6749841fa2500000d41') + >>> api.ok(correlation) + >>> api.pprint(correlation['object']) + { 'category': 0, + 'clones': 0, + 'code': 200, + 'columns': 5, + 'correlations': { 'correlations': [ { 'name': 'one_way_anova', + 'result': { '000000': { 'eta_square': 0.61871, + 'f_ratio': 119.2645, + 'p_value': 0, + 'significant': [ True, + True, + True]}, + '000001': { 'eta_square': 0.40078, + 'f_ratio': 49.16004, + 'p_value': 0, + 'significant': [ True, + True, + True]}, + '000002': { 'eta_square': 0.94137, + 'f_ratio': 1180.16118, + 'p_value': 0, + 'significant': [ True, + True, + True]}, + '000003': { 'eta_square': 0.92888, + 'f_ratio': 960.00715, + 'p_value': 0, + 'significant': [ True, + True, + True]}}}], + 'fields': { '000000': { 'column_number': 0, + 'datatype': 'double', + 'idx': 0, + 'name': 'sepal length', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'bins': [ [ 4.3, + 1], + [ 4.425, + 4], + ... + [ 7.9, + 1]], + 'kurtosis': -0.57357, + 'maximum': 7.9, + 'mean': 5.84333, + 'median': 5.8, + 'minimum': 4.3, + 'missing_count': 0, + 'population': 150, + 'skewness': 0.31175, + 'splits': [ 4.51526, + 4.67252, + 4.81113, + 4.89582, + 4.96139, + 5.01131, + ... + 6.92597, + 7.20423, + 7.64746], + 'standard_deviation': 0.82807, + 'sum': 876.5, + 'sum_squares': 5223.85, + 'variance': 0.68569}}, + '000001': { 'column_number': 1, + 'datatype': 'double', + 'idx': 1, + 'name': 'sepal width', + 'optype': 'numeric', + 'order': 1, + 'preferred': True, + 'summary': { 'counts': [ [ 2, + 1], + [ 2.2, + ... + '000004': { 'column_number': 4, + 'datatype': 'string', + 'idx': 4, + 'name': 'species', + 'optype': 'categorical', + 'order': 4, + 'preferred': True, + 'summary': { 'categories': [ [ 'Iris-setosa', + 50], + [ 'Iris-versicolor', + 50], + [ 'Iris-virginica', + 50]], + 'missing_count': 0}, + 'term_analysis': { 'enabled': True}}}, + 'significance_levels': [0.01, 0.05, 0.1]}, + 'created': '2015-07-28T18:07:37.010000', + 'credits': 0.017581939697265625, + 'dataset': 'dataset/55b7a6749841fa2500000d41', + 'dataset_status': True, + 'dataset_type': 0, + 'description': '', + 'excluded_fields': [], + 'fields_meta': { 'count': 5, + 'limit': 1000, + 'offset': 0, + 'query_total': 5, + 'total': 5}, + 'input_fields': ['000000', '000001', '000002', '000003'], + 'locale': 'en_US', + 'max_columns': 5, + 'max_rows': 150, + 'name': u"iris' dataset correlation", + 'objective_field_details': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'order': 4}, + 'out_of_bag': False, + 'price': 0.0, + 'private': True, + 'project': None, + 'range': [1, 150], + 'replacement': False, + 'resource': 'correlation/55b7c4e99841fa24f20009bf', + 'rows': 150, + 'sample_rate': 1.0, + 'shared': False, + 'size': 4609, + 'source': 'source/55b7a6729841fa24f100036a', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 274, + 'message': 'The correlation has been created', + 'progress': 1.0}, + 'subscription': True, + 'tags': [], + 'updated': '2015-07-28T18:07:49.057000', + 'white_box': False} + +Note that the output in the snippet above has been abbreviated. As you see, the +``correlations`` attribute contains the information about each field +correlation to the objective field. + +You can check the correlations properties at the `API documentation +`_. 
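+
+As a minimal sketch of how these results might be consumed (reusing the
+``correlation`` variable retrieved above, and relying only on the
+abbreviated structure just shown), you could list each computed metric and
+its per-field results:
+
+.. code-block:: python
+
+    >>> info = correlation['object']['correlations']
+    >>> for corr in info['correlations']:
+    ...     print(corr['name'])  # e.g. one_way_anova
+    ...     for field_id, result in corr['result'].items():
+    ...         # map the field id to its name using the fields summary
+    ...         print("%s: %s" % (info['fields'][field_id]['name'], result))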
+
+
+Statistical Tests
+~~~~~~~~~~~~~~~~~
+
+A ``statisticaltest`` resource contains a series of tests
+that compare the
+distribution of data in each numeric field of a dataset
+to certain canonical distributions,
+such as the
+`normal distribution `_
+or `Benford's law `_
+distribution. Statistical tests are useful in tasks such as fraud,
+normality, or outlier detection.
+
+- Fraud Detection Tests:
+
+Benford: This statistical test performs a comparison of the distribution of
+first significant digits (FSDs) of each value of the field to the Benford's
+law distribution. Benford's law applies to numerical distributions spanning
+several orders of magnitude, such as the values found on financial balance
+sheets. It states that the frequency distribution of leading, or first
+significant digits (FSDs), in such distributions is not uniform.
+On the contrary, lower digits like 1 and 2 occur disproportionately
+often as leading significant digits. The test compares the distribution
+in the field to Benford's distribution using a Chi-square goodness-of-fit
+test and the Cho-Gaines d test. If a field has a dissimilar distribution,
+it may contain anomalous or fraudulent values.
+
+- Normality tests:
+
+These tests can be used to confirm the assumption that the data in each
+field of a dataset is distributed according to a normal distribution. The
+results are relevant because many statistical and machine learning
+techniques rely on this assumption.
+
+Anderson-Darling: The Anderson-Darling test computes a test statistic based
+on the difference between the observed cumulative distribution function
+(CDF) and that of a normal distribution. A significant result indicates that
+the assumption of normality is rejected.
+
+Jarque-Bera: The Jarque-Bera test computes a test statistic based on the
+third and fourth central moments (skewness and kurtosis) of the data. Again,
+a significant result indicates that the normality assumption is rejected.
+
+Z-score: For a given sample size, the maximum deviation from the mean that
+would be expected in a sampling of a normal distribution can be computed
+based on the 68-95-99.7 rule. This test simply reports this expected
+deviation and the actual deviation observed in the data, as a sort of
+sanity check.
+
+- Outlier tests:
+
+Grubbs: When the values of a field are normally distributed, a few values
+may still deviate from the mean distribution. The outlier test reports
+whether at least one value in each numeric field differs significantly from
+the mean, using Grubbs' test for outliers. If an outlier is found, then its
+value will be returned.
+
+The JSON structure for ``statisticaltest`` resources is similar to this one:
+
+.. 
code-block:: python + + >>> statistical_test = api.create_statistical_test('dataset/55b7a6749841fa2500000d41') + >>> api.ok(statistical_test) + True + >>> api.pprint(statistical_test['object']) + { 'category': 0, + 'clones': 0, + 'code': 200, + 'columns': 5, + 'created': '2015-07-28T18:16:40.582000', + 'credits': 0.017581939697265625, + 'dataset': 'dataset/55b7a6749841fa2500000d41', + 'dataset_status': True, + 'dataset_type': 0, + 'description': '', + 'excluded_fields': [], + 'fields_meta': { 'count': 5, + 'limit': 1000, + 'offset': 0, + 'query_total': 5, + 'total': 5}, + 'input_fields': ['000000', '000001', '000002', '000003'], + 'locale': 'en_US', + 'max_columns': 5, + 'max_rows': 150, + 'name': u"iris' dataset test", + 'out_of_bag': False, + 'price': 0.0, + 'private': True, + 'project': None, + 'range': [1, 150], + 'replacement': False, + 'resource': 'statisticaltest/55b7c7089841fa25000010ad', + 'rows': 150, + 'sample_rate': 1.0, + 'shared': False, + 'size': 4609, + 'source': 'source/55b7a6729841fa24f100036a', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 302, + 'message': 'The test has been created', + 'progress': 1.0}, + 'subscription': True, + 'tags': [], + 'statistical_tests': { 'ad_sample_size': 1024, + 'fields': { '000000': { 'column_number': 0, + 'datatype': 'double', + 'idx': 0, + 'name': 'sepal length', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'bins': [ [ 4.3, + 1], + [ 4.425, + 4], + ... + [ 7.9, + 1]], + 'kurtosis': -0.57357, + 'maximum': 7.9, + 'mean': 5.84333, + 'median': 5.8, + 'minimum': 4.3, + 'missing_count': 0, + 'population': 150, + 'skewness': 0.31175, + 'splits': [ 4.51526, + 4.67252, + 4.81113, + 4.89582, + ... + 7.20423, + 7.64746], + 'standard_deviation': 0.82807, + 'sum': 876.5, + 'sum_squares': 5223.85, + 'variance': 0.68569}}, + ... 
+                                 '000004': { 'column_number': 4,
+                                             'datatype': 'string',
+                                             'idx': 4,
+                                             'name': 'species',
+                                             'optype': 'categorical',
+                                             'order': 4,
+                                             'preferred': True,
+                                             'summary': { 'categories': [ [ 'Iris-setosa', 50],
+                                                                          [ 'Iris-versicolor', 50],
+                                                                          [ 'Iris-virginica', 50]],
+                                                          'missing_count': 0},
+                                             'term_analysis': { 'enabled': True}}},
+                     'fraud': [ { 'name': 'benford',
+                                  'result': { '000000': { 'chi_square': { 'chi_square_value': 506.39302,
+                                                                          'p_value': 0,
+                                                                          'significant': [ True, True, True]},
+                                                          'cho_gaines': { 'd_statistic': 7.124311073683573,
+                                                                          'significant': [ True, True, True]},
+                                                          'distribution': [ 0, 0, 0, 22, 61, 54, 13, 0, 0],
+                                                          'negatives': 0,
+                                                          'zeros': 0},
+                                              '000001': { 'chi_square': { 'chi_square_value': 396.76556,
+                                                                          'p_value': 0,
+                                                                          'significant': [ True, True, True]},
+                                                          'cho_gaines': { 'd_statistic': 7.503503138331123,
+                                                                          'significant': [ True, True, True]},
+                                                          'distribution': [ 0, 57, 89, 4, 0, 0, 0, 0, 0],
+                                                          'negatives': 0,
+                                                          'zeros': 0},
+                                              '000002': { 'chi_square': { 'chi_square_value': 154.20728,
+                                                                          'p_value': 0,
+                                                                          'significant': [ True, True, True]},
+                                                          'cho_gaines': { 'd_statistic': 3.9229974017266054,
+                                                                          'significant': [ True, True, True]},
+                                                          'distribution': [ 50, 0, 11, 43, 35, 11, 0, 0, 0],
+                                                          'negatives': 0,
+                                                          'zeros': 0},
+                                              '000003': { 'chi_square': { 'chi_square_value': 111.4438,
+                                                                          'p_value': 0,
+                                                                          'significant': [ True, True, True]},
+                                                          'cho_gaines': { 'd_statistic': 4.103257341299901,
+                                                                          'significant': [ True, True, True]},
+                                                          'distribution': [ 76, 58, 7, 7, 1, 1, 0, 0, 0],
+                                                          'negatives': 0,
+                                                          'zeros': 0}}}],
+                     'normality': [ { 'name': 'anderson_darling',
+                                      'result': { '000000': { 'p_value': 0.02252,
+                                                              'significant': [ False, True, True]},
+                                                  '000001': { 'p_value': 0.02023,
+                                                              'significant': [ False, True, True]},
+                                                  '000002': { 'p_value': 0,
+                                                              'significant': [ True, True, True]},
+                                                  '000003': { 'p_value': 0,
+                                                              'significant': [ True, True, True]}}},
+                                    { 'name': 'jarque_bera',
+                                      'result': { '000000': { 'p_value': 0.10615,
+                                                              'significant': [ False, False, False]},
+                                                  '000001': { 'p_value': 0.25957,
+                                                              'significant': [ False, False, False]},
+                                                  '000002': { 'p_value': 0.0009,
+                                                              'significant': [ True, True, True]},
+                                                  '000003': { 'p_value': 0.00332,
+                                                              'significant': [ True, True, True]}}},
+                                    { 'name': 'z_score',
+                                      'result': { '000000': { 'expected_max_z': 2.71305,
+                                                              'max_z': 2.48369},
+                                                  '000001': { 'expected_max_z': 2.71305,
+                                                              'max_z': 3.08044},
+                                                  '000002': { 'expected_max_z': 2.71305,
+                                                              'max_z': 1.77987},
+                                                  '000003': { 'expected_max_z': 2.71305,
+                                                              'max_z': 1.70638}}}],
+                     'outliers': [ { 'name': 'grubbs',
+                                     'result': { '000000': { 'p_value': 1,
+                                                             'significant': [ False, False, False]},
+                                                 '000001': { 'p_value': 0.26555,
+                                                             'significant': [ False, False, False]},
+                                                 '000002': { 'p_value': 1,
+                                                             'significant': [ False, False, False]},
+                                                 '000003': { 'p_value': 1,
+                                                             'significant': [ False, False, False]}}}],
+                     'significance_levels': [0.01, 0.05, 0.1]},
+      'updated': '2015-07-28T18:17:11.829000',
+      'white_box': False}
+
+Note that the output in the snippet above has been abbreviated. As you see,
+the ``statistical_tests`` attribute contains the ``fraud``, ``normality``
+and ``outliers``
+sections where the information for each field's distribution is stored.
+
+You can check the statistical tests properties at the `API documentation
+`_. 
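+
+As a minimal sketch (again, relying only on the abbreviated structure shown
+above and the ``statistical_test`` variable retrieved in the snippet), you
+could check which fields reject the normality assumption at each
+significance level:
+
+.. code-block:: python
+
+    >>> tests = statistical_test['object']['statistical_tests']
+    >>> levels = tests['significance_levels']  # e.g. [0.01, 0.05, 0.1]
+    >>> for test in tests['normality']:
+    ...     for field_id, result in test['result'].items():
+    ...         # the z_score test reports deviations, not significance flags
+    ...         if 'significant' in result:
+    ...             rejected = [level for level, significant in
+    ...                         zip(levels, result['significant'])
+    ...                         if significant]
+    ...             print("%s - %s rejects normality at: %s" %
+    ...                   (test['name'], field_id, rejected))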
+
+
+Supervised Models
+-----------------
+
+Model
+~~~~~
+
+One of the greatest things about BigML is that the models that it
+generates for you are fully white-boxed. To get the explicit tree-like
+predictive model for the example above:
+
+.. code-block:: python
+
+    >>> model = api.get_model(model)
+    >>> api.pprint(model['object']['model']['root'])
+    {'children': [
+      {'children': [
+        {'children': [{'count': 38,
+                       'distribution': [['Iris-virginica', 38]],
+                       'output': 'Iris-virginica',
+                       'predicate': {'field': '000002',
+                                     'operator': '>',
+                                     'value': 5.05}},
+        'children': [
+
+        [ ... ]
+
+        {'count': 50,
+         'distribution': [['Iris-setosa', 50]],
+         'output': 'Iris-setosa',
+         'predicate': {'field': '000002',
+                       'operator': '<=',
+                       'value': 2.45}}]},
+      {'count': 150,
+       'distribution': [['Iris-virginica', 50],
+                        ['Iris-versicolor', 50],
+                        ['Iris-setosa', 50]],
+       'output': 'Iris-virginica',
+       'predicate': True}]}}}
+
+(Note that we have abbreviated the output in the snippet above for
+readability: the full predictive model you'll get is going to contain
+many more details.)
+
+Again, filtering options are also available using a query string expression,
+for instance:
+
+.. code-block:: python
+
+    >>> model = api.get_model(model, "limit=5")
+
+limits the number of fields that will be included in ``model`` to 5.
+
+You can check the model properties at the `API documentation
+`_.
+
+
+Linear Regressions
+~~~~~~~~~~~~~~~~~~
+
+A linear regression is a supervised machine learning method for
+solving regression problems by computing the objective as a linear
+combination of factors. The implementation is a multiple linear regression
+that models the output as a linear combination of the predictors.
+The coefficients are estimated by doing a least-squares fit on the training
+data.
+
+As a linear combination can only be done using numeric values, non-numeric
+fields need to be transformed into numeric ones following some rules:
+
+- Categorical fields will be encoded and each class appearance in input data
+  will convey a different contribution to the input vector.
+- Text and items fields will be expanded to several numeric predictors,
+  each one indicating the number of occurrences of a specific term.
+  Text fields without term analysis are excluded from the model.
+
+Therefore, the initial input data is transformed into an input vector with
+one or more components per field. Also, if a field in the training data
+contains missing data, the components corresponding to that field will
+include an additional 1 or 0 value depending on whether the field is missing
+in the input data or not.
+
+The JSON structure for a linear regression is:
+
+.. 
code-block:: python + + >>> api.pprint(linear_regression["object"]) + { 'category': 0, + 'code': 200, + 'columns': 4, + 'composites': None, + 'configuration': None, + 'configuration_status': False, + 'created': '2019-02-20T21:02:40.027000', + 'creator': 'merce', + 'credits': 0.0, + 'credits_per_prediction': 0.0, + 'dataset': 'dataset/5c6dc06a983efc18e2000084', + 'dataset_field_types': { 'categorical': 0, + 'datetime': 0, + 'items': 0, + 'numeric': 6, + 'preferred': 6, + 'text': 0, + 'total': 6}, + 'dataset_status': True, + 'datasets': [], + 'default_numeric_value': None, + 'description': '', + 'excluded_fields': [], + 'execution_id': None, + 'execution_status': None, + 'fields_maps': None, + 'fields_meta': { 'count': 4, + 'limit': 1000, + 'offset': 0, + 'query_total': 4, + 'total': 4}, + 'fusions': None, + 'input_fields': ['000000', '000001', '000002'], + 'linear_regression': { 'bias': True, + 'coefficients': [ [-1.88196], + [0.475633], + [0.122468], + [30.9141]], + 'fields': { '000000': { 'column_number': 0, + 'datatype': 'int8', + 'name': 'Prefix', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'counts': [ [ 4, + 1], + + ... + 'stats': { 'confidence_intervals': [ [ 5.63628], + [ 0.375062], + [ 0.348577], + [ 44.4112]], + 'mean_squared_error': 342.206, + 'number_of_parameters': 4, + 'number_of_samples': 77, + 'p_values': [ [0.512831], + [0.0129362], + [0.491069], + [0.172471]], + 'r_squared': 0.136672, + 'standard_errors': [ [ 2.87571], + [ 0.191361], + [ 0.177849], + [ 22.6592]], + 'sum_squared_errors': 24981, + 'xtx': [ [ 4242, + 48396.9, + 51273.97, + 568], + [ 48396.9, + 570177.6584, + 594274.3274, + 6550.52], + [ 51273.97, + 594274.3274, + 635452.7068, + 6894.24], + [ 568, + 6550.52, + 6894.24, + 77]], + 'z_scores': [ [-0.654436], + [2.48552], + [0.688609], + [1.36431]]}}, + 'locale': 'en_US', + 'max_columns': 6, + 'max_rows': 80, + 'name': 'grades', + 'name_options': 'bias', + 'number_of_batchpredictions': 0, + 'number_of_evaluations': 0, + 'number_of_predictions': 2, + 'number_of_public_predictions': 0, + 'objective_field': '000005', + 'objective_field_name': 'Final', + 'objective_field_type': 'numeric', + 'objective_fields': ['000005'], + 'operating_point': { }, + 'optiml': None, + 'optiml_status': False, + 'ordering': 0, + 'out_of_bag': False, + 'out_of_bags': None, + 'price': 0.0, + 'private': True, + 'project': 'project/5c6dc062983efc18d5000129', + 'range': None, + 'ranges': None, + 'replacement': False, + 'replacements': None, + 'resource': 'linearregression/5c6dc070983efc18e00001f1', + 'rows': 80, + 'sample_rate': 1.0, + 'sample_rates': None, + 'seed': None, + 'seeds': None, + 'shared': False, + 'size': 2691, + 'source': 'source/5c6dc064983efc18e00001ed', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 62086, + 'message': 'The linear regression has been created', + 'progress': 1}, + 'subscription': True, + 'tags': [], + 'type': 0, + 'updated': '2019-02-27T18:01:18.539000', + 'user_metadata': { }, + 'webhook': None, + 'weight_field': None, + 'white_box': False} + +Note that the output in the snippet above has been abbreviated. As you see, +the ``linear_regression`` attribute stores the coefficients used in the +linear function as well as the configuration parameters described in +the `developers section `_ . + + +Logistic Regressions +~~~~~~~~~~~~~~~~~~~~ + +A logistic regression is a supervised machine learning method for +solving classification problems. 
Each of the classes in the field
+you want to predict, the objective field, is assigned a probability
+depending on the values of the input fields. The probability is computed
+as the value of a logistic function,
+whose argument is a linear combination of the predictors' values.
+You can create a logistic regression selecting which fields from your
+dataset you want to use as input fields (or predictors) and which
+categorical field you want to predict, the objective field. Then the
+created logistic regression is defined by the set of coefficients in the
+linear combination of the values. Categorical
+and text fields need some prior work to be modelled using this method. They
+are expanded as a set of new fields, one per category or term
+(respectively), where the number of occurrences of the category or term is
+stored. Thus, the linear combination is made on the frequency of the
+categories or terms.
+
+The JSON structure for a logistic regression is:
+
+.. code-block:: python
+
+    >>> api.pprint(logistic_regression['object'])
+    { 'balance_objective': False,
+      'category': 0,
+      'code': 200,
+      'columns': 5,
+      'created': '2015-10-09T16:11:08.444000',
+      'credits': 0.017581939697265625,
+      'credits_per_prediction': 0.0,
+      'dataset': 'dataset/561304f537203f4c930001ca',
+      'dataset_field_types': { 'categorical': 1,
+                               'datetime': 0,
+                               'effective_fields': 5,
+                               'numeric': 4,
+                               'preferred': 5,
+                               'text': 0,
+                               'total': 5},
+      'dataset_status': True,
+      'description': '',
+      'excluded_fields': [],
+      'fields_meta': { 'count': 5,
+                       'limit': 1000,
+                       'offset': 0,
+                       'query_total': 5,
+                       'total': 5},
+      'input_fields': ['000000', '000001', '000002', '000003'],
+      'locale': 'en_US',
+      'logistic_regression': { 'bias': 1,
+                               'c': 1,
+                               'coefficients': [ [ 'Iris-virginica',
+                                                   [ -1.7074433493289376,
+                                                     -1.533662474502423,
+                                                     2.47026986670851,
+                                                     2.5567582221085563,
+                                                     -1.2158200612711925]],
+                                                 [ 'Iris-setosa',
+                                                   [ 0.41021712519841674,
+                                                     1.464162165246765,
+                                                     -2.26003266131107,
+                                                     -1.0210350909174153,
+                                                     0.26421852991732514]],
+                                                 [ 'Iris-versicolor',
+                                                   [ 0.42702327817072505,
+                                                     -1.611817241669904,
+                                                     0.5763832839459982,
+                                                     -1.4069842681625884,
+                                                     1.0946877732663143]]],
+                               'eps': 1e-05,
+                               'fields': { '000000': { 'column_number': 0,
+                                                       'datatype': 'double',
+                                                       'name': 'sepal length',
+                                                       'optype': 'numeric',
+                                                       'order': 0,
+                                                       'preferred': True,
+                                                       'summary': { 'bins': [ [ 4.3, 1],
+                                                                              [ 4.425, 4],
+                                                                              [ 4.6, 4],
+                                                                              ...
+                                                                              [ 7.9, 1]],
+                                                                    'kurtosis': -0.57357,
+                                                                    'maximum': 7.9,
+                                                                    'mean': 5.84333,
+                                                                    'median': 5.8,
+                                                                    'minimum': 4.3,
+                                                                    'missing_count': 0,
+                                                                    'population': 150,
+                                                                    'skewness': 0.31175,
+                                                                    'splits': [ 4.51526,
+                                                                                4.67252,
+                                                                                4.81113,
+                                                                                ...
+                                                                                6.92597,
+                                                                                7.20423,
+                                                                                7.64746],
+                                                                    'standard_deviation': 0.82807,
+                                                                    'sum': 876.5,
+                                                                    'sum_squares': 5223.85,
+                                                                    'variance': 0.68569}},
+                                           '000001': { 'column_number': 1,
+                                                       'datatype': 'double',
+                                                       'name': 'sepal width',
+                                                       'optype': 'numeric',
+                                                       'order': 1,
+                                                       'preferred': True,
+                                                       'summary': { 'counts': [ [ 2, 1],
+                                                                                [ 2.2, 3],
+                                                                                ...
+                                                                                [ 4.2, 1],
+                                                                                [ 4.4, 1]],
+                                                                    'kurtosis': 0.18098,
+                                                                    'maximum': 4.4,
+                                                                    'mean': 3.05733,
+                                                                    'median': 3,
+                                                                    'minimum': 2,
+                                                                    'missing_count': 0,
+                                                                    'population': 150,
+                                                                    'skewness': 0.31577,
+                                                                    'standard_deviation': 0.43587,
+                                                                    'sum': 458.6,
+                                                                    'sum_squares': 1430.4,
+                                                                    'variance': 0.18998}},
+                                           '000002': { 'column_number': 2,
+                                                       'datatype': 'double',
+                                                       'name': 'petal length',
+                                                       'optype': 'numeric',
+                                                       'order': 2,
+                                                       'preferred': True,
+                                                       'summary': { 'bins': [ [ 1, 1],
+                                                                              [ 1.16667, 3],
+                                                                              ...
+                                                                              [ 6.6, 1],
+                                                                              [ 6.7, 2],
+                                                                              [ 6.9, 1]],
+                                                                    'kurtosis': -1.39554,
+                                                                    'maximum': 6.9,
+                                                                    'mean': 3.758,
+                                                                    'median': 4.35,
+                                                                    'minimum': 1,
+                                                                    'missing_count': 0,
+                                                                    'population': 150,
+                                                                    'skewness': -0.27213,
+                                                                    'splits': [ 1.25138,
+                                                                                1.32426,
+                                                                                1.37171,
+                                                                                ...
+                                                                                6.02913,
+                                                                                6.38125],
+                                                                    'standard_deviation': 1.7653,
+                                                                    'sum': 563.7,
+                                                                    'sum_squares': 2582.71,
+                                                                    'variance': 3.11628}},
+                                           '000003': { 'column_number': 3,
+                                                       'datatype': 'double',
+                                                       'name': 'petal width',
+                                                       'optype': 'numeric',
+                                                       'order': 3,
+                                                       'preferred': True,
+                                                       'summary': { 'counts': [ [ 0.1, 5],
+                                                                                [ 0.2, 29],
+                                                                                ...
+                                                                                [ 2.4, 3],
+                                                                                [ 2.5, 3]],
+                                                                    'kurtosis': -1.33607,
+                                                                    'maximum': 2.5,
+                                                                    'mean': 1.19933,
+                                                                    'median': 1.3,
+                                                                    'minimum': 0.1,
+                                                                    'missing_count': 0,
+                                                                    'population': 150,
+                                                                    'skewness': -0.10193,
+                                                                    'standard_deviation': 0.76224,
+                                                                    'sum': 179.9,
+                                                                    'sum_squares': 302.33,
+                                                                    'variance': 0.58101}},
+                                           '000004': { 'column_number': 4,
+                                                       'datatype': 'string',
+                                                       'name': 'species',
+                                                       'optype': 'categorical',
+                                                       'order': 4,
+                                                       'preferred': True,
+                                                       'summary': { 'categories': [ [ 'Iris-setosa', 50],
+                                                                                    [ 'Iris-versicolor', 50],
+                                                                                    [ 'Iris-virginica', 50]],
+                                                                    'missing_count': 0},
+                                                       'term_analysis': { 'enabled': True}}},
+                               'normalize': False,
+                               'regularization': 'l2'},
+      'max_columns': 5,
+      'max_rows': 150,
+      'name': u"iris' dataset's logistic regression",
+      'number_of_batchpredictions': 0,
+      'number_of_evaluations': 0,
+      'number_of_predictions': 1,
+      'objective_field': '000004',
+      'objective_field_name': 'species',
+      'objective_field_type': 'categorical',
+      'objective_fields': ['000004'],
+      'out_of_bag': False,
+      'private': True,
+      'project': 'project/561304c137203f4c9300016c',
+      'range': [1, 150],
+      'replacement': False,
+      'resource': 'logisticregression/5617e71c37203f506a000001',
+      'rows': 150,
+      'sample_rate': 1.0,
+      'shared': False,
+      'size': 4609,
+      'source': 'source/561304f437203f4c930001c3',
+      'source_status': True,
+      'status': { 'code': 5,
+                  'elapsed': 86,
+                  'message': 'The logistic regression has been created',
+                  'progress': 1.0},
+      'subscription': False,
+      'tags': ['species'],
+      'updated': '2015-10-09T16:14:02.336000',
+      'white_box': False}
+
+Note that the output in the snippet above has been abbreviated. As you see,
+the ``logistic_regression`` attribute stores the coefficients used in the
+logistic function as well as the configuration parameters described in
+the `developers section
+`_.
+
+Ensembles
+~~~~~~~~~
+
+Ensembles are supervised machine learning models that contain several
+decision tree models. In BigML, we offer different flavors of ensembles:
+bagging, boosting and random decision forests.
+
+The structure of an ensemble can be obtained as follows:
+
+.. 
code-block:: python + + >>> ensemble = api.get_ensemble("ensemble/5d5aea06e476842219000add") + >>> api.pprint(ensemble["object"]) + { 'boosting': None, + 'category': 0, + 'code': 200, + 'columns': 5, + 'configuration': None, + 'configuration_status': False, + 'created': '2019-08-19T18:27:18.529000', + 'creator': 'mmartin', + 'dataset': 'dataset/5d5ae9f97811dd0195009c17', + 'dataset_field_types': { 'categorical': 1, + 'datetime': 0, + 'items': 0, + 'numeric': 4, + 'preferred': 5, + 'text': 0, + 'total': 5}, + 'dataset_status': False, + 'depth_threshold': 512, + 'description': '', + 'distributions': [ { 'importance': [ ['000002', 0.72548], + ['000003', 0.24971], + ['000001', 0.02481]], + 'predictions': { 'categories': [ [ 'Iris-setosa', + 52], + [ 'Iris-versicolor', + 49], + [ 'Iris-virginica', + 49]]}, + 'training': { 'categories': [ [ 'Iris-setosa', + 52], + [ 'Iris-versicolor', + 49], + [ 'Iris-virginica', + 49]]}}, + { 'importance': [ ['000002', 0.7129], + ['000003', 0.2635], + ['000000', 0.01485], + ['000001', 0.00875]], + 'predictions': { 'categories': [ [ 'Iris-setosa', + 52], + [ 'Iris-versicolor', + 46], + [ 'Iris-virginica', + 52]]}, + 'training': { 'categories': [ [ 'Iris-setosa', + 52], + [ 'Iris-versicolor', + 46], + [ 'Iris-virginica', + 52]]}}], + 'ensemble': { 'fields': { '000000': { 'column_number': 0, + 'datatype': 'double', + 'name': 'sepal length', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': + ... + 'missing_count': 0}, + 'term_analysis': { 'enabled': True}}}}, + 'ensemble_sample': { 'rate': 1, + 'replacement': True, + 'seed': '820c4aa0a34a4fb69392476c6ffc38dc'}, + 'error_models': 0, + 'fields_meta': { 'count': 5, + 'limit': 1000, + 'offset': 0, + 'query_total': 5, + 'total': 5}, + 'finished_models': 2, + 'focus_field': None, + 'focus_field_name': None, + 'fusions': ['fusion/6488ab197411b45de19f1e19'], + 'importance': { '000000': 0.00743, + '000001': 0.01678, + '000002': 0.71919, + '000003': 0.2566}, + 'input_fields': ['000000', '000001', '000002', '000003'], + 'locale': 'en_US', + 'max_columns': 5, + 'max_rows': 150, + 'missing_splits': False, + 'models': [ 'model/5d5aea073514cd6bf200a630', + 'model/5d5aea083514cd6bf200a632'], + 'name': 'iris', + 'name_options': 'bootstrap decision forest, 512-node, 2-model, pruned, ' + 'deterministic order', + 'node_threshold': 512, + 'number_of_batchpredictions': 0, + 'number_of_evaluations': 0, + 'number_of_models': 2, + 'number_of_predictions': 0, + 'number_of_public_predictions': 0, + 'objective_field': '000004', + 'objective_field_details': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'order': 4}, + 'objective_field_name': 'species', + 'objective_field_type': 'categorical', + 'objective_fields': ['000004'], + 'optiml': None, + 'optiml_status': False, + 'ordering': 0, + 'out_of_bag': False, + 'price': 0.0, + 'private': True, + 'project': None, + 'randomize': False, + 'range': None, + 'replacement': False, + 'resource': 'ensemble/5d5aea06e476842219000add', + 'rows': 150, + 'sample_rate': 1.0, + 'selective_pruning': True, + 'shared': True, + 'shared_clonable': True, + 'shared_hash': 'qfCR2ezORt5u8GNyGaTtJqwJemh', + 'sharing_key': '125380a1560a8efdc0e3eedee7bd2ccce1c4936c', + 'size': 4608, + 'source': 'source/5d5ae9f7e47684769e001337', + 'source_status': False, + 'split_candidates': 32, + 'split_field': None, + 'split_field_name': None, + 'stat_pruning': True, + 'status': { 'code': 5, + 'elapsed': 804, + 'message': 'The ensemble has been created', + 
'progress': 1},
+      'subscription': False,
+      'support_threshold': 0.0,
+      'tags': [],
+      'type': 0,
+      'updated': '2023-06-13T17:44:57.780000',
+      'white_box': False}
+
+Note that the output in the snippet above has been abbreviated. As you see,
+the ``number_of_models`` attribute stores the number of decision trees used
+in the ensemble, and the rest of the dictionary contains the configuration
+parameters described in the `developers section
+`_.
+
+Deepnets
+~~~~~~~~
+
+Deepnets are supervised machine learning models based on deep neural
+networks. They can be used to solve both classification and regression
+problems.
+
+The structure of a deepnet can be obtained as follows:
+
+.. code-block:: python
+
+    >>> deepnet = api.get_deepnet("deepnet/64f2193379c602359ec90197")
+    >>> api.pprint(deepnet["object"])
+    { 'category': 0,
+      'code': 200,
+      'columns': 11,
+      'configuration': None,
+      'configuration_status': False,
+      'created': '2023-09-01T17:02:43.222000',
+      'creator': 'mmartin',
+      'dataset': 'dataset/64f2192251595a5d90394c1e',
+      'dataset_field_types': { 'categorical': 1,
+                               'datetime': 1,
+                               'image': 0,
+                               'items': 0,
+                               'numeric': 9,
+                               'path': 0,
+                               'preferred': 10,
+                               'regions': 0,
+                               'text': 0,
+                               'total': 11},
+      'dataset_status': True,
+      'deepnet': { 'batch_normalization': False,
+                   'deepnet_seed': 'bigml',
+                   'deepnet_version': 'alpha',
+                   'dropout_rate': 0.0,
+                   'fields': { '000000': { 'column_number': 0,
+                                           'datatype': 'string',
+                                           'name': 'cat-0',
+                                           'optype': 'categorical',
+                                           'order': 0,
+                                           'preferred': True,
+                                           'summary': {
+                                               ...
+                                               1954.26254,
+                                               'variance': 0.9737}}},
+                   'hidden_layers': [ { 'activation_function': 'tanh',
+                                        'number_of_nodes': 64,
+                                        'offset': 'zeros',
+                                        'seed': 0,
+                                        'type': 'dense',
+                                        'weights': 'glorot_uniform'}],
+                   'holdout_metrics': { 'mean_absolute_error': 0.8178046941757202,
+                                        'mean_squared_error': 1.0125617980957031,
+                                        'median_absolute_error': 0.6850314736366272,
+                                        'r_squared': -0.009405492794412496,
+                                        'spearman_r': 0.07955370033562714},
+                   'learn_residuals': False,
+                   'learning_rate': 0.01,
+                   'max_iterations': 100,
+                   'missing_numerics': True,
+                   'network': { 'image_network': None,
+                                'layers': [ { 'activation_function': 'tanh',
+                                              'mean': None,
+                                              'number_of_nodes': 64,
+                                              'offset': [ -0.01426,
+                                                          0.06489,
+                                                          0.00609,
+                                                          ...
+ -0.06769, + 0.2289, + 0.03777]]}], + 'output_exposition': { 'mean': -0.06256, + 'stdev': 0.98676, + 'type': 'numeric'}, + 'preprocess': [ { 'index': 0, + 'type': 'categorical', + 'values': [ 'cat0', + 'cat1', + 'cat2']}, + { 'index': 1, + 'mean': 1974.3085, + 'stdev': 43.39534, + 'type': 'numeric'}, + { 'index': 2, + 'mean': 6.459, + 'stdev': 3.4764, + 'type': 'numeric'}, + { 'index': 3, + 'mean': 15.537, + 'stdev': 8.7924, + 'type': 'numeric'}, + { 'index': 4, + 'mean': 4.0015, + 'stdev': 2.02893, + 'type': 'numeric'}, + { 'index': 5, + 'mean': 11.8105, + 'stdev': 6.84646, + 'type': 'numeric'}, + { 'index': 6, + 'mean': 29.3555, + 'stdev': 17.3928, + 'type': 'numeric'}, + { 'index': 7, + 'mean': 29.715, + 'stdev': 17.14149, + 'type': 'numeric'}, + { 'index': 8, + 'mean': 501.6185, + 'stdev': 292.27451, + 'type': 'numeric'}], + 'trees': None}, + 'network_structure': { 'image_network': None, + 'layers': [ { 'activation_function': 'tanh', + 'mean': None, + 'number_of_nodes': 64, + 'offset': 'zeros', + 'residuals': False, + 'scale': None, + 'stdev': None, + 'weights': 'glorot_uniform'}, + { 'activation_function': 'linear', + 'mean': None, + 'number_of_nodes': 1, + 'offset': 'zeros', + 'residuals': False, + 'scale': None, + 'stdev': None, + 'weights': 'glorot_uniform'}], + 'output_exposition': { 'mean': -0.06256, + 'stdev': 0.98676, + 'type': 'numeric'}, + 'preprocess': [ { 'index': 0, + 'type': 'categorical', + 'values': [ 'cat0', + 'cat1', + 'cat2']}, + { 'index': 1, + 'mean': 1974.3085, + 'stdev': 43.39534, + 'type': 'numeric'}, + { 'index': 2, + 'mean': 6.459, + 'stdev': 3.4764, + 'type': 'numeric'}, + { 'index': 3, + 'mean': 15.537, + 'stdev': 8.7924, + 'type': 'numeric'}, + { 'index': 4, + 'mean': 4.0015, + 'stdev': 2.02893, + 'type': 'numeric'}, + { 'index': 5, + 'mean': 11.8105, + 'stdev': 6.84646, + 'type': 'numeric'}, + { 'index': 6, + 'mean': 29.3555, + 'stdev': 17.3928, + 'type': 'numeric'}, + { 'index': 7, + 'mean': 29.715, + 'stdev': 17.14149, + 'type': 'numeric'}, + { 'index': 8, + 'mean': 501.6185, + 'stdev': 292.27451, + 'type': 'numeric'}], + 'trees': None}, + 'number_of_hidden_layers': 1, + 'number_of_iterations': 100, + 'optimizer': { 'adam': { 'beta1': 0.9, + 'beta2': 0.999, + 'epsilon': 1e-08}}, + 'search': False, + 'suggest_structure': False, + 'tree_embedding': False}, + 'description': '', + 'excluded_fields': [], + 'fields_meta': { 'count': 11, + 'limit': 1000, + 'offset': 0, + 'query_total': 11, + 'total': 11}, + 'importance': { '000000': 0.12331, + '000001-0': 0.25597, + '000001-1': 0.07716, + '000001-2': 0.15659, + '000001-3': 0.11564, + '000001-4': 0.0644, + '000001-5': 0.09814, + '000001-6': 0.0555, + '000001-7': 0.05329}, + 'input_fields': [ '000000', + '000001-0', + '000001-1', + '000001-2', + '000001-3', + '000001-4', + '000001-5', + '000001-6', + '000001-7'], + 'locale': 'en_US', + 'max_columns': 11, + 'max_rows': 2000, + 'name': 'dates2', + 'name_options': '1 hidden layers, adam, learning rate=0.01, 100-iteration, ' + 'beta1=0.9, beta2=0.999, epsilon=1e-08, missing values', + 'number_of_batchpredictions': 0, + 'number_of_evaluations': 0, + 'number_of_predictions': 0, + 'number_of_public_predictions': 0, + 'objective_field': '000002', + 'objective_field_name': 'target-2', + 'objective_field_type': 'numeric', + 'objective_fields': ['000002'], + 'optiml': None, + 'optiml_status': False, + 'ordering': 0, + 'out_of_bag': False, + 'price': 0.0, + 'private': True, + 'project': 'project/64f2191c4a1a2c29a1084943', + 'range': None, + 'regression_weight_ratio': None, 
+      'replacement': False,
+      'resource': 'deepnet/64f2193379c602359ec90197',
+      'rows': 2000,
+      'sample_rate': 1.0,
+      'shared': False,
+      'size': 96976,
+      'source': 'source/64f2191f51595a5d8cbf7883',
+      'source_status': True,
+      'status': { 'code': 5,
+                  'elapsed': 10013,
+                  'message': 'The deepnet has been created',
+                  'progress': 1.0},
+      'subscription': False,
+      'tags': [],
+      'type': 0,
+      'updated': '2023-09-01T17:11:28.762000',
+      'white_box': False}
+
+
+Note that the output in the snippet above has been abbreviated. As you see,
+the ``network`` attribute stores the coefficients used in the
+neural network structure and the rest of the dictionary shows the
+configuration parameters described in the `developers section
+`_.
+
+OptiMLs
+~~~~~~~
+
+An OptiML is the result of an automated optimization process to find the
+best model (type and configuration) to solve a particular
+classification or regression problem.
+
+The selection process automates the usual time-consuming task of trying
+different models and parameters and evaluating their results to find the
+best one. Using the OptiML, non-experts can build top-performing models.
+
+You can create an OptiML selecting the objective field to be predicted, the
+evaluation metric to be used to rank the models tested in the process and
+a maximum time for the task to be run.
+
+The JSON structure for an OptiML is:
+
+.. code-block:: python
+
+    >>> api.pprint(optiml["object"])
+    { 'category': 0,
+      'code': 200,
+      'configuration': None,
+      'configuration_status': False,
+      'created': '2018-05-17T20:23:00.060000',
+      'creator': 'mmartin',
+      'dataset': 'dataset/5afdb7009252732d930009e8',
+      'dataset_status': True,
+      'datasets': [ 'dataset/5afde6488bf7d551ee00081c',
+                    'dataset/5afde6488bf7d551fd00511f',
+                    'dataset/5afde6488bf7d551fe002e0f',
+                    ...
+                    'dataset/5afde64d8bf7d551fd00512e'],
+      'description': '',
+      'evaluations': [ 'evaluation/5afde65c8bf7d551fd00514c',
+                       'evaluation/5afde65c8bf7d551fd00514f',
+                       ...
+                       'evaluation/5afde6628bf7d551fd005161'],
+      'excluded_fields': [],
+      'fields_meta': { 'count': 5,
+                       'limit': 1000,
+                       'offset': 0,
+                       'query_total': 5,
+                       'total': 5},
+      'input_fields': ['000000', '000001', '000002', '000003'],
+      'model_count': { 'logisticregression': 1, 'model': 8, 'total': 9},
+      'models': [ 'model/5afde64e8bf7d551fd005131',
+                  'model/5afde64f8bf7d551fd005134',
+                  'model/5afde6518bf7d551fd005137',
+                  'model/5afde6538bf7d551fd00513a',
+                  'logisticregression/5afde6558bf7d551fd00513d',
+                  ...
+                  'model/5afde65a8bf7d551fd005149'],
+      'models_meta': { 'count': 9, 'limit': 1000, 'offset': 0, 'total': 9},
+      'name': 'iris',
+      'name_options': '9 total models (logisticregression: 1, model: 8), metric=max_phi, model candidates=18, max. 
training time=300', + 'objective_field': '000004', + 'objective_field_details': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'order': 4}, + 'objective_field_name': 'species', + 'objective_field_type': 'categorical', + 'objective_fields': ['000004'], + 'optiml': { 'created_resources': { 'dataset': 10, + 'logisticregression': 11, + 'logisticregression_evaluation': 11, + 'model': 29, + 'model_evaluation': 29}, + 'datasets': [ { 'id': 'dataset/5afde6488bf7d551ee00081c', + 'name': 'iris', + 'name_options': '120 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.8'}, + { 'id': 'dataset/5afde6488bf7d551fd00511f', + 'name': 'iris', + 'name_options': '30 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.2, out of bag'}, + { 'id': 'dataset/5afde6488bf7d551fe002e0f', + 'name': 'iris', + 'name_options': '120 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.8'}, + ... + { 'id': 'dataset/5afde64d8bf7d551fd00512e', + 'name': 'iris', + 'name_options': '120 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.8'}], + 'fields': { '000000': { 'column_number': 0, + 'datatype': 'double', + 'name': 'sepal length', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'bins': [ [ 4.3, + 1], + ... + [ 7.9, + 1]], + ... + 'sum': 179.9, + 'sum_squares': 302.33, + 'variance': 0.58101}}, + '000004': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'order': 4, + 'preferred': True, + 'summary': { 'categories': [ [ 'Iris-setosa', + 50], + [ 'Iris-versicolor', + 50], + [ 'Iris-virginica', + 50]], + 'missing_count': 0}, + 'term_analysis': { 'enabled': True}}}, + 'max_training_time': 300, + 'metric': 'max_phi', + 'model_types': ['model', 'logisticregression'], + 'models': [ { 'evaluation': { 'id': 'evaluation/5afde65c8bf7d551fd00514c', + 'info': { 'accuracy': 0.96667, + 'average_area_under_pr_curve': 0.97867, + ... + 'per_class_statistics': [ { 'accuracy': 1, + 'area_under_pr_curve': 1, + ... + 'spearmans_rho': 0.82005}]}, + 'metric_value': 0.95356, + 'metric_variance': 0.00079, + 'name': 'iris vs. iris', + 'name_options': '279-node, deterministic order, operating kind=probability'}, + 'evaluation_count': 3, + 'id': 'model/5afde64e8bf7d551fd005131', + 'importance': [ [ '000002', + 0.70997], + [ '000003', + 0.27289], + [ '000000', + 0.0106], + [ '000001', + 0.00654]], + 'kind': 'model', + 'name': 'iris', + 'name_options': '279-node, deterministic order'}, + { 'evaluation': { 'id': 'evaluation/5afde65c8bf7d551fd00514f', + 'info': { 'accuracy': 0.93333, + + ... + [ '000001', + 0.02133]], + 'kind': 'model', + 'name': 'iris', + 'name_options': '12-node, randomize, deterministic order, balanced'}], + 'number_of_model_candidates': 18, + 'recent_evaluations': [ 0.90764, + 0.94952, + ... 
+                              0.90427],
+                  'search_complete': True,
+                  'summary': { 'logisticregression': { 'best': 'logisticregression/5afde6558bf7d551fd00513d',
+                                                       'count': 1},
+                               'model': { 'best': 'model/5afde64e8bf7d551fd005131',
+                                          'count': 8}}},
+      'private': True,
+      'project': None,
+      'resource': 'optiml/5afde4a42a83475c1b0008a2',
+      'shared': False,
+      'size': 3686,
+      'source': 'source/5afdb6fb9252732d930009e5',
+      'source_status': True,
+      'status': { 'code': 5,
+                  'elapsed': 448878.0,
+                  'message': 'The optiml has been created',
+                  'progress': 1},
+      'subscription': False,
+      'tags': [],
+      'test_dataset': None,
+      'type': 0,
+      'updated': '2018-05-17T20:30:29.063000'}
+
+
+You can check the optiml properties at the `API documentation
+`_.
+
+
+Fusions
+~~~~~~~
+
+A Fusion is a special type of composed resource for which all
+submodels satisfy the following constraints: they're all either
+classifications or regressions over the same kind of data or
+compatible fields, with the same objective field. Given those
+properties, a fusion can be considered a supervised model,
+and therefore one can predict with fusions and evaluate them.
+Ensembles can be viewed as a kind of fusion subject to the additional
+constraints that all its submodels are tree models that, moreover,
+have been built from the same base input data, but sampled in particular
+ways.
+
+The model types allowed to be a submodel of a fusion are:
+deepnet, ensemble, fusion, model, logistic regression and linear regression.
+
+The JSON structure for a Fusion is:
+
+.. code-block:: python
+
+    >>> api.pprint(fusion["object"])
+    {
+        "category": 0,
+        "code": 200,
+        "configuration": null,
+        "configuration_status": false,
+        "created": "2018-05-09T20:11:05.821000",
+        "credits_per_prediction": 0,
+        "description": "",
+        "fields_meta": {
+            "count": 5,
+            "limit": 1000,
+            "offset": 0,
+            "query_total": 5,
+            "total": 5
+        },
+        "fusion": {
+            "models": [
+                {
+                    "id": "ensemble/5af272eb4e1727d378000050",
+                    "kind": "ensemble",
+                    "name": "Iris ensemble",
+                    "name_options": "boosted trees, 1999-node, 16-iteration, deterministic order, balanced"
+                },
+                {
+                    "id": "model/5af272fe4e1727d3780000d6",
+                    "kind": "model",
+                    "name": "Iris model",
+                    "name_options": "1999-node, pruned, deterministic order, balanced"
+                },
+                {
+                    "id": "logisticregression/5af272ff4e1727d3780000d9",
+                    "kind": "logisticregression",
+                    "name": "Iris LR",
+                    "name_options": "L2 regularized (c=1), bias, auto-scaled, missing values, eps=0.001"
+                }
+            ]
+        },
+        "importance": {
+            "000000": 0.05847,
+            "000001": 0.03028,
+            "000002": 0.13582,
+            "000003": 0.4421
+        },
+        "model_count": {
+            "ensemble": 1,
+            "logisticregression": 1,
+            "model": 1,
+            "total": 3
+        },
+        "models": [
+            "ensemble/5af272eb4e1727d378000050",
+            "model/5af272fe4e1727d3780000d6",
+            "logisticregression/5af272ff4e1727d3780000d9"
+        ],
+        "models_meta": {
+            "count": 3,
+            "limit": 1000,
+            "offset": 0,
+            "total": 3
+        },
+        "name": "iris",
+        "name_options": "3 total models (ensemble: 1, logisticregression: 1, model: 1)",
+        "number_of_batchpredictions": 0,
+        "number_of_evaluations": 0,
+        "number_of_predictions": 0,
+        "number_of_public_predictions": 0,
+        "objective_field": "000004",
+        "objective_field_details": {
+            "column_number": 4,
+            "datatype": "string",
+            "name": "species",
+            "optype": "categorical",
+            "order": 4
+        },
+        "objective_field_name": "species",
+        "objective_field_type": "categorical",
+        "objective_fields": [
+            "000004"
+        ],
+        "private": true,
+        "project": null,
+        "resource": "fusion/59af8107b8aa0965d5b61138",
+        "shared": false,
+        "status": {
+            "code": 5,
+            "elapsed": 8420,
+            "message": "The fusion has been created",
+            "progress": 1
+        },
+        "subscription": false,
+        "tags": [],
+        "type": 0,
+        "updated": "2018-05-09T20:11:14.258000"
+    }
+
+You can check the fusion properties at the `API documentation
+`_.
+
+
+Time Series
+~~~~~~~~~~~
+
+A time series model is a supervised learning method to forecast the future
+values of a field based on its previously observed values.
+It is used to analyze time-based data when historical patterns can explain
+the future behavior, such as stock prices, sales forecasting,
+website traffic, production and inventory analysis, weather forecasting,
+etc. A time series model needs to be trained with time series data,
+i.e., a field containing a sequence of equally distributed data points in
+time.
+
+BigML implements exponential smoothing to train time series models.
+Time series data is modeled as a level component that can optionally
+include a trend component (damped or not) and a seasonality
+component. You can learn more about how to include these components and
+their use in the `API documentation page `_.
+
+You can create a time series model selecting one or several fields from
+your dataset that will be the objective fields. The forecast will compute
+their future values.
+
+
+The JSON structure for a time series is:
+
+.. code-block:: python
+
+    >>> api.pprint(time_series['object'])
+    { 'category': 0,
+      'clones': 0,
+      'code': 200,
+      'columns': 1,
+      'configuration': None,
+      'configuration_status': False,
+      'created': '2017-07-15T12:49:42.601000',
+      'credits': 0.0,
+      'dataset': 'dataset/5968ec42983efc21b0000016',
+      'dataset_field_types': { 'categorical': 0,
+                               'datetime': 0,
+                               'effective_fields': 6,
+                               'items': 0,
+                               'numeric': 6,
+                               'preferred': 6,
+                               'text': 0,
+                               'total': 6},
+      'dataset_status': True,
+      'dataset_type': 0,
+      'description': '',
+      'fields_meta': { 'count': 1,
+                       'limit': 1000,
+                       'offset': 0,
+                       'query_total': 1,
+                       'total': 1},
+      'forecast': { '000005': [ { 'lower_bound': [ 30.14111,
+                                                   30.14111,
+                                                   ...
+                                                   30.14111],
+                                  'model': 'A,N,N',
+                                  'point_forecast': [ 68.53181,
+                                                      68.53181,
+                                                      ...
+                                                      68.53181,
+                                                      68.53181],
+                                  'time_range': { 'end': 129,
+                                                  'interval': 1,
+                                                  'interval_unit': 'milliseconds',
+                                                  'start': 80},
+                                  'upper_bound': [ 106.92251,
+                                                   106.92251,
+                                                   ...
+                                                   106.92251,
+                                                   106.92251]},
+                                { 'lower_bound': [ 35.44118,
+                                                   35.5032,
+                                                   ...
+                                                   35.28083],
+                                  'model': 'A,Ad,N',
+                                  ...
+                                                      66.83537,
+                                                      66.9465],
+                                  'time_range': { 'end': 129,
+                                                  'interval': 1,
+                                                  'interval_unit': 'milliseconds',
+                                                  'start': 80}}]},
+      'horizon': 50,
+      'locale': 'en_US',
+      'max_columns': 6,
+      'max_rows': 80,
+      'name': 'my_ts_data',
+      'name_options': 'period=1, range=[1, 80]',
+      'number_of_evaluations': 0,
+      'number_of_forecasts': 0,
+      'number_of_public_forecasts': 0,
+      'objective_field': '000005',
+      'objective_field_name': 'Final',
+      'objective_field_type': 'numeric',
+      'objective_fields': ['000005'],
+      'objective_fields_names': ['Final'],
+      'price': 0.0,
+      'private': True,
+      'project': None,
+      'range': [1, 80],
+      'resource': 'timeseries/596a0f66983efc53f3000000',
+      'rows': 80,
+      'shared': False,
+      'short_url': '',
+      'size': 2691,
+      'source': 'source/5968ec3c983efc218c000006',
+      'source_status': True,
+      'status': { 'code': 5,
+                  'elapsed': 8358,
+                  'message': 'The time series has been created',
+                  'progress': 1.0},
+      'subscription': True,
+      'tags': [],
+      'time_series': { 'all_numeric_objectives': False,
+                       'datasets': { '000005': 'dataset/596a0f70983efc53f3000003'},
+                       'ets_models': { '000005': [ { 'aic': 831.30903,
+                                                     'aicc': 831.84236,
+                                                     'alpha': 0.00012,
+                                                     'beta': 0,
+                                                     'bic': 840.83713,
+                                                     'final_state': { 'b': 0,
+                                                                      'l': 68.53181,
+                                                                      's': [ 0]},
+                                                     'gamma': 0,
+                                                     'initial_state': { 'b': 0,
+                                                                        'l': 68.53217,
+                                                                        's': [ 0]},
+                                                     'name': 'A,N,N',
+                                                     'period': 1,
+                                                     'phi': 1,
+                                                     'r_squared': -0.0187,
+                                                     'sigma': 19.19535},
+                                                   { 'aic': 834.43049,
+                                                     ...
+                                                     'slope': 0.11113,
+                                                     'value': 61.39}]},
+                       'fields': { '000005': { 'column_number': 5,
+                                               'datatype': 'double',
+                                               'name': 'Final',
+                                               'optype': 'numeric',
+                                               'order': 0,
+                                               'preferred': True,
+                                               'summary': { 'bins': [ [ 28.06, 1],
+                                                                      [ 34.44,
+                                                                      ...
+                                                                      [ 108.335, 2]],
+                                                            ...
+                                                            'sum_squares': 389814.3944,
+                                                            'variance': 380.73315}}},
+                       'period': 1,
+                       'time_range': { 'end': 79,
+                                       'interval': 1,
+                                       'interval_unit': 'milliseconds',
+                                       'start': 0}},
+      'type': 0,
+      'updated': '2017-07-15T12:49:52.549000',
+      'white_box': False}
+
+
+You can check the time series properties at the `API documentation
+`_.
+
+
+Unsupervised Models
+-------------------
+
+Cluster
+~~~~~~~
+
+For unsupervised learning problems, the cluster is used to classify your
+training data into a limited number of groups. The cluster structure is
+defined by the centers of each group of data, named centroids, and the data
+enclosed in the group. As in the model's case, the cluster is a white-box
+resource and can be retrieved as a JSON:
+
+.. code-block:: python
+
+    >>> cluster = api.get_cluster(cluster)
+    >>> api.pprint(cluster['object'])
+    { 'balance_fields': True,
+      'category': 0,
+      'cluster_datasets': { '000000': '', '000001': '', '000002': ''},
+      'cluster_datasets_ids': { '000000': '53739b9ae4b0dad82b0a65e6',
+                                '000001': '53739b9ae4b0dad82b0a65e7',
+                                '000002': '53739b9ae4b0dad82b0a65e8'},
+      'cluster_seed': '2c249dda00fbf54ab4cdd850532a584f286af5b6',
+      'clusters': { 'clusters': [ { 'center': { '000000': 58.5,
+                                                '000001': 26.8314,
+                                                '000002': 44.27907,
+                                                '000003': 14.37209},
+                                    'count': 56,
+                                    'distance': { 'bins': [ [ 0.69602, 2],
+                                                            [ ... ]
+                                                            [ 3.77052, 1]],
+                                                  'maximum': 3.77052,
+                                                  'mean': 1.61711,
+                                                  'median': 1.52146,
+                                                  'minimum': 0.69237,
+                                                  'population': 56,
+                                                  'standard_deviation': 0.6161,
+                                                  'sum': 90.55805,
+                                                  'sum_squares': 167.31926,
+                                                  'variance': 0.37958},
+                                    'id': '000000',
+                                    'name': 'Cluster 0'},
+                                  { 'center': { '000000': 50.06,
+                                                '000001': 34.28,
+                                                '000002': 14.62,
+                                                '000003': 2.46},
+                                    'count': 50,
+                                    'distance': { 'bins': [ [ 0.16917, 1],
+                                                            [ ... ]
+                                                            [ 4.94699, 1]],
+                                                  'maximum': 4.94699,
+                                                  'mean': 1.50725,
+                                                  'median': 1.3393,
+                                                  'minimum': 0.16917,
+                                                  'population': 50,
+                                                  'standard_deviation': 1.00994,
+                                                  'sum': 75.36252,
+                                                  'sum_squares': 163.56918,
+                                                  'variance': 1.01998},
+                                    'id': '000001',
+                                    'name': 'Cluster 1'},
+                                  { 'center': { '000000': 68.15625,
+                                                '000001': 31.25781,
+                                                '000002': 55.48438,
+                                                '000003': 19.96875},
+                                    'count': 44,
+                                    'distance': { 'bins': [ [ 0.36825, 1],
+                                                            [ ... ]
+                                                            [ 3.87216, 1]],
+                                                  'maximum': 3.87216,
+                                                  'mean': 1.67264,
+                                                  'median': 1.63705,
+                                                  'minimum': 0.36825,
+                                                  'population': 44,
+                                                  'standard_deviation': 0.78905,
+                                                  'sum': 73.59627,
+                                                  'sum_squares': 149.87194,
+                                                  'variance': 0.6226},
+                                    'id': '000002',
+                                    'name': 'Cluster 2'}],
+                    'fields': { '000000': { 'column_number': 0,
+                                            'datatype': 'int8',
+                                            'name': 'sepal length',
+                                            'optype': 'numeric',
+                                            'order': 0,
+                                            'preferred': True,
+                                            'summary': { 'bins': [ [ 43.75, 4],
+                                                                   [ ... ]
+                                                                   [ 79, 1]],
+                                                         'maximum': 79,
+                                                         'mean': 58.43333,
+                                                         'median': 57.7889,
+                                                         'minimum': 43,
+                                                         'missing_count': 0,
+                                                         'population': 150,
+                                                         'splits': [ 45.15258,
+                                                                     46.72525,
+                                                                     72.04226,
+                                                                     76.47461],
+                                                         'standard_deviation': 8.28066,
+                                                         'sum': 8765,
+                                                         'sum_squares': 522385,
+                                                         'variance': 68.56935}},
+                                [ ... ]
+                                                                   [ 25, 3]],
+                                                         'maximum': 25,
+                                                         'mean': 11.99333,
+                                                         'median': 13.28483,
+                                                         'minimum': 1,
+                                                         'missing_count': 0,
+                                                         'population': 150,
+                                                         'standard_deviation': 7.62238,
+                                                         'sum': 1799,
+                                                         'sum_squares': 30233,
+                                                         'variance': 58.10063}}}},
+      'code': 202,
+      'columns': 4,
+      'created': '2014-05-14T16:36:40.993000',
+      'credits': 0.017578125,
+      'credits_per_prediction': 0.0,
+      'dataset': 'dataset/53739b88c8db63122b000411',
+      'dataset_field_types': { 'categorical': 1,
+                               'datetime': 0,
+                               'numeric': 4,
+                               'preferred': 5,
+                               'text': 0,
+                               'total': 5},
+      'dataset_status': True,
+      'dataset_type': 0,
+      'description': '',
+      'excluded_fields': ['000004'],
+      'field_scales': None,
+      'fields_meta': { 'count': 4,
+                       'limit': 1000,
+                       'offset': 0,
+                       'query_total': 4,
+                       'total': 4},
+      'input_fields': ['000000', '000001', '000002', '000003'],
+      'k': 3,
+      'locale': 'es-ES',
+      'max_columns': 5,
+      'max_rows': 150,
+      'name': 'my iris',
+      'number_of_batchcentroids': 0,
+      'number_of_centroids': 0,
+      'number_of_public_centroids': 0,
+      'out_of_bag': False,
+      'price': 0.0,
+      'private': True,
+      'range': [1, 150],
+      'replacement': False,
+      'resource': 'cluster/53739b98d994972da7001de9',
+      'rows': 150,
+      'sample_rate': 1.0,
+      'scales': { '000000': 0.22445382597655375,
+                  '000001': 0.4264213814821549,
+                  '000002': 0.10528680248949522,
+                  '000003': 0.2438379900517961},
+      'shared': False,
+      'size': 4608,
+      'source': 'source/53739b24d994972da7001ddd',
+      'source_status': True,
+      'status': { 'code': 5,
+                  'elapsed': 1009,
+                  'message': 'The cluster has been created',
+                  'progress': 1.0},
+      'subscription': True,
+      'tags': [],
+      'updated': '2014-05-14T16:40:26.234728',
+      'white_box': False}
+
+(Note that we have abbreviated the output in the snippet above for
+readability: the full cluster you'll get is going to contain
+many more details.)
+
+You can check the cluster properties at the `API documentation
+`_.
+
+Anomaly detector
+~~~~~~~~~~~~~~~~
+
+For anomaly detection problems, the BigML anomaly detector uses an isolation
+forest (iforest), an unsupervised kind of model that detects anomalous data
+in a dataset. The information it returns encloses a `top_anomalies` block
+that contains a list of the most anomalous
+points. For each of them, we capture a `score` from 0 to 1. The closer to 1,
+the more anomalous.
We also capture the `row` which gives values for +each field in the order defined by `input_fields`. Similarly we give +a list of `importances` which match the `row` values. These +importances tell us which values contributed most to the anomaly +score. Thus, the structure of an anomaly detector is similar to: + +.. code-block:: python + + { 'category': 0, + 'code': 200, + 'columns': 14, + 'constraints': False, + 'created': '2014-09-08T18:51:11.893000', + 'credits': 0.11653518676757812, + 'credits_per_prediction': 0.0, + 'dataset': 'dataset/540dfa9d9841fa5c88000765', + 'dataset_field_types': { 'categorical': 21, + 'datetime': 0, + 'numeric': 21, + 'preferred': 14, + 'text': 0, + 'total': 42}, + 'dataset_status': True, + 'dataset_type': 0, + 'description': '', + 'excluded_fields': [], + 'fields_meta': { 'count': 14, + 'limit': 1000, + 'offset': 0, + 'query_total': 14, + 'total': 14}, + 'forest_size': 128, + 'input_fields': [ '000004', + '000005', + '000009', + '000016', + '000017', + '000018', + '000019', + '00001e', + '00001f', + '000020', + '000023', + '000024', + '000025', + '000026'], + 'locale': 'en_US', + 'max_columns': 42, + 'max_rows': 200, + 'model': { 'fields': { '000004': { 'column_number': 4, + 'datatype': 'int16', + 'name': 'src_bytes', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'bins': [ [ 143, + 2], + ... + [ 370, + 2]], + 'maximum': 370, + 'mean': 248.235, + 'median': 234.57157, + 'minimum': 141, + 'missing_count': 0, + 'population': 200, + 'splits': [ 159.92462, + 173.73312, + 188, + ... + 339.55228], + 'standard_deviation': 49.39869, + 'sum': 49647, + 'sum_squares': 12809729, + 'variance': 2440.23093}}, + '000005': { 'column_number': 5, + 'datatype': 'int32', + 'name': 'dst_bytes', + 'optype': 'numeric', + 'order': 1, + 'preferred': True, + ... + 'sum': 1030851, + 'sum_squares': 22764504759, + 'variance': 87694652.45224}}, + '000009': { 'column_number': 9, + 'datatype': 'string', + 'name': 'hot', + 'optype': 'categorical', + 'order': 2, + 'preferred': True, + 'summary': { 'categories': [ [ '0', + 199], + [ '1', + 1]], + 'missing_count': 0}, + 'term_analysis': { 'enabled': True}}, + '000016': { 'column_number': 22, + 'datatype': 'int8', + 'name': 'count', + 'optype': 'numeric', + 'order': 3, + 'preferred': True, + ... + 'population': 200, + 'standard_deviation': 5.42421, + 'sum': 1351, + 'sum_squares': 14981, + 'variance': 29.42209}}, + '000017': { ... }}}, + 'kind': 'iforest', + 'mean_depth': 12.314174107142858, + 'top_anomalies': [ { 'importance': [ 0.06768, + 0.01667, + 0.00081, + 0.02437, + 0.04773, + 0.22197, + 0.18208, + 0.01868, + 0.11855, + 0.01983, + 0.01898, + 0.05306, + 0.20398, + 0.00562], + 'row': [ 183.0, + 8654.0, + '0', + 4.0, + 4.0, + 0.25, + 0.25, + 0.0, + 123.0, + 255.0, + 0.01, + 0.04, + 0.01, + 0.0], + 'score': 0.68782}, + { 'importance': [ 0.05645, + 0.02285, + 0.0015, + 0.05196, + 0.04435, + 0.0005, + 0.00056, + 0.18979, + 0.12402, + 0.23671, + 0.20723, + 0.05651, + 0.00144, + 0.00612], + 'row': [ 212.0, + 1940.0, + '0', + 1.0, + 2.0, + 0.0, + 0.0, + 1.0, + 1.0, + 69.0, + 1.0, + 0.04, + 0.0, + 0.0], + 'score': 0.6239}, + ...], + 'trees': [ { 'root': { 'children': [ { 'children': [ { 'children': [ { 'children': [ { 'children': + [ { 'population': 1, + 'predicates': [ { 'field': '00001f', + 'op': '>', + 'value': 35.54357}]}, + + ... 
+ { 'population': 1,
+ 'predicates': [ { 'field': '00001f',
+ 'op': '<=',
+ 'value': 35.54357}]}],
+ 'population': 2,
+ 'predicates': [ { 'field': '000005',
+ 'op': '<=',
+ 'value': 1385.5166}]}],
+ 'population': 3,
+ 'predicates': [ { 'field': '000020',
+ 'op': '<=',
+ 'value': 65.14308},
+ { 'field': '000019',
+ 'op': '=',
+ 'value': 0}]}],
+ 'population': 105,
+ 'predicates': [ { 'field': '000017',
+ 'op': '<=',
+ 'value': 13.21754},
+ { 'field': '000009',
+ 'op': 'in',
+ 'value': [ '0']}]}],
+ 'population': 126,
+ 'predicates': [ True,
+ { 'field': '000018',
+ 'op': '=',
+ 'value': 0}]},
+ 'training_mean_depth': 11.071428571428571}]},
+ 'name': "tiny_kdd's dataset anomaly detector",
+ 'number_of_batchscores': 0,
+ 'number_of_public_predictions': 0,
+ 'number_of_scores': 0,
+ 'out_of_bag': False,
+ 'price': 0.0,
+ 'private': True,
+ 'project': None,
+ 'range': [1, 200],
+ 'replacement': False,
+ 'resource': 'anomaly/540dfa9f9841fa5c8800076a',
+ 'rows': 200,
+ 'sample_rate': 1.0,
+ 'sample_size': 126,
+ 'seed': 'BigML',
+ 'shared': False,
+ 'size': 30549,
+ 'source': 'source/540dfa979841fa5c7f000363',
+ 'source_status': True,
+ 'status': { 'code': 5,
+ 'elapsed': 32397,
+ 'message': 'The anomaly detector has been created',
+ 'progress': 1.0},
+ 'subscription': False,
+ 'tags': [],
+ 'updated': '2014-09-08T23:54:28.647000',
+ 'white_box': False}
+
+(Note that we have abbreviated the output in the snippet above for
+readability: the full anomaly detector you'll get contains many
+more details.)
+
+The `trees` list contains the actual isolation forest, and it can be quite
+large. That's why this part of the resource should only be included
+in downloads when needed. If you are only interested in other properties, such
+as `top_anomalies`, you'll improve performance by excluding it, using the
+`excluded=trees` query string in the API call:
+
+.. code-block:: python
+
+    anomaly = api.get_anomaly('anomaly/540dfa9f9841fa5c8800076a', \
+        query_string='excluded=trees')
+
+Each node in an isolation tree can have multiple predicates.
+For the node to be a valid branch when evaluated with a data point, all of its
+predicates must be true.
+
+You can check the anomaly detector properties at the `API documentation
+`_.
+
+Associations
+~~~~~~~~~~~~
+
+Association Discovery is a popular method to find out relations among values
+in high-dimensional datasets.
+
+A common case where association discovery is often used is
+market basket analysis. This analysis looks for customer shopping
+patterns across large transactional
+datasets. For instance, do customers who buy hamburgers and ketchup also
+consume bread?
+
+Businesses use those insights to make decisions on promotions and product
+placements.
+Association Discovery can also be used for other purposes such as early
+incident detection, web usage analysis, or software intrusion detection.
+
+In BigML, the Association resource object can be built from any dataset, and
+its results are a list of association rules between the items in the dataset.
+In the example case, the corresponding
+association rule would have hamburgers and ketchup as the items at the
+left hand side of the association rule and bread would be the item at the
+right hand side. Both sides in this association rule are related,
+in the sense that observing
+the items in the left hand side implies observing the items in the right hand
+side.
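+
+The quality of these rules is assessed through several metrics: support,
+coverage, confidence, leverage and lift, defined in detail below. As a rough
+illustration of how they relate to each other, here is a minimal, hedged
+sketch that computes them for a toy list of market baskets (all the data and
+names here are made up; BigML computes these metrics server-side):
+
+.. code-block:: python
+
+    # toy transactions; each set is one instance (a market basket)
+    transactions = [
+        {"hamburgers", "ketchup", "bread"},
+        {"hamburgers", "ketchup", "bread"},
+        {"hamburgers", "soda"},
+        {"bread", "milk"},
+    ]
+    antecedent = {"hamburgers", "ketchup"}  # left hand side of the rule
+    consequent = {"bread"}                  # right hand side of the rule
+
+    n = len(transactions)
+    both = sum(1 for t in transactions if antecedent | consequent <= t)
+    lhs = sum(1 for t in transactions if antecedent <= t)
+    rhs = sum(1 for t in transactions if consequent <= t)
+
+    support = both / n                # 2/4 = 0.5
+    coverage = lhs / n                # support of the antecedent: 0.5
+    confidence = support / coverage   # 1.0
+    leverage = support - coverage * (rhs / n)  # 0.5 - 0.5 * 0.75 = 0.125
+    lift = confidence / (rhs / n)     # 1.0 / 0.75 = 1.33...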
+
+In detail, these are the metrics used to assess the quality of the
+association rules:
+
+- Support: the proportion of instances which contain an itemset.
+
+For an association rule, it means the number of instances in the dataset which
+contain the rule's antecedent and rule's consequent together
+over the total number of instances (N) in the dataset.
+
+It gives a measure of the importance of the rule. Association rules have
+to satisfy a minimum support constraint (i.e., min_support).
+
+- Coverage: the support of the antecedent of an association rule.
+It measures how often a rule can be applied.
+
+- Confidence (or strength): the probability of seeing the rule's consequent
+under the condition that the instances also contain the rule's antecedent.
+Confidence is computed using the support of the association rule over the
+coverage. That is, the percentage of instances which contain the consequent
+and antecedent together over the number of instances which contain
+the antecedent.
+
+Confidence is directed and gives different values for the association
+rules Antecedent → Consequent and Consequent → Antecedent. Association
+rules also need to satisfy a minimum confidence constraint
+(i.e., min_confidence).
+
+- Leverage: the difference between the support of the association
+rule (i.e., the antecedent and consequent appearing together) and what would
+be expected if antecedent and consequent were statistically independent.
+This is a value between -1 and 1. A positive value suggests a positive
+relationship and a negative value suggests a negative relationship.
+0 indicates independence.
+
+- Lift: how many times more often antecedent and consequent occur together
+than expected if they were statistically independent.
+A value of 1 suggests that there is no relationship between the antecedent
+and the consequent. Higher values suggest stronger positive relationships.
+Lower values suggest stronger negative relationships (the presence of the
+antecedent reduces the likelihood of the consequent).
+
+As to the items used in association rules, each type of field is parsed to
+extract items for the rules as follows:
+
+- Categorical: each different value (class) will be considered a separate item.
+- Text: each unique term will be considered a separate item.
+- Items: each different item in the items summary will be considered.
+- Numeric: values will be converted into categorical by making a
+segmentation of the values.
+For example, a numeric field with values ranging from 0 to 600 split
+into 3 segments:
+segment 1 → [0, 200), segment 2 → [200, 400), segment 3 → [400, 600].
+You can refine the behavior of the transformation using
+`discretization `_
+and `field_discretizations `_.
+
+The JSON structure for an association resource is:
+
+.. code-block:: python
+
+
+ >>> api.pprint(association['object'])
+ {
+ "associations":{
+ "complement":false,
+ "discretization":{
+ "pretty":true,
+ "size":5,
+ "trim":0,
+ "type":"width"
+ },
+ "items":[
+ {
+ "complement":false,
+ "count":32,
+ "field_id":"000000",
+ "name":"Segment 1",
+ "bin_end":5,
+ "bin_start":null
+ },
+ {
+ "complement":false,
+ "count":49,
+ "field_id":"000000",
+ "name":"Segment 3",
+ "bin_end":7,
+ "bin_start":6
+ },
+ {
+ "complement":false,
+ "count":12,
+ "field_id":"000000",
+ "name":"Segment 4",
+ "bin_end":null,
+ "bin_start":7
+ },
+ {
+ "complement":false,
+ "count":19,
+ "field_id":"000001",
+ "name":"Segment 1",
+ "bin_end":2.5,
+ "bin_start":null
+ },
+ ...
+ { + "complement":false, + "count":50, + "field_id":"000004", + "name":"Iris-versicolor" + }, + { + "complement":false, + "count":50, + "field_id":"000004", + "name":"Iris-virginica" + } + ], + "max_k": 100, + "min_confidence":0, + "min_leverage":0, + "min_lift":1, + "min_support":0, + "rules":[ + { + "confidence":1, + "id":"000000", + "leverage":0.22222, + "lhs":[ + 13 + ], + "lhs_cover":[ + 0.33333, + 50 + ], + "lift":3, + "p_value":0.000000000, + "rhs":[ + 6 + ], + "rhs_cover":[ + 0.33333, + 50 + ], + "support":[ + 0.33333, + 50 + ] + }, + { + "confidence":1, + "id":"000001", + "leverage":0.22222, + "lhs":[ + 6 + ], + "lhs_cover":[ + 0.33333, + 50 + ], + "lift":3, + "p_value":0.000000000, + "rhs":[ + 13 + ], + "rhs_cover":[ + 0.33333, + 50 + ], + "support":[ + 0.33333, + 50 + ] + }, + ... + { + "confidence":0.26, + "id":"000029", + "leverage":0.05111, + "lhs":[ + 13 + ], + "lhs_cover":[ + 0.33333, + 50 + ], + "lift":2.4375, + "p_value":0.0000454342, + "rhs":[ + 5 + ], + "rhs_cover":[ + 0.10667, + 16 + ], + "support":[ + 0.08667, + 13 + ] + }, + { + "confidence":0.18, + "id":"00002a", + "leverage":0.04, + "lhs":[ + 15 + ], + "lhs_cover":[ + 0.33333, + 50 + ], + "lift":3, + "p_value":0.0000302052, + "rhs":[ + 9 + ], + "rhs_cover":[ + 0.06, + 9 + ], + "support":[ + 0.06, + 9 + ] + }, + { + "confidence":1, + "id":"00002b", + "leverage":0.04, + "lhs":[ + 9 + ], + "lhs_cover":[ + 0.06, + 9 + ], + "lift":3, + "p_value":0.0000302052, + "rhs":[ + 15 + ], + "rhs_cover":[ + 0.33333, + 50 + ], + "support":[ + 0.06, + 9 + ] + } + ], + "rules_summary":{ + "confidence":{ + "counts":[ + [ + 0.18, + 1 + ], + [ + 0.24, + 1 + ], + [ + 0.26, + 2 + ], + ... + [ + 0.97959, + 1 + ], + [ + 1, + 9 + ] + ], + "maximum":1, + "mean":0.70986, + "median":0.72864, + "minimum":0.18, + "population":44, + "standard_deviation":0.24324, + "sum":31.23367, + "sum_squares":24.71548, + "variance":0.05916 + }, + "k":44, + "leverage":{ + "counts":[ + [ + 0.04, + 2 + ], + [ + 0.05111, + 4 + ], + [ + 0.05316, + 2 + ], + ... + [ + 0.22222, + 2 + ] + ], + "maximum":0.22222, + "mean":0.10603, + "median":0.10156, + "minimum":0.04, + "population":44, + "standard_deviation":0.0536, + "sum":4.6651, + "sum_squares":0.61815, + "variance":0.00287 + }, + "lhs_cover":{ + "counts":[ + [ + 0.06, + 2 + ], + [ + 0.08, + 2 + ], + [ + 0.10667, + 4 + ], + [ + 0.12667, + 1 + ], + ... + [ + 0.5, + 4 + ] + ], + "maximum":0.5, + "mean":0.29894, + "median":0.33213, + "minimum":0.06, + "population":44, + "standard_deviation":0.13386, + "sum":13.15331, + "sum_squares":4.70252, + "variance":0.01792 + }, + "lift":{ + "counts":[ + [ + 1.40625, + 2 + ], + [ + 1.5067, + 2 + ], + ... + [ + 2.63158, + 4 + ], + [ + 3, + 10 + ], + [ + 4.93421, + 2 + ], + [ + 12.5, + 2 + ] + ], + "maximum":12.5, + "mean":2.91963, + "median":2.58068, + "minimum":1.40625, + "population":44, + "standard_deviation":2.24641, + "sum":128.46352, + "sum_squares":592.05855, + "variance":5.04635 + }, + "p_value":{ + "counts":[ + [ + 0.000000000, + 2 + ], + [ + 0.000000000, + 4 + ], + [ + 0.000000000, + 2 + ], + ... + [ + 0.0000910873, + 2 + ] + ], + "maximum":0.0000910873, + "mean":0.0000106114, + "median":0.00000000, + "minimum":0.000000000, + "population":44, + "standard_deviation":0.0000227364, + "sum":0.000466903, + "sum_squares":0.0000000, + "variance":0.000000001 + }, + "rhs_cover":{ + "counts":[ + [ + 0.06, + 2 + ], + [ + 0.08, + 2 + ], + ... 
+ [
+ 0.42667,
+ 2
+ ],
+ [
+ 0.46667,
+ 3
+ ],
+ [
+ 0.5,
+ 4
+ ]
+ ],
+ "maximum":0.5,
+ "mean":0.29894,
+ "median":0.33213,
+ "minimum":0.06,
+ "population":44,
+ "standard_deviation":0.13386,
+ "sum":13.15331,
+ "sum_squares":4.70252,
+ "variance":0.01792
+ },
+ "support":{
+ "counts":[
+ [
+ 0.06,
+ 4
+ ],
+ [
+ 0.06667,
+ 2
+ ],
+ [
+ 0.08,
+ 2
+ ],
+ [
+ 0.08667,
+ 4
+ ],
+ [
+ 0.10667,
+ 4
+ ],
+ [
+ 0.15333,
+ 2
+ ],
+ [
+ 0.18667,
+ 4
+ ],
+ [
+ 0.19333,
+ 2
+ ],
+ [
+ 0.20667,
+ 2
+ ],
+ [
+ 0.27333,
+ 2
+ ],
+ [
+ 0.28667,
+ 2
+ ],
+ [
+ 0.3,
+ 4
+ ],
+ [
+ 0.32,
+ 2
+ ],
+ [
+ 0.33333,
+ 6
+ ],
+ [
+ 0.37333,
+ 2
+ ]
+ ],
+ "maximum":0.37333,
+ "mean":0.20152,
+ "median":0.19057,
+ "minimum":0.06,
+ "population":44,
+ "standard_deviation":0.10734,
+ "sum":8.86668,
+ "sum_squares":2.28221,
+ "variance":0.01152
+ }
+ },
+ "search_strategy":"leverage",
+ "significance_level":0.05
+ },
+ "category":0,
+ "clones":0,
+ "code":200,
+ "columns":5,
+ "created":"2015-11-05T08:06:08.184000",
+ "credits":0.017581939697265625,
+ "dataset":"dataset/562fae3f4e1727141d00004e",
+ "dataset_status":true,
+ "dataset_type":0,
+ "description":"",
+ "excluded_fields":[ ],
+ "fields_meta":{
+ "count":5,
+ "limit":1000,
+ "offset":0,
+ "query_total":5,
+ "total":5
+ },
+ "input_fields":[
+ "000000",
+ "000001",
+ "000002",
+ "000003",
+ "000004"
+ ],
+ "locale":"en_US",
+ "max_columns":5,
+ "max_rows":150,
+ "name":"iris' dataset's association",
+ "out_of_bag":false,
+ "price":0,
+ "private":true,
+ "project":null,
+ "range":[
+ 1,
+ 150
+ ],
+ "replacement":false,
+ "resource":"association/5621b70910cb86ae4c000000",
+ "rows":150,
+ "sample_rate":1,
+ "shared":false,
+ "size":4609,
+ "source":"source/562fae3a4e1727141d000048",
+ "source_status":true,
+ "status":{
+ "code":5,
+ "elapsed":1072,
+ "message":"The association has been created",
+ "progress":1
+ },
+ "subscription":false,
+ "tags":[ ],
+ "updated":"2015-11-05T08:06:20.403000",
+ "white_box":false
+ }
+
+Note that the output in the snippet above has been abbreviated. As you see,
+the ``associations`` attribute stores items, rules and metrics extracted
+from the dataset as well as the configuration parameters described in
+the `developers section `_.
+
+
+Topic Models
+~~~~~~~~~~~~
+
+A topic model is an unsupervised machine learning method
+for unveiling all the different topics
+underlying a collection of documents.
+BigML uses Latent Dirichlet Allocation (LDA), one of the most popular
+probabilistic methods for topic modeling.
+In BigML, each instance (i.e. each row in your dataset) will
+be considered a document and the contents of all the text fields
+given as inputs will be automatically concatenated and considered the
+document bag of words.
+
+A topic model is based on the assumption that any document
+exhibits a mixture of topics. Each topic is composed of a set of words
+which are thematically related. The words from a given topic have different
+probabilities for that topic. At the same time, each word can be attributable
+to one or several topics. So, for example, the word "sea" may be found in
+a topic related to sea transport but also in a topic related to holidays.
+Topic models automatically discard stop words and high
+frequency words.
+
+A topic model's main applications include browsing, organizing and
+understanding large archives of documents. It can be applied to information
+retrieval, collaborative filtering, or assessing document similarity,
+among others.
+The topics found in the dataset can also be very useful new features +before applying other models like classification, clustering, or +anomaly detection. + +The JSON structure for a topic model is: + +.. code-block:: python + + >>> api.pprint(topic['object']) + { 'category': 0, + 'code': 200, + 'columns': 1, + 'configuration': None, + 'configuration_status': False, + 'created': '2016-11-23T23:47:54.703000', + 'credits': 0.0, + 'credits_per_prediction': 0.0, + 'dataset': 'dataset/58362aa0983efc45a0000005', + 'dataset_field_types': { 'categorical': 1, + 'datetime': 0, + 'effective_fields': 672, + 'items': 0, + 'numeric': 0, + 'preferred': 2, + 'text': 1, + 'total': 2}, + 'dataset_status': True, + 'dataset_type': 0, + 'description': '', + 'excluded_fields': [], + 'fields_meta': { 'count': 1, + 'limit': 1000, + 'offset': 0, + 'query_total': 1, + 'total': 1}, + 'input_fields': ['000001'], + 'locale': 'en_US', + 'max_columns': 2, + 'max_rows': 656, + 'name': u"spam dataset's Topic Model ", + 'number_of_batchtopicdistributions': 0, + 'number_of_public_topicdistributions': 0, + 'number_of_topicdistributions': 0, + 'ordering': 0, + 'out_of_bag': False, + 'price': 0.0, + 'private': True, + 'project': None, + 'range': [1, 656], + 'replacement': False, + 'resource': 'topicmodel/58362aaa983efc45a1000007', + 'rows': 656, + 'sample_rate': 1.0, + 'shared': False, + 'size': 54740, + 'source': 'source/58362a69983efc459f000001', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 3222, + 'message': 'The topic model has been created', + 'progress': 1.0}, + 'subscription': True, + 'tags': [], + 'topic_model': { 'alpha': 4.166666666666667, + 'beta': 0.1, + 'bigrams': False, + 'case_sensitive': False, + 'fields': { '000001': { 'column_number': 1, + 'datatype': 'string', + 'name': 'Message', + 'optype': 'text', + 'order': 0, + 'preferred': True, + 'summary': { 'average_length': 78.14787, + 'missing_count': 0, + 'tag_cloud': [ [ 'call', + 72], + [ 'ok', + 36], + [ 'gt', + 34], + ... + [ 'worse', + 2], + [ 'worth', + 2], + [ 'write', + 2], + [ 'yest', + 2], + [ 'yijue', + 2]], + 'term_forms': { }}, + 'term_analysis': { 'case_sensitive': False, + 'enabled': True, + 'language': 'en', + 'stem_words': False, + 'token_mode': 'all', + 'use_stopwords': False}}}, + 'hashed_seed': 62146850, + 'language': 'en', + 'number_of_topics': 12, + 'term_limit': 4096, + 'term_topic_assignments': [ [ 0, + 5, + 0, + 1, + 0, + 19, + 0, + 0, + 19, + 0, + 1, + 0], + [ 0, + 0, + 0, + 13, + 0, + 0, + 0, + 0, + 5, + 0, + 0, + 0], + ... + [ 0, + 7, + 27, + 0, + 112, + 0, + 0, + 0, + 0, + 0, + 14, + 2]], + 'termset': [ '000', + '03', + '04', + '06', + '08000839402', + '08712460324', + ... + + 'yes', + 'yest', + 'yesterday', + 'yijue', + 'yo', + 'yr', + 'yup', + '\xfc'], + 'top_n_terms': 10, + 'topicmodel_seed': '26c386d781963ca1ea5c90dab8a6b023b5e1d180', + 'topics': [ { 'id': '000000', + 'name': 'Topic 00', + 'probability': 0.09375, + 'top_terms': [ [ 'im', + 0.04849], + [ 'hi', + 0.04717], + [ 'love', + 0.04585], + [ 'please', + 0.02867], + [ 'tomorrow', + 0.02867], + [ 'cos', + 0.02823], + [ 'sent', + 0.02647], + [ 'da', + 0.02383], + [ 'meet', + 0.02207], + [ 'dinner', + 0.01898]]}, + { 'id': '000001', + 'name': 'Topic 01', + 'probability': 0.08215, + 'top_terms': [ [ 'lt', + 0.1015], + [ 'gt', + 0.1007], + [ 'wish', + 0.03958], + [ 'feel', + 0.0272], + [ 'shit', + 0.02361], + [ 'waiting', + 0.02281], + [ 'stuff', + 0.02001], + [ 'name', + 0.01921], + [ 'comp', + 0.01522], + [ 'forgot', + 0.01482]]}, + ... 
+ { 'id': '00000b',
+ 'name': 'Topic 11',
+ 'probability': 0.0826,
+ 'top_terms': [ [ 'call',
+ 0.15084],
+ [ 'min',
+ 0.05003],
+ [ 'msg',
+ 0.03185],
+ [ 'home',
+ 0.02648],
+ [ 'mind',
+ 0.02152],
+ [ 'lt',
+ 0.01987],
+ [ 'bring',
+ 0.01946],
+ [ 'camera',
+ 0.01905],
+ [ 'set',
+ 0.01905],
+ [ 'contact',
+ 0.01781]]}],
+ 'use_stopwords': False},
+ 'updated': '2016-11-23T23:48:03.336000',
+ 'white_box': False}
+
+Note that the output in the snippet above has been abbreviated.
+
+
+The topic model returns a list of top terms for each topic found in the data.
+Note that topics are not labeled, so you have to infer their meaning according
+to the words they are composed of.
+
+Once you build the topic model, you can calculate each topic probability
+for a given document by using a Topic Distribution.
+This information can be useful to find document similarities based
+on their themes.
+
+As you see,
+the ``topic_model`` attribute stores the topics, the termset and the term to
+topic assignments,
+as well as the configuration parameters described in
+the `developers section `_.
+
+PCAs
+~~~~
+
+A PCA (Principal Component Analysis) resource fits a number of orthogonal
+projections (components) to maximally capture the variance in a dataset. This
+is a dimensionality reduction technique, as it can be used to reduce
+the number of inputs for the modeling step. PCA models belong to the
+unsupervised class of models (there is no objective field).
+
+The JSON structure for a PCA is:
+
+.. code-block:: python
+
+
+ {'code': 200,
+ 'error': None,
+ 'location': 'https://strato.dev.bigml.io/andromeda/pca/5c002572983efc0ac5000003',
+ 'object': {'category': 0,
+ 'code': 200,
+ 'columns': 2,
+ 'configuration': None,
+ 'configuration_status': False,
+ 'created': '2018-11-29T17:44:18.359000',
+ 'creator': 'merce',
+ 'credits': 0.0,
+ 'credits_per_prediction': 0.0,
+ 'dataset': 'dataset/5c00256a983efc0acf000000',
+ 'dataset_field_types': {'categorical': 1,
+ 'datetime': 0,
+ 'items': 0,
+ 'numeric': 0,
+ 'preferred': 2,
+ 'text': 1,
+ 'total': 2},
+ 'dataset_status': True,
+ 'description': '',
+ 'excluded_fields': [],
+ 'fields_meta': {'count': 2,
+ 'limit': 1000,
+ 'offset': 0,
+ 'query_total': 2,
+ 'total': 2},
+ 'input_fields': ['000000', '000001'],
+ 'locale': 'en-us',
+ 'max_columns': 2,
+ 'max_rows': 7,
+ 'name': 'spam 4 words',
+ 'name_options': 'standardized',
+ 'number_of_batchprojections': 2,
+ 'number_of_projections': 0,
+ 'number_of_public_projections': 0,
+ 'ordering': 0,
+ 'out_of_bag': False,
+ 'pca': {'components': [[-0.64757,
+ 0.83392,
+ 0.1158,
+ 0.83481,
+ ...
+ -0.09426,
+ -0.08544,
+ -0.03457]],
+ 'cumulative_variance': [0.43667,
+ 0.74066,
+ 0.87902,
+ 0.98488,
+ 0.99561,
+ 1],
+ 'eigenvectors': [[-0.3894,
+ 0.50146,
+ 0.06963,
+ ...
+ -0.56542,
+ -0.5125,
+ -0.20734]],
+ 'fields': {'000000': {'column_number': 0,
+ 'datatype': 'string',
+ 'name': 'Type',
+ ...
+ 'token_mode': 'all', + 'use_stopwords': False}}}, + 'pca_seed': '2c249dda00fbf54ab4cdd850532a584f286af5b6', + 'standardized': True, + 'text_stats': {'000001': {'means': [0.71429, + 0.71429, + 0.42857, + 0.28571], + 'standard_deviations': [0.75593, + 0.75593, + 0.53452, + 0.48795]}}, + 'variance': [0.43667, + 0.30399, + 0.13837, + 0.10585, + 0.01073, + 0.00439]}, + 'price': 0.0, + 'private': True, + 'project': None, + 'range': None, + 'replacement': False, + 'resource': 'pca/5c002572983efc0ac5000003', + 'rows': 7, + 'sample_rate': 1.0, + 'shared': False, + 'size': 127, + 'source': 'source/5c00255e983efc0acd00001b', + 'source_status': True, + 'status': {'code': 5, + 'elapsed': 1571, + 'message': 'The pca has been created', + 'progress': 1}, + 'subscription': True, + 'tags': [], + 'type': 0, + 'updated': '2018-11-29T18:13:19.714000', + 'white_box': False}, + 'resource': 'pca/5c002572983efc0ac5000003'} + +You can check the PCA properties at the `API documentation +`_. + +Predictions and Evaluations +--------------------------- + +Prediction +~~~~~~~~~~ + +The output of a supervised learning model for a particular input is its +prediction. In BigML, a model is ready to produce predictions immediately, so +there's no need of a special deployment in order to start using it. Here's how +you create a prediction for a model and its response: + +.. code-block:: python + + >>> input_data = {"petal length": 4} + >>> prediction = api.create_prediction(model_id, input_data) + >>> api.pprint(prediction["object"]) + { 'boosted_ensemble': False, + 'category': 12, + 'code': 201, + 'confidence': 0.40383, + 'confidence_bounds': {}, + 'confidences': [ ['Iris-setosa', 0], + ['Iris-versicolor', 0.40383], + ['Iris-virginica', 0.40383]], + 'configuration': None, + 'configuration_status': False, + 'created': '2024-09-09T15:48:58.918313', + 'creator': 'mmartin', + 'dataset': 'dataset/6668805ad7413f90007ab83e', + 'dataset_status': True, + 'description': 'Created using BigMLer', + 'expanded_input_data': {'000002': 4.0}, + 'explanation': None, + 'fields': { '000002': { 'column_number': 2, + 'datatype': 'double', + 'name': 'petal length', + 'optype': 'numeric', + 'order': 2, + 'preferred': True}, + '000003': { 'column_number': 3, + 'datatype': 'double', + 'name': 'petal width', + 'optype': 'numeric', + 'order': 3, + 'preferred': True}, + '000004': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'order': 4, + 'preferred': True, + 'term_analysis': {'enabled': True}}}, + 'importance': {'000002': 1}, + 'input_data': {'petal length': 4}, + 'locale': 'en_US', + 'missing_strategy': 0, + 'model': 'model/6668805f002883f09483369d', + 'model_status': True, + 'model_type': 0, + 'name': 'iris.csv', + 'name_options': 'operating kind=probability, 1 inputs', + 'number_of_models': 1, + 'objective_field': '000004', + 'objective_field_name': 'species', + 'objective_field_type': 'categorical', + 'objective_fields': ['000004'], + 'operating_kind': 'probability', + 'output': 'Iris-versicolor', + 'prediction': {'000004': 'Iris-versicolor'}, + 'prediction_path': { 'confidence': 0.40383, + 'next_predicates': [ { 'count': 46, + 'field': '000003', + 'operator': '>', + 'value': 1.75}, + { 'count': 54, + 'field': '000003', + 'operator': '<=', + 'value': 1.75}], + 'node_id': 1, + 'objective_summary': { 'categories': [ [ 'Iris-versicolor', + 50], + [ 'Iris-virginica', + 50]]}, + 'path': [ { 'field': '000002', + 'operator': '>', + 'value': 2.45}]}, + 'private': True, + 'probabilities': [ ['Iris-setosa', 
0.0033], + ['Iris-versicolor', 0.49835], + ['Iris-virginica', 0.49835]], + 'probability': 0.49835, + 'project': None, + 'query_string': '', + 'resource': 'prediction/66df18eac6f7849b7b3f10ec', + 'shared': False, + 'source': 'source/66688055450bc914a2c147e0', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 227, + 'message': 'The prediction has been created', + 'progress': 1}, + 'subscription': True, + 'tags': ['BigMLer', 'BigMLer_TueJun1124_094957'], + 'task': 'classification', + 'type': 0, + 'updated': '2024-09-09T15:48:58.918335'} + +As you see, +the ``output`` attribute stores the prediction value and the ``confidence`` +and ``probability`` attributes show the respective values. The rest of the +dictionary contains the configuration parameters described in +the `developers section `_. + +Evaluation +~~~~~~~~~~ + +The predictive performance of a model can be measured using many different +measures. In BigML these measures can be obtained by creating evaluations. To +create an evaluation you need the id of the model you are evaluating and the id +of the dataset that contains the data to be tested with. The result is shown +as: + +.. code-block:: python + + >>> evaluation = api.get_evaluation(evaluation) + >>> api.pprint(evaluation['object']['result']) + { 'class_names': ['0', '1'], + 'mode': { 'accuracy': 0.9802, + 'average_f_measure': 0.495, + 'average_phi': 0, + 'average_precision': 0.5, + 'average_recall': 0.4901, + 'confusion_matrix': [[99, 0], [2, 0]], + 'per_class_statistics': [ { 'accuracy': 0.9801980198019802, + 'class_name': '0', + 'f_measure': 0.99, + 'phi_coefficient': 0, + 'precision': 1.0, + 'present_in_test_data': True, + 'recall': 0.9801980198019802}, + { 'accuracy': 0.9801980198019802, + 'class_name': '1', + 'f_measure': 0, + 'phi_coefficient': 0, + 'precision': 0.0, + 'present_in_test_data': True, + 'recall': 0}]}, + 'model': { 'accuracy': 0.9901, + 'average_f_measure': 0.89746, + 'average_phi': 0.81236, + 'average_precision': 0.99495, + 'average_recall': 0.83333, + 'confusion_matrix': [[98, 1], [0, 2]], + 'per_class_statistics': [ { 'accuracy': 0.9900990099009901, + 'class_name': '0', + 'f_measure': 0.9949238578680203, + 'phi_coefficient': 0.8123623944599232, + 'precision': 0.98989898989899, + 'present_in_test_data': True, + 'recall': 1.0}, + { 'accuracy': 0.9900990099009901, + 'class_name': '1', + 'f_measure': 0.8, + 'phi_coefficient': 0.8123623944599232, + 'precision': 1.0, + 'present_in_test_data': True, + 'recall': 0.6666666666666666}]}, + 'random': { 'accuracy': 0.50495, + 'average_f_measure': 0.36812, + 'average_phi': 0.13797, + 'average_precision': 0.74747, + 'average_recall': 0.51923, + 'confusion_matrix': [[49, 50], [0, 2]], + 'per_class_statistics': [ { 'accuracy': 0.504950495049505, + 'class_name': '0', + 'f_measure': 0.6621621621621622, + 'phi_coefficient': 0.1379728923974526, + 'precision': 0.494949494949495, + 'present_in_test_data': True, + 'recall': 1.0}, + { 'accuracy': 0.504950495049505, + 'class_name': '1', + 'f_measure': 0.07407407407407407, + 'phi_coefficient': 0.1379728923974526, + 'precision': 1.0, + 'present_in_test_data': True, + 'recall': 0.038461538461538464}]}} + +where two levels of detail are easily identified. For classifications, +the first level shows these keys: + +- **class_names**: A list with the names of all the categories for the objective field (i.e., all the classes) +- **mode**: A detailed result object. 
Measures the performance of the classifier that predicts the mode class for all the instances in the dataset.
+- **model**: A detailed result object. Measures the performance of the evaluated model.
+- **random**: A detailed result object. Measures the performance of the classifier that predicts a random class for all the instances in the dataset.
+
+and the detailed result objects include ``accuracy``, ``average_f_measure``, ``average_phi``,
+``average_precision``, ``average_recall``, ``confusion_matrix``
+and ``per_class_statistics``.
+
+For regressions, the first level will contain these keys:
+
+- **mean**: A detailed result object. Measures the performance of the model that predicts the mean for all the instances in the dataset.
+- **model**: A detailed result object. Measures the performance of the evaluated model.
+- **random**: A detailed result object. Measures the performance of the model that predicts a random value for all the instances in the dataset.
+
+where the detailed result objects include ``mean_absolute_error``,
+``mean_squared_error`` and ``r_squared`` (refer to
+`developers documentation `_ for
+more info on the meaning of these measures).
+
+You can check the evaluation properties at the `API documentation
+`_.
+
+Centroid
+~~~~~~~~
+
+A ``centroid`` is the value predicted by a cluster model. Here's how to create
+a centroid:
+
+
+.. code-block:: python
+
+    >>> input_data = {"petal length": 4}
+    >>> centroid = api.create_centroid(cluster_id, input_data)
+
+Note that you will need to provide values for all the input fields in order to
+create a centroid. To know more details about the centroid properties and
+parameters you can check the corresponding
+`API documentation `_.
+
+Anomaly Score
+~~~~~~~~~~~~~
+
+An ``anomaly score`` is the value predicted by an anomaly detector.
+Here's how to create an anomaly score:
+
+
+.. code-block:: python
+
+    >>> input_data = {"petal length": 4}
+    >>> anomaly_score = api.create_anomaly_score(anomaly_id, input_data)
+
+To know more details about the anomaly score properties and
+parameters you can check the corresponding
+`API documentation `_.
+
+Association Set
+~~~~~~~~~~~~~~~
+
+An ``association set`` is the value predicted by an association discovery model.
+Here's how to create an association set:
+
+
+.. code-block:: python
+
+    >>> input_data = {"petal length": 4}
+    >>> association_set = api.create_association_set(association_id, input_data)
+
+To know more details about the association set properties and
+parameters you can check the corresponding
+`API documentation `_.
+
+Topic Distribution
+~~~~~~~~~~~~~~~~~~
+
+A ``topic distribution`` is the value predicted by a topic model.
+Here's how to create a topic distribution:
+
+
+.. code-block:: python
+
+    >>> input_data = {"text": "Now is the winter of our discontent"}
+    >>> topic_distribution = api.create_topic_distribution(topic_model_id, input_data)
+
+To know more details about the topic distribution properties and
+parameters you can check the corresponding
+`API documentation `_.
+
+Batch Prediction
+~~~~~~~~~~~~~~~~
+
+In BigML, you can create predictions for all the inputs provided as rows of a
+dataset, i.e. a batch prediction.
+The result of a batch prediction can either be downloaded as a CSV or
+become a new dataset. As with predictions, a model is ready to produce batch
+predictions immediately, so there's no need for a special deployment in order
+to start using it. Here's how you create a batch prediction for a model
+and its response:
+
+.. code-block:: python
+
+    >>> batch_prediction = api.create_batch_prediction(model_id, test_dataset)
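+
+Once the batch prediction is finished, its results can be retrieved. The
+following is a minimal, hedged sketch: the ``output_dataset`` argument and
+the ``output_dataset_resource`` property are used as in the generic methods
+example of these docs, and ``download_batch_prediction`` is assumed to be
+available in your version of the bindings:
+
+.. code-block:: python
+
+    >>> # ask for the results to be stored also as a new dataset
+    >>> batch_prediction = api.create_batch_prediction(
+            model_id, test_dataset, {"output_dataset": True})
+    >>> api.ok(batch_prediction)  # wait for the batch prediction to finish
+    >>> # download the predictions as a local CSV file
+    >>> api.download_batch_prediction(batch_prediction,
+                                      filename="my_predictions.csv")
+    >>> # or retrieve the dataset built from the predictions
+    >>> dataset_id = batch_prediction["object"]["output_dataset_resource"]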
+
+To know more details about the batch prediction properties and
+parameters you can check the corresponding
+`API documentation `_.
+
+Batch Centroid
+~~~~~~~~~~~~~~
+
+In BigML, you can create centroids for all the inputs provided as rows of a
+dataset, i.e. a batch centroid.
+The result of a batch centroid can either be downloaded as a CSV or
+become a new dataset. As with predictions, a cluster is ready to produce batch
+centroids immediately, so there's no need for a special deployment in order
+to start using it. Here's how you create a batch centroid for a cluster
+and its response:
+
+.. code-block:: python
+
+    >>> batch_centroid = api.create_batch_centroid(cluster_id, test_dataset)
+
+To know more details about the batch centroid properties and
+parameters you can check the corresponding
+`API documentation `_.
+
+Batch Anomaly Score
+~~~~~~~~~~~~~~~~~~~
+
+In BigML, you can create anomaly scores for all the inputs provided as rows of a
+dataset, i.e. a batch anomaly score.
+The result of a batch anomaly score can either be downloaded as a CSV or
+become a new dataset. As with predictions, an anomaly detector
+is ready to produce batch anomaly scores immediately,
+so there's no need for a special deployment in order
+to start using it. Here's how you create a batch anomaly score for an anomaly
+detector and its response:
+
+.. code-block:: python
+
+    >>> batch_anomaly_score = api.create_batch_anomaly_score(
+            anomaly_id, test_dataset)
+
+To know more details about the batch anomaly score properties and
+parameters you can check the corresponding
+`API documentation `_.
+
+Batch Topic Distribution
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+In BigML, you can create topic distributions for all the inputs
+provided as rows of a dataset, i.e. a batch topic distribution.
+The result of a batch topic distribution can either be downloaded as a CSV or
+become a new dataset. As with predictions, a topic model is ready to produce
+batch topic distributions immediately, so there's no need for a
+special deployment in order to start using it.
+Here's how you create a batch topic distribution for a topic model
+and its response:
+
+.. code-block:: python
+
+    >>> batch_topic_distribution = api.create_batch_topic_distribution(
+            topic_id, test_dataset)
+
+To know more details about the batch topic distribution properties and
+parameters you can check the corresponding
+`API documentation `_.
diff --git a/docs/quick_start.rst b/docs/quick_start.rst
new file mode 100644
index 00000000..2ff7b0ac
--- /dev/null
+++ b/docs/quick_start.rst
@@ -0,0 +1,284 @@
+Quick Start
+===========
+
+Imagine that you want to use `this csv
+file `_ containing the `Iris
+flower dataset `_ to
+predict the species of a flower whose ``petal length`` is ``2.45`` and
+whose ``petal width`` is ``1.75``. A preview of the dataset is shown
+below. It has 4 numeric fields: ``sepal length``, ``sepal width``,
+``petal length``, ``petal width`` and a categorical field: ``species``.
+By default, BigML considers the last field in the dataset as the
+objective field (i.e., the field that you want to generate predictions
+for).
+
+::
+
+    sepal length,sepal width,petal length,petal width,species
+    5.1,3.5,1.4,0.2,Iris-setosa
+    4.9,3.0,1.4,0.2,Iris-setosa
+    4.7,3.2,1.3,0.2,Iris-setosa
+    ...
+    5.8,2.7,3.9,1.2,Iris-versicolor
+    6.0,2.7,5.1,1.6,Iris-versicolor
+    5.4,3.0,4.5,1.5,Iris-versicolor
+    ...
+    6.8,3.0,5.5,2.1,Iris-virginica
+    5.7,2.5,5.0,2.0,Iris-virginica
+    5.8,2.8,5.1,2.4,Iris-virginica
+
+You can easily generate a prediction following these steps:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+
+    api = BigML()
+
+    source = api.create_source('./data/iris.csv')
+    dataset = api.create_dataset(source)
+    model = api.create_model(dataset)
+    prediction = api.create_prediction(model, \
+        {"petal width": 1.75, "petal length": 2.45})
+
+You can then print the prediction using the ``pprint`` method:
+
+.. code-block:: python
+
+    >>> api.pprint(prediction)
+    species for {"petal width": 1.75, "petal length": 2.45} is Iris-setosa
+
+Any of the resources created in BigML can be configured using
+several arguments described in the `API documentation `_.
+Any of these configuration arguments can be added to the ``create`` method
+as a dictionary in the last optional argument of the calls:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+
+    api = BigML()
+
+    source_args = {"name": "my source",
+                   "source_parser": {"missing_tokens": ["NULL"]}}
+    source = api.create_source('./data/iris.csv', source_args)
+    dataset_args = {"name": "my dataset"}
+    dataset = api.create_dataset(source, dataset_args)
+    model_args = {"objective_field": "species"}
+    model = api.create_model(dataset, model_args)
+    prediction_args = {"name": "my prediction"}
+    prediction = api.create_prediction(model, \
+        {"petal width": 1.75, "petal length": 2.45},
+        prediction_args)
+
+The ``iris`` dataset has a small number of instances and will usually be
+created instantly, so the ``api.create_`` calls will probably return the
+finished resources outright. As BigML's API is asynchronous,
+in general you will need to ensure
+that objects are finished before using them by using ``api.ok``.
+
+.. code-block:: python
+
+    from bigml.api import BigML
+
+    api = BigML()
+
+    source = api.create_source('./data/iris.csv')
+    api.ok(source)
+    dataset = api.create_dataset(source)
+    api.ok(dataset)
+    model = api.create_model(dataset)
+    api.ok(model)
+    prediction = api.create_prediction(model, \
+        {"petal width": 1.75, "petal length": 2.45})
+
+Note that the prediction
+call is not followed by the ``api.ok`` method. Predictions are generated so
+quickly that, unlike the rest of the resources, they are returned
+synchronously as finished objects.
+
+As an alternative to the ``api.ok`` method, BigML offers
+`webhooks `_ that can be set
+when creating a resource and will call the URL of your choice when the
+finished or failed event is reached. A secret can be included in the call to
+verify the authenticity of the webhook call, and a
+
+.. code-block:: python
+
+    bigml.webhooks.check_signature(request, signature)
+
+function is offered to that end. As an example, this snippet creates a source
+and sets a webhook to call ``https://my_webhook.com/endpoint`` when finished:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+    api = BigML()
+    # using a webhook with a secret
+    api.create_source("https://static.bigml.com/csv/iris.csv",
+                      {"webhook": {"url": "https://my_webhook.com/endpoint",
+                                   "secret": "mysecret"}})
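+
+On the receiving side, ``check_signature`` can be used to verify that the
+call really comes from BigML. The sketch below is hedged: the handler name,
+the shape of the ``request`` object and the signature header name are all
+assumptions that depend on your web framework and on BigML's webhook
+documentation; only ``check_signature`` comes from the bindings.
+
+.. code-block:: python
+
+    from bigml.webhooks import check_signature
+
+    def webhook_handler(request):
+        # hypothetical header name carrying the signature
+        signature = request.headers.get("X-BigML-Signature")
+        if check_signature(request, signature):
+            # authentic call: process the finished/failed event here
+            return "OK"
+        # signature mismatch: ignore the call
+        return "Forbidden"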
+
+
+The ``iris`` prediction example assumed that your objective
+field (the one you want to predict) is the last field in the dataset.
+If that's not the case, you can explicitly
+set the name of this field in the creation call using the ``objective_field``
+argument:
+
+
+.. code-block:: python
+
+    from bigml.api import BigML
+
+    api = BigML()
+
+    source = api.create_source('./data/iris.csv')
+    api.ok(source)
+    dataset = api.create_dataset(source)
+    api.ok(dataset)
+    model = api.create_model(dataset, {"objective_field": "species"})
+    api.ok(model)
+    prediction = api.create_prediction(model, \
+        {'sepal length': 5, 'sepal width': 2.5})
+
+
+You can also generate an evaluation for the model by using:
+
+.. code-block:: python
+
+    test_source = api.create_source('./data/test_iris.csv')
+    api.ok(test_source)
+    test_dataset = api.create_dataset(test_source)
+    api.ok(test_dataset)
+    evaluation = api.create_evaluation(model, test_dataset)
+    api.ok(evaluation)
+
+
+The API object also offers the ``create``, ``get``, ``update`` and ``delete``
+generic methods to manage all types of resources. The type of resource to be
+created is passed as the first argument to the ``create`` method:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+
+    api = BigML()
+
+    source = api.create('source', './data/iris.csv')
+    source = api.update(source, {"name": "my new source name"})
+
+Note that these methods don't need the ``api.ok`` method to be called
+to wait for the resource to be finished:
+they wait internally for it by default.
+This can be avoided by using ``finished=False`` as one of the arguments.
+
+
+.. code-block:: python
+
+    from bigml.api import BigML
+
+    api = BigML()
+
+    source = api.create('source', './data/iris.csv')
+    dataset = api.create('dataset', source, finished=False) # unfinished
+    api.ok(dataset) # waiting explicitly for the dataset to finish
+    dataset = api.update(dataset, {"name": "my_new_dataset_name"},
+                         finished=False)
+    api.ok(dataset)
+
+As an example for the ``delete`` and ``get`` methods, we could
+create a batch prediction, put the predictions in a
+dataset object and delete the ``batch_prediction``.
+
+.. code-block:: python
+
+    from bigml.api import BigML
+
+    api = BigML()
+
+    batch_prediction = api.create('batchprediction',
+                                  'model/5f3c3d2b5299637102000882',
+                                  'dataset/5f29a563529963736c0116e9',
+                                  args={"output_dataset": True})
+    batch_prediction_dataset = api.get(batch_prediction["object"][ \
+        "output_dataset_resource"])
+    api.delete(batch_prediction)
+
+If you set the ``storage`` argument in the ``api`` instantiation:
+
+.. code-block:: python
+
+    api = BigML(storage='./storage')
+
+all the generated, updated or retrieved resources will be automatically
+saved to the chosen directory. Once they are stored locally, the
+``retrieve_resource`` method will look for the resource information
+first in the local storage before trying to download the information from
+the API.
+
+.. code-block:: python
+
+    dataset = api.retrieve_resource("dataset/5e8e5672c7736e3d830037b5",
+                                    query_string="limit=-1")
+
+
+Alternatively, you can use the ``export`` method to explicitly
+download the JSON information
+that describes any of your resources in BigML to a particular file:
+
+.. code-block:: python
+
+    api.export('model/5acea49a08b07e14b9001068',
+               filename="my_dir/my_model.json")
+
+This example downloads the JSON for the model and stores it in
+the ``my_dir/my_model.json`` file.
+
+In the case of models that can be represented in a `PMML` syntax, the
+export method can be used to produce the corresponding `PMML` file.
+
+.. code-block:: python
+
+    api.export('model/5acea49a08b07e14b9001068',
+               filename="my_dir/my_model.pmml",
+               pmml=True)
+
+You can also retrieve the last resource with some previously given tag:
+
+.. code-block:: python
+
+    api.export_last("foo",
+                    resource_type="ensemble",
+                    filename="my_dir/my_ensemble.json")
+
+which selects the last ensemble that has a ``foo`` tag. This mechanism can
+be especially useful when retrieving retrained models that have been created
+with a shared unique keyword as tag.
+
+For a descriptive overview of the steps that you will usually need to
+follow to model
+your data and obtain predictions, please see the `basic Workflow sketch
+`_
+document. You can also check other simple examples in the following documents:
+
+- `model 101 <101_model.html>`_
+- `logistic regression 101 <101_logistic_regression.html>`_
+- `linear regression 101 <101_linear_regression.html>`_
+- `ensemble 101 <101_ensemble.html>`_
+- `cluster 101 <101_cluster.html>`_
+- `anomaly detector 101 <101_anomaly.html>`_
+- `association 101 <101_association.html>`_
+- `topic model 101 <101_topic_model.html>`_
+- `deepnet 101 <101_deepnet.html>`_
+- `time series 101 <101_ts.html>`_
+- `fusion 101 <101_fusion.html>`_
+- `optiml 101 <101_optiml.html>`_
+- `PCA 101 <101_pca.html>`_
+- `scripting 101 <101_scripting.html>`_
+
+And for examples on Image Processing:
+
+- `Images Classification 101 <101_images_classification.html>`_
+- `Object Detection 101 <101_object_detection.html>`_
+- `Images Feature Extraction 101 <101_images_feature_extraction.html>`_
diff --git a/docs/reading_resources.rst b/docs/reading_resources.rst
new file mode 100644
index 00000000..541125e4
--- /dev/null
+++ b/docs/reading_resources.rst
@@ -0,0 +1,240 @@
+.. toctree::
+   :hidden:
+
+Reading Resources
+-----------------
+
+When retrieved individually, resources are returned as a dictionary
+identical to the one you get when you create a new resource. However,
+the status code will be ``bigml.api.HTTP_OK`` if the resource can be
+retrieved without problems, or one of the HTTP standard error codes
+otherwise. To know more about the errors that can happen when retrieving
+a resource and what to expect if a resource is not correctly created, please
+refer to the
+`Waiting for Resources `_
+section.
+
+To retrieve an existing resource, you just need to use the corresponding
+``get_[resource type]`` method. There's a query string argument
+that can be used to filter out or limit the attributes obtained:
+
+.. code-block:: python
+
+    # gets the source information with no filters
+    api.get_source("source/5143a51a37203f2cf7000956")
+    # gets the dataset information with only 10 of the fields
+    api.get_dataset("dataset/5143a51a37203f2cf7000936",
+                    query_string="limit=10")
+    # gets the model information excluding the model predicates tree
+    api.get_model("model/5143a51a37203f2cf7000956",
+                  query_string="exclude=root")
+
+
+Public and shared resources
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The previous examples use resources that were created by the same user
+that asks for their retrieval or modification. If a user wants to share one
+of her resources, she can make them public or share them. Declaring a resource
+public means that anyone can see the resource. This can be applied to datasets
+and models. To turn a dataset public, just update its ``private`` property:
+
+.. code-block:: python
+
+    api.update_dataset('dataset/5143a51a37203f2cf7000972', {'private': False})
+
+and any user will be able to download it using its id prepended by ``public``:
+
+.. code-block:: python
+
+    api.get_dataset('public/dataset/5143a51a37203f2cf7000972')
+
+In the models' case, you can also choose whether you want the model to be
+fully downloadable or just accessible to make predictions. This is controlled
+with the ``white_box`` property. If you want to publish your model completely,
+just use:
+
+.. code-block:: python
+
+    api.update_model('model/5143a51a37203f2cf7000956', {'private': False,
+                                                        'white_box': True})
+
+Both public models and datasets will be openly accessible for anyone,
+registered or not, from the web
+gallery.
+
+Still, you may want to share your models with other users, but without making
+them public for everyone. This can be achieved by setting the ``shared``
+property:
+
+.. code-block:: python
+
+    api.update_model('model/5143a51a37203f2cf7000956', {'shared': True})
+
+Shared models can be accessed using their share hash (property ``shared_hash``
+in the original model):
+
+.. code-block:: python
+
+    api.get_model('shared/model/d53iw39euTdjsgesj7382ufhwnD')
+
+or by using their original id with the creator user as username and a specific
+sharing api_key you will find as property ``sharing_api_key`` in the updated
+model:
+
+.. code-block:: python
+
+    api.get_model('model/5143a51a37203f2cf7000956', shared_username='creator',
+                  shared_api_key='c972018dc5f2789e65c74ba3170fda31d02e00c3')
+
+Only users with the share link or credentials information will be able to
+access your shared models.
+
+Listing Resources
+-----------------
+
+You can list resources with the appropriate api method:
+
+.. code-block:: python
+
+    api.list_sources()
+    api.list_datasets()
+    api.list_models()
+    api.list_predictions()
+    api.list_evaluations()
+    api.list_ensembles()
+    api.list_batch_predictions()
+    api.list_clusters()
+    api.list_centroids()
+    api.list_batch_centroids()
+    api.list_anomalies()
+    api.list_anomaly_scores()
+    api.list_batch_anomaly_scores()
+    api.list_projects()
+    api.list_samples()
+    api.list_correlations()
+    api.list_statistical_tests()
+    api.list_logistic_regressions()
+    api.list_linear_regressions()
+    api.list_associations()
+    api.list_association_sets()
+    api.list_topic_models()
+    api.list_topic_distributions()
+    api.list_batch_topic_distributions()
+    api.list_time_series()
+    api.list_deepnets()
+    api.list_fusions()
+    api.list_pcas()
+    api.list_projections()
+    api.list_batch_projections()
+    api.list_forecasts()
+    api.list_scripts()
+    api.list_libraries()
+    api.list_executions()
+    api.list_external_connectors()
+
+
+you will receive a dictionary with the following keys:
+
+- **code**: If the request is successful you will get a
+  ``bigml.api.HTTP_OK`` (200) status code. Otherwise, it will be one of
+  the standard HTTP error codes. See `BigML documentation on status
+  codes `_ for more info.
+- **meta**: A dictionary including the following keys that can help you
+  paginate listings:
+
+    - **previous**: Path to get the previous page or ``None`` if there
+      is no previous page.
+    - **next**: Path to get the next page or ``None`` if there is no
+      next page.
+    - **offset**: How far off from the first entry in the resources is
+      the first one listed in the resources key.
+    - **limit**: Maximum number of resources that you will get listed in
+      the resources key.
+    - **total\_count**: The total number of resources in BigML.
+
+- **objects**: A list of resources as returned by BigML.
+- **error**: If an error occurs and the resource cannot be created, it
+  will contain an additional code and a description of the error. In
+  this case, **meta** and **resources** will be ``None``.
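+
+The ``meta`` information can be used to page through the complete collection
+of resources. A minimal sketch, assuming the standard ``limit`` and
+``offset`` query string arguments described above:
+
+.. code-block:: python
+
+    from bigml.api import BigML
+
+    api = BigML()
+    sources = []
+    offset = 0
+    limit = 100
+    while True:
+        # each response contains at most `limit` resources
+        response = api.list_sources("limit=%s;offset=%s" % (limit, offset))
+        sources.extend(response["objects"])
+        if response["meta"]["next"] is None:
+            break  # no more pages
+        offset += limit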
+
+Filtering Resources
+~~~~~~~~~~~~~~~~~~~
+
+You can filter resources in listings using the syntax and fields labeled
+as *filterable* in the `BigML
+documentation `_ for each resource.
+
+A few examples:
+
+Ids of the first 5 sources created before April 1st, 2012
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+    [source['resource'] for source in
+      api.list_sources("limit=5;created__lt=2012-04-1")['objects']]
+
+Name of the first 10 datasets bigger than 1MB
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+    [dataset['name'] for dataset in
+      api.list_datasets("limit=10;size__gt=1048576")['objects']]
+
+Name of models with more than 5 fields (columns)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+    [model['name'] for model in api.list_models("columns__gt=5")['objects']]
+
+Ids of predictions whose model has not been deleted
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+    [prediction['resource'] for prediction in
+      api.list_predictions("model_status=true")['objects']]
+
+Ordering Resources
+~~~~~~~~~~~~~~~~~~
+
+You can order resources in listings using the syntax and fields labeled
+as *sortable* in the `BigML
+documentation `_ for each resource.
+
+A few examples:
+
+Name of sources ordered by size
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+    [source['name'] for source in api.list_sources("order_by=size")['objects']]
+
+Number of instances in datasets created before April 1st, 2012 ordered by size
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+    [dataset['rows'] for dataset in
+      api.list_datasets(
+        "created__lt=2012-04-01T00:00:00.00000;order_by=size")['objects']]
+
+Model ids ordered by number of predictions (in descending order).
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+    [model['resource'] for model in
+      api.list_models("order_by=-number_of_predictions")['objects']]
+
+Name of predictions ordered by name.
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+    [prediction['name'] for prediction in
+      api.list_predictions("order_by=name")['objects']]
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 00000000..6daf89af
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,2 @@
+sphinx
+sphinx_rtd_theme==2.0.0
diff --git a/docs/updating_resources.rst b/docs/updating_resources.rst
new file mode 100644
index 00000000..c4cb88a5
--- /dev/null
+++ b/docs/updating_resources.rst
@@ -0,0 +1,99 @@
+.. toctree::
+   :hidden:
+
+Updating Resources
+==================
+
+When you update a resource, it is returned in a dictionary exactly like
+the one you get when you create a new one. However, the status code will
+be ``bigml.api.HTTP_ACCEPTED`` if the resource can be updated without
+problems, or one of the HTTP standard error codes otherwise.
+
+.. code-block:: python
+
+    api.update_source(source, {"name": "new name"})
+    api.update_dataset(dataset, {"name": "new name"})
+    api.update_model(model, {"name": "new name"})
+    api.update_prediction(prediction, {"name": "new name"})
+    api.update_evaluation(evaluation, {"name": "new name"})
+    api.update_ensemble(ensemble, {"name": "new name"})
+    api.update_batch_prediction(batch_prediction, {"name": "new name"})
+    api.update_cluster(cluster, {"name": "new name"})
+    api.update_centroid(centroid, {"name": "new name"})
+    api.update_batch_centroid(batch_centroid, {"name": "new name"})
+    api.update_anomaly(anomaly, {"name": "new name"})
+    api.update_anomaly_score(anomaly_score, {"name": "new name"})
+    api.update_batch_anomaly_score(batch_anomaly_score, {"name": "new name"})
+    api.update_project(project, {"name": "new name"})
+    api.update_correlation(correlation, {"name": "new name"})
+    api.update_statistical_test(statistical_test, {"name": "new name"})
+    api.update_logistic_regression(logistic_regression, {"name": "new name"})
+    api.update_linear_regression(linear_regression, {"name": "new name"})
+    api.update_association(association, {"name": "new name"})
+    api.update_association_set(association_set, {"name": "new name"})
+    api.update_topic_model(topic_model, {"name": "new name"})
+    api.update_topic_distribution(topic_distribution, {"name": "new name"})
+    api.update_batch_topic_distribution(\
+        batch_topic_distribution, {"name": "new name"})
+    api.update_time_series(\
+        time_series, {"name": "new name"})
+    api.update_forecast(\
+        forecast, {"name": "new name"})
+    api.update_deepnet(deepnet, {"name": "new name"})
+    api.update_fusion(fusion, {"name": "new name"})
+    api.update_pca(pca, {"name": "new name"})
+    api.update_projection(projection, {"name": "new name"})
+    api.update_batch_projection(batch_projection, {"name": "new name"})
+    api.update_script(script, {"name": "new name"})
+    api.update_library(library, {"name": "new name"})
+    api.update_execution(execution, {"name": "new name"})
+    api.update_external_connector(external_connector, {"name": "new name"})
+
+Updates can change resource general properties, such as the ``name`` or
+``description`` attributes of a dataset, or specific properties, like
+the ``missing tokens`` (strings considered as missing values). As an example,
+let's say that your source has a certain field whose contents are
+numeric integers. BigML will assign a numeric type to the field, but you
+might want it to be used as a categorical field. You could change
+its type to ``categorical`` by calling:
+
+.. code-block:: python
+
+    api.update_source(source, \
+        {"fields": {"000001": {"optype": "categorical"}}})
+
+where ``000001`` is the field id that corresponds to the updated field.
+
+Another frequently needed update is changing a field's ``preferred``
+attribute, so that it can be used in the modeling process:
+
+
+.. code-block:: python
+
+    api.update_dataset(dataset, {"fields": {"000001": {"preferred": True}}})
+
+where you would be setting as ``preferred`` the field whose id is ``000001``.
+
+You may also want to change the name of one of the clusters found in your
+clustering:
+
+
+.. code-block:: python
+
+    api.update_cluster(cluster, \
+        {"clusters": {"000001": {"name": "my cluster"}}})
+
+which is changing the name of the cluster whose centroid id is ``000001`` to
+``my cluster``. Or, similarly, changing the name of one detected topic:
+
+
+.. code-block:: python
+
+    api.update_topic_model(topic_model, \
+        {"topics": {"000001": {"name": "my topic"}}})
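+
+Since updates are also asynchronous, the returned status code can be checked
+before waiting for the update to be applied. A minimal sketch using the
+``bigml.api.HTTP_ACCEPTED`` constant mentioned above:
+
+.. code-block:: python
+
+    from bigml.api import BigML, HTTP_ACCEPTED
+
+    api = BigML()
+    dataset = api.update_dataset(dataset, {"name": "new name"})
+    if dataset["code"] == HTTP_ACCEPTED:
+        api.ok(dataset)  # wait for the update to be applied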
+
+
+You will find detailed information about
+the updatable attributes of each resource in
+`BigML developer's documentation `_.
diff --git a/docs/whizzml_resources.rst b/docs/whizzml_resources.rst
new file mode 100644
index 00000000..440f6de3
--- /dev/null
+++ b/docs/whizzml_resources.rst
@@ -0,0 +1,267 @@
+.. toctree::
+   :hidden:
+
+WhizzML Resources
+=================
+
+WhizzML is a Domain Specific Language that allows the definition and
+execution of ML-centric workflows. Its objective is to allow BigML
+users to define their own composite tasks, using as building blocks
+the basic resources provided by BigML itself. With WhizzML, these blocks
+can be glued together using a higher-order, functional, Turing-complete
+language. The WhizzML code can be stored and executed in BigML using three
+kinds of resources: ``Scripts``, ``Libraries`` and ``Executions``.
+
+WhizzML ``Scripts`` can be executed in BigML's servers, that is,
+in a controlled, fully-scalable environment which takes care of their
+parallelization and fail-safe operation. Each execution uses an ``Execution``
+resource to store the arguments and results of the process. WhizzML
+``Libraries`` store generic code to be shared or reused in other WhizzML
+``Scripts``.
+
+Scripts
+-------
+
+In BigML a ``Script`` resource stores WhizzML source code, and the results of
+its compilation. Once a WhizzML script is created, it's automatically compiled;
+if compilation succeeds, the script can be run, that is,
+used as the input for a WhizzML execution resource.
+
+An example of a ``script`` that would create a ``source`` in BigML using the
+contents of a remote file is:
+
+.. code-block:: python
+
+    >>> from bigml.api import BigML
+    >>> api = BigML()
+    # creating a script directly from the source code. This script creates
+    # a source uploading data from an s3 repo. You could also create a
+    # script by using as first argument the path to a .whizzml file which
+    # contains your source code.
+    >>> script = api.create_script( \
+            "(create-source {\"remote\" \"s3://bigml-public/csv/iris.csv\"})")
+    >>> api.ok(script) # waiting for the script compilation to finish
+    >>> api.pprint(script['object'])
+    { u'approval_status': 0,
+      u'category': 0,
+      u'code': 200,
+      u'created': u'2016-05-18T16:54:05.666000',
+      u'description': u'',
+      u'imports': [],
+      u'inputs': None,
+      u'line_count': 1,
+      u'locale': u'en-US',
+      u'name': u'Script',
+      u'number_of_executions': 0,
+      u'outputs': None,
+      u'price': 0.0,
+      u'private': True,
+      u'project': None,
+      u'provider': None,
+      u'resource': u'script/573c9e2db85eee23cd000489',
+      u'shared': False,
+      u'size': 59,
+      u'source_code': u'(create-source {"remote" "s3://bigml-public/csv/iris.csv"})',
+      u'status': { u'code': 5,
+                   u'elapsed': 4,
+                   u'message': u'The script has been created',
+                   u'progress': 1.0},
+      u'subscription': True,
+      u'tags': [],
+      u'updated': u'2016-05-18T16:54:05.850000',
+      u'white_box': False}
+
+A ``script`` allows you to define some variables as ``inputs``. In the previous
+example, no input has been defined, but we could modify our code to
+allow the user to set the remote file name as input:
+
+A ``script`` lets you define some variables as ``inputs``. In the previous
+example, no input has been defined, but we could modify our code to
+allow the user to set the remote file name as input:
+
+.. code-block:: python
+
+    >>> from bigml.api import BigML
+    >>> api = BigML()
+    >>> script = api.create_script( \
+            "(create-source {\"remote\" my_remote_data})",
+            {"inputs": [{"name": "my_remote_data",
+                         "type": "string",
+                         "default": "s3://bigml-public/csv/iris.csv",
+                         "description": "Location of the remote data"}]})
+
+The ``script`` can also use a ``library`` resource (see the
+``Libraries`` section below for more details) by including its id in the
+``imports`` attribute. Other attributes can be checked at the
+`API Developers documentation for Scripts `_.
+
+Executions
+----------
+
+To execute a compiled WhizzML ``script`` in BigML you need to create an
+``execution`` resource. It's also possible to execute a pipeline of
+many compiled scripts in one request.
+
+Each ``execution`` is run under its associated user credentials and its
+particular environment constraints. As ``scripts`` can be shared,
+different users can execute the same ``script`` using different inputs.
+Each particular execution will generate an ``execution`` resource in BigML.
+
+As an example of an ``execution`` resource, let's create one for the first
+script in the previous section. In this case, no inputs are required because
+the ``script`` expects none:
+
+.. code-block:: python
+
+    >>> from bigml.api import BigML
+    >>> api = BigML()
+    >>> execution = api.create_execution('script/573c9e2db85eee23cd000489')
+    >>> api.ok(execution) # waiting for the execution to finish
+    >>> api.pprint(execution['object'])
+    {   'category': 0,
+        'code': 200,
+        'created': '2016-05-18T16:58:01.613000',
+        'creation_defaults': {},
+        'description': '',
+        'execution': {   'output_resources': [ { 'code': 1,
+                                                 'id': 'source/573c9f19b85eee23c600024a',
+                                                 'last_update': 1463590681854,
+                                                 'progress': 0.0,
+                                                 'state': 'queued',
+                                                 'task': 'Queuing job',
+                                                 'variable': ''}],
+                         'outputs': [],
+                         'result': 'source/573c9f19b85eee23c600024a',
+                         'results': ['source/573c9f19b85eee23c600024a'],
+                         'sources': [[ 'script/573c9e2db85eee23cd000489',
+                                       '']],
+                         'steps': 16},
+        'inputs': None,
+        'locale': 'en-US',
+        'name': "Script's Execution",
+        'project': None,
+        'resource': 'execution/573c9f19b85eee23bd000125',
+        'script': 'script/573c9e2db85eee23cd000489',
+        'script_status': True,
+        'shared': False,
+        'status': {   'code': 5,
+                      'elapsed': 249,
+                      'elapsed_times': {   'in-progress': 247,
+                                           'queued': 62,
+                                           'started': 2},
+                      'message': 'The execution has been created',
+                      'progress': 1.0},
+        'subscription': True,
+        'tags': [],
+        'updated': '2016-05-18T16:58:02.035000'}
+
+As you can see, the execution resource contains information about the result
+of the execution and the resources that have been generated while executing.
+Users can also define some variables in the code to be exported as outputs.
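+
+For instance (a minimal sketch, assuming the ``execution`` above has
+finished), the resource created by the execution can be retrieved from the
+``result`` attribute of the response:
+
+.. code-block:: python
+
+    # Sketch: read the id of the created resource from the execution
+    # response shown above and retrieve the resource itself.
+    result_id = execution["object"]["execution"]["result"]
+    source = api.get_source(result_id)  # source/573c9f19b85eee23c600024a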
+An ``execution`` receives inputs, the ones defined in the ``script`` chosen
+for execution, and generates a result. It can also generate outputs and
+create resources. To execute a ``script`` that expects some inputs, you will
+need to specify the concrete values of those inputs, unless a default value
+has been assigned to them in the script's inputs definition. Following the
+second example in the previous section, we can execute the script that
+creates a source from a URL pointing to a CSV file:
+
+.. code-block:: python
+
+    >>> from bigml.api import BigML
+    >>> api = BigML()
+    >>> execution = api.create_execution( \
+            script,
+            {"inputs": [["my_remote_data",
+                         "https://static.bigml.com/csv/iris.csv"]]})
+
+For more details on executions' structure, please refer to the
+`Developers documentation for Executions `_.
+
+The results of an execution can be easily obtained by using the ``Execution``
+class. This class can be used to instantiate a local object that exposes
+the result, outputs and output resources generated in the execution
+as attributes.
+
+.. code-block:: python
+
+    from bigml.execution import Execution
+    execution = Execution("execution/5cae5ad4b72c6609d9000468")
+    print("The result of the execution is %s" % execution.result)
+    print(" and the output for variable 'my_variable': %s" %
+          execution.outputs["my_variable"])
+    print("The resources created in the execution are: %s" %
+          execution.output_resources)
+
+While an execution is in progress, the ``execution.result`` attribute will
+contain the value of the last evaluated expression at that point.
+Therefore, the value of the ``result`` attribute will keep changing until
+it holds the final result of the execution once it finishes.
+
+Also, if the execution fails, the error information can be found in the
+corresponding attributes:
+
+.. code-block:: python
+
+    from bigml.execution import Execution
+    execution = Execution("execution/5cae5ad4b72c6609d9000468")
+    print("The status of the execution is %s" % execution.status)
+    print("The execution failed at %s with error %s: %s" % (
+          execution.error_location, execution.error,
+          execution.error_message))
+
+Libraries
+---------
+
+The ``library`` resource in BigML stores a special kind of compiled WhizzML
+source code that only defines functions and constants. The ``library`` is
+intended as an import for executable scripts.
+Thus, a compiled library cannot be executed, just used as an
+import in other ``libraries`` and ``scripts`` (which then have access
+to all identifiers defined in the ``library``).
+
+As an example, we build a ``library`` to store the definition of two
+functions: ``mu`` and ``g``. The first one adds one to its argument and
+the second one adds its two arguments and increments the sum by one.
+
+.. code-block:: python
+
+    >>> from bigml.api import BigML
+    >>> api = BigML()
+    >>> library = api.create_library( \
+            "(define (mu x) (+ x 1)) (define (g z y) (mu (+ y z)))")
+    >>> api.ok(library) # waiting for the library compilation to finish
+    >>> api.pprint(library['object'])
+    {   'approval_status': 0,
+        'category': 0,
+        'code': 200,
+        'created': '2016-05-18T18:58:50.838000',
+        'description': '',
+        'exports': [   {'name': 'mu', 'signature': ['x']},
+                       {'name': 'g', 'signature': ['z', 'y']}],
+        'imports': [],
+        'line_count': 1,
+        'name': 'Library',
+        'price': 0.0,
+        'private': True,
+        'project': None,
+        'provider': None,
+        'resource': 'library/573cbb6ab85eee23c300018e',
+        'shared': False,
+        'size': 53,
+        'source_code': '(define (mu x) (+ x 1)) (define (g z y) (mu (+ y z)))',
+        'status': {   'code': 5,
+                      'elapsed': 2,
+                      'message': 'The library has been created',
+                      'progress': 1.0},
+        'subscription': True,
+        'tags': [],
+        'updated': '2016-05-18T18:58:52.432000',
+        'white_box': False}
+
+Libraries can be imported in scripts. The ``imports`` attribute of a
+``script`` can contain a list of ``library`` IDs whose defined functions
+and constants will be ready to be used throughout the ``script``.
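+For instance (a minimal sketch, assuming the ``library`` created above has
+finished compiling), a script that imports it can call the exported
+functions directly:
+
+.. code-block:: python
+
+    # Sketch: a script that imports the library above and calls its
+    # exported function g.
+    script = api.create_script( \
+        "(g 1 2)", {"imports": [library["resource"]]})
+    api.ok(script)
+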
Please, +refer to the `API Developers documentation for Libraries `_ +for more details. diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..1de495d4 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,8 @@ +[build-system] +requires=[ + "setuptools==69.0.0" +] + +[tool.black] +line-length = 80 +target-version = ['py312'] diff --git a/setup.cfg b/setup.cfg index f86f7a62..24f5e88c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 +1,2 @@ -[nosetests] -with-id=1 -nologcapture=1 -tests=bigml/tests - [lint] -lint-exclude-packages = bigml.tests +lint-exclude-packages=bigml.tests.my_ensemble diff --git a/setup.py b/setup.py index 37d667e8..c7858b6c 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Copyright 2012 - 2017-2019 BigML, Inc +# Copyright 2012-2025 BigML, Inc # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -16,7 +16,6 @@ import os import re -import sys import setuptools @@ -30,6 +29,9 @@ version = re.search("__version__ = '([^']+)'", open(version_py_path).read()).group(1) +TOPIC_MODELING_DEPENDENCIES = ["cython", "pystemmer==2.2.0.1"] +IMAGES_DEPENDENCIES = ["bigml-sensenet==0.7.5"] + # Concatenate files into the long description file_contents = [] for file_name in ('README.rst', 'HISTORY.rst'): @@ -37,17 +39,6 @@ file_contents.append(open(path).read()) long_description = '\n\n'.join(file_contents) -PYTHON_VERSION = sys.version_info[0:3] -PYTHON_REQUESTS_CHANGE = (2, 7, 9) -REQUESTS_VERSION = "requests==2.5.3" if \ - PYTHON_VERSION < PYTHON_REQUESTS_CHANGE else "requests" -INSTALL_REQUIRES = ["unidecode", "bigml-chronos>=0.4.0"] -if PYTHON_VERSION[0] < 3: - INSTALL_REQUIRES.append('poster') -else: - INSTALL_REQUIRES.append('requests-toolbelt') -INSTALL_REQUIRES.append(REQUESTS_VERSION) - setuptools.setup( name="bigml", description="An open source binding to BigML.io, the public BigML API", @@ -58,11 +49,21 @@ url="https://bigml.com/developers", download_url="https://github.com/bigmlcom/python", license="http://www.apache.org/licenses/LICENSE-2.0", - setup_requires = ['nose'], - install_requires = INSTALL_REQUIRES, - packages = ['bigml', 'bigml.tests', 'bigml.laminar', 'bigml.out_model', - 'bigml.out_tree', 'bigml.tests.my_ensemble'], - package_data={'bigml':['out_model/static/*']}, + setup_requires = ['pytest'], + install_requires = ["setuptools==70.0.0", "unidecode", + "bigml-chronos>=0.4.3", "requests", + "requests-toolbelt", "msgpack", "numpy>=1.22", "scipy", + "javascript"], + extras_require={"images": IMAGES_DEPENDENCIES, + "topics": TOPIC_MODELING_DEPENDENCIES, + "full": IMAGES_DEPENDENCIES + TOPIC_MODELING_DEPENDENCIES}, + packages = ['bigml', 'bigml.tests', 'bigml.laminar', + 'bigml.tests.my_ensemble', + 'bigml.api_handlers', 'bigml.predicate_utils', + 'bigml.generators', 'bigml.predict_utils', + 'bigml.images', 'bigml.pipeline'], + package_data={'bigml':['generators/static/*', + 'flatline/*']}, classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', @@ -70,11 +71,7 @@ 'Natural Language :: English', 'Operating System :: OS Independent', 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Topic :: Software Development :: Libraries :: Python Modules', - ], - test_suite='nose.collector', - use_2to3=True + ] )