diff --git a/.github/workflows/tests_01.yml b/.github/workflows/tests_01.yml new file mode 100644 index 00000000..3951257b --- /dev/null +++ b/.github/workflows/tests_01.yml @@ -0,0 +1,38 @@ +name: "Python bindings tests 01" + +on: + pull_request: + paths-ignore: + - 'docs/**' + workflow_dispatch: + +jobs: + tests-01: + + runs-on: ubuntu-latest + strategy: + matrix: + python: [3.12] + env: + BIGML_USERNAME: ${{ secrets.BIGML_USERNAME }} + BIGML_API_KEY: ${{ secrets.BIGML_API_KEY }} + BIGML_ORGANIZATION: ${{ secrets.BIGML_ORGANIZATION }} + BIGML_EXTERNAL_CONN_HOST: ${{ secrets.BIGML_EXTERNAL_CONN_HOST }} + BIGML_EXTERNAL_CONN_PORT: ${{ secrets.BIGML_EXTERNAL_CONN_PORT }} + BIGML_EXTERNAL_CONN_DB: ${{ secrets.BIGML_EXTERNAL_CONN_DB }} + BIGML_EXTERNAL_CONN_USER: ${{ secrets.BIGML_EXTERNAL_CONN_USER }} + BIGML_EXTERNAL_CONN_PWD: ${{ secrets.BIGML_EXTERNAL_CONN_PWD }} + BIGML_DELTA: ${{ vars.BIGML_DELTA }} + + steps: + - name: Install packages + uses: actions/checkout@v3 + - run: | + pip install .[topics] + + - name: Run tests *01 04 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 41 45 99 38 99* + run: | + pip3 install pytest + export TESTS=$(for t in "01" "04" "06" "07" "08" "09" "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" "20" "21" "41" "38" "99"; do ls bigml/tests/*$t*.py;done|paste -sd " ") + echo $TESTS + pytest -s $TESTS diff --git a/.github/workflows/tests_05.yml b/.github/workflows/tests_05.yml new file mode 100644 index 00000000..ed1cac5f --- /dev/null +++ b/.github/workflows/tests_05.yml @@ -0,0 +1,37 @@ +name: "Python bindings tests 05" + +on: + pull_request: + paths-ignore: + - 'docs/**' + workflow_dispatch: + +jobs: + tests-05: + + runs-on: ubuntu-latest + strategy: + matrix: + python: [3.12] + env: + BIGML_USERNAME: ${{ secrets.BIGML_USERNAME }} + BIGML_API_KEY: ${{ secrets.BIGML_API_KEY }} + BIGML_ORGANIZATION: ${{ secrets.BIGML_ORGANIZATION }} + BIGML_EXTERNAL_CONN_HOST: ${{ secrets.BIGML_EXTERNAL_CONN_HOST }} + BIGML_EXTERNAL_CONN_PORT: ${{ 
secrets.BIGML_EXTERNAL_CONN_PORT }} + BIGML_EXTERNAL_CONN_DB: ${{ secrets.BIGML_EXTERNAL_CONN_DB }} + BIGML_EXTERNAL_CONN_USER: ${{ secrets.BIGML_EXTERNAL_CONN_USER }} + BIGML_EXTERNAL_CONN_PWD: ${{ secrets.BIGML_EXTERNAL_CONN_PWD }} + BIGML_DELTA: ${{ vars.BIGML_DELTA }} + steps: + - name: Install packages + uses: actions/checkout@v3 + - run: | + pip install .[topics] + + - name: Run tests *01 05 40 45 99* + run: | + pip3 install pytest + export TESTS=$(for t in "05" "40" "45" "99"; do ls bigml/tests/*$t*.py;done|paste -sd " ") + echo $TESTS + pytest -s $TESTS diff --git a/.github/workflows/tests_22.yml b/.github/workflows/tests_22.yml new file mode 100644 index 00000000..46784de2 --- /dev/null +++ b/.github/workflows/tests_22.yml @@ -0,0 +1,38 @@ +name: "Python bindings tests 22" + +on: + pull_request: + paths-ignore: + - 'docs/**' + workflow_dispatch: + +jobs: + tests-22: + + runs-on: ubuntu-latest + strategy: + matrix: + python: [3.12] + env: + BIGML_USERNAME: ${{ secrets.BIGML_USERNAME }} + BIGML_API_KEY: ${{ secrets.BIGML_API_KEY }} + BIGML_ORGANIZATION: ${{ secrets.BIGML_ORGANIZATION }} + BIGML_EXTERNAL_CONN_HOST: ${{ secrets.BIGML_EXTERNAL_CONN_HOST }} + BIGML_EXTERNAL_CONN_PORT: ${{ secrets.BIGML_EXTERNAL_CONN_PORT }} + BIGML_EXTERNAL_CONN_DB: ${{ secrets.BIGML_EXTERNAL_CONN_DB }} + BIGML_EXTERNAL_CONN_USER: ${{ secrets.BIGML_EXTERNAL_CONN_USER }} + BIGML_EXTERNAL_CONN_PWD: ${{ secrets.BIGML_EXTERNAL_CONN_PWD }} + BIGML_DELTA: ${{ vars.BIGML_DELTA }} + + steps: + - name: Install packages + uses: actions/checkout@v3 + - run: | + pip install .[topics] + + - name: Run tests *22 24 25 26 27 28 29 30 31 32 34 39 43 42 44 99* + run: | + pip3 install pytest + export TESTS=$(for t in "22" "24" "25" "26" "27" "28" "29" "30" "31" "32" "34" "39" "43" "42" "44" "99"; do ls bigml/tests/*$t*.py;done|paste -sd " ") + echo $TESTS + pytest -s $TESTS diff --git a/.github/workflows/tests_23.yml b/.github/workflows/tests_23.yml new file mode 100644 index 00000000..892a73d6 
--- /dev/null +++ b/.github/workflows/tests_23.yml @@ -0,0 +1,38 @@ +name: "Python bindings tests 23" + +on: + pull_request: + paths-ignore: + - 'docs/**' + workflow_dispatch: + +jobs: + tests-23: + + runs-on: ubuntu-latest + strategy: + matrix: + python: [3.12] + env: + BIGML_USERNAME: ${{ secrets.BIGML_USERNAME }} + BIGML_API_KEY: ${{ secrets.BIGML_API_KEY }} + BIGML_ORGANIZATION: ${{ secrets.BIGML_ORGANIZATION }} + BIGML_EXTERNAL_CONN_HOST: ${{ secrets.BIGML_EXTERNAL_CONN_HOST }} + BIGML_EXTERNAL_CONN_PORT: ${{ secrets.BIGML_EXTERNAL_CONN_PORT }} + BIGML_EXTERNAL_CONN_DB: ${{ secrets.BIGML_EXTERNAL_CONN_DB }} + BIGML_EXTERNAL_CONN_USER: ${{ secrets.BIGML_EXTERNAL_CONN_USER }} + BIGML_EXTERNAL_CONN_PWD: ${{ secrets.BIGML_EXTERNAL_CONN_PWD }} + BIGML_DELTA: ${{ vars.BIGML_DELTA }} + + steps: + - name: Install packages + uses: actions/checkout@v3 + - run: | + pip install .[full] + + - name: Run tests *23 03 37 35 47 48 49 99* + run: | + pip3 install pytest + export TESTS=$(for t in "23" "03" "37" "35" "47" "48" "49" "99"; do ls bigml/tests/*$t*.py;done|paste -sd " ") + echo $TESTS + pytest -s $TESTS diff --git a/.github/workflows/tests_36.yml b/.github/workflows/tests_36.yml new file mode 100644 index 00000000..a766fa97 --- /dev/null +++ b/.github/workflows/tests_36.yml @@ -0,0 +1,38 @@ +name: "Python bindings tests 36" + +on: + pull_request: + paths-ignore: + - 'docs/**' + workflow_dispatch: + +jobs: + tests-36: + + runs-on: ubuntu-latest + strategy: + matrix: + python: [3.12] + env: + BIGML_USERNAME: ${{ secrets.BIGML_USERNAME }} + BIGML_API_KEY: ${{ secrets.BIGML_API_KEY }} + BIGML_ORGANIZATION: ${{ secrets.BIGML_ORGANIZATION }} + BIGML_EXTERNAL_CONN_HOST: ${{ secrets.BIGML_EXTERNAL_CONN_HOST }} + BIGML_EXTERNAL_CONN_PORT: ${{ secrets.BIGML_EXTERNAL_CONN_PORT }} + BIGML_EXTERNAL_CONN_DB: ${{ secrets.BIGML_EXTERNAL_CONN_DB }} + BIGML_EXTERNAL_CONN_USER: ${{ secrets.BIGML_EXTERNAL_CONN_USER }} + BIGML_EXTERNAL_CONN_PWD: ${{ secrets.BIGML_EXTERNAL_CONN_PWD }} + 
BIGML_DELTA: ${{ vars.BIGML_DELTA }} + + steps: + - name: Install packages + uses: actions/checkout@v3 + - run: | + pip install .[full] + + - name: Run tests *36 33 99* + run: | + pip3 install pytest + export TESTS=$(for t in "36" "33" "99"; do ls bigml/tests/*$t*.py;done|paste -sd " ") + echo $TESTS + pytest -s $TESTS diff --git a/.gitignore b/.gitignore index 20d583ba..03a19ab4 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,11 @@ build .tox/ set_credentials.sh docs/_build/* +*~ +/storage/ +/tests/storage/ +/bigml/storage/ +/bigml/tests/storage +/tmp +.noseids +fields_summary.csv diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..d74e663d --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,22 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the version of Python and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.12" + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + +# We recommend specifying your dependencies to enable reproducible builds: +# https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - requirements: docs/requirements.txt diff --git a/CONTRIBUTORS b/CONTRIBUTORS new file mode 100644 index 00000000..7c3d33bd --- /dev/null +++ b/CONTRIBUTORS @@ -0,0 +1,19 @@ +Thank you to all who have contributed to this project! +If you contributed and are not listed below please let us know. 
+ +Marc Abramowitz ("msabramo") +Javier Alperte ("xalperte") +Adam Ashenfelter ("ashenfad") +Kamal Galrani ("KamalGalrani") +Krishan Gupta ("krishangupta") +Leon Hwang ("leonhwang") +jao ("jaor") +Sean Kelly ("skelliest") +Chee Sing Lee ("cheesinglee") +Ben Letchford ("benletchford") +Francisco J Martin ("aficionado") +Mercè Martín Prats ("mmerce") +Charles Parker ("charlesparker") +Poul Petersen ("petersen-poul") +Oscar Rovira ("osroca") +Nick Wilson ("njwilson") diff --git a/HISTORY.rst b/HISTORY.rst index 61f3f483..6c85c8cd 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,10 +3,1680 @@ History ------- -X.Y.Z (YYYY-MM-DD) +9.8.3 (2025-03-27) +------------------ + +- Fixing annotations update for regions as lists. + +9.8.2 (2025-03-21) +------------------ + +- Retrying annotations update to avoid temporary concurrency issues in + source composites updates. + +9.8.1 (2025-01-14) +------------------ + +- Fixing annotations update in images composite sources. + +9.8.0 (2024-10-02) +------------------ + +- Fixing the get_leaves function for local decision trees. +- Fixing setup issues in Python 3.12. +- Changing documentation templates. + +9.8.0.dev1 (2024-02-28) +----------------------- + +- Documenting and partially removing the need for Node.js in Pipelines. + +9.8.0.dev (2024-02-19) +---------------------- + +- Upgrading libraries to avoid failures in Apple M1 machines. +- Fixing local predictions input data preprocessing for missings. + +9.7.1 (2023-12-08) +------------------ + +- Fixing readthedocs configuration. + +9.7.0 (2023-12-06) +------------------ + +- Changing query string separator in internal API calls. + +9.6.2 (2023-08-02) +------------------ + +- Extending cloning to all available models and WhizzML scripts. +- Fixing shared resources cloning. + +9.6.1 (2023-08-01) +------------------ + +- Adding shared resources cloning. + +9.6.0 (2023-07-20) +------------------ + +- Adding ShapWrapper to enable local Shap values computation with the Shap + library.
+- Adding Evaluation object. +- Improving Field class to allow field values encoding as numpy arrays. + +9.5.0 (2023-06-16) +------------------ + +- Extending Local Fusions output to include confidence. + +9.4.0 (2023-06-14) +------------------ + +- Extending LocalModel class to handle Time Series locally. + +9.3.0 (2023-06-09) +------------------ + +- Adding a LocalModel class to handle any type of BigML model locally. + +9.2.0 (2023-04-11) +------------------ + +- Extending all delete methods to allow additional query strings. + +9.1.4 (2023-02-10) +------------------ + +- Providing local deepnet predictions default for Windows OS that cannot + handle images predictions. + +9.1.3 (2022-12-22) +------------------ + +- Changing user's status endpoint retrieval to a lightweight version. + +9.1.2 (2022-11-26) +------------------ + +- Removing all nose dependencies in tests. + +9.1.1 (2022-11-18) +------------------ + +- Removing traces and refactoring Flatline interpreter invocation. +- Migrating tests to pytest. + +9.1.0 (2022-11-09) +------------------ + +- Enabling local models to generate the transformations pipeline used in + training to be able to apply it locally to input data for predictions. +- Refactoring local models to bring the feature extraction capabilities to + the transformations pipeline. +- Rounding local numeric predictions and scores to 5 digits. +- Improving Pipelines and Image Processing documentation. + +9.0.1 (2022-11-02) +------------------ + +- Fixing local batch_prediction method on DataFrames. +- Improving the training examples method in the Fields class to produce + examples of categorical field values following their original distribution. + +9.0.0 (2022-10-12) +------------------ + +- Renaming Pipeline class to BMLPipeline and refactoring to allow scikit-learn + and Pandas pipelines to be also part of the Pipeline transformations. 
+- Adding several DataTransformers (BMLDataTransformer, SKDataTransformer and + DFDataTransformer) as wrappers for transformation generators. + +8.2.3 (2022-10-07) +------------------ + +- Fixing dump methods for all local resources. +- Refactoring output options in batch_predict methods. +- Adding name and description to local resources. + +8.2.2 (2022-09-29) +------------------ + +- Fixing locale check. +- Documenting the new ``.batch_predict`` method added to local models to + homogenize local batch predictions and accept Pandas' DataFrame as input. + +8.2.1 (2022-09-23) +------------------ + +- Fixing update annotations function when creating annotated images sources. + +8.2.0 (2022-09-22) +------------------ + +- Adding new option to api.ok to report progress via a function callback. + +8.1.1 (2022-09-21) +------------------ + +- Fixing bug in api.ok: response to failing API calls. + +8.1.0 (2022-09-16) +------------------ + +- Upgrading dependencies. + +8.0.0 (2022-09-13) +------------------ + +- Adding new local Pipeline object to encapsulate and run prediction + workflows. +- Changing ``api.ok`` response to return as False when retrieving faulty + resources. Previously, only problems retrieving the API response caused + ``api.ok`` to return False. +- Adding FaultyResource exception to inform about that type of failure when + using the ``raise_on_error`` flag in ``api.ok``. +- Fixing bug in local LogisticRegression when predicting with operating points. + +7.7.0 (2022-08-30) +~~~~~~~~~~~~~~~~~~ + +- Adding local Dataset class to handle Flatline transformations. + +7.6.0 (2022-08-18) +~~~~~~~~~~~~~~~~~~ + +- Adding feature expansion to image fields in local predictors. + +7.5.1 (2022-08-08) +~~~~~~~~~~~~~~~~~~ + +- Adding utility to reproduce the backend images preprocess in local + deepnets. + +7.5.0 (2022-07-09) +~~~~~~~~~~~~~~~~~~ + +- Adding the new capabilities of the last API release: object detection.
+ +7.4.2 (2022-05-26) +~~~~~~~~~~~~~~~~~~ + +- Improved webhooks hashing check. +- Adapting connection to allow API version setting, needed for compatibility + with the Predict Server syntax. + +7.4.1 (2022-05-16) +~~~~~~~~~~~~~~~~~~ + +- Adding support for webhooks hashing check. + +7.4.0 (2022-05-13) +~~~~~~~~~~~~~~~~~~ + +- Adding extras to setup, to allow a basic installation and extra dependencies + handling for topics and images. + +7.3.0 (2022-04-22) +~~~~~~~~~~~~~~~~~~ + +- Improving local ensembles to be generated from list of ensemble plus + models info. +- Allowing BigML connection class to retrieve models from storage without + any credentials. + +7.2.2 (2022-04-20) +~~~~~~~~~~~~~~~~~~ + +- Enabling dump from SupervisedModel class. + + +7.2.1 (2022-04-19) +~~~~~~~~~~~~~~~~~~ + +- Fixing windows setup. + +7.2.0 (2022-04-14) +~~~~~~~~~~~~~~~~~~ + +- Upgrading sensenet library and giving fallback for local deepnet predictions + on OS not supported by tensorflow. +- Adding support for pandas' DataFrame when creating sources. + +7.1.2 (2022-03-17) +~~~~~~~~~~~~~~~~~~ + +- Fixing local deepnet predictions if no missing_count info is found. + +7.1.1 (2022-03-11) +~~~~~~~~~~~~~~~~~~ + +- Improving local predictions for shared fusions and documenting. + +7.1.0 (2022-03-10) +~~~~~~~~~~~~~~~~~~ + +- Adding the ability to predict locally from shared composed resources, like + Ensembles and Fusions. + +7.0.1 (2022-03-04) +~~~~~~~~~~~~~~~~~~ + +- Fixing local Supervised Model to accept stored models as input. +- Improving tests to minimize resource creation. +- Updating dependencies versions. + +7.0.0 (2021-08-03) +~~~~~~~~~~~~~~~~~~ + +- Adding composite sources to the API REST interface methods. +- Adding predictions for images. + +6.3.2 (2021-12-02) +------------------ + +- Fixing local deepnet prediction method to fit new deepnet structure. + +6.3.1 (2021-11-03) +------------------ + +- Improving local ensembles to handle faulty models with no root. 
+ +6.3.0 (2021-10-21) +------------------ + +- Adding activators to local deepnets prediction functions. + +6.2.2 (2021-10-11) +------------------ + +- Fixing underlying dependency used in datetime fields. + +6.2.1 (2021-08-10) +------------------ + +- Fixing predictions for local models with text fields. + +6.2.0 (2021-07-22) +------------------ + +- Minimizing local model information when text or items fields are used. + +6.1.2 (2021-06-16) +------------------ + +- Changing tests mechanism to github actions. + +6.1.1 (2021-06-11) +~~~~~~~~~~~~~~~~~~ + +- Fixing api `get` method. The query string argument was lost. +- Improving api `get`, `create` and `update` methods to avoid unneeded + iterations. +- Improving docs. + +6.1.0 (2021-03-06) +~~~~~~~~~~~~~~~~~~ + +- Fixing local anomalies when normalize_repeats has been used. + +6.0.3 (2021-02-06) +~~~~~~~~~~~~~~~~~~ + +- Improving local models with default numeric values to automatically fill in + missings in input data. +- Raising exceptions if trying to update a failed resource. + +6.0.2 (2020-12-21) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug in local anomaly filter. + +6.0.1 (2020-11-11) +~~~~~~~~~~~~~~~~~~ + +- Changing local anomaly filter. +- Fixing update and download methods to wait for the resource to be finished. + +6.0.0 (2020-09-02) +~~~~~~~~~~~~~~~~~~ + +- New compact format for local models. +- Adding cache managing capacities for all local predictors. + +5.2.0 (2020-08-19) +~~~~~~~~~~~~~~~~~~ + +- Adding syntactic sugar methods to the BigML class to help managing resources. + +5.1.1 (2020-08-11) +~~~~~~~~~~~~~~~~~~ + +- Fixing module directory inclusion and improving docs on local anomalies. + +5.1.0 (2020-08-07) +~~~~~~~~~~~~~~~~~~ + +- Refactoring local anomaly to reduce memory requirements. + + +5.0.1 (2020-08-05) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug in get_tasks_status to get information about transient net + errors. 
+ +5.0.0 (2020-07-31) +~~~~~~~~~~~~~~~~~~ + +- Deprecating support for Python 2.7.X versions. Only Python 3 supported + from this version on. + +4.32.3 (2020-07-15) +~~~~~~~~~~~~~~~~~~~ + +- Extending the Fields class to check the attributes that can be updated in + a source or dataset fields structure to avoid failing fields updates. + +4.32.2 (2020-06-15) +~~~~~~~~~~~~~~~~~~~ + +- Fixing local anomaly scores for new anomaly detectors with feedback and + setting the maximum input data precision to five digits. + +4.32.1 (2020-06-10) +~~~~~~~~~~~~~~~~~~~ + +- Fixing local anomaly scores prediction for corner cases of samples with + one row. + +4.32.0 (2020-05-19) +~~~~~~~~~~~~~~~~~~~ + +- Allowing scripts to be created from gists using the create_script method. +- Improving training examples generation in Fields class. + +4.31.2 (2020-05-14) +~~~~~~~~~~~~~~~~~~~ + +- Fixing problems creating ephemeral prediction resources. + +4.31.1 (2020-05-06) +~~~~~~~~~~~~~~~~~~~ + +- Improving the api.ok method to add an estimated wait time. +- Improving docs and adding TOC for new structure. + +4.31.0 (2020-04-22) +~~~~~~~~~~~~~~~~~~~ + +- Adding REST methods to manage external data connections. + +4.30.2 (2020-04-20) +~~~~~~~~~~~~~~~~~~~ + +- Fixing local anomaly scores for datasets with significant amounts of missings. +- Fixing input data modification for local predictions when fields are not + used in the models. + +4.30.1 (2020-04-16) +~~~~~~~~~~~~~~~~~~~ + +- Fixing tasks status info for organizations. + +4.30.0 (2020-04-10) +~~~~~~~~~~~~~~~~~~~ + +- Allowing the BigML class to retrieve any resource from local storage and + extract its fields. + +4.29.2 (2020-03-20) +~~~~~~~~~~~~~~~~~~~ + +- Improving exception handling when retrieving resources. + +4.29.1 (2020-03-03) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug when disabling SSL verification in predictions only. 
+ +4.29.0 (2020-02-29) +~~~~~~~~~~~~~~~~~~~ + +- Improving api.ok method to allow retries to avoid transient HTTP failures. +- Deprecating the `retries` argument in api.ok. +- Fixing local predictions confidence for weighted models. + +4.28.1 (2020-02-04) +~~~~~~~~~~~~~~~~~~~ + +- Changing api.ok method to avoid raising exceptions when retrieving a + faulty resource. +- Adding call stack info to local Execution class. +- Fixing docs builder. + +4.28.0 (2020-01-23) +~~~~~~~~~~~~~~~~~~~ + +- Adding Execution local utility to extract the outputs and results from an + execution. + +4.27.3 (2020-01-15) +~~~~~~~~~~~~~~~~~~~ + +- Fixing local Fusion class to allow using linear regressions. + +4.27.2 (2020-01-03) +~~~~~~~~~~~~~~~~~~~ + +- Fixing warning message and template files in generated code for hadoop + actionable models. +- Fixing local ensembles that asked for credentials before needing them. + +4.27.1 (2019-12-19) +~~~~~~~~~~~~~~~~~~~ + +- Avoiding asking for credentials in classes that predict locally when the + complete information is provided so no connection is needed. + +4.27.0 (2019-12-03) +~~~~~~~~~~~~~~~~~~~ + +- Extending the custom formats for datetimes allowed as input for local + predictions. +- Fixing datetimes allowed as input for local predictions. They can be + provided by name or ID. + +4.26.0 (2019-11-27) +~~~~~~~~~~~~~~~~~~~ + +- Extending the ability to use an alternative url to all predictions, + centroids, anomaly scores, etc. Also to their batch versions. + +4.25.3 (2019-11-26) +~~~~~~~~~~~~~~~~~~~ + +- Changing bigml-chronos dependency version according to its new internal + structure. The previous version caused problems when used in some external + projects. + +4.25.2 (2019-11-06) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug in local Cluster object when using text or item fields. + +4.25.1 (2019-08-28) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug in local Fusion object when retrieving from storage.
+ +4.25.0 (2019-08-18) +~~~~~~~~~~~~~~~~~~~ + +- Adding the ability to parse datetime fields locally for local predictions + (uses bigml-chronos as a dependency). + +4.24.3 (2019-08-08) +~~~~~~~~~~~~~~~~~~~ + +- Fixing local LinearRegression to work even if `numpy` and `scipy` are + not installed. + +4.24.2 (2019-07-30) +~~~~~~~~~~~~~~~~~~~ + +- Fixing local EnsemblePredictor code to avoid crash when using deep trees. + +4.24.1 (2019-07-05) +~~~~~~~~~~~~~~~~~~~ + +- Adding missing tokens handling to local models. + +4.24.0 (2019-06-28) +~~~~~~~~~~~~~~~~~~~ + +- Refactoring for multipackage compatibility. +- Deprecating ``ensemble_id`` attribute in local ensembles. +- Extending the BigML class to export model's alternative output formats. + +4.23.1 (2019-06-06) +~~~~~~~~~~~~~~~~~~~ + +- Fixing local predictions for models with unpreferred and datetime fields. + +4.23.0 (2019-05-24) +~~~~~~~~~~~~~~~~~~~ + +- Adding access to tasks information in the API connection object. + +4.22.1 (2019-05-23) +~~~~~~~~~~~~~~~~~~~ + +- Improving the local Ensemble and Fusion classes to use the component + models when a local JSON file is used as argument. + +4.22.0 (2019-05-11) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug in local linear regressions for non-invertible confidence bounds + matrices. +- Adding the option of cloning model resources from shared clonable ones. +- Fixing Fields object for timeseries. + +4.21.2 (2019-04-09) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug in local fusion regression predictions. + +4.21.1 (2019-04-06) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug in local linear regression predictions. + +4.21.0 (2019-03-22) +~~~~~~~~~~~~~~~~~~~ + +- Adding REST and local methods for linear regression. + + +4.20.2 (2019-02-02) +~~~~~~~~~~~~~~~~~~~ + +- Adding new format for the list of datasets to create a multidataset from. + +4.20.1 (2019-02-01) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug in local ensemble when used with externally defined connection, + as found by @KamalGalrani. 
+ +4.20.0 (2018-12-01) +~~~~~~~~~~~~~~~~~~~ + +- Adding PCA REST call methods. +- Adding local PCAs and Projections. + +4.19.10 (2018-12-01) +~~~~~~~~~~~~~~~~~~~~ + +- Fixing local Deepnet predictions for regressions without numpy. + +4.19.9 (2018-10-24) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug in create datasets for a list of one dataset only. + +4.19.8 (2018-09-18) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug in create evaluation for timeseries. + +4.19.7 (2018-09-13) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug when exporting fusions with weights. +- Local fusions now caching all models in the constructor. + +4.19.6 (2018-09-12) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug when exporting fusions. + +4.19.5 (2018-08-23) +~~~~~~~~~~~~~~~~~~~ + +- Changing source upload `async` parameter to ensure Python 3.7 compatibility. + +4.19.4 (2018-07-18) +~~~~~~~~~~~~~~~~~~~ + +- Fixing local logistic regression predictions with weight field missing in + input data. + +4.19.3 (2018-06-26) +~~~~~~~~~~~~~~~~~~~ + +- Modifying local fusion object to adapt to logistic regressions with + no missing numerics allowed. + +4.19.2 (2018-06-25) +~~~~~~~~~~~~~~~~~~~ + +- Removing left over comment. + +4.19.1 (2018-06-23) +~~~~~~~~~~~~~~~~~~~ + +- Refactoring the local classes that manage models information to create + predictions. Now all of them allow a path, an ID or a dictionary to be + the first argument in the constructor. + +4.19.0 (2018-06-20) +~~~~~~~~~~~~~~~~~~~ + +- Adding local fusion object and predict methods. +- Fixing error handling in local objects. +- Fixing bug in local logistic regressions when using a local stored file. + +4.18.3 (2018-06-03) +~~~~~~~~~~~~~~~~~~~ + +- Adding batch predictions for fusion resources. + +4.18.2 (2018-05-28) +~~~~~~~~~~~~~~~~~~~ + +- Adding predictions and evaluations for fusion resources. + +4.18.1 (2018-05-19) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug when unused field IDs are used in local prediction inputs. 
+ +4.18.0 (2018-05-19) +~~~~~~~~~~~~~~~~~~~ + +- Adding methods for the REST calls to OptiMLs and Fusions. + +4.17.1 (2018-05-15) +~~~~~~~~~~~~~~~~~~~ + +- Adding the option to export PMML models when available. +- Fixing bug in local deepnets for regressions. +- Adapting local Cluster and Anomaly detector to not include summary fields + information. + +4.17.0 (2018-05-02) +~~~~~~~~~~~~~~~~~~~ + +- Adding the local Supervised Model class to allow local predictions with + any supervised model resource. + +4.16.2 (2018-04-31) +~~~~~~~~~~~~~~~~~~~ + +- Adding the `export` and `export_last` methods to download and save the + remote resources in the local file system. + +4.16.1 (2018-04-24) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug in local deepnet predictions. + +4.16.0 (2018-04-03) +~~~~~~~~~~~~~~~~~~~ + +- Deprecating local predictions formatting arguments. Formatting is available + through the `cast_prediction` function. + +4.15.2 (2018-02-24) +~~~~~~~~~~~~~~~~~~~ + +- Local predictions for regression ensembles corrected for strange models + whose nodes lack the confidence attribute. + +4.15.1 (2018-02-07) +~~~~~~~~~~~~~~~~~~~ + +- Removing logs left in local ensemble object. + +4.15.0 (2018-02-07) +~~~~~~~~~~~~~~~~~~~ + +- Adding organizations support for all the API calls. + +4.14.0 (2018-01-22) +~~~~~~~~~~~~~~~~~~~ + +- Deprecating `dev_mode` flag from BigML's API connection. The development + environment has been deprecated. +- Fixing bug in local cluster output to CSV. +- Improving docs with local batch predictions examples. +- Adding operating kind support for local predictions in models and ensembles. +- Fixing bug in ensembles local predictions with probability. +- Fixing bug in logistic regression local predictions with operating points. + +4.13.7 (2018-01-02) +~~~~~~~~~~~~~~~~~~~ + +- Changing local predictions with threshold to meet changes in backend. +- Adding support for configurations REST API calls. 
+ +4.13.6 (2017-12-05) +~~~~~~~~~~~~~~~~~~~ + +- Fixing predict confidence method in local ensembles. + +4.13.5 (2017-11-23) +~~~~~~~~~~~~~~~~~~~ + +- Adding operating point local predictions to deepnets. + +4.13.4 (2017-11-21) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug in local ensemble predictions with operating points. +- Fixing bug for local EnsemblePredictor class. + +4.13.3 (2017-11-14) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug in local ensemble predictions for inputs that don't match the + expected field types. + +4.13.2 (2017-11-14) +~~~~~~~~~~~~~~~~~~~ + +- Adding left out static files for local ensemble predictor functions. + +4.13.1 (2017-11-10) +~~~~~~~~~~~~~~~~~~~ + +- Refactoring local BoostedTrees and adding the EnsemblePredictor to + use the local predict functions of each model to generate the ensemble + prediction. + +4.13.0 (2017-11-07) +~~~~~~~~~~~~~~~~~~~ + +- Adding operating point thresholds to local model, ensemble and logistic + regression predictions. + +4.12.1 (2017-10-12) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug in the local Deepnet predictions when numpy is not installed. + +4.12.0 (2017-10-04) +~~~~~~~~~~~~~~~~~~~ + +- Adding support for Deepnets REST API calls and local predictions using + the local Deepnet object. + +4.11.3 (2017-09-29) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug in the local Ensemble object. Failed to use the + stored ensemble object. + +4.11.2 (2017-07-29) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug in source uploads using Python3 when reading data from stdin. + +4.11.1 (2017-06-23) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug in source uploads using Python3 when a category is set. + +4.11.0 (2017-06-23) +~~~~~~~~~~~~~~~~~~~ + +- Adding REST methods for managing time-series and local time-series object + to create forecasts. + +4.10.5 (2017-07-13) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug in the sources upload using Python3. Server changes need the + content-type of the file to be sent. 
+ +4.10.4 (2017-06-21) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug in the local model predicted distributions for weighted models. +- Fixing bug in predicted probability for local model predictions + using weighted models. + +4.10.3 (2017-06-07) +~~~~~~~~~~~~~~~~~~~ + +- Changing boosted local ensembles predictions to match the improvements in + API. +- Fixing bug in association rules export to CSV and lisp for rules with numeric + attributes. + +4.10.2 (2017-05-23) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug: local Model object failed when retrieving old JSON models from + local storage. + +4.10.1 (2017-05-15) +~~~~~~~~~~~~~~~~~~~ + +- Internal refactoring preparing for extensions in BigMLer. + +4.10.0 (2017-05-05) +~~~~~~~~~~~~~~~~~~~ + +- Adding predict_probability and predict_confidence methods to local model and + ensemble. +- Internal refactoring of local model classes preparing for extensions + in BigMLer. + +4.9.2 (2017-03-26) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug: local model slugifying fails when fields have empty names. + +4.9.1 (2017-03-23) +~~~~~~~~~~~~~~~~~~ + +- Adding methods to local cluster: closest data points from a + reference point and centroids ordered from a reference point. +- Modifying internal codes in MultiVote class. + +4.9.0 (2017-03-21) +~~~~~~~~~~~~~~~~~~ + +- Adding boosted ensembles to the local Ensemble object. + +4.8.3 (2017-03-01) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug in local logistic regression predictions when a constant field is + forced as input field. + +4.8.2 (2017-02-09) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug: Adapting to changes in Python 3.6 which cause the connection to + the API using SSL to fail. + +4.8.1 (2017-01-11) +~~~~~~~~~~~~~~~~~~ + +- Changing local association parameters to adapt to API docs specifications. + +4.8.0 (2017-01-08) +~~~~~~~~~~~~~~~~~~ + +- Adapting to final format of local association sets and adding tests.
+ +4.7.3 (2016-12-03) +~~~~~~~~~~~~~~~~~~ + +- Bug fixing: query string is allowed also for project get calls. + +4.7.2 (2016-12-02) +~~~~~~~~~~~~~~~~~~ + +- Allowing a query string to be added to get calls for all the resource types. + +4.7.1 (2016-12-01) +~~~~~~~~~~~~~~~~~~ + +- Improving the Fields object: extracting fields structure from topic models. +- Bug fixing: Local Topic Distributions failed when tokenizing inputs with + sequences of separators. + +4.7.0 (2016-11-30) +~~~~~~~~~~~~~~~~~~ + +- Adding REST methods for the new resource types: Topic Model, + Topic Distribution, Batch Topic Distribution. +- Adding local Topic Model object. + +4.6.10 (2016-10-26) +~~~~~~~~~~~~~~~~~~~ + +- Improving local cluster object to fill in missing numerics for clusters + with default numeric values. + +4.6.9 (2016-09-27) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug in tests for anomaly detector and ill-formatted comments. +- Adapting tests to new logistic regression default value for balance_fields. + +4.6.8 (2016-09-22) +~~~~~~~~~~~~~~~~~~ + +- Adding optional information to local predictions. +- Improving casting for booleans in local predictions. +- Improving the retrieval of stored or remote resources in local + predictor objects. + +4.6.7 (2016-09-15) +~~~~~~~~~~~~~~~~~~ + +- Changing the type for the bias attribute to create logistic regressions to + boolean. + +4.6.6 (2016-08-02) +~~~~~~~~~~~~~~~~~~ + +- Improving message for unauthorized API calls adding information about the + current domain. + +4.6.5 (2016-07-16) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug in local model. Fixing predictions for weighted models. + +4.6.4 (2016-07-06) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug in delete_execution method. The delete call now has a + query_string. + +4.6.3 (2016-06-25) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug in local logistic regression predictions' format. + +4.6.2 (2016-06-20) +~~~~~~~~~~~~~~~~~~ + +- Adding local logistic regression as argument for evaluations. 
+ +4.6.1 (2016-06-12) +~~~~~~~~~~~~~~~~~~ + +- Adapting local logistic regression object to new coefficients format and + adding field_codings attribute. + +4.6.0 (2016-05-19) +~~~~~~~~~~~~~~~~~~ + +- Adding REST methods to manage new types of whizzml resources: scripts, + executions and libraries. +- Fixing bug in logistic regression predictions for datasets with text fields. + When input data has only one term and `all` token mode is used, local and + remote predictions didn't match. + +4.5.3 (2016-05-04) +~~~~~~~~~~~~~~~~~~ + +- Improving the cluster report information. +- Fixing bug in logistic regression predictions. Results differed from + the backend predictions when date-time fields were present. + +4.5.2 (2016-03-24) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug in model's local predictions. When the model uses text fields and + the field contents are missing in the input data, the prediction does + not return the last prediction and stop. It now follows the + "does not contain" branch. + +4.5.1 (2016-03-12) +~~~~~~~~~~~~~~~~~~ + +- Adding method to Fields object to produce CSV summary files. +- Adding method to Fields object to import changes in updatable attributes + from CSV files or strings. + +4.5.0 (2016-02-08) +~~~~~~~~~~~~~~~~~~ + +- Adapting association object to the new syntax of missing values. +- Improving docs and comments for the proportional strategy in predictions. +- Fixing bug: centroid input data datetime fields are optional. + +4.4.2 (2016-01-06) +~~~~~~~~~~~~~~~~~~ + +- Adapting logistic regression local object to the new missing_numeric + parameter. + +4.4.1 (2015-12-18) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug: summarized path output failed when adding missing operators. + +4.4.0 (2015-12-15) +~~~~~~~~~~~~~~~~~~ + +- Adding REST API calls for association rules and local Association object. +- Adapting local model, cluster, anomaly and logistic regression objects + to new field type: items.
+- Fixing bug: wrong value of Gini impurity +- Fixing bug: local model summary failed occasionally when missings were used + in a numeric predicate. +- Fixing bug: wrong syntax in flatline filter method of the tree object. + +4.3.4 (2015-12-10) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug: Logistic regression object failed to build when using input + fields or non-preferred fields in dataset. + +4.3.3 (2015-11-30) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug: Anomaly object failed to generate the filter for new datasets + when text empty values were found. + +4.3.2 (2015-11-24) +~~~~~~~~~~~~~~~~~~ + +- Adding verify and protocol options to the existing Domain class constructor + to handle special installs. + +4.3.1 (2015-11-07) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug: Local logistic regression predictions differ when input data + has contents in a text field but the terms involved do not appear in the + bag of words. + +4.3.0 (2015-10-16) +~~~~~~~~~~~~~~~~~~ + +- Adding logistic regression as a new prediction model. + +4.2.2 (2015-10-14) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug: Fields object failed to store the correct objective id when the + objective was in the first column. + +4.2.1 (2015-10-14) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug: Improving error handling in download_dataset method. + +4.2.0 (2015-07-27) +~~~~~~~~~~~~~~~~~~ + +- Adding REST methods to manage new type of resource: correlations. +- Adding REST methods to manage new type of resource: tests. +- Adding min and max values predictions for regression models and ensembles. +- Fixing bug: Fields object was not retrieving objective id from the + resource info. + +4.1.7 (2015-08-15) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug: console messages failed when used with Python3 on Windows. + +4.1.6 (2015-06-25) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug: Removing id fields from the filter to select the anomalies listed + in the Anomaly object from the origin dataset.
+ +4.1.5 (2015-06-06) ~~~~~~~~~~~~~~~~~~ -- TODO +- Fixing bug: create_source method failed when unicode literals were used in + args. + +4.1.4 (2015-05-27) +~~~~~~~~~~~~~~~~~~ + +- Ensuring unique ordering in MultiVote categorical combinations (only + needed in Python 3). + +4.1.3 (2015-05-19) +~~~~~~~~~~~~~~~~~~ + +- Adapting code to handle uploading from String objects. +- Adding models creation new origin resources: clusters and centroids. + +4.1.2 (2015-04-28) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug in summarize method for local models. Ensuring unicode use and + adding tests for generated outputs. + +4.1.1 (2015-04-26) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug in method to print the fields in the anomaly trees. +- Fixing bug in the create_source method for Python3. Creation failed when + the `tags` argument was used. + +4.1.0 (2015-04-14) +~~~~~~~~~~~~~~~~~~ + +- Adding median based predictions to ensembles. + +4.0.2 (2015-04-12) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug: multimodels median predictions failed. + +4.0.1 (2015-04-10) +~~~~~~~~~~~~~~~~~~ + +- Adding support for median-based predictions in MultiModels. + +4.0.0 (2015-04-10) +~~~~~~~~~~~~~~~~~~ + +- Python 3 added to supported Python versions. +- Test suite migrated to nose. + + +3.0.3 (2015-04-08) +~~~~~~~~~~~~~~~~~~ + +- Changing setup to ensure compatible Python and requests versions. +- Hiding warnings when SSL verification is disabled. + +3.0.2 (2015-03-26) +~~~~~~~~~~~~~~~~~~ + +- Adding samples as Fields generator resources + +3.0.1 (2015-03-17) +~~~~~~~~~~~~~~~~~~ + +- Changing the Ensemble object init method to use the max_models argument + also when loading the ensemble fields to trigger garbage collection. + +3.0.0 (2015-03-04) +~~~~~~~~~~~~~~~~~~ + +- Adding Google App Engine support for remote REST calls. +- Adding cache_get argument to Ensemble constructor to allow getting + local model objects from cache. 
+ +2.2.0 (2015-02-26) +~~~~~~~~~~~~~~~~~~ + +- Adding lists of local models as argument for the local ensemble + constructor. + +2.1.0 (2015-02-22) +~~~~~~~~~~~~~~~~~~ + +- Adding distribution and median to ensembles' predictions output. + +2.0.0 (2015-02-12) +~~~~~~~~~~~~~~~~~~ + +- Adding REST API calls for samples. + +1.10.8 (2015-02-10) +~~~~~~~~~~~~~~~~~~~ + +- Adding distribution units to the predict method output of the local model. + +1.10.7 (2015-02-07) +~~~~~~~~~~~~~~~~~~~ + +- Extending the predict method in local models to get multiple predictions. +- Changing the local model object to add the units used in the distribution + and the add_median argument in the predict method. + +1.10.6 (2015-02-06) +~~~~~~~~~~~~~~~~~~~ + +- Adding the median as prediction for the local model object. + +1.10.5 (2014-01-29) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug: centroids failed when predicted from local clusters with + summary fields. + +1.10.4 (2014-01-17) +~~~~~~~~~~~~~~~~~~~ + +- Improvements in docs presentation and content. +- Adding tree_CSV method to local model to output the nodes information + in CSV format. + +1.10.3 (2014-01-16) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug: local ensembles were not retrieved from the stored JSON file. +- Adding the ability to construct local ensembles from any existing JSON file + describing an ensemble structure. + +1.10.2 (2014-01-15) +~~~~~~~~~~~~~~~~~~~ + +- Source creation from inline data. + +1.10.1 (2014-12-29) +~~~~~~~~~~~~~~~~~~~ + +- Fixing bug: source upload failed in old Python versions. + +1.10.0 (2014-12-29) +~~~~~~~~~~~~~~~~~~~ + +- Refactoring the BigML class before adding the new project resource. +- Changing the ok and check_resource methods to download lighter resources. +- Fixing bug: cluster summarize for 1-centroid clusters. +- Fixing bug: adapting to new SSL verification in Python 2.7.9. + +1.9.8 (2014-12-01) +~~~~~~~~~~~~~~~~~~ + +- Adding impurity to Model leaves, and a new method to select impure leaves. 
+- Fixing bug: the Model, Cluster and Anomaly objects had no resource_id + attribute when built from a local resource JSON structure. + +1.9.7 (2014-11-24) +~~~~~~~~~~~~~~~~~~ + +- Adding method in Anomaly object to build the filter to exclude anomalies + from the original dataset. +- Basic code refactorization for initial resources structure. + +1.9.6 (2014-11-09) +~~~~~~~~~~~~~~~~~~ + +- Adding BIGML_PROTOCOL, BIGML_SSL_VERIFY and BIGML_PREDICTION_SSL_VERIFY + environment variables to change the default corresponding values in + customized private environments. + +1.9.5 (2014-11-03) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug: summarize method breaks for clusters with text fields. + +1.9.4 (2014-10-27) +~~~~~~~~~~~~~~~~~~ + +- Changing MultiModel class to return in-memory list of predictions. + +1.9.3 (2014-10-23) +~~~~~~~~~~~~~~~~~~ + +- Improving Fields and including the new Cluster and + Anomalies fields structures as fields resources. +- Improving ModelFields to filter missing values from input data. +- Forcing garbage collection in local ensemble to lower memory usage. + +1.9.2 (2014-10-13) +~~~~~~~~~~~~~~~~~~ + +- Changing some Fields exceptions handling. +- Refactoring api code to handle create, update and delete methods dynamically. +- Adding connection info string for printing. +- Improving tests information. + +1.9.1 (2014-10-10) +~~~~~~~~~~~~~~~~~~ + +- Adding the summarize and statistics_CSV methods to local cluster object. + +1.9.0 (2014-10-02) +~~~~~~~~~~~~~~~~~~ + +- Adding the batch anomaly score REST API calls. + +1.8.0 (2014-09-09) +~~~~~~~~~~~~~~~~~~ + +- Adding the anomaly detector and anomaly score REST API calls. +- Adding the local anomaly detector. + +1.7.0 (2014-08-29) +~~~~~~~~~~~~~~~~~~ + +- Adding to local model predictions the ability to use the new + missing-combined operators. + +1.6.7 (2014-08-05) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug in corner case of model predictions using proportional missing + strategy. 
+- Adding the unique path to the first missing split to the predictions using + proportional missing strategy. + +1.6.6 (2014-07-31) +~~~~~~~~~~~~~~~~~~ + +- Improving the locale handling to avoid problems when logging to console under + Windows. + +1.6.5 (2014-07-26) +~~~~~~~~~~~~~~~~~~ + +- Adding stats method to Fields to show fields statistics. +- Adding api method to create a source from a batch prediction. + +1.6.4 (2014-07-25) +~~~~~~~~~~~~~~~~~~ + +- Changing the create methods to check if origin resources are finished + by downloading no fields information. + +1.6.3 (2014-07-24) +~~~~~~~~~~~~~~~~~~ + +- Changing some variable names in the predict method (add_count, add_path) and + the prediction structure to follow other bindings' naming. + +1.6.2 (2014-07-19) +~~~~~~~~~~~~~~~~~~ + +- Building local model from a JSON model file. +- Predictions output can contain confidence, distribution, instances and/or + rules. + +1.6.1 (2014-07-09) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug: download_dataset method did not return content when no filename + was provided. + +1.6.0 (2014-07-03) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug: check valid parameter in distribution merge function. +- Adding download_dataset method to api to export datasets to CSV. + +1.5.1 (2014-06-13) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug: local clusters' centroid method crashes when text or categorical + fields are not present in input data. + +1.5.0 (2014-06-05) +~~~~~~~~~~~~~~~~~~ + +- Adding local cluster to produce centroid predictions locally. + +1.4.4 (2014-05-23) +~~~~~~~~~~~~~~~~~~ + +- Adding shared urls to datasets. +- Fixing bug: error renaming variables. + +1.4.3 (2014-05-22) +~~~~~~~~~~~~~~~~~~ + +- Adding the ability to change the remote server domain in the API + connection constructor (for VPCs). +- Adding the ability to generate datasets from clusters. + +1.4.2 (2014-05-20) +~~~~~~~~~~~~~~~~~~ + +- Fixing bug when using api.ok method for centroids and batch centroids.
+ +1.4.1 (2014-05-19) +~~~~~~~~~~~~~~~~~~ + +- Docs and test updates. + +1.4.0 (2014-05-14) +~~~~~~~~~~~~~~~~~~ + +- Adding REST methods to manage clusters, centroids and batch centroids. + +1.3.1 (2014-05-06) +~~~~~~~~~~~~~~~~~~ + +- Adding the average_confidence method to local models. +- Fixing bug in pprint for predictions with input data keyed by field names. + +1.3.0 (2014-04-07) +~~~~~~~~~~~~~~~~~~ + +- Changing Fields object constructor to accept also source, dataset or model + resources. + +1.2.2 (2014-04-01) +~~~~~~~~~~~~~~~~~~ + +- Changing error message when create_source calls result in http errors + to standardize them. +- Simplifying create_prediction calls because now API accepts field names + as input_data keys. +- Adding missing_counts and error_counts to report the missing values and + error counts per field in the dataset. + +1.2.1 (2014-03-19) +~~~~~~~~~~~~~~~~~~ + +- Adding error to regression local predictions using proportional missing + strategy. + +1.2.0 (2014-03-07) +~~~~~~~~~~~~~~~~~~ + +- Adding proportional missing strategy to MultiModel and solving tie breaks + in remote predictions. +- Adding new output options to model's python, rules and tableau outputs: + ability to extract the branch of the model leading to a certain node with + or without the hanging subtree. +- Adding HTTP_TOO_MANY_REQUESTS error handling in REST API calls. + +1.1.0 (2014-02-10) +~~~~~~~~~~~~~~~~~~ + +- Adding Tableau-ready output to local model code generators. + +1.0.6 (2014-02-03) +~~~~~~~~~~~~~~~~~~ + +- Fixing getters: getter for batch predictions was missing. + +1.0.5 (2014-01-22) +~~~~~~~~~~~~~~~~~~ + +- Improving BaseModel and Model. If they receive a partial model + structure with a correct model id, the needed model resource is downloaded + and stored (if storage is enabled in the given api connection). +- Improving local ensemble. Adding a new `fields` attribute that + contains all the fields used in its models.
+ +1.0.4 (2014-01-21) +~~~~~~~~~~~~~~~~~~ + +- Adding a summarize method to local ensembles with data distribution + and field importance information. + +1.0.3 (2014-01-21) +~~~~~~~~~~~~~~~~~~ + +- Fixes bug in regressions predictions with ensembles and plurality without + confidence information. Predictions values were not normalized. +- Updating copyright information. + +1.0.2 (2014-01-20) +~~~~~~~~~~~~~~~~~~ + +- Fixes bug in create calls: the user provided args dictionaries were + updated inside the calls. + +1.0.1 (2014-01-05) +~~~~~~~~~~~~~~~~~~ + +- Changing the source for ensemble field importance computations. +- Fixes bug in http_ok adding the valid state for updates. + +1.0.0 (2013-12-09) +~~~~~~~~~~~~~~~~~~ + +- Adding more info to error messages in REST methods. +- Adding new missing fields strategy in predict method. +- Fixes bug in shared models: credentials were not properly set. +- Adding batch predictions REST methods. + +0.10.3 (2013-12-19) +~~~~~~~~~~~~~~~~~~~ + +- Fixes bug in local ensembles with more than 200 fields. + +0.10.2 (2013-12-02) +~~~~~~~~~~~~~~~~~~~ + +- Fixes bug in summarize method of local models: field importance report + crashed. +- Fixes bug in status method of the BigML connection object: status for + async uploads of source files crashed while uploading. + +0.10.1 (2013-11-25) +~~~~~~~~~~~~~~~~~~~ + +- Adding threshold combiner to MultiModel objects. + +0.10.0 (2013-11-21) +~~~~~~~~~~~~~~~~~~~ + +- Adding a function printing field importance to ensembles. +- Changing Model to add a lightweight BaseModel class with no Tree + information. +- Adding function to get resource type from resource id or structure. +- Adding resource type checks to REST functions. +- Adding threshold as new combination method for local ensembles. + +0.9.1 (2013-10-17) +~~~~~~~~~~~~~~~~~~ + +- Fixes duplication changing field names in local model if they are not unique.
+ +0.9.0 (2013-10-08) +~~~~~~~~~~~~~~~~~~ + +- Adds the environment variables and adapts the create_prediction method + to create predictions using a different prediction server. +- Support for shared models. + +0.8.0 (2013-08-10) +~~~~~~~~~~~~~~~~~~ + +- Adds text analysis local predict function +- Modifies outputs for text analysis: rules, summary, python, hadoop + +0.7.5 (2013-08-22) +~~~~~~~~~~~~~~~~~~ + +- Fixes temporarily problems in predictions for regression models and + ensembles +- Adds en-gb to the list of available locales, avoiding spurious warnings + +0.7.4 (2013-08-17) +~~~~~~~~~~~~~~~~~~ + +- Changes warning logger level to info + +0.7.3 (2013-08-09) +~~~~~~~~~~~~~~~~~~ + +- Adds fields method to retrieve only preferred fields +- Fixes error message when no valid resource id is provided in check_resource + +0.7.2 (2013-07-04) +~~~~~~~~~~~~~~~~~~ + +- Fixes check_resource method that was not using query-string data +- Add list of models as argument in Ensemble constructor +- MultiModel has BigML connection as a new optional argument + +0.7.1 (2013-06-19) +~~~~~~~~~~~~~~~~~~ + +- Fixes Multimodel list_models method +- Fixes check_resource method for predictions +- Adds local configuration environment variable BIGML_DOMAIN replacing + BIGML_URL and BIGML_DEV_URL +- Refactors Ensemble and Model's predict method + +0.7.0 (2013-05-01) +~~~~~~~~~~~~~~~~~~ + +- Adds splits in datasets to generate new datasets +- Adds evaluations for ensembles + +0.6.0 (2013-04-27) +~~~~~~~~~~~~~~~~~~ + +- REST API methods for model ensembles +- New method returning the leaves of tree models +- Improved error handling in GET methods + +0.5.2 (2013-03-03) +~~~~~~~~~~~~~~~~~~ + +- Adds combined confidence to combined predictions +- Fixes get_status for resources that have no status info +- Fixes bug: public datasets, that should be downloadable, weren't + +0.5.1 (2013-02-12) +~~~~~~~~~~~~~~~~~~ + +- Fixes bug: no status info in public models, now shows FINISHED status code +- 
Adds more file-like objects (e.g. stdin) support in create_source input +- Refactoring Fields pair method and Model predict method to increase +- Adds some more locale aliases + +0.5.0 (2013-01-16) +~~~~~~~~~~~~~~~~~~ + +- Adds evaluation api functions +- New prediction combination method: probability weighted +- Refactors MultiModels lists of predictions into MultiVote +- Multimodels partial predictions: new format + +0.4.8 (2012-12-21) +~~~~~~~~~~~~~~~~~~ + +- Improved locale management +- Adds new features to MultiModel to allow local batch predictions +- Improved combined predictions +- Adds local predictions options: plurality, confidence weighted + +0.4.7 (2012-12-06) +~~~~~~~~~~~~~~~~~~ + +- Warning message to inform of locale default if verbose mode + +0.4.6 (2012-12-06) +~~~~~~~~~~~~~~~~~~ + +- Fix locale code for windows + +0.4.5 (2012-12-05) +~~~~~~~~~~~~~~~~~~ + +- Fix remote predictions for input data containing fields not included in rules + +0.4.4 (2012-12-02) +~~~~~~~~~~~~~~~~~~ + +- Tiny fixes +- Fix local predictions for input data containing fields not included in rules +- Overall clean up + +0.4.3 (2012-11-07) +~~~~~~~~~~~~~~~~~~ + +- A few tiny fixes +- Multi models to generate predictions from multiple local models +- Adds hadoop-python code generation to create local predictions + +0.4.2 (2012-09-19) +~~~~~~~~~~~~~~~~~~ + +- Fix Python generation +- Add a debug flag to log https requests and responses +- Type conversion in fields pairing + +0.4.1 (2012-09-17) +~~~~~~~~~~~~~~~~~~ + +- Fix missing distribution field in new models +- Add new Field class to deal with BigML auto-generated ids +- Add by_name flag to predict methods to avoid reverse name lookups +- Add summarize method in models to generate class grouped printed output + +0.4.0 (2012-08-20) +~~~~~~~~~~~~~~~~~~ + +- Development Mode +- Remote Sources +- Bigger files streamed with Poster +- Asynchronous Uploading +- Local Models +- Local Predictions +- Rule Generation +- Python 
Generation +- Overall clean up + 0.3.1 (2012-07-05) ~~~~~~~~~~~~~~~~~~ diff --git a/LICENSE b/LICENSE index a82c5890..5c304d1a 100644 --- a/LICENSE +++ b/LICENSE @@ -1,13 +1,201 @@ -# Copyright 2012 BigML, Inc -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/MANIFEST.in b/MANIFEST.in index c2aa50b9..4f3fd0ba 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,7 +3,6 @@ include HISTORY.rst include README.rst recursive-include data * recursive-include docs * -recursive-include tests * -recursive-exclude tests *.pyc -recursive-exclude tests *.pyo +recursive-include bigml/tests * +recursive-exclude bigml/tests *.pyc prune docs/_build diff --git a/README.rst b/README.rst index 7d92bba7..89da1cf6 100644 --- a/README.rst +++ b/README.rst @@ -8,10 +8,13 @@ creates `beautiful predictive models `_ that can be easily understood and interacted with. -These BigML Python bindings allow you to interact with BigML.io, the API +These BigML Python bindings allow you to interact with +`BigML.io `_, the API for BigML. You can use it to easily create, retrieve, list, update, and delete BigML resources (i.e., sources, datasets, models and, -predictions). +predictions). For additional information, see +the `full documentation for the Python +bindings on Read the Docs `_. This module is licensed under the `Apache License, Version 2.0 `_. @@ -29,37 +32,138 @@ in our `Campfire chatroom `_. Requirements ------------ -Python 2.6 and Python 2.7 are currently supported by these bindings. - -The only mandatory third-party dependency is the -`requests `_ library. This -library is automatically installed during the setup. 
+Only ``Python 3`` versions are currently supported by these bindings. +Support for Python 2.7.X ended in version ``4.32.3``. + +The basic third-party dependencies are the +`requests `_, +`unidecode `_, +`requests-toolbelt `_, +`bigml-chronos `_, +`msgpack `_, +`numpy `_ and +`scipy `_ libraries. These +libraries are automatically installed during the basic setup. +Support for Google App Engine has been added as of version 3.0.0, +using the `urlfetch` package instead of `requests`. The bindings will also use ``simplejson`` if you happen to have it installed, but that is optional: we fall back to Python's built-in JSON libraries is ``simplejson`` is not found. +The bindings provide support to use the ``BigML`` platform to create, update, +get and delete resources, but also to produce local predictions using the +models created in ``BigML``. Most of them will be actionable with the basic +installation, but some additional dependencies are needed to use local +``Topic Models`` and Image Processing models. Please, refer to the +`Installation <#installation>`_ section for details. + +OS Requirements +~~~~~~~~~~~~~~~ + +The basic installation of the bindings is compatible and can be used +on Linux and Windows based Operating Systems. +However, the extra options that allow working with +image processing models (``[images]`` and ``[full]``) are only supported +and tested on Linux-based Operating Systems. +For image models, Windows OS is not recommended and cannot be supported out of +the box, because the specific compiler versions or dlls required are +unavailable in general. + Installation ------------ -To install the latest stable release with -`pip `_:: +To install the basic latest stable release with +`pip `_, please use: + +.. 
code-block:: bash $ pip install bigml +Support for local Topic Distributions (Topic Models' predictions) +and local predictions for datasets that include Images will only be +available as extras, because the libraries used for that are not +usually available in all Operating Systems. If you need to support those, +please check the `Installation Extras <#installation-extras>`_ section. + +Installation Extras +------------------- + +Local Topic Distributions support can be installed using: + +.. code-block:: bash + + pip install bigml[topics] + +Images local predictions support can be installed using: + +.. code-block:: bash + + pip install bigml[images] + +The full set of features can be installed using: + +.. code-block:: bash + + pip install bigml[full] + + +WARNING: Mind that installing these extras can require some extra work, as +explained in the `Requirements <#requirements>`_ section. + You can also install the development version of the bindings directly -from the Git repository:: +from the Git repository: + +.. code-block:: bash $ pip install -e git://github.com/bigmlcom/python.git#egg=bigml_python + +Running the Tests +----------------- + +The tests will be run using `pytest `_. +You'll need to set up your authentication +via environment variables, as explained +in the authentication section. Also some of the tests need other environment +variables like ``BIGML_ORGANIZATION`` to test calls when used by Organization +members and ``BIGML_EXTERNAL_CONN_HOST``, ``BIGML_EXTERNAL_CONN_PORT``, +``BIGML_EXTERNAL_CONN_DB``, ``BIGML_EXTERNAL_CONN_USER``, +``BIGML_EXTERNAL_CONN_PWD`` and ``BIGML_EXTERNAL_CONN_SOURCE`` +in order to test external data connectors. + +With that in place, you can run the test suite simply by issuing: + +.. code-block:: bash + + $ pytest + +Additionally, `Tox `_ can be used to +automatically run the test suite in virtual environments for all +supported Python versions. To install Tox: + +..
code-block:: bash + + $ pip install tox + +Then run the tests from the top-level project directory: + +.. code-block:: bash + + $ tox + Importing the module -------------------- -To import the module:: +To import the module: + +.. code-block:: python import bigml.api -Alternatively you can just import the BigML class:: +Alternatively you can just import the BigML class: + +.. code-block:: python from bigml.api import BigML @@ -71,36 +175,178 @@ and `API key `_ and are always transmitted over HTTPS. This module will look for your username and API key in the environment -variables ``BIGML_USERNAME`` and ``BIGML_API_KEY`` respectively. You can +variables ``BIGML_USERNAME`` and ``BIGML_API_KEY`` respectively. + +Unix and MacOS +-------------- + +You can add the following lines to your ``.bashrc`` or ``.bash_profile`` to set -those variables automatically when you log in:: +those variables automatically when you log in: + +.. code-block:: bash export BIGML_USERNAME=myusername export BIGML_API_KEY=ae579e7e53fb9abd646a6ff8aa99d4afe83ac291 -With that environment set up, connecting to BigML is a breeze:: +refer to the next chapters to know how to do that in other operating systems. + +With that environment set up, connecting to BigML is a breeze: + +.. code-block:: python from bigml.api import BigML api = BigML() Otherwise, you can initialize directly when instantiating the BigML -class as follows:: +class as follows: + +.. code-block:: python api = BigML('myusername', 'ae579e7e53fb9abd646a6ff8aa99d4afe83ac291') -Also, you can initialize the library to work in the Sandbox environment by -passing the parameter ``dev_mode``:: +These credentials will allow you to manage any resource in your user +environment. + +In BigML a user can also work for an ``organization``. +In this case, the organization administrator should previously assign +permissions for the user to access one or several particular projects +in the organization. 
+Once permissions are granted, the user can work with resources in a project +according to their permission level by creating a special constructor for +each project. The connection constructor in this case +should include the ``project ID``: + +.. code-block:: python + + api = BigML('myusername', 'ae579e7e53fb9abd646a6ff8aa99d4afe83ac291', + project='project/53739b98d994972da7001d4a') + +If the project used in a connection object +does not belong to an existing organization but is one of the +projects under the user's account, all the resources +created or updated with that connection will also be assigned to the +specified project. + +When the resource to be managed is a ``project`` itself, the connection +needs to include the corresponding ``organization ID``: + +.. code-block:: python + + api = BigML('myusername', 'ae579e7e53fb9abd646a6ff8aa99d4afe83ac291', + organization='organization/53739b98d994972da7025d4a') + + +Authentication on Windows +------------------------- + +The credentials should be permanently stored in your system using: + +.. code-block:: bash + + setx BIGML_USERNAME myusername + setx BIGML_API_KEY ae579e7e53fb9abd646a6ff8aa99d4afe83ac291 + +Note that ``setx`` will not change the environment variables of your current +console, so you will need to open a new one to start using them. + + +Authentication on Jupyter Notebook +---------------------------------- + +You can set the environment variables using the ``%env`` command in your +cells: + +.. code-block:: bash + + %env BIGML_USERNAME=myusername + %env BIGML_API_KEY=ae579e7e53fb9abd646a6ff8aa99d4afe83ac291 + + +Alternative domains +------------------- + + +The main public domain for the API service is ``bigml.io``, but there are some +alternative domains, either for Virtual Private Cloud setups or +the Australian subdomain (``au.bigml.io``). You can change the remote +server domain +to your particular VPC domain by either setting the ``BIGML_DOMAIN`` environment +variable to your VPC subdomain: + +..
code-block:: bash + + export BIGML_DOMAIN=my_VPC.bigml.io + +or setting it when instantiating your connection: + +.. code-block:: python + + api = BigML(domain="my_VPC.bigml.io") + +The corresponding SSL REST calls will be directed to your private domain +henceforth. + +You can also set up your connection to use a particular PredictServer +only for predictions. In order to do so, you'll need to specify a ``Domain`` +object, where you can set up the general domain name as well as the +particular prediction domain name. + +.. code-block:: python + + from bigml.domain import Domain + from bigml.api import BigML + + domain_info = Domain(prediction_domain="my_prediction_server.bigml.com", + prediction_protocol="http") + + api = BigML(domain=domain_info) + +Finally, you can combine all the options and change both the general domain +server, and the prediction domain server. + +.. code-block:: python + + from bigml.domain import Domain + from bigml.api import BigML + domain_info = Domain(domain="my_VPC.bigml.io", + prediction_domain="my_prediction_server.bigml.com", + prediction_protocol="https") + + api = BigML(domain=domain_info) + +Some arguments for the Domain constructor are more unusual, but they can also +be used to set your special service endpoints: + +- protocol (string) Protocol for the service + (when different from HTTPS) +- verify (boolean) Sets on/off the SSL verification +- prediction_verify (boolean) Sets on/off the SSL verification + for the prediction server (when different from the general + SSL verification) + +**Note** that the previously existing ``dev_mode`` flag: + +.. code-block:: python api = BigML(dev_mode=True) +that caused the connection to work with the Sandbox ``Development Environment`` +has been **deprecated** because this environment no longer exists.
+The existing resources that were previously +created in this environment have been moved +to a special project in the now unique ``Production Environment``, so this +flag is no longer needed to work with them. + + Quick Start ----------- Imagine that you want to use `this csv file `_ containing the `Iris flower dataset `_ to -predict the species of a flower whose ``sepal length`` is ``5`` and -whose ``sepal width`` is ``2.5``. A preview of the dataset is shown +predict the species of a flower whose ``petal length`` is ``2.45`` and +whose ``petal width`` is ``1.75``. A preview of the dataset is shown below. It has 4 numeric fields: ``sepal length``, ``sepal width``, ``petal length``, ``petal width`` and a categorical field: ``species``. By default, BigML considers the last field in the dataset as the @@ -122,7 +368,9 @@ for). 5.7,2.5,5.0,2.0,Iris-virginica 5.8,2.8,5.1,2.4,Iris-virginica -You can easily generate a prediction following these steps:: +You can easily generate a prediction following these steps: + +.. code-block:: python from bigml.api import BigML @@ -131,12 +379,158 @@ You can easily generate a prediction following these steps:: source = api.create_source('./data/iris.csv') dataset = api.create_dataset(source) model = api.create_model(dataset) - prediction = api.create_prediction(model, {'sepal length': 5, 'sepal width': 2.5}) + prediction = api.create_prediction(model, \ + {"petal width": 1.75, "petal length": 2.45}) -You can then print the prediction using the ``pprint`` method:: +You can then print the prediction using the ``pprint`` method: + +.. code-block:: python >>> api.pprint(prediction) - species for {"sepal width": 2.5, "sepal length": 5} is Iris-virginica + species for {"petal width": 1.75, "petal length": 2.45} is Iris-setosa + +Certainly, any of the resources created in BigML can be configured using +several arguments described in the `API documentation `_. 
+Any of these configuration arguments can be added to the ``create`` method +as a dictionary in the last optional argument of the calls: + +.. code-block:: python + + from bigml.api import BigML + + api = BigML() + + source_args = {"name": "my source", + "source_parser": {"missing_tokens": ["NULL"]}} + source = api.create_source('./data/iris.csv', source_args) + dataset_args = {"name": "my dataset"} + dataset = api.create_dataset(source, dataset_args) + model_args = {"objective_field": "species"} + model = api.create_model(dataset, model_args) + prediction_args = {"name": "my prediction"} + prediction = api.create_prediction(model, \ + {"petal width": 1.75, "petal length": 2.45}, + prediction_args) + +The ``iris`` dataset has a small number of instances, and usually will be +instantly created, so the ``api.create_`` calls will probably return the +finished resources outright. As BigML's API is asynchronous, +in general you will need to ensure +that objects are finished before using them by using ``api.ok``. + +.. code-block:: python + + from bigml.api import BigML + + api = BigML() + + source = api.create_source('./data/iris.csv') + api.ok(source) + dataset = api.create_dataset(source) + api.ok(dataset) + model = api.create_model(dataset) + api.ok(model) + prediction = api.create_prediction(model, \ + {"petal width": 1.75, "petal length": 2.45}) + +Note that the prediction +call is not followed by the ``api.ok`` method. Predictions are so quick to be +generated that, unlike the +rest of the resources, they will be generated synchronously as a finished object. + +The example assumes that your objective field (the one you want to predict) +is the last field in the dataset. If that's not the case, you can explicitly +set the name of this field in the creation call using the ``objective_field`` +argument: + + +..
code-block:: python + + from bigml.api import BigML + + api = BigML() + + source = api.create_source('./data/iris.csv') + api.ok(source) + dataset = api.create_dataset(source) + api.ok(dataset) + model = api.create_model(dataset, {"objective_field": "species"}) + api.ok(model) + prediction = api.create_prediction(model, \ + {'sepal length': 5, 'sepal width': 2.5}) + + +You can also generate an evaluation for the model by using: + +.. code-block:: python + + test_source = api.create_source('./data/test_iris.csv') + api.ok(test_source) + test_dataset = api.create_dataset(test_source) + api.ok(test_dataset) + evaluation = api.create_evaluation(model, test_dataset) + api.ok(evaluation) + +If you set the ``storage`` argument in the ``api`` instantiation: + +.. code-block:: python + + api = BigML(storage='./storage') + +all the generated, updated or retrieved resources will be automatically +saved to the chosen directory. + +Alternatively, you can use the ``export`` method to explicitly +download the JSON information +that describes any of your resources in BigML to a particular file: + +.. code-block:: python + + api.export('model/5acea49a08b07e14b9001068', + filename="my_dir/my_model.json") + +This example downloads the JSON for the model and stores it in +the ``my_dir/my_model.json`` file. + +In the case of models that can be represented in a `PMML` syntax, the +export method can be used to produce the corresponding `PMML` file. + +.. code-block:: python + + api.export('model/5acea49a08b07e14b9001068', + filename="my_dir/my_model.pmml", + pmml=True) + +You can also retrieve the last resource with some previously given tag: + +.. code-block:: python + + api.export_last("foo", + resource_type="ensemble", + filename="my_dir/my_ensemble.json") + +which selects the last ensemble that has a ``foo`` tag. This mechanism can +be specially useful when retrieving retrained models that have been created +with a shared unique keyword as tag. 
+ +For a descriptive overview of the steps that you will usually need to +follow to model +your data and obtain predictions, please see the `basic Workflow sketch +`_ +document. You can also check other simple examples in the following documents: + +- `model 101 <101_model.html>`_ +- `logistic regression 101 <101_logistic_regression.html>`_ +- `linear regression 101 <101_linear_regression.html>`_ +- `ensemble 101 <101_ensemble.html>`_ +- `cluster 101 <101_cluster.html>`_ +- `anomaly detector 101 <101_anomaly.html>`_ +- `association 101 <101_association.html>`_ +- `topic model 101 <101_topic_model.html>`_ +- `deepnet 101 <101_deepnet.html>`_ +- `time series 101 <101_ts.html>`_ +- `fusion 101 <101_fusion.html>`_ +- `scripting 101 <101_scripting.html>`_ Additional Information ---------------------- @@ -146,12 +540,25 @@ the `full documentation for the Python bindings on Read the Docs `_. Alternatively, the same documentation can be built from a local checkout of the source by installing `Sphinx `_ -(``$ pip install sphinx``) and then running:: +(``$ pip install sphinx``) and then running: + +.. code-block:: bash $ cd docs $ make html Then launch ``docs/_build/html/index.html`` in your browser. +How to Contribute +----------------- + +Please follow these steps: + + 1. Fork the project on github.com. + 2. Create a new branch. + 3. Commit changes to the new branch. + 4. Send a `pull request `_. + + For details on the underlying API, see the `BigML API documentation `_.
diff --git a/bigml/__init__.py b/bigml/__init__.py index 75b4e196..818decaa 100644 --- a/bigml/__init__.py +++ b/bigml/__init__.py @@ -1 +1,2 @@ -__version__ = '0.3.1.dev1' +from bigml.version import __version__ +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/bigml/anomaly.py b/bigml/anomaly.py new file mode 100644 index 00000000..4a345724 --- /dev/null +++ b/bigml/anomaly.py @@ -0,0 +1,390 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +""" +A fast building local Predictive Anomaly Detector. +This module defines an Anomaly Detector to score anomalies in a dataset locally +or embedded into your application without needing to send requests to +BigML.io. +The module is also designed for situations when it is desirable to be able to +build the anomaly detector very quickly from an external representation. +It also offers the ability to load its contents from a cache system like +Redis or memcache. The `get` method of the cache system has to be passed +in the `cache_get` argument and the hash for the storage should be the +corresponding anomaly ID. 
# Example usage (assuming that you have previously set up the BIGML_USERNAME
# and BIGML_API_KEY environment variables and that you own the anomaly/id
# below):
#
#     from bigml.api import BigML
#     from bigml.anomaly import Anomaly
#     import redis
#     r = redis.Redis()
#     # First build as you would any core Anomaly object:
#     anomaly = Anomaly('anomaly/5126965515526876630001b2')
#     # Store a serialized version in Redis
#     anomaly.dump(cache_set=r.set)
#     # (retrieve the external rep from its convenient place)
#     # Speedy Build from external rep
#     anomaly = Anomaly('anomaly/5126965515526876630001b2', cache_get=r.get)
#     # Get scores same as always:
#     anomaly.anomaly_score({"src_bytes": 350})


import math

from bigml.predicate_utils.utils import OPERATOR_CODE, PREDICATE_INFO_LENGTH
from bigml.predicate_utils.utils import apply_predicates
from bigml.api import FINISHED
from bigml.api import get_status, get_api_connection, get_anomaly_id
from bigml.basemodel import get_resource_dict
from bigml.modelfields import ModelFields, NUMERIC
from bigml.util import cast, use_cache, load, get_data_format, \
    get_formatted_data, format_data, get_data_transformations
from bigml.constants import OUT_NEW_HEADERS, INTERNAL, DECIMALS


# Numerically the Euler-Mascheroni constant, used in the depth corrections
DEPTH_FACTOR = 0.5772156649
# Fixed slots in an encoded node besides its predicates and children:
# weight, number of predicates and number of children
PREDICATES_OFFSET = 3

DFT_OUTPUTS = ["score"]

#pylint: disable=locally-disabled,invalid-name
def get_repeat_depth(population):
    """Computes the correction to depth used to normalize repeats

    :param population: number of instances that reached the node
    :return: 0 when the population is 1 or less; otherwise the correction,
             which is never smaller than 1.0
    """
    repeat_depth = 0
    if population > 1:
        h = DEPTH_FACTOR + math.log(population - 1)
        repeat_depth = max(
            1.0, 2 * (h - (float(population - 1) / population)))
    return repeat_depth


def build_tree(node, add_population=False):
    """Builds a compressed version of the tree structure as a list of
    lists. Starting from the root node, each node
    is represented by a list whose elements are:
        [weight, len(predicates), operator_code, field, value, term, missing,
         ..., len(children), children_nodes_list*]

    When the normalize_repeats flag is set to True, we need to add the
    population of the node: [weight, population, len(predicates), ...]

    :param dict node: tree node as found in the anomaly detector JSON
    :param bool add_population: whether to store the repeat depth computed
                                from the node population after the weight
    :return: the encoded node (a list) including its encoded children
    """
    outer = [node.get('weight', 1)]
    if add_population:
        # what is stored is the depth correction, not the raw population
        outer.append(get_repeat_depth(node.get("population", 0)))
    build_predicates(node, outer)
    children = node.get("children", [])
    outer.append(len(children))
    outer.extend(build_tree(child, add_population=add_population)
                 for child in children)
    return outer


def build_predicates(node, encoded_node):
    """Build the minified version of the predicates in a node

    Appends to `encoded_node` the number of predicates followed by five
    slots per predicate: operator code, field, value, term and the
    missing flag.

    :param dict node: tree node as found in the anomaly detector JSON
    :param list encoded_node: list being built for the node (modified
                              in place)
    :return: the same `encoded_node` list
    """
    predicates = node.get('predicates')
    if predicates and not (predicates is True or predicates == [True]):
        # `True` entries carry no condition to be checked
        predicates = [x for x in predicates if x is not True]
        encoded_node.append(len(predicates))
        for pred in predicates:
            operation = pred.get('op')
            value = pred.get('value')
            missing = False
            if operation.endswith("*"):
                # a trailing "*" in the operator sets the missing flag
                operation = operation[0: -1]
                missing = True
            elif operation == 'in' and None in value:
                missing = True

            encoded_node.append(OPERATOR_CODE.get(operation))
            encoded_node.append(pred.get('field'))
            encoded_node.append(value)
            encoded_node.append(pred.get('term'))
            encoded_node.append(missing)
    else:
        encoded_node.append(0) # no predicates

    return encoded_node


def calculate_depth(node, input_data, fields, depth=0,
                    normalize_repeats=False):
    """Computes the depth in the tree for the input data

    :param list node: encoded node, as generated by `build_tree`
    :param dict input_data: input data to be evaluated
    :param dict fields: fields structure used to apply the predicates
    :param depth: depth accumulated before reaching this node
    :param bool normalize_repeats: whether the encoded nodes include the
                                   repeat depth in their second slot
    :return: the depth reached by the input data
    """

    weight = node[0]
    shift = 0
    repeat_depth = 0
    if normalize_repeats:
        # the repeat depth slot shifts the rest of the node information
        shift = 1
        repeat_depth = node[1]

    num_predicates = node[1 + shift]
    # each encoded predicate uses 5 slots (see build_predicates)
    num_children = node[2 + shift + (5 * num_predicates)]

    predicates_ok = 0

    if num_predicates > 0:
        predicates_ok = apply_predicates(node, input_data, fields,
                                         normalize_repeats=normalize_repeats)

    # some of the predicates were met and depth > 1 in a leaf
    if num_predicates > 0 and 0 < predicates_ok < num_predicates and \
            depth > 1 and num_children == 0:
        return depth + repeat_depth

    if num_predicates > 0 and predicates_ok != num_predicates:
        return depth

    depth += weight

    if num_children > 0:
        start = PREDICATES_OFFSET + (PREDICATE_INFO_LENGTH * num_predicates) \
            + shift
        end = PREDICATES_OFFSET + num_children + ( \
            PREDICATE_INFO_LENGTH * num_predicates) + shift
        children = node[slice(start, end)]
        for child in children:
            num_predicates = child[1 + shift]
            predicates_ok = apply_predicates( \
                child, input_data, fields,
                normalize_repeats=normalize_repeats)
            if predicates_ok == num_predicates:
                # follows the first child whose predicates are all met
                return calculate_depth(child, input_data, fields, depth,
                                       normalize_repeats=normalize_repeats)
    else:
        depth += repeat_depth

    return depth


class Anomaly(ModelFields):
    """ A minimal anomaly detector designed to build quickly from a
    specialized external representation. See file documentation, above,
    for usage.

    """

    def __init__(self, anomaly, api=None, cache_get=None):
        """Builds the local anomaly detector

        :param anomaly: anomaly detector ID or resource information, as
                        accepted by `get_resource_dict`
        :param api: connection used to retrieve the resource (a default one
                    is created by `get_api_connection` when None)
        :param cache_get: `get` function of a cache system where the
                          anomaly detector has been previously dumped
        :raises ValueError: if the resource has not the expected structure
        :raises Exception: if the anomaly detector is not finished or its
                           information is not complete
        """

        if use_cache(cache_get):
            # using a cache to store the Anomaly attributes
            self.__dict__ = load(get_anomaly_id(anomaly), cache_get)
            return

        self.resource_id = None
        self.name = None
        self.description = None
        self.parent_id = None
        self.sample_size = None
        self.input_fields = None
        self.default_numeric_value = None
        self.mean_depth = None
        self.expected_mean_depth = None
        self.normalize_repeats = None
        self.iforest = None
        self.id_fields = []
        # Attributes that are only filled from the `model` information.
        # Setting defaults avoids AttributeError in objects built from
        # resources that do not contain it.
        self.normalization_factor = None
        self.nodes_mean_depth = None
        self.norm = None
        self.top_anomalies = []
        api = get_api_connection(api)
        self.resource_id, anomaly = get_resource_dict(
            anomaly, "anomaly", api=api)

        if 'object' in anomaly and isinstance(anomaly['object'], dict):
            anomaly = anomaly['object']
        try:
            self.parent_id = anomaly.get('dataset')
            self.name = anomaly.get("name")
            self.description = anomaly.get("description")
            self.sample_size = anomaly.get('sample_size')
            self.input_fields = anomaly.get('input_fields')
            self.default_numeric_value = anomaly.get('default_numeric_value')
            self.normalize_repeats = anomaly.get('normalize_repeats', False)
            self.id_fields = anomaly.get('id_fields', [])
        except AttributeError as exc:
            raise ValueError("Failed to find the expected "
                             "JSON structure. Check your arguments.") from exc

        if 'model' in anomaly and isinstance(anomaly['model'], dict):
            model_info = anomaly['model']
            ModelFields.__init__(
                self, model_info.get('fields'),
                missing_tokens=model_info.get('missing_tokens'))
            self.mean_depth = model_info.get('mean_depth')
            self.normalization_factor = model_info.get(
                'normalization_factor')
            self.nodes_mean_depth = model_info.get(
                'nodes_mean_depth')
            status = get_status(anomaly)
            if 'code' in status and status['code'] == FINISHED:
                self.expected_mean_depth = None
                if self.mean_depth is None or self.sample_size is None:
                    raise Exception("The anomaly data is not complete. "
                                    "Score will not be available")
                self.norm = self.normalization_factor if \
                    self.normalization_factor is not None else \
                    self.norm_factor()
                iforest = model_info.get('trees', [])
                self.iforest = []
                if iforest:
                    self.iforest = [
                        build_tree(anomaly_tree['root'],
                                   add_population=self.normalize_repeats)
                        for anomaly_tree in iforest]
                self.top_anomalies = model_info['top_anomalies']
            else:
                raise Exception("The anomaly isn't finished yet")

    def norm_factor(self):
        """Computing the normalization factor for simple anomaly detectors

        :return: the minimum of `mean_depth` and the default depth computed
                 from `sample_size`, or None when `mean_depth` is not set
        """
        if self.mean_depth is not None:
            default_depth = self.mean_depth if self.sample_size == 1 else \
                (2 * (DEPTH_FACTOR + math.log(self.sample_size - 1) -
                      (float(self.sample_size - 1) / self.sample_size)))
            return min(self.mean_depth, default_depth)
        return None

    def data_transformations(self):
        """Returns the pipeline transformations previous to the modeling
        step as a pipeline, so that they can be used in local predictions.
        Avoiding to set it in a Mixin to maintain the current dump function.
        """
        return get_data_transformations(self.resource_id, self.parent_id)

    def anomaly_score(self, input_data):
        """Returns the anomaly score given by the iforest
        To produce an anomaly score, we evaluate each tree in the iforest
        for its depth result (see the `calculate_depth` function
        for details). We find the average of these depths
        to produce an `observed_mean_depth` and combine it with the
        normalization factor stored in `self.norm` as seen below, which
        should result in a value between 0 and 1.

        :param dict input_data: input data to be scored
        :return: the anomaly score, rounded to DECIMALS digits
        :raises Exception: if no trees are available to compute the score
        """
        # corner case with only one record
        if self.sample_size == 1 and self.normalization_factor is None:
            return 1
        # Checks and cleans input_data leaving the fields used in the model
        norm_input_data = self.filter_input_data(input_data)
        # Strips affixes for numeric values and casts to the final field type
        cast(norm_input_data, self.fields)

        # An empty forest is as unusable as a missing one: averaging over
        # zero trees would raise a ZeroDivisionError
        if not self.iforest:
            raise Exception("We could not find the iforest information to "
                            "compute the anomaly score. Please, rebuild your "
                            "Anomaly object from a complete anomaly detector "
                            "resource.")
        depth_sum = sum(
            calculate_depth(tree, norm_input_data, self.fields,
                            normalize_repeats=self.normalize_repeats)
            for tree in self.iforest)

        observed_mean_depth = float(depth_sum) / len(self.iforest)
        return round(math.pow(2, - observed_mean_depth / self.norm),
                     DECIMALS)

    def anomalies_filter(self, include=True):
        """Returns the LISP expression needed to filter the subset of
        top anomalies. When include is set to True, only the top
        anomalies are selected by the filter. If set to False, only the
        rest of the dataset is selected.

        :param bool include: whether the top anomalies are kept (True) or
                             excluded (False)
        :return: the filter expression as a string
        """
        anomaly_filters = []
        for anomaly in self.top_anomalies:
            row = anomaly.get('row_number')
            if row is not None:
                anomaly_filters.append('(= (row-number) %s)' % row)

        anomalies_filter = " ".join(anomaly_filters)
        if len(anomaly_filters) == 1:
            # a single condition needs no `or` wrapper
            if include:
                return anomalies_filter
            return "(not %s)" % anomalies_filter
        if include:
            return "(or %s)" % anomalies_filter
        return "(not (or %s))" % anomalies_filter

    def fill_numeric_defaults(self, input_data):
        """Checks whether input data is missing a numeric field and
        fills it with the average quantity set in default_numeric_value

        :param dict input_data: input data (modified in place)
        :return: the same `input_data` dictionary
        """

        for field_id, field in list(self.fields.items()):
            if field_id not in self.id_fields and \
                    field['optype'] == NUMERIC and \
                    field_id not in input_data and \
                    self.default_numeric_value is not None:
                default_value = 0 if self.default_numeric_value == "zero" \
                    else field['summary'].get(self.default_numeric_value)
                input_data[field_id] = default_value
        return input_data

    def predict(self, input_data, full=False):
        """Method to homogenize the local models interface for all BigML
        models. It returns the anomaly_score method result. If full is set
        to True, then the result is returned as a dictionary.
        """
        score = self.anomaly_score(input_data)
        if full:
            return {DFT_OUTPUTS[0]: score}
        return score

    def batch_predict(self, input_data_list, outputs=None, **kwargs):
        """Creates a batch anomaly score for a list of inputs using the local
        anomaly detector. Allows to define some output settings to decide the
        name of the header used for the score in the result. To homogenize
        the behaviour of supervised batch_predict method, the outputs argument
        accepts a dictionary with keys: "output_fields" and "output_headers".
        In this case, output_fields is ignored, as only the score can be
        obtained from the anomaly_score method, and only "output_headers" is
        considered to allow changing the header associated to that new field.

        :param input_data_list: List of input data to be predicted
        :type input_data_list: list or Panda's dataframe
        :param dict outputs: properties that define the headers and fields to
                             be added to the input data
        :param kwargs: accepted only to keep the signature shared with other
                       local models; `anomaly_score` takes no extra
                       arguments, so they are ignored
        :return: the list of input data plus the predicted values
        :rtype: list or Panda's dataframe depending on the input type in
                input_data_list

        """
        if outputs is None:
            outputs = {}
        new_headers = outputs.get(OUT_NEW_HEADERS, DFT_OUTPUTS)
        data_format = get_data_format(input_data_list)
        inner_data_list = get_formatted_data(input_data_list, INTERNAL)
        for input_data in inner_data_list:
            # kwargs are not forwarded: anomaly_score would raise a TypeError
            prediction = {"score": self.anomaly_score(input_data)}
            for index, key in enumerate(DFT_OUTPUTS):
                input_data[new_headers[index]] = prediction[key]
        if data_format != INTERNAL:
            return format_data(inner_data_list, out_format=data_format)
        return inner_data_list
You may obtain @@ -30,70 +30,114 @@ model = api.create_model(dataset) prediction = api.create_prediction(model, {'sepal width': 1}) api.pprint(prediction) -""" -import logging -FORMAT = '%(asctime)-15s: %(message)s' -logging.basicConfig(format=FORMAT) -LOGGER = logging.getLogger('BigML') -import time -import os -import re +""" +import sys import pprint -import requests - -try: - import simplejson as json -except ImportError: - import json - -from urlparse import urlparse - -# Base URL -BIGML_URL = "https://bigml.io/andromeda/" - -SOURCE_PATH = 'source' -DATASET_PATH = 'dataset' -MODEL_PATH = 'model' -PREDICTION_PATH = 'prediction' - -#Development Mode URL -BIGML_DEV_URL = "https://bigml.io/dev/andromeda/" - -SOURCE_RE = re.compile(r'^%s/[a-f,0-9]{24}$' % SOURCE_PATH) -DATASET_RE = re.compile(r'^%s/[a-f,0-9]{24}$' % DATASET_PATH) -MODEL_RE = re.compile(r'^%s/[a-f,0-9]{24}$' % MODEL_PATH) -PREDICTION_RE = re.compile(r'^%s/[a-f,0-9]{24}$' % PREDICTION_PATH) - -# Headers -SEND_JSON = {'Content-Type': 'application/json;charset=utf-8'} -ACCEPT_JSON = {'Accept': 'application/json;charset=utf-8'} - -# HTTP Status Codes -HTTP_OK = 200 -HTTP_CREATED = 201 -HTTP_ACCEPTED = 202 -HTTP_NO_CONTENT = 204 -HTTP_BAD_REQUEST = 400 -HTTP_UNAUTHORIZED = 401 -HTTP_PAYMENT_REQUIRED = 402 -HTTP_FORBIDDEN = 403 -HTTP_NOT_FOUND = 404 -HTTP_METHOD_NOT_ALLOWED = 405 -HTTP_LENGTH_REQUIRED = 411 -HTTP_INTERNAL_SERVER_ERROR = 500 - -# Resource status codes -WAITING = 0 -QUEUED = 1 -STARTED = 2 -IN_PROGRESS = 3 -SUMMARIZED = 4 -FINISHED = 5 -FAULTY = -1 -UNKNOWN = -2 -RUNNABLE = -3 - +import os +import json + + +from bigml.bigmlconnection import BigMLConnection +from bigml.domain import BIGML_PROTOCOL +from bigml.constants import STORAGE, ALL_FIELDS, TINY_RESOURCE, TASKS_QS +from bigml.util import is_in_progress, is_image +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.sourcehandler import SourceHandlerMixin +from bigml.api_handlers.datasethandler 
import DatasetHandlerMixin +from bigml.api_handlers.modelhandler import ModelHandlerMixin +from bigml.api_handlers.ensemblehandler import EnsembleHandlerMixin +from bigml.api_handlers.predictionhandler import PredictionHandlerMixin +from bigml.api_handlers.clusterhandler import ClusterHandlerMixin +from bigml.api_handlers.centroidhandler import CentroidHandlerMixin +from bigml.api_handlers.anomalyhandler import AnomalyHandlerMixin +from bigml.api_handlers.anomalyscorehandler import AnomalyScoreHandlerMixin +from bigml.api_handlers.evaluationhandler import EvaluationHandlerMixin +from bigml.api_handlers.batchpredictionhandler import BatchPredictionHandlerMixin +from bigml.api_handlers.batchcentroidhandler import BatchCentroidHandlerMixin +from bigml.api_handlers.batchanomalyscorehandler \ + import BatchAnomalyScoreHandlerMixin +from bigml.api_handlers.projecthandler import ProjectHandlerMixin +from bigml.api_handlers.samplehandler import SampleHandlerMixin +from bigml.api_handlers.correlationhandler import CorrelationHandlerMixin +from bigml.api_handlers.statisticaltesthandler import StatisticalTestHandlerMixin +from bigml.api_handlers.logistichandler import LogisticRegressionHandlerMixin +from bigml.api_handlers.associationhandler import AssociationHandlerMixin +from bigml.api_handlers.associationsethandler import AssociationSetHandlerMixin +from bigml.api_handlers.configurationhandler import ConfigurationHandlerMixin +from bigml.api_handlers.topicmodelhandler import TopicModelHandlerMixin +from bigml.api_handlers.topicdistributionhandler \ + import TopicDistributionHandlerMixin +from bigml.api_handlers.batchtopicdistributionhandler \ + import BatchTopicDistributionHandlerMixin +from bigml.api_handlers.timeserieshandler import TimeSeriesHandlerMixin +from bigml.api_handlers.forecasthandler import ForecastHandlerMixin +from bigml.api_handlers.deepnethandler import DeepnetHandlerMixin +from bigml.api_handlers.optimlhandler import OptimlHandlerMixin +from 
bigml.api_handlers.fusionhandler import FusionHandlerMixin +from bigml.api_handlers.pcahandler import PCAHandlerMixin +from bigml.api_handlers.projectionhandler import ProjectionHandlerMixin +from bigml.api_handlers.linearhandler import LinearRegressionHandlerMixin +from bigml.api_handlers.batchprojectionhandler import BatchProjectionHandlerMixin +from bigml.api_handlers.scripthandler import ScriptHandlerMixin +from bigml.api_handlers.executionhandler import ExecutionHandlerMixin +from bigml.api_handlers.libraryhandler import LibraryHandlerMixin +from bigml.api_handlers.externalconnectorhandler import \ + ExternalConnectorHandlerMixin + + +# Repeating constants and functions for backwards compatibility + +# HTTP Status Codes from https://bigml.com/developers/status_codes +from bigml.bigmlconnection import ( + HTTP_OK, HTTP_CREATED, HTTP_ACCEPTED, HTTP_NO_CONTENT, HTTP_BAD_REQUEST, + HTTP_UNAUTHORIZED, HTTP_PAYMENT_REQUIRED, HTTP_FORBIDDEN, + HTTP_NOT_FOUND, HTTP_METHOD_NOT_ALLOWED, HTTP_TOO_MANY_REQUESTS, + HTTP_LENGTH_REQUIRED, HTTP_INTERNAL_SERVER_ERROR, DOWNLOAD_DIR, LOGGER) + + +# Resource types and status codes +from bigml.constants import ( + WAITING, QUEUED, STARTED, IN_PROGRESS, SUMMARIZED, FINISHED, UPLOADING, + FAULTY, UNKNOWN, RUNNABLE, RESOURCE_RE, RENAMED_RESOURCES, SOURCE_RE, + DATASET_RE, MODEL_RE, ENSEMBLE_RE, CLUSTER_RE, CENTROID_RE, ANOMALY_RE, + PREDICTION_RE, EVALUATION_RE, BATCH_PREDICTION_RE, BATCH_CENTROID_RE, + BATCH_ANOMALY_SCORE_RE, ANOMALY_SCORE_RE, PROJECT_RE, SOURCE_PATH, + DATASET_PATH, MODEL_PATH, PREDICTION_PATH, EVALUATION_PATH, ENSEMBLE_PATH, + BATCH_PREDICTION_PATH, CLUSTER_PATH, CENTROID_PATH, BATCH_CENTROID_PATH, + ANOMALY_PATH, ANOMALY_SCORE_PATH, BATCH_ANOMALY_SCORE_PATH, PROJECT_PATH, + SAMPLE_PATH, SAMPLE_RE, CORRELATION_PATH, CORRELATION_RE, + STATISTICAL_TEST_PATH, STATISTICAL_TEST_RE, + LOGISTIC_REGRESSION_PATH, LOGISTIC_REGRESSION_RE, ASSOCIATION_PATH, + ASSOCIATION_RE, ASSOCIATION_SET_PATH, ASSOCIATION_SET_RE, 
TOPIC_MODEL_PATH, + TOPIC_MODEL_RE, TOPIC_DISTRIBUTION_PATH, BATCH_TOPIC_DISTRIBUTION_PATH, + TOPIC_DISTRIBUTION_RE, BATCH_TOPIC_DISTRIBUTION_RE, TIME_SERIES_RE, + TIME_SERIES_PATH, FORECAST_RE, DEEPNET_PATH, DEEPNET_RE, OPTIML_PATH, + OPTIML_RE, FUSION_PATH, FUSION_RE, CONFIGURATION_PATH, CONFIGURATION_RE, + FORECAST_PATH, PCA_PATH, PCA_RE, PROJECTION_PATH, PROJECTION_RE, + BATCH_PROJECTION_PATH, BATCH_PROJECTION_RE, + LINEAR_REGRESSION_PATH, LINEAR_REGRESSION_RE, SCRIPT_PATH, SCRIPT_RE, + EXECUTION_PATH, EXECUTION_RE, LIBRARY_PATH, LIBRARY_RE, STATUS_PATH, + IRREGULAR_PLURALS, RESOURCES_WITH_FIELDS, FIELDS_PARENT, + EXTERNAL_CONNECTOR_PATH, EXTERNAL_CONNECTOR_RE, CLONABLE_PATHS) + +from bigml.api_handlers.resourcehandler import ( + get_resource, get_resource_type, check_resource_type, get_source_id, + get_dataset_id, get_model_id, get_ensemble_id, get_evaluation_id, + get_cluster_id, get_centroid_id, get_anomaly_id, get_anomaly_score_id, + get_prediction_id, get_batch_prediction_id, get_batch_centroid_id, + get_batch_anomaly_score_id, get_resource_id, resource_is_ready, + get_status, check_resource, http_ok, get_project_id, get_sample_id, + get_correlation_id, get_statistical_test_id, get_logistic_regression_id, + get_association_id, get_association_set_id, get_topic_model_id, + get_topic_distribution_id, get_batch_topic_distribution_id, + get_time_series_id, get_forecast_id, get_deepnet_id, get_optiml_id, + get_fusion_id, get_pca_id, get_projection_id, get_batch_projection_id, + get_configuration_id, get_linear_regression_id, get_fields, + get_script_id, get_execution_id, get_library_id, get_external_connector_id) + + +# Map status codes to labels STATUSES = { WAITING: "WAITING", QUEUED: "QUEUED", @@ -101,720 +145,609 @@ IN_PROGRESS: "IN_PROGRESS", SUMMARIZED: "SUMMARIZED", FINISHED: "FINISHED", + UPLOADING: "UPLOADING", FAULTY: "FAULTY", UNKNOWN: "UNKNOWN", RUNNABLE: "RUNNABLE" } -def _is_valid_remote_url(value): - """Says if given value is a URL - with scheme, 
netloc and path - or not.""" - url = isinstance(value, basestring) and urlparse(value) - return url and url.scheme and url.netloc and url.path +ID_GETTERS = { + PROJECT_PATH: get_project_id, + SOURCE_PATH: get_source_id, + DATASET_PATH: get_dataset_id, + MODEL_PATH: get_model_id, + ENSEMBLE_PATH: get_ensemble_id, + LOGISTIC_REGRESSION_PATH: get_logistic_regression_id, + DEEPNET_PATH: get_deepnet_id, + EVALUATION_PATH: get_evaluation_id, + CLUSTER_PATH: get_cluster_id, + ANOMALY_PATH: get_anomaly_id, + TOPIC_MODEL_PATH: get_topic_model_id, + ASSOCIATION_PATH: get_association_id, + TIME_SERIES_PATH: get_time_series_id, + OPTIML_PATH: get_optiml_id, + FUSION_PATH: get_fusion_id, + PREDICTION_PATH: get_prediction_id, + CENTROID_PATH: get_centroid_id, + ANOMALY_SCORE_PATH: get_anomaly_score_id, + TOPIC_DISTRIBUTION_PATH: get_topic_distribution_id, + ASSOCIATION_SET_PATH: get_association_set_id, + BATCH_PREDICTION_PATH: get_batch_prediction_id, + BATCH_CENTROID_PATH: get_batch_centroid_id, + BATCH_ANOMALY_SCORE_PATH: get_batch_anomaly_score_id, + BATCH_TOPIC_DISTRIBUTION_PATH: get_batch_topic_distribution_id, + FORECAST_PATH: get_forecast_id, + CORRELATION_PATH: get_correlation_id, + STATISTICAL_TEST_PATH: get_statistical_test_id, + SAMPLE_PATH: get_sample_id, + CONFIGURATION_PATH: get_configuration_id, + PCA_PATH: get_pca_id, + PROJECTION_PATH: get_projection_id, + BATCH_PROJECTION_PATH: get_batch_projection_id, + LINEAR_REGRESSION_PATH: get_linear_regression_id, + SCRIPT_PATH: get_script_id, + LIBRARY_PATH: get_library_id, + EXECUTION_PATH: get_execution_id, + EXTERNAL_CONNECTOR_PATH: get_external_connector_id +} + -############################################################################## -# -# BigML class -# -############################################################################## +PREDICTIONS = [PREDICTION_RE , PROJECTION_RE, ANOMALY_SCORE_RE, + CENTROID_RE, TOPIC_DISTRIBUTION_RE, ASSOCIATION_SET_RE] + +PREDICTION_LABELS = { + "anomalyscore": "score", + 
"topicdistribution": "topic distribution", + "associationset": "association set"} + + +def get_resources_re(exceptions=None): + """Returning the patterns that correspond to a filtered subset of + resources. + """ + if exceptions is None: + exceptions = {} + resources_re = list(RESOURCE_RE.values()) + for res_re in exceptions: + resources_re.remove(res_re) + return resources_re + + +NON_PREDICTIONS = get_resources_re(PREDICTIONS) + + +def get_prediction_label(resource_id): + """Gets the label to be prepended to predictions according to their type""" + resource_type = get_resource_type(resource_id) + return PREDICTION_LABELS.get(resource_type, resource_type) + + +#pylint: disable=locally-disabled,too-many-return-statements +def get_prediction_attr(resource): + """Getting the attribute that contains the prediction, score, etc. """ + if PREDICTION_RE.match(resource["resource"]): + return resource['object']['prediction'][ + resource['object']['objective_fields'][0]] + if PROJECTION_RE.match(resource["resource"]): + return resource["object"]["projection"]["result"] + if ANOMALY_SCORE_RE.match(resource["resource"]): + return resource["object"]["score"] + if CENTROID_RE.match(resource["resource"]): + return resource["object"]["centroid_name"] + if TOPIC_DISTRIBUTION_RE.match(resource["resource"]): + return resource["object"]["topic_distribution"]["result"] + if ASSOCIATION_SET_RE.match(resource["resource"]): + return resource["object"]["association_set"]["result"] + return "" + + +def count(listing): + """Count of existing resources + + """ + if 'meta' in listing and 'query_total' in listing['meta']: + return listing['meta']['query_total'] + return None -class BigML(object): +def filter_kwargs(kwargs, list_of_keys, out=False): + """Creates a new dict with the selected list of keys if present + If `out` is set to True, the keys in the list are removed + If `out` is set to False, only the keys in the list are kept + + """ + new_kwargs = {} + for key in kwargs: + if (key not 
in list_of_keys and out) or \ + (key in list_of_keys and not out): + new_kwargs[key] = kwargs[key] + return new_kwargs + + +class BigML(BigMLConnection,ExternalConnectorHandlerMixin, + LinearRegressionHandlerMixin, BatchProjectionHandlerMixin, + ProjectionHandlerMixin, PCAHandlerMixin, + ConfigurationHandlerMixin, FusionHandlerMixin, + OptimlHandlerMixin, + DeepnetHandlerMixin, ForecastHandlerMixin, TimeSeriesHandlerMixin, + BatchTopicDistributionHandlerMixin, TopicDistributionHandlerMixin, + TopicModelHandlerMixin, LibraryHandlerMixin, ExecutionHandlerMixin, + ScriptHandlerMixin, AssociationSetHandlerMixin, + AssociationHandlerMixin, LogisticRegressionHandlerMixin, + StatisticalTestHandlerMixin, CorrelationHandlerMixin, + SampleHandlerMixin, ProjectHandlerMixin, + BatchAnomalyScoreHandlerMixin, BatchCentroidHandlerMixin, + BatchPredictionHandlerMixin, EvaluationHandlerMixin, + AnomalyScoreHandlerMixin, AnomalyHandlerMixin, + CentroidHandlerMixin, ClusterHandlerMixin, PredictionHandlerMixin, + EnsembleHandlerMixin, ModelHandlerMixin, DatasetHandlerMixin, + SourceHandlerMixin, ResourceHandlerMixin): """Entry point to create, retrieve, list, update, and delete - sources, datasets, models and predictions. + BigML resources. Full API documentation on the API can be found from BigML at: - https://bigml.com/developers + https://bigml.com/api + + Resources are wrapped in a dictionary that includes: + code: HTTP status code + resource: The resource/id + location: Remote location of the resource + object: The resource itself + error: An error code and message + """ - def __init__(self, username=None, api_key=None, dev_mode=False): - """Initialize the BigML API. + def __init__(self, username=None, api_key=None, + debug=False, set_locale=False, storage=None, domain=None, + project=None, organization=None, short_debug=False): + """Initializes the BigML API. 
If left unspecified, `username` and `api_key` will default to the values of the `BIGML_USERNAME` and `BIGML_API_KEY` environment variables respectively. - If `dev_mode` is set to `True`, the API will be used in development - mode where the size of your datasets are limited but you are not - charged any credits. + `dev_mode` has been deprecated. Now all resources coexisit in the + same production environment. - """ - if username is None: - username = os.environ['BIGML_USERNAME'] - if api_key is None: - api_key = os.environ['BIGML_API_KEY'] + If storage is set to a directory name, the resources obtained in + CRU operations will be stored in the given directory. - self.auth = "?username=%s;api_key=%s;" % (username, api_key) - self.dev_mode = dev_mode + If domain is set, the api will point to the specified domain. Default + will be the one in the environment variable `BIGML_DOMAIN` or + `bigml.io` if missing. The expected domain argument is a string or a + Domain object. See Domain class for details. - if dev_mode: - self.URL = BIGML_DEV_URL - else: - self.URL = BIGML_URL - - # Base Resource URLs - self.SOURCE_URL = self.URL + SOURCE_PATH - self.DATASET_URL = self.URL + DATASET_PATH - self.MODEL_URL = self.URL + MODEL_PATH - self.PREDICTION_URL = self.URL + PREDICTION_PATH - - def _create(self, url, body): - """Create a new resource. 
""" - code = HTTP_INTERNAL_SERVER_ERROR - resource_id = None - location = None - resource = None - error = { - "status": { - "code": code, - "message": "The resource couldn't be created"}} - try: - response = requests.post(url + self.auth, headers=SEND_JSON, - data=body) - - code = response.status_code - - if code == HTTP_CREATED: - location = response.headers['location'] - resource = json.loads(response.content, 'utf-8') - resource_id = resource['resource'] - error = None - elif code in [ - HTTP_BAD_REQUEST, - HTTP_UNAUTHORIZED, - HTTP_PAYMENT_REQUIRED, - HTTP_NOT_FOUND]: - error = json.loads(response.content, 'utf-8') - else: - LOGGER.error("Unexpected error (%s)" % code) - code = HTTP_INTERNAL_SERVER_ERROR - - except ValueError: - LOGGER.error("Malformed response") - except requests.ConnectionError: - LOGGER.error("Connection error") - except requests.Timeout: - LOGGER.error("Request timed out") - except requests.RequestException: - LOGGER.error("Ambiguous exception occurred") + When project is set to a project ID, + the user is considered to be working in an + organization project. The scope of the API requests will be limited + to this project and permissions should be previously given by the + organization administrator. + + When organization is set to an organization ID, + the user is considered to be working for an + organization. The scope of the API requests will be limited to the + projects of the organization and permissions need to be previously + given by the organization administrator. 
- return { - 'code': code, - 'resource': resource_id, - 'location': location, - 'object': resource, - 'error': error} - - def _get(self, url): - """Retrieve a resource """ - code = HTTP_INTERNAL_SERVER_ERROR - resource_id = None - location = url - resource = None - error = { - "status": { - "code": HTTP_INTERNAL_SERVER_ERROR, - "message": "The resource couldn't be retrieved"}} + # first BigMLConnection needs to exist + super().__init__(username=username, api_key=api_key, + debug=debug, + set_locale=set_locale, storage=storage, + domain=domain, project=project, + organization=organization, + short_debug=short_debug) + # adding mixins properties + ResourceHandlerMixin.__init__(self) + SourceHandlerMixin.__init__(self) + DatasetHandlerMixin.__init__(self) + ModelHandlerMixin.__init__(self) + EnsembleHandlerMixin.__init__(self) + PredictionHandlerMixin.__init__(self) + ClusterHandlerMixin.__init__(self) + CentroidHandlerMixin.__init__(self) + AnomalyHandlerMixin.__init__(self) + AnomalyScoreHandlerMixin.__init__(self) + EvaluationHandlerMixin.__init__(self) + BatchPredictionHandlerMixin.__init__(self) + BatchCentroidHandlerMixin.__init__(self) + BatchAnomalyScoreHandlerMixin.__init__(self) + ProjectHandlerMixin.__init__(self) + SampleHandlerMixin.__init__(self) + CorrelationHandlerMixin.__init__(self) + StatisticalTestHandlerMixin.__init__(self) + LogisticRegressionHandlerMixin.__init__(self) + AssociationHandlerMixin.__init__(self) + AssociationSetHandlerMixin.__init__(self) + ScriptHandlerMixin.__init__(self) + ExecutionHandlerMixin.__init__(self) + LibraryHandlerMixin.__init__(self) + TopicModelHandlerMixin.__init__(self) + TopicDistributionHandlerMixin.__init__(self) + BatchTopicDistributionHandlerMixin.__init__(self) + TimeSeriesHandlerMixin.__init__(self) + ForecastHandlerMixin.__init__(self) + DeepnetHandlerMixin.__init__(self) + OptimlHandlerMixin.__init__(self) + FusionHandlerMixin.__init__(self) + ConfigurationHandlerMixin.__init__(self) + 
PCAHandlerMixin.__init__(self) + ProjectionHandlerMixin.__init__(self) + BatchProjectionHandlerMixin.__init__(self) + LinearRegressionHandlerMixin.__init__(self) + ExternalConnectorHandlerMixin.__init__(self) + self.status_url = "%s%s" % (self.url, STATUS_PATH) + + + self.getters = {} + for resource_type in RESOURCE_RE: + method_name = RENAMED_RESOURCES.get(resource_type, resource_type) + self.getters[resource_type] = getattr(self, "get_%s" % method_name) + self.creators = {} + for resource_type in RESOURCE_RE: + method_name = RENAMED_RESOURCES.get(resource_type, resource_type) + self.creators[resource_type] = getattr(self, + "create_%s" % method_name) + self.creaters = self.creators # to be deprecated + self.updaters = {} + for resource_type in RESOURCE_RE: + method_name = RENAMED_RESOURCES.get(resource_type, resource_type) + self.updaters[resource_type] = getattr(self, + "update_%s" % method_name) + self.deleters = {} + for resource_type in RESOURCE_RE: + method_name = RENAMED_RESOURCES.get(resource_type, resource_type) + self.deleters[resource_type] = getattr(self, + "delete_%s" % method_name) + self.listers = {} + for resource_type in RESOURCE_RE: + method_name = IRREGULAR_PLURALS.get( \ + resource_type, "%ss" % RENAMED_RESOURCES.get( \ + resource_type, resource_type)) + self.listers[resource_type] = getattr(self, + "list_%s" % method_name) + self.cloners = {} + for resource_type in CLONABLE_PATHS: + method_name = RENAMED_RESOURCES.get(resource_type, resource_type) + self.cloners[resource_type] = getattr(self, + "clone_%s" % method_name) + + def prepare_image_fields(self, model_info, input_data): + """Creating a source for each image field used by the model + that is found in input_data + """ + new_input_data = {} + new_input_data.update(input_data) + #pylint: disable=locally-disabled,broad-except try: - response = requests.get(url + self.auth, headers=ACCEPT_JSON) - code = response.status_code - - if code == HTTP_OK: - resource = json.loads(response.content, 
'utf-8') - resource_id = resource['resource'] - error = None - elif code in [HTTP_BAD_REQUEST, HTTP_UNAUTHORIZED, HTTP_NOT_FOUND]: - error = json.loads(response.content, 'utf-8') - else: - LOGGER.error("Unexpected error (%s)" % code) - code = HTTP_INTERNAL_SERVER_ERROR - - except ValueError: - LOGGER.error("Malformed response") - except requests.ConnectionError: - LOGGER.error("Connection error") - except requests.Timeout: - LOGGER.error("Request timed out") - except requests.RequestException: - LOGGER.error("Ambiguous exception occurred") + fields = self.get_fields(model_info) + image_fields = [field_pair for field_pair in fields.items() + if field_pair[1]["optype"] == "image"] + for image_field, value in image_fields: + if image_field in input_data: + key = image_field + filename = input_data[key] + elif value["name"] in input_data: + key = value["name"] + filename = input_data[key] + source = self.create_source(filename) + source = self.check_resource(source, + query_string=TINY_RESOURCE, + raise_on_error=True) + new_input_data[key] = source["resource"] + except Exception: + # Predict Server does not return the fields info, so we infer + for field, value in input_data.items(): + if isinstance(value, str) and os.path.isfile(value) and \ + is_image(value): + source = self.create_source(value) + source = self.check_resource(source, + query_string=TINY_RESOURCE, + raise_on_error=True) + new_input_data[field] = source["resource"] + + return new_input_data + + def create(self, resource_type, *args, **kwargs): + """Create resources - return { - 'code': code, - 'resource': resource_id, - 'location': location, - 'object': resource, - 'error': error} - - def _list(self, url, query_string=''): - """List resources """ - code = HTTP_INTERNAL_SERVER_ERROR - meta = None - resources = None - error = { - "status": { - "code": code, - "message": "The resource couldn't be listed"}} + finished = kwargs.get('finished', True) + create_kwargs = filter_kwargs(kwargs, + ['query_string', 
'finished'], + out=True) try: - response = requests.get(url + self.auth + query_string, - headers=ACCEPT_JSON) - - code = response.status_code - - if code == HTTP_OK: - resource = json.loads(response.content, 'utf-8') - meta = resource['meta'] - resources = resource['objects'] - error = None - elif code in [HTTP_BAD_REQUEST, HTTP_UNAUTHORIZED, HTTP_NOT_FOUND]: - error = json.loads(response.content, 'utf-8') - else: - LOGGER.error("Unexpected error (%s)" % code) - code = HTTP_INTERNAL_SERVER_ERROR - - except ValueError: - LOGGER.error("Malformed response") - except requests.ConnectionError: - LOGGER.error("Connection error") - except requests.Timeout: - LOGGER.error("Request timed out") - except requests.RequestException: - LOGGER.error("Ambiguous exception occurred") + resource_info = self.creators[resource_type](*args, + **create_kwargs) + except KeyError: + raise ValueError("Failed to create %s. This kind of resource" + " does not exist." % resource_type) + if finished and is_in_progress(resource_info): + ok_kwargs = filter_kwargs(kwargs, ['query_string']) + ok_kwargs.update({"error_retries": 5, "debug": self.debug}) + self.ok(resource_info, **ok_kwargs) + return resource_info + + def get(self, resource, **kwargs): + """Method to get resources - return { - 'code': code, - 'meta': meta, - 'objects': resources, - 'error': error} - - def _update(self, url, body): - """Update a resource """ - code = HTTP_INTERNAL_SERVER_ERROR - resource_id = None - location = url - resource = None - error = { - "status": { - "code": code, - "message": "The resource couldn't be updated"}} - + finished = kwargs.get('finished', True) + get_kwargs = filter_kwargs(kwargs, + ['finished'], + out=True) try: - response = requests.put(url + self.auth, - headers=SEND_JSON, - data=body) - - code = response.status_code - - if code == HTTP_ACCEPTED: - location = response.headers['location'] - resource = json.loads(response.content, 'utf-8') - resource_id = resource['resource'] - error = None - 
elif code in [ - HTTP_UNAUTHORIZED, - HTTP_PAYMENT_REQUIRED, - HTTP_METHOD_NOT_ALLOWED]: - error = json.loads(response.content, 'utf-8') - else: - LOGGER.error("Unexpected error (%s)" % code) - code = HTTP_INTERNAL_SERVER_ERROR - - except ValueError: - LOGGER.error("Malformed response") - except requests.ConnectionError: - LOGGER.error("Connection error") - except requests.Timeout: - LOGGER.error("Request timed out") - except requests.RequestException: - LOGGER.error("Ambiguous exception occurred") + resource_type = get_resource_type(resource) + resource_info = self.getters[resource_type](resource, **get_kwargs) + except KeyError: + raise ValueError("%s is not a resource or ID." % resource) + if finished and is_in_progress(resource_info): + ok_kwargs = filter_kwargs(kwargs, ['query_string']) + ok_kwargs.update({"error_retries": 5, "debug": self.debug}) + self.ok(resource_info, **ok_kwargs) + return resource_info + + def update(self, resource, changes, **kwargs): + """Method to update resources - return { - 'code': code, - 'resource': resource_id, - 'location': location, - 'object': resource, - 'error': error} - - def _delete(self, url): - """Delete a resource """ - code = HTTP_INTERNAL_SERVER_ERROR - error = { - "status": { - "code": code, - "message": "The resource couldn't be deleted"}} + finished = kwargs.get('finished', True) + try: + resource_type = get_resource_type(resource) + update_kwargs = filter_kwargs(kwargs, + ['query_string', 'finished'], + out=True) + resource_info = self.updaters[resource_type](resource, changes, + **update_kwargs) + except KeyError: + raise ValueError("%s is not a resource or ID." 
% resource) + if finished and is_in_progress(resource_info): + ok_kwargs = filter_kwargs(kwargs, ['query_string']) + ok_kwargs.update({"error_retries": 5, "debug": self.debug}) + self.ok(resource_info, **ok_kwargs) + return resource_info + + def delete(self, resource, **kwargs): + """Method to delete resources + """ try: - response = requests.delete(url + self.auth) + resource_type = get_resource_type(resource) + return self.deleters[resource_type](resource, **kwargs) + except KeyError: + raise ValueError("%s is not a resource." % resource) - code = response.status_code + def connection_info(self): + """Printable string: domain where the connection is bound and the + credentials used. - if code == HTTP_NO_CONTENT: - error = None - elif code in [HTTP_BAD_REQUEST, HTTP_UNAUTHORIZED, HTTP_NOT_FOUND]: - error = json.loads(response.content, 'utf-8') - else: - LOGGER.error("Unexpected error (%s)" % code) - code = HTTP_INTERNAL_SERVER_ERROR + """ + info = "Connecting to:\n" + info += " %s (%s)\n" % (self.domain.general_domain, + self.domain.api_version) + if self.domain.general_protocol != BIGML_PROTOCOL: + info += " using %s protocol\n" % self.domain.general_protocol + info += " SSL verification %s\n" % ( + "on" if self.domain.verify else "off") + short = "(shortened)" if self.short_debug else "" + if self.debug: + info += " Debug on %s\n" % short + if self.domain.general_domain != self.domain.prediction_domain: + info += " %s (predictions only)\n" % \ + self.domain.prediction_domain + if self.domain.prediction_protocol != BIGML_PROTOCOL: + info += " using %s protocol\n" % \ + self.domain.prediction_protocol + info += " SSL verification %s\n" % ( + "on" if self.domain.verify_prediction else "off") + + if self.project or self.organization: + info += " Scope info: %s\n" % \ + "%s\n %s" % (self.organization or "", + self.project or "") + info += "\nAuthentication string:\n" + info += " %s\n" % self.auth[1:] + return info + + def get_account_status(self, query_string=''): + 
"""Retrieve the account information: tasks, available_tasks, max_tasks, . + + Returns a dictionary with the summarized information about the account - except ValueError: - LOGGER.error("Malformed response") - except requests.ConnectionError: - LOGGER.error("Connection error") - except requests.Timeout: - LOGGER.error("Request timed out") - except requests.RequestException: - LOGGER.error("Ambiguous exception occurred") + """ + if self.organization is not None: + return self._status(self.status_url, + query_string=query_string, + organization=self.organization) + return self._status(self.status_url, query_string=query_string) + def get_tasks_status(self): + """Retrieve the tasks information of the account - return { - 'code': code, - 'error': error} + Returns a dictionary with the summarized information about the tasks - ########################################################################## - # - # Utils - # - ########################################################################## + """ + status = self.get_account_status(query_string=TASKS_QS) + if status["error"] is None: + status = status.get("object", {}) + return { + "tasks": status.get("tasks"), + "max_tasks": status.get("subscription", {}).get("max_tasks"), + "available_tasks": (status.get("subscription", + {}).get("max_tasks") + - status.get("tasks")), + "tasks_in_progress": status.get("tasks_in_progress"), + "error": None} + + return { + "tasks": 0, + "max_tasks": 0, + "available_tasks": 0, + "tasks_in_progress": 0, + "error": status["error"]} def get_fields(self, resource): - """Return a dictionary of fields""" + """Retrieve fields used by a resource. + + Returns a dictionary with the fields that uses + the resource keyed by Id. 
+ + """ + if isinstance(resource, dict) and 'resource' in resource: resource_id = resource['resource'] - elif (isinstance(resource, basestring) and ( - SOURCE_RE.match(resource) or DATASET_RE.match(resource) or - MODEL_RE.match(resource) or PREDICTION_RE.match(resource))): + elif isinstance(resource, str) and get_resource_type(resource) \ + in RESOURCES_WITH_FIELDS: resource_id = resource + resource = self.retrieve_resource(resource, + query_string=ALL_FIELDS) else: LOGGER.error("Wrong resource id") - return - - resource = self._get("%s%s" % (self.URL, resource_id)) - if resource['code'] == HTTP_OK: - if MODEL_RE.match(resource_id): - return resource['object']['model']['fields'] - else: - return resource['object']['fields'] - return None + return None + # Tries to extract fields information from resource dict. If it fails, + # a get remote call is used to retrieve the resource by id. + fields = None + try: + fields = get_fields(resource) + except KeyError: + resource = self._get("%s%s" % (self.url, resource_id)) + fields = get_fields(resource) + return fields - def invert_dictionary(self, dictionary): - """Invert a dictionary""" - return dict([[value['name'], key] - for key, value in dictionary.items()]) - - def pprint(self, resource): - """Pretty prints a resource or part of it""" - pretty_print = pprint.PrettyPrinter(indent=4) - if (isinstance(resource, dict) and - 'object' in resource and - 'resource' in resource): - if SOURCE_RE.match(resource['resource']): - print "%s (%s bytes)" % (resource['object']['name'], - resource['object']['size']) - elif DATASET_RE.match(resource['resource']): - print "%s (%s bytes)" % (resource['object']['name'], - resource['object']['size']) - elif MODEL_RE.match(resource['resource']): - print "%s (%s bytes)" % (resource['object']['name'], - resource['object']['size']) + #pylint: disable=locally-disabled,no-self-use + def pprint(self, resource, out=sys.stdout): + """Pretty prints a resource or part of it. 
+ + """ + + if (isinstance(resource, dict) + and 'object' in resource + and 'resource' in resource): + + resource_id = resource['resource'] + if (any(getattr(res_re, "match")(resource_id) for res_re + in NON_PREDICTIONS)): + out.write("%s (%s bytes)\n" % (resource['object']['name'], + resource['object']['size'])) elif PREDICTION_RE.match(resource['resource']): - objective_field_name = ( - resource['object']['fields'] - [resource['object']['objective_fields'][0]]['name']) - input_data = dict( - [[resource['object']['fields'][key]['name'], value] - for key, value in - resource['object']['input_data'].items()]) - prediction = ( - resource['object']['prediction'] - [resource['object']['objective_fields'][0]]) - print("%s for %s is %s" % (objective_field_name, input_data, - prediction)) + input_data = {} + for key, value in list(resource['object']['input_data'].items()): + try: + name = resource['object']['fields'][key]['name'] + except KeyError: + name = key + input_data[name] = value + try: + prediction_label = ( + resource['object']['fields'][ + resource['object']['objective_fields'][0]]['name']) + except IndexError: + prediction_label = get_prediction_label( + resource["resource"]) + prediction = get_prediction_attr(resource) + out.write("%s for %s is %s\n" % (prediction_label, + input_data, + prediction)) + out.flush() else: - pretty_print.pprint(resource) + pprint.pprint(resource, out, indent=4) def status(self, resource): - "Maps status code to string" + """Maps status code to string. 
- if isinstance(resource, dict) and 'resource' in resource: - resource_id = resource['resource'] - elif (isinstance(resource, basestring) and ( - SOURCE_RE.match(resource) or DATASET_RE.match(resource) or - MODEL_RE.match(resource) or PREDICTION_RE.match(resource))): - resource_id = resource - else: + """ + resource_id = get_resource_id(resource) + if resource_id: + resource = self._get("%s%s" % (self.url, resource_id)) + status = get_status(resource) + code = status['code'] + return STATUSES.get(code, "UNKNOWN") + + status = get_status(resource) + if status['code'] != UPLOADING: LOGGER.error("Wrong resource id") - return + return None + return STATUSES[UPLOADING] - resource = self._get("%s%s" % (self.URL, resource_id)) - code = resource['object']['status']['code'] - if code in STATUSES: - return STATUSES[code] - else: - return "UNKNOWN" - - ########################################################################## - # - # Sources - # https://bigml.com/developers/sources - # - ########################################################################## - def _create_remote_source(self, url, args=None): - """Create a new source. The source is available - in the given URL instead of being a file - in a local path.""" - if args is None: - args = {} - args.update({"remote": url}) - body = json.dumps(args) - return self._create(self.SOURCE_URL, body) - - def _create_local_source(self, file_name, args=None): - """Create a new source. 
The souce is a file in - a local path.""" - if args is None: - args = {} - elif 'source_parser' in args: - args['source_parser'] = json.dumps(args['source_parser']) - - code = HTTP_INTERNAL_SERVER_ERROR - resource_id = None - location = None - resource = None - error = { - "status": { - "code": code, - "message": "The resource couldn't be deleted"}} - - files = {os.path.basename(file_name): open(file_name, "rb")} - try: - response = requests.post(self.SOURCE_URL + self.auth, - files=files, - data=args) - - code = response.status_code - - if code == HTTP_CREATED: - location = response.headers['location'] - resource = json.loads(response.content, 'utf-8') - resource_id = resource['resource'] - error = None - elif code in [ - HTTP_BAD_REQUEST, - HTTP_UNAUTHORIZED, - HTTP_PAYMENT_REQUIRED, - HTTP_NOT_FOUND]: - error = json.loads(response.content, 'utf-8') - else: - LOGGER.error("Unexpected error (%s)" % code) - code = HTTP_INTERNAL_SERVER_ERROR - - except ValueError: - LOGGER.error("Malformed response") - except requests.ConnectionError: - LOGGER.error("Connection error") - except requests.Timeout: - LOGGER.error("Request timed out") - except requests.RequestException: - LOGGER.error("Ambiguous exception occurred") + def check_resource(self, resource, + query_string='', wait_time=1, retries=None, + raise_on_error=False): + """Check resource method. - return { - 'code': code, - 'resource': resource_id, - 'location': location, - 'object': resource, - 'error': error} - - def create_source(self, path, args=None): - """Create a new source. 
- The souce can be provided as a local file - path or as a URL.""" - if not path: - raise Exception('Source local path or a URL must be provided.') - - if _is_valid_remote_url(path): - return self._create_remote_source(url=path, args=args) - else: - return self._create_local_source(file_name=path, args=args) - - def get_source(self, source): - """Retrieve a source.""" - if isinstance(source, dict) and 'resource' in source: - source_id = source['resource'] - elif isinstance(source, basestring) and SOURCE_RE.match(source): - source_id = source - else: - LOGGER.error("Wrong source id") - return - return self._get("%s%s" % (self.URL, source_id)) - - def source_is_ready(self, source): - """Check whether a source' status is FINISHED.""" - source = self.get_source(source) - return (source['code'] == HTTP_OK and - source['object']['status']['code'] == FINISHED) - - def list_sources(self, query_string=''): - """List all your sources.""" - return self._list(self.SOURCE_URL, query_string) - - def update_source(self, source, changes): - """Update a source.""" - if isinstance(source, dict) and 'resource' in source: - source_id = source['resource'] - elif isinstance(source, basestring) and SOURCE_RE.match(source): - source_id = source - else: - LOGGER.error("Wrong source id") - return - - body = json.dumps(changes) - return self._update("%s%s" % (self.URL, source_id), body) - - def delete_source(self, source): - """Delete a source.""" - if isinstance(source, dict) and 'resource' in source: - source_id = source['resource'] - elif isinstance(source, basestring) and SOURCE_RE.match(source): - source_id = source - else: - LOGGER.error("Wrong source id") - return - - return self._delete("%s%s" % (self.URL, source_id)) - - ########################################################################## - # - # Datasets - # https://bigml.com/developers/datasets - # - ########################################################################## - def create_dataset(self, source, args=None, 
wait_time=3): - """Create a dataset.""" - if isinstance(source, dict) and 'resource' in source: - source_id = source['resource'] - elif isinstance(source, basestring) and SOURCE_RE.match(source): - source_id = source - else: - LOGGER.error("Wrong source id") - return - - if wait_time > 0: - while not self.source_is_ready(source_id): - time.sleep(wait_time) - - if args is None: - args = {} - args.update({ - "source": source_id}) - body = json.dumps(args) - return self._create(self.DATASET_URL, body) - - def get_dataset(self, dataset): - """Retrieve a dataset.""" - if isinstance(dataset, dict) and 'resource' in dataset: - dataset_id = dataset['resource'] - elif isinstance(dataset, basestring) and DATASET_RE.match(dataset): - dataset_id = dataset - else: - LOGGER.error("Wrong dataset id") - return - return self._get("%s%s" % (self.URL, dataset_id)) - - def dataset_is_ready(self, dataset): - """Check whether a dataset' status is FINISHED.""" - resource = self.get_dataset(dataset) - return (resource['code'] == HTTP_OK and - resource['object']['status']['code'] == FINISHED) - - def list_datasets(self, query_string=''): - """List all your datasets.""" - return self._list(self.DATASET_URL, query_string) - - def update_dataset(self, dataset, changes): - """Update a dataset.""" - if isinstance(dataset, dict) and 'resource' in dataset: - dataset_id = dataset['resource'] - elif isinstance(dataset, basestring) and DATASET_RE.match(dataset): - dataset_id = dataset - else: - LOGGER.error("Wrong dataset id") - return - - body = json.dumps(changes) - return self._update("%s%s" % (self.URL, dataset_id), body) - - def delete_dataset(self, dataset): - """Delete a dataset.""" - if isinstance(dataset, dict) and 'resource' in dataset: - dataset_id = dataset['resource'] - elif isinstance(dataset, basestring) and DATASET_RE.match(dataset): - dataset_id = dataset - else: - LOGGER.error("Wrong dataset id") - return - - return self._delete("%s%s" % (self.URL, dataset_id)) - - 
########################################################################## - # - # Models - # https://bigml.com/developers/models - # - ########################################################################## - def create_model(self, dataset, args=None, wait_time=3): - """Create a model.""" - if isinstance(dataset, dict) and 'resource' in dataset: - dataset_id = dataset['resource'] - elif isinstance(dataset, basestring) and DATASET_RE.match(dataset): - dataset_id = dataset - else: - LOGGER.error("Wrong dataset id") - return - - if wait_time > 0: - while not self.dataset_is_ready(dataset_id): - time.sleep(wait_time) - - if args is None: - args = {} - args.update({ - "dataset": dataset_id}) - body = json.dumps(args) - return self._create(self.MODEL_URL, body) - - def get_model(self, model): - """Retrieve a model.""" - if isinstance(model, dict) and 'resource' in model: - model_id = model['resource'] - elif isinstance(model, basestring) and MODEL_RE.match(model): - model_id = model - else: - LOGGER.error("Wrong model id") - return - - return self._get("%s%s" % (self.URL, model_id)) - - def model_is_ready(self, model): - """Check whether a model' status is FINISHED.""" - resource = self.get_model(model) - return (resource['code'] == HTTP_OK and - resource['object']['status']['code'] == FINISHED) - - def list_models(self, query_string=''): - """List all your models.""" - return self._list(self.MODEL_URL, query_string) - - def update_model(self, model, changes): - """Update a model.""" - if isinstance(model, dict) and 'resource' in model: - model_id = model['resource'] - elif isinstance(model, basestring) and MODEL_RE.match(model): - model_id = model - else: - LOGGER.error("Wrong model id") - return - - body = json.dumps(changes) - return self._update("%s%s" % (self.URL, model_id), body) - - def delete_model(self, model): - """Delete a model.""" - if isinstance(model, dict) and 'resource' in model: - model_id = model['resource'] - elif isinstance(model, basestring) and 
MODEL_RE.match(model): - model_id = model - else: - LOGGER.error("Wrong model id") - return - - return self._delete("%s%s" % (self.URL, model_id)) - - ########################################################################## - # - # Predictions - # https://bigml.com/developers/predictions - # - ########################################################################## - def create_prediction(self, model, input_data=None, args=None, - wait_time=3): - """Create a new prediction.""" - if isinstance(model, dict) and 'resource' in model: - model_id = model['resource'] - elif isinstance(model, basestring) and MODEL_RE.match(model): - model_id = model - else: - LOGGER.error("Wrong model id") - return + """ + return check_resource(resource, + query_string=query_string, wait_time=wait_time, + retries=retries, raise_on_error=raise_on_error, + api=self) - if wait_time > 0: - while not self.model_is_ready(model_id): - time.sleep(wait_time) + def source_from_batch_prediction(self, batch_prediction, args=None): + """Creates a source from a batch prediction using the download url - if input_data is None: - input_data = {} - else: - fields = self.get_fields(model_id) - inverted_fields = self.invert_dictionary(fields) - try: - input_data = dict( - [[inverted_fields[key], value] - for key, value in input_data.items()]) - except KeyError, field: - LOGGER.error("Wrong field name %s" % field) - - if args is None: - args = {} - args.update({ - "model": model_id, - "input_data": input_data}) - body = json.dumps(args) - return self._create(self.PREDICTION_URL, body) - - def get_prediction(self, prediction): - """Retrieve a prediction.""" - if isinstance(prediction, dict) and 'resource' in prediction: - prediction_id = prediction['resource'] - elif (isinstance(prediction, basestring) and - PREDICTION_RE.match(prediction)): - prediction_id = prediction - else: - LOGGER.error("Wrong prediction id") - return - - return self._get("%s%s" % (self.URL, prediction_id)) - - def 
list_predictions(self, query_string=''): - """List all your predictions.""" - return self._list(self.PREDICTION_URL, query_string) - - def update_prediction(self, prediction, changes): - """Update a prediction.""" - if isinstance(prediction, dict) and 'resource' in prediction: - prediction_id = prediction['resource'] - elif (isinstance(prediction, basestring) and - PREDICTION_RE.match(prediction)): - prediction_id = prediction - else: - LOGGER.error("Wrong prediction id") - return - - body = json.dumps(changes) - return self._update("%s%s" % (self.URL, prediction_id), body) - - def delete_prediction(self, prediction): - """Delete a prediction.""" - if isinstance(prediction, dict) and 'resource' in prediction: - prediction_id = prediction['resource'] - elif (isinstance(prediction, basestring) and - PREDICTION_RE.match(prediction)): - prediction_id = prediction - else: - LOGGER.error("Wrong prediction id") - return + """ + check_resource_type(batch_prediction, BATCH_PREDICTION_PATH, + message="A batch prediction id is needed.") + batch_prediction_id = get_batch_prediction_id(batch_prediction) + if batch_prediction_id: + download_url = "%s%s%s%s" % (self.url, batch_prediction_id, + DOWNLOAD_DIR, self.auth) + return self._create_remote_source(download_url, args=args) + return None - return self._delete("%s%s" % - (self.URL, prediction_id)) + def retrieve_resource(self, resource_id, query_string=None, + check_local_fn=None, retries=None): + """ Retrieves resource info either from the local repo or + from the remote server + + """ + if query_string is None: + query_string = '' + if self.storage is not None: + try: + stored_resource = os.path.join(self.storage, + resource_id.replace("/", "_")) + with open(stored_resource) as resource_file: + resource = json.loads(resource_file.read()) + # we check that the stored resource has the information + # needed (for instance, input_fields for predicting) + if check_local_fn is None or check_local_fn(resource): + return resource + 
except ValueError: + raise ValueError("The file %s contains no JSON" % + stored_resource) + except IOError: + pass + if self.auth == '?username=;api_key=;': + raise ValueError("The credentials information is missing. This" + " information is needed to download resource %s" + " for the first time and store it locally for further" + " use. Please export BIGML_USERNAME" + " and BIGML_API_KEY." % resource_id) + + resource = check_resource(resource_id, query_string=query_string, + api=self, retries=retries) + return resource + + +def get_api_connection(api, store=True, context=None): + """Checks whether there's a valid api connection. If there's not + such object, it creates a default connection with the credentials + and other attributes provided in the context dictionary + + api: (BigML) customized api connection (if provided) + store: (boolean) use storage when creating the connection + context: (dict) parameters to be provided when creating the connection + """ + if api is None or not isinstance(api, BigML): + if context is None: + context = {} + storage = context.get("storage") or STORAGE + context.update({"storage": storage} if store else {}) + try: + api = BigML(**context) + except AttributeError: + context.update({"username": "", "api_key": ""}) + api = BigML(**context) + # API connection with + # False credentials is returned. 
It can only access the + # local resources stored in the storage directory when present + return api diff --git a/tests/features/__init__.py b/bigml/api_handlers/__init__.py similarity index 100% rename from tests/features/__init__.py rename to bigml/api_handlers/__init__.py diff --git a/bigml/api_handlers/anomalyhandler.py b/bigml/api_handlers/anomalyhandler.py new file mode 100644 index 00000000..03ece5e2 --- /dev/null +++ b/bigml/api_handlers/anomalyhandler.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Base class for anomaly detectors' REST calls + + https://bigml.com/api/anomalies + +""" + +try: + import simplejson as json +except ImportError: + import json + + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + resource_is_ready +from bigml.constants import ANOMALY_PATH + + +class AnomalyHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the REST calls models. It should not + be instantiated independently. + + """ + def __init__(self): + """Initializes the AnomalyHandler. This class is intended to be + used as a mixin on ResourceHandler, that inherits its + attributes and basic method from BigMLConnection, and must not be + instantiated independently. 
+ + """ + self.anomaly_url = self.url + ANOMALY_PATH + + def create_anomaly(self, datasets, args=None, wait_time=3, retries=10): + """Creates an anomaly detector from a `dataset` or a list o `datasets`. + + """ + create_args = self._set_create_from_datasets_args( + datasets, args=args, wait_time=wait_time, retries=retries) + + body = json.dumps(create_args) + return self._create(self.anomaly_url, body) + + def get_anomaly(self, anomaly, query_string='', + shared_username=None, shared_api_key=None): + """Retrieves an anomaly detector. + + The anomaly parameter should be a string containing the + anomaly id or the dict returned by create_anomaly. + As the anomaly detector is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, the function will + return a dict that encloses the model values and state info + available at the time it is called. + + If this is a shared anomaly detector, the username and sharing api + key must also be provided. + """ + check_resource_type(anomaly, ANOMALY_PATH, + message="A anomaly id is needed.") + return self.get_resource( \ + anomaly, + query_string=query_string, + shared_username=shared_username, + shared_api_key=shared_api_key) + + def anomaly_is_ready(self, anomaly, **kwargs): + """Checks whether an anomaly detector's status is FINISHED. + + """ + check_resource_type(anomaly, ANOMALY_PATH, + message="An anomaly id is needed.") + resource = self.get_anomaly(anomaly, **kwargs) + return resource_is_ready(resource) + + def list_anomalies(self, query_string=''): + """Lists all your anomaly detectors. + + """ + return self._list(self.anomaly_url, query_string) + + def update_anomaly(self, anomaly, changes): + """Updates an anomaly detector. + + """ + check_resource_type(anomaly, ANOMALY_PATH, + message="An anomaly detector id is needed.") + return self.update_resource(anomaly, changes) + + def delete_anomaly(self, anomaly, query_string=''): + """Deletes an anomaly detector. 
+ + """ + check_resource_type(anomaly, ANOMALY_PATH, + message="An anomaly detector id is needed.") + return self.delete_resource(anomaly, query_string=query_string) + + def clone_anomaly(self, anomaly, + args=None, wait_time=3, retries=10): + """Creates a cloned anomaly from an existing `anomaly` + + """ + create_args = self._set_clone_from_args( + anomaly, "anomaly", args=args, wait_time=wait_time, + retries=retries) + + body = json.dumps(create_args) + return self._create(self.anomaly_url, body) diff --git a/bigml/api_handlers/anomalyscorehandler.py b/bigml/api_handlers/anomalyscorehandler.py new file mode 100644 index 00000000..1398d539 --- /dev/null +++ b/bigml/api_handlers/anomalyscorehandler.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Base class for anomaly scores' REST calls + + https://bigml.com/api/anomalyscores + +""" + +try: + import simplejson as json +except ImportError: + import json + + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + get_resource_type, check_resource, get_anomaly_id +from bigml.constants import ANOMALY_SCORE_PATH, ANOMALY_PATH, \ + IMAGE_FIELDS_FILTER, SPECIFIC_EXCLUDES + + +class AnomalyScoreHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the REST calls models. 
It should not + be instantiated independently. + + """ + def __init__(self): + """Initializes the AnomalyScoreHandler. This class is intended to be + used as a mixin on ResourceHandler, that inherits its + attributes and basic method from BigMLConnection, and must not be + instantiated independently. + + """ + self.anomaly_score_url = self.prediction_base_url + ANOMALY_SCORE_PATH + + def create_anomaly_score(self, anomaly, input_data=None, + args=None, wait_time=3, retries=10): + """Creates a new anomaly score. + + """ + anomaly_id = None + resource_type = get_resource_type(anomaly) + if resource_type != ANOMALY_PATH: + raise Exception("An anomaly detector id is needed to create an" + " anomaly score. %s found." % resource_type) + + anomaly_id = get_anomaly_id(anomaly) + if anomaly_id is None: + raise Exception("Failed to detect a correct anomaly detector " + "structure in %s." % anomaly) + + if isinstance(anomaly, dict) and anomaly.get("resource") is not None: + # retrieving fields info from model structure + model_info = anomaly + else: + # minimal info to check status and prepare image fields + image_fields_filter = IMAGE_FIELDS_FILTER + "," + \ + ",".join(SPECIFIC_EXCLUDES[resource_type]) + model_info = check_resource(anomaly_id, + query_string=image_fields_filter, + wait_time=wait_time, + retries=retries, + raise_on_error=True, + api=self) + + if input_data is None: + input_data = {} + create_args = {} + if args is not None: + create_args.update(args) + create_args.update({ + "input_data": self.prepare_image_fields(model_info, input_data)}) + create_args.update({ + "anomaly": anomaly_id}) + + body = json.dumps(create_args) + return self._create(self.anomaly_score_url, body, + verify=self.domain.verify_prediction) + + def get_anomaly_score(self, anomaly_score, query_string=''): + """Retrieves an anomaly score. 
+ + """ + check_resource_type(anomaly_score, ANOMALY_SCORE_PATH, + message="An anomaly score id is needed.") + return self.get_resource(anomaly_score, query_string=query_string) + + def list_anomaly_scores(self, query_string=''): + """Lists all your anomaly_scores. + + """ + return self._list(self.anomaly_score_url, query_string) + + def update_anomaly_score(self, anomaly_score, changes): + """Updates an anomaly_score. + + """ + check_resource_type(anomaly_score, ANOMALY_SCORE_PATH, + message="An anomaly_score id is needed.") + return self.update_resource(anomaly_score, changes) + + def delete_anomaly_score(self, anomaly_score, query_string=''): + """Deletes an anomaly_score. + + """ + check_resource_type(anomaly_score, ANOMALY_SCORE_PATH, + message="An anomaly_score id is needed.") + return self.delete_resource(anomaly_score, query_string=query_string) diff --git a/bigml/api_handlers/associationhandler.py b/bigml/api_handlers/associationhandler.py new file mode 100644 index 00000000..994a0050 --- /dev/null +++ b/bigml/api_handlers/associationhandler.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +"""Base class for associations' REST calls + + https://bigml.com/api/associations + +""" + +try: + import simplejson as json +except ImportError: + import json + + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type +from bigml.constants import ASSOCIATION_PATH + + +class AssociationHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the correlations' REST calls. It should not + be instantiated independently. + + """ + def __init__(self): + """Initializes the CorrelationHandler. This class is intended to be + used as a mixin on ResourceHandler, that inherits its + attributes and basic method from BigMLConnection, and must not be + instantiated independently. + + """ + self.association_url = self.url + ASSOCIATION_PATH + + def create_association(self, datasets, args=None, wait_time=3, retries=10): + """Creates an association from a `dataset`. + + """ + create_args = self._set_create_from_datasets_args( + datasets, args=args, wait_time=wait_time, retries=retries) + + body = json.dumps(create_args) + return self._create(self.association_url, body) + + def get_association(self, association, query_string=''): + """Retrieves an association. + + The association parameter should be a string containing the + association id or the dict returned by create_association. + As association is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, the function will + return a dict that encloses the association values and state info + available at the time it is called. + """ + check_resource_type(association, ASSOCIATION_PATH, + message="An association id is needed.") + return self.get_resource(association, query_string=query_string) + + def list_associations(self, query_string=''): + """Lists all your associations. 
+ + """ + return self._list(self.association_url, query_string) + + def update_association(self, association, changes): + """Updates a association. + + """ + check_resource_type(association, ASSOCIATION_PATH, + message="An association id is needed.") + return self.update_resource(association, changes) + + def delete_association(self, association, query_string=''): + """Deletes an association. + + """ + check_resource_type(association, ASSOCIATION_PATH, + message="An association id is needed.") + return self.delete_resource(association, query_string=query_string) + + def clone_association(self, association, + args=None, wait_time=3, retries=10): + """Creates a cloned association from an existing `association` + + """ + create_args = self._set_clone_from_args( + association, "association", args=args, wait_time=wait_time, + retries=retries) + + body = json.dumps(create_args) + return self._create(self.association_url, body) diff --git a/bigml/api_handlers/associationsethandler.py b/bigml/api_handlers/associationsethandler.py new file mode 100644 index 00000000..f1c13bb1 --- /dev/null +++ b/bigml/api_handlers/associationsethandler.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +"""Base class for associationset' REST calls + + https://bigml.com/api/associationset + +""" + +try: + import simplejson as json +except ImportError: + import json + + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + get_resource_type, check_resource, get_association_id +from bigml.constants import ASSOCIATION_SET_PATH, ASSOCIATION_PATH, \ + IMAGE_FIELDS_FILTER, SPECIFIC_EXCLUDES + + +class AssociationSetHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the REST calls models. It should not + be instantiated independently. + + """ + def __init__(self): + """Initializes the AssociationSetHandler. This class is intended to be + used as a mixin on ResourceHandler, that inherits its + attributes and basic method from BigMLConnection, and must not be + instantiated independently. + + """ + self.association_set_url = self.url + ASSOCIATION_SET_PATH + + def create_association_set(self, association, input_data=None, + args=None, wait_time=3, retries=10): + """Creates a new association set. + + """ + association_id = None + resource_type = get_resource_type(association) + if resource_type != ASSOCIATION_PATH: + raise Exception("An association id is needed to create an" + " association set. %s found." % resource_type) + + association_id = get_association_id(association) + if association_id is None: + raise Exception("Failed to detect a correct association " + "structure in %s." 
% association) + + if isinstance(association, dict) and \ + association.get("resource") is not None: + # retrieving fields info from model structure + model_info = association + else: + image_fields_filter = IMAGE_FIELDS_FILTER + "," + \ + ",".join(SPECIFIC_EXCLUDES[resource_type]) + model_info = check_resource(association_id, + query_string=image_fields_filter, + wait_time=wait_time, + retries=retries, + raise_on_error=True, + api=self) + + if input_data is None: + input_data = {} + create_args = {} + if args is not None: + create_args.update(args) + create_args.update({ + "input_data": self.prepare_image_fields(model_info, input_data)}) + create_args.update({ + "association": association_id}) + + body = json.dumps(create_args) + return self._create(self.association_set_url, body, + verify=self.domain.verify_prediction) + + def get_association_set(self, association_set, query_string=''): + """Retrieves an association set. + + """ + check_resource_type(association_set, ASSOCIATION_SET_PATH, + message="An association set id is needed.") + return self.get_resource(association_set, query_string=query_string) + + def list_association_sets(self, query_string=''): + """Lists all your association sets. + + """ + return self._list(self.association_set_url, query_string) + + def update_association_set(self, association_set, changes): + """Updates a association set. + + """ + check_resource_type(association_set, ASSOCIATION_SET_PATH, + message="An association set id is needed.") + return self.update_resource(association_set, changes) + + def delete_association_set(self, association_set, query_string=''): + """Deletes an association set. 
+ + """ + check_resource_type(association_set, ASSOCIATION_SET_PATH, + message="An association set id is needed.") + return self.delete_resource(association_set, query_string=query_string) diff --git a/bigml/api_handlers/batchanomalyscorehandler.py b/bigml/api_handlers/batchanomalyscorehandler.py new file mode 100644 index 00000000..07516a27 --- /dev/null +++ b/bigml/api_handlers/batchanomalyscorehandler.py @@ -0,0 +1,120 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Base class for batch anomaly scores' REST calls + + https://bigml.com/api/batchanomalyscores + +""" + +try: + import simplejson as json +except ImportError: + import json + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type +from bigml.constants import BATCH_ANOMALY_SCORE_PATH, ANOMALY_PATH + + +class BatchAnomalyScoreHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the REST calls models. It should not + be instantiated independently. + + """ + def __init__(self): + """Initializes the BatchAnomalyScoreHandler. This class is intended + to be used as a mixin on ResourceHandler, that inherits its + attributes and basic method from BigMLConnection, and must not be + instantiated independently. 
+ + """ + self.batch_anomaly_score_url = self.prediction_base_url + \ + BATCH_ANOMALY_SCORE_PATH + + def create_batch_anomaly_score(self, anomaly, dataset, + args=None, wait_time=3, retries=10): + """Creates a new batch anomaly score. + + + """ + create_args = {} + if args is not None: + create_args.update(args) + + origin_resources_checked = self.check_origins( + dataset, anomaly, create_args, model_types=[ANOMALY_PATH], + wait_time=wait_time, retries=retries) + + if origin_resources_checked: + body = json.dumps(create_args) + return self._create(self.batch_anomaly_score_url, body) + return None + + def get_batch_anomaly_score(self, batch_anomaly_score, query_string=''): + """Retrieves a batch anomaly score. + + The batch_anomaly_score parameter should be a string containing the + batch_anomaly_score id or the dict returned by + create_batch_anomaly_score. + As batch_anomaly_score is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, the function will + return a dict that encloses the batch_anomaly_score values and state + info available at the time it is called. + """ + check_resource_type(batch_anomaly_score, BATCH_ANOMALY_SCORE_PATH, + message="A batch anomaly score id is needed.") + return self.get_resource(batch_anomaly_score, + query_string=query_string) + + def download_batch_anomaly_score(self, batch_anomaly_score, filename=None, + retries=10): + """Retrieves the batch anomaly score file. + + Downloads anomaly scores, that are stored in a remote CSV file. If + a path is given in filename, the contents of the file are downloaded + and saved locally. A file-like object is returned otherwise. + """ + check_resource_type(batch_anomaly_score, BATCH_ANOMALY_SCORE_PATH, + message="A batch anomaly score id is needed.") + return self._download_resource(batch_anomaly_score, + filename, + retries=retries) + + def list_batch_anomaly_scores(self, query_string=''): + """Lists all your batch anomaly scores. 
+ + """ + return self._list(self.batch_anomaly_score_url, query_string) + + def update_batch_anomaly_score(self, batch_anomaly_score, changes): + """Updates a batch anomaly score. + + """ + check_resource_type(batch_anomaly_score, BATCH_ANOMALY_SCORE_PATH, + message="A batch anomaly score id is needed.") + return self.update_resource(batch_anomaly_score, changes) + + def delete_batch_anomaly_score(self, batch_anomaly_score, query_string=''): + """Deletes a batch anomaly score. + + """ + check_resource_type(batch_anomaly_score, BATCH_ANOMALY_SCORE_PATH, + message="A batch anomaly score id is needed.") + return self.delete_resource(batch_anomaly_score, + query_string=query_string) diff --git a/bigml/api_handlers/batchcentroidhandler.py b/bigml/api_handlers/batchcentroidhandler.py new file mode 100644 index 00000000..79c25f52 --- /dev/null +++ b/bigml/api_handlers/batchcentroidhandler.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License.
+ +"""Base class for batch centroids' REST calls + + https://bigml.com/api/batchcentroids + +""" + +try: + import simplejson as json +except ImportError: + import json + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type +from bigml.constants import BATCH_CENTROID_PATH, CLUSTER_PATH + + +class BatchCentroidHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the batch centroids' REST calls. It should not + be instantiated independently. + + """ + def __init__(self): + """Initializes the BatchCentroidHandler. This class is intended to be + used as a mixin on ResourceHandler, that inherits its + attributes and basic methods from BigMLConnection, and must not be + instantiated independently. + + """ + self.batch_centroid_url = self.prediction_base_url \ + + BATCH_CENTROID_PATH + + def create_batch_centroid(self, cluster, dataset, + args=None, wait_time=3, retries=10): + """Creates a new batch centroid. + + Returns None when `check_origins` returns a false value. + """ + create_args = {} + if args is not None: + create_args.update(args) + + origin_resources_checked = self.check_origins( + dataset, cluster, create_args, model_types=[CLUSTER_PATH], + wait_time=wait_time, retries=retries) + + if origin_resources_checked: + body = json.dumps(create_args) + return self._create(self.batch_centroid_url, body) + return None + + def get_batch_centroid(self, batch_centroid, query_string=''): + """Retrieves a batch centroid. + + The batch_centroid parameter should be a string containing the + batch_centroid id or the dict returned by create_batch_centroid. + As batch_centroid is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, the function will + return a dict that encloses the batch_centroid values and state + info available at the time it is called.
+ """ + check_resource_type(batch_centroid, BATCH_CENTROID_PATH, + message="A batch centroid id is needed.") + return self.get_resource(batch_centroid, query_string=query_string) + + def download_batch_centroid(self, batch_centroid, filename=None, + retries=10): + """Retrieves the batch centroid file. + + Downloads centroids, that are stored in a remote CSV file. If + a path is given in filename, the contents of the file are downloaded + and saved locally. A file-like object is returned otherwise. + """ + check_resource_type(batch_centroid, BATCH_CENTROID_PATH, + message="A batch centroid id is needed.") + return self._download_resource(batch_centroid, filename, + retries=retries) + + def list_batch_centroids(self, query_string=''): + """Lists all your batch centroids. + + """ + return self._list(self.batch_centroid_url, query_string) + + def update_batch_centroid(self, batch_centroid, changes): + """Updates a batch centroid. + + """ + check_resource_type(batch_centroid, BATCH_CENTROID_PATH, + message="A batch centroid id is needed.") + return self.update_resource(batch_centroid, changes) + + def delete_batch_centroid(self, batch_centroid, query_string=''): + """Deletes a batch centroid. + + """ + check_resource_type(batch_centroid, BATCH_CENTROID_PATH, + message="A batch centroid id is needed.") + return self.delete_resource(batch_centroid, query_string=query_string) diff --git a/bigml/api_handlers/batchpredictionhandler.py b/bigml/api_handlers/batchpredictionhandler.py new file mode 100644 index 00000000..462d127a --- /dev/null +++ b/bigml/api_handlers/batchpredictionhandler.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Base class for batch predictions' REST calls + + https://bigml.com/api/batchpredictions + +""" + +try: + import simplejson as json +except ImportError: + import json + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type +from bigml.constants import BATCH_PREDICTION_PATH, SUPERVISED_PATHS + + +class BatchPredictionHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the batch predictions' REST calls. It should not + be instantiated independently. + + """ + def __init__(self): + """Initializes the BatchPredictionHandler. This class is intended to be + used as a mixin on ResourceHandler, that inherits its + attributes and basic methods from BigMLConnection, and must not be + instantiated independently. + + """ + self.batch_prediction_url = (self.prediction_base_url + + BATCH_PREDICTION_PATH) + + def create_batch_prediction(self, model, dataset, + args=None, wait_time=3, retries=10): + """Creates a new batch prediction.
+ + The model parameter can be: + - a simple model + - an ensemble + + """ + create_args = {} + if args is not None: + create_args.update(args) + + origin_resources_checked = self.check_origins( + dataset, model, create_args, model_types=SUPERVISED_PATHS, + wait_time=wait_time, retries=retries) + if origin_resources_checked: + body = json.dumps(create_args) + return self._create(self.batch_prediction_url, body) + return None + + def get_batch_prediction(self, batch_prediction, query_string=''): + """Retrieves a batch prediction. + + The batch_prediction parameter should be a string containing the + batch_prediction id or the dict returned by create_batch_prediction. + As batch_prediction is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, the function will + return a dict that encloses the batch_prediction values and state + info available at the time it is called. + """ + check_resource_type(batch_prediction, BATCH_PREDICTION_PATH, + message="A batch prediction id is needed.") + return self.get_resource(batch_prediction, query_string=query_string) + + def download_batch_prediction(self, batch_prediction, filename=None, + retries=10): + """Retrieves the batch predictions file. + + Downloads predictions, that are stored in a remote CSV file. If + a path is given in filename, the contents of the file are downloaded + and saved locally. A file-like object is returned otherwise. + """ + check_resource_type(batch_prediction, BATCH_PREDICTION_PATH, + message="A batch prediction id is needed.") + return self._download_resource(batch_prediction, filename, + retries=retries) + + def list_batch_predictions(self, query_string=''): + """Lists all your batch predictions. + + """ + return self._list(self.batch_prediction_url, query_string) + + def update_batch_prediction(self, batch_prediction, changes): + """Updates a batch prediction. 
+ + """ + check_resource_type(batch_prediction, BATCH_PREDICTION_PATH, + message="A batch prediction id is needed.") + return self.update_resource(batch_prediction, changes) + + def delete_batch_prediction(self, batch_prediction, query_string=''): + """Deletes a batch prediction. + + """ + check_resource_type(batch_prediction, BATCH_PREDICTION_PATH, + message="A batch prediction id is needed.") + return self.delete_resource(batch_prediction, + query_string=query_string) diff --git a/bigml/api_handlers/batchprojectionhandler.py b/bigml/api_handlers/batchprojectionhandler.py new file mode 100644 index 00000000..bfb05228 --- /dev/null +++ b/bigml/api_handlers/batchprojectionhandler.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2018-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Base class for batch projections' REST calls + + https://bigml.com/api/batchprojections + +""" + +try: + import simplejson as json +except ImportError: + import json + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type +from bigml.constants import BATCH_PROJECTION_PATH, PCA_PATH + + +class BatchProjectionHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the batch projections' REST calls. It should not + be instantiated independently.
+ + """ + def __init__(self): + """Initializes the BatchProjectionHandler. This class is intended to be + used as a mixin on ResourceHandler, that inherits its + attributes and basic methods from BigMLConnection, and must not be + instantiated independently. + + """ + self.batch_projection_url = self.prediction_base_url + \ + BATCH_PROJECTION_PATH + + def create_batch_projection(self, pca, dataset, + args=None, wait_time=3, retries=10): + """Creates a new batch projection. + + The pca parameter can be a pca resource or ID + + """ + create_args = {} + if args is not None: + create_args.update(args) + + origin_resources_checked = self.check_origins( + dataset, pca, create_args, model_types=[PCA_PATH], + wait_time=wait_time, retries=retries) + if origin_resources_checked: + body = json.dumps(create_args) + return self._create(self.batch_projection_url, body) + return None + + def get_batch_projection(self, batch_projection, query_string=''): + """Retrieves a batch projection. + + The batch_projection parameter should be a string containing the + batch_projection id or the dict returned by create_batch_projection. + As batch_projection is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, the function will + return a dict that encloses the batch_projection values and state + info available at the time it is called. + """ + check_resource_type(batch_projection, BATCH_PROJECTION_PATH, + message="A batch projection id is needed.") + return self.get_resource(batch_projection, query_string=query_string) + + def download_batch_projection(self, batch_projection, filename=None, + retries=10): + """Retrieves the batch projections file. + + Downloads projections, that are stored in a remote CSV file. If + a path is given in filename, the contents of the file are downloaded + and saved locally. A file-like object is returned otherwise.
+ """ + check_resource_type(batch_projection, BATCH_PROJECTION_PATH, + message="A batch projection id is needed.") + return self._download_resource(batch_projection, filename, + retries=retries) + + def list_batch_projections(self, query_string=''): + """Lists all your batch projections. + + """ + return self._list(self.batch_projection_url, query_string) + + def update_batch_projection(self, batch_projection, changes): + """Updates a batch projection. + + """ + check_resource_type(batch_projection, BATCH_PROJECTION_PATH, + message="A batch projection id is needed.") + return self.update_resource(batch_projection, changes) + + def delete_batch_projection(self, batch_projection, query_string=''): + """Deletes a batch projection. + + """ + check_resource_type(batch_projection, BATCH_PROJECTION_PATH, + message="A batch projection id is needed.") + return self.delete_resource(batch_projection, + query_string=query_string) diff --git a/bigml/api_handlers/batchtopicdistributionhandler.py b/bigml/api_handlers/batchtopicdistributionhandler.py new file mode 100644 index 00000000..2a1bd204 --- /dev/null +++ b/bigml/api_handlers/batchtopicdistributionhandler.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2016-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +"""Base class for batch topic distributions' REST calls + + https://bigml.com/api/batchtopicdistributions + +""" + +try: + import simplejson as json +except ImportError: + import json + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type +from bigml.constants import BATCH_TOPIC_DISTRIBUTION_PATH, TOPIC_MODEL_PATH + + +class BatchTopicDistributionHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the batch topic distributions' REST calls. + It should not be instantiated independently. + + """ + def __init__(self): + """Initializes the BatchTopicDistributionHandler. This class is + intended to be used as a mixin on ResourceHandler, that inherits its + attributes and basic methods from BigMLConnection, and must not be + instantiated independently. + + """ + self.batch_topic_distribution_url = self.prediction_base_url + \ + BATCH_TOPIC_DISTRIBUTION_PATH + + def create_batch_topic_distribution(self, topic_model, dataset, + args=None, wait_time=3, retries=10): + """Creates a new batch topic distribution. + + Returns None when `check_origins` returns a false value. + """ + create_args = {} + if args is not None: + create_args.update(args) + + origin_resources_checked = self.check_origins( + dataset, topic_model, create_args, model_types=[TOPIC_MODEL_PATH], + wait_time=wait_time, retries=retries) + if origin_resources_checked: + body = json.dumps(create_args) + return self._create(self.batch_topic_distribution_url, body) + return None + + def get_batch_topic_distribution(self, batch_topic_distribution, + query_string=''): + """Retrieves a batch topic distribution. + + The batch_topic_distribution parameter should be a string + containing the batch_topic_distribution id or the dict + returned by create_batch_topic_distribution.
+ As batch_topic_distribution is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, the function will + return a dict that encloses the batch_topic_distribution values + and state info available at the time it is called. + """ + check_resource_type(batch_topic_distribution, + BATCH_TOPIC_DISTRIBUTION_PATH, + message="A batch topic distribution id is needed.") + return self.get_resource(batch_topic_distribution, + query_string=query_string) + + def download_batch_topic_distribution(self, + batch_topic_distribution, + filename=None, retries=10): + """Retrieves the batch topic distribution file. + + Downloads topic distributions, that are stored in a remote CSV file. + If a path is given in filename, the contents of the file are + downloaded and saved locally. A file-like object is returned + otherwise. + """ + check_resource_type(batch_topic_distribution, + BATCH_TOPIC_DISTRIBUTION_PATH, + message="A batch topic distribution id is needed.") + return self._download_resource(batch_topic_distribution, filename, + retries=retries) + + def list_batch_topic_distributions(self, query_string=''): + """Lists all your batch topic distributions. + + """ + return self._list(self.batch_topic_distribution_url, query_string) + + def update_batch_topic_distribution(self, batch_topic_distribution, + changes): + """Updates a batch topic distribution. + + """ + check_resource_type(batch_topic_distribution, + BATCH_TOPIC_DISTRIBUTION_PATH, + message="A batch topic distribution id is needed.") + return self.update_resource(batch_topic_distribution, changes) + + def delete_batch_topic_distribution(self, batch_topic_distribution, + query_string=''): + """Deletes a batch topic distribution.
+ + """ + check_resource_type(batch_topic_distribution, + BATCH_TOPIC_DISTRIBUTION_PATH, + message="A batch topic distribution id is needed.") + return self.delete_resource(batch_topic_distribution, + query_string=query_string) diff --git a/bigml/api_handlers/centroidhandler.py b/bigml/api_handlers/centroidhandler.py new file mode 100644 index 00000000..d0455649 --- /dev/null +++ b/bigml/api_handlers/centroidhandler.py @@ -0,0 +1,123 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Base class for centroids' REST calls + + https://bigml.com/api/centroids + +""" + +try: + import simplejson as json +except ImportError: + import json + + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + get_resource_type, check_resource, get_cluster_id +from bigml.constants import CENTROID_PATH, CLUSTER_PATH, SPECIFIC_EXCLUDES, \ + IMAGE_FIELDS_FILTER + + +class CentroidHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the centroids' REST calls. It should not + be instantiated independently. + + """ + def __init__(self): + """Initializes the CentroidHandler. This class is intended to be + used as a mixin on ResourceHandler, that inherits its + attributes and basic methods from BigMLConnection, and must not be + instantiated independently.
+ + """ + self.centroid_url = self.prediction_base_url + CENTROID_PATH + + def create_centroid(self, cluster, input_data=None, + args=None, wait_time=3, retries=10): + """Creates a new centroid. + + """ + cluster_id = None + resource_type = get_resource_type(cluster) + if resource_type != CLUSTER_PATH: + raise Exception("A cluster id is needed to create a" + " centroid. %s found." % resource_type) + + cluster_id = get_cluster_id(cluster) + if cluster_id is None: + raise Exception("Failed to detect a correct cluster " + "structure in %s." % cluster) + + if isinstance(cluster, dict) and cluster.get("resource") is not None: + # retrieving fields info from model structure + model_info = cluster + else: + image_fields_filter = IMAGE_FIELDS_FILTER + "," + \ + ",".join(SPECIFIC_EXCLUDES[resource_type]) + model_info = check_resource(cluster_id, + query_string=image_fields_filter, + wait_time=wait_time, + retries=retries, + raise_on_error=True, + api=self) + + if input_data is None: + input_data = {} + create_args = {} + if args is not None: + create_args.update(args) + create_args.update({ + "input_data": self.prepare_image_fields(model_info, input_data)}) + create_args.update({ + "cluster": cluster_id}) + + body = json.dumps(create_args) + return self._create(self.centroid_url, body, + verify=self.domain.verify_prediction) + + def get_centroid(self, centroid, query_string=''): + """Retrieves a centroid. + + """ + check_resource_type(centroid, CENTROID_PATH, + message="A centroid id is needed.") + return self.get_resource(centroid, query_string=query_string) + + def list_centroids(self, query_string=''): + """Lists all your centroids. + + """ + return self._list(self.centroid_url, query_string) + + def update_centroid(self, centroid, changes): + """Updates a centroid. 
+ + """ + check_resource_type(centroid, CENTROID_PATH, + message="A centroid id is needed.") + return self.update_resource(centroid, changes) + + def delete_centroid(self, centroid, query_string=''): + """Deletes a centroid. + + """ + check_resource_type(centroid, CENTROID_PATH, + message="A centroid id is needed.") + return self.delete_resource(centroid, query_string=query_string) diff --git a/bigml/api_handlers/clusterhandler.py b/bigml/api_handlers/clusterhandler.py new file mode 100644 index 00000000..ffc833eb --- /dev/null +++ b/bigml/api_handlers/clusterhandler.py @@ -0,0 +1,123 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Base class for clusters' REST calls + + https://bigml.com/api/clusters + +""" + +try: + import simplejson as json +except ImportError: + import json + + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + resource_is_ready +from bigml.constants import CLUSTER_PATH + + +class ClusterHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the clusters' REST calls. It should not + be instantiated independently. + + """ + def __init__(self): + """Initializes the ClusterHandler.
This class is intended to be + used as a mixin on ResourceHandler, that inherits its + attributes and basic methods from BigMLConnection, and must not be + instantiated independently. + + """ + self.cluster_url = self.url + CLUSTER_PATH + + def create_cluster(self, datasets, args=None, wait_time=3, retries=10): + """Creates a cluster from a `dataset` or a list of `datasets`. + + """ + create_args = self._set_create_from_datasets_args( + datasets, args=args, wait_time=wait_time, retries=retries) + + body = json.dumps(create_args) + return self._create(self.cluster_url, body) + + def get_cluster(self, cluster, query_string='', + shared_username=None, shared_api_key=None): + """Retrieves a cluster. + + The cluster parameter should be a string containing the + cluster id or the dict returned by create_cluster. + As cluster is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, the function will + return a dict that encloses the cluster values and state info + available at the time it is called. + + If this is a shared cluster, the username and sharing api key must + also be provided. + """ + check_resource_type(cluster, CLUSTER_PATH, + message="A cluster id is needed.") + return self.get_resource(cluster, + query_string=query_string, + shared_username=shared_username, + shared_api_key=shared_api_key) + + def cluster_is_ready(self, cluster, **kwargs): + """Checks whether a cluster's status is FINISHED. + + """ + check_resource_type(cluster, CLUSTER_PATH, + message="A cluster id is needed.") + resource = self.get_cluster(cluster, **kwargs) + return resource_is_ready(resource) + + def list_clusters(self, query_string=''): + """Lists all your clusters. + + """ + return self._list(self.cluster_url, query_string) + + def update_cluster(self, cluster, changes): + """Updates a cluster.
+ + """ + check_resource_type(cluster, CLUSTER_PATH, + message="A cluster id is needed.") + return self.update_resource(cluster, changes) + + def delete_cluster(self, cluster, query_string=''): + """Deletes a cluster. + + """ + check_resource_type(cluster, CLUSTER_PATH, + message="A cluster id is needed.") + return self.delete_resource(cluster, query_string=query_string) + + def clone_cluster(self, cluster, + args=None, wait_time=3, retries=10): + """Creates a cloned cluster from an existing `cluster` + + """ + create_args = self._set_clone_from_args( + cluster, "cluster", args=args, wait_time=wait_time, + retries=retries) + + body = json.dumps(create_args) + return self._create(self.cluster_url, body) diff --git a/bigml/api_handlers/configurationhandler.py b/bigml/api_handlers/configurationhandler.py new file mode 100644 index 00000000..4e2e1ae1 --- /dev/null +++ b/bigml/api_handlers/configurationhandler.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +"""Base class for configurations' REST calls + + https://bigml.com/api/configurations + +""" + +try: + import simplejson as json +except ImportError: + import json + + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type +from bigml.constants import CONFIGURATION_PATH + + +class ConfigurationHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the REST calls. It should not + be instantiated independently. + + """ + def __init__(self): + """Initializes the ConfigurationHandler. This class is intended to be + used as a mixin on ResourceHandler, that inherits its + attributes and basic method from BigMLConnection, and must not be + instantiated independently. + + """ + self.configuration_url = self.url + CONFIGURATION_PATH + + def create_configuration(self, configurations, + args=None): + """Creates a configuration from a `configurations` dictionary. + + """ + if not isinstance(configurations, dict): + raise AttributeError("Failed to find a configuration dictionary as" + " first argument.") + if args is None: + args = {} + create_args = {"configurations": configurations} + create_args.update(args) + + body = json.dumps(create_args) + return self._create(self.configuration_url, body) + + def get_configuration(self, configuration, query_string=''): + """Retrieves a configuration. + + The configuration parameter should be a string containing the + configuration id or the dict returned by create_configuration. + """ + check_resource_type(configuration, CONFIGURATION_PATH, + message="A configuration id is needed.") + return self.get_resource(configuration, query_string=query_string) + + def list_configurations(self, query_string=''): + """Lists all your configurations. + + """ + return self._list(self.configuration_url, query_string) + + def update_configuration(self, configuration, changes): + """Updates a configuration. 
class CorrelationHandlerMixin(ResourceHandlerMixin):
    """Mixin used by the BigML class to expose the correlations' REST
    calls. It reads attributes set up by the classes it is mixed with,
    so it should not be instantiated independently.

    """
    def __init__(self):
        """Stores the correlations endpoint url.

        This class is intended to be used as a mixin on ResourceHandler,
        that inherits its attributes and basic method from
        BigMLConnection, and must not be instantiated independently.

        """
        self.correlation_url = self.url + CORRELATION_PATH

    def create_correlation(self, dataset, args=None, wait_time=3, retries=10):
        """Creates a correlation from a `dataset`.

        The dataset id is handed to `check_resource` together with
        `wait_time` and `retries` before the creation request is sent.
        The contents of `args`, if any, are sent in the request body
        along with the dataset id.

        Raises an Exception when `dataset` is not a dataset resource.

        """
        origin_type = get_resource_type(dataset)
        if origin_type != DATASET_PATH:
            raise Exception("A dataset id is needed to create a"
                            " correlation. %s found." % origin_type)

        dataset_id = get_dataset_id(dataset)
        check_resource(dataset_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["dataset"] = dataset_id

        return self._create(self.correlation_url, json.dumps(payload))

    def get_correlation(self, correlation, query_string=''):
        """Retrieves a correlation.

        `correlation` should be a string containing the correlation id
        or the dict returned by create_correlation. A correlation is
        processed until it reaches the FINISHED or FAULTY state, so the
        returned dict holds the values and state info available at the
        time of the call.

        """
        check_resource_type(correlation, CORRELATION_PATH,
                            message="A correlation id is needed.")
        retrieved = self.get_resource(correlation,
                                      query_string=query_string)
        return retrieved

    def list_correlations(self, query_string=''):
        """Lists all your correlations, optionally filtered by
        `query_string`.

        """
        listing = self._list(self.correlation_url, query_string)
        return listing

    def update_correlation(self, correlation, changes):
        """Updates a correlation with the contents of `changes`.

        """
        check_resource_type(correlation, CORRELATION_PATH,
                            message="A correlation id is needed.")
        updated = self.update_resource(correlation, changes)
        return updated

    def delete_correlation(self, correlation, query_string=''):
        """Deletes a correlation.

        """
        check_resource_type(correlation, CORRELATION_PATH,
                            message="A correlation id is needed.")
        deleted = self.delete_resource(correlation,
                                       query_string=query_string)
        return deleted
def create_dataset(self, origin_resource, args=None,
                   wait_time=3, retries=10):
    """Creates a remote dataset.

    Uses a remote resource to create a new dataset using the
    arguments in `args`.
    The allowed remote resources can be:
        - source
        - dataset
        - list of datasets
        - cluster
    When a cluster id is used as origin_resource, a centroid should
    also be provided in the args argument. The first centroid found in
    the cluster is used otherwise.
    `wait_time` and `retries` are handed to the checks run on the
    origin resource before the creation request is sent.

    Raises a KeyError when the origin is a cluster, `args` has no
    centroid and none can be read from the cluster. Raises an Exception
    when the origin resource is of any other unsupported type.

    """
    create_args = {}
    if args is not None:
        create_args.update(args)

    if isinstance(origin_resource, list):
        # multidatasets
        create_args = self._set_create_from_datasets_args(
            origin_resource, args=create_args, wait_time=wait_time,
            retries=retries, key="origin_datasets")
    else:
        resource_type = get_resource_type(origin_resource)
        # dataset from source
        if resource_type == SOURCE_PATH:
            source_id = get_source_id(origin_resource)
            if source_id:
                check_resource(source_id,
                               query_string=TINY_RESOURCE,
                               wait_time=wait_time,
                               retries=retries,
                               raise_on_error=True, api=self)
                create_args.update({
                    "source": source_id})
        # dataset from dataset
        elif resource_type == DATASET_PATH:
            create_args = self._set_create_from_datasets_args(
                origin_resource, args=create_args, wait_time=wait_time,
                retries=retries, key="origin_dataset")
        # dataset from cluster and centroid
        elif resource_type == CLUSTER_PATH:
            cluster_id = get_cluster_id(origin_resource)
            cluster = check_resource(cluster_id,
                                     query_string=TINY_RESOURCE,
                                     wait_time=wait_time,
                                     retries=retries,
                                     raise_on_error=True, api=self)
            if 'centroid' not in create_args:
                try:
                    centroid = list(cluster['object'][
                        'cluster_datasets_ids'].keys())[0]
                    create_args.update({'centroid': centroid})
                except (KeyError, IndexError):
                    # IndexError: the cluster lists no centroids at all.
                    # Report it like a missing key so callers get the
                    # explanatory message instead of a bare index error.
                    raise KeyError("Failed to generate the dataset. A "
                                   "centroid id is needed in the args "
                                   "argument to generate a dataset from "
                                   "a cluster.")
            create_args.update({'cluster': cluster_id})
        else:
            raise Exception("A source, dataset, list of dataset ids"
                            " or cluster id plus centroid id are needed"
                            " to create a"
                            " dataset. %s found." % resource_type)

    body = json.dumps(create_args)
    return self._create(self.dataset_url, body)
def error_counts(self, dataset, raise_on_error=True):
    """Returns a dict that maps the ids of the fields that contain
    errors to their number of errors.

    The dataset argument can be either a dataset resource structure
    (a dict with an 'object' key) or a dataset id, which is used to
    retrieve the associated remote resource.

    An empty dict is returned when no dataset id is available, which
    includes the case where `raise_on_error` is False and the retrieved
    resource reports an error.

    """
    is_structure = isinstance(dataset, dict) and 'object' in dataset
    if is_structure:
        dataset_id = get_dataset_id(dataset)
    else:
        check_resource_type(dataset, DATASET_PATH,
                            message="A dataset id is needed.")
        dataset_id = get_dataset_id(dataset)
        dataset = check_resource(dataset_id, self.get_dataset,
                                 raise_on_error=raise_on_error)
        if not raise_on_error and dataset['error'] is not None:
            # the retrieval failed quietly: nothing to report
            dataset_id = None

    if not dataset_id:
        return {}

    status = dataset.get('object', {}).get('status', {})
    field_errors = status.get('field_errors', {})
    return {field_id: field_errors[field_id]['total']
            for field_id in field_errors}
class DeepnetHandlerMixin(ResourceHandlerMixin):
    """Mixin used by the BigML class to expose the deepnets' REST calls.
    It reads attributes set up by the classes it is mixed with, so it
    should not be instantiated independently.

    """
    def __init__(self):
        """Stores the deepnets endpoint url.

        This class is intended to be used as a mixin on ResourceHandler,
        that inherits its attributes and basic method from
        BigMLConnection, and must not be instantiated independently.

        """
        self.deepnet_url = self.url + DEEPNET_PATH

    def create_deepnet(self, datasets,
                       args=None, wait_time=3, retries=10):
        """Creates a deepnet from a `dataset`
        or a list of `datasets`.

        The request arguments are built by
        `_set_create_from_datasets_args` from `datasets`, `args`,
        `wait_time` and `retries`.

        """
        payload = self._set_create_from_datasets_args(
            datasets, args=args, wait_time=wait_time, retries=retries)
        return self._create(self.deepnet_url, json.dumps(payload))

    def get_deepnet(self, deepnet, query_string='',
                    shared_username=None, shared_api_key=None):
        """Retrieves a deepnet.

        `deepnet` should be a string containing the deepnet id or the
        dict returned by create_deepnet. A deepnet is processed until it
        reaches the FINISHED or FAULTY state, so the returned dict holds
        the values and state info available at the time of the call.

        If this is a shared deepnet, the username and sharing api key
        must also be provided.

        """
        check_resource_type(deepnet, DEEPNET_PATH,
                            message="A deepnet id is needed.")
        retrieved = self.get_resource(deepnet,
                                      query_string=query_string,
                                      shared_username=shared_username,
                                      shared_api_key=shared_api_key)
        return retrieved

    def deepnet_is_ready(self, deepnet, **kwargs):
        """Checks whether a deepnet's status is FINISHED.

        Keyword arguments are forwarded to `get_deepnet`.

        """
        check_resource_type(deepnet, DEEPNET_PATH,
                            message="A deepnet id is needed.")
        return resource_is_ready(self.get_deepnet(deepnet, **kwargs))

    def list_deepnets(self, query_string=''):
        """Lists all your deepnets, optionally filtered by
        `query_string`.

        """
        listing = self._list(self.deepnet_url, query_string)
        return listing

    def update_deepnet(self, deepnet, changes):
        """Updates a deepnet with the contents of `changes`.

        """
        check_resource_type(deepnet, DEEPNET_PATH,
                            message="A deepnet id is needed.")
        updated = self.update_resource(deepnet, changes)
        return updated

    def delete_deepnet(self, deepnet, query_string=''):
        """Deletes a deepnet.

        """
        check_resource_type(deepnet, DEEPNET_PATH,
                            message="A deepnet id is needed.")
        deleted = self.delete_resource(deepnet, query_string=query_string)
        return deleted

    def clone_deepnet(self, deepnet,
                      args=None, wait_time=3, retries=10):
        """Creates a cloned deepnet from an existing `deepnet`.

        The request arguments are built by `_set_clone_from_args`.

        """
        payload = self._set_clone_from_args(
            deepnet, "deepnet", args=args, wait_time=wait_time,
            retries=retries)
        return self._create(self.deepnet_url, json.dumps(payload))
def ensemble_is_ready(self, ensemble, **kwargs):
    """Checks whether an ensemble's status is FINISHED.

    Keyword arguments (for instance `query_string`, `shared_username`
    or `shared_api_key`) are forwarded to `get_ensemble`, so that shared
    ensembles can be checked too. Calling it without keyword arguments
    behaves as before.

    """
    check_resource_type(ensemble, ENSEMBLE_PATH,
                        message="An ensemble id is needed.")
    resource = self.get_ensemble(ensemble, **kwargs)
    return resource_is_ready(resource)
class EvaluationHandlerMixin(ResourceHandlerMixin):
    """Mixin used by the BigML class to expose the evaluations' REST
    calls. It reads attributes set up by the classes it is mixed with,
    so it should not be instantiated independently.

    """
    def __init__(self):
        """Stores the evaluations endpoint url.

        This class is intended to be used as a mixin on ResourceHandler,
        that inherits its attributes and basic method from
        BigMLConnection, and must not be instantiated independently.

        """
        self.evaluation_url = self.url + EVALUATION_PATH

    def create_evaluation(self, model, dataset,
                          args=None, wait_time=3, retries=10):
        """Creates a new evaluation.

        The accepted model types are the supervised ones plus time
        series. `check_origins` validates `dataset` and `model`; when it
        returns a falsy value no request is sent and None is returned.

        """
        create_args = {}
        if args is not None:
            create_args.update(args)

        accepted_types = SUPERVISED_PATHS + [TIME_SERIES_PATH]

        # NOTE(review): create_args is handed to check_origins, which
        # presumably adds the origin ids to it -- confirm in
        # ResourceHandlerMixin.
        if not self.check_origins(
                dataset, model, create_args, model_types=accepted_types,
                wait_time=wait_time, retries=retries):
            return None

        return self._create(self.evaluation_url, json.dumps(create_args))

    def get_evaluation(self, evaluation, query_string=''):
        """Retrieves an evaluation.

        `evaluation` should be a string containing the evaluation id or
        the dict returned by create_evaluation. An evaluation is
        processed until it reaches the FINISHED or FAULTY state, so the
        returned dict holds the values and state info available at the
        time of the call.

        """
        check_resource_type(evaluation, EVALUATION_PATH,
                            message="An evaluation id is needed.")
        retrieved = self.get_resource(evaluation,
                                      query_string=query_string)
        return retrieved

    def list_evaluations(self, query_string=''):
        """Lists all your evaluations, optionally filtered by
        `query_string`.

        """
        listing = self._list(self.evaluation_url, query_string)
        return listing

    def update_evaluation(self, evaluation, changes):
        """Updates an evaluation with the contents of `changes`.

        """
        check_resource_type(evaluation, EVALUATION_PATH,
                            message="An evaluation id is needed.")
        updated = self.update_resource(evaluation, changes)
        return updated

    def delete_evaluation(self, evaluation, query_string=''):
        """Deletes an evaluation.

        """
        check_resource_type(evaluation, EVALUATION_PATH,
                            message="An evaluation id is needed.")
        deleted = self.delete_resource(evaluation,
                                       query_string=query_string)
        return deleted
def create_execution(self, origin_resource, args=None,
                     wait_time=3, retries=10):
    """Creates an execution from a `script` or a list of `scripts`.

    `origin_resource` can be a single script (id string or resource
    dict) or an iterable of them. Every script is handed to
    `check_resource` together with `wait_time` and `retries` before the
    request is sent. A single script is sent as the `script` argument,
    several as `scripts`.

    Raises an Exception when `origin_resource` is empty, is not
    iterable or contains anything that is not a script.

    """

    def invalid_origin():
        """Builds the error raised for any unusable origin."""
        return Exception("A script id or a list of them is needed to create"
                         " a script execution. %s found." %
                         get_resource_type(origin_resource))

    create_args = {}
    if args is not None:
        create_args.update(args)

    try:
        if isinstance(origin_resource, (dict, str)):
            # single script
            scripts = [origin_resource]
        else:
            # materialized so that one-shot iterables can be traversed
            # again below; raises TypeError for non-iterables
            scripts = list(origin_resource)
        script_ids = [get_script_id(script) for script in scripts]
    except TypeError:
        raise invalid_origin()

    # An empty list would trivially pass the all() check and then fail
    # with an IndexError when picking the single script id.
    if not script_ids or not all(
            get_resource_type(script_id) == SCRIPT_PATH
            for script_id in script_ids):
        raise invalid_origin()

    for script in scripts:
        check_resource(script,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)

    if len(script_ids) > 1:
        create_args.update({
            "scripts": script_ids})
    else:
        create_args.update({
            "script": script_ids[0]})

    body = json.dumps(create_args)
    return self._create(self.execution_url, body)
def update_execution(self, execution, changes):
    """Updates an execution with the contents of `changes`.

    `execution` goes through the execution type check first; the update
    itself is delegated to `update_resource`, whose result is returned.

    """
    check_resource_type(execution, EXECUTION_PATH,
                        message="An execution id is needed.")
    updated = self.update_resource(execution, changes)
    return updated
def create_external_connector(self, connection_info, args=None):
    """Creates an external connector from a dictionary containing the
    connection information.

    When `connection_info` is None, the connection information is read
    from environment variables via `get_env_connection_info`. The
    optional "source" key selects the kind of connector and defaults to
    "postgresql"; the remaining keys are sent as the `connection`
    argument. The contents of `args`, if any, are added to the request.

    The dictionary given by the caller is not modified.

    Raises an Exception when the connection information is not a dict.

    """

    create_args = {}
    if args is not None:
        create_args.update(args)

    if connection_info is None:
        connection_info = get_env_connection_info()

    if not isinstance(connection_info, dict):
        raise Exception("To create an external connector you need to"
                        " provide a dictionary with the connection"
                        " information. Please refer to the API external"
                        " connector docs for details.")

    # Work on a shallow copy: "source" must be taken out of the
    # connection attributes, but the caller may reuse its dictionary.
    connection = dict(connection_info)
    source = connection.pop("source", "postgresql")

    create_args.update({"connection": connection})
    create_args.update({"source": source})
    body = json.dumps(create_args)
    return self._create(self.external_connector_url, body)
def delete_external_connector(self, external_connector, query_string=''):
    """Deletes an external connector.

    `external_connector` goes through the external connector type check
    first; the deletion itself is delegated to `delete_resource`, whose
    result is returned.

    """
    check_resource_type(external_connector, EXTERNAL_CONNECTOR_PATH,
                        message="An external connector id is needed.")
    deleted = self.delete_resource(external_connector,
                                   query_string=query_string)
    return deleted
class ForecastHandlerMixin(ResourceHandlerMixin):
    """Mixin used by the BigML class to expose the forecasts' REST calls.
    It reads attributes set up by the classes it is mixed with, so it
    should not be instantiated independently.

    """
    def __init__(self):
        """Stores the forecasts endpoint url, built on the prediction
        base url.

        This class is intended to be used as a mixin on ResourceHandler,
        that inherits its attributes and basic method from
        BigMLConnection, and must not be instantiated independently.

        """
        self.forecast_url = self.prediction_base_url + FORECAST_PATH

    def create_forecast(self, time_series, input_data=None,
                        args=None, wait_time=3, retries=10):
        """Creates a new forecast.

        The time series id is handed to `check_resource` together with
        `wait_time` and `retries` before the request is sent.
        `input_data` defaults to an empty dict, and the contents of
        `args`, if any, are added to the request.

        Raises an Exception when `time_series` is not a time series.

        """
        time_series_id = get_time_series_id(time_series)
        origin_type = get_resource_type(time_series_id)
        if time_series_id is None or origin_type != TIME_SERIES_PATH:
            raise Exception("A time series model id is needed to create a"
                            " forecast. %s found." % origin_type)

        check_resource(time_series_id,
                       query_string=TINY_RESOURCE,
                       wait_time=wait_time, retries=retries,
                       raise_on_error=True, api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["input_data"] = {} if input_data is None else input_data
        # the guard above ensures the id is not None at this point
        payload["timeseries"] = time_series_id

        return self._create(self.forecast_url, json.dumps(payload),
                            verify=self.domain.verify_prediction)

    def get_forecast(self, forecast, query_string=''):
        """Retrieves a forecast.

        """
        check_resource_type(forecast, FORECAST_PATH,
                            message="A forecast id is needed.")
        retrieved = self.get_resource(forecast, query_string=query_string)
        return retrieved

    def list_forecasts(self, query_string=''):
        """Lists all your forecasts, optionally filtered by
        `query_string`.

        """
        listing = self._list(self.forecast_url, query_string)
        return listing

    def update_forecast(self, forecast, changes):
        """Updates a forecast with the contents of `changes`.

        """
        check_resource_type(forecast, FORECAST_PATH,
                            message="A forecast id is needed.")
        updated = self.update_resource(forecast, changes)
        return updated

    def delete_forecast(self, forecast, query_string=''):
        """Deletes a forecast.

        """
        check_resource_type(forecast, FORECAST_PATH,
                            message="A forecast id is needed.")
        deleted = self.delete_resource(forecast, query_string=query_string)
        return deleted
# See the
# License for the specific language governing permissions and limitations
# under the License.

"""Base class for fusion's REST calls

   https://bigml.com/api/fusions

"""

try:
    import simplejson as json
except ImportError:
    import json


from bigml.api_handlers.resourcehandler import ResourceHandlerMixin
from bigml.api_handlers.resourcehandler import check_resource_type, \
    resource_is_ready
from bigml.constants import FUSION_PATH, SUPERVISED_PATHS


class FusionHandlerMixin(ResourceHandlerMixin):
    """Mixin that adds the fusion REST calls to the BigML class.

       It reads attributes and helpers from the object it is mixed into,
       so it should not be instantiated independently.

    """
    def __init__(self):
        """Builds `fusion_url` from `self.url` and FUSION_PATH.

           `self.url` must already be set when this initializer runs.

        """
        self.fusion_url = self.url + FUSION_PATH

    def create_fusion(self, models,
                      args=None, wait_time=3, retries=10):
        """Creates a fusion from a list of supervised models.

           The request arguments are assembled by
           `_set_create_from_models_args` using SUPERVISED_PATHS.

        """
        request = self._set_create_from_models_args(
            models, SUPERVISED_PATHS,
            args=args, wait_time=wait_time, retries=retries)
        return self._create(self.fusion_url, json.dumps(request))

    def get_fusion(self, fusion, query_string='',
                   shared_username=None, shared_api_key=None):
        """Retrieves a fusion.

           The `fusion` parameter should be a string containing the
           fusion id or the dict returned by create_fusion.
           A fusion is an evolving object that is processed until it
           reaches the FINISHED or FAULTY state, so the returned dict
           holds the values and state info available at call time.

           For a shared fusion, the username and sharing api key must
           also be provided.
        """
        check_resource_type(
            fusion, FUSION_PATH, message="A fusion id is needed.")
        return self.get_resource(
            fusion,
            query_string=query_string,
            shared_username=shared_username,
            shared_api_key=shared_api_key)

    def fusion_is_ready(self, fusion, **kwargs):
        """Checks whether a fusion's status is FINISHED.

           `kwargs` are forwarded to `get_fusion`.

        """
        check_resource_type(
            fusion, FUSION_PATH, message="A fusion id is needed.")
        return resource_is_ready(self.get_fusion(fusion, **kwargs))

    def list_fusions(self, query_string=''):
        """Lists all your fusions, passing `query_string` to `_list`.

        """
        return self._list(self.fusion_url, query_string)

    def update_fusion(self, fusion, changes):
        """Updates a fusion with the given `changes`.

        """
        check_resource_type(
            fusion, FUSION_PATH, message="A fusion id is needed.")
        return self.update_resource(fusion, changes)

    def clone_fusion(self, fusion,
                     args=None, wait_time=3, retries=10):
        """Creates a cloned fusion from an existing `fusion`.

           The request arguments come from `_set_clone_from_args`.

        """
        request = self._set_clone_from_args(
            fusion, "fusion",
            args=args, wait_time=wait_time, retries=retries)
        return self._create(self.fusion_url, json.dumps(request))

    def delete_fusion(self, fusion, query_string=''):
        """Deletes a fusion.

        """
        check_resource_type(
            fusion, FUSION_PATH, message="A fusion id is needed.")
        return self.delete_resource(fusion, query_string=query_string)


# diff --git a/bigml/api_handlers/libraryhandler.py b/bigml/api_handlers/libraryhandler.py
# new file mode 100644
# index 00000000..36055eee
# --- /dev/null
# +++ b/bigml/api_handlers/libraryhandler.py
# @@ -0,0 +1,130 @@
# -*- coding: utf-8 -*-
#pylint: disable=abstract-method
#
# Copyright 2015-2025 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Base class for whizzml libraries' REST calls

   https://bigml.com/api/libraries

"""

try:
    import simplejson as json
except ImportError:
    import json
import os

from bigml.api_handlers.resourcehandler import ResourceHandlerMixin
from bigml.api_handlers.resourcehandler import check_resource_type, \
    get_library_id, get_resource_type, check_resource
from bigml.constants import LIBRARY_PATH, TINY_RESOURCE


class LibraryHandlerMixin(ResourceHandlerMixin):
    """Mixin that adds the whizzml libraries' REST calls to the BigML
       class. It reads attributes and helpers from the object it is mixed
       into, so it should not be instantiated independently.

    """
    def __init__(self):
        """Builds `library_url` from `self.url` and LIBRARY_PATH.

           `self.url` must already be set when this initializer runs.

        """
        self.library_url = self.url + LIBRARY_PATH

    def create_library(self, source_code=None, args=None,
                       wait_time=3, retries=10):
        """Creates a whizzml library from its source code.

           `source_code` can be:
             - the ID of an existing whizzml library, sent as `origin`
             - the path to a file containing the source code; when the
               string names an existing path its contents are read
             - a string with the source code itself

           Raises an Exception when `source_code` is None or is neither
           a library id nor a string, and IOError when reading the file
           fails.

        """
        request = {}
        if args is not None:
            request.update(args)

        if source_code is None:
            raise Exception('A valid code string'
                            ' or a library id must be provided.')

        kind = get_resource_type(source_code)
        if kind == LIBRARY_PATH:
            origin_id = get_library_id(source_code)
            if origin_id:
                check_resource(origin_id,
                               query_string=TINY_RESOURCE,
                               wait_time=wait_time,
                               retries=retries,
                               raise_on_error=True,
                               api=self)
                request["origin"] = origin_id
        elif not isinstance(source_code, str):
            raise Exception("A library id or a valid source code"
                            " is needed to create a"
                            " library. %s found." % kind)
        else:
            try:
                # A string that names an existing path is treated as a
                # file; otherwise it is used verbatim as source code.
                if os.path.exists(source_code):
                    with open(source_code) as handle:
                        source_code = handle.read()
            except IOError:
                raise IOError("Could not open the source code file %s." %
                              source_code)
            request["source_code"] = source_code

        return self._create(self.library_url, json.dumps(request))

    def get_library(self, library, query_string=''):
        """Retrieves a library.

           The `library` parameter should be a string containing the
           library id or the dict returned by create_library.
           A library is an evolving object that is processed until it
           reaches the FINISHED or FAULTY state, so the returned dict
           holds the content and state info available at call time.
        """
        check_resource_type(
            library, LIBRARY_PATH, message="A library id is needed.")
        return self.get_resource(library, query_string=query_string)

    def list_libraries(self, query_string=''):
        """Lists all your libraries, passing `query_string` to `_list`.

        """
        return self._list(self.library_url, query_string)

    def update_library(self, library, changes):
        """Updates a library with the given `changes`.

        """
        check_resource_type(
            library, LIBRARY_PATH, message="A library id is needed.")
        return self.update_resource(library, changes)

    def delete_library(self, library, query_string=''):
        """Deletes a library.

        """
        check_resource_type(
            library, LIBRARY_PATH, message="A library id is needed.")
        return self.delete_resource(library, query_string=query_string)


# diff --git a/bigml/api_handlers/linearhandler.py b/bigml/api_handlers/linearhandler.py
# new file mode 100644
# index 00000000..3f24a5f8
# --- /dev/null
# +++ b/bigml/api_handlers/linearhandler.py
# @@ -0,0 +1,127 @@
# -*- coding: utf-8 -*-
#pylint: disable=abstract-method
#
# Copyright 2019-2025 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Base class for linear regressions' REST calls

   https://bigml.com/api/linearregressions

"""

try:
    import simplejson as json
except ImportError:
    import json


from bigml.api_handlers.resourcehandler import ResourceHandlerMixin
from bigml.api_handlers.resourcehandler import check_resource_type, \
    resource_is_ready
from bigml.constants import LINEAR_REGRESSION_PATH


class LinearRegressionHandlerMixin(ResourceHandlerMixin):
    """Mixin that adds the linear regression REST calls to the BigML
       class. It reads attributes and helpers from the object it is mixed
       into, so it should not be instantiated independently.

    """
    def __init__(self):
        """Builds `linear_regression_url` from `self.url` and
           LINEAR_REGRESSION_PATH.

           `self.url` must already be set when this initializer runs.

        """
        self.linear_regression_url = self.url + LINEAR_REGRESSION_PATH

    def create_linear_regression(self, datasets,
                                 args=None, wait_time=3, retries=10):
        """Creates a linear regression from a `dataset`
           or a list of `datasets`.

           The request arguments are assembled by
           `_set_create_from_datasets_args`.

        """
        request = self._set_create_from_datasets_args(
            datasets, args=args, wait_time=wait_time, retries=retries)
        return self._create(self.linear_regression_url, json.dumps(request))

    def get_linear_regression(self, linear_regression, query_string='',
                              shared_username=None, shared_api_key=None):
        """Retrieves a linear regression.

           The `linear_regression` parameter should be a string
           containing the linear regression id or the dict returned by
           create_linear_regression.
           A linear regression is an evolving object that is processed
           until it reaches the FINISHED or FAULTY state, so the
           returned dict holds the values and state info available at
           call time.

           For a shared linear regression, the username and sharing api
           key must also be provided.
        """
        check_resource_type(
            linear_regression, LINEAR_REGRESSION_PATH,
            message="A linear regression id is needed.")
        return self.get_resource(
            linear_regression,
            query_string=query_string,
            shared_username=shared_username,
            shared_api_key=shared_api_key)

    def linear_regression_is_ready(self, linear_regression, **kwargs):
        """Checks whether a linear regression's status is FINISHED.

           `kwargs` are forwarded to `get_linear_regression`.

        """
        check_resource_type(
            linear_regression, LINEAR_REGRESSION_PATH,
            message="A linear regression id is needed.")
        return resource_is_ready(
            self.get_linear_regression(linear_regression, **kwargs))

    def list_linear_regressions(self, query_string=''):
        """Lists all your linear regressions, passing `query_string`
           to `_list`.

        """
        return self._list(self.linear_regression_url, query_string)

    def update_linear_regression(self, linear_regression, changes):
        """Updates a linear regression with the given `changes`.

        """
        check_resource_type(
            linear_regression, LINEAR_REGRESSION_PATH,
            message="A linear regression id is needed.")
        return self.update_resource(linear_regression, changes)

    def delete_linear_regression(self, linear_regression, query_string=''):
        """Deletes a linear regression.

        """
        check_resource_type(
            linear_regression, LINEAR_REGRESSION_PATH,
            message="A linear regression id is needed.")
        return self.delete_resource(
            linear_regression, query_string=query_string)

    def clone_linear_regression(self, linear_regression,
                                args=None, wait_time=3, retries=10):
        """Creates a cloned linear regression from an existing
           `linear regression`.

           The request arguments come from `_set_clone_from_args`.

        """
        request = self._set_clone_from_args(
            linear_regression, "linearregression",
            args=args, wait_time=wait_time, retries=retries)
        return self._create(self.linear_regression_url, json.dumps(request))


# diff --git a/bigml/api_handlers/logistichandler.py b/bigml/api_handlers/logistichandler.py
# new file mode 100644
# index 00000000..744422bf
# --- /dev/null
# +++ b/bigml/api_handlers/logistichandler.py
# @@ -0,0 +1,127 @@
# -*- coding: utf-8 -*-
#pylint: disable=abstract-method
#
# Copyright 2015-2025 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Base class for logistic regressions' REST calls

   https://bigml.com/api/logisticregressions

"""

try:
    import simplejson as json
except ImportError:
    import json


from bigml.api_handlers.resourcehandler import ResourceHandlerMixin
from bigml.api_handlers.resourcehandler import check_resource_type, \
    resource_is_ready
from bigml.constants import LOGISTIC_REGRESSION_PATH


class LogisticRegressionHandlerMixin(ResourceHandlerMixin):
    """Mixin that adds the logistic regression REST calls to the BigML
       class. It reads attributes and helpers from the object it is mixed
       into, so it should not be instantiated independently.

    """
    def __init__(self):
        """Builds `logistic_regression_url` from `self.url` and
           LOGISTIC_REGRESSION_PATH.

           `self.url` must already be set when this initializer runs.

        """
        self.logistic_regression_url = self.url + LOGISTIC_REGRESSION_PATH

    def create_logistic_regression(self, datasets,
                                   args=None, wait_time=3, retries=10):
        """Creates a logistic regression from a `dataset`
           or a list of `datasets`.

           The request arguments are assembled by
           `_set_create_from_datasets_args`.

        """
        request = self._set_create_from_datasets_args(
            datasets, args=args, wait_time=wait_time, retries=retries)
        return self._create(self.logistic_regression_url,
                            json.dumps(request))

    def get_logistic_regression(self, logistic_regression, query_string='',
                                shared_username=None, shared_api_key=None):
        """Retrieves a logistic regression.

           The `logistic_regression` parameter should be a string
           containing the logistic regression id or the dict returned by
           create_logistic_regression.
           A logistic regression is an evolving object that is processed
           until it reaches the FINISHED or FAULTY state, so the
           returned dict holds the values and state info available at
           call time.

           For a shared logistic regression, the username and sharing
           api key must also be provided.
        """
        check_resource_type(
            logistic_regression, LOGISTIC_REGRESSION_PATH,
            message="A logistic regression id is needed.")
        return self.get_resource(
            logistic_regression,
            query_string=query_string,
            shared_username=shared_username,
            shared_api_key=shared_api_key)

    def logistic_regression_is_ready(self, logistic_regression, **kwargs):
        """Checks whether a logistic regression's status is FINISHED.

           `kwargs` are forwarded to `get_logistic_regression`.

        """
        check_resource_type(
            logistic_regression, LOGISTIC_REGRESSION_PATH,
            message="A logistic regression id is needed.")
        return resource_is_ready(
            self.get_logistic_regression(logistic_regression, **kwargs))

    def list_logistic_regressions(self, query_string=''):
        """Lists all your logistic regressions, passing `query_string`
           to `_list`.

        """
        return self._list(self.logistic_regression_url, query_string)

    def update_logistic_regression(self, logistic_regression, changes):
        """Updates a logistic regression with the given `changes`.

        """
        check_resource_type(
            logistic_regression, LOGISTIC_REGRESSION_PATH,
            message="A logistic regression id is needed.")
        return self.update_resource(logistic_regression, changes)

    def delete_logistic_regression(self, logistic_regression, query_string=''):
        """Deletes a logistic regression.

        """
        check_resource_type(
            logistic_regression, LOGISTIC_REGRESSION_PATH,
            message="A logistic regression id is needed.")
        return self.delete_resource(
            logistic_regression, query_string=query_string)

    def clone_logistic_regression(self, logistic_regression,
                                  args=None, wait_time=3, retries=10):
        """Creates a cloned logistic regression from an existing
           `logistic regression`.

           The request arguments come from `_set_clone_from_args`.

        """
        request = self._set_clone_from_args(
            logistic_regression, "logisticregression",
            args=args, wait_time=wait_time, retries=retries)
        return self._create(self.logistic_regression_url,
                            json.dumps(request))


# diff --git a/bigml/api_handlers/modelhandler.py b/bigml/api_handlers/modelhandler.py
# new file mode 100644
# index 00000000..0a94d342
# --- /dev/null
# +++ b/bigml/api_handlers/modelhandler.py
# @@ -0,0 +1,174 @@
# -*- coding: utf-8 -*-
#pylint: disable=abstract-method
#
# Copyright 2014-2025 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Base class for models' REST calls

   https://bigml.com/api/models

"""

try:
    import simplejson as json
except ImportError:
    import json


from bigml.api_handlers.resourcehandler import ResourceHandlerMixin
from bigml.api_handlers.resourcehandler import check_resource_type, \
    resource_is_ready, get_resource_type, check_resource, \
    get_cluster_id
from bigml.constants import (MODEL_PATH, CLUSTER_PATH, DATASET_PATH,
                             TINY_RESOURCE)


class ModelHandlerMixin(ResourceHandlerMixin):
    """Mixin that adds the model REST calls to the BigML class.

       It reads attributes and helpers from the object it is mixed into,
       so it should not be instantiated independently.

    """
    def __init__(self):
        """Builds `model_url` from `self.url` and MODEL_PATH.

           `self.url` must already be set when this initializer runs.

        """
        self.model_url = self.url + MODEL_PATH

    def create_model(self, origin_resource, args=None, wait_time=3, retries=10):
        """Creates a model from an origin_resource.

        Uses a remote resource to create a new model using the
        arguments in `args`.
        The allowed remote resources can be:
            - dataset
            - list of datasets
            - cluster
        When a cluster id is used as origin_resource, a centroid should
        also be provided in `args`. Otherwise the first centroid listed
        in the cluster's `cluster_models` is used.

        Raises KeyError when no centroid is given and none can be read
        from the cluster, and Exception when `origin_resource` is not a
        dataset, a list of datasets or a cluster.

        """

        create_args = {}
        if args is not None:
            create_args.update(args)
        if isinstance(origin_resource, list):
            # multidatasets
            create_args = self._set_create_from_datasets_args(
                origin_resource, args=create_args, wait_time=wait_time,
                retries=retries)
        else:
            resource_type = get_resource_type(origin_resource)
            # model from cluster and centroid
            if resource_type == CLUSTER_PATH:
                cluster_id = get_cluster_id(origin_resource)
                cluster = check_resource(cluster_id,
                                         query_string=TINY_RESOURCE,
                                         wait_time=wait_time,
                                         retries=retries,
                                         raise_on_error=True, api=self)
                if 'centroid' not in create_args:
                    try:
                        centroid = list(cluster['object'][
                            'cluster_models'].keys())[0]
                    except (KeyError, IndexError) as exc:
                        # IndexError covers an empty `cluster_models`,
                        # which previously escaped as a bare IndexError
                        # without the explanatory message.
                        raise KeyError("Failed to generate the model. A "
                                       "centroid id is needed in the args "
                                       "argument to generate a model from "
                                       "a cluster.") from exc
                    create_args.update({'centroid': centroid})
                create_args.update({'cluster': cluster_id})
            elif resource_type == DATASET_PATH:
                create_args = self._set_create_from_datasets_args(
                    origin_resource, args=create_args, wait_time=wait_time,
                    retries=retries)
            else:
                # The message used to say "to create a dataset", copied
                # from another handler; this call creates a model.
                raise Exception("A dataset, list of dataset ids"
                                " or cluster id plus centroid id are needed"
                                " to create a"
                                " model. %s found." % resource_type)

        body = json.dumps(create_args)
        return self._create(self.model_url, body)

    def get_model(self, model, query_string='',
                  shared_username=None, shared_api_key=None):
        """Retrieves a model.

           The model parameter should be a string containing the
           model id or the dict returned by create_model.
           As model is an evolving object that is processed
           until it reaches the FINISHED or FAULTY state, the function will
           return a dict that encloses the model values and state info
           available at the time it is called.

           If this is a shared model, the username and sharing api key must
           also be provided.
           NOTE(review): the earlier docstring said a `shared_ref` is
           needed for a model inside an ensemble or fusion, but this
           method has no such parameter; confirm how it should be
           supplied (possibly through `query_string`).
        """
        check_resource_type(model, MODEL_PATH,
                            message="A model id is needed.")
        return self.get_resource(model,
                                 query_string=query_string,
                                 shared_username=shared_username,
                                 shared_api_key=shared_api_key)

    def model_is_ready(self, model, **kwargs):
        """Checks whether a model's status is FINISHED.

           `kwargs` are forwarded to `get_model`.

        """
        check_resource_type(model, MODEL_PATH,
                            message="A model id is needed.")
        resource = self.get_model(model, **kwargs)
        return resource_is_ready(resource)

    def list_models(self, query_string=''):
        """Lists all your models, passing `query_string` to `_list`.

        """
        return self._list(self.model_url, query_string)

    def update_model(self, model, changes):
        """Updates a model with the given `changes`.

        """
        check_resource_type(model, MODEL_PATH,
                            message="A model id is needed.")
        return self.update_resource(model, changes)

    def delete_model(self, model, query_string=''):
        """Deletes a model.

        """
        check_resource_type(model, MODEL_PATH,
                            message="A model id is needed.")
        return self.delete_resource(model, query_string=query_string)

    def clone_model(self, model,
                    args=None, wait_time=3, retries=10):
        """Creates a cloned model from an existing `model`.

           The request arguments come from `_set_clone_from_args`.

        """
        create_args = self._set_clone_from_args(
            model, "model", args=args, wait_time=wait_time,
            retries=retries)

        body = json.dumps(create_args)
        return self._create(self.model_url, body)


# diff --git a/bigml/api_handlers/optimlhandler.py b/bigml/api_handlers/optimlhandler.py
# new file mode 100644
# index 00000000..cd5853d5
# --- /dev/null
# +++ b/bigml/api_handlers/optimlhandler.py
# @@ -0,0 +1,114 @@
# -*- coding: utf-8 -*-
#pylint: disable=abstract-method
#
# Copyright 2018-2025 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Base class for optiml's REST calls

   https://bigml.com/api/optimls

"""

try:
    import simplejson as json
except ImportError:
    import json


from bigml.api_handlers.resourcehandler import ResourceHandlerMixin
from bigml.api_handlers.resourcehandler import check_resource_type, \
    resource_is_ready
from bigml.constants import OPTIML_PATH


class OptimlHandlerMixin(ResourceHandlerMixin):
    """Mixin that adds the optiml REST calls to the BigML class.

       It reads attributes and helpers from the object it is mixed into,
       so it should not be instantiated independently.

    """
    def __init__(self):
        """Builds `optiml_url` from `self.url` and OPTIML_PATH.

           `self.url` must already be set when this initializer runs.

        """
        self.optiml_url = self.url + OPTIML_PATH

    def create_optiml(self, datasets,
                      args=None, wait_time=3, retries=10):
        """Creates an optiml from a `dataset`
           or a list of `datasets`.

           The request arguments are assembled by
           `_set_create_from_datasets_args`.

        """
        request = self._set_create_from_datasets_args(
            datasets, args=args, wait_time=wait_time, retries=retries)
        return self._create(self.optiml_url, json.dumps(request))

    def get_optiml(self, optiml, query_string='',
                   shared_username=None, shared_api_key=None):
        """Retrieves an optiml.

           The `optiml` parameter should be a string containing the
           optiml id or the dict returned by create_optiml.
           An optiml is an evolving object that is processed until it
           reaches the FINISHED or FAULTY state, so the returned dict
           holds the values and state info available at call time.

           For a shared optiml, the username and sharing api key must
           also be provided.
        """
        check_resource_type(
            optiml, OPTIML_PATH, message="An optiml id is needed.")
        return self.get_resource(
            optiml,
            query_string=query_string,
            shared_username=shared_username,
            shared_api_key=shared_api_key)

    def optiml_is_ready(self, optiml, **kwargs):
        """Checks whether an optiml's status is FINISHED.

           `kwargs` are forwarded to `get_optiml`.

        """
        check_resource_type(
            optiml, OPTIML_PATH, message="An optiml id is needed.")
        return resource_is_ready(self.get_optiml(optiml, **kwargs))

    def list_optimls(self, query_string=''):
        """Lists all your optimls, passing `query_string` to `_list`.

        """
        return self._list(self.optiml_url, query_string)

    def update_optiml(self, optiml, changes):
        """Updates an optiml with the given `changes`.

        """
        check_resource_type(
            optiml, OPTIML_PATH, message="An optiml id is needed.")
        return self.update_resource(optiml, changes)

    def delete_optiml(self, optiml, query_string=''):
        """Deletes an optiml.

        """
        check_resource_type(
            optiml, OPTIML_PATH, message="An optiml id is needed.")
        return self.delete_resource(optiml, query_string=query_string)


# diff --git a/bigml/api_handlers/pcahandler.py b/bigml/api_handlers/pcahandler.py
# new file mode 100644
# index 00000000..933d73da
# --- /dev/null
# +++ b/bigml/api_handlers/pcahandler.py
# @@ -0,0 +1,124 @@
# -*- coding: utf-8 -*-
#pylint: disable=abstract-method
#
# Copyright 2018-2025 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Base class for PCA' REST calls

   https://bigml.com/api/pcas

"""

try:
    import simplejson as json
except ImportError:
    import json


from bigml.api_handlers.resourcehandler import ResourceHandlerMixin
from bigml.api_handlers.resourcehandler import check_resource_type, \
    resource_is_ready
from bigml.constants import PCA_PATH


class PCAHandlerMixin(ResourceHandlerMixin):
    """Mixin that adds the PCA REST calls to the BigML class.

       It reads attributes and helpers from the object it is mixed into,
       so it should not be instantiated independently.

    """
    def __init__(self):
        """Builds `pca_url` from `self.url` and PCA_PATH.

           `self.url` must already be set when this initializer runs.

        """
        self.pca_url = self.url + PCA_PATH

    def create_pca(self, datasets, args=None, wait_time=3, retries=10):
        """Creates a PCA from a `dataset`
           or a list of `datasets`.

           The request arguments are assembled by
           `_set_create_from_datasets_args`.

        """
        request = self._set_create_from_datasets_args(
            datasets, args=args, wait_time=wait_time, retries=retries)
        return self._create(self.pca_url, json.dumps(request))

    def get_pca(self, pca, query_string='',
                shared_username=None, shared_api_key=None):
        """Retrieves a PCA.

           The `pca` parameter should be a string containing the
           PCA id or the dict returned by create_pca.
           A PCA is an evolving object that is processed until it
           reaches the FINISHED or FAULTY state, so the returned dict
           holds the values and state info available at call time.

           For a shared PCA, the username and sharing api key must
           also be provided.
        """
        check_resource_type(pca, PCA_PATH, message="A PCA id is needed.")
        return self.get_resource(
            pca,
            query_string=query_string,
            shared_username=shared_username,
            shared_api_key=shared_api_key)

    def pca_is_ready(self, pca, **kwargs):
        """Checks whether a PCA's status is FINISHED.

           `kwargs` are forwarded to `get_pca`.

        """
        check_resource_type(pca, PCA_PATH, message="A PCA id is needed.")
        return resource_is_ready(self.get_pca(pca, **kwargs))

    def list_pcas(self, query_string=''):
        """Lists all your PCAs, passing `query_string` to `_list`.

        """
        return self._list(self.pca_url, query_string)

    def update_pca(self, pca, changes):
        """Updates a PCA with the given `changes`.

        """
        check_resource_type(pca, PCA_PATH, message="A PCA id is needed.")
        return self.update_resource(pca, changes)

    def delete_pca(self, pca, query_string=''):
        """Deletes a PCA.

        """
        check_resource_type(pca, PCA_PATH, message="A PCA id is needed.")
        return self.delete_resource(pca, query_string=query_string)

    def clone_pca(self, pca,
                  args=None, wait_time=3, retries=10):
        """Creates a cloned PCA from an existing `PCA`.

           The request arguments come from `_set_clone_from_args`.

        """
        request = self._set_clone_from_args(
            pca, "pca", args=args, wait_time=wait_time, retries=retries)
        return self._create(self.pca_url, json.dumps(request))


# diff --git a/bigml/api_handlers/predictionhandler.py b/bigml/api_handlers/predictionhandler.py
# new file mode 100644
# index 00000000..c2c160b2
# --- /dev/null
# +++ b/bigml/api_handlers/predictionhandler.py
# @@ -0,0 +1,134 @@
# -*- coding: utf-8 -*-
#pylint: disable=abstract-method
#
# Copyright 2014-2025 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Base class for predictions' REST calls

   https://bigml.com/api/predictions

"""

try:
    import simplejson as json
except ImportError:
    import json


from bigml.api_handlers.resourcehandler import ResourceHandlerMixin
from bigml.api_handlers.resourcehandler import check_resource_type, \
    check_resource, get_resource_id, get_resource_type
from bigml.constants import SUPERVISED_PATHS, IMAGE_FIELDS_FILTER, \
    PREDICTION_PATH, SPECIFIC_EXCLUDES


class PredictionHandlerMixin(ResourceHandlerMixin):
    """Mixin that adds the prediction REST calls to the BigML class.

       It reads attributes and helpers from the object it is mixed into,
       so it should not be instantiated independently.

    """
    def __init__(self):
        """Builds `prediction_url` from `prediction_base_url` and
           PREDICTION_PATH.

           `prediction_base_url` is read from `self`, so it must already
           be set when this initializer runs.

        """
        self.prediction_url = self.prediction_base_url + PREDICTION_PATH

    def create_prediction(self, model, input_data=None,
                          args=None, wait_time=3, retries=10):
        """Creates a new prediction.

           The model parameter can be:
            - a simple tree model
            - a simple logistic regression model
            - an ensemble
            - a deepnet
            - a linear regression
            - a fusion
           Note that the old `by_name` argument has been deprecated.

           Raises an Exception when the resource type of `model` is not
           in SUPERVISED_PATHS or when no id can be extracted from it.
           When `model` is a dict with a non-None "resource" entry it is
           used directly as the fields info; otherwise the model is
           retrieved with `check_resource`. `input_data` goes through
           `prepare_image_fields` before being sent.

        """
        resource_type = get_resource_type(model)
        if resource_type not in SUPERVISED_PATHS:
            raise Exception("A supervised model resource id is needed"
                            " to create a prediction. %s found." %
                            resource_type)

        model_id = get_resource_id(model)
        if model_id is None:
            raise Exception("Failed to detect a correct model structure"
                            " in %s." % model)

        has_structure = (isinstance(model, dict)
                         and model.get("resource") is not None)
        if has_structure:
            # the fields info is taken from the given model structure
            model_info = model
        else:
            excluded = ",".join(SPECIFIC_EXCLUDES[resource_type])
            fields_query = IMAGE_FIELDS_FILTER + "," + excluded
            model_info = check_resource(model_id,
                                        query_string=fields_query,
                                        wait_time=wait_time,
                                        retries=retries,
                                        raise_on_error=True,
                                        api=self)

        payload = {}
        if args is not None:
            payload.update(args)
        payload["input_data"] = self.prepare_image_fields(
            model_info, {} if input_data is None else input_data)
        payload["model"] = model_id

        return self._create(self.prediction_url, json.dumps(payload),
                            verify=self.domain.verify_prediction)

    def get_prediction(self, prediction, query_string=''):
        """Retrieves a prediction.

           Runs `check_resource_type` against PREDICTION_PATH and then
           delegates to `get_resource`.

        """
        check_resource_type(
            prediction, PREDICTION_PATH,
            message="A prediction id is needed.")
        return self.get_resource(prediction, query_string=query_string)

    def list_predictions(self, query_string=''):
        """Lists all your predictions, passing `query_string` to `_list`.

        """
        return self._list(self.prediction_url, query_string)

    def update_prediction(self, prediction, changes):
        """Updates a prediction with the given `changes`.

        """
        check_resource_type(
            prediction, PREDICTION_PATH,
            message="A prediction id is needed.")
        return self.update_resource(prediction, changes)

    def delete_prediction(self, prediction, query_string=''):
        """Deletes a prediction.

        """
        check_resource_type(
            prediction, PREDICTION_PATH,
            message="A prediction id is needed.")
        return self.delete_resource(prediction, query_string=query_string)


# diff --git a/bigml/api_handlers/projecthandler.py b/bigml/api_handlers/projecthandler.py
# new file mode 100644
# index 00000000..3c3b7a51
# --- /dev/null
# +++ b/bigml/api_handlers/projecthandler.py
# @@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
#pylint: disable=abstract-method
#
# Copyright 2014-2025 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
+ +"""Base class for projects' REST calls + + https://bigml.com/api/projects + +""" + +try: + import simplejson as json +except ImportError: + import json + + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type +from bigml.constants import PROJECT_PATH + + +class ProjectHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the REST calls models. It should not + be instantiated independently. + + """ + def __init__(self): + """Initializes the ProjectHandler. This class is intended to be + used as a mixin on ResourceHandler, that inherits its + attributes and basic method from BigMLConnection, and must not be + instantiated independently. + + """ + self.project_url = self.url + PROJECT_PATH + + def create_project(self, args=None): + """Creates a project. + + """ + if args is None: + args = {} + body = json.dumps(args) + return self._create(self.project_url, body, organization=True) + + def get_project(self, project, query_string=''): + """Retrieves a project. + + The project parameter should be a string containing the + project id or the dict returned by create_project. + As every resource, is an evolving object that is processed + until it reaches the FINISHED or FAULTY state. The function will + return a dict that encloses the project values and state info + available at the time it is called. + """ + check_resource_type(project, PROJECT_PATH, + message="A project id is needed.") + return self.get_resource(project, query_string=query_string, + organization=True) + + def list_projects(self, query_string=''): + """Lists all your projects. + + """ + return self._list(self.project_url, query_string, organization=True) + + def update_project(self, project, changes): + """Updates a project. 
+ + """ + check_resource_type(project, PROJECT_PATH, + message="A project id is needed.") + return self.update_resource(project, changes, organization=True) + + def delete_project(self, project, query_string=''): + """Deletes a project. + + """ + check_resource_type(project, PROJECT_PATH, + message="A project id is needed.") + return self.delete_resource(project, query_string=query_string, + organization=True) diff --git a/bigml/api_handlers/projectionhandler.py b/bigml/api_handlers/projectionhandler.py new file mode 100644 index 00000000..d463fca8 --- /dev/null +++ b/bigml/api_handlers/projectionhandler.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2018-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Base class for projections' REST calls + + https://bigml.com/api/projections + +""" + +try: + import simplejson as json +except ImportError: + import json + + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + check_resource, get_resource_id, get_resource_type +from bigml.constants import PROJECTION_PATH, PCA_PATH, \ + IMAGE_FIELDS_FILTER, SPECIFIC_EXCLUDES + + +class ProjectionHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the REST calls models. It should not + be instantiated independently. 
+ + """ + def __init__(self): + """Initializes the ProjectionHandler. This class is intended to be + used as a mixin on ResourceHandler, that inherits its + attributes and basic method from BigMLConnection, and must not be + instantiated independently. + + """ + self.projection_url = self.prediction_base_url + PROJECTION_PATH + + def create_projection(self, pca, input_data=None, + args=None, wait_time=3, retries=10): + """Creates a new projection. + The pca parameter can be a pca resource or ID + + """ + pca_id = None + + resource_type = get_resource_type(pca) + if resource_type != PCA_PATH: + raise Exception("A PCA resource id is needed" + " to create a projection. %s found." % + resource_type) + + pca_id = get_resource_id(pca) + if pca_id is None: + raise Exception("Failed to detect a correct pca structure" + " in %s." % pca) + + if isinstance(pca, dict) and pca.get("resource") is not None: + # retrieving fields info from model structure + model_info = pca + else: + image_fields_filter = IMAGE_FIELDS_FILTER + "," + \ + ",".join(SPECIFIC_EXCLUDES[resource_type]) + model_info = check_resource(pca_id, + query_string=image_fields_filter, + wait_time=wait_time, + retries=retries, + raise_on_error=True, + api=self) + + if input_data is None: + input_data = {} + create_args = {} + if args is not None: + create_args.update(args) + create_args.update({ + "input_data": self.prepare_image_fields(model_info, input_data)}) + if pca_id is not None: + create_args.update({ + "pca": pca_id}) + + body = json.dumps(create_args) + return self._create(self.projection_url, body, + verify=self.domain.verify_prediction) + + def get_projection(self, projection, query_string=''): + """Retrieves a projection. + + """ + check_resource_type(projection, PROJECTION_PATH, + message="A projection id is needed.") + return self.get_resource(projection, query_string=query_string) + + def list_projections(self, query_string=''): + """Lists all your projections. 
+ + """ + return self._list(self.projection_url, query_string) + + def update_projection(self, projection, changes): + """Updates a projection. + + """ + check_resource_type(projection, PROJECTION_PATH, + message="A projection id is needed.") + return self.update_resource(projection, changes) + + def delete_projection(self, projection, query_string=''): + """Deletes a projection. + + """ + check_resource_type(projection, PROJECTION_PATH, + message="A projection id is needed.") + return self.delete_resource(projection, query_string=query_string) diff --git a/bigml/api_handlers/resourcehandler.py b/bigml/api_handlers/resourcehandler.py new file mode 100644 index 00000000..524f53ef --- /dev/null +++ b/bigml/api_handlers/resourcehandler.py @@ -0,0 +1,1070 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method,unused-import +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License.
+ +"""Common auxiliary constants, functions and class for all resources + +""" + +import time +import os +import datetime +import json +import re +import abc + +from xml.dom import minidom + +import bigml.constants as c + +from bigml.util import get_exponential_wait, get_status, is_status_final, \ + save, save_json +from bigml.util import DFT_STORAGE +from bigml.bigmlconnection import HTTP_OK, HTTP_ACCEPTED, HTTP_CREATED, \ + LOGGER, DOWNLOAD_DIR, HTTP_INTERNAL_SERVER_ERROR +from bigml.constants import WAITING, QUEUED, STARTED, IN_PROGRESS, \ + SUMMARIZED, FINISHED, UPLOADING, FAULTY, UNKNOWN, RUNNABLE +from bigml.exceptions import FaultyResourceError + +# Minimum query string to get model fields +TINY_RESOURCE = "full=false" + +# Resource types that are composed by other resources +COMPOSED_RESOURCES = ["ensemble", "fusion"] + +LIST_LAST = "limit=1&full=yes&tags=%s" + +PMML_QS = "pmml=yes" + + +def get_resource_type(resource): + """Returns the associated resource type for a resource + + """ + if isinstance(resource, dict) and 'resource' in resource: + resource = resource['resource'] + if not isinstance(resource, str): + raise ValueError("Failed to parse a resource string or structure.") + for resource_type, resource_re in list(c.RESOURCE_RE.items()): + if resource_re.match(resource): + return resource_type + return None + + +def get_resource(resource_type, resource): + """Returns a resource/id. + + """ + if isinstance(resource, dict) and 'resource' in resource: + resource = resource['resource'] + if isinstance(resource, str): + if c.RESOURCE_RE[resource_type].match(resource): + return resource + found_type = get_resource_type(resource) + if found_type is not None and \ + resource_type != get_resource_type(resource): + raise ValueError( + "The resource %s has not the expected type:" + " %s" % ( + resource, resource_type)) + raise ValueError("%s is not a valid resource ID." % resource) + + +def get_id(pure_id): + """Returns last part or a resource ID. 
+ + """ + if isinstance(pure_id, str): + pure_id = re.sub(r'^[^/]*/(%s)' % c.ID_PATTERN, r'\1', pure_id) + if c.ID_RE.match(pure_id): + return pure_id + raise ValueError("%s is not a valid ID." % pure_id) + + +def get_fields(resource): + """Returns the field information in a resource dictionary structure + + """ + try: + resource_type = get_resource_type(resource) + except ValueError: + raise ValueError("Unknown resource structure. Failed to find" + " a valid resource dictionary as argument.") + + if resource_type in c.RESOURCES_WITH_FIELDS: + resource = resource.get('object', resource) + # fields structure + if resource_type in list(c.FIELDS_PARENT.keys()) and \ + c.FIELDS_PARENT[resource_type] is not None: + fields = resource[c.FIELDS_PARENT[resource_type]].get('fields', {}) + else: + fields = resource.get('fields', {}) + + if resource_type == c.SAMPLE_PATH: + fields = {field['id']: field for field in fields} + return fields + + +def resource_is_ready(resource): + """Checks a fully fledged resource structure and returns True if finished. + + """ + if not isinstance(resource, dict): + raise Exception("No valid resource structure found") + # full resources + if 'object' in resource: + if 'error' not in resource: + raise Exception("No valid resource structure found") + if resource['error'] is not None: + raise Exception(resource['error']['status']['message']) + return (resource['code'] in [HTTP_OK, HTTP_ACCEPTED] and + get_status(resource)['code'] == c.FINISHED) + # only API response contents + return get_status(resource)['code'] == c.FINISHED + + +def check_resource_type(resource, expected_resource, message=None): + """Checks the resource type. 
+ + """ + if isinstance(expected_resource, str): + expected_resources = [expected_resource] + else: + expected_resources = expected_resource + if isinstance(resource, dict) and 'id' in resource: + resource = resource['id'] + resource_type = get_resource_type(resource) + if resource_type not in expected_resources: + raise Exception("%s\nFound %s." % (message, resource_type)) + + +def get_source_id(source): + """Returns a source/id. + + """ + return get_resource(c.SOURCE_PATH, source) + + +def get_dataset_id(dataset): + """Returns a dataset/id. + + """ + return get_resource(c.DATASET_PATH, dataset) + + +def get_model_id(model): + """Returns a model/id. + + """ + return get_resource(c.MODEL_PATH, model) + + +def get_prediction_id(prediction): + """Returns a prediction/id. + + """ + return get_resource(c.PREDICTION_PATH, prediction) + + +def get_evaluation_id(evaluation): + """Returns an evaluation/id. + + """ + return get_resource(c.EVALUATION_PATH, evaluation) + + +def get_ensemble_id(ensemble): + """Returns an ensemble/id. + + """ + return get_resource(c.ENSEMBLE_PATH, ensemble) + + +def get_batch_prediction_id(batch_prediction): + """Returns a batchprediction/id. + + """ + return get_resource(c.BATCH_PREDICTION_PATH, batch_prediction) + + +def get_cluster_id(cluster): + """Returns a cluster/id. + + """ + return get_resource(c.CLUSTER_PATH, cluster) + + +def get_centroid_id(centroid): + """Returns a centroid/id. + + """ + return get_resource(c.CENTROID_PATH, centroid) + + +def get_batch_centroid_id(batch_centroid): + """Returns a batchcentroid/id. + + """ + return get_resource(c.BATCH_CENTROID_PATH, batch_centroid) + + +def get_anomaly_id(anomaly): + """Returns an anomaly/id. + + """ + return get_resource(c.ANOMALY_PATH, anomaly) + + +def get_anomaly_score_id(anomaly_score): + """Returns an anomalyscore/id. + + """ + return get_resource(c.ANOMALY_SCORE_PATH, anomaly_score) + + +def get_batch_anomaly_score_id(batch_anomaly_score): + """Returns a batchanomalyscore/id. 
+ + """ + return get_resource(c.BATCH_ANOMALY_SCORE_PATH, batch_anomaly_score) + + +def get_project_id(project): + """Returns a project/id. + + """ + return get_resource(c.PROJECT_PATH, project) + + +def get_sample_id(sample): + """Returns a sample/id. + + """ + return get_resource(c.SAMPLE_PATH, sample) + + +def get_correlation_id(correlation): + """Returns a correlation/id. + + """ + return get_resource(c.CORRELATION_PATH, correlation) + + +def get_statistical_test_id(statistical_test): + """Returns a statisticaltest/id. + + """ + return get_resource(c.STATISTICAL_TEST_PATH, statistical_test) + + +def get_logistic_regression_id(logistic_regression): + """Returns a logisticregression/id. + + """ + return get_resource(c.LOGISTIC_REGRESSION_PATH, logistic_regression) + + +def get_association_id(association): + """Returns an association/id. + + """ + return get_resource(c.ASSOCIATION_PATH, association) + + +def get_association_set_id(association_set): + """Returns an associationset/id. + + """ + return get_resource(c.ASSOCIATION_SET_PATH, association_set) + + +def get_configuration_id(configuration): + """Returns a configuration/id. + + """ + return get_resource(c.CONFIGURATION_PATH, configuration) + + +def get_topic_model_id(topic_model): + """Returns a topicmodel/id. + + """ + return get_resource(c.TOPIC_MODEL_PATH, topic_model) + + +def get_topic_distribution_id(topic_distribution): + """Returns a topicdistribution/id. + + """ + return get_resource(c.TOPIC_DISTRIBUTION_PATH, topic_distribution) + + +def get_batch_topic_distribution_id(batch_topic_distribution): + """Returns a batchtopicdistribution/id. + + """ + return get_resource(c.BATCH_TOPIC_DISTRIBUTION_PATH, + batch_topic_distribution) + + +def get_time_series_id(time_series): + """Returns a timeseries/id. + + """ + return get_resource(c.TIME_SERIES_PATH, time_series) + + +def get_forecast_id(forecast): + """Returns a forecast/id. 
+ + """ + return get_resource(c.FORECAST_PATH, forecast) + + +def get_fusion_id(fusion): + """Returns a fusion/id. + + """ + return get_resource(c.FUSION_PATH, fusion) + + +def get_optiml_id(optiml): + """Returns an optiml/id. + + """ + return get_resource(c.OPTIML_PATH, optiml) + + +def get_deepnet_id(deepnet): + """Returns a deepnet/id. + + """ + return get_resource(c.DEEPNET_PATH, deepnet) + + +def get_pca_id(pca): + """Returns a PCA/id. + + """ + return get_resource(c.PCA_PATH, pca) + + +def get_projection_id(projection): + """Returns a projection/id. + + """ + return get_resource(c.PROJECTION_PATH, projection) + + +def get_batch_projection_id(batch_projection): + """Returns a batchprojection/id. + + """ + return get_resource(c.BATCH_PROJECTION_PATH, batch_projection) + + +def get_linear_regression_id(linear_regression): + """Returns a linearregression/id. + + """ + return get_resource(c.LINEAR_REGRESSION_PATH, linear_regression) + + +def get_script_id(script): + """Returns a script/id. + + """ + return get_resource(c.SCRIPT_PATH, script) + + +def get_execution_id(execution): + """Returns an execution/id. + + """ + return get_resource(c.EXECUTION_PATH, execution) + + +def get_library_id(library): + """Returns a library/id. + + """ + return get_resource(c.LIBRARY_PATH, library) + + +def get_external_connector_id(library): + """Returns an externalconnector/id. + + """ + return get_resource(c.EXTERNAL_CONNECTOR_PATH, library) + + +def get_resource_id(resource): + """Returns the resource id if it falls in one of the registered types + + """ + if isinstance(resource, dict) and 'resource' in resource: + return resource['resource'] + if isinstance(resource, str) and any( + resource_re.match(resource) for _, resource_re + in list(c.RESOURCE_RE.items())): + return resource + return None + + +def exception_on_error(resource, logger=None): + """Raises exception if the resource has an error.
The error can be + due to a problem in the API call to retrieve it or because the + resource is FAULTY. + + """ + if resource.get('error') is not None: + # http error calling the API + message = "API connection problem - %s" % resource.get('error', \ + {}).get('status', {}).get('message') + if logger is not None: + logger.error(message) + raise Exception(message) + if resource.get('object', resource).get('status', {}).get('error') \ + is not None: + # Faulty resource problem + status = resource.get('object', resource).get( \ + 'status', {}) + message = "Faulty resource %s - %s" % (resource["resource"], + status.get('cause', status).get('message')) + if logger is not None: + logger.error(message) + raise FaultyResourceError(message) + + +def check_resource(resource, get_method=None, query_string='', wait_time=1, + retries=None, raise_on_error=False, + max_elapsed_estimate=float('inf'), api=None, debug=False, + progress_cb=None): + """Waits until a resource is finished. + + Given a resource and its corresponding get_method (if absent, the + generic get_resource is used), it calls the get_method on + the resource with the given query_string + and waits with sleeping intervals of wait_time + until the resource is in a final state (either FINISHED + or FAULTY. The number of retries can be limited using the retries + parameter. 
+ + """ + + resource_id = get_resource_id(resource) + # ephemeral predictions + if isinstance(resource, dict) and resource.get("resource") is None: + return resource + if resource_id is None: + raise ValueError("Failed to extract a valid resource id to check.") + if wait_time <= 0: + raise ValueError("The time to wait needs to be positive.") + debug = debug or (api is not None and (api.debug or api.short_debug)) + if debug: + print("Checking resource: %s" % resource_id) + kwargs = {'query_string': query_string} + if hasattr(api, 'shared_ref') or (get_method is None and + hasattr(api, 'get_resource')): + get_method = api.get_resource + elif get_method is None: + raise ValueError("You must supply either the get_method or the api" + " connection info to retrieve the resource") + + if not isinstance(resource, dict) or not http_ok(resource) or \ + resource.get("object") is None: + resource = resource_id + + if isinstance(resource, str): + if debug: + print("Getting resource %s" % resource_id) + resource = get_method(resource_id, **kwargs) + if not http_ok(resource): + if raise_on_error: + raise Exception("API connection problem: %s" % + json.dumps(resource)) + return resource + + counter = 0 + elapsed = 0 + while retries is None or counter < retries: + + counter += 1 + status = get_status(resource) + code = status['code'] + if debug: + print("The resource has status code: %s" % code) + if code == c.FINISHED: + if counter > 1: + if debug: + print("Getting resource %s with args %s" % (resource_id, + kwargs)) + # final get call to retrieve complete resource + resource = get_method(resource, **kwargs) + if raise_on_error: + exception_on_error(resource) + return resource + if code == c.FAULTY: + if raise_on_error: + exception_on_error(resource) + return resource + # resource is ok + progress = 0 + #pylint: disable=locally-disabled, bare-except + if status is not None: + progress = status.get("progress", 0) + if debug: + print("Progress: %s" % progress) + try: + if 
progress_cb is not None: + progress_cb(progress, resource) + except: + print("WARNING: Progress callback raised exception. Please," + "double check your function.") + progress = progress if progress > 0.8 \ + else 0 # dumping when almost finished + progress_dumping = (1 - progress) + _wait_time = get_exponential_wait(wait_time, + max(int(counter * progress_dumping), 1)) + _max_wait = max_elapsed_estimate - _wait_time + _wait_time = min(_max_wait, _wait_time) + if _wait_time <= 0: + # when the max_expected_elapsed time is met, we still wait for + # the resource to be finished but we restart all counters and + # the exponentially growing time is initialized + _wait_time = wait_time + counter = 0 + elapsed = 0 + if debug: + print("Sleeping %s" % _wait_time) + time.sleep(_wait_time) + elapsed += _wait_time + # retries for the finished status use a query string that gets the + # minimal available resource + if kwargs.get('query_string') is not None: + tiny_kwargs = {'query_string': c.TINY_RESOURCE} + else: + tiny_kwargs = {} + if debug: + print("Getting only status for resource %s" % resource_id) + resource = get_method(resource, **tiny_kwargs) + if not http_ok(resource): + resource["resource"] = resource_id + if raise_on_error: + raise Exception("API connection problem: %s" % + json.dumps(resource)) + return resource + + if raise_on_error: + exception_on_error(resource) + return resource + + +def http_ok(resource): + """Checking the validity of the http return code + + """ + if 'code' in resource: + return resource['code'] in [HTTP_OK, HTTP_CREATED, HTTP_ACCEPTED] + return False + + +class ResourceHandlerMixin(metaclass=abc.ABCMeta): + """This class is used by the BigML class as + a mixin that provides the get method for all kind of + resources and auxiliar utilities to check their status. It should not + be instantiated independently. 
+ + """ + @abc.abstractmethod + def prepare_image_fields(self, model_info, input_data): + """This is an abstract method that should be implemented in the API + final class to create sources for the image fields used in the model + + """ + + def get_resource(self, resource, **kwargs): + """Retrieves a remote resource. + + The resource parameter should be a string containing the + resource id or the dict returned by the corresponding create method. + As each resource is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, the function will + return a dict that encloses the resource values and state info + available at the time it is called. + + """ + resource_type = get_resource_type(resource) + if resource_type is None: + raise ValueError("A resource id or structure is needed.") + resource_id = get_resource_id(resource) + + # adding the shared_ref if the API connection object has one + if hasattr(self, "shared_ref"): + kwargs.update({"shared_ref": self.shared_ref}) + + if resource_id: + kwargs.update({"resource_id": resource_id}) + return self._get("%s%s" % (self.url, resource_id), **kwargs) + return None + + def update_resource(self, resource, changes, **kwargs): + """Updates a remote resource. + + The resource parameter should be a string containing the + resource id or the dict returned by the corresponding create method. + + """ + resource_id, error = self.final_resource(resource) + if error or resource_id is None: + raise Exception("Failed to update %s. Only correctly finished " + "resources can be updated. Please, check " + "the resource status."
% resource_id) + kwargs.update({"resource_id": resource_id}) + body = json.dumps(changes) + return self._update("%s%s" % (self.url, resource_id), body, **kwargs) + + def delete_resource(self, resource, **kwargs): + """Delete a remote resource + + """ + resource_id = get_resource_id(resource) + if resource_id: + return self._delete("%s%s" % (self.url, resource_id), **kwargs) + return None + + def _download_resource(self, resource, filename, retries=10): + """Download CSV information from downloadable resources + + """ + resource_id, error = self.final_resource(resource, retries=retries) + if error or resource_id is None: + raise Exception("Failed to download %s. Only correctly finished " + "resources can be downloaded. Please, check " + "the resource status. %s" % (resource_id, error)) + return self._download("%s%s%s" % (self.url, resource_id, + DOWNLOAD_DIR), + filename=filename, + retries=retries) + + #pylint: disable=locally-disabled,invalid-name + def ok(self, resource, query_string='', wait_time=1, + max_requests=None, raise_on_error=False, retries=None, + error_retries=None, max_elapsed_estimate=float('inf'), debug=False, + progress_cb=None): + """Waits until the resource is finished or faulty, updates it and + returns True when a finished resource is correctly retrieved + and False if the retrieval fails or the resource is faulty. + + resource: (map) Resource structure + query_string: (string) Filters used on the resource attributes + wait_time: (number) Time to sleep between get requests + max_requests: (integer) Maximum number of get requests + raise_on_error: (boolean) Whether to raise errors or log them + retries: (integer) Now `max_requests` (deprecated) + error_retries: (integer) Retries for transient HTTP errors + max_elapsed_estimate: (integer) Elapsed number of seconds that we + expect the resource to be finished in. + This is not a hard limit for the method + to end, but an estimation of time to wait. 
+ debug: (boolean) Whether to print traces for every get call + progress_cb: (function) Callback function to log progress + + """ + def maybe_retrying(resource, error_retries, new_resource=None): + """Retrying retrieval if it's due to a transient error """ + if new_resource is None: + new_resource = resource + else: + new_resource.update({"object": resource["object"]}) + if new_resource.get('error', {}).get( + 'status', {}).get('type') == c.TRANSIENT \ + and error_retries is not None and error_retries > 0: + time.sleep(wait_time) + return self.ok(resource, query_string, wait_time, + max_requests, raise_on_error, retries, + error_retries - 1, max_elapsed_estimate, + debug) + resource.update(new_resource) + if raise_on_error: + exception_on_error(resource, logger=LOGGER) + return False + + new_resource = check_resource( \ + resource, + query_string=query_string, + wait_time=wait_time, + retries=max_requests, + max_elapsed_estimate=max_elapsed_estimate, + raise_on_error=False, # we don't raise on error to update always + api=self, + debug=debug, + progress_cb=progress_cb) + + if http_ok(new_resource): + resource.update(new_resource) + # try to recover from transient errors + if resource["error"] is not None: + return maybe_retrying(resource, error_retries) + + #pylint: disable=locally-disabled,bare-except + if raise_on_error: + exception_on_error(resource, logger=LOGGER) + else: + try: + exception_on_error(resource) + except: + return False + return True + return maybe_retrying(resource, error_retries, + new_resource=new_resource) + + def _set_create_from_datasets_args(self, datasets, args=None, + wait_time=3, retries=10, key=None): + """Builds args dictionary for the create call from a `dataset` or a + list of `datasets`. 
+ + """ + dataset_ids = [] + single = False + + create_args = {} + if args is not None: + create_args.update(args) + + if isinstance(datasets, str) and datasets.startswith('shared/'): + origin = datasets.replace('shared/', "") + if get_resource_type(origin) != "dataset": + create_args.update({"shared_hash": origin.split("/")[1]}) + return create_args + + if not isinstance(datasets, list): + single = True + origin_datasets = [datasets] + else: + origin_datasets = datasets + + for dataset in origin_datasets: + check_resource_type(dataset, c.DATASET_PATH, + message=("A dataset id is needed to create" + " the resource.")) + if isinstance(dataset, dict) and 'id' in dataset: + dataset['id'] = dataset['id'].replace("shared/", "") + dataset_ids.append(dataset) + dataset_id = dataset['id'] + else: + dataset_id = get_dataset_id(dataset).replace( \ + "shared/", "") + dataset_ids.append(dataset_id) + dataset = check_resource(dataset_id, + query_string=c.TINY_RESOURCE, + wait_time=wait_time, retries=retries, + raise_on_error=True, api=self) + if single: + if key is None: + key = "dataset" + create_args.update({key: dataset_ids[0]}) + else: + if key is None: + key = "datasets" + create_args.update({key: dataset_ids}) + + return create_args + + def _set_create_from_models_args(self, models, types, args=None, + wait_time=3, retries=10): + """Builds args dictionary for the create call from a list of + models. The first argument needs to be a list of: + - the model IDs + - dict objects with the "id" attribute set to the ID of the model + and the "weight" attribute set to the weight associated to that + model. 
+ + """ + model_ids = [] + if not isinstance(models, list): + origin_models = [models] + else: + origin_models = models + + for model in origin_models: + if isinstance(model, dict) and model.get("id"): + model = model.get("id") + check_resource_type(model, types, + message=("A list of model ids " + "is needed to create" + " the resource.")) + model_ids.append(get_resource_id(model).replace("shared/", "")) + model = check_resource(model, + query_string=c.TINY_RESOURCE, + wait_time=wait_time, retries=retries, + raise_on_error=True, api=self) + + if not isinstance(origin_models[0], dict) \ + or not origin_models[0].get("id"): + origin_models = model_ids + + create_args = {} + if args is not None: + create_args.update(args) + + create_args.update({"models": origin_models}) + + return create_args + + def _set_clone_from_args(self, origin, resource_type, args=None, + wait_time=3, retries=10): + """Builds args dictionary for the create call to clone resources. + The first argument needs to be a resource or resource ID that + has one of the types in resource_type + + """ + if isinstance(origin, dict) and origin.get("id"): + origin = origin.get("id") + + origin_id = get_resource_id(origin) + + if origin_id is not None: + check_resource_type(origin, resource_type, + message=("Failed to find a %s as the resource" + " to clone." % resource_type)) + origin = check_resource(origin, + query_string=c.TINY_RESOURCE, + wait_time=wait_time, retries=retries, + raise_on_error=True, api=self) + + create_args = {} + if args is not None: + create_args.update(args) + + if isinstance(origin, dict) and origin["object"].get("shared_hash"): + attr = "shared_hash" + origin_id = origin["object"][attr] + else: + attr = "origin" + create_args.update({attr: origin_id}) + + return create_args + + def check_origins(self, dataset, model, args, model_types=None, + wait_time=3, retries=10): + """Returns True if the dataset and model needed to build + the batch prediction or evaluation are finished. 
The args given + by the user are modified to include the related ids in the + create call. + + If model_types is a list, then we check any of the model types in + the list. + + """ + + def args_update(resource_id): + """Updates args when the resource is ready + + """ + if resource_id: + check_resource(resource_id, + query_string=c.TINY_RESOURCE, + wait_time=wait_time, retries=retries, + raise_on_error=True, api=self) + args.update({ + resource_type: resource_id, + "dataset": dataset_id}) + + if model_types is None: + model_types = [] + + resource_type = get_resource_type(dataset) + if c.DATASET_PATH != resource_type: + raise Exception("A dataset id is needed as second argument" + " to create the resource. %s found." % + resource_type) + dataset_id = get_dataset_id(dataset) + if dataset_id: + dataset = check_resource(dataset_id, + query_string=c.TINY_RESOURCE, + wait_time=wait_time, retries=retries, + raise_on_error=True, api=self) + resource_type = get_resource_type(model) + if resource_type in model_types: + resource_id = get_resource_id(model) + args_update(resource_id) + elif resource_type == c.MODEL_PATH: + resource_id = get_model_id(model) + args_update(resource_id) + else: + raise Exception("A model or ensemble id is needed as first" + " argument to create the resource." + " %s found." % resource_type) + + return dataset_id and resource_id + + def export(self, resource, filename=None, pmml=False, + **kwargs): + """Retrieves a remote resource when finished and stores it + in the user-given file + + The resource parameter should be a string containing the + resource id or the dict returned by the corresponding create method. + As each resource is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, the function will + wait until the resource is in one of these states to store the + associated info. 
+ + """ + resource_type = get_resource_type(resource) + if resource_type is None: + raise ValueError("A resource ID or structure is needed.") + + if pmml: + if resource_type not in c.PMML_MODELS: + raise ValueError("Failed to export to PMML. Only some models" + " can be exported to PMML.") + + resource_id = get_resource_id(resource) + if resource_id: + if pmml: + # only models with no text fields can be exported + resource_info = self._get("%s%s" % (self.url, resource_id), + query_string=c.TINY_RESOURCE) + field_types = resource_info["object"].get( \ + "dataset_field_types", {}) + if field_types.get("items", 0) > 0 or \ + field_types.get("text", 0) > 0: + raise ValueError("Failed to export to PMML. Models with " + "text and items fields cannot be " + "exported to PMML.") + if kwargs.get("query_string"): + kwargs["query_string"] += "&%s" % PMML_QS + else: + kwargs["query_string"] = PMML_QS + + if kwargs.get("query_string") and \ + "output_format" in kwargs.get("query_string"): + resource_info = self._get("%s%s" % (self.url, + resource_id)) + else: + resource_info = self._get("%s%s" % (self.url, resource_id), + **kwargs) + if not is_status_final(resource_info): + self.ok(resource_info) + if filename is None: + file_dir = self.storage or DFT_STORAGE + filename = os.path.join( \ + file_dir, resource_id.replace("/", "_")) + if resource_type in COMPOSED_RESOURCES: + # inner models in composed resources need the shared reference + # to be downloaded + if resource.startswith("shared"): + kwargs.update( + {"shared_ref": resource_id.replace("shared/", "")}) + elif "shared_ref" in kwargs and not resource.startswith("shared"): + kwargs["shared_ref"] = "%s,%s" % (kwargs["shared_ref"], + resource_id) + for component_id in resource_info["object"]["models"]: + # for weighted fusions we need to retrieve the component ID + if isinstance(component_id, dict): + component_id = component_id['id'] + component_filename = os.path.join( + os.path.dirname(filename), + 
component_id.replace("/", "_")) + self.export( \ + component_id, + filename=component_filename, + pmml=pmml, + **kwargs) + if kwargs.get("query_string") and \ + "output_format" in kwargs.get("query_string"): + return self._download("%s%s?%s" % \ + (self.url, resource_id, kwargs["query_string"]), filename) + + if pmml and resource_info.get("object", {}).get("pmml"): + resource_info = resource_info.get("object", {}).get("pmml") + resource_info = minidom.parseString( \ + resource_info).toprettyxml() + return save(resource_info, filename) + return save_json(resource_info, filename) + raise ValueError("First agument is expected to be a valid" + " resource ID or structure.") + + def export_last(self, tags, filename=None, + resource_type="model", project=None, + **kwargs): + """Retrieves a remote resource by tag when finished and stores it + in the user-given file + + The resource parameter should be a string containing the + resource id or the dict returned by the corresponding create method. + As each resource is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, the function will + wait until the resource is in one of these states to store the + associated info. 
+ + """ + + if tags is not None and tags != '': + query_string = LIST_LAST % tags + if project is not None: + query_string += "&project=%s" % project + + kwargs.update({'query_string': "%s&%s" % \ + (query_string, kwargs.get('query_string', ''))}) + + response = self._list("%s%s" % (self.url, resource_type), + **kwargs) + if len(response.get("objects", [])) > 0: + resource_info = response["objects"][0] + if not is_status_final(resource_info): + self.ok(resource_info) + if filename is None: + file_dir = self.storage or DFT_STORAGE + now = datetime.datetime.now().strftime("%a%b%d%y_%H%M%S") + filename = os.path.join( \ + file_dir, + "%s_%s.json" % (tags.replace("/", "_"), now)) + if resource_type in COMPOSED_RESOURCES: + for component_id in resource_info["models"]: + self.export( \ + component_id, + filename=os.path.join( \ + os.path.dirname(filename), + component_id.replace("/", "_"))) + return save_json(resource_info, filename) + raise ValueError("No %s found with tags %s." % (resource_type, + tags)) + raise ValueError("First agument is expected to be a non-empty" + " tag.") + + def final_resource(self, resource, retries=10): + """Waits for a resource to finish or fail and returns + its ID and the error information + + """ + resource = check_resource( \ + resource, + query_string=c.TINY_RESOURCE, + retries=retries, + api=self) + error = resource.get("error") + try: + if resource.get("object", resource)["status"]["code"] == c.FAULTY: + error = "%s (%s)" % (resource.get("error"), + resource.get("object", resource)[ \ + "status"]["message"]) + except KeyError: + error = "Could not get resource status info for %s" % \ + resource.get("resource", resource) + return get_resource_id(resource), error diff --git a/bigml/api_handlers/samplehandler.py b/bigml/api_handlers/samplehandler.py new file mode 100644 index 00000000..d50baf0b --- /dev/null +++ b/bigml/api_handlers/samplehandler.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# 
Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Base class for samples' REST calls + + https://bigml.com/api/samples + +""" + +try: + import simplejson as json +except ImportError: + import json + + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + get_resource_type, get_dataset_id, check_resource +from bigml.constants import (SAMPLE_PATH, DATASET_PATH, + TINY_RESOURCE) + + +class SampleHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the samples' REST calls. It should not + be instantiated independently. + + """ + def __init__(self): + """Initializes the SampleHandler. This class is intended to be + used as a mixin on ResourceHandler, that inherits its + attributes and basic method from BigMLConnection, and must not be + instantiated independently. + + """ + self.sample_url = self.url + SAMPLE_PATH + + def create_sample(self, dataset, args=None, wait_time=3, retries=10): + """Creates a sample from a `dataset`. + + """ + dataset_id = None + resource_type = get_resource_type(dataset) + if resource_type == DATASET_PATH: + dataset_id = get_dataset_id(dataset) + check_resource(dataset_id, + query_string=TINY_RESOURCE, + wait_time=wait_time, retries=retries, + raise_on_error=True, api=self) + else: + raise Exception("A dataset id is needed to create a" + " sample. %s found." 
% resource_type) + + create_args = {} + if args is not None: + create_args.update(args) + create_args.update({ + "dataset": dataset_id}) + + body = json.dumps(create_args) + return self._create(self.sample_url, body) + + def get_sample(self, sample, query_string=''): + """Retrieves a sample. + + The sample parameter should be a string containing the + sample id or the dict returned by create_sample. + As sample is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, the function will + return a dict that encloses the sample values and state info + available at the time it is called. + """ + check_resource_type(sample, SAMPLE_PATH, + message="A sample id is needed.") + return self.get_resource(sample, query_string=query_string) + + def list_samples(self, query_string=''): + """Lists all your samples. + + """ + return self._list(self.sample_url, query_string) + + def update_sample(self, sample, changes): + """Updates a sample. + + """ + check_resource_type(sample, SAMPLE_PATH, + message="A sample id is needed.") + return self.update_resource(sample, changes) + + def delete_sample(self, sample, query_string=''): + """Deletes a sample. + + """ + check_resource_type(sample, SAMPLE_PATH, + message="A sample id is needed.") + return self.delete_resource(sample, query_string=query_string) diff --git a/bigml/api_handlers/scripthandler.py b/bigml/api_handlers/scripthandler.py new file mode 100644 index 00000000..d03ed771 --- /dev/null +++ b/bigml/api_handlers/scripthandler.py @@ -0,0 +1,185 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Base class for whizzml script' REST calls + + https://bigml.com/api/scripts + +""" + +import os +import re + +from urllib.parse import urljoin + +import requests + +try: + import simplejson as json +except ImportError: + import json + + + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + get_script_id, get_resource_type, check_resource +from bigml.constants import SCRIPT_PATH, TINY_RESOURCE +from bigml.util import is_url +from bigml.bigmlconnection import HTTP_OK + + +def retrieve_script_args(gist_url): + """Retrieves the information to create a script from a public + gist url + + """ + + response = requests.get(gist_url) + response.encoding = "utf8" + if response.status_code == HTTP_OK: + pattern = r"\"[^\"]*?\/raw\/[^\"]*" + urls = re.findall(pattern, response.text) + script_args = {} + + for url in urls: + url = urljoin(gist_url, url.replace("\"", "")) + if url.endswith(".whizzml"): + response = requests.get(url) + if response.status_code == HTTP_OK: + script_args["source_code"] = response.text + if url.endswith(".json"): + response = requests.get(url, \ + headers={"content-type": "application/json"}) + if response.status_code == HTTP_OK: + script_args["json"] = response.text + return script_args + raise ValueError("The url did not contain the expected structure.") + + +class ScriptHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the whizzml script' REST calls. 
It should not + be instantiated independently. + + """ + def __init__(self): + """Initializes the ScriptHandler. This class is intended to be + used as a mixin on ResourceHandler, that inherits its + attributes and basic method from BigMLConnection, and must not be + instantiated independently. + + """ + self.script_url = self.url + SCRIPT_PATH + + def create_script(self, source_code=None, args=None, + wait_time=3, retries=10): + """Creates a whizzml script from its source code. The `source_code` + parameter can be a: + {script ID}: the ID for an existing whizzml script + {path}: the path to a file containing the source code + {string} : the string containing the source code for the script + + """ + create_args = {} + if args is not None: + create_args.update(args) + + if source_code is None: + raise Exception('A valid code string' + ' or a script id must be provided.') + resource_type = get_resource_type(source_code) + if resource_type == SCRIPT_PATH: + script_id = get_script_id(source_code) + if script_id: + check_resource(script_id, + query_string=TINY_RESOURCE, + wait_time=wait_time, retries=retries, + raise_on_error=True, api=self) + create_args.update({ + "origin": script_id}) + elif isinstance(source_code, str): + if is_url(source_code): + script_args = retrieve_script_args(source_code) + source_code = script_args.get("source_code") + create_args.update(json.loads(script_args.get("json"))) + else: + try: + if os.path.exists(source_code): + with open(source_code) as code_file: + source_code = code_file.read() + except IOError: + raise IOError("Could not open the source code file %s." % + source_code) + create_args.update({ + "source_code": source_code}) + else: + raise Exception("A script id or a valid source code" + " is needed to create a" + " script. %s found." % resource_type) + + + body = json.dumps(create_args) + return self._create(self.script_url, body) + + def get_script(self, script, query_string=''): + """Retrieves a script. 
+ + The script parameter should be a string containing the + script id or the dict returned by create_script. + As script is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, the function will + return a dict that encloses the script content and state info + available at the time it is called. + """ + check_resource_type(script, SCRIPT_PATH, + message="A script id is needed.") + return self.get_resource(script, query_string=query_string) + + def list_scripts(self, query_string=''): + """Lists all your scripts. + + """ + return self._list(self.script_url, query_string) + + def update_script(self, script, changes): + """Updates a script. + + """ + check_resource_type(script, SCRIPT_PATH, + message="A script id is needed.") + return self.update_resource(script, changes) + + def clone_script(self, script, + args=None, wait_time=3, retries=10): + """Creates a cloned script from an existing `script` + + """ + create_args = self._set_clone_from_args( + script, "script", args=args, wait_time=wait_time, + retries=retries) + + body = json.dumps(create_args) + return self._create(self.script_url, body) + + def delete_script(self, script, query_string=''): + """Deletes a script. + + """ + check_resource_type(script, SCRIPT_PATH, + message="A script id is needed.") + return self.delete_resource(script, query_string=query_string) diff --git a/bigml/api_handlers/sourcehandler.py b/bigml/api_handlers/sourcehandler.py new file mode 100644 index 00000000..bd4b6e6b --- /dev/null +++ b/bigml/api_handlers/sourcehandler.py @@ -0,0 +1,637 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Base class for sources' REST calls + + https://bigml.com/api/sources + +""" + +import sys +import os +import numbers +import time +import logging + +from urllib import parse + +try: + #added to allow GAE to work + from google.appengine.api import urlfetch + GAE_ENABLED = True +except ImportError: + GAE_ENABLED = False + +try: + import simplejson as json +except ImportError: + import json + +try: + from pandas import DataFrame + from io import StringIO + PANDAS_READY = True +except ImportError: + PANDAS_READY = False + +from zipfile import ZipFile + +import mimetypes +import requests + +from requests_toolbelt import MultipartEncoder + +from bigml.util import is_url, maybe_save, filter_by_extension, \ + infer_field_type +from bigml.bigmlconnection import ( + HTTP_CREATED, HTTP_BAD_REQUEST, + HTTP_UNAUTHORIZED, HTTP_PAYMENT_REQUIRED, HTTP_NOT_FOUND, + HTTP_TOO_MANY_REQUESTS, + HTTP_INTERNAL_SERVER_ERROR, GAE_ENABLED, SEND_JSON, LOGGER) +from bigml.bigmlconnection import json_load +from bigml.api_handlers.resourcehandler import check_resource_type, \ + resource_is_ready, get_source_id, get_id +from bigml.constants import SOURCE_PATH, IMAGE_EXTENSIONS +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.fields import Fields + + +MAX_CHANGES = 5 +MAX_RETRIES = 5 + +def compact_regions(regions): + """Returns the list of regions in the compact value used for updates """ + + out_regions = [] + for region in regions: + new_region = [] + new_region.append(region.get("label")) + new_region.append(region.get("xmin")) + 
new_region.append(region.get("ymin")) + new_region.append(region.get("xmax")) + new_region.append(region.get("ymax")) + out_regions.append(new_region) + return out_regions + + +class SourceHandlerMixin(ResourceHandlerMixin): + + """This class is used by the BigML class as + a mixin that provides the REST calls to sources. It should not + be instantiated independently. + + """ + + def __init__(self): + """Initializes the SourceHandler. This class is intended to be + used as a mixin on ResourceHandler, that inherits its + attributes and basic method from BigMLConnection, and must not be + instantiated independently. + + """ + self.source_url = self.url + SOURCE_PATH + + def _create_remote_source(self, url, args=None): + """Creates a new source using a URL + + """ + create_args = {} + if args is not None: + create_args.update(args) + create_args.update({"remote": url}) + create_args = self._add_project(create_args) + body = json.dumps(create_args) + return self._create(self.source_url, body) + + def _create_connector_source(self, connector, args=None): + """Creates a new source using an external connector + + """ + create_args = {} + if args is not None: + create_args.update(args) + create_args.update({"external_data": connector}) + create_args = self._add_project(create_args) + body = json.dumps(create_args) + return self._create(self.source_url, body) + + def _create_inline_source(self, src_obj, args=None): + """Create source from inline data + + The src_obj data should be a list of rows stored as dict or + list objects. 
+ """ + create_args = {} + if args is not None: + create_args.update(args) + create_args = self._add_project(create_args) + + # some basic validation + if (not isinstance(src_obj, list) or ( + not all(isinstance(row, dict) for row in src_obj) and + not all(isinstance(row, list) for row in src_obj))): + raise TypeError( + 'ERROR: inline source must be a list of dicts or a ' + 'list of lists') + + create_args.update({"data": json.dumps(src_obj)}) + body = json.dumps(create_args) + return self._create(self.source_url, body) + + def _create_local_source(self, file_name, args=None): + """Creates a new source using a local file. + + + """ + create_args = {} + if args is not None: + create_args.update(args) + + for key, value in list(create_args.items()): + if value is not None and isinstance(value, (list, dict)): + create_args[key] = json.dumps(value) + elif value is not None and isinstance(value, numbers.Number): + # the multipart encoder only accepts strings and files + create_args[key] = str(value) + + + code = HTTP_INTERNAL_SERVER_ERROR + resource_id = None + location = None + resource = None + error = { + "status": { + "code": code, + "message": "The resource couldn't be created"}} + + #pylint: disable=locally-disabled,consider-using-with + try: + if isinstance(file_name, str): + name = os.path.basename(file_name) + file_handler = open(file_name, "rb") + else: + name = 'Stdin input' + file_handler = file_name + except IOError: + sys.exit("ERROR: cannot read training set") + qs_params = self._add_credentials({}) + qs_str = "?%s" % parse.urlencode(qs_params) if qs_params else "" + create_args = self._add_project(create_args, True) + if GAE_ENABLED: + try: + req_options = { + 'url': self.source_url + qs_str, + 'method': urlfetch.POST, + 'headers': SEND_JSON, + 'data': create_args, + 'files': {name: file_handler}, + 'validate_certificate': self.domain.verify + } + response = urlfetch.fetch(**req_options) + except urlfetch.Error as exception: + LOGGER.error("HTTP request 
error: %s", + str(exception)) + return maybe_save(resource_id, self.storage, code, + location, resource, error) + else: + try: + files = {"file": (name, + file_handler, + mimetypes.guess_type(name)[0])} + files.update(create_args) + multipart = MultipartEncoder(fields=files) + response = requests.post( \ + self.source_url, + params=qs_params, + headers={'Content-Type': multipart.content_type}, + data=multipart, verify=self.domain.verify) + except (requests.ConnectionError, + requests.Timeout, + requests.RequestException) as exc: + LOGGER.error("HTTP request error: %s", str(exc)) + code = HTTP_INTERNAL_SERVER_ERROR + return maybe_save(resource_id, self.storage, code, + location, resource, error) + try: + code = response.status_code + if code == HTTP_CREATED: + location = response.headers['location'] + resource = json_load(response.content) + resource_id = resource['resource'] + error = None + elif code in [HTTP_BAD_REQUEST, + HTTP_UNAUTHORIZED, + HTTP_PAYMENT_REQUIRED, + HTTP_NOT_FOUND, + HTTP_TOO_MANY_REQUESTS]: + error = json_load(response.content) + else: + LOGGER.error("Unexpected error (%s)", code) + code = HTTP_INTERNAL_SERVER_ERROR + + except ValueError: + LOGGER.error("Malformed response") + + return maybe_save(resource_id, self.storage, code, + location, resource, error) + + def clone_source(self, source, + args=None, wait_time=3, retries=10): + """Creates a cloned source from an existing `source` + + """ + create_args = self._set_clone_from_args( + source, "source", args=args, wait_time=wait_time, retries=retries) + + body = json.dumps(create_args) + return self._create(self.source_url, body) + + def _create_composite(self, sources, args=None): + """Creates a composite source from an existing `source` or list of + sources + + """ + create_args = {} + if args is not None: + create_args.update(args) + + if not isinstance(sources, list): + sources = [sources] + + source_ids = [] + for source in sources: + # we accept full resource IDs or pure IDs and produce 
pure IDs + try: + source_id = get_source_id(source) + except ValueError: + source_id = None + + if source_id is None: + pure_id = get_id(source) + source_id = "source/%s" % pure_id + else: + pure_id = source_id.replace("source/", "") + + if pure_id is not None: + source_ids.append(pure_id) + else: + raise Exception("A source or list of source ids" + " are needed to create a" + " source.") + create_args.update({"sources": source_ids}) + + body = json.dumps(create_args) + return self._create(self.source_url, body) + + def create_source(self, path=None, args=None): + """Creates a new source. + + The source can be a local file path or a URL. + We also accept a pandas DataFrame as first argument + TODO: add async load and progress bar in Python 3 + + """ + + if path is None: + raise Exception('A local path or a valid URL must be provided.') + + if PANDAS_READY and isinstance(path, DataFrame): + buffer = StringIO(path.to_csv(index=False)) + return self._create_local_source(file_name=buffer, args=args) + if is_url(path): + return self._create_remote_source(path, args=args) + if isinstance(path, list): + try: + if all(get_id(item) is not None \ + for item in path): + # list of sources + return self._create_composite(path, args=args) + except ValueError: + pass + return self._create_inline_source(path, args=args) + if isinstance(path, dict): + return self._create_connector_source(path, args=args) + try: + if get_source_id(path) is not None: + # cloning source + return self.clone_source(path, args=args) + except ValueError: + pass + return self._create_local_source(file_name=path, args=args) + + def create_annotated_source(self, annotations_file, args=None): + """Creates a composite source for annotated images. + + Images are usually associated to other information, like labels or + numeric fields, which can be regarded as additional attributes + related to that image. The associated information can be described + as annotations for each of the images. 
These annotations can be + provided as a JSON file that contains the properties associated to + each image and the name of the image file, that is used as foreign key. + The meta information needed to create the structure of the composite + source, such as the fields to be associated and their types, + should also be included in the annotations file. + This is an example of the expected structure of the annotations file: + + {"description": "Fruit images to test colour distributions", + "images_file": "./fruits_hist.zip", + "new_fields": [{"name": "new_label", "optype": "categorical"}], + "source_id": null, + "annotations": [ + {"file": "f1/fruits1f.png", "new_label": "True"}, + {"file": "f1/fruits1.png", "new_label": "False"}, + {"file": "f2/fruits2e.png", "new_label": "False"}]} + + The "images_file" attribute should contain the path to zip-compressed + images file and the "annotations" attribute the corresponding + annotations. The "new_fields" attribute should be a list of the fields + used as annotations for the images. + + Also, if you prefer to keep your annotations in a separate file, you + can point to that file in the "annotations" attribute: + + {"description": "Fruit images to test colour distributions", + "images_file": "./fruits_hist.zip", + "new_fields": [{"name": "new_label", "optype": "categorical"}], + "source_id": null, + "annotations": "./annotations_detail.json"} + + The created source will contain the fields associated to the + uploaded images, plus an additional field named "new_label" with the + values defined in this file. + + If a source has already been created from this collection of images, + you can provide the ID of this source in the "source_id" attribute. + Thus, the existing source will be updated to add the new annotations + (if still open for editing) or will be cloned (if the source is + closed for editing) and the new source will be updated . In both cases, + images won't be uploaded when "source_id" is used. 
+ + """ + + if not os.path.exists(annotations_file): + raise ValueError("A local path to a JSON file must be provided.") + + with open(annotations_file) as annotations_handler: + annotations_info = json.load(annotations_handler) + + if annotations_info.get("images_file") is None: + raise ValueError("Failed to find the `images_file` attribute " + "in the annotations file %s" % annotations_file) + base_directory = os.path.dirname(annotations_file) + zip_path = os.path.join(base_directory, + annotations_info.get("images_file")) + if isinstance(annotations_info.get("annotations"), str): + annotations = os.path.join(base_directory, + annotations_info.get("annotations")) + else: + annotations = annotations_info.get("annotations") + # check metadata file attributes + if annotations_info.get("source_id") is None: + # upload the compressed images + source = self.create_source(zip_path, args=args) + if not self.ok(source): + raise IOError("A source could not be created for %s" % + zip_path) + source_id = source["resource"] + else: + source_id = annotations_info.get("source_id") + return self.update_composite_annotations( + source_id, zip_path, annotations, + new_fields=annotations_info.get("new_fields")) + + def update_composite_annotations(self, source, images_file, + annotations, new_fields=None, + source_changes=None): + """Updates a composite source to add a list of annotations + The annotations argument should contain annotations in a BigML-COCO + syntax: + + [{"file": "image1.jpg", + "label": "label1"}. + {"file": "image2.jpg", + "label": "label1"}, + {"file": "image3.jpg", + "label": "label2"}] + + or point to a JSON file that contains that information, + and the images_file argument should point to a zip file that + contains the referrered images sorted as uploaded to build the source. 
+ + If the attributes in the annotations file ("file" excluded) are not + already defined in the composite source, the `new_fields` argument + can be set to contain a list of the fields and types to be added + + [{"name": "label", "optype": "categorical"}] + """ + if source_changes is None: + source_changes = {} + + source_id = get_source_id(source) + if source_id: + source = self.get_source(source_id) + if source.get("object", {}).get("closed"): + source = self.clone_source(source_id) + self.ok(source) + # corresponding source IDs + try: + sources = source["object"]["sources"] + except KeyError: + raise ValueError("Failed to find the list of sources in the " + "created composite: %s." % source["resource"]) + try: + with ZipFile(images_file) as zip_handler: + file_list = zip_handler.namelist() + file_list = filter_by_extension(file_list, IMAGE_EXTENSIONS) + except IOError: + raise ValueError("Failed to find the list of images in zip %s" % + images_file) + + file_to_source = dict(zip(file_list, sources)) + + fields = Fields(source) + + # adding the annotation values + if annotations: + if isinstance(annotations, str): + # path to external annotations file + try: + with open(annotations) as \ + annotations_handler: + annotations = json.load(annotations_handler) + except IOError as exc: + raise ValueError("Failed to find annotations in %s" % + exc) + elif not isinstance(annotations, list): + raise ValueError("The annotations attribute needs to contain" + " a list of annotations or the path to " + " a file with such a list.") + if new_fields is None: + new_fields = {} + for annotation in annotations: + for field, value in annotation.items(): + if field != "file" and field not in new_fields: + new_fields[field] = infer_field_type(field, value) + new_fields = list(new_fields.values()) + + # creating new annotation fields, if absent + if new_fields: + field_names = [field["name"] for _, field in fields.fields.items()] + changes = [] + for field_info in new_fields: + if 
field_info.get("name") not in field_names: + changes.append(field_info) + if changes: + source_changes.update({"new_fields": changes}) + if source_changes: + source = self.update_source(source["resource"], source_changes) + self.ok(source) + + fields = Fields(source) + + changes = [] + changes_dict = {} + for annotation in annotations: + filename = annotation.get("file") + try: + _ = file_list.index(filename) + except ValueError: + LOGGER.error("WARNING: Could not find annotated file (%s)" + " in the composite's sources list", filename) + continue + for key in annotation.keys(): + if key == "file": + continue + if key not in changes_dict: + changes_dict[key] = [] + value = annotation.get(key) + changes_dict[key].append((value, file_to_source[filename])) + + #pylint: disable=locally-disabled,broad-except + for field, values in changes_dict.items(): + try: + optype = fields.fields[fields.field_id(field)]["optype"] + if optype == "categorical": + sorted_values = sorted(values, key=lambda x: x[0]) + old_value = None + source_ids = [] + for value, source_id in sorted_values: + if value != old_value and old_value is not None: + changes.append({"field": field, "value": old_value, + "components": source_ids}) + source_ids = [source_id] + old_value = value + else: + source_ids.append(source_id) + if old_value is None: + old_value = value + changes.append({"field": field, "value": value, + "components": source_ids}) + elif optype == "regions": + for value, source_id in values: + if isinstance(value, list): + # dictionary should contain the bigml-coco format + value = compact_regions(value) + changes.append( + {"field": field, + "value": value, + "components": [source_id]}) + else: + for value, source_id in values: + changes.append( + {"field": field, + "value": value, + "components": [source_id]}) + except Exception: + LOGGER.error("WARNING: Problem adding annotation to %s (%s)", + field, values) + pass + + # we need to limit the amount of changes per update + batches_number 
= int(len(changes) / MAX_CHANGES) + for offset in range(0, batches_number + 1): + new_batch = changes[ + offset * MAX_CHANGES: (offset + 1) * MAX_CHANGES] + if new_batch: + source = self.update_source(source, + {"row_values": new_batch}) + counter = 0 + while source["error"] is not None and counter < MAX_RETRIES: + # retrying in case update is temporarily unavailable + counter += 1 + time.sleep(counter) + source = self.get_source(source) + self.ok(source) + source = self.update_source(source, + {"row_values": new_batch}) + if source["error"] is not None: + err_str = json.dumps(source["error"]) + v_str = json.dumps(new_batch) + LOGGER.error("WARNING: Some annotations were not updated " + f" (error: {err_str}, values: {v_str})") + if not self.ok(source): + raise Exception( + f"Failed to update {len(new_batch)} annotations.") + time.sleep(0.1) + + return source + + def get_source(self, source, query_string=''): + """Retrieves a remote source. + The source parameter should be a string containing the + source id or the dict returned by create_source. + As source is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, the function will + return a dict that encloses the source values and state info + available at the time it is called. + """ + check_resource_type(source, SOURCE_PATH, + message="A source id is needed.") + return self.get_resource(source, query_string=query_string) + + def source_is_ready(self, source): + """Checks whether a source's status is FINISHED. + + """ + check_resource_type(source, SOURCE_PATH, + message="A source id is needed.") + source = self.get_source(source) + return resource_is_ready(source) + + def list_sources(self, query_string=''): + """Lists all your remote sources. + + """ + return self._list(self.source_url, query_string) + + def update_source(self, source, changes): + """Updates a source. + + Updates remote `source` with `changes`.
+ + """ + check_resource_type(source, SOURCE_PATH, + message="A source id is needed.") + return self.update_resource(source, changes) + + def delete_source(self, source, query_string=''): + """Deletes a remote source permanently. + + """ + check_resource_type(source, SOURCE_PATH, + message="A source id is needed.") + return self.delete_resource(source, query_string=query_string) diff --git a/bigml/api_handlers/statisticaltesthandler.py b/bigml/api_handlers/statisticaltesthandler.py new file mode 100644 index 00000000..eca91255 --- /dev/null +++ b/bigml/api_handlers/statisticaltesthandler.py @@ -0,0 +1,112 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Base class for statisticaltests' REST calls + + https://bigml.com/api/statisticaltests + +""" + +try: + import simplejson as json +except ImportError: + import json + + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + get_resource_type, get_dataset_id, check_resource +from bigml.constants import (STATISTICAL_TEST_PATH, DATASET_PATH, + TINY_RESOURCE) + + +class StatisticalTestHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the statistical tests' REST calls. It should not + be instantiated independently. 
+ + """ + def __init__(self): + """Initializes the StatisticalTestHandler. This class is intended to be + used as a mixin on ResourceHandler, that inherits its + attributes and basic method from BigMLConnection, and must not be + instantiated independently. + + """ + self.statistical_test_url = self.url + STATISTICAL_TEST_PATH + + def create_statistical_test(self, dataset, args=None, wait_time=3, retries=10): + """Creates a statistical test from a `dataset`. + + """ + dataset_id = None + resource_type = get_resource_type(dataset) + if resource_type == DATASET_PATH: + dataset_id = get_dataset_id(dataset) + check_resource(dataset_id, + query_string=TINY_RESOURCE, + wait_time=wait_time, retries=retries, + raise_on_error=True, api=self) + else: + raise Exception("A dataset id is needed to create a" + " statistical test. %s found." % resource_type) + + create_args = {} + if args is not None: + create_args.update(args) + create_args.update({ + "dataset": dataset_id}) + + body = json.dumps(create_args) + return self._create(self.statistical_test_url, body) + + def get_statistical_test(self, statistical_test, query_string=''): + """Retrieves a statistical test. + + The statistical test parameter should be a string containing the + statisticaltest id or the dict returned by create_statistical_test. + As an statistical test is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, the function will + return a dict that encloses the statistical test values and state + info available at the time it is called. + """ + check_resource_type(statistical_test, STATISTICAL_TEST_PATH, + message="A statistical test id is needed.") + return self.get_resource(statistical_test, query_string=query_string) + + def list_statistical_tests(self, query_string=''): + """Lists all your statistical tests. 
+ + """ + return self._list(self.statistical_test_url, query_string) + + def update_statistical_test(self, statistical_test, changes): + """Updates an statistical test. + + """ + check_resource_type(statistical_test, STATISTICAL_TEST_PATH, + message="A statistical test id is needed.") + return self.update_resource(statistical_test, changes) + + def delete_statistical_test(self, statistical_test, query_string=''): + """Deletes a statistical test. + + """ + check_resource_type(statistical_test, STATISTICAL_TEST_PATH, + message="A statistical test id is needed.") + return self.delete_resource(statistical_test, + query_string=query_string) diff --git a/bigml/api_handlers/timeserieshandler.py b/bigml/api_handlers/timeserieshandler.py new file mode 100644 index 00000000..2d57a08c --- /dev/null +++ b/bigml/api_handlers/timeserieshandler.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +"""Base class for time series' REST calls + + https://bigml.com/api/timeseries + +""" + +try: + import simplejson as json +except ImportError: + import json + + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + resource_is_ready +from bigml.constants import TIME_SERIES_PATH + + +class TimeSeriesHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the REST calls models. It should not + be instantiated independently. + + """ + def __init__(self): + """Initializes the TimeSeriesHandler. This class is intended + to be used as a mixin on ResourceHandler, that inherits its + attributes and basic method from BigMLConnection, and must not be + instantiated independently. + + """ + self.time_series_url = self.url + TIME_SERIES_PATH + + def create_time_series(self, datasets, + args=None, wait_time=3, retries=10): + """Creates a time series from a `dataset` + or a list of `datasets`. + + """ + create_args = self._set_create_from_datasets_args( + datasets, args=args, wait_time=wait_time, retries=retries) + + body = json.dumps(create_args) + return self._create(self.time_series_url, body) + + def get_time_series(self, time_series, query_string='', + shared_username=None, shared_api_key=None): + """Retrieves a time series. + + The time_series parameter should be a string containing the + time series id or the dict returned by + create_time_series. + As a time series is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, the function will + return a dict that encloses the time series + values and state info available at the time it is called. + + If this is a shared time series, the username and + sharing api key must also be provided.
+ """ + check_resource_type(time_series, TIME_SERIES_PATH, + message="A time series id is needed.") + return self.get_resource(time_series, + query_string=query_string, + shared_username=shared_username, + shared_api_key=shared_api_key) + + def time_series_is_ready(self, time_series, **kwargs): + """Checks whether a time series's status is FINISHED. + + """ + check_resource_type(time_series, TIME_SERIES_PATH, + message="A time series id is needed.") + resource = self.get_time_series(time_series, **kwargs) + return resource_is_ready(resource) + + def list_time_series(self, query_string=''): + """Lists all your time series. + + """ + return self._list(self.time_series_url, query_string) + + def update_time_series(self, time_series, changes): + """Updates a time series. + + """ + check_resource_type(time_series, TIME_SERIES_PATH, + message="A time series id is needed.") + return self.update_resource(time_series, changes) + + def delete_time_series(self, time_series, query_string=''): + """Deletes a time series. 
+ + """ + check_resource_type(time_series, TIME_SERIES_PATH, + message="A time series id is needed.") + return self.delete_resource(time_series, query_string=query_string) + + def clone_time_series(self, time_series, + args=None, wait_time=3, retries=10): + """Creates a cloned time_series from an existing `time series` + + """ + create_args = self._set_clone_from_args( + time_series, "timeseries", args=args, wait_time=wait_time, + retries=retries) + + body = json.dumps(create_args) + return self._create(self.time_series_url, body) diff --git a/bigml/api_handlers/topicdistributionhandler.py b/bigml/api_handlers/topicdistributionhandler.py new file mode 100644 index 00000000..117cefd2 --- /dev/null +++ b/bigml/api_handlers/topicdistributionhandler.py @@ -0,0 +1,125 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2016-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +"""Base class for topicdistributions' REST calls + + https://bigml.com/api/topic_distributions + +""" + +try: + import simplejson as json +except ImportError: + import json + + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + get_resource_type, check_resource, get_topic_model_id +from bigml.constants import TOPIC_MODEL_PATH, TOPIC_DISTRIBUTION_PATH, \ + IMAGE_FIELDS_FILTER, SPECIFIC_EXCLUDES + + +class TopicDistributionHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the REST calls models. It should not + be instantiated independently. + + """ + def __init__(self): + """Initializes the TopicDistributionHandler. This class is intended to + be used as a mixin on ResourceHandler, that inherits its + attributes and basic method from BigMLConnection, and must not be + instantiated independently. + + """ + self.topic_distribution_url = self.url + TOPIC_DISTRIBUTION_PATH + + def create_topic_distribution(self, topic_model, input_data=None, + args=None, wait_time=3, retries=10): + """Creates a new topic distribution. + + The topic_model parameter should be a topic model id or the dict + of a topic model resource. An Exception is raised when a different + resource type is found or no topic model id can be extracted. + + """ + + resource_type = get_resource_type(topic_model) + if resource_type != TOPIC_MODEL_PATH: + raise Exception("A topic model resource id is needed" + " to create a topic distribution. %s found." % + resource_type) + + topic_model_id = get_topic_model_id(topic_model) + if topic_model_id is None: + raise Exception("Failed to detect a correct topic model structure" + " in %s."
% topic_model) + + if isinstance(topic_model, dict) and \ + topic_model.get("resource") is not None: + # retrieving fields info from model structure + model_info = topic_model + else: + image_fields_filter = IMAGE_FIELDS_FILTER + "," + \ + ",".join(SPECIFIC_EXCLUDES[resource_type]) + model_info = check_resource(topic_model_id, + query_string=image_fields_filter, + wait_time=wait_time, + retries=retries, + raise_on_error=True, + api=self) + + if input_data is None: + input_data = {} + create_args = {} + if args is not None: + create_args.update(args) + create_args.update({ + "input_data": self.prepare_image_fields(model_info, input_data), + "topicmodel": topic_model_id}) + + body = json.dumps(create_args) + return self._create(self.topic_distribution_url, body, + verify=self.domain.verify_prediction) + + def get_topic_distribution(self, topic_distribution, query_string=''): + """Retrieves a topic distribution. + + """ + check_resource_type(topic_distribution, TOPIC_DISTRIBUTION_PATH, + message="A topic distribution id is needed.") + return self.get_resource(topic_distribution, query_string=query_string) + + def list_topic_distributions(self, query_string=''): + """Lists all your topic distributions. + + """ + return self._list(self.topic_distribution_url, query_string) + + def update_topic_distribution(self, topic_distribution, changes): + """Updates a topic distribution. + + """ + check_resource_type(topic_distribution, TOPIC_DISTRIBUTION_PATH, + message="A topic distribution id is needed.") + return self.update_resource(topic_distribution, changes) + + def delete_topic_distribution(self, topic_distribution, query_string=''): + """Deletes a topic distribution. 
+ + """ + check_resource_type(topic_distribution, TOPIC_DISTRIBUTION_PATH, + message="A topic distribution id is needed.") + return self.delete_resource(topic_distribution, + query_string=query_string) diff --git a/bigml/api_handlers/topicmodelhandler.py b/bigml/api_handlers/topicmodelhandler.py new file mode 100644 index 00000000..a34b904b --- /dev/null +++ b/bigml/api_handlers/topicmodelhandler.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- +#pylint: disable=abstract-method +# +# Copyright 2016-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Base class for TopicModel's REST calls + + https://bigml.com/api/topicmodels + +""" + +try: + import simplejson as json +except ImportError: + import json + + +from bigml.api_handlers.resourcehandler import ResourceHandlerMixin +from bigml.api_handlers.resourcehandler import check_resource_type, \ + resource_is_ready +from bigml.constants import TOPIC_MODEL_PATH + + +class TopicModelHandlerMixin(ResourceHandlerMixin): + """This class is used by the BigML class as + a mixin that provides the REST calls models. It should not + be instantiated independently. + + """ + def __init__(self): + """Initializes the TopicModelHandler. This class is intended to be + used as a mixin on ResourceHandler, that inherits its + attributes and basic method from BigMLConnection, and must not be + instantiated independently. 
+ + """ + self.topic_model_url = self.url + TOPIC_MODEL_PATH + + def create_topic_model(self, datasets, args=None, wait_time=3, retries=10): + """Creates a Topic Model from a `dataset` or a list of `datasets`. + + """ + create_args = self._set_create_from_datasets_args( + datasets, args=args, wait_time=wait_time, retries=retries) + + body = json.dumps(create_args) + return self._create(self.topic_model_url, body) + + def get_topic_model(self, topic_model, query_string='', + shared_username=None, shared_api_key=None): + """Retrieves a Topic Model. + + The topic_model parameter should be a string containing the + topic model ID or the dict returned by create_topic_model. + As the topic model is an evolving object that is processed + until it reaches the FINISHED or FAULTY state, the function will + return a dict that encloses the topic model values and state info + available at the time it is called. + + If this is a shared topic model, the username and sharing api key + must also be provided. + """ + check_resource_type(topic_model, TOPIC_MODEL_PATH, + message="A Topic Model id is needed.") + return self.get_resource(topic_model, + query_string=query_string, + shared_username=shared_username, + shared_api_key=shared_api_key) + + + def topic_model_is_ready(self, topic_model, **kwargs): + """Checks whether a topic model's status is FINISHED. + + """ + check_resource_type(topic_model, TOPIC_MODEL_PATH, + message="A topic model id is needed.") + resource = self.get_topic_model(topic_model, **kwargs) + return resource_is_ready(resource) + + def list_topic_models(self, query_string=''): + """Lists all your Topic Models. + + """ + return self._list(self.topic_model_url, query_string) + + def update_topic_model(self, topic_model, changes): + """Updates a Topic Model.
+ + """ + check_resource_type(topic_model, TOPIC_MODEL_PATH, + message="A topic model id is needed.") + return self.update_resource(topic_model, changes) + + def delete_topic_model(self, topic_model, query_string=''): + """Deletes a Topic Model. + + """ + check_resource_type(topic_model, TOPIC_MODEL_PATH, + message="A topic model id is needed.") + return self.delete_resource(topic_model, query_string=query_string) + + def clone_topic_model(self, topic_model, + args=None, wait_time=3, retries=10): + """Creates a cloned topic model from an existing `topic model` + + """ + create_args = self._set_clone_from_args( + topic_model, "topicmodel", args=args, wait_time=wait_time, + retries=retries) + + body = json.dumps(create_args) + return self._create(self.topic_model_url, body) diff --git a/bigml/association.py b/bigml/association.py new file mode 100644 index 00000000..a3b65d76 --- /dev/null +++ b/bigml/association.py @@ -0,0 +1,531 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A local Association Rules object. + +This module defines an Association Rule object as extracted from a given +dataset. It shows the items discovered in the dataset and the association +rules between these items. 
+ +Example usage (assuming that you have previously set up the BIGML_USERNAME +and BIGML_API_KEY environment variables and that you own the association/id +below): + +from bigml.api import BigML +from bigml.association import Association + +api = BigML() + +association = Association('association/5026966515526876630001b2') +association.association_set({"species": "Iris-versicolor"}) + +""" + +import sys +import math +import logging +import csv + + +from bigml.api import FINISHED +from bigml.api import get_status, get_api_connection, get_association_id +from bigml.basemodel import get_resource_dict +from bigml.modelfields import ModelFields +from bigml.associationrule import AssociationRule +from bigml.item import Item +from bigml.io import UnicodeWriter +from bigml.util import use_cache, load, dump, dumps, get_data_transformations + +LOGGER = logging.getLogger('BigML') + +RULE_HEADERS = ["Rule ID", "Antecedent", "Consequent", "Antecedent Coverage %", + "Antecedent Coverage", "Support %", "Support", "Confidence", + "Leverage", "Lift", "p-value", "Consequent Coverage %", + "Consequent Coverage"] + +ASSOCIATION_METRICS = ["lhs_cover", "support", "confidence", + "leverage", "lift", "p_value"] + +SCORES = ASSOCIATION_METRICS[:-1] + +METRIC_LITERALS = {"confidence": "Confidence", "support": "Support", + "leverage": "Leverage", "lhs_cover": "Coverage", + "p_value": "p-value", "lift": "Lift"} + +INDENT = " " * 4 + +DEFAULT_K = 100 +DEFAULT_SEARCH_STRATEGY = "leverage" + + +NO_ITEMS = ['numeric', 'categorical'] + + +def get_metric_string(rule, reverse=False): + """Returns the string that describes the values of metrics for a rule.
+ + """ + metric_values = [] + for metric in ASSOCIATION_METRICS: + if reverse and metric == 'lhs_cover': + metric_key = 'rhs_cover' + else: + metric_key = metric + metric_value = getattr(rule, metric_key) + if isinstance(metric_value, list): + metric_values.append("%s=%.2f%% (%s)" % ( + METRIC_LITERALS[metric], ((round(metric_value[0], 4) * 100)), \ + metric_value[1])) + elif metric == 'confidence': + metric_values.append("%s=%.2f%%" % ( + METRIC_LITERALS[metric], ((round(metric_value, 4) * 100)))) + else: + metric_values.append("%s=%s" % ( + METRIC_LITERALS[metric], metric_value)) + return "; ".join(metric_values) + + +class Association(ModelFields): + """ A lightweight wrapper around an Association rules object. + + Uses a BigML remote association resource to build a local version + that can be used to extract associations information. + + """ + #pylint: disable=locally-disabled,access-member-before-definition + def __init__(self, association, api=None, cache_get=None): + + + if use_cache(cache_get): + # using a cache to store the association attributes + self.__dict__ = load(get_association_id(association), cache_get) + for index, item in enumerate(self.items): + self.items[index] = Item(item["index"], item, self.fields) + for index, rule in enumerate(self.rules): + self.rules[index] = AssociationRule(rule) + return + + self.resource_id = None + self.name = None + self.description = None + self.parent_id = None + self.complement = None + self.discretization = {} + self.default_numeric_value = None + self.field_discretizations = {} + self.items = [] + self.max_k = None + self.max_lhs = None + self.min_confidence = None + self.min_leverage = None + self.min_support = None + self.min_lift = None + self.search_strategy = DEFAULT_SEARCH_STRATEGY + self.rules = [] + self.significance_level = None + api = get_api_connection(api) + + self.resource_id, association = get_resource_dict( \ + association, "association", api=api) + if 'object' in association and 
isinstance(association['object'], dict): + association = association['object'] + try: + self.parent_id = association.get('dataset') + self.name = association.get("name") + self.description = association.get("description") + except AttributeError: + raise ValueError("Failed to find the expected " + "JSON structure. Check your arguments.") + + if 'associations' in association and \ + isinstance(association['associations'], dict): + status = get_status(association) + if 'code' in status and status['code'] == FINISHED: + self.input_fields = association['input_fields'] + self.default_numeric_value = association.get( \ + 'default_numeric_value') + associations = association['associations'] + fields = associations['fields'] + ModelFields.__init__( \ + self, fields, \ + missing_tokens=associations.get('missing_tokens')) + self.complement = associations.get('complement', False) + self.discretization = associations.get('discretization', {}) + self.field_discretizations = associations.get( + 'field_discretizations', {}) + self.items = [Item(index, item, fields) for index, item in + enumerate(associations.get('items', []))] + self.max_k = associations.get('max_k', 100) + self.max_lhs = associations.get('max_lhs', 4) + self.min_confidence = associations.get('min_confidence', 0) + self.min_leverage = associations.get('min_leverage', -1) + self.min_support = associations.get('min_support', 0) + self.min_lift = associations.get('min_lift', 0) + self.search_strategy = associations.get('search_strategy', \ + DEFAULT_SEARCH_STRATEGY) + self.rules = [AssociationRule(rule) for rule in + associations.get('rules', [])] + self.significance_level = associations.get( + 'significance_level', 0.05) + else: + raise Exception("The association isn't finished yet") + else: + raise Exception("Cannot create the Association instance. 
Could not" + " find the 'associations' key in the " + "resource:\n\n%s" % + association) + + def association_set(self, input_data, + k=DEFAULT_K, score_by=None): + """Returns the Consequents for the rules whose LHS best match + the provided items. Cosine similarity is used to score the match. + + @param input_data dict map of input data: e.g. + {"petal length": 4.4, + "sepal length": 5.1, + "petal width": 1.3, + "sepal width": 2.1, + "species": "Iris-versicolor"} + @param k integer Maximum number of item predictions to return + (Default 100) + @param max_rules integer Maximum number of rules to return per item + @param score_by Code for the metric used in scoring + (default search_strategy) + leverage + confidence + support + lhs_cover + lift + + """ + predictions = {} + if score_by and score_by not in SCORES: + raise ValueError("The available values of score_by are: %s" % + ", ".join(SCORES)) + norm_input_data = self.filter_input_data(input_data) + # retrieving the items in input_data + items_indexes = [item.index for item in + self.get_items(input_map=norm_input_data)] + if score_by is None: + score_by = self.search_strategy + + for rule in self.rules: + # checking that the field in the rhs is not in the input data + field_type = self.fields[self.items[rule.rhs[0]].field_id][ \ + 'optype'] + # if the rhs corresponds to a non-itemized field and this field + # is already in input_data, don't add rhs + if field_type in NO_ITEMS and self.items[rule.rhs[0]].field_id in \ + norm_input_data: + continue + # if an itemized content is in input_data, don't add it to the + # prediction + if field_type not in NO_ITEMS and rule.rhs[0] in items_indexes: + continue + cosine = sum([1 for index in items_indexes \ + if index in rule.lhs]) + if cosine > 0: + cosine = cosine / float(math.sqrt(len(items_indexes)) * \ + math.sqrt(len(rule.lhs))) + + rhs = tuple(rule.rhs) + if rhs not in predictions: + predictions[rhs] = {"score": 0} + predictions[rhs]["score"] += cosine * getattr( + rule,
score_by) + if not "rules" in predictions[rhs]: + predictions[rhs]["rules"] = [] + predictions[rhs]["rules"].append(rule.rule_id) + # choose the best k predictions + k = len(predictions) if k is None else k + predictions = sorted(list(predictions.items()), + key=lambda x: x[1]["score"], reverse=True)[:k] + final_predictions = [] + for rhs, prediction in predictions: + prediction["item"] = self.items[rhs[0]].to_json() + # adapting to association_set item format + for key in ["description", "bin_start", "bin_end"]: + del prediction["item"][key] + final_predictions.append(prediction) + return final_predictions + + def get_items(self, field=None, + names=None, input_map=None, filter_function=None): + """Returns the items array, previously selected by the field + corresponding to the given field name or a user-defined function + (if set) + + """ + items = [] + if field: + if field in self.fields: + field_id = field + elif field in self.inverted_fields: + field_id = self.inverted_fields[field] + else: + raise ValueError("Failed to find a field name or ID" + " corresponding to %s." 
% field) + + def filter_function_set(item): + """Checking filter function if set + + """ + if filter_function is None: + return True + return filter_function(item) + + def field_filter(item): + """Checking if an item is associated to a fieldInfo + + """ + if field is None: + return True + return item.field_id == field_id + + def names_filter(item): + """Checking if an item by name + + """ + if names is None: + return True + return item.name in names + + def input_map_filter(item): + """ Checking if an item is in the input map + + """ + if input_map is None: + return True + value = input_map.get(item.field_id) + return item.matches(value) + + for item in self.items: + if all([field_filter(item), names_filter(item), + input_map_filter(item), + filter_function_set(item)]): + items.append(item) + + return items + + def get_rules(self, min_leverage=None, min_confidence=None, + min_support=None, min_p_value=None, item_list=None, + filter_function=None): + """Returns the rules array, previously selected by the leverage, + confidence, support or a user-defined filter function (if set) + + @param float min_leverage Minum leverage value + @param float min_confidence Minum confidence value + @param float min_support Minum support value + @param float min_p_value Minum p_value value + @param List item_list List of Item objects. 
Any of them should be + in the rules + @param function filter_function Function used as filter + """ + def leverage(rule): + """Check minimum leverage + + """ + if min_leverage is None: + return True + return rule.leverage >= min_leverage + + def confidence(rule): + """Check minimum confidence + + """ + if min_confidence is None: + return True + return rule.confidence >= min_confidence + + def support(rule): + """Check minimum support + + """ + if min_support is None: + return True + for rhs_support, _ in rule.support: + if rhs_support >= min_support: + return True + return False + + def p_value(rule): + """Check minimum p_value + + """ + if min_p_value is None: + return True + return rule.p_value >= min_p_value + + def filter_function_set(rule): + """Checking filter function if set + + """ + if filter_function is None: + return True + return filter_function(rule) + + def item_list_set(rule): + """Checking if any of the items list is in a rule + + An empty item_list matches no rule. The elements of item_list + must be Item objects or item names (str); any other type raises + a ValueError. + + """ + if item_list is None: + return True + if not item_list: + # nothing to look for: no rule can contain any of the items + return False + if isinstance(item_list[0], Item): + items = [item.index for item in item_list] + elif isinstance(item_list[0], str): + items = [item.index for item + in self.get_items(names=item_list)] + else: + # previously fell through and failed on the unbound `items` + raise ValueError("The item_list argument should contain" + " Item objects or item names.") + + for item_index in rule.lhs: + if item_index in items: + return True + for item_index in rule.rhs: + if item_index in items: + return True + return False + + rules = [] + for rule in self.rules: + if all([leverage(rule), confidence(rule), support(rule), + p_value(rule), item_list_set(rule), + filter_function_set(rule)]): + rules.append(rule) + + return rules + + def rules_csv(self, file_name, **kwargs): + """Stores the rules in CSV format in the user-given file.
The rules + can be previously selected using the arguments in get_rules + + """ + rules = self.get_rules(**kwargs) + rules = [self.describe(rule.to_csv()) for rule in rules] + if file_name is None: + raise ValueError("A valid file name is required to store the " + "rules.") + with UnicodeWriter(file_name, quoting=csv.QUOTE_NONNUMERIC) as writer: + writer.writerow(RULE_HEADERS) + for rule in rules: + writer.writerow([item if not isinstance(item, str) + else item.encode("utf-8") + for item in rule]) + + def describe(self, rule_row): + """Transforms the lhs and rhs index information to a human-readable + rule text. + + """ + # lhs items and rhs items (second and third element in the row) + # substitution by description + for index in range(1, 3): + description = [] + for item_index in rule_row[index]: + item = self.items[item_index] + # if there's just one field, we don't use the item description + # to avoid repeating the field name constantly. + item_description = item.name if \ + len(list(self.fields.keys())) == 1 \ + and not item.complement else item.describe() + description.append(item_description) + description_str = " & ".join(description) + rule_row[index] = description_str + return rule_row + + def summarize(self, out=sys.stdout, limit=10, **kwargs): + """Prints a summary of the obtained rules + + """ + # groups the rules by its metrics + rules = self.get_rules(**kwargs) + out.write("Total number of rules: %s\n" % len(rules)) + for metric in ASSOCIATION_METRICS: + out.write("\n\nTop %s by %s:\n\n" % ( + limit, METRIC_LITERALS[metric])) + #pylint: disable=locally-disabled,cell-var-from-loop + top_rules = sorted(rules, key=lambda x: getattr(x, metric), + reverse=True)[0: limit * 2] + out_rules = [] + ref_rules = [] + counter = 0 + for rule in top_rules: + rule_row = self.describe(rule.to_csv()) + metric_string = get_metric_string(rule) + operator = "->" + rule_id_string = "Rule %s: " % rule.rule_id + for item in top_rules: + if rule.rhs == item.lhs and rule.lhs 
def predict(self, input_data, k=DEFAULT_K, score_by=None, full=False):
    """Common prediction entry point shared with the other local models.

    Delegates to `association_set` with the same `input_data`, `k` and
    `score_by` arguments.

    :param full: when true, the result is wrapped in a dictionary under
                 the "rules" key; otherwise it is returned as is
    :return: the `association_set` result, wrapped or not
    """
    association_set = self.association_set(
        input_data, k=k, score_by=score_by)
    return {"rules": association_set} if full else association_set
def dump(self, output=None, cache_set=None):
    """Uses msgpack to serialize the resource object
    If cache_set is filled with a cache set method, the method is called

    The elements of `items` and `rules` are serialized as their attribute
    dictionaries (`vars`). The instance itself is left untouched.

    :param output: forwarded unchanged to the serializer
    :param cache_set: forwarded unchanged to the serializer
    """
    self_vars = vars(self).copy()
    # The copy above is shallow, so the lists are still the ones owned by
    # the instance: build new lists instead of assigning by index, which
    # would replace the live item/rule objects with plain dictionaries.
    self_vars["items"] = [vars(elem) for elem in self_vars["items"]]
    self_vars["rules"] = [vars(elem) for elem in self_vars["rules"]]
    # `dump` is the serializer the original method already called; inside
    # the class body this name is not shadowed by the method
    dump(self_vars, output=output, cache_set=cache_set)


def dumps(self):
    """Uses msgpack to serialize the resource object to a string

    :return: the value produced by the `dumps` serializer
    """
    self_vars = vars(self).copy()
    # same non-mutating conversion as in `dump`
    self_vars["items"] = [vars(elem) for elem in self_vars["items"]]
    self_vars["rules"] = [vars(elem) for elem in self_vars["rules"]]
    # the serialized value was previously computed and then discarded
    return dumps(self_vars)
SUPPORTED_LANGUAGES = ["JSON", "CSV"]


class AssociationRule():
    """ Object encapsulating an association rule as described in
        https://bigml.com/developers/associations

    """

    def __init__(self, rule_info):
        """Copies the rule attributes out of the `rule_info` dictionary.

        Missing scalar metrics default to None; missing `lhs`, `rhs`,
        `lhs_cover`, `rhs_cover` and `support` default to an empty list.
        """
        read = rule_info.get
        self.rule_id = read('id')
        self.confidence = read('confidence')
        self.leverage = read('leverage')
        self.lhs = read('lhs', [])
        self.lhs_cover = read('lhs_cover', [])
        self.p_value = read('p_value')
        self.rhs = read('rhs', [])
        self.rhs_cover = read('rhs_cover', [])
        self.lift = read('lift')
        self.support = read('support', [])

    def out_format(self, language="JSON"):
        """Returns the rule converted by the `to_<language>` method.

        Only the names listed in SUPPORTED_LANGUAGES are converted; for
        any other value the rule object itself is returned.
        """
        if language not in SUPPORTED_LANGUAGES:
            return self
        converter = getattr(self, "to_%s" % language.lower())
        return converter()

    def to_csv(self):
        """Returns the rule as a flat list of values for a CSV row.

        Metrics ordered as in ASSOCIATION_METRICS in association.py
        """
        def first_two(values):
            """First two elements of `values`, or two Nones when empty."""
            if values:
                return values[0], values[1]
            return None, None

        lhs_cover_0, lhs_cover_1 = first_two(self.lhs_cover)
        support_0, support_1 = first_two(self.support)
        rhs_cover_0, rhs_cover_1 = first_two(self.rhs_cover)
        return [self.rule_id, self.lhs, self.rhs,
                lhs_cover_0, lhs_cover_1,
                support_0, support_1,
                self.confidence,
                self.leverage,
                self.lift,
                self.p_value,
                rhs_cover_0, rhs_cover_1]

    def to_json(self):
        """Returns a new dictionary holding the rule attributes."""
        return dict(self.__dict__)

    def to_lisp_rule(self, item_list):
        """Builds the LISP flatline filter that selects the dataset rows
        that fulfill the rule.

        `item_list` is indexed with the indexes stored in `lhs` and `rhs`;
        the `to_lisp_rule` output of every item is concatenated inside an
        `(and ...)` expression, lhs items first.
        """
        clauses = [item_list[index].to_lisp_rule()
                   for side in (self.lhs, self.rhs)
                   for index in side]
        return "(and %s)" % "".join(clauses)
index 00000000..0c22dc54 --- /dev/null +++ b/bigml/basemodel.py @@ -0,0 +1,296 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2013-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A BasicModel resource. + +This module defines a BasicModel to hold the main information of the model +resource in BigML. It becomes the starting point for the Model class, that +is used for local predictions. + +""" +import logging +import sys +import json +import os + +from bigml.api import FINISHED +from bigml.api import get_status, get_model_id, ID_GETTERS, \ + get_api_connection +from bigml.util import utf8 +from bigml.util import DEFAULT_LOCALE +from bigml.modelfields import ModelFields, check_resource_structure, \ + check_resource_fields +from bigml.api_handlers.resourcehandler import resource_is_ready + +LOGGER = logging.getLogger('BigML') + +# Query string to ask for fields: only the ones in the model, with summary +# (needed for the list of terms in text fields) and +# no pagination (all the model fields) + +# We need datefields in the download models, and apian sometimes +# remove them when we use only_model=true so we will set it to +# false until the problem in apian is fixed + +ONLY_MODEL = 'only_model=false&limit=-1&' +EXCLUDE_FIELDS = 'exclude=fields&' + + +def retrieve_resource(api, resource_id, query_string=ONLY_MODEL, + no_check_fields=False, retries=None): + """ Retrieves resource info either from a local repo or + from the remote server + + """ + + 
def extract_objective(objective_field):
    """Returns the objective field id stored in the model structure.

    When `objective_field` is a list its first element is returned; any
    other value is returned unchanged.
    """
    return objective_field[0] if isinstance(objective_field, list) \
        else objective_field
def datetime_fields(fields):
    """Selects the datetime fields of a fields dictionary.

    :param fields: dict whose values are field-information dicts
    :return: new dict with the entries whose "optype" is "datetime"
    """
    selected = {}
    for field_id, field_info in list(fields.items()):
        if field_info.get("optype", False) == "datetime":
            selected[field_id] = field_info
    return selected
def __init__(self, model, api=None, fields=None, checked=True,
             operation_settings=None):
    """Builds the basic model information from a model dict or ID.

    model: the model dict or ID
    api: connection to the API
    fields: fields dict (used in ensembles where fields info can be shared)
    checked: boolean that avoids rechecking the model structure when it
             has already been checked previously in a derived class
    operation_settings: operation thresholds for the classification model

    Raises an Exception when the model ID cannot be determined, when some
    of the model fields are missing from the fields information, when the
    model is not finished or when the resource has no 'model' key.
    """

    check_fn = check_local_but_fields if fields is not None else \
        check_local_info
    if isinstance(model, dict) and (checked or check_fn(model)):
        self.resource_id = model['resource']
    else:
        # If only the model id is provided, the short version of the model
        # resource is used to build a basic summary of the model
        self.api = get_api_connection(api)
        self.resource_id = get_model_id(model)
        if self.resource_id is None:
            raise Exception(self.api.error_message(model,
                                                   resource_type='model',
                                                   method='get'))
        if fields is not None and isinstance(fields, dict):
            query_string = EXCLUDE_FIELDS
        else:
            query_string = ONLY_MODEL
        # use the connection built above: the raw `api` argument defaults
        # to None and retrieve_resource calls methods on it
        model = retrieve_resource(self.api, self.resource_id,
                                  query_string=query_string,
                                  no_check_fields=fields is not None)

    if 'object' in model and isinstance(model['object'], dict):
        model = model['object']

    if 'model' in model and isinstance(model['model'], dict):
        status = get_status(model)
        if 'code' in status and status['code'] == FINISHED:
            model_fields = None
            if (fields is None and ('model_fields' in model['model'] or
                                    'fields' in model['model'])):
                # models might use fewer fields than provided
                model_fields = model['model'].get('model_fields')
                fields = model['model'].get('fields', {})
                # pagination or exclusion might cause a field not to
                # be in available fields dict
                if model_fields:
                    if not all(key in fields
                               for key in list(model_fields.keys())):
                        raise Exception("Some fields are missing"
                                        " to generate a local model."
                                        " Please, provide a model with"
                                        " the complete list of fields.")
                    # complete the model fields with the summary and name
                    # found in the general fields information
                    for field in model_fields:
                        field_info = fields[field]
                        if 'summary' in field_info:
                            model_fields[field]['summary'] = field_info[
                                'summary']
                        model_fields[field]['name'] = field_info[
                            'name']
            objective_field = model['objective_fields']
            missing_tokens = model['model'].get('missing_tokens')

            ModelFields.__init__(
                self, fields, objective_id=extract_objective(
                    objective_field),
                missing_tokens=missing_tokens,
                operation_settings=operation_settings,
                model_fields=model_fields)
            self.description = model['description']
            self.field_importance = model['model'].get('importance',
                                                       None)
            if self.field_importance:
                # keep only the importances of the available fields
                self.field_importance = [element for element
                                         in self.field_importance
                                         if element[0] in fields]
            self.locale = model.get('locale', DEFAULT_LOCALE)
        else:
            raise Exception("The model isn't finished yet")
    else:
        raise Exception("Cannot create the BaseModel instance. Could not"
                        " find the 'model' key in the resource:\n\n%s" %
                        model)
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Class for the BigML connection + +""" +import sys +import os +import time +import locale +import io +import logging + +from urllib import parse + +try: + import simplejson as json +except ImportError: + import json + +try: + #added to allow GAE to work + from google.appengine.api import urlfetch + GAE_ENABLED = True +except ImportError: + GAE_ENABLED = False + import requests + + +import bigml.constants as c + +from bigml.util import check_dir, maybe_save, get_exponential_wait +from bigml.util import DEFAULT_LOCALE +from bigml.domain import Domain +from bigml.domain import DEFAULT_DOMAIN + + +LOG_FORMAT = '%(asctime)-15s: %(message)s' +LOGGER = logging.getLogger('BigML') +CONSOLE = logging.StreamHandler(sys.stdout) +CONSOLE.setLevel(logging.WARNING) +LOGGER.addHandler(CONSOLE) + + +# Base URL +BIGML_URL = '%s://%s/%s' + +DOWNLOAD_DIR = '/download' + + +# Headers +JSON_TYPE = 'application/json' +SEND_JSON = {'Content-Type': '%s;charset=utf-8' % JSON_TYPE} +ACCEPT_JSON = {'Accept': '%s;charset=utf-8' % JSON_TYPE} + +# HTTP Status Codes from https://bigml.com/api/status_codes +HTTP_OK = 200 +HTTP_CREATED = 201 +HTTP_ACCEPTED = 202 +HTTP_NO_CONTENT = 204 +HTTP_BAD_REQUEST = 400 +HTTP_UNAUTHORIZED = 401 +HTTP_PAYMENT_REQUIRED = 402 +HTTP_FORBIDDEN = 403 +HTTP_NOT_FOUND = 404 +HTTP_METHOD_NOT_ALLOWED = 405 +HTTP_TOO_MANY_REQUESTS = 429 +HTTP_LENGTH_REQUIRED = 411 +HTTP_INTERNAL_SERVER_ERROR = 500 + + +def stream_copy(response, filename): + """Copies the contents of a response stream to a local file. 
def json_load(content):
    """Loads the bytes or string contents in the correct encoding to
    create the JSON corresponding object.

    :param content: JSON document, either as UTF-8 encoded bytes or as an
                    already decoded string
    :return: the object built by `json.loads`
    :raises ValueError: when the contents are not valid UTF-8 or not
                        valid JSON
    """
    # only byte contents need decoding; str has no `decode` method, so
    # text input used to fail with AttributeError
    if isinstance(content, (bytes, bytearray)):
        content = content.decode('utf-8')
    return json.loads(content)
def __init__(self, username=None, api_key=None,
             debug=False, set_locale=False, storage=None, domain=None,
             project=None, organization=None, short_debug=False):
    """Initializes the BigML API.

    If left unspecified, `username` and `api_key` will default to the
    values of the `BIGML_USERNAME` and `BIGML_API_KEY` environment
    variables respectively. If the variable is missing as well, an
    AttributeError is raised unless `storage` has been set.

    `dev_mode` has been deprecated. Now all resources coexist in the
    same production environment. Existing resources generated in
    development mode have been archived under a special project and
    are now accessible in production mode.

    If storage is set to a directory name, the resources obtained in
    CRU operations will be stored in the given directory.

    If domain is set, the api will point to the specified domain. Default
    will be the one in the environment variable `BIGML_DOMAIN` or
    `bigml.io` if missing. The expected domain argument is a string or a
    Domain object. See Domain class for details.

    When project is set to a project ID,
    the user is considered to be working in an
    organization project. The scope of the API requests will be limited
    to this project and permissions should be previously given by the
    organization administrator.

    When organization is set to an organization ID,
    the user is considered to be working for an
    organization. The scope of the API requests will be limited to the
    projects of the organization and permissions need to be previously
    given by the organization administrator.

    `debug` and `short_debug` switch the logger to DEBUG level and patch
    `requests` to log requests and responses. If `set_locale` is true
    the process locale is set to DEFAULT_LOCALE.

    """

    logging_level = logging.ERROR
    if debug or short_debug:
        try:
            logging_level = logging.DEBUG
            patch_requests(short_debug)
        except Exception:
            # when using GAE will fail
            pass

    # Apply the declared log format to the console handler (it was being
    # stored in a misspelled, unused attribute) and change the level
    # through the logging API instead of assigning the attribute.
    CONSOLE.setFormatter(logging.Formatter(LOG_FORMAT))
    LOGGER.setLevel(logging_level)

    if username is None:
        try:
            username = os.environ['BIGML_USERNAME']
        except KeyError:
            # credentials are only optional when working from storage
            if storage is None:
                raise AttributeError("Cannot find BIGML_USERNAME in"
                                     " your environment")

    if api_key is None:
        try:
            api_key = os.environ['BIGML_API_KEY']
        except KeyError:
            if storage is None:
                raise AttributeError("Cannot find BIGML_API_KEY in"
                                     " your environment")

    self.username = username
    self.api_key = api_key
    self.qs_params = {"username": self.username, "api_key": self.api_key}
    # built before the project is added: holds only the credentials
    self.auth = "?" + parse.urlencode(self.qs_params)
    self.project = None
    self.organization = None
    if project is not None:
        self.project = project
        self.qs_params.update({"project": self.project})
    if organization is not None:
        self.organization = organization

    self.debug = debug
    self.short_debug = short_debug
    self.domain = None
    self.url = None
    self.prediction_base_url = None

    self._set_api_urls(domain=domain)

    # if verify is not set, we capture warnings to avoid `requests` library
    # warnings: InsecurePlatformWarning
    logging.captureWarnings(not self.domain.verify)
    if set_locale:
        locale.setlocale(locale.LC_ALL, DEFAULT_LOCALE)
    self.storage = assign_dir(storage)
(self.domain.general_protocol, + self.domain.general_domain, + api_version) + self.prediction_base_url = BIGML_URL % ( + self.domain.prediction_protocol, self.domain.prediction_domain, "") + + def _add_credentials(self, qs_params, + organization=False, shared_auth=None): + """Adding the credentials and project or organization information + for authentication + + The organization argument is a boolean that controls authentication + profiles in organizations. When set to true, + the organization ID is used to access the projects and tasks in an + organization. If false, a particular project ID must be used. + + The shared_auth dictionary provides the alternative credentials for + shared resources. + + """ + if qs_params is None: + qs_params = {} + params = {} + params.update(qs_params) + if shared_auth is None: + params.update(self.qs_params) + else: + params.update(share_auth) + if organization and self.organization: + try: + del params["project"] + except KeyError: + pass + params.update({"organization": self.organization}) + return params + + def _add_project(self, payload, include=True): + """Adding project id as attribute when it has been set in the + connection arguments. + + """ + to_string = False + if self.project and include: + # Adding project ID to args if it's not set + if isinstance(payload, str): + payload = json.loads(payload) + to_string = True + if payload.get("project") is None: + payload["project"] = self.project + if to_string: + return json.dumps(payload) + return payload + + def _create(self, url, body, verify=None, organization=None): + """Creates a new remote resource. + + Posts `body` in JSON to `url` to create a new remote resource. 
+ + Returns a BigML resource wrapped in a dictionary that includes: + code: HTTP status code + resource: The resource/id + location: Remote location of the resource + object: The resource itself + error: An error code and message + + """ + code = HTTP_INTERNAL_SERVER_ERROR + resource_id = None + location = None + resource = None + error = { + "status": { + "code": code, + "message": "The resource couldn't be created"}} + + # If a prediction server is in use, the first prediction request might + # return a HTTP_ACCEPTED (202) while the model or ensemble is being + # downloaded. + code = HTTP_ACCEPTED + if verify is None: + verify = self.domain.verify + qs_params = self._add_credentials({}, organization=organization) + qs_str = "?%s" % parse.urlencode(qs_params) if qs_params else "" + body = self._add_project(body, not organization) + while code == HTTP_ACCEPTED: + if GAE_ENABLED: + try: + req_options = { + 'url': url + qs_str, + 'method': urlfetch.POST, + 'headers': SEND_JSON, + 'payload': body, + 'validate_certificate': verify + } + response = urlfetch.fetch(**req_options) + except urlfetch.Error as exception: + LOGGER.error("HTTP request error: %s", + str(exception)) + error["status"]["type"] = c.TRANSIENT + return maybe_save(resource_id, self.storage, code, + location, resource, error) + else: + try: + response = requests.post(url, + params=qs_params, + headers=SEND_JSON, + data=body, verify=verify) + except (requests.ConnectionError, + requests.Timeout, + requests.RequestException) as exc: + LOGGER.error("HTTP request error: %s", str(exc)) + code = HTTP_INTERNAL_SERVER_ERROR + error["status"]["type"] = c.TRANSIENT + return maybe_save(resource_id, self.storage, code, + location, resource, error) + try: + code = response.status_code + if code in [HTTP_CREATED, HTTP_OK]: + if 'location' in response.headers: + location = response.headers['location'] + resource = json_load(response.content) + resource_id = resource.get('resource') + error = None + elif code in 
def _get(self, url, query_string='',
         shared_username=None, shared_api_key=None, organization=None,
         shared_ref=None, resource_id=None):
    """Retrieves a remote resource.

    Uses HTTP GET to retrieve a BigML `url`.

    Returns a BigML resource wrapped in a dictionary that includes:
        code: HTTP status code
        resource: The resource/id
        location: Remote location of the resource
        object: The resource itself
        error: An error code and message

    Arguments:
        url: address the GET request is sent to
        query_string: extra parameters, parsed and merged into the
                      request parameters
        shared_username, shared_api_key: when both are set they are sent
                      as `shared_auth` to `_add_credentials`
        organization: forwarded to `_add_credentials`
        shared_ref: added as the `shared_ref` request parameter if set
        resource_id: ID used in the error paths, where the response
                     provides none

    When the response body cannot be parsed as JSON and `query_string`
    contains "output_format", the raw response content is returned
    instead of the dictionary.

    """
    # pessimistic defaults, overwritten once a valid response is parsed
    code = HTTP_INTERNAL_SERVER_ERROR
    location = url
    resource = None
    error = {
        "status": {
            "code": HTTP_INTERNAL_SERVER_ERROR,
            "message": "The resource couldn't be retrieved"}}

    kwargs = {"organization": organization}
    # alternative credentials are only used when both parts are given
    if shared_username is not None and shared_api_key is not None:
        kwargs.update({"shared_auth": {"username": shared_username,
                                       "api_key": shared_api_key}})

    qs_params = self._add_credentials({}, **kwargs)
    if shared_ref is not None:
        qs_params.update({"shared_ref": shared_ref})
    # parameters in query_string are applied last, so they override
    # the ones set above when the keys coincide
    qs_params.update(dict(parse.parse_qsl(query_string)))
    qs_str = "?%s" % parse.urlencode(qs_params) if qs_params else ""
    if GAE_ENABLED:
        # urlfetch receives the parameters already encoded in the url
        try:
            req_options = {
                'url': url + qs_str,
                'method': urlfetch.GET,
                'headers': ACCEPT_JSON,
                'validate_certificate': self.domain.verify
            }
            response = urlfetch.fetch(**req_options)
        except urlfetch.Error as exception:
            LOGGER.error("HTTP request error: %s",
                         str(exception))
            # the request itself failed: flagged as a transient error
            error["status"]["type"] = c.TRANSIENT
            return maybe_save(resource_id, self.storage, code,
                              location, resource, error)
    else:
        try:
            response = requests.get(url, params = qs_params,
                                    headers=ACCEPT_JSON,
                                    verify=self.domain.verify)
        except (requests.ConnectionError,
                requests.Timeout,
                requests.RequestException) as exc:
            LOGGER.error("HTTP request error: %s", str(exc))
            # the request itself failed: flagged as a transient error
            error["status"]["type"] = c.TRANSIENT
            return maybe_save(resource_id, self.storage, code,
                              location, resource, error)
    try:
        code = response.status_code
        if code == HTTP_OK:
            resource = json_load(response.content)
            resource_id = resource['resource']
            error = None
        elif code in [HTTP_BAD_REQUEST,
                      HTTP_UNAUTHORIZED,
                      HTTP_NOT_FOUND,
                      HTTP_TOO_MANY_REQUESTS]:
            # for these codes the body is parsed as the error information
            error = json_load(response.content)
            LOGGER.error(self.error_message(error, method='get',
                                            resource_id=resource_id))
        else:
            # any other status is reported as an internal server error
            LOGGER.error("GET Unexpected error (%s)", code)
            code = HTTP_INTERNAL_SERVER_ERROR

    except ValueError as exc:
        if "output_format" in query_string:
            # output can be an xml file that is returned without storing
            return response.content
        # NOTE(review): `code` keeps the response status here while
        # `error` keeps its initial value - confirm this is intended
        LOGGER.error("Malformed response: %s", str(exc))

    return maybe_save(resource_id, self.storage, code,
                      location, resource, error)
For example: + + 'order_by=size' + + """ + code = HTTP_INTERNAL_SERVER_ERROR + meta = None + resources = None + error = { + "status": { + "code": code, + "message": "The resource couldn't be listed"}} + + qs_params = self._add_credentials({}, organization=organization) + qs_params.update(dict(parse.parse_qsl(query_string))) + qs_str = "?%s" % parse.urlencode(qs_params) if qs_params else "" + if GAE_ENABLED: + try: + req_options = { + 'url': url + qs_str, + 'method': urlfetch.GET, + 'headers': ACCEPT_JSON, + 'validate_certificate': self.domain.verify + } + response = urlfetch.fetch(**req_options) + except urlfetch.Error as exception: + LOGGER.error("HTTP request error: %s", + str(exception)) + error["status"]["type"] = c.TRANSIENT + return { + 'code': code, + 'meta': meta, + 'objects': resources, + 'error': error} + else: + try: + response = requests.get(url, params=qs_params, + headers=ACCEPT_JSON, + verify=self.domain.verify) + except (requests.ConnectionError, + requests.Timeout, + requests.RequestException) as exc: + LOGGER.error("HTTP request error: %s", str(exc)) + error["status"]["type"] = c.TRANSIENT + return { + 'code': code, + 'meta': meta, + 'objects': resources, + 'error': error} + try: + code = response.status_code + + if code == HTTP_OK: + resource = json_load(response.content) + meta = resource['meta'] + resources = resource['objects'] + error = None + elif code in [HTTP_BAD_REQUEST, + HTTP_UNAUTHORIZED, + HTTP_NOT_FOUND, + HTTP_TOO_MANY_REQUESTS]: + error = json_load(response.content) + else: + LOGGER.error("LIST Unexpected error (%s)", code) + code = HTTP_INTERNAL_SERVER_ERROR + except ValueError as exc: + LOGGER.error("Malformed response: %s", str(exc)) + + return { + 'code': code, + 'meta': meta, + 'objects': resources, + 'error': error} + + def _update(self, url, body, organization=None, resource_id=None): + """Updates a remote resource. + + Uses PUT to update a BigML resource. 
Only the new fields that + are going to be updated need to be included in the `body`. + + Returns a resource wrapped in a dictionary: + code: HTTP_ACCEPTED if the update has been OK or an error + code otherwise. + resource: Resource/id + location: Remote location of the resource. + object: The new updated resource + error: Error code if any. None otherwise + + """ + code = HTTP_INTERNAL_SERVER_ERROR + location = url + resource = None + error = { + "status": { + "code": code, + "message": "The resource couldn't be updated"}} + + qs_params = self._add_credentials({}, organization=organization) + qs_str = "?%s" % parse.urlencode(qs_params) if qs_params else "" + body = self._add_project(body, not organization) + if GAE_ENABLED: + try: + req_options = { + 'url': url + qs_str, + 'method': urlfetch.PUT, + 'headers': SEND_JSON, + 'payload': body, + 'validate_certificate': self.domain.verify + } + response = urlfetch.fetch(**req_options) + except urlfetch.Error as exception: + LOGGER.error("HTTP request error: %s", + str(exception)) + error["status"]["type"] = c.TRANSIENT + return maybe_save(resource_id, self.storage, code, + location, resource, error) + else: + try: + response = requests.put(url, + params=qs_params, + headers=SEND_JSON, + data=body, verify=self.domain.verify) + except (requests.ConnectionError, + requests.Timeout, + requests.RequestException) as exc: + LOGGER.error("HTTP request error: %s", str(exc)) + error["status"]["type"] = c.TRANSIENT + return maybe_save(resource_id, self.storage, code, + location, resource, error) + try: + code = response.status_code + if code == HTTP_ACCEPTED: + resource = json_load(response.content) + resource_id = resource['resource'] + error = None + elif code in [HTTP_UNAUTHORIZED, + HTTP_PAYMENT_REQUIRED, + HTTP_METHOD_NOT_ALLOWED, + HTTP_TOO_MANY_REQUESTS]: + error = json_load(response.content) + LOGGER.error(self.error_message(error, method='update', + resource_id=resource_id)) + else: + LOGGER.error("UPDATE Unexpected error 
(%s)", code) + code = HTTP_INTERNAL_SERVER_ERROR + except ValueError: + LOGGER.error("Malformed response") + + return maybe_save(resource_id, self.storage, code, + location, resource, error) + + def _delete(self, url, query_string='', organization=None, + resource_id=None): + """Permanently deletes a remote resource. + + If the request is successful the status `code` will be HTTP_NO_CONTENT + and `error` will be None. Otherwise, the `code` will be an error code + and `error` will be provide a specific code and explanation. + + """ + code = HTTP_INTERNAL_SERVER_ERROR + error = { + "status": { + "code": code, + "message": "The resource couldn't be deleted"}} + qs_params = self._add_credentials({}, organization=organization) + qs_params.update(dict(parse.parse_qsl(query_string))) + qs_str = "?%s" % parse.urlencode(qs_params) if qs_params else "" + if GAE_ENABLED: + try: + req_options = { + 'url': url + qs_str, + 'method': urlfetch.DELETE, + 'validate_certificate': self.domain.verify + } + response = urlfetch.fetch(**req_options) + except urlfetch.Error as exception: + LOGGER.error("HTTP request error: %s", + str(exception)) + error["status"]["type"] = c.TRANSIENT + return { + 'code': code, + 'resource': resource_id, + 'error': error} + else: + try: + response = requests.delete(url, params=qs_params, + verify=self.domain.verify) + except (requests.ConnectionError, + requests.Timeout, + requests.RequestException) as exc: + LOGGER.error("HTTP request error: %s", str(exc)) + error["status"]["type"] = c.TRANSIENT + return { + 'code': code, + 'resource': resource_id, + 'error': error} + try: + code = response.status_code + + if code == HTTP_NO_CONTENT: + error = None + elif code in [HTTP_BAD_REQUEST, + HTTP_UNAUTHORIZED, + HTTP_NOT_FOUND, + HTTP_TOO_MANY_REQUESTS]: + error = json_load(response.content) + LOGGER.error(self.error_message(error, method='delete')) + else: + LOGGER.error("Unexpected error (%s)", code) + code = HTTP_INTERNAL_SERVER_ERROR + + except ValueError: + 
LOGGER.error("Malformed response") + + return { + 'code': code, + 'resource': resource_id, + 'error': error} + + def _download(self, url, filename=None, wait_time=10, retries=10, + counter=0): + """Retrieves a remote file. + + Uses HTTP GET to download a file object with a BigML `url`. + """ + code = HTTP_INTERNAL_SERVER_ERROR + file_object = None + + # if retries for the creation and download have been exhausted, + # return None + if counter > 2 * retries: + LOGGER.error("Retries exhausted trying to download the file.") + return file_object + qs_params = self._add_credentials({}) + qs_str = "?%s" % parse.urlencode(qs_params) if qs_params else "" + if GAE_ENABLED: + try: + req_options = { + 'url': url + qs_str, + 'method': urlfetch.GET, + 'validate_certificate': self.domain.verify + } + response = urlfetch.fetch(**req_options) + except urlfetch.Error as exception: + LOGGER.error("HTTP request error: %s", + str(exception)) + return file_object + else: + try: + response = requests.get(url, params=qs_params, + verify=self.domain.verify, + stream=True) + except (requests.ConnectionError, + requests.Timeout, + requests.RequestException) as exc: + LOGGER.error("HTTP request error: %s", str(exc)) + return file_object + try: + code = response.status_code + if code == HTTP_OK: + # starting the dataset export procedure + if response.headers.get("content-type") == JSON_TYPE: + try: + if counter < retries: + download_status = json_load(response.content) + if download_status and isinstance(download_status, + dict): + if download_status['status']['code'] != 5: + time.sleep(get_exponential_wait(wait_time, + counter)) + counter += 1 + return self._download(url, + filename=filename, + wait_time=wait_time, + retries=retries, + counter=counter) + return self._download(url, + filename=filename, + wait_time=wait_time, + retries=retries, + counter=retries + 1) + elif counter == retries: + LOGGER.error("The maximum number of retries " + " for the download has been " + " exceeded. 
You can retry your " + " command again in" + " a while.") + return None + except ValueError: + LOGGER.error("Failed getting a valid JSON structure.") + else: + # When download starts, content-type is no longer a + # JSON object. + if filename is not None and GAE_ENABLED: + LOGGER.error("No support for downloading" + " to local files in Google App Engine.") + filename = None + if filename is None: + if GAE_ENABLED: + file_object = io.StringIO(response.content) + else: + file_object = response.raw + else: + try: + total_size = int( + response.headers.get("content-length")) + except ValueError: + total_size = None + file_size = stream_copy(response, filename) + if file_size == 0: + LOGGER.error("Error copying file to %s", filename) + else: + file_object = filename + # if transient connection errors prevent the download, + # retry + if total_size is None or file_size < total_size: + LOGGER.error("Error downloading: " + "total size=%s, %s downloaded", + total_size, file_size) + time.sleep(get_exponential_wait(wait_time, + counter)) + return self._download(url, filename=filename, + wait_time=wait_time, + retries=retries, + counter=counter + 1) + elif code in [HTTP_BAD_REQUEST, + HTTP_UNAUTHORIZED, + HTTP_NOT_FOUND, + HTTP_TOO_MANY_REQUESTS]: + error = response.content + LOGGER.error("Error downloading: %s", error) + else: + LOGGER.error("Unexpected error (%s)", code) + code = HTTP_INTERNAL_SERVER_ERROR + except ValueError: + LOGGER.error("Malformed response") + + return file_object + + def _status(self, url, query_string='', organization=None): + """Returns the status of the account. 
+ + + """ + code = HTTP_INTERNAL_SERVER_ERROR + resources = None + error = { + "status": { + "code": code, + "message": "Failed to obtain the account status info"}} + qs_params = self._add_credentials({}, organization=organization) + qs_params.update(dict(parse.parse_qsl(query_string))) + qs_str = "?%s" % parse.urlencode(qs_params) if qs_params else "" + + if GAE_ENABLED: + try: + req_options = { + 'url': url + qs_str, + 'method': urlfetch.GET, + 'headers': ACCEPT_JSON, + 'validate_certificate': self.domain.verify + } + response = urlfetch.fetch(**req_options) + except urlfetch.Error as exception: + LOGGER.error("HTTP request error: %s", + str(exception)) + return { + 'code': code, + 'object': resources, + 'error': error} + else: + try: + response = requests.get(url, params=qs_params, + headers=ACCEPT_JSON, + verify=self.domain.verify) + except (requests.ConnectionError, + requests.Timeout, + requests.RequestException) as exc: + LOGGER.error("HTTP request error: %s", str(exc)) + error["status"]["type"] = c.TRANSIENT + return { + 'code': code, + 'object': resources, + 'error': error} + try: + code = response.status_code + + if code == HTTP_OK: + resource = json_load(response.content) + resources = resource + error = None + elif code in [HTTP_BAD_REQUEST, + HTTP_UNAUTHORIZED, + HTTP_NOT_FOUND, + HTTP_TOO_MANY_REQUESTS]: + error = json_load(response.content) + else: + LOGGER.error("Unexpected error (%s)", code) + code = HTTP_INTERNAL_SERVER_ERROR + except ValueError as exc: + LOGGER.error("Malformed response: %s", str(exc)) + + return { + 'code': code, + 'object': resources, + 'error': error} + + def error_message(self, resource, resource_type='resource', method=None, + resource_id=None): + """Error message for each type of resource + + """ + error = None + error_info = None + if isinstance(resource, dict): + if 'error' in resource: + error_info = resource['error'] + elif ('code' in resource + and 'status' in resource): + error_info = resource + resource_id = 
resource_id or resource.get("resource") + else: + resource_id = resource_id or resource + if error_info is not None and 'code' in error_info: + code = error_info['code'] + if ('status' in error_info and + 'message' in error_info['status']): + error = error_info['status']['message'] + extra = error_info['status'].get('extra', None) + if extra is not None: + error += ": %s" % extra + if code == HTTP_NOT_FOUND and method == 'get': + alternate_message = '' + if self.domain.general_domain != DEFAULT_DOMAIN: + alternate_message = ( + '- The %s was not created in %s.\n' % ( + resource_type, self.domain.general_domain)) + error += ( + '\nCouldn\'t find a %s matching the given' + ' id (%s) in %s. The most probable causes are:\n\n%s' + '- A typo in the %s\'s id.\n' + '- The %s id cannot be accessed with your credentials' + ' or was not created in %s.\n' + '\nDouble-check your %s and' + ' credentials info and retry.' % ( + resource_type, resource_id, self.domain.general_domain, + alternate_message, resource_type, + resource_type, self.domain.general_domain, + resource_type)) + return error + if code == HTTP_UNAUTHORIZED: + error += ('\nDouble-check your credentials and the general' + ' domain your account is registered with (currently' + ' using %s), please.' % self.domain.general_domain) + return error + if code == HTTP_BAD_REQUEST: + error += '\nDouble-check the arguments for the call, please.' + return error + if code == HTTP_TOO_MANY_REQUESTS: + error += ('\nToo many requests. Please stop ' + ' requests for a while before resuming.') + return error + if code == HTTP_PAYMENT_REQUIRED: + error += ('\nThis operation exceeds your subscription limits.' 
+ ' Please, upgrade your subscription, reduce the ' + 'dataset size or wait for a running task to finish.') + return error + + return "Invalid %s structure:\n\n%s" % (resource_type, resource) diff --git a/bigml/centroid.py b/bigml/centroid.py new file mode 100644 index 00000000..534cb562 --- /dev/null +++ b/bigml/centroid.py @@ -0,0 +1,102 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Centroid structure for the BigML local Cluster + +This module defines an auxiliary Centroid predicate structure that is used +in the cluster. + +""" + +import math +import sys + +INDENT = " " * 4 +STATISTIC_MEASURES = [ + 'Minimum', 'Mean', 'Median', 'Maximum', 'Standard deviation', 'Sum', + 'Sum squares', 'Variance'] + + +def cosine_distance2(terms, centroid_terms, scale): + """Returns the distance defined by cosine similarity + + """ + # Centroid values for the field can be an empty list. + # Then the distance for an empty input is 1 + # (before applying the scale factor). + if not terms and not centroid_terms: + return 0 + if not terms or not centroid_terms: + return scale ** 2 + input_count = 0 + for term in centroid_terms: + if term in terms: + input_count += 1 + cosine_similarity = input_count / math.sqrt( + len(terms) * len(centroid_terms)) + similarity_distance = scale * (1 - cosine_similarity) + return similarity_distance ** 2 + + +class Centroid(): + """A Centroid. 
+ + """ + def __init__(self, centroid_info): + self.center = centroid_info.get('center', {}) + self.count = centroid_info.get('count', 0) + self.centroid_id = centroid_info.get( + 'id', centroid_info.get("centroid_id", None)) + self.name = centroid_info.get('name', None) + self.distance = centroid_info.get('distance', {}) + + def distance2(self, input_data, term_sets, scales, stop_distance2=None): + """Squared Distance from the given input data to the centroid + + """ + distance2 = 0.0 + for field_id, value in list(self.center.items()): + try: + if isinstance(value, list): + # text field + terms = ([] if field_id not in term_sets else + term_sets[field_id]) + distance2 += cosine_distance2(terms, value, scales[field_id]) + elif isinstance(value, str): + if field_id not in input_data or input_data[field_id] != value: + distance2 += 1 * scales[field_id] ** 2 + else: + distance2 += ((input_data[field_id] - value) * + scales[field_id]) ** 2 + if stop_distance2 is not None and distance2 >= stop_distance2: + return None + except: + raise ValueError("Error computing field id %s input %s value %s" % + (field_id, input_data[field_id], value)) + return distance2 + + def print_statistics(self, out=sys.stdout): + """Print the statistics for the training data clustered around the + centroid + + """ + out.write("%s%s:\n" % (INDENT, self.name)) + literal = "%s%s: %s\n" + for measure_title in STATISTIC_MEASURES: + measure = measure_title.lower().replace(" ", "_") + out.write(literal % (INDENT * 2, measure_title, + self.distance[measure])) + out.write("\n") diff --git a/bigml/cluster.py b/bigml/cluster.py new file mode 100644 index 00000000..5739554b --- /dev/null +++ b/bigml/cluster.py @@ -0,0 +1,724 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A local Predictive Cluster. + +This module defines a Cluster to make predictions (centroids) locally or +embedded into your application without needing to send requests to +BigML.io. + +This module can help you enormously to +reduce the latency for each prediction and let you use your clusters +offline. + +Example usage (assuming that you have previously set up the BIGML_USERNAME +and BIGML_API_KEY environment variables and that you own the cluster/id +below): + +from bigml.api import BigML +from bigml.cluster import Cluster + +api = BigML() + +cluster = Cluster('cluster/5026965515526876630001b2') +cluster.centroid({"petal length": 3, "petal width": 1, + "sepal length": 1, "sepal width": 0.5}) + +""" +import logging +import sys +import math +import re +import csv +import codecs + + +from bigml.api import FINISHED +from bigml.api import get_status, get_api_connection, get_cluster_id +from bigml.util import cast, utf8, NUMERIC, use_cache, load, dump, dumps, \ + get_data_format, get_formatted_data, format_data, get_data_transformations +from bigml.centroid import Centroid +from bigml.basemodel import get_resource_dict +from bigml.generators.model import print_distribution +from bigml.predicate import TM_TOKENS, TM_FULL_TERM +from bigml.modelfields import ModelFields +from bigml.io import UnicodeWriter +from bigml.constants import OUT_NEW_FIELDS, OUT_NEW_HEADERS, INTERNAL + + +LOGGER = logging.getLogger('BigML') + +CSV_STATISTICS = ['minimum', 'mean', 'median', 'maximum', 'standard_deviation', + 'sum', 'sum_squares', 'variance'] +INDENT = " " * 4 
+INTERCENTROID_MEASURES = [('Minimum', min), + ('Mean', lambda x: sum(x)/float(len(x))), + ('Maximum', max)] +GLOBAL_CLUSTER_LABEL = 'Global' + +DFT_OUTPUTS = ["centroid_name", "distance"] + + +def parse_terms(text, case_sensitive=True): + """Returns the list of parsed terms + + """ + if text is None: + return [] + expression = r'(\b|_)([^\b_\s]+?)(\b|_)' + pattern = re.compile(expression) + return [match[1] if case_sensitive else match[1].lower() + for match in re.findall(pattern, text)] + + +def parse_items(text, regexp): + """Returns the list of parsed items + + """ + if text is None: + return [] + pattern = re.compile(regexp, flags=re.U) + return pattern.split(text) + + +def get_unique_terms(terms, term_forms, tag_cloud): + """Extracts the unique terms that occur in one of the alternative forms in + term_forms or in the tag cloud. + + """ + extend_forms = {} + for term, forms in list(term_forms.items()): + for form in forms: + extend_forms[form] = term + extend_forms[term] = term + terms_set = set() + for term in terms: + if term in tag_cloud: + terms_set.add(term) + elif term in extend_forms: + terms_set.add(extend_forms[term]) + return list(terms_set) + + +def cluster_global_distance(): + """Used to populate the intercentroid distances columns in the CSV + report. For now we don't want to compute real distance and just + display "N/A" + """ + intercentroid_distance = [] + for measure, _ in INTERCENTROID_MEASURES: + intercentroid_distance.append([measure, 'N/A']) + return intercentroid_distance + + +def centroid_features(centroid, field_ids, encode=True): + """Returns features defining the centroid according to the list + of common field ids that define the centroids. + + """ + features = [] + for field_id in field_ids: + value = centroid.center[field_id] + if isinstance(value, str) and encode: + value = utf8(value) + features.append(value) + return features + + +class Cluster(ModelFields): + """ A lightweight wrapper around a cluster model. 
+ + Uses a BigML remote cluster model to build a local version that can be used + to generate centroid predictions locally. + + """ + + def __init__(self, cluster, api=None, cache_get=None): + + self.api = get_api_connection(api) + self.centroids = None + if use_cache(cache_get): + # using a cache to store the cluster attributes + self.__dict__ = load(get_cluster_id(cluster), cache_get) + + for index, centroid in enumerate(self.centroids): + self.centroids[index] = Centroid(centroid) + self.cluster_global = Centroid(self.cluster_global) + return + + self.resource_id = None + self.name = None + self.description = None + self.parent_id = None + self.cluster_global = None + self.total_ss = None + self.within_ss = None + self.between_ss = None + self.ratio_ss = None + self.critical_value = None + self.input_fields = [] + self.default_numeric_value = None + self.summary_fields = [] + self.default_numeric_value = None + self.k = None + self.summary_fields = [] + self.scales = {} + self.term_forms = {} + self.tag_clouds = {} + self.term_analysis = {} + self.item_analysis = {} + self.items = {} + self.datasets = {} + + self.resource_id, cluster = get_resource_dict( \ + cluster, "cluster", api=self.api) + + if 'object' in cluster and isinstance(cluster['object'], dict): + cluster = cluster['object'] + try: + self.parent_id = cluster.get('dataset') + self.name = cluster.get("name") + self.description = cluster.get("description") + except AttributeError: + raise ValueError("Failed to find the expected " + "JSON structure. 
Check your arguments.") + + if 'clusters' in cluster and isinstance(cluster['clusters'], dict): + status = get_status(cluster) + if 'code' in status and status['code'] == FINISHED: + self.default_numeric_value = cluster.get( \ + "default_numeric_value") + self.summary_fields = cluster.get("summary_fields", []) + self.input_fields = cluster.get("input_fields", []) + self.datasets = cluster.get("cluster_datasets", {}) + the_clusters = cluster['clusters'] + cluster_global = the_clusters.get('global') + clusters = the_clusters['clusters'] + self.centroids = [Centroid(centroid) for centroid in clusters] + self.cluster_global = cluster_global + if cluster_global: + self.cluster_global = Centroid(cluster_global) + # "global" has no "name" and "count" then we set them + self.cluster_global.name = GLOBAL_CLUSTER_LABEL + self.cluster_global.count = \ + self.cluster_global.distance['population'] + self.total_ss = the_clusters.get('total_ss') + self.within_ss = the_clusters.get('within_ss') + if not self.within_ss: + self.within_ss = sum(centroid.distance['sum_squares'] for + centroid in self.centroids) + self.between_ss = the_clusters.get('between_ss') + self.ratio_ss = the_clusters.get('ratio_ss') + self.critical_value = cluster.get('critical_value', None) + self.k = cluster.get('k') + self.scales.update(cluster['scales']) + self.term_forms = {} + self.tag_clouds = {} + self.term_analysis = {} + fields = cluster['clusters']['fields'] + summary_fields = cluster['summary_fields'] + for field_id in summary_fields: + try: + del fields[field_id] + except KeyError: + # clusters retrieved from API will only contain + # model fields + pass + missing_tokens = cluster['clusters'].get('missing_tokens') + ModelFields.__init__(self, fields, + missing_tokens=missing_tokens) + if not all(field_id in self.fields for + field_id in self.scales): + raise Exception("Some fields are missing" + " to generate a local cluster." 
+ " Please, provide a cluster with" + " the complete list of fields.") + else: + raise Exception("The cluster isn't finished yet") + else: + raise Exception("Cannot create the Cluster instance. Could not" + " find the 'clusters' key in the resource:\n\n%s" % + cluster) + + def centroid(self, input_data): + """Returns the id of the nearest centroid + + """ + clean_input_data, unique_terms = self._prepare_for_distance( \ + input_data) + nearest = {'centroid_id': None, 'centroid_name': None, + 'distance': float('inf')} + for centroid in self.centroids: + distance2 = centroid.distance2(clean_input_data, unique_terms, + self.scales, + stop_distance2=nearest['distance']) + if distance2 is not None: + nearest = {'centroid_id': centroid.centroid_id, + 'centroid_name': centroid.name, + 'distance': distance2} + nearest['distance'] = math.sqrt(nearest['distance']) + return nearest + + @property + def is_g_means(self): + """Checks whether the cluster has been created using g-means + + """ + return self.critical_value is not None + + def fill_numeric_defaults(self, input_data): + """Checks whether input data is missing a numeric field and + fills it with the average quantity set in default_numeric_value + + """ + + for field_id, field in self.fields.items(): + if (field_id not in self.summary_fields and \ + field['optype'] == NUMERIC and + field_id not in input_data): + if self.default_numeric_value is None: + raise Exception("Missing values in input data. 
Input" + " data must contain values for all " + "numeric fields to compute a distance.") + default_value = 0 if self.default_numeric_value == "zero" \ + else field['summary'].get(self.default_numeric_value) + input_data[field_id] = default_value + return input_data + + def get_unique_terms(self, input_data): + """Parses the input data to find the list of unique terms in the + tag cloud + + """ + unique_terms = {} + for field_id in self.term_forms: + if field_id in input_data: + input_data_field = input_data.get(field_id, '') + if isinstance(input_data_field, str): + case_sensitive = self.term_analysis[field_id].get( + 'case_sensitive', True) + token_mode = self.term_analysis[field_id].get( + 'token_mode', 'all') + if token_mode != TM_FULL_TERM: + terms = parse_terms(input_data_field, + case_sensitive=case_sensitive) + else: + terms = [] + if token_mode != TM_TOKENS: + terms.append( + input_data_field if case_sensitive + else input_data_field.lower()) + unique_terms[field_id] = get_unique_terms( + terms, self.fields[field_id]["summary"]["term_forms"], + self.tag_clouds.get(field_id, [])) + else: + unique_terms[field_id] = input_data_field + del input_data[field_id] + # the same for items fields + #pylint: disable=locally-disabled,consider-using-dict-items + for field_id in self.item_analysis: + if field_id in input_data: + input_data_field = input_data.get(field_id, '') + if isinstance(input_data_field, str): + # parsing the items in input_data + separator = self.item_analysis[field_id].get( + 'separator', ' ') + regexp = self.item_analysis[field_id].get( + 'separator_regexp') + if regexp is None: + regexp = r'%s' % re.escape(separator) + terms = parse_items(input_data_field, regexp) + unique_terms[field_id] = get_unique_terms( + terms, {}, + self.items.get(field_id, [])) + else: + unique_terms[field_id] = input_data_field + del input_data[field_id] + + return unique_terms + + def centroids_distance(self, to_centroid): + """Statistic distance information from the 
given centroid + to the rest of centroids in the cluster + + """ + intercentroid_distance = [] + unique_terms = self.get_unique_terms(to_centroid.center) + distances = [] + for centroid in self.centroids: + if centroid.centroid_id != to_centroid.centroid_id: + distances.append( + math.sqrt( + centroid.distance2(to_centroid.center, + unique_terms, + self.scales))) + for measure, function in INTERCENTROID_MEASURES: + result = function(distances) + intercentroid_distance.append([measure, result]) + return intercentroid_distance + + def _prepare_for_distance(self, input_data): + """Prepares the fields to be able to compute the distance2 + + """ + # Checks and cleans input_data leaving the fields used in the model + # and adding default numeric values if set + norm_input_data = self.filter_input_data(input_data) + # Strips affixes for numeric values and casts to the final field type + cast(norm_input_data, self.fields) + + unique_terms = self.get_unique_terms(norm_input_data) + + return norm_input_data, unique_terms + + def distances2_to_point(self, reference_point, + list_of_points): + """Computes the cluster square of the distance to an arbitrary + reference point for a list of points. 
+ reference_point: (dict) The field values for the point used as + reference + list_of_points: (dict|Centroid) The field values or a Centroid object + which contains these values + + + """ + # Checks and cleans input_data leaving the fields used in the model + reference_point, text_coords = self._prepare_for_distance( \ + reference_point) + reference_point.update(text_coords) + # mimic centroid structure to use it in distance computation + point_info = {"center": reference_point} + reference = Centroid(point_info) + distances = [] + for point in list_of_points: + centroid_id = None + if isinstance(point, Centroid): + centroid_id = point.centroid_id + point = point.center + clean_point, unique_terms = self._prepare_for_distance( \ + point) + if clean_point != reference_point: + result = {"data": point, "distance": reference.distance2( \ + clean_point, unique_terms, self.scales)} + if centroid_id is not None: + result.update({"centroid_id": centroid_id}) + distances.append(result) + return distances + + def points_in_cluster(self, centroid_id): + """Returns the list of data points that fall in one cluster. 
+ + """ + + cluster_datasets = self.datasets + centroid_dataset = cluster_datasets.get(centroid_id) + if centroid_dataset in [None, ""]: + centroid_dataset = self.api.create_dataset( \ + self.resource_id, {"centroid": centroid_id}) + self.datasets[centroid_id] = centroid_dataset[ \ + "resource"].replace("dataset/", "") + self.api.ok(centroid_dataset, raise_on_error=True) + else: + centroid_dataset = self.api.check_resource( \ + "dataset/%s" % centroid_dataset) + # download dataset to compute local predictions + downloaded_data = self.api.download_dataset( \ + centroid_dataset["resource"]) + text_reader = codecs.getreader("utf-8") + downloaded_data = text_reader(downloaded_data) + reader = csv.DictReader(downloaded_data) + points = [] + for row in reader: + points.append(row) + return points + + def closest_in_cluster(self, reference_point, + number_of_points=None, + centroid_id=None): + """Computes the list of data points closer to a reference point. + If no centroid_id information is provided, the points are chosen + from the same cluster as the reference point. + The points are returned in a list, sorted according + to their distance to the reference point. The number_of_points + parameter can be set to truncate the list to a maximum number of + results. 
The response is a dictionary that contains the + centroid id of the cluster plus the list of points + """ + if centroid_id is not None and centroid_id not in \ + [centroid.centroid_id for centroid in self.centroids]: + raise AttributeError( \ + "Failed to find the provided centroid_id: %s" % centroid_id) + if centroid_id is None: + # finding the reference point cluster's centroid + centroid_info = self.centroid(reference_point) + centroid_id = centroid_info["centroid_id"] + # reading the points that fall in the same cluster + points = self.points_in_cluster(centroid_id) + # computing distance to reference point + points = self.distances2_to_point(reference_point, points) + points = sorted(points, key=lambda x: x["distance"]) + if number_of_points is not None: + points = points[:number_of_points] + for point in points: + point["distance"] = math.sqrt(point["distance"]) + return {"centroid_id": centroid_id, "reference": reference_point, + "closest": points} + + def sorted_centroids(self, reference_point): + """ Gives the list of centroids sorted according to its distance to + an arbitrary reference point. 
+ + """ + close_centroids = self.distances2_to_point( \ + reference_point, self.centroids) + for centroid in close_centroids: + centroid["distance"] = math.sqrt(centroid["distance"]) + centroid["center"] = centroid["data"] + del centroid["data"] + return {"reference": reference_point, + "centroids": sorted(close_centroids, + key=lambda x: x["distance"])} + + def get_data_distribution(self): + """Returns training data distribution + + """ + distribution = [[centroid.name, centroid.count] for centroid in + self.centroids] + return sorted(distribution, key=lambda x: x[0]) + + + def print_global_distribution(self, out=sys.stdout): + """Prints the line Global: 100% ( instances) + + """ + output = "" + if self.cluster_global: + output += (" %s: 100%% (%d instances)\n" % ( + self.cluster_global.name, + self.cluster_global.count)) + out.write(output) + out.flush() + + def print_ss_metrics(self, out=sys.stdout): + """Prints the block of *_ss metrics from the cluster + + """ + ss_metrics = [("total_ss (Total sum of squares)", self.total_ss), + ("within_ss (Total within-cluster sum of the sum " + "of squares)", self.within_ss), + ("between_ss (Between sum of squares)", self.between_ss), + ("ratio_ss (Ratio of sum of squares)", self.ratio_ss)] + output = "" + + for metric in ss_metrics: + if metric[1]: + output += ("%s%s: %5f\n" % (INDENT, metric[0], metric[1])) + + out.write(output) + out.flush() + + def statistics_csv(self, file_name=None): + """Clusters statistic information in CSV format + + """ + rows = [] + writer = None + field_ids = self.centroids[0].center.keys() + headers = ["Centroid_name"] + headers.extend(["%s" % self.fields[field_id]["name"] + for field_id in field_ids]) + headers.extend(["Instances"]) + intercentroids = False + header_complete = False + + + centroids_list = sorted(self.centroids, key=lambda x: x.name) + for centroid in centroids_list: + row = [centroid.name] + row.extend(centroid_features(centroid, field_ids, + encode=False)) + 
row.append(centroid.count) + if len(self.centroids) > 1: + for measure, result in self.centroids_distance(centroid): + if not intercentroids: + headers.append("%s intercentroid distance" % \ + measure.title()) + row.append(result) + intercentroids = True + for measure, result in centroid.distance.items(): + if measure in CSV_STATISTICS: + if not header_complete: + headers.append("Distance %s" % + measure.lower().replace("_", " ")) + row.append(result) + if not header_complete: + rows.append(headers) + header_complete = True + rows.append(row) + + if self.cluster_global: + row = ["%s" % self.cluster_global.name] + row.extend(centroid_features(self.cluster_global, field_ids, + encode=False)) + row.append(self.cluster_global.count) + if len(self.centroids) > 1: + for measure, result in cluster_global_distance(): + row.append(result) + for measure, result in self.cluster_global.distance.items(): + if measure in CSV_STATISTICS: + row.append(result) + # header is already in rows then insert cluster_global after it + rows.insert(1, row) + + if file_name is None: + return rows + with UnicodeWriter(file_name) as writer: + writer.writerows(rows) + return file_name + + def summarize(self, out=sys.stdout): + """Prints a summary of the cluster info + + """ + report_header = '' + if self.is_g_means: + report_header = \ + 'G-means Cluster (critical_value=%d)' % self.critical_value + else: + report_header = 'K-means Cluster (k=%d)' % self.k + + out.write(report_header + ' with %d centroids\n\n' % + len(self.centroids)) + + out.write("Data distribution:\n") + # "Global" is set as first entry + self.print_global_distribution(out=out) + print_distribution(self.get_data_distribution(), out=out) + out.write("\n") + centroids_list = [self.cluster_global] if self.cluster_global else [] + centroids_list.extend(sorted(self.centroids, key=lambda x: x.name)) + + out.write("Cluster metrics:\n") + self.print_ss_metrics(out=out) + out.write("\n") + + + out.write("Centroids:\n") + for centroid 
def batch_predict(self, input_data_list, outputs=None, **kwargs):
    """Creates a batch centroid for a list of inputs using the local
    cluster model. Allows to define some output settings to
    decide the fields to be added to the input_data (centroid_name,
    distance, etc.) and the name that we want to assign to these new
    fields. The outputs argument accepts a dictionary with keys
    "output_fields", to contain a list of the prediction properties to add
    (["centroid_name", "distance"] by default) and "output_headers", to
    contain a list of the headers to be used when adding them (identical
    to "output_fields" list, by default).

    :param input_data_list: List of input data to be predicted
    :type input_data_list: list or Panda's dataframe
    :param dict outputs: properties that define the headers and fields to
                         be added to the input data
    :param kwargs: extra arguments forwarded to `self.centroid`
    :return: the list of input data plus the predicted values
    :rtype: list or Panda's dataframe depending on the input type in
            input_data_list

    """
    if outputs is None:
        outputs = {}
    new_fields = outputs.get(OUT_NEW_FIELDS, DFT_OUTPUTS)
    # Work on a copy: the headers list may be the caller's own list or the
    # very same object as new_fields (possibly the shared default), and it
    # is completed below
    new_headers = list(outputs.get(OUT_NEW_HEADERS, new_fields))
    if len(new_fields) > len(new_headers):
        # fewer headers than fields: the missing headers default to the
        # corresponding field names
        new_headers.extend(new_fields[len(new_headers):])
    else:
        # extra headers are dropped
        new_headers = new_headers[0: len(new_fields)]
    data_format = get_data_format(input_data_list)
    inner_data_list = get_formatted_data(input_data_list, INTERNAL)
    for input_data in inner_data_list:
        prediction = self.centroid(input_data, **kwargs)
        for index, key in enumerate(new_fields):
            input_data[new_headers[index]] = prediction[key]
    if data_format != INTERNAL:
        return format_data(inner_data_list, out_format=data_format)
    return inner_data_list
def _serializable_vars(self):
    """Returns a copy of the instance attributes ready to be serialized:
    centroid objects are replaced by their attributes dictionaries and the
    API connection is left out. The instance itself is not modified.

    """
    self_vars = vars(self).copy()
    # a new list is built so that self.centroids keeps its objects
    self_vars["centroids"] = [vars(centroid) for centroid
                              in self_vars["centroids"]]
    # summarize() and statistics_csv() treat cluster_global as optional,
    # so a missing global centroid is kept as is
    if self_vars.get("cluster_global") is not None:
        self_vars["cluster_global"] = vars(self_vars["cluster_global"])
    self_vars.pop("api", None)
    return self_vars

def dump(self, output=None, cache_set=None):
    """Uses msgpack to serialize the resource object
    If cache_set is filled with a cache set method, the method is called

    :param output: forwarded to the module-level `dump` function
    :param cache_set: forwarded to the module-level `dump` function
    """
    dump(self._serializable_vars(), output=output, cache_set=cache_set)

def dumps(self):
    """Uses msgpack to serialize the resource object to a string

    :return: the result of the module-level `dumps` function
    """
    return dumps(self._serializable_vars())
"""Common auxiliary constants for all resources

"""

import re


# Basic resources
SOURCE_PATH = 'source'
DATASET_PATH = 'dataset'
MODEL_PATH = 'model'
PREDICTION_PATH = 'prediction'
EVALUATION_PATH = 'evaluation'
ENSEMBLE_PATH = 'ensemble'
BATCH_PREDICTION_PATH = 'batchprediction'
CLUSTER_PATH = 'cluster'
CENTROID_PATH = 'centroid'
BATCH_CENTROID_PATH = 'batchcentroid'
ANOMALY_PATH = 'anomaly'
ANOMALY_SCORE_PATH = 'anomalyscore'
BATCH_ANOMALY_SCORE_PATH = 'batchanomalyscore'
PROJECT_PATH = 'project'
SAMPLE_PATH = 'sample'
CORRELATION_PATH = 'correlation'
STATISTICAL_TEST_PATH = 'statisticaltest'
LOGISTIC_REGRESSION_PATH = 'logisticregression'
ASSOCIATION_PATH = 'association'
ASSOCIATION_SET_PATH = 'associationset'
CONFIGURATION_PATH = 'configuration'
TOPIC_MODEL_PATH = 'topicmodel'
TOPIC_DISTRIBUTION_PATH = 'topicdistribution'
BATCH_TOPIC_DISTRIBUTION_PATH = 'batchtopicdistribution'
TIME_SERIES_PATH = 'timeseries'
FORECAST_PATH = 'forecast'
DEEPNET_PATH = 'deepnet'
OPTIML_PATH = 'optiml'
FUSION_PATH = 'fusion'
PCA_PATH = 'pca'
PROJECTION_PATH = 'projection'
BATCH_PROJECTION_PATH = 'batchprojection'
LINEAR_REGRESSION_PATH = 'linearregression'
SCRIPT_PATH = 'script'
EXECUTION_PATH = 'execution'
LIBRARY_PATH = 'library'
STATUS_PATH = 'status'
EXTERNAL_CONNECTOR_PATH = 'externalconnector'

SUPERVISED_PATHS = [
    MODEL_PATH,
    ENSEMBLE_PATH,
    LOGISTIC_REGRESSION_PATH,
    LINEAR_REGRESSION_PATH,
    DEEPNET_PATH,
    FUSION_PATH
]
MODELS_PATHS = [
    MODEL_PATH,
    ENSEMBLE_PATH,
    LOGISTIC_REGRESSION_PATH,
    LINEAR_REGRESSION_PATH,
    DEEPNET_PATH,
    CLUSTER_PATH,
    ANOMALY_PATH,
    ASSOCIATION_PATH,
    TOPIC_MODEL_PATH,
    TIME_SERIES_PATH,
    FUSION_PATH,
    PCA_PATH
]

CLONABLE_PATHS = [SOURCE_PATH, DATASET_PATH, SCRIPT_PATH]
CLONABLE_PATHS.extend(MODELS_PATHS)

PMML_MODELS = [
    MODEL_PATH,
    LOGISTIC_REGRESSION_PATH,
    CLUSTER_PATH,
    ASSOCIATION_PATH
]

# Resource Ids patterns
ID_PATTERN = '[a-f0-9]{24}'
SHARED_PATTERN = '[a-zA-Z0-9]{24,30}'
ID_RE = re.compile(r'^%s$' % ID_PATTERN)


def _resource_re(path, public=False, shared=False):
    """Compiles the regular expression that validates a resource ID.

    The pattern is always `^<path>/<ID_PATTERN>$`. With `public`, an
    optional `public/` prefix is accepted; with `shared`, the alternative
    `^shared/<path>/<SHARED_PATTERN>$` is also accepted. Every alternative
    is anchored at both ends: `|` binds looser than `^`/`$`, so each side
    needs its own `$` to reject IDs followed by trailing characters.
    """
    pattern = r'^%s%s/%s$' % ('(public/)?' if public else '', path,
                              ID_PATTERN)
    if shared:
        pattern += r'|^shared/%s/%s$' % (path, SHARED_PATTERN)
    return re.compile(pattern)


SOURCE_RE = _resource_re(SOURCE_PATH, shared=True)
DATASET_RE = _resource_re(DATASET_PATH, public=True, shared=True)
MODEL_RE = _resource_re(MODEL_PATH, public=True, shared=True)
PREDICTION_RE = _resource_re(PREDICTION_PATH)
EVALUATION_RE = _resource_re(EVALUATION_PATH)
ENSEMBLE_RE = _resource_re(ENSEMBLE_PATH, shared=True)
BATCH_PREDICTION_RE = _resource_re(BATCH_PREDICTION_PATH)
CLUSTER_RE = _resource_re(CLUSTER_PATH, public=True, shared=True)
CENTROID_RE = _resource_re(CENTROID_PATH)
BATCH_CENTROID_RE = _resource_re(BATCH_CENTROID_PATH)
ANOMALY_RE = _resource_re(ANOMALY_PATH, public=True, shared=True)
ANOMALY_SCORE_RE = _resource_re(ANOMALY_SCORE_PATH)
BATCH_ANOMALY_SCORE_RE = _resource_re(BATCH_ANOMALY_SCORE_PATH)
PROJECT_RE = _resource_re(PROJECT_PATH)
SAMPLE_RE = _resource_re(SAMPLE_PATH, shared=True)
CORRELATION_RE = _resource_re(CORRELATION_PATH, shared=True)
STATISTICAL_TEST_RE = _resource_re(STATISTICAL_TEST_PATH, shared=True)
LOGISTIC_REGRESSION_RE = _resource_re(LOGISTIC_REGRESSION_PATH, shared=True)
ASSOCIATION_RE = _resource_re(ASSOCIATION_PATH, shared=True)
ASSOCIATION_SET_RE = _resource_re(ASSOCIATION_SET_PATH)
CONFIGURATION_RE = _resource_re(CONFIGURATION_PATH)
TOPIC_MODEL_RE = _resource_re(TOPIC_MODEL_PATH, public=True, shared=True)
TOPIC_DISTRIBUTION_RE = _resource_re(TOPIC_DISTRIBUTION_PATH, public=True,
                                     shared=True)
BATCH_TOPIC_DISTRIBUTION_RE = _resource_re(BATCH_TOPIC_DISTRIBUTION_PATH,
                                           public=True, shared=True)
TIME_SERIES_RE = _resource_re(TIME_SERIES_PATH, shared=True)
FORECAST_RE = _resource_re(FORECAST_PATH)
DEEPNET_RE = _resource_re(DEEPNET_PATH, shared=True)
OPTIML_RE = _resource_re(OPTIML_PATH)
FUSION_RE = _resource_re(FUSION_PATH, shared=True)
PCA_RE = _resource_re(PCA_PATH, shared=True)
PROJECTION_RE = _resource_re(PROJECTION_PATH)
BATCH_PROJECTION_RE = _resource_re(BATCH_PROJECTION_PATH)
LINEAR_REGRESSION_RE = _resource_re(LINEAR_REGRESSION_PATH, shared=True)
SCRIPT_RE = _resource_re(SCRIPT_PATH, public=True, shared=True)
EXECUTION_RE = _resource_re(EXECUTION_PATH, shared=True)
LIBRARY_RE = _resource_re(LIBRARY_PATH, shared=True)
EXTERNAL_CONNECTOR_RE = _resource_re(EXTERNAL_CONNECTOR_PATH)


RESOURCE_RE = {
    SOURCE_PATH: SOURCE_RE,
    DATASET_PATH: DATASET_RE,
    MODEL_PATH: MODEL_RE,
    PREDICTION_PATH: PREDICTION_RE,
    EVALUATION_PATH: EVALUATION_RE,
    ENSEMBLE_PATH: ENSEMBLE_RE,
    BATCH_PREDICTION_PATH: BATCH_PREDICTION_RE,
    CLUSTER_PATH: CLUSTER_RE,
    CENTROID_PATH: CENTROID_RE,
    BATCH_CENTROID_PATH: BATCH_CENTROID_RE,
    ANOMALY_PATH: ANOMALY_RE,
    ANOMALY_SCORE_PATH: ANOMALY_SCORE_RE,
    BATCH_ANOMALY_SCORE_PATH: BATCH_ANOMALY_SCORE_RE,
    PROJECT_PATH: PROJECT_RE,
    SAMPLE_PATH: SAMPLE_RE,
    CORRELATION_PATH: CORRELATION_RE,
    STATISTICAL_TEST_PATH: STATISTICAL_TEST_RE,
    LOGISTIC_REGRESSION_PATH: LOGISTIC_REGRESSION_RE,
    ASSOCIATION_PATH: ASSOCIATION_RE,
    ASSOCIATION_SET_PATH: ASSOCIATION_SET_RE,
    CONFIGURATION_PATH: CONFIGURATION_RE,
    TOPIC_MODEL_PATH: TOPIC_MODEL_RE,
    TOPIC_DISTRIBUTION_PATH: TOPIC_DISTRIBUTION_RE,
    BATCH_TOPIC_DISTRIBUTION_PATH: BATCH_TOPIC_DISTRIBUTION_RE,
    TIME_SERIES_PATH: TIME_SERIES_RE,
    FORECAST_PATH: FORECAST_RE,
    DEEPNET_PATH: DEEPNET_RE,
    OPTIML_PATH: OPTIML_RE,
    FUSION_PATH: FUSION_RE,
    PCA_PATH: PCA_RE,
    PROJECTION_PATH: PROJECTION_RE,
    BATCH_PROJECTION_PATH: BATCH_PROJECTION_RE,
    LINEAR_REGRESSION_PATH: LINEAR_REGRESSION_RE,
    SCRIPT_PATH: SCRIPT_RE,
    EXECUTION_PATH: EXECUTION_RE,
    LIBRARY_PATH: LIBRARY_RE,
    EXTERNAL_CONNECTOR_PATH: EXTERNAL_CONNECTOR_RE}


RENAMED_RESOURCES = {
    BATCH_PREDICTION_PATH: 'batch_prediction',
    BATCH_CENTROID_PATH: 'batch_centroid',
    ANOMALY_SCORE_PATH: 'anomaly_score',
    BATCH_ANOMALY_SCORE_PATH: 'batch_anomaly_score',
    STATISTICAL_TEST_PATH: 'statistical_test',
    LOGISTIC_REGRESSION_PATH: 'logistic_regression',
    LINEAR_REGRESSION_PATH: 'linear_regression',
    ASSOCIATION_SET_PATH: 'association_set',
    TOPIC_MODEL_PATH: 'topic_model',
    TOPIC_DISTRIBUTION_PATH: 'topic_distribution',
    BATCH_TOPIC_DISTRIBUTION_PATH: 'batch_topic_distribution',
    TIME_SERIES_PATH: 'time_series',
    BATCH_PROJECTION_PATH: 'batch_projection',
    EXTERNAL_CONNECTOR_PATH: 'external_connector'
}

IRREGULAR_PLURALS = {
    ANOMALY_PATH: 'anomalies',
    BATCH_PREDICTION_PATH: 'batch_predictions',
    BATCH_CENTROID_PATH: 'batch_centroids',
    ANOMALY_SCORE_PATH: 'anomaly_scores',
    BATCH_ANOMALY_SCORE_PATH: 'batch_anomaly_scores',
    STATISTICAL_TEST_PATH: 'statistical_tests',
    LOGISTIC_REGRESSION_PATH: 'logistic_regressions',
    LINEAR_REGRESSION_PATH: 'linear_regressions',
    ASSOCIATION_SET_PATH: 'association_sets',
    TOPIC_MODEL_PATH: 'topic_models',
    TOPIC_DISTRIBUTION_PATH: 'topic_distributions',
    TIME_SERIES_PATH: 'time_series',
    LIBRARY_PATH: 'libraries',
    BATCH_PROJECTION_PATH: 'batch_projections',
    EXTERNAL_CONNECTOR_PATH: 'external_connectors'
}

# Resource status codes
WAITING = 0
QUEUED = 1
STARTED = 2
IN_PROGRESS = 3
SUMMARIZED = 4
FINISHED = 5
UPLOADING = 6
FAULTY = -1
UNKNOWN = -2
RUNNABLE = -3

# Minimum query string to get model status
TINY_RESOURCE = "full=false"

# Filtering only tasks status info
TASKS_QS = "include=subscription,tasks"

# Minimum query string to get model image fields and status
IMAGE_FIELDS_FILTER = ("optype=image&exclude=summary,objective_summary,"
                       "input_fields,importance,model_fields")

# Default storage folder
STORAGE = "./storage"

# label for transient HTTP errors
TRANSIENT = "transient"

# fields related attributes
RESOURCES_WITH_FIELDS = [SOURCE_PATH, DATASET_PATH, MODEL_PATH,
                         PREDICTION_PATH, CLUSTER_PATH, ANOMALY_PATH,
                         SAMPLE_PATH, CORRELATION_PATH, STATISTICAL_TEST_PATH,
                         LOGISTIC_REGRESSION_PATH, ASSOCIATION_PATH,
                         TOPIC_MODEL_PATH, ENSEMBLE_PATH, PCA_PATH,
                         FUSION_PATH,
                         DEEPNET_PATH, LINEAR_REGRESSION_PATH]
DEFAULT_MISSING_TOKENS = ["", "N/A", "n/a", "NULL", "null", "-", "#DIV/0",
                          "#REF!", "#NAME?", "NIL", "nil", "NA", "na",
                          "#VALUE!", "#NULL!", "NaN", "#N/A", "#NUM!", "?"]
FIELDS_PARENT = {
    "model": "model",
    "anomaly": "model",
    "cluster": "clusters",
    "logisticregression": "logistic_regression",
    "linearregression": "linear_regression",
    "ensemble": "ensemble",
    "deepnet": "deepnet",
    "topicmodel": "topic_model",
    "association": "associations",
    "correlation": "correlations",
    "sample": "sample",
    "pca": "pca",
    "fusion": "fusion",
    "timeseries": "timeseries",
    "statisticaltest": "statistical_tests",
    "dataset": None}
ALL_FIELDS = "limit=-1"
SPECIFIC_EXCLUDES = {
    "model": ["root"],
    "anomaly": ["trees"],
    "cluster": ["clusters"],
    "logisticregression": ["coefficients"],
    "linearregression": ["coefficients"],
    "ensemble": ["models"],
    "deepnet": ["network"],
    "topicmodel": ["topics"],
    "association": ["rules", "rules_summary"],
    "fusion": ["models"],
    "pca": ["pca"],
    "timeseries": ["ets_models"]}

EXTERNAL_CONNECTION_ATTRS = {
    "BIGML_EXTERNAL_CONN_HOST": "host",
    "BIGML_EXTERNAL_CONN_PORT": "port",
    "BIGML_EXTERNAL_CONN_USER": "user",
    "BIGML_EXTERNAL_CONN_PWD": "password",
    "BIGML_EXTERNAL_CONN_DB": "database",
    "BIGML_EXTERNAL_CONN_SOURCE": "source"}


# missing strategies
LAST_PREDICTION = 0
PROPORTIONAL = 1

# output options in batch predictions
OUT_NEW_FIELDS = "output_fields"
OUT_NEW_HEADERS = "output_headers"

# input data allowed formats in batch predictions
NUMPY = "numpy"
DATAFRAME = "dataframe"
INTERNAL = "list_of_dicts"

CATEGORICAL = "categorical"

IMAGE_EXTENSIONS = ['png', 'jpg', 'jpeg', 'gif', 'tiff', 'tif', 'bmp',
                    'webp', 'cur', 'ico', 'pcx', 'psd', 'psb']

REGIONS = "regions"
REGION_SCORE_ALIAS = "region_score_threshold"
REGION_SCORE_THRESHOLD = "bounding_box_threshold"
REGIONS_OPERATION_SETTINGS = [
    REGION_SCORE_ALIAS, "iou_threshold", "max_objects"]
DEFAULT_OPERATION_SETTINGS = ["operating_point", "operating_kind"]
DECIMALS = 5

IMAGE = "image"
DATETIME = "datetime"
IOU_REMOTE_SETTINGS = {"iou_threshold": 0.2}
TEMP_DIR = "/tmp"
TOP_IMAGE_SIZE = 512
class Dataset:
    """Local representation of a BigML Dataset. It can store a sample of
    data whose fields are a subset of the ones defined in the fields
    attribute.
    """

    def __init__(self, dataset, api=None, cache_get=None):
        """Builds the local dataset from the given dataset reference.

        :param dataset: dataset info or reference, as accepted by
                        `get_resource_dict` (or by `get_dataset_id` when
                        a cache is used)
        :param api: connection object used to retrieve the dataset info
        :param cache_get: get function that handles cached objects. When
                          `use_cache` accepts it, the attributes are
                          loaded from the cache instead
        """
        if use_cache(cache_get):
            #pylint: disable=locally-disabled,access-member-before-definition
            self.__dict__ = load(get_dataset_id(dataset), cache_get)
            if self.origin_dataset is not None:
                self.origin_dataset = Dataset(self.origin_dataset,
                                              api=api, cache_get=cache_get)
            self.featurizer = Featurizer(self.in_fields,
                                         self.input_fields,
                                         preferred_only=False)
            # dump/dumps leave fields_obj out of the serialized attributes
            # and _input_array needs it to map field names to IDs
            self.fields_obj = Fields(self.in_fields) \
                if self.in_fields is not None else None
            return

        self.resource_id = None
        self.name = None
        self.description = None
        self.rows = None
        self.origin_dataset = None
        self.parent_id = None
        self.in_fields = None
        self.out_fields = None
        self.locale = None
        self.input_fields = None
        self.missing_tokens = None
        self.fields_obj = None
        self.api = get_api_connection(api)
        self.cache_get = cache_get
        self.featurizer = None
        self.transformations = None

        # retrieving dataset information from
        self.resource_id, dataset = get_resource_dict( \
            dataset, "dataset", api=self.api, no_check_fields=False)

        if 'object' in dataset and isinstance(dataset['object'], dict):
            dataset = dataset['object']
        self.name = dataset.get('name')
        self.description = dataset.get('description')
        # NOTE(review): the nesting below was rebuilt from a flattened
        # source -- confirm against the repository file
        if 'fields' in dataset and isinstance(dataset['fields'], dict):
            status = get_status(dataset)
            if 'code' in status and status['code'] == FINISHED:
                out_fields_obj = Fields(dataset)
                self.out_fields = out_fields_obj.fields
                self.out_header_names, _ = sorted_headers(out_fields_obj)
                self.locale = dataset.get('locale', DEFAULT_LOCALE)
                self.missing_tokens = dataset.get('missing_tokens')
                self.input_fields = dataset.get('input_fields')
                self.rows = dataset.get("rows", 0)
                # we extract the generators and names from the "output_fields"
                if dataset.get("new_fields"):
                    new_fields = get_new_fields(dataset.get(
                        "output_fields", []))
                else:
                    new_fields = None
                origin_dataset = dataset.get("origin_dataset")
                if origin_dataset:
                    self.parent_id = origin_dataset
                    self.add_transformations(origin_dataset, new_fields)
                elif dataset.get("source"):
                    self.parent_id = dataset.get("source")
                    self.in_fields = out_fields_obj.fields
                self.featurizer = Featurizer(self.in_fields,
                                             self.input_fields,
                                             self.in_fields,
                                             preferred_only=False)
                self.fields_obj = Fields(self.in_fields)
                self.in_header_names, self.in_header_ids = sorted_headers(
                    Fields(self.in_fields))

    def add_transformations(self, origin_dataset, new_fields):
        """Adds a new transformation where the new fields provided are
        defined. The origin dataset is loaded as a local Dataset and its
        output fields become this dataset's input fields.
        """
        _, origin_dataset = get_resource_dict(
            origin_dataset, "dataset", api=self.api)
        self.origin_dataset = Dataset(origin_dataset, api=self.api,
                                      cache_get=self.cache_get)
        self.in_fields = self.origin_dataset.out_fields
        if new_fields:
            self.transformations = new_fields

    def get_sample(self, rows_number=32):
        """Gets a sample of data representing the dataset. Returns an
        empty list if the sample could not be created.
        """
        sample = self.api.create_sample(self.resource_id)
        if self.api.ok(sample):
            sample = self.api.get_sample(
                sample["resource"], "rows=%s" % rows_number)
            return sample.get("object", {}).get("sample", {}).get("rows")
        return []

    def get_inputs_sample(self, rows_number=32):
        """Gets a sample of data representing the origin dataset. Returns
        an empty list when there is no origin dataset.
        """
        if self.origin_dataset is None:
            return []
        return self.origin_dataset.get_sample(rows_number=rows_number)

    def _input_array(self, input_data):
        """Transform the dict-like input data into a row whose values
        follow the order in `self.in_header_ids`. Fields that are not in
        the input data are set to None.
        """
        new_input_data = {}
        for key, value in input_data.items():
            if key not in self.in_fields:
                # keys that are not field IDs are looked up as field names
                key = self.fields_obj.fields_by_name.get(key, key)
            new_input_data[key] = value
        if self.featurizer is not None:
            new_input_data = self.featurizer.extend_input(new_input_data)
        cast(new_input_data, self.in_fields)
        return [new_input_data[f_id] if f_id in new_input_data else None
                for f_id in self.in_header_ids]

    def _transform(self, input_arrays):
        """Given a list of inputs that match the origin dataset structure,
        apply the Flatline transformations used in the dataset

        :return: a two-element list: the output headers and the list of
                 output rows
        :raises ValueError: if Flatline reports an error when checking
                            an expression
        """
        new_input_arrays = []
        out_headers = []
        fields = {"fields": self.in_fields}
        for transformation in self.transformations:
            expr = transformation.get("field")
            names = transformation.get("names", [])
            out_headers.extend(names)
            # evaluating first to raise an alert if the expression is failing
            check = Flatline.check_lisp(expr, fields)
            if "error" in check:
                raise ValueError(check["error"])
            if expr == '(all)':
                # rows are copied one by one: a shallow copy of the outer
                # list would make the extend calls below modify the
                # caller's rows, which are also the input of every
                # following expression
                new_input_arrays = [list(row) for row in input_arrays]
                continue
            new_input = Flatline.apply_lisp(expr, input_arrays, self)
            for index, new_values in enumerate(new_input):
                if index >= len(new_input_arrays):
                    new_input_arrays.append([])
                new_input_arrays[index].extend(new_values)
        # NOTE(review): assumed to run once, after all the transformations
        # have been applied -- confirm against the repository file
        out_arrays = [list(row) for row in new_input_arrays]
        return [out_headers, out_arrays]

    def transform(self, input_data_list):
        """Applies the transformations to the given input data and returns
        the result. Usually, the input_data_list will contain a single
        dictionary, but it can contain a list of them if needed for window
        functions.

        :raises ValueError: if the dataset has transformations and
                            FLATLINE_READY is false
        """
        if self.transformations is None and self.featurizer is None:
            return input_data_list
        rows = [self._input_array(input_data) for input_data in
                input_data_list]
        if self.transformations:
            if not FLATLINE_READY:
                raise ValueError("Nodejs should be installed to handle this"
                                 " dataset's transformations. Please, check"
                                 " the bindings documentation for details.")
            out_headers, out_arrays = self._transform(rows)
            rows = [dict(zip(out_headers, row)) for row
                    in out_arrays]
            for index, result in enumerate(rows):
                rows[index] = {key: value for key, value in result.items()
                               if value is not None}
        else:
            rows = [dict(zip(self.out_header_names, row)) for row in rows]
        return rows

    def _serializable_vars(self):
        """Returns a copy of the instance attributes ready to be
        serialized: the attributes rebuilt or dropped on load are left out
        and the origin dataset is replaced by its resource ID.
        """
        self_vars = vars(self).copy()
        # pop tolerates instances loaded from cache, which have no
        # api or cache_get attributes
        for transient in ("api", "cache_get", "featurizer", "fields_obj"):
            self_vars.pop(transient, None)
        # origin_dataset is None unless add_transformations was called
        origin_dataset = self_vars.get("origin_dataset")
        if origin_dataset is not None:
            self_vars["origin_dataset"] = origin_dataset.resource_id
        return self_vars

    def dump(self, output=None, cache_set=None):
        """Uses msgpack to serialize the resource object
        If cache_set is filled with a cache set method, the method is called

        """
        dump(self._serializable_vars(), output=output, cache_set=cache_set)

    def dumps(self):
        """Uses msgpack to serialize the resource object to a string

        """
        return dumps(self._serializable_vars())
def expand_terms(terms_list, input_terms):
    """Builds a list of occurrences for all the available terms

    :param terms_list: list of the available terms
    :param input_terms: iterable of (term, occurrences) pairs
    :return: a list as long as `terms_list` where every position holds
             the occurrences given for the term in that position, or 0.0
             when the term is not in `input_terms`
    :raises ValueError: if a term in `input_terms` is not in `terms_list`
    """
    # Position of each term, computed once instead of scanning the list
    # for every input term. setdefault keeps the first position of a
    # repeated term, as list.index does.
    positions = {}
    for index, term in enumerate(terms_list):
        positions.setdefault(term, index)
    terms_occurrences = [0.0] * len(terms_list)
    for term, occurrences in input_terms:
        if term not in positions:
            raise ValueError("%r is not in list" % (term,))
        terms_occurrences[positions[term]] = occurrences
    return terms_occurrences
deepnet['object'] + try: + self.parent_id = deepnet.get('dataset') + self.name = deepnet.get('name') + self.description = deepnet.get('description') + self.input_fields = deepnet['input_fields'] + self.default_numeric_value = deepnet.get('default_numeric_value') + except (AttributeError, KeyError): + raise ValueError("Failed to find the expected " + "JSON structure. Check your arguments.") + if 'deepnet' in deepnet and isinstance(deepnet['deepnet'], dict): + status = get_status(deepnet) + objective_field = deepnet['objective_fields'] + deepnet_info = deepnet['deepnet'] + if 'code' in status and status['code'] == FINISHED: + self.fields = deepnet_info['fields'] + missing_tokens = deepnet_info.get('missing_tokens') + ModelFields.__init__( + self, self.fields, + objective_id=extract_objective(objective_field), + categories=True, missing_tokens=missing_tokens) + + self.regression = \ + self.fields[self.objective_id]['optype'] == NUMERIC + self.regions = \ + self.fields[self.objective_id]['optype'] == REGIONS + if not self.regression and not self.regions: + # order matters + self.objective_categories = self.categories[ + self.objective_id] + self.class_names = sorted(self.objective_categories) + + self.missing_numerics = deepnet_info.get('missing_numerics', + False) + self.operation_settings = self._add_operation_settings( + operation_settings) + if 'network' in deepnet_info: + network = deepnet_info['network'] + self.network = network + self.networks = network.get('networks', []) + # old deepnets might use the latter option + if self.networks: + self.output_exposition = self.networks[0].get( + "output_exposition") + else: + self.output_exposition = None + self.output_exposition = self.network.get( + "output_exposition", self.output_exposition) + self.preprocess = network.get('preprocess') + self.optimizer = network.get('optimizer', {}) + + if self.regions: + settings = self.operation_settings or {} + settings.update(IOU_REMOTE_SETTINGS) + else: + settings = None + + 
#pylint: disable=locally-disabled,broad-except + if not self.using_laminar: + try: + self.deepnet = create_model(deepnet, + settings=settings) + except Exception: + # Windows systems can fail to have some libraries + # required to predict complex deepnets with inner + # tree layers. In this case, we revert to the old + # library version iff possible. + self.using_laminar = True + + if self.using_laminar: + if self.regions: + raise ValueError("Failed to find the extra libraries" + " that are compulsory for predicting " + "regions. Please, install them by " + "running \n" + "pip install bigml[images]") + for _, field in self.fields.items(): + if field["optype"] == IMAGE: + raise ValueError("This deepnet cannot be predicted" + " as some required libraries are " + "not available for this OS.") + self.deepnet = None + else: + raise Exception("The deepnet isn't finished yet") + else: + raise Exception("Cannot create the Deepnet instance. Could not" + " find the 'deepnet' key in the resource:\n\n%s" % + deepnet) + + def _add_operation_settings(self, operation_settings): + """Checks and adds the user-given operation settings """ + if operation_settings is None: + return None + if self.regression: + raise ValueError("No operating settings are allowed" + " for regressions") + allowed_settings = REGIONS_OPERATION_SETTINGS if \ + self.regions else DEFAULT_OPERATION_SETTINGS + settings = {setting: operation_settings[setting] for + setting in operation_settings.keys() if setting in + allowed_settings + } + if REGION_SCORE_ALIAS in settings: + settings[REGION_SCORE_THRESHOLD] = settings[ + REGION_SCORE_ALIAS] + del settings[REGION_SCORE_ALIAS] + return settings + + def fill_array(self, input_data, unique_terms): + """ Filling the input array for the network with the data in the + input_data dictionary. Numeric missings are added as a new field + and texts/items are processed. 
+ """ + columns = [] + for field_id in self.input_fields: + # if the field is text or items, we need to expand the field + # in one field per term and get its frequency + if field_id in self.tag_clouds: + terms_occurrences = expand_terms(self.tag_clouds[field_id], + unique_terms.get(field_id, + [])) + columns.extend(terms_occurrences) + elif field_id in self.items: + terms_occurrences = expand_terms(self.items[field_id], + unique_terms.get(field_id, + [])) + columns.extend(terms_occurrences) + elif field_id in self.categories: + category = unique_terms.get(field_id) + if category is not None: + category = category[0][0] + if self.using_laminar: + columns.append([category]) + else: + columns.append(category) + else: + # when missing_numerics is True and the field had missings + # in the training data, then we add a new "is missing?" element + # whose value is 1 or 0 according to whether the field is + # missing or not in the input data + if self.missing_numerics \ + and self.fields[field_id][\ + "summary"].get("missing_count", 0) > 0: + if field_id in input_data: + columns.extend([input_data[field_id], 0.0]) + else: + columns.extend([0.0, 1.0]) + else: + columns.append(input_data.get(field_id)) + if self.using_laminar: + return pp.preprocess(columns, self.preprocess) + return columns + + def predict(self, input_data, operating_point=None, operating_kind=None, + full=False): + """Makes a prediction based on a number of field values. + + input_data: Input data to be predicted + operating_point: In classification models, this is the point of the + ROC curve where the model will be used at. The + operating point can be defined in terms of: + - the positive_class, the class that is important to + predict accurately + - the probability_threshold, + the probability that is stablished + as minimum for the positive_class to be predicted. 
+ The operating_point is then defined as a map with + two attributes, e.g.: + {"positive_class": "Iris-setosa", + "probability_threshold": 0.5} + operating_kind: "probability". Sets the + property that decides the prediction. Used only if + no operating_point is used + full: Boolean that controls whether to include the prediction's + attributes. By default, only the prediction is produced. If set + to True, the rest of available information is added in a + dictionary format. The dictionary keys can be: + - prediction: the prediction value + - probability: prediction's probability + - unused_fields: list of fields in the input data that + are not being used in the model + """ + + # Checks and cleans input_data leaving the fields used in the model + unused_fields = [] + + if self.regions: + # Only a single image file is allowed as input. + # Sensenet predictions are using absolute coordinates, so we need + # to change it to relative and set the decimal precision + prediction = to_relative_coordinates(input_data, + self.deepnet(input_data)) + return {"prediction": prediction} + + norm_input_data = self.filter_input_data( \ + input_data, add_unused_fields=full) + if full: + norm_input_data, unused_fields = norm_input_data + + # Strips affixes for numeric values and casts to the final field type + cast(norm_input_data, self.fields) + + # When operating_point is used, we need the probabilities + # of all possible classes to decide, so se use + # the `predict_probability` method + if operating_point is None and self.operation_settings is not None: + operating_point = self.operation_settings.get("operating_point") + if operating_kind is None and self.operation_settings is not None: + operating_kind = self.operation_settings.get("operating_kind") + + if operating_point: + if self.regression: + raise ValueError("The operating_point argument can only be" + " used in classifications.") + return self.predict_operating( \ + norm_input_data, operating_point=operating_point) + if 
operating_kind: + if self.regression: + raise ValueError("The operating_point argument can only be" + " used in classifications.") + return self.predict_operating_kind( \ + norm_input_data, operating_kind=operating_kind) + + # Computes text and categorical field expansion + unique_terms = self.get_unique_terms(norm_input_data) + input_array = self.fill_array(norm_input_data, unique_terms) + if self.deepnet is not None: + prediction = list(self.deepnet(input_array)[0]) + # prediction is now a numpy array of probabilities for classification + # and a numpy array with the value for regressions + prediction = self.to_prediction(prediction) + else: + # no tensorflow + if self.networks: + prediction = self.predict_list(input_array) + else: + prediction = self.predict_single(input_array) + if full: + if not isinstance(prediction, dict): + prediction = {"prediction": round(prediction, DECIMALS)} + prediction.update({"unused_fields": unused_fields}) + if "probability" in prediction: + prediction["confidence"] = prediction.get("probability") + else: + if isinstance(prediction, dict): + prediction = prediction["prediction"] + + return prediction + + def predict_single(self, input_array): + """Makes a prediction with a single network + """ + if self.network['trees'] is not None: + input_array = pp.tree_transform(input_array, self.network['trees']) + + return self.to_prediction(self.model_predict(input_array, + self.network)) + + def predict_list(self, input_array): + """Makes predictions with a list of networks + """ + if self.network['trees'] is not None: + input_array_trees = pp.tree_transform(input_array, + self.network['trees']) + youts = [] + for model in self.networks: + if model['trees']: + youts.append(self.model_predict(input_array_trees, model)) + else: + youts.append(self.model_predict(input_array, model)) + + return self.to_prediction(net.sum_and_normalize(youts, + self.regression)) + + def model_predict(self, input_array, model): + """Prediction with one model + + 
""" + layers = net.init_layers(model['layers']) + y_out = net.propagate(input_array, layers) + if self.regression: + y_mean, y_stdev = moments(self.output_exposition) + y_out = net.destandardize(y_out, y_mean, y_stdev) + return y_out[0][0] + + return y_out + + def to_prediction(self, y_out): + """Structuring prediction in a dictionary output + + """ + if self.regression: + if not self.using_laminar: + y_out = y_out[0] + return float(y_out) + if self.using_laminar: + y_out = y_out[0] + prediction = sorted(enumerate(y_out), key=lambda x: -x[1])[0] + prediction = {"prediction": self.class_names[prediction[0]], + "probability": round(prediction[1], PRECISION), + "distribution": [{"category": category, + "probability": round(y_out[i], + PRECISION)} \ + for i, category in enumerate(self.class_names)]} + + return prediction + + def predict_probability(self, input_data, compact=False): + """Predicts a probability for each possible output class, + based on input values. The input fields must be a dictionary + keyed by field name or field ID. This method is not available for + regions objectives + + :param input_data: Input data to be predicted + :param compact: If False, prediction is returned as a list of maps, one + per class, with the keys "prediction" and "probability" + mapped to the name of the class and it's probability, + respectively. If True, returns a list of probabilities + ordered by the sorted order of the class names. 
+ """ + if self.regions: + raise ValueError("The .predict_probability method cannot be used" + " to predict regions.") + if self.regression: + prediction = self.predict(input_data, full=not compact) + if compact: + return [prediction] + return prediction + distribution = self.predict(input_data, full=True)['distribution'] + distribution.sort(key=lambda x: x['category']) + + if compact: + return [category['probability'] for category in distribution] + return distribution + + def predict_confidence(self, input_data, compact=False): + """Uses probability as a confidence + """ + if compact or self.regression: + return self.predict_probability(input_data, compact=compact) + return [{"category": pred["category"], + "confidence": pred["probability"]} + for pred in self.predict_probability(input_data, + compact=compact)] + + #pylint: disable=locally-disabled,invalid-name + def _sort_predictions(self, a, b, criteria): + """Sorts the categories in the predicted node according to the + given criteria + + """ + if a[criteria] == b[criteria]: + return sort_categories(a, b, self.objective_categories) + return 1 if b[criteria] > a[criteria] else - 1 + + def predict_operating_kind(self, input_data, operating_kind=None): + """Computes the prediction based on a user-given operating kind. + + """ + + kind = operating_kind.lower() + if kind == "probability": + predictions = self.predict_probability(input_data, False) + else: + raise ValueError("Only probability is allowed as operating kind" + " for deepnets.") + predictions.sort( \ + key=cmp_to_key( \ + lambda a, b: self._sort_predictions(a, b, kind))) + prediction = predictions[0] + prediction["prediction"] = prediction["category"] + del prediction["category"] + if "probability" in prediction: + prediction["confidence"] = prediction.get("probability") + return prediction + + def predict_operating(self, input_data, operating_point=None): + """Computes the prediction based on a user-given operating point. 
+ + """ + + kind, threshold, positive_class = parse_operating_point( \ + operating_point, ["probability"], self.class_names, + self.operation_settings) + predictions = self.predict_probability(input_data, False) + position = self.class_names.index(positive_class) + if predictions[position][kind] > threshold: + prediction = predictions[position] + else: + # if the threshold is not met, the alternative class with + # highest probability or confidence is returned + predictions.sort( \ + key=cmp_to_key( \ + lambda a, b: self._sort_predictions(a, b, kind))) + prediction = predictions[0 : 2] + if prediction[0]["category"] == positive_class: + prediction = prediction[1] + else: + prediction = prediction[0] + prediction["prediction"] = prediction["category"] + del prediction["category"] + if "probability" in prediction: + prediction["confidence"] = prediction.get("probability") + return prediction + + def data_transformations(self): + """Returns the pipeline transformations previous to the modeling + step as a pipeline, so that they can be used in local predictions. + Avoiding to set it in a Mixin to maintain the current dump function. + """ + return get_data_transformations(self.resource_id, self.parent_id) diff --git a/bigml/domain.py b/bigml/domain.py new file mode 100644 index 00000000..81a26ebc --- /dev/null +++ b/bigml/domain.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
"""Domain class to handle domain assignation for VPCs

"""
import os

# Default domain and protocol
DEFAULT_DOMAIN = 'bigml.io'
DEFAULT_PROTOCOL = 'https'
DEFAULT_API_VERSION = 'andromeda'

# Base Domain
BIGML_DOMAIN = os.environ.get('BIGML_DOMAIN', DEFAULT_DOMAIN)

# Default API version
BIGML_API_VERSION = os.environ.get('BIGML_API_VERSION', DEFAULT_API_VERSION)

# Protocol for main server
BIGML_PROTOCOL = os.environ.get('BIGML_PROTOCOL',
                                DEFAULT_PROTOCOL)

# SSL Verification
BIGML_SSL_VERIFY = os.environ.get('BIGML_SSL_VERIFY')

# Domain for prediction server
BIGML_PREDICTION_DOMAIN = os.environ.get('BIGML_PREDICTION_DOMAIN',
                                         BIGML_DOMAIN)

# Protocol for prediction server
BIGML_PREDICTION_PROTOCOL = os.environ.get('BIGML_PREDICTION_PROTOCOL',
                                           DEFAULT_PROTOCOL)

# SSL Verification for prediction server
BIGML_PREDICTION_SSL_VERIFY = os.environ.get('BIGML_PREDICTION_SSL_VERIFY')


def _ssl_flag(explicit, env_value):
    """Resolves an SSL verification flag.

    Returns `explicit` when it is not None. Otherwise `env_value` is parsed
    as an integer and converted to a boolean; None is returned when it is
    not a valid integer so that the caller can apply its fallback rule.
    Callers only use it when at least one of the two values is set.
    """
    if explicit is not None:
        return explicit
    try:
        return bool(int(env_value))
    except ValueError:
        return None


class Domain():
    """A Domain object to store the remote domain information for the API

       The domain that serves the remote resources can be set globally for
       all the resources either by setting the BIGML_DOMAIN environment
       variable

       export BIGML_DOMAIN=my_VPC.bigml.io

       or can be given in the constructor using the `domain` argument.

       my_domain = Domain("my_VPC.bigml.io")

       You can also specify a separate domain to handle predictions. This can
       be set by using the BIGML_PREDICTION_DOMAIN and
       BIGML_PREDICTION_PROTOCOL
       environment variables

       export BIGML_PREDICTION_DOMAIN=my_prediction_server.bigml.com
       export BIGML_PREDICTION_PROTOCOL=https

       or the `prediction_domain` and `prediction_protocol` arguments.

       The constructor values will override the environment settings.
    """

    def __init__(self, domain=None, prediction_domain=None,
                 prediction_protocol=None, protocol=None, verify=None,
                 prediction_verify=None, api_version=None):
        """Domain object constructor.

            @param: domain string Domain name
            @param: prediction_domain string Domain for the prediction server
                    (when different from the general domain)
            @param: prediction_protocol string Protocol for prediction server
                    (when different from the general protocol)
            @param: protocol string Protocol for the service
                    (when different from HTTPS)
            @param: verify boolean Sets on/off the SSL verification
            @param: prediction_verify boolean Sets on/off the SSL verification
                    for the prediction server (when different from the general
                    SSL verification)
            @param: api_version string Name of the API version
        """
        # Base domain for remote resources
        self.general_domain = domain if domain is not None else BIGML_DOMAIN
        self.general_protocol = protocol if protocol is not None else \
            BIGML_PROTOCOL
        self.api_version = api_version if api_version is not None else \
            BIGML_API_VERSION
        # Usually, predictions are served from the same domain
        if prediction_domain is None:
            if domain is not None:
                self.prediction_domain = domain
                self.prediction_protocol = protocol if protocol is not None \
                    else BIGML_PROTOCOL
            else:
                self.prediction_domain = BIGML_PREDICTION_DOMAIN
                self.prediction_protocol = BIGML_PREDICTION_PROTOCOL
        # If the domain for predictions is different from the general domain,
        # for instance in high-availability prediction servers
        else:
            self.prediction_domain = prediction_domain
            self.prediction_protocol = prediction_protocol if \
                prediction_protocol is not None else \
                BIGML_PREDICTION_PROTOCOL

        # Check SSL when coming from `bigml.io` subdomains or when forced
        # by the external BIGML_SSL_VERIFY environment variable or verify
        # arguments
        self.verify = None
        self.verify_prediction = None
        if self.general_protocol == BIGML_PROTOCOL and \
                (verify is not None or BIGML_SSL_VERIFY is not None):
            self.verify = _ssl_flag(verify, BIGML_SSL_VERIFY)
        if self.verify is None:
            # fallback: verify only for `bigml.io` domains
            self.verify = self.general_domain.lower().endswith(DEFAULT_DOMAIN)
        if self.prediction_protocol == BIGML_PROTOCOL and \
                (prediction_verify is not None or
                 BIGML_PREDICTION_SSL_VERIFY is not None):
            self.verify_prediction = _ssl_flag(prediction_verify,
                                               BIGML_PREDICTION_SSL_VERIFY)

        if self.verify_prediction is None:
            # fallback: verify only for `bigml.io` domains served over the
            # default protocol
            self.verify_prediction = (
                (self.prediction_domain.lower().endswith(DEFAULT_DOMAIN) and
                 self.prediction_protocol == DEFAULT_PROTOCOL))
def boosted_list_error(boosting):
    """Guard used when a local ensemble is built from a list of models.

    Does nothing for a falsy `boosting` value; raises ValueError otherwise,
    as boosted ensembles cannot be built from a list of boosting models.

    """
    if not boosting:
        return
    message = ("Failed to build the local ensemble. Boosted"
               " ensembles cannot be built from a list"
               " of boosting models.")
    raise ValueError(message)
+ The expected arguments are: + + """ + + #pylint: disable=locally-disabled,broad-except,access-member-before-definition + def __init__(self, ensemble, api=None, max_models=None, cache_get=None, + operation_settings=None): + """ + :param ensemble: ensemble object or id, list of ensemble model + objects or ids or list of ensemble obj and local model + objects (see Model) + :param api: connection object. If None, a new connection object is + instantiated. + :param max_models: integer that limits the number of models instantiated + and held in memory at the same time while predicting. + If None, no limit is set and all the ensemble models + are instantiated and held in memory permanently. + :param cache_get: user-provided function that should return the JSON + information describing the model or the corresponding + Ensemble object. Can be used to read these objects + from a cache storage. + :param operation_settings: Dict object that contains operating options + + """ + self.model_splits = [] + self.multi_model = None + self.api = get_api_connection(api) + self.fields = None + self.class_names = None + self.default_numeric_value = None + if use_cache(cache_get): + # using a cache to store the model attributes + self.__dict__ = load(get_ensemble_id(ensemble), cache_get) + self.api = get_api_connection(api) + self.operation_settings = self._add_operation_settings( + operation_settings) + if len(self.models_splits) == 1: + # retrieve the models from a cache get function + try: + models = [Model(model_id, cache_get=cache_get, + operation_settings=operation_settings) + for model_id + in self.models_splits[0]] + except Exception as exc: + raise Exception('Error while calling the user-given' + ' function %s: %s' % + (cache_get.__name__, str(exc))) + self.multi_model = MultiModel( + models, + self.api, + fields=self.fields, + class_names=self.class_names, + cache_get=cache_get, + operation_settings=operation_settings) + return + + self.resource_id = None + self.name = None + 
self.description = None + self.parent_id = None + self.objective_id = None + self.distributions = None + self.distribution = None + self.boosting = None + self.boosting_offsets = None + self.cache_get = None + self.regression = False + self.importance = {} + query_string = ONLY_MODEL + no_check_fields = False + self.input_fields = [] + child_api = self.api + + + models = [] + if isinstance(ensemble, list): + try: + if isinstance(ensemble[0], dict) and \ + get_ensemble_id(ensemble[0]): + number_of_models = len(ensemble) - 1 + model_list = ensemble + ensemble = model_list[0] + if len(ensemble["object"]["models"]) == number_of_models: + model_list = model_list[1:] + else: + raise ValueError("The provided list of models does not" + " match the ensemble list of models.") + try: + models = [Model( + model, operation_settings=operation_settings) + for model in model_list] + except Exception: + models = model_list + else: + # only list of models (old ensembles) + models = ensemble + ensemble=None + except ValueError: + # only list of models (old ensembles) + models = ensemble + ensemble = None + if models: + if all(isinstance(model, Model) for model in models): + self.model_ids = [local_model.resource_id for local_model in + models] + else: + try: + models = [get_model_id(model) for model in models] + self.model_ids = models + except ValueError as exc: + raise ValueError('Failed to verify the list of models.' 
+ ' Check your model id values: %s' % + str(exc)) + if ensemble: + ensemble = self.get_ensemble_resource(ensemble) + self.resource_id = get_ensemble_id(ensemble) + shared_ref = self.resource_id.replace("shared/", "") if \ + self.resource_id.startswith("shared/") else None + if shared_ref is not None: + child_api = deepcopy(self.api) + child_api.shared_ref = shared_ref + elif hasattr(self.api, "shared_ref") and \ + self.api.shared_ref is not None: + child_api = deepcopy(self.api) + # adding the resource ID to the sharing chain + child_api.shared_ref += ",%s" % self.resource_id + + if not check_local_but_fields(ensemble): + # avoid checking fields because of old ensembles + ensemble = retrieve_resource(self.api, self.resource_id, + no_check_fields=True) + self.parent_id = ensemble.get('object', {}).get('dataset') + self.name = ensemble.get('object', {}).get('name') + self.description = ensemble.get('object', {}).get('description') + if ensemble['object'].get('type') == BOOSTING: + self.boosting = ensemble['object'].get('boosting') + self.distributions = ensemble['object'].get('distributions', []) + self.importance = ensemble['object'].get('importance', []) + self.model_ids = ensemble['object']['models'] + if not models: + models = self.model_ids + # new ensembles have the fields structure + if ensemble['object'].get('ensemble'): + self.fields = ensemble['object'].get( \ + 'ensemble', {}).get("fields") + self.objective_id = ensemble['object'].get("objective_field") + query_string = EXCLUDE_FIELDS + no_check_fields = True + self.input_fields = ensemble['object'].get('input_fields') + self.default_numeric_value = ensemble.get('default_numeric_value') + + number_of_models = len(models) + if max_models is None: + self.models_splits = [models] + else: + self.models_splits = [models[index:(index + max_models)] for index + in range(0, number_of_models, max_models)] + if len(self.models_splits) == 1: + if not isinstance(models[0], Model): + if use_cache(cache_get): + # 
retrieve the models from a cache get function + try: + models = [Model(model_id, cache_get=cache_get, + operation_settings=operation_settings) + for model_id + in self.models_splits[0]] + self.cache_get = cache_get + except Exception as exc: + raise Exception('Error while calling the user-given' + ' function %s: %s' % + (cache_get.__name__, str(exc))) + else: + models = [retrieve_resource( \ + child_api, + model_id, + query_string=query_string, + no_check_fields=no_check_fields) + for model_id in self.models_splits[0]] + model = models[0] + + else: + # only retrieving first model + self.cache_get = cache_get + if not isinstance(models[0], Model): + if use_cache(cache_get): + # retrieve the models from a cache get function + try: + model = Model(self.models_splits[0][0], + cache_get=cache_get, + operation_settings=operation_settings) + self.cache_get = cache_get + except Exception as exc: + raise Exception('Error while calling the user-given' + ' function %s: %s' % + (cache_get.__name__, str(exc))) + else: + model = retrieve_resource( \ + child_api, + self.models_splits[0][0], + query_string=query_string, + no_check_fields=no_check_fields) + + models = [model] + + if self.distributions is None: + try: + self.distributions = [] + for model in models: + self.distributions.append({ + 'training': model.root_distribution + }) + except AttributeError: + self.distributions = [model['object']['model']['distribution'] + for model in models] + + if self.boosting is None: + self._add_models_attrs(model, max_models) + + if self.fields is None: + self.fields, self.objective_id = self.all_model_fields( + max_models=max_models) + + if self.fields: + add_distribution(self) + self.regression = \ + self.fields[self.objective_id].get('optype') == NUMERIC + if self.boosting: + self.boosting_offsets = ensemble['object'].get('initial_offset', + 0) \ + if self.regression else dict(ensemble['object'].get( \ + 'initial_offsets', [])) + if not self.regression: + try: + objective_field = 
self.fields[self.objective_id] + categories = objective_field['summary']['categories'] + classes = [category[0] for category in categories] + except (AttributeError, KeyError): + classes = set() + for distribution in self.distributions: + for category in distribution['training']['categories']: + classes.add(category[0]) + + self.class_names = sorted(classes) + self.objective_categories = [category for \ + category, _ in self.fields[self.objective_id][ \ + "summary"]["categories"]] + + ModelFields.__init__( \ + self, self.fields, + objective_id=self.objective_id) + + if len(self.models_splits) == 1: + self.multi_model = MultiModel( + models, + self.api, + fields=self.fields, + class_names=self.class_names, + operation_settings=operation_settings) + for index, model in enumerate(self.multi_model.models): + self.multi_model.models[index].term_forms = self.term_forms + + def _add_models_attrs(self, model, max_models=None): + """ Adds the boosting and fields info when the ensemble is built from + a list of models. They can be either Model objects + or the model dictionary info structure. + + """ + if isinstance(model, Model): + self.boosting = model.boosting + boosted_list_error(self.boosting) + self.objective_id = model.objective_id + else: + if model['object'].get('boosted_ensemble'): + self.boosting = model['object']['boosting'] + boosted_list_error(self.boosting) + if self.fields is None: + self.fields, _ = self.all_model_fields( \ + max_models=max_models) + self.objective_id = model['object']['objective_field'] + + def get_ensemble_resource(self, ensemble): + """Extracts the ensemble resource info. 
The ensemble argument can be + - a path to a local file + - an ensemble id + """ + # the string can be a path to a JSON file + if isinstance(ensemble, str): + try: + path = os.path.dirname(os.path.abspath(ensemble)) + with open(ensemble) as ensemble_file: + ensemble = json.load(ensemble_file) + self.resource_id = get_ensemble_id(ensemble) + if self.resource_id is None: + raise ValueError("The JSON file does not seem" + " to contain a valid BigML ensemble" + " representation.") + self.api.storage = path + except IOError: + # if it is not a path, it can be an ensemble id + self.resource_id = get_ensemble_id(ensemble) + if self.resource_id is None: + if ensemble.find('ensemble/') > -1: + raise Exception( + self.api.error_message(ensemble, + resource_type='ensemble', + method='get')) + raise IOError("Failed to open the expected JSON file" + " at %s" % ensemble) + except ValueError: + raise ValueError("Failed to interpret %s." + " JSON file expected.") + return ensemble + + def list_models(self): + """Lists all the model/ids that compound the ensemble. + + """ + return self.model_ids + + def predict_probability(self, input_data, + missing_strategy=LAST_PREDICTION, + compact=False): + + """For classification models, Predicts a probability for + each possible output class, based on input values. The input + fields must be a dictionary keyed by field name or field ID. + + For regressions, the output is a single element list + containing the prediction. + + :param input_data: Input data to be predicted + :param missing_strategy: LAST_PREDICTION|PROPORTIONAL missing strategy + for missing fields + :param compact: If False, prediction is returned as a list of maps, one + per class, with the keys "prediction" and "probability" + mapped to the name of the class and it's probability, + respectively. If True, returns a list of probabilities + ordered by the sorted order of the class names. 
+ """ + if self.regression: + prediction = self.predict(input_data, + method=PROBABILITY_CODE, + missing_strategy=missing_strategy, + full=not compact) + + if compact: + output = [prediction] + else: + output = prediction + elif self.boosting is not None: + probabilities = self.predict(input_data, + method=PLURALITY_CODE, + missing_strategy=missing_strategy, + full=True)['probabilities'] + + probabilities.sort(key=lambda x: x['category']) + + if compact: + output = [probability['probability'] + for probability in probabilities] + else: + output = probabilities + else: + output = self._combine_distributions( \ + input_data, + missing_strategy) + + if not compact: + names_probabilities = list(zip(self.class_names, output)) + output = [{'category': class_name, + 'probability': probability} + for class_name, probability in names_probabilities] + + return output + + def predict_confidence(self, input_data, + missing_strategy=LAST_PREDICTION, + compact=False): + + """For classification models, Predicts a confidence for + each possible output class, based on input values. The input + fields must be a dictionary keyed by field name or field ID. + + For regressions, the output is a single element list + containing the prediction. + + :param input_data: Input data to be predicted + :param missing_strategy: LAST_PREDICTION|PROPORTIONAL missing strategy + for missing fields + :param compact: If False, prediction is returned as a list of maps, one + per class, with the keys "prediction" and "probability" + mapped to the name of the class and it's probability, + respectively. If True, returns a list of probabilities + ordered by the sorted order of the class names. 
+ """ + + if self.boosting: + # we use boosting probabilities as confidences also + return self.predict_probability( \ + input_data, + missing_strategy=missing_strategy, + compact=compact) + if self.regression: + prediction = self.predict(input_data, method=CONFIDENCE_CODE, + missing_strategy=missing_strategy, + full=not compact) + if compact: + output = [prediction] + else: + output = prediction + else: + output = self._combine_distributions( \ + input_data, + missing_strategy, + method=CONFIDENCE_CODE) + if not compact: + names_confidences = list(zip(self.class_names, output)) + output = [{'category': class_name, + 'confidence': confidence} + for class_name, confidence in names_confidences] + + return output + + def predict_votes(self, input_data, + missing_strategy=LAST_PREDICTION, + compact=False): + + """For classification models, Predicts the votes for + each possible output class, based on input values. The input + fields must be a dictionary keyed by field name or field ID. + + For regressions, the output is a single element list + containing the prediction. + + :param input_data: Input data to be predicted + :param missing_strategy: LAST_PREDICTION|PROPORTIONAL missing strategy + for missing fields + :param compact: If False, prediction is returned as a list of maps, one + per class, with the keys "prediction" and "probability" + mapped to the name of the class and it's probability, + respectively. If True, returns a list of probabilities + ordered by the sorted order of the class names. 
+ """ + if self.regression: + prediction = self.predict(input_data, + method=PLURALITY_CODE, + missing_strategy=missing_strategy, + full=not compact) + + if compact: + output = [prediction] + else: + output = prediction + elif self.boosting is not None: + raise ValueError("Votes cannot be computed for boosted" + " ensembles.") + else: + output = self._combine_distributions( \ + input_data, + missing_strategy, + method=PLURALITY_CODE) + if not compact: + names_votes = list(zip(self.class_names, output)) + output = [{'category': class_name, + 'votes': k} + for class_name, k in names_votes] + + return output + + def _combine_distributions(self, input_data, missing_strategy, + method=PROBABILITY_CODE): + """Computes the predicted distributions and combines them to give the + final predicted distribution. Depending on the method parameter + probability, votes or the confidence are used to weight the models. + + """ + + if len(self.models_splits) > 1: + # If there's more than one chunk of models, they must be + # sequentially used to generate the votes for the prediction + votes = MultiVoteList([]) + + for models_split in self.models_splits: + models = self._get_models(models_split) + multi_model = MultiModel(models, + api=self.api, + fields=self.fields, + class_names=self.class_names) + for index, _ in enumerate(multi_model.models): + multi_model.models[index].term_forms = self.term_forms + + votes_split = multi_model.generate_votes_distribution( \ + input_data, + missing_strategy=missing_strategy, + method=method) + votes.extend(votes_split) + else: + # When only one group of models is found you use the + # corresponding multimodel to predict + votes = self.multi_model.generate_votes_distribution( \ + input_data, + missing_strategy=missing_strategy, method=method) + + return votes.combine_to_distribution(normalize=False) + + def _get_models(self, models_split): + if not isinstance(models_split[0], Model): + if self.cache_get is not None and \ + hasattr(self.cache_get, 
'__call__'): + # retrieve the models from a cache get function + try: + models = [self.cache_get(model_id) for model_id + in models_split] + except Exception as exc: + raise Exception('Error while calling the ' + 'user-given' + ' function %s: %s' % + (self.cache_get.__name__, + str(exc))) + else: + models = [retrieve_resource(self.api, model_id, + query_string=ONLY_MODEL) + for model_id in models_split] + + return models + + #pylint: disable=locally-disabled,invalid-name + def _sort_predictions(self, a, b, criteria): + """Sorts the categories in the predicted node according to the + given criteria + + """ + if a[criteria] == b[criteria]: + return sort_categories(a, b, self.objective_categories) + return 1 if b[criteria] > a[criteria] else - 1 + + def predict_operating(self, input_data, + missing_strategy=LAST_PREDICTION, + operating_point=None): + """Computes the prediction based on a user-given operating point. + + """ + kind, threshold, positive_class = parse_operating_point( \ + operating_point, OPERATING_POINT_KINDS, + self.class_names, self.operation_settings) + + try: + predict_method = None + predict_method = getattr(self, "predict_%s" % kind) + + predictions = predict_method(input_data, + missing_strategy, False) + position = self.class_names.index(positive_class) + except KeyError: + raise ValueError("The operating point needs to contain a valid" + " positive class, kind and a threshold.") + + if self.regression: + prediction = predictions + else: + position = self.class_names.index(positive_class) + if predictions[position][kind] > threshold: + prediction = predictions[position] + else: + # if the threshold is not met, the alternative class with + # highest probability or confidence is returned + predictions.sort( \ + key=cmp_to_key( \ + lambda a, b: self._sort_predictions(a, b, kind))) + prediction = predictions[0: 2] + if prediction[0]["category"] == positive_class: + prediction = prediction[1] + else: + prediction = prediction[0] + 
prediction["prediction"] = prediction["category"] + del prediction["category"] + return prediction + + def predict_operating_kind(self, input_data, + missing_strategy=LAST_PREDICTION, + operating_kind=None): + """Computes the prediction based on a user-given operating kind, + i.e, confidence, probability or votes. + + """ + + kind = operating_kind.lower() + if self.boosting and kind != "probability": + raise ValueError("Only probability is allowed as operating kind" + " for boosted ensembles.") + if kind not in OPERATING_POINT_KINDS: + raise ValueError("Allowed operating kinds are %s. %s found." % + (", ".join(OPERATING_POINT_KINDS), kind)) + + try: + predict_method = None + predict_method = getattr(self, "predict_%s" % kind) + + predictions = predict_method(input_data, + missing_strategy, False) + except KeyError: + raise ValueError("The operating kind needs to contain a valid" + " property.") + + if self.regression: + prediction = predictions + else: + predictions.sort( \ + key=cmp_to_key( \ + lambda a, b: self._sort_predictions(a, b, kind))) + prediction = predictions[0] + prediction["prediction"] = prediction["category"] + del prediction["category"] + return prediction + + #pylint: disable=locally-disabled,protected-access + def predict(self, input_data, method=None, + options=None, missing_strategy=LAST_PREDICTION, + operating_point=None, operating_kind=None, median=False, + full=False): + """Makes a prediction based on the prediction made by every model. + + :param input_data: Test data to be used as input + :param method: **deprecated**. Please check the `operating_kind` + attribute. 
Numeric key code for the following + combination methods in classifications/regressions: + 0 - majority vote (plurality)/ average: PLURALITY_CODE + 1 - confidence weighted majority vote / error weighted: + CONFIDENCE_CODE + 2 - probability weighted majority vote / average: + PROBABILITY_CODE + 3 - threshold filtered vote / doesn't apply: + THRESHOLD_CODE + :param options: Options to be used in threshold filtered votes. + :param missing_strategy: numeric key for the individual model's + prediction method. See the model predict + method. + :param operating_point: In classification models, this is the point of + the ROC curve where the model will be used at. + The operating point can be defined in terms of: + - the positive_class, the class that is + important to predict accurately + - its kind: probability, confidence or voting + - its threshold: the minimum established + for the positive_class to be predicted. + The operating_point is then defined as a + map with three attributes, e.g.: + {"positive_class": "Iris-setosa", + "kind": "probability", + "threshold": 0.5} + :param operating_kind: "probability", "confidence" or "votes". Sets the + property that decides the prediction. + Used only if no operating_point is used + :param median: Uses the median of each individual model's predicted + node as individual prediction for the specified + combination method. + :param full: Boolean that controls whether to include the prediction's + attributes. By default, only the prediction is produced. + If set to True, the rest of available information is + added in a dictionary format. 
The dictionary keys can be: + - prediction: the prediction value + - confidence: prediction's confidence + - probability: prediction's probability + - path: rules that lead to the prediction + - count: number of training instances supporting the + prediction + - next: field to check in the next split + - min: minim value of the training instances in the + predicted node + - max: maximum value of the training instances in the + predicted node + - median: median of the values of the training instances + in the predicted node + - unused_fields: list of fields in the input data that + are not being used in the model + """ + + # Checks and cleans input_data leaving the fields used in the model + norm_input_data = self.filter_input_data( \ + input_data, + add_unused_fields=full) + unused_fields = None + if full: + norm_input_data, unused_fields = norm_input_data + + # Strips affixes for numeric values and casts to the final field type + cast(norm_input_data, self.fields) + + if median and method is None: + # predictions with median are only available with old combiners + method = PLURALITY_CODE + if operating_point is None and self.operation_settings is not None: + operating_point = self.operation_settings.get("operating_point") + if operating_kind is None and self.operation_settings is not None: + operating_kind = self.operation_settings.get("operating_kind") + + if method is None and operating_point is None and \ + operating_kind is None and not median: + # operating_point has precedence over operating_kind. 
If no + # combiner is set, default operating kind is "probability" + operating_kind = "probability" + + if operating_point: + if self.regression: + raise ValueError("The operating_point argument can only be" + " used in classifications.") + prediction = self.predict_operating( \ + norm_input_data, + missing_strategy=missing_strategy, + operating_point=operating_point) + if full: + return prediction + return prediction["prediction"] + + if operating_kind: + if self.regression: + # for regressions, operating_kind defaults to the old + # combiners + method = 1 if operating_kind == "confidence" else 0 + return self.predict( \ + norm_input_data, method=method, + options=options, missing_strategy=missing_strategy, + operating_point=None, operating_kind=None, full=full) + prediction = self.predict_operating_kind( \ + norm_input_data, + missing_strategy=missing_strategy, + operating_kind=operating_kind) + return prediction + + if len(self.models_splits) > 1: + # If there's more than one chunk of models, they must be + # sequentially used to generate the votes for the prediction + votes = MultiVote([], boosting_offsets=self.boosting_offsets) + + for models_split in self.models_splits: + models = self._get_models(models_split) + multi_model = MultiModel(models, + api=self.api, + fields=self.fields) + for index, _ in enumerate(multi_model.models): + multi_model.models[index].term_forms = self.term_forms + + votes_split = multi_model._generate_votes( + norm_input_data, + missing_strategy=missing_strategy, + unused_fields=unused_fields) + if median: + for prediction in votes_split.predictions: + prediction['prediction'] = prediction['median'] + votes.extend(votes_split.predictions) + else: + # When only one group of models is found you use the + # corresponding multimodel to predict + votes_split = self.multi_model._generate_votes( + norm_input_data, missing_strategy=missing_strategy, + unused_fields=unused_fields) + + votes = MultiVote(votes_split.predictions, + 
boosting_offsets=self.boosting_offsets) + if median: + for prediction in votes.predictions: + prediction['prediction'] = prediction['median'] + + if self.boosting is not None and not self.regression: + categories = [ \ + d[0] for d in + self.fields[self.objective_id]["summary"]["categories"]] + options = {"categories": categories} + result = votes.combine(method=method, options=options, full=full) + if full: + unused_fields = set(norm_input_data.keys()) + for prediction in votes.predictions: + unused_fields = unused_fields.intersection( \ + set(prediction.get("unused_fields", []))) + if not isinstance(result, dict): + result = {"prediction": round(result, DECIMALS)} + if "probability" in result and "confidence" not in result: + result["confidence"] = result["probability"] + result['unused_fields'] = list(unused_fields) + + return result + + def field_importance_data(self): + """Computes field importance based on the field importance information + of the individual models in the ensemble. 
+ + """ + field_importance = {} + field_names = {} + if self.importance: + field_importance = self.importance + field_names = {field_id: {'name': self.fields[field_id]["name"]} \ + for field_id in list(field_importance.keys())} + return [list(importance) for importance in \ + sorted(list(field_importance.items()), key=lambda x: x[1], + reverse=True)], field_names + + if (self.distributions is not None and + isinstance(self.distributions, list) and + all('importance' in item for item in self.distributions)): + # Extracts importance from ensemble information + importances = [model_info['importance'] for model_info in + self.distributions] + for model_info in importances: + for field_info in model_info: + field_id = field_info[0] + if field_id not in field_importance: + field_importance[field_id] = 0.0 + name = self.fields[field_id]['name'] + field_names[field_id] = {'name': name} + field_importance[field_id] += field_info[1] + else: + # Old ensembles, extracts importance from model information + for model_id in self.model_ids: + local_model = BaseModel(model_id, api=self.api) + for field_info in local_model.field_importance: + field_id = field_info[0] + if field_info[0] not in field_importance: + field_importance[field_id] = 0.0 + name = self.fields[field_id]['name'] + field_names[field_id] = {'name': name} + field_importance[field_id] += field_info[1] + + number_of_models = len(self.model_ids) + for field_id in field_importance: + field_importance[field_id] /= number_of_models + return [list(importance) for importance in \ + sorted(list(field_importance.items()), key=lambda x: x[1], + reverse=True)], field_names + + def print_importance(self, out=sys.stdout): + """Prints ensemble field importance + + """ + print_importance(self, out=out) + + def get_data_distribution(self, distribution_type="training"): + """Returns the required data distribution by adding the distributions + in the models + + """ + ensemble_distribution = [] + categories = [] + distribution = [] + 
# ensembles have now the field information + if self.distribution and self.boosting: + return sorted(self.distribution, key=lambda x: x[0]) + + for model_distribution in self.distributions: + summary = model_distribution[distribution_type] + if 'bins' in summary: + distribution = summary['bins'] + elif 'counts' in summary: + distribution = summary['counts'] + elif 'categories' in summary: + distribution = summary['categories'] + else: + distribution = [] + for point, instances in distribution: + if point in categories: + ensemble_distribution[ + categories.index(point)][1] += instances + else: + categories.append(point) + ensemble_distribution.append([point, instances]) + + return sorted(ensemble_distribution, key=lambda x: x[0]) + + def summarize(self, out=sys.stdout): + """Prints ensemble summary. Only field importance at present. + + """ + distribution = self.get_data_distribution("training") + + if distribution: + out.write("Data distribution:\n") + print_distribution(distribution, out=out) + out.write("\n\n") + + if not self.boosting: + predictions = self.get_data_distribution("predictions") + + if predictions: + out.write("Predicted distribution:\n") + print_distribution(predictions, out=out) + out.write("\n\n") + + out.write("Field importance:\n") + self.print_importance(out=out) + out.flush() + + def all_model_fields(self, max_models=None): + """Retrieves the fields used as predictors in all the ensemble + models + + """ + + fields = {} + models = [] + objective_id = None + no_objective_id = False + if isinstance(self.models_splits[0][0], Model): + for split in self.models_splits: + models.extend(split) + else: + models = self.model_ids + + for index, model_id in enumerate(models): + try: + if isinstance(model_id, Model): + local_model = model_id + elif self.cache_get is not None: + local_model = self.cache_get(model_id) + else: + local_model = Model(model_id, self.api) + if (max_models is not None and index > 0 and + index % max_models == 0): + 
gc.collect() + fields.update(local_model.fields) + if (objective_id is not None and + objective_id != local_model.objective_id): + # the models' objective field have different ids, no global id + no_objective_id = True + else: + objective_id = local_model.objective_id + except NoRootDecisionTree: + pass + if no_objective_id: + objective_id = None + gc.collect() + return fields, objective_id + + def data_transformations(self): + """Returns the pipeline transformations previous to the modeling + step as a pipeline, so that they can be used in local predictions. + Avoiding to set it in a Mixin to maintain the current dump function. + """ + return get_data_transformations(self.resource_id, self.parent_id) + + def dump(self, output=None, cache_set=None): + """Uses msgpack to serialize the resource object + If cache_set is filled with a cache set method, the method is called + + """ + self_vars = vars(self).copy() + del self_vars["api"] + if "multi_model" in self_vars: + for model in self_vars["multi_model"].models: + model.dump(output=output, cache_set=cache_set) + del self_vars["multi_model"] + dump(self_vars, output=output, cache_set=cache_set) + + def dumps(self): + """Uses msgpack to serialize the resource object to a string + + """ + self_vars = vars(self).copy() + del self_vars["api"] + if "multi_model" in self_vars: + del self_vars["multi_model"] + dumps(self_vars) diff --git a/bigml/ensemblepredictor.py b/bigml/ensemblepredictor.py new file mode 100644 index 00000000..cab2fbdd --- /dev/null +++ b/bigml/ensemblepredictor.py @@ -0,0 +1,372 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A local Ensemble object focused on quick predictions. + +This module defines an EnsemblePredictor to make predictions locally using its +associated models. To use this ensemble, you need a local directory containing +the files that store the prediction functions generated for every model in +the ensemble. Please, check `bigmler export` to see how to do that. + + +# Ensemble object to predict +ensemble = EnsemblePredictor(ensemble_id) +ensemble.predict({"petal length": 3, "petal width": 1}) + +""" +import sys +import os +import logging +import json + +from bigml.api import get_ensemble_id, get_api_connection +from bigml.generators.model import print_distribution +from bigml.constants import STORAGE +from bigml.multivote import MultiVote +from bigml.multivote import PLURALITY_CODE +from bigml.basemodel import BaseModel, print_importance, retrieve_resource, \ + check_local_info +from bigml.model import Model +from bigml.flattree import FlatTree +from bigml.util import NUMERIC +from bigml.tree_utils import add_distribution + + +BOOSTING = 1 +LOGGER = logging.getLogger('BigML') + + +class EnsemblePredictor(): + """A local predictive Ensemble. + + Uses a number of BigML models to build an ensemble local version + that can be used to generate predictions locally. + The expected arguments are: + + ensemble: ensemble object or id + model_fns_dir: path to the local directory where the functions that + are to be used for each model's prediction are stored. + The files containing each model predictor function + can be obtained from the `bigmler export` command. 
+ Check the bigmler docs in + http://bigmler.readthedocs.io/en/latest/#bigmler-export + api: connection object. If None, a new connection object is + instantiated. + """ + + def __init__(self, ensemble, model_fns_dir, api=None): + + self.resource_id = None + # to be deprecated + self.ensemble_id = None + self.objective_id = None + self.distributions = None + self.distribution = None + self.models_splits = [] + self.multi_model = None + self.boosting = None + self.boosting_offsets = None + self.regression = False + self.fields = None + self.class_names = None + self.importance = {} + self.predict_functions = [] + self.api = get_api_connection(api) + + ensemble = self.get_ensemble_resource(ensemble) + self.resource_id = get_ensemble_id(ensemble) + self.ensemble_id = self.resource_id + + if not check_local_info(ensemble): + # avoid checking fields because of old ensembles + ensemble = retrieve_resource(self.api, self.resource_id, + no_check_fields=True) + if ensemble['object'].get('type') == BOOSTING: + self.boosting = ensemble['object'].get('boosting') + models = ensemble['object']['models'] + self.distributions = ensemble['object'].get('distributions', []) + self.importance = ensemble['object'].get('importance', []) + self.model_ids = models + # new ensembles have the fields structure + if ensemble['object'].get('ensemble'): + self.fields = ensemble['object'].get( \ + 'ensemble', {}).get("fields") + self.objective_id = ensemble['object'].get("objective_field") + self.input_fields = ensemble['object'].get("input_fields") + + if model_fns_dir: + self.get_model_fns(model_fns_dir) + else: + raise ValueError("The EnsemblePredictor object expects as" + " argument the directory where the models" + " predict functions are stored. 
To generate " + " them, please check the 'bigmler export'" + " command.") + + if self.fields: + add_distribution(self) + + self.regression = \ + self.fields[self.objective_id].get('optype') == NUMERIC + if self.boosting: + self.boosting_offsets = ensemble['object'].get('initial_offset', + 0) \ + if self.regression else dict(ensemble['object'].get( \ + 'initial_offsets', [])) + + if not self.regression and self.boosting is None: + try: + objective_field = self.fields[self.objective_id] + categories = objective_field['summary']['categories'] + classes = [category[0] for category in categories] + except (AttributeError, KeyError): + classes = set() + for distribution in self.distributions: + for category in distribution['training']['categories']: + classes.add(category[0]) + + self.class_names = sorted(classes) + + def get_model_fns(self, model_fns_dir): + """Retrieves the predict functions for each model. The functions are + named after the field that is being predicted prepended by the + `predict_` string. + + """ + function_name = "predict" + model_id = self.model_ids[0] + module_path = ".".join(os.path.normpath(model_fns_dir).split(os.sep)) + if not os.path.isfile(os.path.join(model_fns_dir, "%s.py" % + model_id.replace("/", "_"))): + self.generate_models(model_fns_dir) + for model_id in self.model_ids: + module_name = "%s.%s" % (module_path, + model_id.replace("/", "_")) + try: + __import__(module_name) + prediction_module = sys.modules[module_name] + function = getattr(prediction_module, function_name) + self.predict_functions.append(function) + except ImportError: + raise ImportError("Failed to import the predict function" + " from %s." % module_name) + + def get_ensemble_resource(self, ensemble): + """Extracts the ensemble resource info. 
The ensemble argument can be + - a path to a local file + - an ensemble id + """ + # the string can be a path to a JSON file + if isinstance(ensemble, str): + try: + with open(ensemble) as ensemble_file: + path = os.path.dirname(ensemble) + ensemble = json.load(ensemble_file) + self.resource_id = get_ensemble_id(ensemble) + if self.resource_id is None: + raise ValueError("The JSON file does not seem" + " to contain a valid BigML ensemble" + " representation.") + self.api.storage = path + except IOError: + # if it is not a path, it can be an ensemble id + self.resource_id = get_ensemble_id(ensemble) + if self.resource_id is None: + if ensemble.find('ensemble/') > -1: + raise Exception( + self.api.error_message(ensemble, + resource_type='ensemble', + method='get')) + raise IOError("Failed to open the expected JSON file" + " at %s" % ensemble) + except ValueError: + raise ValueError("Failed to interpret %s." + " JSON file expected.") + return ensemble + + def list_models(self): + """Lists all the model/ids that compound the ensemble. + + """ + return self.model_ids + + def predict(self, input_data, method=PLURALITY_CODE, full=False): + """Makes a prediction based on the prediction made by every model. 
+ + :param input_data: Test data to be used as input + :param method: numeric key code for the following combination + methods in classifications/regressions: + 0 - majority vote (plurality)/ average: PLURALITY_CODE + 1 - confidence weighted majority vote / error weighted: + CONFIDENCE_CODE + 2 - probability weighted majority vote / average: + PROBABILITY_CODE + """ + + + # When only one group of models is found you use the + # corresponding multimodel to predict + votes_split = [] + options = None + count = 1 + for fun in self.predict_functions: + prediction = fun(input_data) + prediction.update({"order": count, "count": 1}) + count += 1 + votes_split.append(prediction) + votes = MultiVote(votes_split, + boosting_offsets=self.boosting_offsets) + if self.boosting is not None and not self.regression: + categories = [ \ + d[0] for d in + self.fields[self.objective_id]["summary"]["categories"]] + options = {"categories": categories} + + result = votes.combine(method=method, options=options, full=full) + if isinstance(result, dict): + del result['count'] + + return result + + def field_importance_data(self): + """Computes field importance based on the field importance information + of the individual models in the ensemble. 
+ + """ + field_importance = {} + field_names = {} + if self.importance: + field_importance = self.importance + field_names = {field_id: {'name': self.fields[field_id]["name"]} \ + for field_id in list(field_importance.keys())} + return [list(importance) for importance in \ + sorted(list(field_importance.items()), key=lambda x: x[1], + reverse=True)], field_names + + if (self.distributions is not None and + isinstance(self.distributions, list) and + all('importance' in item for item in self.distributions)): + # Extracts importance from ensemble information + importances = [model_info['importance'] for model_info in + self.distributions] + for model_info in importances: + for field_info in model_info: + field_id = field_info[0] + if field_id not in field_importance: + field_importance[field_id] = 0.0 + name = self.fields[field_id]['name'] + field_names[field_id] = {'name': name} + field_importance[field_id] += field_info[1] + else: + # Old ensembles, extracts importance from model information + for model_id in self.model_ids: + local_model = BaseModel(model_id, api=self.api) + for field_info in local_model.field_importance: + field_id = field_info[0] + if field_info[0] not in field_importance: + field_importance[field_id] = 0.0 + name = self.fields[field_id]['name'] + field_names[field_id] = {'name': name} + field_importance[field_id] += field_info[1] + + number_of_models = len(self.model_ids) + for field_id in field_importance: + field_importance[field_id] /= number_of_models + return [list(importance) for importance in \ + sorted(list(field_importance.items()), key=lambda x: x[1], + reverse=True)], field_names + + def print_importance(self, out=sys.stdout): + """Prints ensemble field importance + + """ + print_importance(self, out=out) + + def get_data_distribution(self, distribution_type="training"): + """Returns the required data distribution by adding the distributions + in the models + + """ + ensemble_distribution = [] + categories = [] + distribution = [] + 
# ensembles have now the field information + if self.distribution and self.boosting: + return sorted(self.distribution, key=lambda x: x[0]) + + for model_distribution in self.distributions: + summary = model_distribution[distribution_type] + if 'bins' in summary: + distribution = summary['bins'] + elif 'counts' in summary: + distribution = summary['counts'] + elif 'categories' in summary: + distribution = summary['categories'] + else: + distribution = [] + for point, instances in distribution: + if point in categories: + ensemble_distribution[ + categories.index(point)][1] += instances + else: + categories.append(point) + ensemble_distribution.append([point, instances]) + + return sorted(ensemble_distribution, key=lambda x: x[0]) + + def summarize(self, out=sys.stdout): + """Prints ensemble summary. Only field importance at present. + + """ + distribution = self.get_data_distribution("training") + + if distribution: + out.write("Data distribution:\n") + print_distribution(distribution, out=out) + out.write("\n\n") + + if not self.boosting: + predictions = self.get_data_distribution("predictions") + + if predictions: + out.write("Predicted distribution:\n") + print_distribution(predictions, out=out) + out.write("\n\n") + + out.write("Field importance:\n") + self.print_importance(out=out) + out.flush() + + def generate_models(self, directory=STORAGE): + """Generates the functions for the models in the ensemble + + """ + if not os.path.isfile(directory) and not os.path.exists(directory): + os.makedirs(directory) + with open(os.path.join(directory, "__init__.py"), mode='w'): + pass + for model_id in self.model_ids: + local_model = Model(model_id, api=self.api, + fields=self.fields) + local_flat_tree = FlatTree(local_model.tree, local_model.offsets, + local_model.fields, + local_model.objective_id, + local_model.boosting) + with open(os.path.join(directory, "%s.py" % + model_id.replace("/", "_")), "w") \ + as handler: + local_flat_tree.python(out=handler, + 
docstring="Model %s" % model_id) diff --git a/bigml/evaluation.py b/bigml/evaluation.py new file mode 100644 index 00000000..76726589 --- /dev/null +++ b/bigml/evaluation.py @@ -0,0 +1,123 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2023-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A local Evaluation object. + +This module defines a local class to handle the results of an evaluation + +""" +import json + + +from bigml.api import get_api_connection, ID_GETTERS +from bigml.basemodel import retrieve_resource, get_resource_dict + +CLASSIFICATION_METRICS = [ + "accuracy", "precision", "recall", "phi", "phi_coefficient", + "f_measure", "confusion_matrix", "per_class_statistics"] + +REGRESSION_METRICS = ["mean_absolute_error", "mean_squared_error", "r_squared"] + + +class ClassificationEval(): + """A class to store the classification metrics """ + def __init__(self, name, per_class_statistics): + + self.name = name + for statistics in per_class_statistics: + if statistics["class_name"] == name: + break + for metric in CLASSIFICATION_METRICS: + if metric in statistics.keys(): + setattr(self, metric, statistics.get(metric)) + + +class Evaluation(): + """A class to deal with the information in an evaluation result + + """ + def __init__(self, evaluation, api=None): + + self.resource_id = None + self.model_id = None + self.test_dataset_id = None + self.regression = None + self.full = None + self.random = None + self.error = None + self.error_message = None + 
self.api = get_api_connection(api) + + try: + self.resource_id, evaluation = get_resource_dict( \ + evaluation, "evaluation", self.api, no_check_fields=True) + except ValueError as resource: + try: + evaluation = json.loads(str(resource)) + self.resource_id = evaluation["resource"] + except ValueError: + raise ValueError("The evaluation resource was faulty: \n%s" % \ + resource) + + if 'object' in evaluation and isinstance(evaluation['object'], dict): + evaluation = evaluation['object'] + self.status = evaluation["status"] + self.error = self.status.get("error") + if self.error is not None: + self.error_message = self.status.get("message") + else: + self.model_id = evaluation["model"] + self.test_dataset_id = evaluation["dataset"] + + if 'result' in evaluation and \ + isinstance(evaluation['result'], dict): + self.full = evaluation.get("result", {}).get("model") + self.random = evaluation.get("result", {}).get("random") + self.regression = not self.full.get("confusion_matrix") + if self.regression: + self.add_metrics(self.full, REGRESSION_METRICS) + self.mean = evaluation.get("result", {}).get("mean") + else: + self.add_metrics(self.full, CLASSIFICATION_METRICS) + self.mode = evaluation.get("result", {}).get("mode") + self.classes = evaluation.get("result", {}).get( + "class_names") + else: + raise ValueError("Failed to find the correct evaluation" + " structure.") + if not self.regression: + self.positive_class = ClassificationEval(self.classes[-1], + self.per_class_statistics) + + def add_metrics(self, metrics_info, metrics_list, obj=None): + """Adding the metrics in the `metrics_info` dictionary as attributes + in the object passed as argument. If None is given, the metrics will + be added to the self object. 
+ """ + if obj is None: + obj = self + + for metric in metrics_list: + setattr(obj, metric, metrics_info.get(metric, + metrics_info.get("average_%s" % metric))) + + def set_positive_class(self, positive_class): + """Changing the positive class """ + if positive_class is None or positive_class not in self.classes: + raise ValueError("The possible classes are: %s" % + ", ".join(self.classes)) + self.positive_class = ClassificationEval(positive_class, + self.per_class_statistics) diff --git a/tests/features/read_source-steps.py b/bigml/exceptions.py similarity index 51% rename from tests/features/read_source-steps.py rename to bigml/exceptions.py index 03fff7d4..71e965f6 100644 --- a/tests/features/read_source-steps.py +++ b/bigml/exceptions.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -#!/usr/bin/env python # -# Copyright 2012 BigML +# Copyright 2021-2025 BigML # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -15,20 +14,20 @@ # License for the specific language governing permissions and limitations # under the License. -from lettuce import step, world -from bigml.api import HTTP_OK - -@step(r'I get the source "(.*)"') -def i_get_the_source(step, resource): - resource = world.api.get_source(resource) - world.status = resource['code'] - assert world.status == HTTP_OK - world.source = resource['object'] - -@step(r'the source has DEV (True|False)') -def source_has_dev(step, boolean): - if boolean == 'False': - boolean = '' - boolean = bool(boolean) - dev = world.source['dev'] - assert dev == boolean +"""Declared exceptions. 
+ +""" + +class ResourceException(Exception): + """Base class to any exception that arises from a bad structured resource + + """ + + +class NoRootDecisionTree(ResourceException): + """The decision tree structure has no "root" attribute """ + + + +class FaultyResourceError(Exception): + """Exception to be raised when retrieving a Faulty resource """ diff --git a/bigml/execution.py b/bigml/execution.py new file mode 100644 index 00000000..626cd06e --- /dev/null +++ b/bigml/execution.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""An local Execution object. 
+ +This module defines a local class to handle the results of an execution + +""" +import json + + +from bigml.api import get_api_connection, ID_GETTERS +from bigml.basemodel import retrieve_resource + + + +def get_resource_dict(resource, resource_type, api=None): + """Extracting the resource JSON info as a dict from the first argument of + the local object constructors, that can be: + + - the path to a file that contains the JSON + - the ID of the resource + - the resource dict itself + + """ + + get_id = ID_GETTERS[resource_type] + resource_id = None + # the string can be a path to a JSON file + if isinstance(resource, str): + try: + with open(resource) as resource_file: + resource = json.load(resource_file) + resource_id = get_id(resource) + if resource_id is None: + raise ValueError("The JSON file does not seem" + " to contain a valid BigML %s" + " representation." % resource_type) + except IOError: + # if it is not a path, it can be a model id + resource_id = get_id(resource) + if resource_id is None: + if resource.find("%s/" % resource_type) > -1: + raise Exception( + api.error_message(resource, + resource_type=resource_type, + method="get")) + raise IOError("Failed to open the expected JSON file" + " at %s." % resource) + except ValueError: + raise ValueError("Failed to interpret %s." + " JSON file expected." 
% resource) + + if not (isinstance(resource, dict) and 'resource' in resource and + resource['resource'] is not None): + resource = retrieve_resource(api, resource_id, retries=0) + else: + resource_id = get_id(resource) + + return resource_id, resource + + +class Execution(): + """A class to deal with the information in an execution result + + """ + def __init__(self, execution, api=None): + + self.resource_id = None + self.outputs = None + self.output_types = None + self.output_resources = None + self.result = None + self.status = None + self.source_location = None + self.error = None + self.error_message = None + self.error_location = None + self.call_stack = None + self.api = get_api_connection(api) + + try: + self.resource_id, execution = get_resource_dict( \ + execution, "execution", self.api) + except ValueError as resource: + try: + execution = json.loads(str(resource)) + self.resource_id = execution["resource"] + except ValueError: + raise ValueError("The execution resource was faulty: \n%s" % \ + resource) + + if 'object' in execution and isinstance(execution['object'], dict): + execution = execution['object'] + self.status = execution["status"] + self.error = self.status.get("error") + if self.error is not None: + self.error_message = self.status.get("message") + self.error_location = self.status.get("source_location") + self.call_stack = self.status.get("call_stack") + else: + self.source_location = self.status.get("source_location") + if 'execution' in execution and \ + isinstance(execution['execution'], dict): + execution = execution.get('execution') + self.result = execution.get("result") + self.outputs = dict((output[0], output[1]) \ + for output in execution.get("outputs")) + self.output_types = dict((output[0], output[2]) \ + for output in execution.get("outputs")) + self.output_resources = dict((res["variable"], res["id"]) \ + for res in execution.get("output_resources")) + self.execution = execution diff --git a/bigml/featurizer.py 
b/bigml/featurizer.py new file mode 100644 index 00000000..0a6d9e33 --- /dev/null +++ b/bigml/featurizer.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2022-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A Featurizer to generate features for composed fields. + +This module defines a Featurizer class to hold the information associated +to the subfields derived from datetime fields. +It is used for local predictions. + +""" +from bigml_chronos import chronos +from bigml.constants import DATETIME + + +DATE_FNS = { + "day-of-month": lambda x: x.day, + "day-of-week": lambda x: x.weekday() + 1, + "millisecond": lambda x: x.microsecond / 1000} + +IMAGE_PROVENANCE = ["dimensions", "average_pixels", "level_histogram", + "histogram_of_gradients", "pretrained_cnn", "wavelet_subbands"] + +def expand_date(res_object, parent_id, date): + """ Retrieves all the values of the subfields generated from + a parent datetime field + + """ + expanded = {} + timeformats = res_object.fields[parent_id].get('time_formats', {}) + try: + parsed_date = chronos.parse(date, format_names=timeformats) + except ValueError: + return {} + for fid, ftype in list(res_object.subfields[parent_id].items()): + date_fn = DATE_FNS.get(ftype) + if date_fn is not None: + expanded.update({fid: date_fn(parsed_date)}) + else: + expanded.update({fid: getattr(parsed_date, ftype)}) + return expanded + + +class Featurizer: + """A class to generate the components derived from a composed 
field """ + + def __init__(self, fields, input_fields, selected_fields=None, + preferred_only=True): + self.fields = fields + self.input_fields = input_fields + self.subfields = {} + self.generators = {} + self.preferred_only = preferred_only + self.selected_fields = self.add_subfields( + selected_fields, preferred_only=preferred_only) + + def add_subfields(self, selected_fields=None, preferred_only=True): + """Adding the subfields information in the fields structure and the + generating functions for the subfields values. + """ + # filling preferred fields with preferred input fields + fields = selected_fields or self.fields + + if selected_fields is None: + selected_fields = {} + selected_fields.update({field_id: field for field_id, field \ + in fields.items() if field_id in self.input_fields \ + and (not preferred_only or self.fields[field_id].get( + "preferred", True))}) + + # computing the subfields generated from parsing datetimes + for fid, finfo in list(selected_fields.items()): + + # datetime subfields + if finfo.get('parent_optype', False) == DATETIME: + parent_id = finfo["parent_ids"][0] + subfield = {fid: finfo["datatype"]} + if parent_id in list(self.subfields.keys()): + self.subfields[parent_id].update(subfield) + else: + selected_fields[parent_id] = self.fields[parent_id] + self.subfields[parent_id] = subfield + self.generators.update({parent_id: expand_date}) + elif finfo.get('provenance', False) in IMAGE_PROVENANCE: + raise ValueError("This model uses image-derived fields. " + "Please, use the pip install bigml[images] " + "option to install the libraries required " + "for local predictions in this case.") + + return selected_fields + + def extend_input(self, input_data): + """Computing the values for the generated subfields and adding them + to the original input data. Parent fields will be removed if the + `preferred_only` option is set, as they are not used in models. 
+ However, the `preferred_only` option set to False will keep them, + allowing to be used as generators in other transformations. + """ + extended = {} + for f_id, value in list(input_data.items()): + if f_id in self.subfields: + if not self.preferred_only: + extended[f_id] = value + extended.update(self.generators[f_id](self, f_id, value)) + else: + extended[f_id] = value + return extended diff --git a/bigml/fields.py b/bigml/fields.py new file mode 100644 index 00000000..41246b62 --- /dev/null +++ b/bigml/fields.py @@ -0,0 +1,842 @@ +# -*- coding: utf-8 -*- +#pylint: disable=unbalanced-tuple-unpacking +# +# Copyright 2012-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A class to deal with the fields of a resource. + +This module helps to map between ids, names, and column_numbers in the +fields of source, dataset, or model. Also to validate your input data +for predictions or to list all the fields from a resource. 
+ +from bigml.api import BigML +from bigml.fields import Fields + +api = BigML() + +source = api.get_source("source/50a6bb94eabcb404d3000174") +fields = Fields(source['object']['fields']) + +dataset = api.get_dataset("dataset/50a6bb96eabcb404cd000342") +fields = Fields(dataset['object']['fields']) + +# Note that the fields in a model come one level deeper +model = api.get_model("model/50a6bbac035d0706db0008f8") +fields = Fields(model['object']['model']['fields']) + +prediction = api.get_prediction("prediction/50a69688035d0706dd00044d") +fields = Fields(prediction['object']['fields']) + + +""" +import sys +import json +import csv +import random +import numpy as np + +try: + from pandas import DataFrame + PANDAS_READY = True +except ImportError: + PANDAS_READY = False + + +from bigml.util import invert_dictionary, python_map_type, find_locale +from bigml.util import DEFAULT_LOCALE +from bigml.api_handlers.resourcehandler import get_resource_type, get_fields +from bigml.constants import ( + SOURCE_PATH, DATASET_PATH, SUPERVISED_PATHS, FUSION_PATH, + RESOURCES_WITH_FIELDS, DEFAULT_MISSING_TOKENS, REGIONS, CATEGORICAL) +from bigml.io import UnicodeReader, UnicodeWriter + +LIST_LIMIT = 10 +REGIONS_ATTR = "labels" +SUMMARY_HEADERS = ["field column", "field ID", "field name", "field label", + "field description", "field type", "preferred", + "missing count", "errors", "contents summary", + "errors summary"] + +UPDATABLE_HEADERS = {"field name": "name", + "field label": "label", + "field description": "description", + "field type": "optype", + "preferred": "preferred"} + +ITEM_SINGULAR = {"categories": "category"} + + +def get_fields_structure(resource, errors=False): + """Returns the field structure for a resource, its locale and + missing_tokens + + """ + try: + resource_type = get_resource_type(resource) + except ValueError: + raise ValueError("Unknown resource structure") + field_errors = None + resource = resource.get('object', resource) + # locale and missing tokens + 
if resource_type == SOURCE_PATH: + resource_locale = resource['source_parser']['locale'] + missing_tokens = resource[ + 'source_parser']['missing_tokens'] + else: + resource_locale = resource.get('locale', DEFAULT_LOCALE) + missing_tokens = resource.get('missing_tokens', + DEFAULT_MISSING_TOKENS) + + fields = get_fields(resource) + if resource_type in RESOURCES_WITH_FIELDS: + # Check whether there's an objective id + objective_column = None + if resource_type == DATASET_PATH: + objective_column = resource.get( \ + 'objective_field', {}).get('id') + if errors: + field_errors = resource.get("status", {}).get("field_errors") + elif resource_type in SUPERVISED_PATHS and \ + resource_type != FUSION_PATH: + objective_id = resource.get( \ + 'objective_fields', [None])[0] + objective_column = fields.get( \ + objective_id, {}).get('column_number') + result = fields, resource_locale, missing_tokens, objective_column + if errors: + result = result + (field_errors,) + return result + return (None, None, None, None, None) if errors else \ + (None, None, None, None) + + +def attribute_summary(attribute_value, item_type, limit=None): + """Summarizes the information in fields attributes where content is + written as an array of arrays like tag_cloud, items, etc. 
+ """ + if attribute_value is None: + return None + if item_type != REGIONS_ATTR: + items = ["%s (%s)" % (item, instances) for + item, instances in attribute_value] + items_length = len(items) + if limit is None or limit > items_length: + limit = items_length + return "%s %s: %s" % (items_length, type_singular(item_type, + items_length == 1), + ", ".join(items[0: limit])) + items = ["%s (%s)" % (attr.get("label"), attr.get("count")) for + attr in attribute_value] + items_length = len(items) + if limit is None or limit > items_length: + limit = items_length + return "%s %s: %s" % (items_length, type_singular(item_type, + items_length == 1), + ", ".join(items[0: limit])) + + +def type_singular(item_type, singular=False): + """Singularizes item types if needed + + """ + if singular: + return ITEM_SINGULAR.get(item_type, item_type[:-1]) + return item_type + + +def numeric_example(numeric_summary): + """Generates a random numeric example in the gaussian defined by + mean and sigma in the numeric_summary + + """ + try: + mean = numeric_summary.get("mean") + sigma = numeric_summary.get("standard_deviation") + minimum = numeric_summary.get("minimum") + maximum = numeric_summary.get("maximum") + value = -1 + while value < minimum or value > maximum: + value = random.gauss(mean, sigma) + return value + except TypeError: + return None + + +def sorted_headers(fields): + """Listing the names of the fields as ordered in the original dataset. + The `fields` parameter is a Fields object. + """ + header_names = [] + header_ids = [] + for column in fields.fields_columns: + header_names.append(fields.fields[ + fields.fields_by_column_number[column]]["name"]) + header_ids.append(fields.fields_by_column_number[column]) + + return header_names, header_ids + + +def get_new_fields(output_fields): + """Extracts the sexpr and names of the output fields in a dataset + generated from a new_fields transformation. 
+ """ + new_fields = [] + for output_field in output_fields: + sexp = output_field.get("generator") + names = output_field.get("names") + new_fields.append({"field": sexp, "names": names}) + return new_fields + + +def one_hot_code(value, field, decode=False): + """Translating into codes categorical values. The codes are the index + of the value in the list of categories read from the fields summary. + Decode set to True will cause the code to be translated to the value""" + + try: + categories = [cat[0] for cat in field["summary"]["categories"]] + except KeyError: + raise KeyError("Failed to find the categories list. Check the field" + " information.") + + if decode: + try: + result = categories[int(value)] + except KeyError: + raise KeyError("Code not found in the categories list. %s" % + categories) + else: + try: + result = categories.index(value) + except ValueError: + raise ValueError("The '%s' value is not found in the categories " + "list: %s" % (value, categories)) + return result + + +class Fields(): + """A class to deal with BigML auto-generated ids. + + """ + def __init__(self, resource_or_fields, missing_tokens=None, + data_locale=None, verbose=False, + objective_field=None, objective_field_present=False, + include=None, errors=None): + + # The constructor can be instantiated with resources or a fields + # structure. The structure is checked and fields structure is returned + # if a resource type is matched. 
+ try: + self.resource_type = get_resource_type(resource_or_fields) + resource_info = get_fields_structure(resource_or_fields, True) + (self.fields, + resource_locale, + resource_missing_tokens, + objective_column, + resource_errors) = resource_info + if data_locale is None: + data_locale = resource_locale + if missing_tokens is None: + if resource_missing_tokens: + missing_tokens = resource_missing_tokens + if errors is None: + errors = resource_errors + except ValueError: + # If the resource structure is not in the expected set, fields + # structure is assumed + self.fields = resource_or_fields + if data_locale is None: + data_locale = DEFAULT_LOCALE + if missing_tokens is None: + missing_tokens = DEFAULT_MISSING_TOKENS + objective_column = None + if self.fields is None: + raise ValueError("No fields structure was found.") + self.fields_by_name = invert_dictionary(self.fields, 'name') + self.fields_by_column_number = invert_dictionary(self.fields, + 'column_number') + find_locale(data_locale, verbose) + self.missing_tokens = missing_tokens + self.fields_columns = sorted(self.fields_by_column_number.keys()) + # Ids of the fields to be included + self.filtered_fields = (list(self.fields.keys()) if include is None + else include) + # To be updated in update_objective_field + self.row_ids = None + self.headers = None + self.objective_field = None + self.objective_field_present = None + self.filtered_indexes = None + self.field_errors = errors + # if the objective field is not set by the user + # use the one extracted from the resource info + if objective_field is None and objective_column is not None: + objective_field = objective_column + objective_field_present = True + if self.fields: + # empty composite sources will not have an objective field + self.update_objective_field(objective_field, + objective_field_present) + + def update_objective_field(self, objective_field, objective_field_present, + headers=None): + """Updates objective_field and headers info + + 
Permits to update the objective_field, objective_field_present and + headers info from the constructor and also in a per row basis. + """ + # If no objective field, select the last column, else store its column + if objective_field is None: + self.objective_field = self.fields_columns[-1] + elif isinstance(objective_field, str): + try: + self.objective_field = self.field_column_number( \ + objective_field) + except KeyError: + # if the name of the objective field is not found, use the last + # field as objective + self.objective_field = self.fields_columns[-1] + else: + self.objective_field = objective_field + + # If present, remove the objective field from the included fields + objective_id = self.field_id(self.objective_field) + if objective_id in self.filtered_fields: + del self.filtered_fields[self.filtered_fields.index(objective_id)] + + self.objective_field_present = objective_field_present + if headers is None: + # The row is supposed to contain the fields sorted by column number + self.row_ids = [item[0] for item in + sorted(list(self.fields.items()), + key=lambda x: x[1]['column_number']) + if objective_field_present or + item[1]['column_number'] != self.objective_field] + self.headers = self.row_ids + else: + # The row is supposed to contain the fields as sorted in headers + self.row_ids = [self.field_id(header) for header in headers] + self.headers = headers + # Mapping each included field to its correspondent index in the row. + # The result is stored in filtered_indexes. + self.filtered_indexes = [] + for field in self.filtered_fields: + try: + index = self.row_ids.index(field) + self.filtered_indexes.append(index) + except ValueError: + continue + + def field_id(self, key): + """Returns a field id. 
+ + """ + + if isinstance(key, str): + try: + f_id = self.fields_by_name[key] + except KeyError: + raise ValueError("Error: field name '%s' does not exist" % key) + return f_id + if isinstance(key, int): + try: + f_id = self.fields_by_column_number[key] + except KeyError: + raise ValueError("Error: field column number '%s' does not" + " exist" % key) + return f_id + return None + + def field_name(self, key): + """Returns a field name. + + """ + if isinstance(key, str): + try: + name = self.fields[key]['name'] + except KeyError: + raise ValueError("Error: field id '%s' does not exist" % key) + return name + if isinstance(key, int): + try: + name = self.fields[self.fields_by_column_number[key]]['name'] + except KeyError: + raise ValueError("Error: field column number '%s' does not" + " exist" % key) + return name + return None + + def field_column_number(self, key): + """Returns a field column number. + + """ + try: + return self.fields[key]['column_number'] + except KeyError: + return self.fields[self.fields_by_name[key]]['column_number'] + + def len(self): + """Returns the number of fields. + + """ + return len(self.fields) + + def pair(self, row, headers=None, + objective_field=None, objective_field_present=None): + """Pairs a list of values with their respective field ids. + + objective_field is the column_number of the objective field. + + `objective_field_present` must be True is the objective_field column + is present in the row. 
+ + """ + # Try to get objective field form Fields or use the last column + if objective_field is None: + if self.objective_field is None: + objective_field = self.fields_columns[-1] + else: + objective_field = self.objective_field + # If objective fields is a name or an id, retrive column number + if isinstance(objective_field, str): + objective_field = self.field_column_number(objective_field) + + # Try to guess if objective field is in the data by using headers or + # comparing the row length to the number of fields + if objective_field_present is None: + if headers: + objective_field_present = (self.field_name(objective_field) in + headers) + else: + objective_field_present = len(row) == self.len() + + # If objective field, its presence or headers have changed, update + if (objective_field != self.objective_field or + objective_field_present != self.objective_field_present or + (headers is not None and headers != self.headers)): + self.update_objective_field(objective_field, + objective_field_present, headers) + + row = [self.normalize(info) for info in row] + return self.to_input_data(row) + + def list_fields(self, out=sys.stdout): + """Lists a description of the fields. + + """ + for field in [(val['name'], val['optype'], val['column_number']) + for _, val in sorted(list(self.fields.items()), + key=lambda k: + k[1]['column_number'])]: + out.write('[%-32s: %-16s: %-8s]\n' % (field[0], + field[1], field[2])) + out.flush() + + def preferred_fields(self): + """Returns fields where attribute preferred is set to True or where + it isn't set at all. + + """ + return {key: field for key, field in self.fields.items() + if ('preferred' not in field) or field['preferred']} + + def validate_input_data(self, input_data, out=sys.stdout): + """Validates whether types for input data match types in the + fields definition. 
+ + """ + if isinstance(input_data, dict): + for name in input_data: + if name in self.fields_by_name: + out.write('[%-32s: %-16s: %-16s: ' % + (name, type(input_data[name]), + self.fields[self.fields_by_name[name]] + ['optype'])) + if (type(input_data[name]) in python_map_type(self.fields[ + self.fields_by_name[name]]['optype'])): + out.write('OK\n') + else: + out.write('WRONG\n') + else: + out.write("Field '%s' does not exist\n" % name) + else: + out.write("Input data must be a dictionary") + + def normalize(self, value): + """Transforms to unicode and cleans missing tokens + + """ + if not isinstance(value, str): + value = str(value, "utf-8") + return None if value in self.missing_tokens else value + + def to_input_data(self, row): + """Builds dict with field, value info only for the included headers + + """ + pair = [] + for index in self.filtered_indexes: + pair.append((self.headers[index], row[index])) + return dict(pair) + + def missing_counts(self): + """Returns the ids for the fields that contain missing values + + """ + summaries = [(field_id, field.get('summary', {})) + for field_id, field in list(self.fields.items())] + if len(summaries) == 0: + raise ValueError("The structure has not enough information " + "to extract the fields containing missing values." + "Only datasets and models have such information. 
" + "You could retry the get remote call " + " with 'limit=-1' as query string.") + + return {field_id: summary.get('missing_count', 0) + for field_id, summary in summaries + if summary.get('missing_count', 0) > 0} + + def stats(self, field_name): + """Returns the summary information for the field + + """ + field_id = self.field_id(field_name) + summary = self.fields[field_id].get('summary', {}) + return summary + + def objective_field_info(self): + """Returns the fields structure for the objective field""" + if self.objective_field is None: + return None + objective_id = self.field_id(self.objective_field) + return {objective_id: self.fields[objective_id]} + + def sorted_field_ids(self, objective=False): + """List of field IDs ordered by column number. If objective is + set to False, the objective field will be excluded. + """ + fields = {} + fields.update(self.fields_by_column_number) + if not objective and self.objective_field is not None: + del(fields[self.objective_field]) + field_ids = fields.values() + return field_ids + + def to_numpy(self, input_data_list, objective=False): + """Transforming input data to numpy syntax. Fields are sorted + in the dataset order and categorical fields are one-hot encoded. 
+ If objective set to False, the objective field will not be included""" + if PANDAS_READY and isinstance(input_data_list, DataFrame): + inner_data_list = input_data_list.to_dict('records') + else: + inner_data_list = input_data_list + field_ids = self.sorted_field_ids(objective=objective) + np_input_list = np.empty(shape=(len(input_data_list), + len(field_ids))) + for index, input_data in enumerate(inner_data_list): + np_input = np.array([]) + for field_id in field_ids: + field_input = input_data.get(field_id, + input_data.get(self.field_name(field_id))) + field = self.fields[field_id] + if field["optype"] == CATEGORICAL: + field_input = one_hot_code(field_input, field) + np_input = np.append(np_input, field_input) + np_input_list[index] = np_input + return np_input_list + + def from_numpy(self, np_data_list, objective=False, by_name=True): + """Transforming input data from numpy syntax. Fields are sorted + in the dataset order and categorical fields are one-hot encoded.""" + input_data_list = [] + field_ids = self.sorted_field_ids(objective=objective) + for np_data in np_data_list: + if len(np_data) != len(field_ids): + raise ValueError("Wrong number of features in data: %s" + " found, %s expected" % (len(np_data), len(field_ids))) + input_data = {} + for index, field_id in enumerate(field_ids): + field_input = None if np.isnan(np_data[index]) else \ + np_data[index] + field = self.fields[field_id] + if field["optype"] == CATEGORICAL: + field_input = one_hot_code(field_input, field, decode=True) + if by_name: + field_id = self.fields[field_id]["name"] + input_data.update({field_id: field_input}) + input_data_list.append(input_data) + return input_data_list + + def one_hot_codes(self, field_name): + """Returns the codes used for every category in a categorical field""" + field = self.fields[self.field_id(field_name)] + if field["optype"] != CATEGORICAL: + raise ValueError("Only categorical fields are encoded") + categories = [cat[0] for cat in 
field["summary"]["categories"]] + return dict(zip(categories, range(0, len(categories)))) + + def summary_csv(self, filename=None): + """Summary of the contents of the fields + + """ + + summary = [] + writer = None + if filename is not None: + writer = UnicodeWriter(filename, + quoting=csv.QUOTE_NONNUMERIC).open_writer() + writer.writerow(SUMMARY_HEADERS) + else: + summary.append(SUMMARY_HEADERS) + + for field_column in self.fields_columns: + field_id = self.field_id(field_column) + field = self.fields.get(field_id) + field_summary = [] + field_summary.append(field.get('column_number')) + field_summary.append(field_id) + field_summary.append(field.get('name')) + field_summary.append(field.get('label')) + field_summary.append(field.get('description')) + field_summary.append(field.get('optype')) + field_summary_value = field.get('summary', {}) + + if not field_summary_value: + field_summary.append("") # no preferred info + field_summary.append("") # no missing info + field_summary.append("") # no error info + field_summary.append("") # no content summary + field_summary.append("") # no error summary + else: + field_summary.append(json.dumps(field.get('preferred'))) + field_summary.append(field_summary_value.get("missing_count")) + if self.field_errors and field_id in list(self.field_errors.keys()): + errors = self.field_errors.get(field_id) + field_summary.append(errors.get("total")) + else: + field_summary.append("0") + if field['optype'] == 'numeric': + field_summary.append("[%s, %s], mean: %s" % \ + (field_summary_value.get("minimum"), + field_summary_value.get("maximum"), + field_summary_value.get("mean"))) + elif field['optype'] == 'categorical': + categories = field_summary_value.get("categories") + field_summary.append( \ + attribute_summary(categories, "categorìes", + limit=LIST_LIMIT)) + elif field['optype'] == REGIONS: + labels_info = field_summary_value.get("labels") + field_summary.append( \ + attribute_summary(labels_info, "labels", + limit=LIST_LIMIT)) 
+ elif field['optype'] == "text": + terms = field_summary_value.get("tag_cloud") + field_summary.append( \ + attribute_summary(terms, "terms", + limit=LIST_LIMIT)) + elif field['optype'] == "items": + items = field_summary_value.get("items") + field_summary.append( \ + attribute_summary(items, "items", limit=LIST_LIMIT)) + else: + field_summary.append("") + if self.field_errors and field_id in list(self.field_errors.keys()): + field_summary.append( \ + attribute_summary(errors.get("sample"), "errors", + limit=None)) + else: + field_summary.append("") + if writer: + writer.writerow(field_summary) + else: + summary.append(field_summary) + if writer is None: + return summary + writer.close_writer() + return filename + + def new_fields_structure(self, csv_attributes_file=None, + attributes=None, out_file=None): + """Builds the field structure needed to update a fields dictionary + in a BigML resource. + + :param csv_attributes_file: (string) Path to a CSV file like the one + generated by summary_csv. + :param attributes: (list) list of rows containing the + attributes information ordered + as in the summary_csv output. + :param out_file: (string) Path to a JSON file that will be used + to store the new fields structure. If None, + the output is returned as a dict. 
+ """ + if csv_attributes_file is not None: + reader = UnicodeReader(csv_attributes_file).open_reader() + attributes = list(reader) + new_fields_structure = {} + if "field ID" in attributes[0] or "field column" in attributes[0]: + # headers are used + for index in range(1, len(attributes)): + new_attributes = dict(list(zip(attributes[0], + attributes[index]))) + if new_attributes.get("field ID"): + field_id = new_attributes.get("field ID") + if not field_id in list(self.fields.keys()): + raise ValueError("Field ID %s not found" + " in this resource" % field_id) + del new_attributes["field ID"] + else: + try: + field_column = int(new_attributes.get("field column")) + except TypeError: + raise ValueError( + "Field column %s not found" + " in this resource" % new_attributes.get( + "field_column")) + if not field_column in self.fields_columns: + raise ValueError("Field column %s not found" + " in this resource" % field_column) + field_id = self.field_id(field_column) + del new_attributes["field column"] + new_attributes_headers = list(new_attributes.keys()) + for attribute in new_attributes_headers: + if not attribute in list(UPDATABLE_HEADERS.keys()): + del new_attributes[attribute] + else: + new_attributes[UPDATABLE_HEADERS[attribute]] = \ + new_attributes[attribute] + if attribute != UPDATABLE_HEADERS[attribute]: + del new_attributes[attribute] + if "preferred" in new_attributes: + new_attributes['preferred'] = json.loads( \ + new_attributes['preferred']) + new_fields_structure[field_id] = new_attributes + else: + # assume the order given in the summary_csv method + first_attribute = attributes[0][0] + first_column_is_id = False + try: + field_id = self.field_id(int(first_attribute)) + except ValueError: + field_id = first_attribute + first_column_is_id = True + if not field_id in self.fields: + raise ValueError("The first column should contain either the" + " column or ID of the fields. Failed to find" + " %s as either of them." 
% field_id) + headers = SUMMARY_HEADERS[2: 7] + headers = [UPDATABLE_HEADERS[header] for header in headers] + try: + for field_attributes in attributes: + if field_attributes[6] is not None: + field_attributes[6] = json.loads(field_attributes[6]) + field_id = field_attributes[0] if first_column_is_id else \ + self.field_id(int(field_attributes[0])) + new_fields_structure[field_id] = \ + dict(list(zip(headers, field_attributes[1: 6]))) + except ValueError: + raise ValueError("The first column should contain either the" + " column or ID of the fields. Failed to find" + " %s as either of them." % field_id) + new_fields_structure = {"fields": new_fields_structure} + if out_file is None: + return new_fields_structure + try: + with open(out_file, "w") as out: + json.dump(new_fields_structure, out) + except IOError: + raise IOError("Failed writing the fields structure file in" + " %s- Please, check your arguments." % + out_file) + return out_file + + def training_data_example(self, missings=False): + """Generates an example of training data based on the contents of the + summaries of every field + + If missings is set to true, missing values are allowed + + """ + training_data = {} + for _, field in list(self.fields.items()): + if field.get("summary") is not None: + value = None + optype = field.get("optype") + if optype == "numeric": + if missings and random.randint(0, 5) > 3: + value = None + else: + value = numeric_example(field["summary"]) + if optype == "categorical": + if missings and random.randint(0, 5) > 3: + value = None + else: + categories = [cat[0] for cat in field["summary"]["categories"]] + weights = [cat[1] for cat in field["summary"]["categories"]] + value = random.choices(categories, weights)[0] + if optype == "text": + if missings and random.randint(0, 5) > 3: + value = None + else: + text_number = len(field["summary"]["tag_cloud"]) + index = random.randint(0, text_number - 1) + value = field["summary"]["tag_cloud"][index][0] + if optype == "items": + 
if missings and random.randint(0, 5) > 3: + value = None + else: + items_number = len(field["summary"]["items"]) + index = random.randint(0, items_number - 1) + value = field["summary"]["items"][index][0] + if optype == REGIONS: + if missings and random.randint(0, 5) > 3: + value = None + else: + labels_number = len(field["summary"]["labels"]) + index = random.randint(0, labels_number - 1) + field_summary = field["summary"]["labels"][index] + label = field_summary["label"] + xmin = numeric_example(field_summary["xmin"]) + xmax = numeric_example(field_summary["xmax"]) + ymin = numeric_example(field_summary["ymin"]) + ymax = numeric_example(field_summary["ymax"]) + #pylint: disable=locally-disabled,too-many-boolean-expressions + if None in [xmin, xmax, ymin, ymax] or xmax < xmin or \ + ymax < ymin or xmin < 0 or xmax < 0 or \ + ymin < 0 or ymax < 0: + value = [] + else: + value = [[label, xmin, xmax, ymin, ymax]] + + if value is not None: + training_data.update({field["name"]: value}) + return training_data + + def filter_fields_update(self, update_body): + """Filters the updatable attributes according to the type of resource + + """ + fields_info = update_body.get("fields") + if self.resource_type and fields_info is not None: + if self.resource_type == "dataset": + for _, field in list(fields_info.items()): + if field.get("optype") is not None: + del field["optype"] + elif self.resource_type == "source": + for _, field in list(fields_info.items()): + if field.get("preferred") is not None: + del field["preferred"] + update_body["fields"] = fields_info + return update_body diff --git a/bigml/flatline.py b/bigml/flatline.py new file mode 100644 index 00000000..ee18536a --- /dev/null +++ b/bigml/flatline.py @@ -0,0 +1,171 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2022-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +""" +Flatline: Class that encapsulates the Flatline expressions interpreter +""" + +from javascript import require + + +class Flatline: + """A bridge to an underlying nodejs Flatline interpreter. + + This class uses JSPyBridge to launch a Nodejs interpreter that loads + Flatline's javascript implementation and allows interaction via + Python constructs. + + Example: + + Flatline.check_lisp('(+ 1 2)') + Flatline.check_json(["f", 0], dataset=dataset) + + """ + + __FLATLINEJS = require('./flatline/flatline-node.js') + interpreter = __FLATLINEJS.bigml.dixie.flatline + + #pylint: disable=locally-disabled,invalid-name + @staticmethod + def infer_fields(row, prefix=None, offset=None): + """Utility function generating a mock list of fields. + + Usually, checks and applications of Flatline expressions run + in the context of a given dataset's field descriptors, but + during testing it's useful sometimes to provide a mock set of + them, based on the types of the values of the test input rows. 
+ + Example: + + In[1]: Interpreter.infer_fields([0, 'a label']) + Out[2]: [{'column_number': 0, + 'datatype': 'int64', + 'id': '000000', + 'optype': 'numeric'}, + {'column_number': 1, + 'datatype': 'string', + 'id': '000001', + 'optype': 'categorical'}] + + """ + result = [] + id_ = 0 + for v in row: + t = type(v) + optype = 'categorical' + datatype = 'string' + if (t is int or t is float): + optype = 'numeric' + if t is float: + datatype = 'float64' + else: + datatype = 'int64' + id_str = '%06x' % id_ + if prefix: + length = len(prefix) + id_str = prefix + id_str[length:] + column = id_ + if offset: + column = offset + id_ + result.append({'id': id_str, + 'optype':optype, + 'datatype': datatype, + 'column_number': column}) + id_ = id_ + 1 + return result + + @staticmethod + def _dataset(dataset, rows): + """The dataset argument should be a Dataset that contains the + in_fields information + """ + try: + return {"fields": dataset.in_fields} + except AttributeError: + if len(rows) > 0: + return {'fields': Flatline.infer_fields(rows[0])} + return None + + @staticmethod + def defined_functions(): + """A list of the names of all defined Flaline functions""" + return Flatline.interpreter.defined_primitives + + @staticmethod + def check_lisp(sexp, fields=None): + """Checks whether the given lisp s-expression is valid. + + Any operations referring to a dataset's fields will use the + information found in fields structure. + + """ + r = Flatline.interpreter.evaluate_sexp(sexp, fields, True).valueOf() + return r + + @staticmethod + def check_json(json_sexp, fields=None): + """Checks whether the given JSON s-expression is valid. + + Works like `check_lisp` (which see), but taking a JSON + expression represented as a native Python list instead of a + Lisp sexp string. 
+ + """ + r = Flatline.interpreter.evaluate_js(json_sexp, fields).valueOf() + return r + + @staticmethod + def lisp_to_json(sexp): + """ Auxliary function transforming Lisp to Python representation.""" + return Flatline.interpreter.sexp_to_js(sexp) + + @staticmethod + def json_to_lisp(json_sexp): + """ Auxliary function transforming Python to lisp representation.""" + return Flatline.interpreter.js_to_sexp(json_sexp) + + @staticmethod + def apply_lisp(sexp, rows, dataset=None): + """Applies the given Lisp sexp to a set of input rows. + + Input rows are represented as a list of lists of native Python + values. The dataset info should be provided as a Dataset object. + If no dataset is provided, the field characteristics + of the input rows are guessed using `infer_fields`. + + """ + return Flatline.interpreter.eval_and_apply_sexp( + sexp, + Flatline._dataset(dataset, rows), + rows) + + @staticmethod + def apply_json(json_sexp, rows, dataset=None): + """Applies the given JSON sexp to a set of input rows. + + As usual, JSON sexps are represented as Python lists, + e.g. ["+", 1, 2]. + + Input rows are represented as a list of lists of native Python + values. The dataset info should be provided as a Dataset object. + If no dataset is provided, the field characteristics + of the input rows are guessed using `infer_fields`. 
+ + """ + return Flatline.interpreter.eval_and_apply_js( + json_sexp, + Flatline._dataset(dataset, rows), + rows) diff --git a/bigml/flatline/flatline-node.js b/bigml/flatline/flatline-node.js new file mode 100644 index 00000000..9e6ab6b5 --- /dev/null +++ b/bigml/flatline/flatline-node.js @@ -0,0 +1,4898 @@ +if(typeof Math.imul == "undefined" || (Math.imul(0xffffffff,5) == 0)) { + Math.imul = function (a, b) { + var ah = (a >>> 16) & 0xffff; + var al = a & 0xffff; + var bh = (b >>> 16) & 0xffff; + var bl = b & 0xffff; + // the shift by 0 fixes the sign on the high part + // the final |0 converts the unsigned value into a signed value + return ((al * bl) + (((ah * bl + al * bh) << 16) >>> 0)|0); + } +} + + + ;var COMPILED=!0,goog=goog||{};goog.global=this||self;goog.isDef=function(a){return void 0!==a};goog.isString=function(a){return"string"==typeof a};goog.isBoolean=function(a){return"boolean"==typeof a};goog.isNumber=function(a){return"number"==typeof a}; +goog.exportPath_=function(a,b,c){a=a.split(".");c=c||goog.global;a[0]in c||"undefined"==typeof c.execScript||c.execScript("var "+a[0]);for(var d;a.length&&(d=a.shift());)a.length||void 0===b?c=c[d]&&c[d]!==Object.prototype[d]?c[d]:c[d]={}:c[d]=b}; +goog.define=function(a,b){if(!COMPILED){var c=goog.global.CLOSURE_UNCOMPILED_DEFINES,d=goog.global.CLOSURE_DEFINES;c&&void 0===c.nodeType&&Object.prototype.hasOwnProperty.call(c,a)?b=c[a]:d&&void 0===d.nodeType&&Object.prototype.hasOwnProperty.call(d,a)&&(b=d[a])}return b};goog.FEATURESET_YEAR=2012;goog.DEBUG=!0;goog.LOCALE="en";goog.TRUSTED_SITE=!0;goog.STRICT_MODE_COMPATIBLE=!1;goog.DISALLOW_TEST_ONLY_CODE=COMPILED&&!goog.DEBUG;goog.ENABLE_CHROME_APP_SAFE_SCRIPT_LOADING=!1; +goog.provide=function(a){if(goog.isInModuleLoader_())throw Error("goog.provide cannot be used within a module.");if(!COMPILED&&goog.isProvided_(a))throw Error('Namespace "'+a+'" already declared.');goog.constructNamespace_(a)};goog.constructNamespace_=function(a,b){if(!COMPILED){delete 
goog.implicitNamespaces_[a];for(var c=a;(c=c.substring(0,c.lastIndexOf(".")))&&!goog.getObjectByName(c);)goog.implicitNamespaces_[c]=!0}goog.exportPath_(a,b)}; +goog.getScriptNonce=function(a){if(a&&a!=goog.global)return goog.getScriptNonce_(a.document);null===goog.cspNonce_&&(goog.cspNonce_=goog.getScriptNonce_(goog.global.document));return goog.cspNonce_};goog.NONCE_PATTERN_=/^[\w+/_-]+[=]{0,2}$/;goog.cspNonce_=null;goog.getScriptNonce_=function(a){return(a=a.querySelector&&a.querySelector("script[nonce]"))&&(a=a.nonce||a.getAttribute("nonce"))&&goog.NONCE_PATTERN_.test(a)?a:""};goog.VALID_MODULE_RE_=/^[a-zA-Z_$][a-zA-Z0-9._$]*$/; +goog.module=function(a){if("string"!==typeof a||!a||-1==a.search(goog.VALID_MODULE_RE_))throw Error("Invalid module identifier");if(!goog.isInGoogModuleLoader_())throw Error("Module "+a+" has been loaded incorrectly. Note, modules cannot be loaded as normal scripts. They require some kind of pre-processing step. You're likely trying to load a module via a script tag or as a part of a concatenated bundle without rewriting the module. 
For more info see: https://github.com/google/closure-library/wiki/goog.module:-an-ES6-module-like-alternative-to-goog.provide."); +if(goog.moduleLoaderState_.moduleName)throw Error("goog.module may only be called once per module.");goog.moduleLoaderState_.moduleName=a;if(!COMPILED){if(goog.isProvided_(a))throw Error('Namespace "'+a+'" already declared.');delete goog.implicitNamespaces_[a]}};goog.module.get=function(a){return goog.module.getInternal_(a)}; +goog.module.getInternal_=function(a){if(!COMPILED){if(a in goog.loadedModules_)return goog.loadedModules_[a].exports;if(!goog.implicitNamespaces_[a])return a=goog.getObjectByName(a),null!=a?a:null}return null};goog.ModuleType={ES6:"es6",GOOG:"goog"};goog.moduleLoaderState_=null;goog.isInModuleLoader_=function(){return goog.isInGoogModuleLoader_()||goog.isInEs6ModuleLoader_()};goog.isInGoogModuleLoader_=function(){return!!goog.moduleLoaderState_&&goog.moduleLoaderState_.type==goog.ModuleType.GOOG}; +goog.isInEs6ModuleLoader_=function(){if(goog.moduleLoaderState_&&goog.moduleLoaderState_.type==goog.ModuleType.ES6)return!0;var a=goog.global.$jscomp;return a?"function"!=typeof a.getCurrentModulePath?!1:!!a.getCurrentModulePath():!1}; +goog.module.declareLegacyNamespace=function(){if(!COMPILED&&!goog.isInGoogModuleLoader_())throw Error("goog.module.declareLegacyNamespace must be called from within a goog.module");if(!COMPILED&&!goog.moduleLoaderState_.moduleName)throw Error("goog.module must be called prior to goog.module.declareLegacyNamespace.");goog.moduleLoaderState_.declareLegacyNamespace=!0}; +goog.declareModuleId=function(a){if(!COMPILED){if(!goog.isInEs6ModuleLoader_())throw Error("goog.declareModuleId may only be called from within an ES6 module");if(goog.moduleLoaderState_&&goog.moduleLoaderState_.moduleName)throw Error("goog.declareModuleId may only be called once per module.");if(a in goog.loadedModules_)throw Error('Module with namespace "'+a+'" already 
exists.');}if(goog.moduleLoaderState_)goog.moduleLoaderState_.moduleName=a;else{var b=goog.global.$jscomp;if(!b||"function"!=typeof b.getCurrentModulePath)throw Error('Module with namespace "'+ +a+'" has been loaded incorrectly.');b=b.require(b.getCurrentModulePath());goog.loadedModules_[a]={exports:b,type:goog.ModuleType.ES6,moduleId:a}}};goog.setTestOnly=function(a){if(goog.DISALLOW_TEST_ONLY_CODE)throw a=a||"",Error("Importing test-only code into non-debug environment"+(a?": "+a:"."));};goog.forwardDeclare=function(a){};COMPILED||(goog.isProvided_=function(a){return a in goog.loadedModules_||!goog.implicitNamespaces_[a]&&null!=goog.getObjectByName(a)},goog.implicitNamespaces_={"goog.module":!0}); +goog.getObjectByName=function(a,b){a=a.split(".");b=b||goog.global;for(var c=0;c>>0);goog.uidCounter_=0;goog.getHashCode=goog.getUid; +goog.removeHashCode=goog.removeUid;goog.cloneObject=function(a){var b=goog.typeOf(a);if("object"==b||"array"==b){if("function"===typeof a.clone)return a.clone();b="array"==b?[]:{};for(var c in a)b[c]=goog.cloneObject(a[c]);return b}return a};goog.bindNative_=function(a,b,c){return a.call.apply(a.bind,arguments)}; +goog.bindJs_=function(a,b,c){if(!a)throw Error();if(2c?Math.max(0,a.length+c):c;if("string"===typeof a)return"string"!==typeof b||1!=b.length?-1:a.indexOf(b,c);for(;cc&&(c=Math.max(0,a.length+c));if("string"===typeof a)return"string"!==typeof b||1!=b.length?-1:a.lastIndexOf(b,c);for(;0<=c;c--)if(c in a&&a[c]===b)return c;return-1}; +goog.array.forEach=goog.NATIVE_ARRAY_PROTOTYPES&&(goog.array.ASSUME_NATIVE_FUNCTIONS||Array.prototype.forEach)?function(a,b,c){goog.asserts.assert(null!=a.length);Array.prototype.forEach.call(a,b,c)}:function(a,b,c){for(var d=a.length,e="string"===typeof a?a.split(""):a,f=0;fb?null:"string"===typeof a?a.charAt(b):a[b]};goog.array.findIndex=function(a,b,c){for(var d=a.length,e="string"===typeof a?a.split(""):a,f=0;fb?null:"string"===typeof a?a.charAt(b):a[b]}; 
+goog.array.findIndexRight=function(a,b,c){var d=a.length,e="string"===typeof a?a.split(""):a;for(--d;0<=d;d--)if(d in e&&b.call(c,e[d],d,a))return d;return-1};goog.array.contains=function(a,b){return 0<=goog.array.indexOf(a,b)};goog.array.isEmpty=function(a){return 0==a.length};goog.array.clear=function(a){if(!goog.isArray(a))for(var b=a.length-1;0<=b;b--)delete a[b];a.length=0};goog.array.insert=function(a,b){goog.array.contains(a,b)||a.push(b)}; +goog.array.insertAt=function(a,b,c){goog.array.splice(a,c,0,b)};goog.array.insertArrayAt=function(a,b,c){goog.partial(goog.array.splice,a,c,0).apply(null,b)};goog.array.insertBefore=function(a,b,c){var d;2==arguments.length||0>(d=goog.array.indexOf(a,c))?a.push(b):goog.array.insertAt(a,b,d)};goog.array.remove=function(a,b){b=goog.array.indexOf(a,b);var c;(c=0<=b)&&goog.array.removeAt(a,b);return c}; +goog.array.removeLast=function(a,b){b=goog.array.lastIndexOf(a,b);return 0<=b?(goog.array.removeAt(a,b),!0):!1};goog.array.removeAt=function(a,b){goog.asserts.assert(null!=a.length);return 1==Array.prototype.splice.call(a,b,1).length};goog.array.removeIf=function(a,b,c){b=goog.array.findIndex(a,b,c);return 0<=b?(goog.array.removeAt(a,b),!0):!1};goog.array.removeAllIf=function(a,b,c){var d=0;goog.array.forEachRight(a,function(e,f){b.call(c,e,f,a)&&goog.array.removeAt(a,f)&&d++});return d}; +goog.array.concat=function(a){return Array.prototype.concat.apply([],arguments)};goog.array.join=function(a){return Array.prototype.concat.apply([],arguments)};goog.array.toArray=function(a){var b=a.length;if(0=arguments.length?Array.prototype.slice.call(a,b):Array.prototype.slice.call(a,b,c)}; +goog.array.removeDuplicates=function(a,b,c){b=b||a;var d=function(a){return goog.isObject(a)?"o"+goog.getUid(a):(typeof a).charAt(0)+a};c=c||d;d={};for(var e=0,f=0;f>>1);var l=c?b.call(e,a[k],k,a):b(d,a[k]);0b?1:ac?(goog.array.insertAt(a,b,-(c+1)),!0):!1};goog.array.binaryRemove=function(a,b,c){b=goog.array.binarySearch(a,b,c);return 
0<=b?goog.array.removeAt(a,b):!1}; +goog.array.bucket=function(a,b,c){for(var d={},e=0;ec*(f-e))return[];if(0f;a+=c)d.push(a);return d};goog.array.repeat=function(a,b){for(var c=[],d=0;db&&Array.prototype.push.apply(a,a.splice(0,-b)));return a}; +goog.array.moveItem=function(a,b,c){goog.asserts.assert(0<=b&&ba?goog.i18n.bidi.Dir.RTL:b?null:goog.i18n.bidi.Dir.NEUTRAL:null==a?null:a?goog.i18n.bidi.Dir.RTL:goog.i18n.bidi.Dir.LTR};goog.i18n.bidi.ltrChars_="A-Za-zÀ-ÖØ-öø-ʸ̀-֐ऀ-῿‎Ⰰ-\ud801\ud804-\ud839\ud83c-\udbff豈-﬜︀-﹯﻽-￿";goog.i18n.bidi.rtlChars_="֑-ۯۺ-ࣿ‏\ud802-\ud803\ud83a-\ud83bיִ-﷿ﹰ-ﻼ";goog.i18n.bidi.htmlSkipReg_=/<[^>]*>|&[^;]+;/g; +goog.i18n.bidi.stripHtmlIfNeeded_=function(a,b){return b?a.replace(goog.i18n.bidi.htmlSkipReg_,""):a};goog.i18n.bidi.rtlCharReg_=new RegExp("["+goog.i18n.bidi.rtlChars_+"]");goog.i18n.bidi.ltrCharReg_=new RegExp("["+goog.i18n.bidi.ltrChars_+"]");goog.i18n.bidi.hasAnyRtl=function(a,b){return goog.i18n.bidi.rtlCharReg_.test(goog.i18n.bidi.stripHtmlIfNeeded_(a,b))};goog.i18n.bidi.hasRtlChar=goog.i18n.bidi.hasAnyRtl; +goog.i18n.bidi.hasAnyLtr=function(a,b){return goog.i18n.bidi.ltrCharReg_.test(goog.i18n.bidi.stripHtmlIfNeeded_(a,b))};goog.i18n.bidi.ltrRe_=new RegExp("^["+goog.i18n.bidi.ltrChars_+"]");goog.i18n.bidi.rtlRe_=new RegExp("^["+goog.i18n.bidi.rtlChars_+"]");goog.i18n.bidi.isRtlChar=function(a){return goog.i18n.bidi.rtlRe_.test(a)};goog.i18n.bidi.isLtrChar=function(a){return goog.i18n.bidi.ltrRe_.test(a)};goog.i18n.bidi.isNeutralChar=function(a){return!goog.i18n.bidi.isLtrChar(a)&&!goog.i18n.bidi.isRtlChar(a)}; +goog.i18n.bidi.ltrDirCheckRe_=new RegExp("^[^"+goog.i18n.bidi.rtlChars_+"]*["+goog.i18n.bidi.ltrChars_+"]");goog.i18n.bidi.rtlDirCheckRe_=new RegExp("^[^"+goog.i18n.bidi.ltrChars_+"]*["+goog.i18n.bidi.rtlChars_+"]");goog.i18n.bidi.startsWithRtl=function(a,b){return goog.i18n.bidi.rtlDirCheckRe_.test(goog.i18n.bidi.stripHtmlIfNeeded_(a,b))};goog.i18n.bidi.isRtlText=goog.i18n.bidi.startsWithRtl; 
+goog.i18n.bidi.startsWithLtr=function(a,b){return goog.i18n.bidi.ltrDirCheckRe_.test(goog.i18n.bidi.stripHtmlIfNeeded_(a,b))};goog.i18n.bidi.isLtrText=goog.i18n.bidi.startsWithLtr;goog.i18n.bidi.isRequiredLtrRe_=/^http:\/\/.*/;goog.i18n.bidi.isNeutralText=function(a,b){a=goog.i18n.bidi.stripHtmlIfNeeded_(a,b);return goog.i18n.bidi.isRequiredLtrRe_.test(a)||!goog.i18n.bidi.hasAnyLtr(a)&&!goog.i18n.bidi.hasAnyRtl(a)}; +goog.i18n.bidi.ltrExitDirCheckRe_=new RegExp("["+goog.i18n.bidi.ltrChars_+"][^"+goog.i18n.bidi.rtlChars_+"]*$");goog.i18n.bidi.rtlExitDirCheckRe_=new RegExp("["+goog.i18n.bidi.rtlChars_+"][^"+goog.i18n.bidi.ltrChars_+"]*$");goog.i18n.bidi.endsWithLtr=function(a,b){return goog.i18n.bidi.ltrExitDirCheckRe_.test(goog.i18n.bidi.stripHtmlIfNeeded_(a,b))};goog.i18n.bidi.isLtrExitText=goog.i18n.bidi.endsWithLtr; +goog.i18n.bidi.endsWithRtl=function(a,b){return goog.i18n.bidi.rtlExitDirCheckRe_.test(goog.i18n.bidi.stripHtmlIfNeeded_(a,b))};goog.i18n.bidi.isRtlExitText=goog.i18n.bidi.endsWithRtl;goog.i18n.bidi.rtlLocalesRe_=/^(ar|ckb|dv|he|iw|fa|nqo|ps|sd|ug|ur|yi|.*[-_](Adlm|Arab|Hebr|Nkoo|Rohg|Thaa))(?!.*[-_](Latn|Cyrl)($|-|_))($|-|_)/i;goog.i18n.bidi.isRtlLanguage=function(a){return goog.i18n.bidi.rtlLocalesRe_.test(a)};goog.i18n.bidi.bracketGuardTextRe_=/(\(.*?\)+)|(\[.*?\]+)|(\{.*?\}+)|(<.*?>+)/g; +goog.i18n.bidi.guardBracketInText=function(a,b){b=(void 0===b?goog.i18n.bidi.hasAnyRtl(a):b)?goog.i18n.bidi.Format.RLM:goog.i18n.bidi.Format.LRM;return a.replace(goog.i18n.bidi.bracketGuardTextRe_,b+"$\x26"+b)};goog.i18n.bidi.enforceRtlInHtml=function(a){return"\x3c"==a.charAt(0)?a.replace(/<\w+/,"$\x26 dir\x3drtl"):"\n\x3cspan dir\x3drtl\x3e"+a+"\x3c/span\x3e"};goog.i18n.bidi.enforceRtlInText=function(a){return goog.i18n.bidi.Format.RLE+a+goog.i18n.bidi.Format.PDF}; +goog.i18n.bidi.enforceLtrInHtml=function(a){return"\x3c"==a.charAt(0)?a.replace(/<\w+/,"$\x26 dir\x3dltr"):"\n\x3cspan 
dir\x3dltr\x3e"+a+"\x3c/span\x3e"};goog.i18n.bidi.enforceLtrInText=function(a){return goog.i18n.bidi.Format.LRE+a+goog.i18n.bidi.Format.PDF};goog.i18n.bidi.dimensionsRe_=/:\s*([.\d][.\w]*)\s+([.\d][.\w]*)\s+([.\d][.\w]*)\s+([.\d][.\w]*)/g;goog.i18n.bidi.leftRe_=/left/gi;goog.i18n.bidi.rightRe_=/right/gi;goog.i18n.bidi.tempRe_=/%%%%/g; +goog.i18n.bidi.mirrorCSS=function(a){return a.replace(goog.i18n.bidi.dimensionsRe_,":$1 $4 $3 $2").replace(goog.i18n.bidi.leftRe_,"%%%%").replace(goog.i18n.bidi.rightRe_,goog.i18n.bidi.LEFT).replace(goog.i18n.bidi.tempRe_,goog.i18n.bidi.RIGHT)};goog.i18n.bidi.doubleQuoteSubstituteRe_=/([\u0591-\u05f2])"/g;goog.i18n.bidi.singleQuoteSubstituteRe_=/([\u0591-\u05f2])'/g; +goog.i18n.bidi.normalizeHebrewQuote=function(a){return a.replace(goog.i18n.bidi.doubleQuoteSubstituteRe_,"$1״").replace(goog.i18n.bidi.singleQuoteSubstituteRe_,"$1׳")};goog.i18n.bidi.wordSeparatorRe_=/\s+/;goog.i18n.bidi.hasNumeralsRe_=/[\d\u06f0-\u06f9]/;goog.i18n.bidi.rtlDetectionThreshold_=.4; +goog.i18n.bidi.estimateDirection=function(a,b){let c=0,d=0,e=!1;a=goog.i18n.bidi.stripHtmlIfNeeded_(a,b).split(goog.i18n.bidi.wordSeparatorRe_);for(b=0;bgoog.i18n.bidi.rtlDetectionThreshold_?goog.i18n.bidi.Dir.RTL:goog.i18n.bidi.Dir.LTR}; +goog.i18n.bidi.detectRtlDirectionality=function(a,b){return goog.i18n.bidi.estimateDirection(a,b)==goog.i18n.bidi.Dir.RTL};goog.i18n.bidi.setElementDirAndAlign=function(a,b){a&&(b=goog.i18n.bidi.toDir(b))&&(a.style.textAlign=b==goog.i18n.bidi.Dir.RTL?goog.i18n.bidi.RIGHT:goog.i18n.bidi.LEFT,a.dir=b==goog.i18n.bidi.Dir.RTL?"rtl":"ltr")}; +goog.i18n.bidi.setElementDirByTextDirectionality=function(a,b){switch(goog.i18n.bidi.estimateDirection(b)){case goog.i18n.bidi.Dir.LTR:a.dir="ltr";break;case 
goog.i18n.bidi.Dir.RTL:a.dir="rtl";break;default:a.removeAttribute("dir")}};goog.i18n.bidi.DirectionalString=function(){};goog.html.TrustedResourceUrl=function(a,b){this.privateDoNotAccessOrElseTrustedResourceUrlWrappedValue_=a===goog.html.TrustedResourceUrl.CONSTRUCTOR_TOKEN_PRIVATE_&&b||"";this.TRUSTED_RESOURCE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_=goog.html.TrustedResourceUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_};goog.html.TrustedResourceUrl.prototype.implementsGoogStringTypedString=!0;goog.html.TrustedResourceUrl.prototype.getTypedStringValue=function(){return this.privateDoNotAccessOrElseTrustedResourceUrlWrappedValue_.toString()}; +goog.html.TrustedResourceUrl.prototype.implementsGoogI18nBidiDirectionalString=!0;goog.html.TrustedResourceUrl.prototype.getDirection=function(){return goog.i18n.bidi.Dir.LTR}; +goog.html.TrustedResourceUrl.prototype.cloneWithParams=function(a,b){var c=goog.html.TrustedResourceUrl.unwrap(this);c=goog.html.TrustedResourceUrl.URL_PARAM_PARSER_.exec(c);var d=c[3]||"";return goog.html.TrustedResourceUrl.createTrustedResourceUrlSecurityPrivateDoNotAccessOrElse(c[1]+goog.html.TrustedResourceUrl.stringifyParams_("?",c[2]||"",a)+goog.html.TrustedResourceUrl.stringifyParams_("#",d,b))}; +goog.DEBUG&&(goog.html.TrustedResourceUrl.prototype.toString=function(){return"TrustedResourceUrl{"+this.privateDoNotAccessOrElseTrustedResourceUrlWrappedValue_+"}"});goog.html.TrustedResourceUrl.unwrap=function(a){return goog.html.TrustedResourceUrl.unwrapTrustedScriptURL(a).toString()}; +goog.html.TrustedResourceUrl.unwrapTrustedScriptURL=function(a){if(a instanceof goog.html.TrustedResourceUrl&&a.constructor===goog.html.TrustedResourceUrl&&a.TRUSTED_RESOURCE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_===goog.html.TrustedResourceUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_)return a.privateDoNotAccessOrElseTrustedResourceUrlWrappedValue_;goog.asserts.fail("expected object of type TrustedResourceUrl, got '"+a+"' of type 
"+goog.typeOf(a));return"type_error:TrustedResourceUrl"}; +goog.html.TrustedResourceUrl.format=function(a,b){var c=goog.string.Const.unwrap(a);if(!goog.html.TrustedResourceUrl.BASE_URL_.test(c))throw Error("Invalid TrustedResourceUrl format: "+c);a=c.replace(goog.html.TrustedResourceUrl.FORMAT_MARKER_,function(a,e){if(!Object.prototype.hasOwnProperty.call(b,e))throw Error('Found marker, "'+e+'", in format string, "'+c+'", but no valid label mapping found in args: '+JSON.stringify(b));a=b[e];return a instanceof goog.string.Const?goog.string.Const.unwrap(a): +encodeURIComponent(String(a))});return goog.html.TrustedResourceUrl.createTrustedResourceUrlSecurityPrivateDoNotAccessOrElse(a)};goog.html.TrustedResourceUrl.FORMAT_MARKER_=/%{(\w+)}/g;goog.html.TrustedResourceUrl.BASE_URL_=/^((https:)?\/\/[0-9a-z.:[\]-]+\/|\/[^/\\]|[^:/\\%]+\/|[^:/\\%]*[?#]|about:blank#)/i;goog.html.TrustedResourceUrl.URL_PARAM_PARSER_=/^([^?#]*)(\?[^#]*)?(#[\s\S]*)?/; +goog.html.TrustedResourceUrl.formatWithParams=function(a,b,c,d){return goog.html.TrustedResourceUrl.format(a,b).cloneWithParams(c,d)};goog.html.TrustedResourceUrl.fromConstant=function(a){return goog.html.TrustedResourceUrl.createTrustedResourceUrlSecurityPrivateDoNotAccessOrElse(goog.string.Const.unwrap(a))};goog.html.TrustedResourceUrl.fromConstants=function(a){for(var b="",c=0;ca.length?"\x26":"")+encodeURIComponent(d)+"\x3d"+encodeURIComponent(String(g)))}}return b};goog.html.TrustedResourceUrl.CONSTRUCTOR_TOKEN_PRIVATE_={};goog.string.internal={};goog.string.internal.startsWith=function(a,b){return 0==a.lastIndexOf(b,0)};goog.string.internal.endsWith=function(a,b){const c=a.length-b.length;return 0<=c&&a.indexOf(b,c)==c};goog.string.internal.caseInsensitiveStartsWith=function(a,b){return 0==goog.string.internal.caseInsensitiveCompare(b,a.substr(0,b.length))};goog.string.internal.caseInsensitiveEndsWith=function(a,b){return 0==goog.string.internal.caseInsensitiveCompare(b,a.substr(a.length-b.length,b.length))}; 
+goog.string.internal.caseInsensitiveEquals=function(a,b){return a.toLowerCase()==b.toLowerCase()};goog.string.internal.isEmptyOrWhitespace=function(a){return/^[\s\xa0]*$/.test(a)};goog.string.internal.trim=goog.TRUSTED_SITE&&String.prototype.trim?function(a){return a.trim()}:function(a){return/^[\s\xa0]*([\s\S]*?)[\s\xa0]*$/.exec(a)[1]};goog.string.internal.caseInsensitiveCompare=function(a,b){a=String(a).toLowerCase();b=String(b).toLowerCase();return a/g; +goog.string.internal.QUOT_RE_=/"/g;goog.string.internal.SINGLE_QUOTE_RE_=/'/g;goog.string.internal.NULL_RE_=/\x00/g;goog.string.internal.ALL_RE_=/[\x00&<>"']/;goog.string.internal.whitespaceEscape=function(a,b){return goog.string.internal.newLineToBr(a.replace(/ /g," \x26#160;"),b)};goog.string.internal.contains=function(a,b){return-1!=a.indexOf(b)};goog.string.internal.caseInsensitiveContains=function(a,b){return goog.string.internal.contains(a.toLowerCase(),b.toLowerCase())}; +goog.string.internal.compareVersions=function(a,b){var c=0;a=goog.string.internal.trim(String(a)).split(".");b=goog.string.internal.trim(String(b)).split(".");const d=Math.max(a.length,b.length);for(let g=0;0==c&&gb?1:0};goog.html.SafeUrl=function(a,b){this.privateDoNotAccessOrElseSafeUrlWrappedValue_=a===goog.html.SafeUrl.CONSTRUCTOR_TOKEN_PRIVATE_&&b||"";this.SAFE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_=goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_};goog.html.SafeUrl.INNOCUOUS_STRING="about:invalid#zClosurez";goog.html.SafeUrl.prototype.implementsGoogStringTypedString=!0;goog.html.SafeUrl.prototype.getTypedStringValue=function(){return this.privateDoNotAccessOrElseSafeUrlWrappedValue_.toString()}; +goog.html.SafeUrl.prototype.implementsGoogI18nBidiDirectionalString=!0;goog.html.SafeUrl.prototype.getDirection=function(){return goog.i18n.bidi.Dir.LTR};goog.DEBUG&&(goog.html.SafeUrl.prototype.toString=function(){return"SafeUrl{"+this.privateDoNotAccessOrElseSafeUrlWrappedValue_+"}"}); 
+goog.html.SafeUrl.unwrap=function(a){if(a instanceof goog.html.SafeUrl&&a.constructor===goog.html.SafeUrl&&a.SAFE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_===goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_)return a.privateDoNotAccessOrElseSafeUrlWrappedValue_;goog.asserts.fail("expected object of type SafeUrl, got '"+a+"' of type "+goog.typeOf(a));return"type_error:SafeUrl"};goog.html.SafeUrl.fromConstant=function(a){return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(goog.string.Const.unwrap(a))}; +goog.html.SAFE_MIME_TYPE_PATTERN_=/^(?:audio\/(?:3gpp2|3gpp|aac|L16|midi|mp3|mp4|mpeg|oga|ogg|opus|x-m4a|x-wav|wav|webm)|image\/(?:bmp|gif|jpeg|jpg|png|tiff|webp|x-icon)|text\/csv|video\/(?:mpeg|mp4|ogg|webm|quicktime))(?:;\w+=(?:\w+|"[\w;=]+"))*$/i;goog.html.SafeUrl.isSafeMimeType=function(a){return goog.html.SAFE_MIME_TYPE_PATTERN_.test(a)};goog.html.SafeUrl.fromBlob=function(a){a=goog.html.SAFE_MIME_TYPE_PATTERN_.test(a.type)?goog.fs.url.createObjectUrl(a):goog.html.SafeUrl.INNOCUOUS_STRING;return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(a)}; +goog.html.DATA_URL_PATTERN_=/^data:([^,]*);base64,[a-z0-9+\/]+=*$/i;goog.html.SafeUrl.fromDataUrl=function(a){a=a.replace(/(%0A|%0D)/g,"");var b=a.match(goog.html.DATA_URL_PATTERN_);b=b&&goog.html.SAFE_MIME_TYPE_PATTERN_.test(b[1]);return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(b?a:goog.html.SafeUrl.INNOCUOUS_STRING)};goog.html.SafeUrl.fromTelUrl=function(a){goog.string.internal.caseInsensitiveStartsWith(a,"tel:")||(a=goog.html.SafeUrl.INNOCUOUS_STRING);return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(a)}; +goog.html.SIP_URL_PATTERN_=/^sip[s]?:[+a-z0-9_.!$%&'*\/=^`{|}~-]+@([a-z0-9-]+\.)+[a-z0-9]{2,63}$/i;goog.html.SafeUrl.fromSipUrl=function(a){goog.html.SIP_URL_PATTERN_.test(decodeURIComponent(a))||(a=goog.html.SafeUrl.INNOCUOUS_STRING);return 
goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(a)};goog.html.SafeUrl.fromFacebookMessengerUrl=function(a){goog.string.internal.caseInsensitiveStartsWith(a,"fb-messenger://share")||(a=goog.html.SafeUrl.INNOCUOUS_STRING);return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(a)}; +goog.html.SafeUrl.fromWhatsAppUrl=function(a){goog.string.internal.caseInsensitiveStartsWith(a,"whatsapp://send")||(a=goog.html.SafeUrl.INNOCUOUS_STRING);return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(a)};goog.html.SafeUrl.fromSmsUrl=function(a){goog.string.internal.caseInsensitiveStartsWith(a,"sms:")&&goog.html.SafeUrl.isSmsUrlBodyValid_(a)||(a=goog.html.SafeUrl.INNOCUOUS_STRING);return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(a)}; +goog.html.SafeUrl.isSmsUrlBodyValid_=function(a){var b=a.indexOf("#");0+~[\]()=^$|]+$/.test(c))throw Error("Selector allows only [-_a-zA-Z0-9#.:* ,\x3e+~[\\]()\x3d^$|] and strings, got: "+a);if(!goog.html.SafeStyleSheet.hasBalancedBrackets_(c))throw Error("() and [] in selector must be balanced, got: "+a);b instanceof goog.html.SafeStyle|| +(b=goog.html.SafeStyle.create(b));a=a+"{"+goog.html.SafeStyle.unwrap(b).replace(/=a||"€"<=a&&"�">=a}; +goog.string.stripNewlines=function(a){return a.replace(/(\r\n|\r|\n)+/g," ")};goog.string.canonicalizeNewlines=function(a){return a.replace(/(\r\n|\r|\n)/g,"\n")};goog.string.normalizeWhitespace=function(a){return a.replace(/\xa0|\s/g," ")};goog.string.normalizeSpaces=function(a){return a.replace(/\xa0|[ \t]+/g," ")};goog.string.collapseBreakingSpaces=function(a){return a.replace(/[\t\r\n ]+/g," ").replace(/^[\t\r\n ]+|[\t\r\n ]+$/g,"")};goog.string.trim=goog.string.internal.trim; +goog.string.trimLeft=function(a){return a.replace(/^[\s\xa0]+/,"")};goog.string.trimRight=function(a){return a.replace(/[\s\xa0]+$/,"")};goog.string.caseInsensitiveCompare=goog.string.internal.caseInsensitiveCompare; 
+goog.string.numberAwareCompare_=function(a,b,c){if(a==b)return 0;if(!a)return-1;if(!b)return 1;for(var d=a.toLowerCase().match(c),e=b.toLowerCase().match(c),f=Math.min(d.length,e.length),g=0;gb&&(a=a.substring(0,b-3)+"...");c&&(a=goog.string.htmlEscape(a));return a};goog.string.truncateMiddle=function(a,b,c,d){c&&(a=goog.string.unescapeEntities(a));if(d&&a.length>b){d>b&&(d=b);var e=a.length-d;a=a.substring(0,b-d)+"..."+a.substring(e)}else a.length>b&&(d=Math.floor(b/2),e=a.length-d,a=a.substring(0,d+b%2)+"..."+a.substring(e));c&&(a=goog.string.htmlEscape(a));return a}; +goog.string.specialEscapeChars_={"\x00":"\\0","\b":"\\b","\f":"\\f","\n":"\\n","\r":"\\r","\t":"\\t","\x0B":"\\x0B",'"':'\\"',"\\":"\\\\","\x3c":"\\u003C"};goog.string.jsEscapeCache_={"'":"\\'"};goog.string.quote=function(a){a=String(a);for(var b=['"'],c=0;ce?d:goog.string.escapeChar(d))}b.push('"');return b.join("")}; +goog.string.escapeString=function(a){for(var b=[],c=0;cb)var c=a;else{if(256>b){if(c="\\x",16>b||256b&&(c+="0");c+=b.toString(16).toUpperCase()}return goog.string.jsEscapeCache_[a]=c};goog.string.contains=goog.string.internal.contains;goog.string.caseInsensitiveContains=goog.string.internal.caseInsensitiveContains; +goog.string.countOf=function(a,b){return a&&b?a.split(b).length-1:0};goog.string.removeAt=function(a,b,c){var d=a;0<=b&&b>>0;return b};goog.string.uniqueStringCounter_=2147483648*Math.random()|0; +goog.string.createUniqueString=function(){return"goog_"+goog.string.uniqueStringCounter_++};goog.string.toNumber=function(a){var b=Number(a);return 0==b&&goog.string.isEmptyOrWhitespace(a)?NaN:b};goog.string.isLowerCamelCase=function(a){return/^[a-z]+([A-Z][a-z]*)*$/.test(a)};goog.string.isUpperCamelCase=function(a){return/^([A-Z][a-z]*)+$/.test(a)};goog.string.toCamelCase=function(a){return String(a).replace(/\-([a-z])/g,function(a,c){return c.toUpperCase()})}; +goog.string.toSelectorCase=function(a){return 
String(a).replace(/([A-Z])/g,"-$1").toLowerCase()};goog.string.toTitleCase=function(a,b){b="string"===typeof b?goog.string.regExpEscape(b):"\\s";return a.replace(new RegExp("(^"+(b?"|["+b+"]+":"")+")([a-z])","g"),function(a,b,e){return b+e.toUpperCase()})};goog.string.capitalize=function(a){return String(a.charAt(0)).toUpperCase()+String(a.substr(1)).toLowerCase()}; +goog.string.parseInt=function(a){isFinite(a)&&(a=String(a));return"string"===typeof a?/^\s*-?0x/i.test(a)?parseInt(a,16):parseInt(a,10):NaN};goog.string.splitLimit=function(a,b,c){a=a.split(b);for(var d=[];0c&&(c=e)}return-1==c?a:a.slice(c+1)}; +goog.string.editDistance=function(a,b){var c=[],d=[];if(a==b)return 0;if(!a.length||!b.length)return Math.max(a.length,b.length);for(var e=0;ea*b?a+b:a};goog.math.lerp=function(a,b,c){return a+c*(b-a)};goog.math.nearlyEquals=function(a,b,c){return Math.abs(a-b)<=(c||1E-6)};goog.math.standardAngle=function(a){return goog.math.modulo(a,360)}; +goog.math.standardAngleInRadians=function(a){return goog.math.modulo(a,2*Math.PI)};goog.math.toRadians=function(a){return a*Math.PI/180};goog.math.toDegrees=function(a){return 180*a/Math.PI};goog.math.angleDx=function(a,b){return b*Math.cos(goog.math.toRadians(a))};goog.math.angleDy=function(a,b){return b*Math.sin(goog.math.toRadians(a))};goog.math.angle=function(a,b,c,d){return goog.math.standardAngle(goog.math.toDegrees(Math.atan2(d-b,c-a)))}; +goog.math.angleDifference=function(a,b){a=goog.math.standardAngle(b)-goog.math.standardAngle(a);180=a&&(a=360+a);return a};goog.math.sign=function(a){return 0a?-1:a}; +goog.math.longestCommonSubsequence=function(a,b,c,d){c=c||function(a,b){return a==b};d=d||function(b,c){return a[b]};for(var e=a.length,f=b.length,g=[],h=0;hg[h][k-1]?h--:k--;return l}; +goog.math.sum=function(a){return goog.array.reduce(arguments,function(a,c){return a+c},0)};goog.math.average=function(a){return goog.math.sum.apply(null,arguments)/arguments.length};goog.math.sampleVariance=function(a){var 
b=arguments.length;if(2>b)return 0;var c=goog.math.average.apply(null,arguments);return goog.math.sum.apply(null,goog.array.map(arguments,function(a){return Math.pow(a-c,2)}))/(b-1)};goog.math.standardDeviation=function(a){return Math.sqrt(goog.math.sampleVariance.apply(null,arguments))}; +goog.math.isInt=function(a){return isFinite(a)&&0==a%1};goog.math.isFiniteNumber=function(a){return isFinite(a)};goog.math.isNegativeZero=function(a){return 0==a&&0>1/a};goog.math.log10Floor=function(a){if(0a?1:0)}return 0==a?-Infinity:NaN};goog.math.safeFloor=function(a,b){goog.asserts.assert(void 0===b||0=a.length)throw goog.iter.StopIteration;if(b in a)return a[b++];b++}};return c}throw Error("Not implemented");}; +goog.iter.forEach=function(a,b,c){if(goog.isArrayLike(a))try{goog.array.forEach(a,b,c)}catch(d){if(d!==goog.iter.StopIteration)throw d;}else{a=goog.iter.toIterator(a);try{for(;;)b.call(c,a.next(),void 0,a)}catch(d){if(d!==goog.iter.StopIteration)throw d;}}};goog.iter.filter=function(a,b,c){var d=goog.iter.toIterator(a);a=new goog.iter.Iterator;a.next=function(){for(;;){var a=d.next();if(b.call(c,a,void 0,d))return a}};return a}; +goog.iter.filterFalse=function(a,b,c){return goog.iter.filter(a,goog.functions.not(b),c)};goog.iter.range=function(a,b,c){var d=0,e=a,f=c||1;1=e||0>f&&d<=e)throw goog.iter.StopIteration;var a=d;d+=f;return a};return g};goog.iter.join=function(a,b){return goog.iter.toArray(a).join(b)}; +goog.iter.map=function(a,b,c){var d=goog.iter.toIterator(a);a=new goog.iter.Iterator;a.next=function(){var a=d.next();return b.call(c,a,void 0,d)};return a};goog.iter.reduce=function(a,b,c,d){var e=c;goog.iter.forEach(a,function(a){e=b.call(d,e,a)});return e};goog.iter.some=function(a,b,c){a=goog.iter.toIterator(a);try{for(;;)if(b.call(c,a.next(),void 0,a))return!0}catch(d){if(d!==goog.iter.StopIteration)throw d;}return!1}; +goog.iter.every=function(a,b,c){a=goog.iter.toIterator(a);try{for(;;)if(!b.call(c,a.next(),void 
0,a))return!1}catch(d){if(d!==goog.iter.StopIteration)throw d;}return!0};goog.iter.chain=function(a){return goog.iter.chainFromIterable(arguments)}; +goog.iter.chainFromIterable=function(a){var b=goog.iter.toIterator(a);a=new goog.iter.Iterator;var c=null;a.next=function(){for(;;){if(null==c){var a=b.next();c=goog.iter.toIterator(a)}try{return c.next()}catch(e){if(e!==goog.iter.StopIteration)throw e;c=null}}};return a};goog.iter.dropWhile=function(a,b,c){var d=goog.iter.toIterator(a);a=new goog.iter.Iterator;var e=!0;a.next=function(){for(;;){var a=d.next();if(!e||!b.call(c,a,void 0,d))return e=!1,a}};return a}; +goog.iter.takeWhile=function(a,b,c){var d=goog.iter.toIterator(a);a=new goog.iter.Iterator;a.next=function(){var a=d.next();if(b.call(c,a,void 0,d))return a;throw goog.iter.StopIteration;};return a};goog.iter.toArray=function(a){if(goog.isArrayLike(a))return goog.array.toArray(a);a=goog.iter.toIterator(a);var b=[];goog.iter.forEach(a,function(a){b.push(a)});return b}; +goog.iter.equals=function(a,b,c){a=goog.iter.zipLongest({},a,b);var d=c||goog.array.defaultCompareEquality;return goog.iter.every(a,function(a){return d(a[0],a[1])})};goog.iter.nextOrValue=function(a,b){try{return goog.iter.toIterator(a).next()}catch(c){if(c!=goog.iter.StopIteration)throw c;return b}}; +goog.iter.product=function(a){if(goog.array.some(arguments,function(a){return!a.length})||!arguments.length)return new goog.iter.Iterator;var b=new goog.iter.Iterator,c=arguments,d=goog.array.repeat(0,c.length);b.next=function(){if(d){for(var a=goog.array.map(d,function(a,b){return c[b][a]}),b=d.length-1;0<=b;b--){goog.asserts.assert(d);if(d[b]=b),a=goog.iter.limit(a,c-b));return a};goog.iter.hasDuplicates_=function(a){var b=[];goog.array.removeDuplicates(a,b);return a.length!=b.length};goog.iter.permutations=function(a,b){a=goog.iter.toArray(a);b=goog.array.repeat(a,"number"===typeof b?b:a.length);b=goog.iter.product.apply(void 0,b);return 
goog.iter.filter(b,function(a){return!goog.iter.hasDuplicates_(a)})}; +goog.iter.combinations=function(a,b){function c(a){return d[a]}var d=goog.iter.toArray(a);a=goog.iter.range(d.length);b=goog.iter.permutations(a,b);var e=goog.iter.filter(b,function(a){return goog.array.isSorted(a)});b=new goog.iter.Iterator;b.next=function(){return goog.array.map(e.next(),c)};return b}; +goog.iter.combinationsWithReplacement=function(a,b){function c(a){return d[a]}var d=goog.iter.toArray(a);a=goog.array.range(d.length);b=goog.array.repeat(a,b);b=goog.iter.product.apply(void 0,b);var e=goog.iter.filter(b,function(a){return goog.array.isSorted(a)});b=new goog.iter.Iterator;b.next=function(){return goog.array.map(e.next(),c)};return b};goog.structs.Map=function(a,b){this.map_={};this.keys_=[];this.version_=this.count_=0;var c=arguments.length;if(12*this.count_&&this.cleanupKeysArray_(),!0):!1}; +goog.structs.Map.prototype.cleanupKeysArray_=function(){if(this.count_!=this.keys_.length){for(var a=0,b=0;a=d.keys_.length)throw goog.iter.StopIteration;var e=d.keys_[b++];return a?e:d.map_[e]};return e};goog.structs.Map.hasKey_=function(a,b){return Object.prototype.hasOwnProperty.call(a,b)};goog.uri={};goog.uri.utils={};goog.uri.utils.CharCode_={AMPERSAND:38,EQUAL:61,HASH:35,QUESTION:63};goog.uri.utils.buildFromEncodedParts=function(a,b,c,d,e,f,g){var h="";a&&(h+=a+":");c&&(h+="//",b&&(h+=b+"@"),h+=c,d&&(h+=":"+d));e&&(h+=e);f&&(h+="?"+f);g&&(h+="#"+g);return h};goog.uri.utils.splitRe_=/^(?:([^:/?#.]+):)?(?:\/\/(?:([^/?#]*)@)?([^/#?]*?)(?::([0-9]+))?(?=[/#?]|$))?([^?#]+)?(?:\?([^#]*))?(?:#([\s\S]*))?$/; +goog.uri.utils.ComponentIndex={SCHEME:1,USER_INFO:2,DOMAIN:3,PORT:4,PATH:5,QUERY_DATA:6,FRAGMENT:7};goog.uri.utils.split=function(a){return a.match(goog.uri.utils.splitRe_)};goog.uri.utils.decodeIfPossible_=function(a,b){return a?b?decodeURI(a):decodeURIComponent(a):a};goog.uri.utils.getComponentByIndex_=function(a,b){return 
goog.uri.utils.split(b)[a]||null};goog.uri.utils.getScheme=function(a){return goog.uri.utils.getComponentByIndex_(goog.uri.utils.ComponentIndex.SCHEME,a)}; +goog.uri.utils.getEffectiveScheme=function(a){a=goog.uri.utils.getScheme(a);!a&&goog.global.self&&goog.global.self.location&&(a=goog.global.self.location.protocol,a=a.substr(0,a.length-1));return a?a.toLowerCase():""};goog.uri.utils.getUserInfoEncoded=function(a){return goog.uri.utils.getComponentByIndex_(goog.uri.utils.ComponentIndex.USER_INFO,a)};goog.uri.utils.getUserInfo=function(a){return goog.uri.utils.decodeIfPossible_(goog.uri.utils.getUserInfoEncoded(a))}; +goog.uri.utils.getDomainEncoded=function(a){return goog.uri.utils.getComponentByIndex_(goog.uri.utils.ComponentIndex.DOMAIN,a)};goog.uri.utils.getDomain=function(a){return goog.uri.utils.decodeIfPossible_(goog.uri.utils.getDomainEncoded(a),!0)};goog.uri.utils.getPort=function(a){return Number(goog.uri.utils.getComponentByIndex_(goog.uri.utils.ComponentIndex.PORT,a))||null};goog.uri.utils.getPathEncoded=function(a){return goog.uri.utils.getComponentByIndex_(goog.uri.utils.ComponentIndex.PATH,a)}; +goog.uri.utils.getPath=function(a){return goog.uri.utils.decodeIfPossible_(goog.uri.utils.getPathEncoded(a),!0)};goog.uri.utils.getQueryData=function(a){return goog.uri.utils.getComponentByIndex_(goog.uri.utils.ComponentIndex.QUERY_DATA,a)};goog.uri.utils.getFragmentEncoded=function(a){var b=a.indexOf("#");return 0>b?null:a.substr(b+1)};goog.uri.utils.setFragmentEncoded=function(a,b){return goog.uri.utils.removeFragment(a)+(b?"#"+b:"")};goog.uri.utils.getFragment=function(a){return goog.uri.utils.decodeIfPossible_(goog.uri.utils.getFragmentEncoded(a))}; +goog.uri.utils.getHost=function(a){a=goog.uri.utils.split(a);return 
goog.uri.utils.buildFromEncodedParts(a[goog.uri.utils.ComponentIndex.SCHEME],a[goog.uri.utils.ComponentIndex.USER_INFO],a[goog.uri.utils.ComponentIndex.DOMAIN],a[goog.uri.utils.ComponentIndex.PORT])};goog.uri.utils.getOrigin=function(a){a=goog.uri.utils.split(a);return goog.uri.utils.buildFromEncodedParts(a[goog.uri.utils.ComponentIndex.SCHEME],null,a[goog.uri.utils.ComponentIndex.DOMAIN],a[goog.uri.utils.ComponentIndex.PORT])}; +goog.uri.utils.getPathAndAfter=function(a){a=goog.uri.utils.split(a);return goog.uri.utils.buildFromEncodedParts(null,null,null,null,a[goog.uri.utils.ComponentIndex.PATH],a[goog.uri.utils.ComponentIndex.QUERY_DATA],a[goog.uri.utils.ComponentIndex.FRAGMENT])};goog.uri.utils.removeFragment=function(a){var b=a.indexOf("#");return 0>b?a:a.substr(0,b)}; +goog.uri.utils.haveSameDomain=function(a,b){a=goog.uri.utils.split(a);b=goog.uri.utils.split(b);return a[goog.uri.utils.ComponentIndex.DOMAIN]==b[goog.uri.utils.ComponentIndex.DOMAIN]&&a[goog.uri.utils.ComponentIndex.SCHEME]==b[goog.uri.utils.ComponentIndex.SCHEME]&&a[goog.uri.utils.ComponentIndex.PORT]==b[goog.uri.utils.ComponentIndex.PORT]}; +goog.uri.utils.assertNoFragmentsOrQueries_=function(a){goog.asserts.assert(0>a.indexOf("#")&&0>a.indexOf("?"),"goog.uri.utils: Fragment or query identifiers are not supported: [%s]",a)};goog.uri.utils.parseQueryData=function(a,b){if(a){a=a.split("\x26");for(var c=0;cb&&(b=a.length);var c=a.indexOf("?");if(0>c||c>b){c=b;var d=""}else d=a.substring(c+1,b);return[a.substr(0,c),d,a.substr(b)]};goog.uri.utils.joinQueryData_=function(a){return a[0]+(a[1]?"?"+a[1]:"")+a[2]};goog.uri.utils.appendQueryData_=function(a,b){return b?a?a+"\x26"+b:b:a};goog.uri.utils.appendQueryDataToUri_=function(a,b){if(!b)return a;a=goog.uri.utils.splitQueryData_(a);a[1]=goog.uri.utils.appendQueryData_(a[1],b);return goog.uri.utils.joinQueryData_(a)}; 
+goog.uri.utils.appendKeyValuePairs_=function(a,b,c){goog.asserts.assertString(a);if(goog.isArray(b)){goog.asserts.assertArray(b);for(var d=0;dd)return null;var e=a.indexOf("\x26",d);if(0>e||e>c)e=c;d+=b.length+1;return goog.string.urlDecode(a.substr(d,e-d))};goog.uri.utils.getParamValues=function(a,b){for(var c=a.search(goog.uri.utils.hashOrEndRe_),d=0,e,f=[];0<=(e=goog.uri.utils.findParam_(a,d,b,c));){d=a.indexOf("\x26",e);if(0>d||d>c)d=c;e+=b.length+1;f.push(goog.string.urlDecode(a.substr(e,d-e)))}return f}; +goog.uri.utils.trailingQueryPunctuationRe_=/[?&]($|#)/;goog.uri.utils.removeParam=function(a,b){for(var c=a.search(goog.uri.utils.hashOrEndRe_),d=0,e,f=[];0<=(e=goog.uri.utils.findParam_(a,d,b,c));)f.push(a.substring(d,e)),d=Math.min(a.indexOf("\x26",e)+1||c,c);f.push(a.substr(d));return f.join("").replace(goog.uri.utils.trailingQueryPunctuationRe_,"$1")};goog.uri.utils.setParam=function(a,b,c){return goog.uri.utils.appendParam(goog.uri.utils.removeParam(a,b),b,c)}; +goog.uri.utils.setParamsFromMap=function(a,b){a=goog.uri.utils.splitQueryData_(a);var c=a[1],d=[];c&&goog.array.forEach(c.split("\x26"),function(a){var c=a.indexOf("\x3d");c=0<=c?a.substr(0,c):a;b.hasOwnProperty(c)||d.push(a)});a[1]=goog.uri.utils.appendQueryData_(d.join("\x26"),goog.uri.utils.buildQueryDataFromMap(b));return goog.uri.utils.joinQueryData_(a)}; +goog.uri.utils.appendPath=function(a,b){goog.uri.utils.assertNoFragmentsOrQueries_(a);goog.string.endsWith(a,"/")&&(a=a.substr(0,a.length-1));goog.string.startsWith(b,"/")&&(b=b.substr(1));return goog.string.buildString(a,"/",b)}; +goog.uri.utils.setPath=function(a,b){goog.string.startsWith(b,"/")||(b="/"+b);a=goog.uri.utils.split(a);return 
goog.uri.utils.buildFromEncodedParts(a[goog.uri.utils.ComponentIndex.SCHEME],a[goog.uri.utils.ComponentIndex.USER_INFO],a[goog.uri.utils.ComponentIndex.DOMAIN],a[goog.uri.utils.ComponentIndex.PORT],b,a[goog.uri.utils.ComponentIndex.QUERY_DATA],a[goog.uri.utils.ComponentIndex.FRAGMENT])};goog.uri.utils.StandardQueryParam={RANDOM:"zx"}; +goog.uri.utils.makeUnique=function(a){return goog.uri.utils.setParam(a,goog.uri.utils.StandardQueryParam.RANDOM,goog.string.getRandomString())};goog.Uri=function(a,b){this.domain_=this.userInfo_=this.scheme_="";this.port_=null;this.fragment_=this.path_="";this.ignoreCase_=this.isReadOnly_=!1;var c;a instanceof goog.Uri?(this.ignoreCase_=void 0!==b?b:a.getIgnoreCase(),this.setScheme(a.getScheme()),this.setUserInfo(a.getUserInfo()),this.setDomain(a.getDomain()),this.setPort(a.getPort()),this.setPath(a.getPath()),this.setQueryData(a.getQueryData().clone()),this.setFragment(a.getFragment())):a&&(c=goog.uri.utils.split(String(a)))?(this.ignoreCase_= +!!b,this.setScheme(c[goog.uri.utils.ComponentIndex.SCHEME]||"",!0),this.setUserInfo(c[goog.uri.utils.ComponentIndex.USER_INFO]||"",!0),this.setDomain(c[goog.uri.utils.ComponentIndex.DOMAIN]||"",!0),this.setPort(c[goog.uri.utils.ComponentIndex.PORT]),this.setPath(c[goog.uri.utils.ComponentIndex.PATH]||"",!0),this.setQueryData(c[goog.uri.utils.ComponentIndex.QUERY_DATA]||"",!0),this.setFragment(c[goog.uri.utils.ComponentIndex.FRAGMENT]||"",!0)):(this.ignoreCase_=!!b,this.queryData_=new goog.Uri.QueryData(null, +null,this.ignoreCase_))};goog.Uri.RANDOM_PARAM=goog.uri.utils.StandardQueryParam.RANDOM; +goog.Uri.prototype.toString=function(){var a=[],b=this.getScheme();b&&a.push(goog.Uri.encodeSpecialChars_(b,goog.Uri.reDisallowedInSchemeOrUserInfo_,!0),":");var 
c=this.getDomain();if(c||"file"==b)a.push("//"),(b=this.getUserInfo())&&a.push(goog.Uri.encodeSpecialChars_(b,goog.Uri.reDisallowedInSchemeOrUserInfo_,!0),"@"),a.push(goog.Uri.removeDoubleEncoding_(goog.string.urlEncode(c))),c=this.getPort(),null!=c&&a.push(":",String(c));if(c=this.getPath())this.hasDomain()&&"/"!=c.charAt(0)&&a.push("/"), +a.push(goog.Uri.encodeSpecialChars_(c,"/"==c.charAt(0)?goog.Uri.reDisallowedInAbsolutePath_:goog.Uri.reDisallowedInRelativePath_,!0));(c=this.getEncodedQuery())&&a.push("?",c);(c=this.getFragment())&&a.push("#",goog.Uri.encodeSpecialChars_(c,goog.Uri.reDisallowedInFragment_));return a.join("")}; +goog.Uri.prototype.resolve=function(a){var b=this.clone(),c=a.hasScheme();c?b.setScheme(a.getScheme()):c=a.hasUserInfo();c?b.setUserInfo(a.getUserInfo()):c=a.hasDomain();c?b.setDomain(a.getDomain()):c=a.hasPort();var d=a.getPath();if(c)b.setPort(a.getPort());else if(c=a.hasPath()){if("/"!=d.charAt(0))if(this.hasDomain()&&!this.hasPath())d="/"+d;else{var e=b.getPath().lastIndexOf("/");-1!=e&&(d=b.getPath().substr(0,e+1)+d)}d=goog.Uri.removeDotSegments(d)}c?b.setPath(d):c=a.hasQuery();c?b.setQueryData(a.getQueryData().clone()): +c=a.hasFragment();c&&b.setFragment(a.getFragment());return b};goog.Uri.prototype.clone=function(){return new goog.Uri(this)};goog.Uri.prototype.getScheme=function(){return this.scheme_};goog.Uri.prototype.setScheme=function(a,b){this.enforceReadOnly();if(this.scheme_=b?goog.Uri.decodeOrEmpty_(a,!0):a)this.scheme_=this.scheme_.replace(/:$/,"");return this};goog.Uri.prototype.hasScheme=function(){return!!this.scheme_};goog.Uri.prototype.getUserInfo=function(){return this.userInfo_}; +goog.Uri.prototype.setUserInfo=function(a,b){this.enforceReadOnly();this.userInfo_=b?goog.Uri.decodeOrEmpty_(a):a;return this};goog.Uri.prototype.hasUserInfo=function(){return!!this.userInfo_};goog.Uri.prototype.getDomain=function(){return 
this.domain_};goog.Uri.prototype.setDomain=function(a,b){this.enforceReadOnly();this.domain_=b?goog.Uri.decodeOrEmpty_(a,!0):a;return this};goog.Uri.prototype.hasDomain=function(){return!!this.domain_};goog.Uri.prototype.getPort=function(){return this.port_}; +goog.Uri.prototype.setPort=function(a){this.enforceReadOnly();if(a){a=Number(a);if(isNaN(a)||0>a)throw Error("Bad port number "+a);this.port_=a}else this.port_=null;return this};goog.Uri.prototype.hasPort=function(){return null!=this.port_};goog.Uri.prototype.getPath=function(){return this.path_};goog.Uri.prototype.setPath=function(a,b){this.enforceReadOnly();this.path_=b?goog.Uri.decodeOrEmpty_(a,!0):a;return this};goog.Uri.prototype.hasPath=function(){return!!this.path_}; +goog.Uri.prototype.hasQuery=function(){return""!==this.queryData_.toString()};goog.Uri.prototype.setQueryData=function(a,b){this.enforceReadOnly();a instanceof goog.Uri.QueryData?(this.queryData_=a,this.queryData_.setIgnoreCase(this.ignoreCase_)):(b||(a=goog.Uri.encodeSpecialChars_(a,goog.Uri.reDisallowedInQuery_)),this.queryData_=new goog.Uri.QueryData(a,null,this.ignoreCase_));return this};goog.Uri.prototype.setQuery=function(a,b){return this.setQueryData(a,b)}; +goog.Uri.prototype.getEncodedQuery=function(){return this.queryData_.toString()};goog.Uri.prototype.getDecodedQuery=function(){return this.queryData_.toDecodedString()};goog.Uri.prototype.getQueryData=function(){return this.queryData_};goog.Uri.prototype.getQuery=function(){return this.getEncodedQuery()};goog.Uri.prototype.setParameterValue=function(a,b){this.enforceReadOnly();this.queryData_.set(a,b);return this}; +goog.Uri.prototype.setParameterValues=function(a,b){this.enforceReadOnly();goog.isArray(b)||(b=[String(b)]);this.queryData_.setValues(a,b);return this};goog.Uri.prototype.getParameterValues=function(a){return this.queryData_.getValues(a)};goog.Uri.prototype.getParameterValue=function(a){return this.queryData_.get(a)};goog.Uri.prototype.getFragment=function(){return 
this.fragment_};goog.Uri.prototype.setFragment=function(a,b){this.enforceReadOnly();this.fragment_=b?goog.Uri.decodeOrEmpty_(a):a;return this}; +goog.Uri.prototype.hasFragment=function(){return!!this.fragment_};goog.Uri.prototype.hasSameDomainAs=function(a){return(!this.hasDomain()&&!a.hasDomain()||this.getDomain()==a.getDomain())&&(!this.hasPort()&&!a.hasPort()||this.getPort()==a.getPort())};goog.Uri.prototype.makeUnique=function(){this.enforceReadOnly();this.setParameterValue(goog.Uri.RANDOM_PARAM,goog.string.getRandomString());return this};goog.Uri.prototype.removeParameter=function(a){this.enforceReadOnly();this.queryData_.remove(a);return this}; +goog.Uri.prototype.setReadOnly=function(a){this.isReadOnly_=a;return this};goog.Uri.prototype.isReadOnly=function(){return this.isReadOnly_};goog.Uri.prototype.enforceReadOnly=function(){if(this.isReadOnly_)throw Error("Tried to modify a read-only Uri");};goog.Uri.prototype.setIgnoreCase=function(a){this.ignoreCase_=a;this.queryData_&&this.queryData_.setIgnoreCase(a);return this};goog.Uri.prototype.getIgnoreCase=function(){return this.ignoreCase_}; +goog.Uri.parse=function(a,b){return a instanceof goog.Uri?a.clone():new goog.Uri(a,b)};goog.Uri.create=function(a,b,c,d,e,f,g,h){h=new goog.Uri(null,h);a&&h.setScheme(a);b&&h.setUserInfo(b);c&&h.setDomain(c);d&&h.setPort(d);e&&h.setPath(e);f&&h.setQueryData(f);g&&h.setFragment(g);return h};goog.Uri.resolve=function(a,b){a instanceof goog.Uri||(a=goog.Uri.parse(a));b instanceof goog.Uri||(b=goog.Uri.parse(b));return a.resolve(b)}; +goog.Uri.removeDotSegments=function(a){if(".."==a||"."==a)return"";if(goog.string.contains(a,"./")||goog.string.contains(a,"/.")){var b=goog.string.startsWith(a,"/");a=a.split("/");for(var c=[],d=0;d>4&15).toString(16)+(a&15).toString(16)};goog.Uri.removeDoubleEncoding_=function(a){return a.replace(/%25([0-9a-fA-F]{2})/g,"%$1")};goog.Uri.reDisallowedInSchemeOrUserInfo_=/[#\/\?@]/g;goog.Uri.reDisallowedInRelativePath_=/[#\?:]/g; 
+goog.Uri.reDisallowedInAbsolutePath_=/[#\?]/g;goog.Uri.reDisallowedInQuery_=/[#\?@]/g;goog.Uri.reDisallowedInFragment_=/#/g;goog.Uri.haveSameDomain=function(a,b){a=goog.uri.utils.split(a);b=goog.uri.utils.split(b);return a[goog.uri.utils.ComponentIndex.DOMAIN]==b[goog.uri.utils.ComponentIndex.DOMAIN]&&a[goog.uri.utils.ComponentIndex.PORT]==b[goog.uri.utils.ComponentIndex.PORT]};goog.Uri.QueryData=function(a,b,c){this.count_=this.keyMap_=null;this.encodedQuery_=a||null;this.ignoreCase_=!!c}; +goog.Uri.QueryData.prototype.ensureKeyMapInitialized_=function(){if(!this.keyMap_&&(this.keyMap_=new goog.structs.Map,this.count_=0,this.encodedQuery_)){var a=this;goog.uri.utils.parseQueryData(this.encodedQuery_,function(b,c){a.add(goog.string.urlDecode(b),c)})}}; +goog.Uri.QueryData.createFromMap=function(a,b,c){b=goog.structs.getKeys(a);if("undefined"==typeof b)throw Error("Keys are undefined");c=new goog.Uri.QueryData(null,null,c);a=goog.structs.getValues(a);for(var d=0;da?goog.reflect.cache(goog.math.Integer.IntCache_,a,function(a){return new goog.math.Integer([a|0],0>a?-1:0)}):new goog.math.Integer([a|0],0>a?-1:0)}; +goog.math.Integer.fromNumber=function(a){if(isNaN(a)||!isFinite(a))return goog.math.Integer.ZERO;if(0>a)return goog.math.Integer.fromNumber(-a).negate();for(var b=[],c=1,d=0;a>=c;d++)b[d]=a/c|0,c*=goog.math.Integer.TWO_PWR_32_DBL_;return new goog.math.Integer(b,0)};goog.math.Integer.fromBits=function(a){return new goog.math.Integer(a,a[a.length-1]&-2147483648?-1:0)}; +goog.math.Integer.fromString=function(a,b){if(0==a.length)throw Error("number format error: empty string");b=b||10;if(2>b||36f?(f=goog.math.Integer.fromNumber(Math.pow(b, +f)),d=d.multiply(f).add(goog.math.Integer.fromNumber(g))):(d=d.multiply(c),d=d.add(goog.math.Integer.fromNumber(g)))}return 
d};goog.math.Integer.TWO_PWR_32_DBL_=4294967296;goog.math.Integer.ZERO=goog.math.Integer.fromInt(0);goog.math.Integer.ONE=goog.math.Integer.fromInt(1);goog.math.Integer.TWO_PWR_24_=goog.math.Integer.fromInt(16777216);goog.math.Integer.prototype.toInt=function(){return 0a||36>>0).toString(a);c=e;if(c.isZero())return f+d;for(;6>f.length;)f="0"+f;d=""+f+d}};goog.math.Integer.prototype.getBits=function(a){return 0>a?0:athis.compare(a)};goog.math.Integer.prototype.lessThanOrEqual=function(a){return 0>=this.compare(a)};goog.math.Integer.prototype.compare=function(a){a=this.subtract(a);return a.isNegative()?-1:a.isZero()?0:1}; +goog.math.Integer.prototype.shorten=function(a){var b=a-1>>5;a=(a-1)%32;for(var c=[],d=0;d>>16,g=this.getBits(e)&65535,h=a.getBits(e)>>>16,k=a.getBits(e)&65535;g=d+g+k;f=(g>>>16)+f+h;d=f>>>16;g&=65535;f&=65535;c[e]=f<<16|g}return goog.math.Integer.fromBits(c)};goog.math.Integer.prototype.subtract=function(a){return this.add(a.negate())}; +goog.math.Integer.prototype.multiply=function(a){if(this.isZero()||a.isZero())return goog.math.Integer.ZERO;if(this.isNegative())return a.isNegative()?this.negate().multiply(a.negate()):this.negate().multiply(a).negate();if(a.isNegative())return this.multiply(a.negate()).negate();if(this.lessThan(goog.math.Integer.TWO_PWR_24_)&&a.lessThan(goog.math.Integer.TWO_PWR_24_))return goog.math.Integer.fromNumber(this.toNumber()*a.toNumber());for(var b=this.bits_.length+a.bits_.length,c=[],d=0;d<2*b;d++)c[d]= +0;for(d=0;d>>16,g=this.getBits(d)&65535,h=a.getBits(e)>>>16,k=a.getBits(e)&65535;c[2*d+2*e]+=g*k;goog.math.Integer.carry16_(c,2*d+2*e);c[2*d+2*e+1]+=f*k;goog.math.Integer.carry16_(c,2*d+2*e+1);c[2*d+2*e+1]+=g*h;goog.math.Integer.carry16_(c,2*d+2*e+1);c[2*d+2*e+2]+=f*h;goog.math.Integer.carry16_(c,2*d+2*e+2)}for(d=0;d>>16,a[b]&=65535,b++}; +goog.math.Integer.prototype.slowDivide_=function(a){if(this.isNegative()||a.isNegative())throw Error("slowDivide_ only works with positive integers.");for(var 
b=goog.math.Integer.ONE,c=a;c.lessThanOrEqual(this);)b=b.shiftLeft(1),c=c.shiftLeft(1);var d=b.shiftRight(1),e=c.shiftRight(1);c=c.shiftRight(2);for(b=b.shiftRight(2);!c.isZero();){var f=e.add(c);f.lessThanOrEqual(this)&&(d=d.add(b),e=f);c=c.shiftRight(1);b=b.shiftRight(1)}a=this.subtract(d.multiply(a));return new goog.math.Integer.DivisionResult(d, +a)};goog.math.Integer.prototype.divide=function(a){return this.divideAndRemainder(a).quotient};goog.math.Integer.DivisionResult=function(a,b){this.quotient=a;this.remainder=b}; +goog.math.Integer.prototype.divideAndRemainder=function(a){if(a.isZero())throw Error("division by zero");if(this.isZero())return new goog.math.Integer.DivisionResult(goog.math.Integer.ZERO,goog.math.Integer.ZERO);if(this.isNegative())return a=this.negate().divideAndRemainder(a),new goog.math.Integer.DivisionResult(a.quotient.negate(),a.remainder.negate());if(a.isNegative())return a=this.divideAndRemainder(a.negate()),new goog.math.Integer.DivisionResult(a.quotient.negate(),a.remainder);if(30=e?1:Math.pow(2,e-48);for(var f=goog.math.Integer.fromNumber(d),g=f.multiply(a);g.isNegative()||g.greaterThan(c);)d-=e,f=goog.math.Integer.fromNumber(d),g=f.multiply(a);f.isZero()&&(f=goog.math.Integer.ONE);b=b.add(f);c=c.subtract(g)}return new goog.math.Integer.DivisionResult(b,c)};goog.math.Integer.prototype.modulo=function(a){return this.divideAndRemainder(a).remainder}; +goog.math.Integer.prototype.not=function(){for(var a=this.bits_.length,b=[],c=0;c>5;a%=32;for(var c=this.bits_.length+b+(0>>32-a:this.getBits(e-b);return new goog.math.Integer(d,this.sign_)};goog.math.Integer.prototype.shiftRight=function(a){var b=a>>5;a%=32;for(var c=this.bits_.length-b,d=[],e=0;e>>a|this.getBits(e+b+1)<<32-a:this.getBits(e+b);return new 
goog.math.Integer(d,this.sign_)};goog.string.StringBuffer=function(a,b){null!=a&&this.append.apply(this,arguments)};goog.string.StringBuffer.prototype.buffer_="";goog.string.StringBuffer.prototype.set=function(a){this.buffer_=""+a};goog.string.StringBuffer.prototype.append=function(a,b,c){this.buffer_+=String(a);if(null!=b)for(let a=1;a>21;return 0==a||-1==a&&!(0==this.low_&&-2097152==this.high_)}toString(a){a=a||10;if(2>a||36>2);var c=Math.pow(a,b),d=module$contents$goog$math$Long_Long.fromBits(c, +c/module$contents$goog$math$Long_TWO_PWR_32_DBL_);c=this.div(d);d=Math.abs(this.subtract(c.multiply(d)).toNumber());var e=10==a?""+d:d.toString(a);e.length>>0}getNumBitsAbs(){if(this.isNegative())return this.equals(module$contents$goog$math$Long_Long.getMinValue())?64:this.negate().getNumBitsAbs();for(var a= +0!=this.high_?this.high_:this.low_,b=31;0this.high_}isOdd(){return 1==(this.low_&1)}equals(a){return this.low_==a.low_&&this.high_==a.high_}notEquals(a){return!this.equals(a)}lessThan(a){return 0>this.compare(a)}lessThanOrEqual(a){return 0>=this.compare(a)}greaterThan(a){return 0a.getLowBitsUnsigned()?1:-1:this.high_>a.high_?1:-1}negate(){var a=~this.low_+1|0;return module$contents$goog$math$Long_Long.fromBits(a,~this.high_+!a|0)}add(a){var b=this.high_>>>16,c=this.high_&65535,d=this.low_>>>16,e=a.high_>>>16,f=a.high_&65535,g=a.low_>>>16;a=(this.low_&65535)+(a.low_&65535);g=(a>>>16)+(d+g);d=g>>>16;d+=c+f;b=(d>>>16)+(b+e)&65535;return module$contents$goog$math$Long_Long.fromBits((g&65535)<<16|a&65535,b<<16|d&65535)}subtract(a){return this.add(a.negate())}multiply(a){if(this.isZero())return this; +if(a.isZero())return a;var b=this.high_>>>16,c=this.high_&65535,d=this.low_>>>16,e=this.low_&65535,f=a.high_>>>16,g=a.high_&65535,h=a.low_>>>16;a=a.low_&65535;var k=e*a;var l=(k>>>16)+d*a;var m=l>>>16;l=(l&65535)+e*h;m+=l>>>16;m+=c*a;var n=m>>>16;m=(m&65535)+d*h;n+=m>>>16;m=(m&65535)+e*g;n=n+(m>>>16)+(b*a+c*h+d*g+e*f)&65535;return 
module$contents$goog$math$Long_Long.fromBits((l&65535)<<16|k&65535,n<<16|m&65535)}div(a){if(a.isZero())throw Error("division by zero");if(this.isNegative()){if(this.equals(module$contents$goog$math$Long_Long.getMinValue())){if(a.equals(module$contents$goog$math$Long_Long.getOne())|| +a.equals(module$contents$goog$math$Long_Long.getNegOne()))return module$contents$goog$math$Long_Long.getMinValue();if(a.equals(module$contents$goog$math$Long_Long.getMinValue()))return module$contents$goog$math$Long_Long.getOne();var b=this.shiftRight(1).div(a).shiftLeft(1);if(b.equals(module$contents$goog$math$Long_Long.getZero()))return a.isNegative()?module$contents$goog$math$Long_Long.getOne():module$contents$goog$math$Long_Long.getNegOne();var c=this.subtract(a.multiply(b));return b.add(c.div(a))}return a.isNegative()? +this.negate().div(a.negate()):this.negate().div(a).negate()}if(this.isZero())return module$contents$goog$math$Long_Long.getZero();if(a.isNegative())return a.equals(module$contents$goog$math$Long_Long.getMinValue())?module$contents$goog$math$Long_Long.getZero():this.div(a.negate()).negate();var d=module$contents$goog$math$Long_Long.getZero();for(c=this;c.greaterThanOrEqual(a);){b=Math.max(1,Math.floor(c.toNumber()/a.toNumber()));var e=Math.ceil(Math.log(b)/Math.LN2);e=48>=e?1:Math.pow(2,e-48);for(var f= +module$contents$goog$math$Long_Long.fromNumber(b),g=f.multiply(a);g.isNegative()||g.greaterThan(c);)b-=e,f=module$contents$goog$math$Long_Long.fromNumber(b),g=f.multiply(a);f.isZero()&&(f=module$contents$goog$math$Long_Long.getOne());d=d.add(f);c=c.subtract(g)}return d}modulo(a){return this.subtract(this.div(a).multiply(a))}not(){return module$contents$goog$math$Long_Long.fromBits(~this.low_,~this.high_)}and(a){return module$contents$goog$math$Long_Long.fromBits(this.low_&a.low_,this.high_&a.high_)}or(a){return module$contents$goog$math$Long_Long.fromBits(this.low_| +a.low_,this.high_|a.high_)}xor(a){return 
module$contents$goog$math$Long_Long.fromBits(this.low_^a.low_,this.high_^a.high_)}shiftLeft(a){a&=63;if(0==a)return this;var b=this.low_;return 32>a?module$contents$goog$math$Long_Long.fromBits(b<>>32-a):module$contents$goog$math$Long_Long.fromBits(0,b<a?module$contents$goog$math$Long_Long.fromBits(this.low_>>>a|b<<32-a,b>>a):module$contents$goog$math$Long_Long.fromBits(b>> +a-32,0<=b?0:-1)}shiftRightUnsigned(a){a&=63;if(0==a)return this;var b=this.high_;return 32>a?module$contents$goog$math$Long_Long.fromBits(this.low_>>>a|b<<32-a,b>>>a):32==a?module$contents$goog$math$Long_Long.fromBits(b,0):module$contents$goog$math$Long_Long.fromBits(b>>>a-32,0)}static fromInt(a){var b=a|0;goog.asserts.assert(a===b,"value should be a 32-bit integer");return-128<=b&&128>b?module$contents$goog$math$Long_getCachedIntValue_(b):new module$contents$goog$math$Long_Long(b,0>b?-1:0)}static fromNumber(a){return 0< +a?a>=module$contents$goog$math$Long_TWO_PWR_63_DBL_?module$contents$goog$math$Long_Long.getMaxValue():new module$contents$goog$math$Long_Long(a,a/module$contents$goog$math$Long_TWO_PWR_32_DBL_):0>a?a<=-module$contents$goog$math$Long_TWO_PWR_63_DBL_?module$contents$goog$math$Long_Long.getMinValue():(new module$contents$goog$math$Long_Long(-a,-a/module$contents$goog$math$Long_TWO_PWR_32_DBL_)).negate():module$contents$goog$math$Long_Long.getZero()}static fromBits(a,b){return new module$contents$goog$math$Long_Long(a, +b)}static fromString(a,b){if("-"==a.charAt(0))return module$contents$goog$math$Long_Long.fromString(a.substring(1),b).negate();var c=parseInt(a,b||10);if(c<=module$contents$goog$math$Long_MAX_SAFE_INTEGER_)return new module$contents$goog$math$Long_Long(c%module$contents$goog$math$Long_TWO_PWR_32_DBL_|0,c/module$contents$goog$math$Long_TWO_PWR_32_DBL_|0);if(0==a.length)throw Error("number format error: empty string");if(0<=a.indexOf("-"))throw Error('number format error: interior "-" character: '+ 
+a);b=b||10;if(2>b||36f?(f=module$contents$goog$math$Long_Long.fromNumber(Math.pow(b,f)),d=d.multiply(f).add(module$contents$goog$math$Long_Long.fromNumber(g))):(d=d.multiply(c),d=d.add(module$contents$goog$math$Long_Long.fromNumber(g)))}return d}static isStringInRange(a, +b){b=b||10;if(2>b||36a?-1:0)})} +const module$contents$goog$math$Long_MAX_VALUE_FOR_RADIX_=" 111111111111111111111111111111111111111111111111111111111111111 2021110011022210012102010021220101220221 13333333333333333333333333333333 1104332401304422434310311212 1540241003031030222122211 22341010611245052052300 777777777777777777777 67404283172107811827 9223372036854775807 1728002635214590697 41a792678515120367 10b269549075433c37 4340724c6c71dc7a7 160e2ad3246366807 7fffffffffffffff 33d3d8307b214008 16agh595df825fa7 ba643dci0ffeehh 5cbfjia3fh26ja7 2heiciiie82dh97 1adaibb21dckfa7 i6k448cf4192c2 acd772jnc9l0l7 64ie1focnn5g77 3igoecjbmca687 27c48l5b37oaop 1bk39f3ah3dmq7 q1se8f0m04isb hajppbc1fc207 bm03i95hia437 7vvvvvvvvvvvv 5hg4ck9jd4u37 3tdtk1v8j6tpp 2pijmikexrxp7 1y2p0ij32e8e7".split(" "),module$contents$goog$math$Long_MIN_VALUE_FOR_RADIX_= +" -1000000000000000000000000000000000000000000000000000000000000000 -2021110011022210012102010021220101220222 -20000000000000000000000000000000 -1104332401304422434310311213 -1540241003031030222122212 -22341010611245052052301 -1000000000000000000000 -67404283172107811828 -9223372036854775808 -1728002635214590698 -41a792678515120368 -10b269549075433c38 -4340724c6c71dc7a8 -160e2ad3246366808 -8000000000000000 -33d3d8307b214009 -16agh595df825fa8 -ba643dci0ffeehi -5cbfjia3fh26ja8 -2heiciiie82dh98 -1adaibb21dckfa8 -i6k448cf4192c3 -acd772jnc9l0l8 -64ie1focnn5g78 -3igoecjbmca688 -27c48l5b37oaoq -1bk39f3ah3dmq8 -q1se8f0m04isc -hajppbc1fc208 -bm03i95hia438 -8000000000000 -5hg4ck9jd4u38 -3tdtk1v8j6tpq -2pijmikexrxp8 -1y2p0ij32e8e8".split(" "), 
+module$contents$goog$math$Long_MAX_SAFE_INTEGER_=9007199254740991,module$contents$goog$math$Long_TWO_PWR_32_DBL_=4294967296,module$contents$goog$math$Long_TWO_PWR_63_DBL_=0x7fffffffffffffff,module$contents$goog$math$Long_ZERO_=module$contents$goog$math$Long_Long.fromBits(0,0),module$contents$goog$math$Long_ONE_=module$contents$goog$math$Long_Long.fromBits(1,0),module$contents$goog$math$Long_NEG_ONE_=module$contents$goog$math$Long_Long.fromBits(-1,-1),module$contents$goog$math$Long_MAX_VALUE_=module$contents$goog$math$Long_Long.fromBits(4294967295, +2147483647),module$contents$goog$math$Long_MIN_VALUE_=module$contents$goog$math$Long_Long.fromBits(0,2147483648),module$contents$goog$math$Long_TWO_PWR_24_=module$contents$goog$math$Long_Long.fromBits(16777216,0);var cljs={core:{}};cljs.core._STAR_clojurescript_version_STAR_="1.10.773";cljs.core._STAR_unchecked_if_STAR_=!1;cljs.core._STAR_unchecked_arrays_STAR_=!1;cljs.core._STAR_warn_on_infer_STAR_=!1;if("undefined"===typeof cljs||"undefined"===typeof cljs.core||"undefined"===typeof cljs.core.PROTOCOL_SENTINEL)cljs.core.PROTOCOL_SENTINEL={};cljs.core.MODULE_URIS=null;cljs.core.MODULE_INFOS=null;cljs.core._STAR_target_STAR_="default";cljs.core._STAR_global_STAR_="default";cljs.core._STAR_ns_STAR_=null; +cljs.core._STAR_out_STAR_=null;cljs.core._STAR_assert_STAR_=!0;if("undefined"===typeof cljs||"undefined"===typeof cljs.core||"undefined"===typeof cljs.core._STAR_print_fn_STAR_)cljs.core._STAR_print_fn_STAR_=null;cljs.core._STAR_exec_tap_fn_STAR_=function(a){return"undefined"!==typeof setTimeout?(a=setTimeout(a,0),cljs.core.truth_(a)?!0:a):!1};if("undefined"===typeof cljs||"undefined"===typeof cljs.core||"undefined"===typeof cljs.core._STAR_print_err_fn_STAR_)cljs.core._STAR_print_err_fn_STAR_=null; +cljs.core.set_print_fn_BANG_=function(a){return cljs.core._STAR_print_fn_STAR_=a};cljs.core.set_print_err_fn_BANG_=function(a){return 
cljs.core._STAR_print_err_fn_STAR_=a};cljs.core._STAR_flush_on_newline_STAR_=!0;cljs.core._STAR_print_newline_STAR_=!0;cljs.core._STAR_print_readably_STAR_=!0;cljs.core._STAR_print_meta_STAR_=!1;cljs.core._STAR_print_dup_STAR_=!1;cljs.core._STAR_print_namespace_maps_STAR_=!1;cljs.core._STAR_print_length_STAR_=null;cljs.core._STAR_print_level_STAR_=null; +cljs.core._STAR_print_fn_bodies_STAR_=!1;if("undefined"===typeof cljs||"undefined"===typeof cljs.core||"undefined"===typeof cljs.core._STAR_loaded_libs_STAR_)cljs.core._STAR_loaded_libs_STAR_=null; +cljs.core.pr_opts=function(){return new cljs.core.PersistentArrayMap(null,5,[new cljs.core.Keyword(null,"flush-on-newline","flush-on-newline",-151457939),cljs.core._STAR_flush_on_newline_STAR_,new cljs.core.Keyword(null,"readably","readably",1129599760),cljs.core._STAR_print_readably_STAR_,new cljs.core.Keyword(null,"meta","meta",1499536964),cljs.core._STAR_print_meta_STAR_,new cljs.core.Keyword(null,"dup","dup",556298533),cljs.core._STAR_print_dup_STAR_,new cljs.core.Keyword(null,"print-length", +"print-length",1931866356),cljs.core._STAR_print_length_STAR_],null)};cljs.core.enable_console_print_BANG_=function(){cljs.core._STAR_print_newline_STAR_=!1;cljs.core.set_print_fn_BANG_(function(){return console.log.apply(console,goog.array.clone(arguments))});cljs.core.set_print_err_fn_BANG_(function(){return console.error.apply(console,goog.array.clone(arguments))});return null};cljs.core.truth_=function(a){return null!=a&&!1!==a};cljs.core.not_native=null; +cljs.core.identical_QMARK_=function(a,b){return a===b};cljs.core.nil_QMARK_=function(a){return null==a};cljs.core.array_QMARK_=function(a){return"nodejs"===cljs.core._STAR_target_STAR_?Array.isArray(a):a instanceof Array};cljs.core.number_QMARK_=function(a){return"number"===typeof a};cljs.core.not=function(a){return null==a?!0:!1===a?!0:!1};cljs.core.some_QMARK_=function(a){return null!=a};cljs.core.object_QMARK_=function(a){return 
null!=a?a.constructor===Object:!1};cljs.core.string_QMARK_=function(a){return goog.isString(a)}; +cljs.core.char_QMARK_=function(a){return"string"===typeof a&&1===a.length};cljs.core.any_QMARK_=function(a){return!0};cljs.core.native_satisfies_QMARK_=function(a,b){return a[goog.typeOf(null==b?null:b)]?!0:a._?!0:!1};cljs.core.is_proto_=function(a){return a.constructor.prototype===a};cljs.core._STAR_main_cli_fn_STAR_=null;cljs.core._STAR_command_line_args_STAR_=null;cljs.core.type=function(a){return null==a?null:a.constructor}; +cljs.core.missing_protocol=function(a,b){var c=cljs.core.type(b);c=cljs.core.truth_(cljs.core.truth_(c)?c.cljs$lang$type:c)?c.cljs$lang$ctorStr:goog.typeOf(b);return Error(["No protocol method ",a," defined for type ",c,": ",b].join(""))};cljs.core.type__GT_str=function(a){var b=a.cljs$lang$ctorStr;return cljs.core.truth_(b)?b:cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)};cljs.core.load_file=function(a){return cljs.core.truth_(COMPILED)?null:goog.nodeGlobalRequire(a)}; +"undefined"!==typeof Symbol&&"function"===goog.typeOf(Symbol)?cljs.core.ITER_SYMBOL=Symbol.iterator:cljs.core.ITER_SYMBOL="@@iterator";cljs.core.CHAR_MAP={"]":"_RBRACK_","'":"_SINGLEQUOTE_","\x3d":"_EQ_",'"':"_DOUBLEQUOTE_","!":"_BANG_","*":"_STAR_","%":"_PERCENT_","|":"_BAR_","~":"_TILDE_","/":"_SLASH_","\\":"_BSLASH_","-":"_","?":"_QMARK_","\x26":"_AMPERSAND_",":":"_COLON_","\x3c":"_LT_","{":"_LBRACE_","}":"_RBRACE_","[":"_LBRACK_","#":"_SHARP_","^":"_CARET_","+":"_PLUS_","@":"_CIRCA_","\x3e":"_GT_"}; +cljs.core.DEMUNGE_MAP={_RBRACE_:"}",_COLON_:":",_BANG_:"!",_QMARK_:"?",_BSLASH_:"\\\\",_SLASH_:"/",_PERCENT_:"%",_PLUS_:"+",_SHARP_:"#",_LBRACE_:"{",_BAR_:"|",_LBRACK_:"[",_EQ_:"\x3d",_:"-",_TILDE_:"~",_RBRACK_:"]",_GT_:"\x3e",_SINGLEQUOTE_:"'",_CIRCA_:"@",_AMPERSAND_:"\x26",_DOUBLEQUOTE_:'\\"',_CARET_:"^",_LT_:"\x3c",_STAR_:"*"};cljs.core.DEMUNGE_PATTERN=null; +cljs.core.system_time=function(){if("undefined"!==typeof performance&&null!=performance.now)return 
performance.now();if("undefined"!==typeof process&&null!=process.hrtime){var a=process.hrtime();return(1E9*a[0]+a[1])/1E6}return(new Date).getTime()}; +cljs.core.make_array=function(a){switch(arguments.length){case 1:return cljs.core.make_array.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.make_array.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(db)throw Error("Assert failed: (not (neg? idx))");if(!(bb)throw Error("Assert failed: (not (neg? idx))");if(!(bb)throw Error("Assert failed: (not (neg? idx))");if(!(bb)throw Error("Assert failed: (not (neg? idx))");if(!(b>>-b};"undefined"!==typeof Math&&"undefined"!==typeof Math.imul&&0!==Math.imul(4294967295,5)?cljs.core.imul=function(a,b){return Math.imul(a,b)}:cljs.core.imul=function(a,b){var c=a&65535,d=b&65535;return c*d+((a>>>16&65535)*d+c*(b>>>16&65535)<<16>>>0)|0};cljs.core.m3_seed=0;cljs.core.m3_C1=-862048943;cljs.core.m3_C2=461845907; +cljs.core.m3_mix_K1=function(a){return cljs.core.imul(cljs.core.int_rotate_left(cljs.core.imul(a|0,cljs.core.m3_C1),15),cljs.core.m3_C2)};cljs.core.m3_mix_H1=function(a,b){return cljs.core.imul(cljs.core.int_rotate_left((a|0)^(b|0),13),5)+-430675100|0};cljs.core.m3_fmix=function(a,b){a=(a|0)^b;a=cljs.core.imul(a^a>>>16,-2048144789);a=cljs.core.imul(a^a>>>13,-1028477387);return a^a>>>16}; +cljs.core.m3_hash_int=function(a){if(0===a)return a;a=cljs.core.m3_mix_K1(a);a=cljs.core.m3_mix_H1(cljs.core.m3_seed,a);return cljs.core.m3_fmix(a,4)};cljs.core.m3_hash_unencoded_chars=function(a){a:{var b=1;for(var c=cljs.core.m3_seed;;)if(b>2)};cljs.core.instance_QMARK_=function(a,b){return b instanceof a};cljs.core.symbol_QMARK_=function(a){return a instanceof cljs.core.Symbol};cljs.core.hash_symbol=function(a){return cljs.core.hash_combine(cljs.core.m3_hash_unencoded_chars(a.name),cljs.core.hash_string(a.ns))}; +cljs.core.compare_symbols=function(a,b){if(a.str===b.str)return 
0;if(cljs.core.truth_(cljs.core.not(a.ns)?b.ns:!1))return-1;if(cljs.core.truth_(a.ns)){if(cljs.core.not(b.ns))return 1;var c=goog.array.defaultCompare(a.ns,b.ns);return 0===c?goog.array.defaultCompare(a.name,b.name):c}return goog.array.defaultCompare(a.name,b.name)}; +cljs.core.Symbol=function(a,b,c,d,e){this.ns=a;this.name=b;this.str=c;this._hash=d;this._meta=e;this.cljs$lang$protocol_mask$partition0$=2154168321;this.cljs$lang$protocol_mask$partition1$=4096};cljs.core.Symbol.prototype.toString=function(){return this.str};cljs.core.Symbol.prototype.equiv=function(a){return this.cljs$core$IEquiv$_equiv$arity$2(null,a)};cljs.core.Symbol.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return b instanceof cljs.core.Symbol?this.str===b.str:!1}; +cljs.core.Symbol.prototype.call=function(){var a=null,b=function(a,b){return cljs.core.get.cljs$core$IFn$_invoke$arity$2(b,this)},c=function(a,b,c){return cljs.core.get.cljs$core$IFn$_invoke$arity$3(b,this,c)};a=function(a,e,f){switch(arguments.length){case 2:return b.call(this,a,e);case 3:return c.call(this,a,e,f)}throw Error("Invalid arity: "+(arguments.length-1));};a.cljs$core$IFn$_invoke$arity$2=b;a.cljs$core$IFn$_invoke$arity$3=c;return a}(); +cljs.core.Symbol.prototype.apply=function(a,b){return this.call.apply(this,[this].concat(cljs.core.aclone(b)))};cljs.core.Symbol.prototype.cljs$core$IFn$_invoke$arity$1=function(a){return cljs.core.get.cljs$core$IFn$_invoke$arity$2(a,this)};cljs.core.Symbol.prototype.cljs$core$IFn$_invoke$arity$2=function(a,b){return cljs.core.get.cljs$core$IFn$_invoke$arity$3(a,this,b)};cljs.core.Symbol.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return this._meta}; +cljs.core.Symbol.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return new cljs.core.Symbol(this.ns,this.name,this.str,this._hash,b)};cljs.core.Symbol.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this._hash;return 
null!=a?a:this._hash=a=cljs.core.hash_symbol(this)};cljs.core.Symbol.prototype.cljs$core$INamed$_name$arity$1=function(a){return this.name};cljs.core.Symbol.prototype.cljs$core$INamed$_namespace$arity$1=function(a){return this.ns}; +cljs.core.Symbol.prototype.cljs$core$IPrintWithWriter$_pr_writer$arity$3=function(a,b,c){return cljs.core._write(b,this.str)}; +cljs.core.Symbol.getBasis=function(){return new cljs.core.PersistentVector(null,5,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"ns","ns",2082130287,null),new cljs.core.Symbol(null,"name","name",-810760592,null),new cljs.core.Symbol(null,"str","str",-1564826950,null),cljs.core.with_meta(new cljs.core.Symbol(null,"_hash","_hash",-2130838312,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),new cljs.core.Symbol(null, +"_meta","_meta",-1716892533,null)],null)};cljs.core.Symbol.cljs$lang$type=!0;cljs.core.Symbol.cljs$lang$ctorStr="cljs.core/Symbol";cljs.core.Symbol.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/Symbol")};cljs.core.__GT_Symbol=function(a,b,c,d,e){return new cljs.core.Symbol(a,b,c,d,e)};cljs.core.var_QMARK_=function(a){return a instanceof cljs.core.Var}; +cljs.core.symbol=function(a){switch(arguments.length){case 1:return cljs.core.symbol.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.symbol.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.core.symbol.cljs$core$IFn$_invoke$arity$1=function(a){for(;;){if(a instanceof cljs.core.Symbol)return a;if("string"===typeof a){var b=a.indexOf("/");return 1>b?cljs.core.symbol.cljs$core$IFn$_invoke$arity$2(null,a):cljs.core.symbol.cljs$core$IFn$_invoke$arity$2(a.substring(0,b),a.substring(b+1,a.length))}if(cljs.core.var_QMARK_(a))return a.sym;if(a instanceof cljs.core.Keyword)a=a.fqn;else throw 
Error("no conversion to symbol");}}; +cljs.core.symbol.cljs$core$IFn$_invoke$arity$2=function(a,b){var c=null!=a?[cljs.core.str.cljs$core$IFn$_invoke$arity$1(a),"/",cljs.core.str.cljs$core$IFn$_invoke$arity$1(b)].join(""):b;return new cljs.core.Symbol(a,b,c,null,null)};cljs.core.symbol.cljs$lang$maxFixedArity=2;cljs.core.Var=function(a,b,c){this.val=a;this.sym=b;this._meta=c;this.cljs$lang$protocol_mask$partition0$=6717441;this.cljs$lang$protocol_mask$partition1$=0}; +cljs.core.Var.prototype.isMacro=function(){return(this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null)).cljs$lang$macro};cljs.core.Var.prototype.toString=function(){return["#'",cljs.core.str.cljs$core$IFn$_invoke$arity$1(this.sym)].join("")};cljs.core.Var.prototype.cljs$core$IDeref$_deref$arity$1=function(a){return this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null)}; +cljs.core.Var.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return this._meta};cljs.core.Var.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return new cljs.core.Var(this.val,this.sym,b)};cljs.core.Var.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return b instanceof cljs.core.Var?cljs.core._EQ_.cljs$core$IFn$_invoke$arity$2(this.sym,b.sym):!1};cljs.core.Var.prototype.cljs$core$IHash$_hash$arity$1=function(a){return cljs.core.hash_symbol(this.sym)}; +cljs.core.Var.prototype.cljs$core$Fn$=cljs.core.PROTOCOL_SENTINEL; +cljs.core.Var.prototype.call=function(){var a=null,b=function(a){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$0?a.cljs$core$IFn$_invoke$arity$0():a.call(null)},c=function(a,b){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return 
a.cljs$core$IFn$_invoke$arity$1?a.cljs$core$IFn$_invoke$arity$1(b):a.call(null,b)},d=function(a,b,c){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0? +a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$2?a.cljs$core$IFn$_invoke$arity$2(b,c):a.call(null,b,c)},e=function(a,b,c,d){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$3?a.cljs$core$IFn$_invoke$arity$3(b,c,d):a.call(null,b,c,d)},f=function(a,b,c,d,e){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$4? +a.cljs$core$IFn$_invoke$arity$4(b,c,d,e):a.call(null,b,c,d,e)},g=function(a,b,c,d,e,f){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$5?a.cljs$core$IFn$_invoke$arity$5(b,c,d,e,f):a.call(null,b,c,d,e,f)},h=function(a,b,c,d,e,f,g){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$6?a.cljs$core$IFn$_invoke$arity$6(b,c,d,e,f,g): +a.call(null,b,c,d,e,f,g)},k=function(a,b,c,d,e,f,g,h){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$7?a.cljs$core$IFn$_invoke$arity$7(b,c,d,e,f,g,h):a.call(null,b,c,d,e,f,g,h)},l=function(a,b,c,d,e,f,g,h,k){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$8?a.cljs$core$IFn$_invoke$arity$8(b,c,d,e,f,g,h,k):a.call(null,b,c, +d,e,f,g,h,k)},m=function(a,b,c,d,e,f,g,h,k,l){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return 
a.cljs$core$IFn$_invoke$arity$9?a.cljs$core$IFn$_invoke$arity$9(b,c,d,e,f,g,h,k,l):a.call(null,b,c,d,e,f,g,h,k,l)},n=function(a,b,c,d,e,f,g,h,k,l,m){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$10?a.cljs$core$IFn$_invoke$arity$10(b,c,d,e,f,g,h,k,l,m):a.call(null, +b,c,d,e,f,g,h,k,l,m)},p=function(a,b,c,d,e,f,g,h,k,l,m,n){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$11?a.cljs$core$IFn$_invoke$arity$11(b,c,d,e,f,g,h,k,l,m,n):a.call(null,b,c,d,e,f,g,h,k,l,m,n)},q=function(a,b,c,d,e,f,g,h,k,l,m,n,p){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$12?a.cljs$core$IFn$_invoke$arity$12(b, +c,d,e,f,g,h,k,l,m,n,p):a.call(null,b,c,d,e,f,g,h,k,l,m,n,p)},r=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$13?a.cljs$core$IFn$_invoke$arity$13(b,c,d,e,f,g,h,k,l,m,n,p,q):a.call(null,b,c,d,e,f,g,h,k,l,m,n,p,q)},t=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$14? 
+a.cljs$core$IFn$_invoke$arity$14(b,c,d,e,f,g,h,k,l,m,n,p,q,r):a.call(null,b,c,d,e,f,g,h,k,l,m,n,p,q,r)},u=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$15?a.cljs$core$IFn$_invoke$arity$15(b,c,d,e,f,g,h,k,l,m,n,p,q,r,t):a.call(null,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t)},v=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0(): +a.val.call(null);return a.cljs$core$IFn$_invoke$arity$16?a.cljs$core$IFn$_invoke$arity$16(b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u):a.call(null,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u)},w=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$17?a.cljs$core$IFn$_invoke$arity$17(b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v):a.call(null,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v)},x=function(a,b,c,d,e,f,g,h,k, +l,m,n,p,q,r,t,u,v,w){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$18?a.cljs$core$IFn$_invoke$arity$18(b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w):a.call(null,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w)},A=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return a.cljs$core$IFn$_invoke$arity$19?a.cljs$core$IFn$_invoke$arity$19(b, +c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x):a.call(null,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x)},K=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x,A){a=this;a=a.val.cljs$core$IFn$_invoke$arity$0?a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null);return 
a.cljs$core$IFn$_invoke$arity$20?a.cljs$core$IFn$_invoke$arity$20(b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x,A):a.call(null,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x,A)},S=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x,A,K){a=this;return cljs.core.apply.cljs$core$IFn$_invoke$arity$variadic(a.val.cljs$core$IFn$_invoke$arity$0? +a.val.cljs$core$IFn$_invoke$arity$0():a.val.call(null),b,c,d,e,cljs.core.prim_seq.cljs$core$IFn$_invoke$arity$2([f,g,h,k,l,m,n,p,q,r,t,u,v,w,x,A,K],0))};a=function(a,z,B,C,D,E,F,G,H,I,J,L,M,N,O,P,Q,R,T,U,V,W){switch(arguments.length){case 1:return b.call(this,a);case 2:return c.call(this,a,z);case 3:return d.call(this,a,z,B);case 4:return e.call(this,a,z,B,C);case 5:return f.call(this,a,z,B,C,D);case 6:return g.call(this,a,z,B,C,D,E);case 7:return h.call(this,a,z,B,C,D,E,F);case 8:return k.call(this, +a,z,B,C,D,E,F,G);case 9:return l.call(this,a,z,B,C,D,E,F,G,H);case 10:return m.call(this,a,z,B,C,D,E,F,G,H,I);case 11:return n.call(this,a,z,B,C,D,E,F,G,H,I,J);case 12:return p.call(this,a,z,B,C,D,E,F,G,H,I,J,L);case 13:return q.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M);case 14:return r.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M,N);case 15:return t.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M,N,O);case 16:return u.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M,N,O,P);case 17:return v.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M,N,O,P,Q); +case 18:return w.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M,N,O,P,Q,R);case 19:return x.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M,N,O,P,Q,R,T);case 20:return A.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M,N,O,P,Q,R,T,U);case 21:return K.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M,N,O,P,Q,R,T,U,V);case 22:return S.call(this,a,z,B,C,D,E,F,G,H,I,J,L,M,N,O,P,Q,R,T,U,V,W)}throw Error("Invalid arity: "+(arguments.length-1));};a.cljs$core$IFn$_invoke$arity$1=b;a.cljs$core$IFn$_invoke$arity$2=c;a.cljs$core$IFn$_invoke$arity$3=d;a.cljs$core$IFn$_invoke$arity$4= 
+e;a.cljs$core$IFn$_invoke$arity$5=f;a.cljs$core$IFn$_invoke$arity$6=g;a.cljs$core$IFn$_invoke$arity$7=h;a.cljs$core$IFn$_invoke$arity$8=k;a.cljs$core$IFn$_invoke$arity$9=l;a.cljs$core$IFn$_invoke$arity$10=m;a.cljs$core$IFn$_invoke$arity$11=n;a.cljs$core$IFn$_invoke$arity$12=p;a.cljs$core$IFn$_invoke$arity$13=q;a.cljs$core$IFn$_invoke$arity$14=r;a.cljs$core$IFn$_invoke$arity$15=t;a.cljs$core$IFn$_invoke$arity$16=u;a.cljs$core$IFn$_invoke$arity$17=v;a.cljs$core$IFn$_invoke$arity$18=w;a.cljs$core$IFn$_invoke$arity$19= +x;a.cljs$core$IFn$_invoke$arity$20=A;a.cljs$core$IFn$_invoke$arity$21=K;a.cljs$core$IFn$_invoke$arity$22=S;return a}();cljs.core.Var.prototype.apply=function(a,b){return this.call.apply(this,[this].concat(cljs.core.aclone(b)))};cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$0=function(){var a=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return a.cljs$core$IFn$_invoke$arity$0?a.cljs$core$IFn$_invoke$arity$0():a.call(null)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$1=function(a){var b=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return b.cljs$core$IFn$_invoke$arity$1?b.cljs$core$IFn$_invoke$arity$1(a):b.call(null,a)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$2=function(a,b){var c=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return c.cljs$core$IFn$_invoke$arity$2?c.cljs$core$IFn$_invoke$arity$2(a,b):c.call(null,a,b)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$3=function(a,b,c){var d=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return d.cljs$core$IFn$_invoke$arity$3?d.cljs$core$IFn$_invoke$arity$3(a,b,c):d.call(null,a,b,c)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$4=function(a,b,c,d){var 
e=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return e.cljs$core$IFn$_invoke$arity$4?e.cljs$core$IFn$_invoke$arity$4(a,b,c,d):e.call(null,a,b,c,d)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$5=function(a,b,c,d,e){var f=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return f.cljs$core$IFn$_invoke$arity$5?f.cljs$core$IFn$_invoke$arity$5(a,b,c,d,e):f.call(null,a,b,c,d,e)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$6=function(a,b,c,d,e,f){var g=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return g.cljs$core$IFn$_invoke$arity$6?g.cljs$core$IFn$_invoke$arity$6(a,b,c,d,e,f):g.call(null,a,b,c,d,e,f)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$7=function(a,b,c,d,e,f,g){var h=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return h.cljs$core$IFn$_invoke$arity$7?h.cljs$core$IFn$_invoke$arity$7(a,b,c,d,e,f,g):h.call(null,a,b,c,d,e,f,g)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$8=function(a,b,c,d,e,f,g,h){var k=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return k.cljs$core$IFn$_invoke$arity$8?k.cljs$core$IFn$_invoke$arity$8(a,b,c,d,e,f,g,h):k.call(null,a,b,c,d,e,f,g,h)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$9=function(a,b,c,d,e,f,g,h,k){var l=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return l.cljs$core$IFn$_invoke$arity$9?l.cljs$core$IFn$_invoke$arity$9(a,b,c,d,e,f,g,h,k):l.call(null,a,b,c,d,e,f,g,h,k)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$10=function(a,b,c,d,e,f,g,h,k,l){var m=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return 
m.cljs$core$IFn$_invoke$arity$10?m.cljs$core$IFn$_invoke$arity$10(a,b,c,d,e,f,g,h,k,l):m.call(null,a,b,c,d,e,f,g,h,k,l)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$11=function(a,b,c,d,e,f,g,h,k,l,m){var n=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return n.cljs$core$IFn$_invoke$arity$11?n.cljs$core$IFn$_invoke$arity$11(a,b,c,d,e,f,g,h,k,l,m):n.call(null,a,b,c,d,e,f,g,h,k,l,m)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$12=function(a,b,c,d,e,f,g,h,k,l,m,n){var p=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return p.cljs$core$IFn$_invoke$arity$12?p.cljs$core$IFn$_invoke$arity$12(a,b,c,d,e,f,g,h,k,l,m,n):p.call(null,a,b,c,d,e,f,g,h,k,l,m,n)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$13=function(a,b,c,d,e,f,g,h,k,l,m,n,p){var q=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return q.cljs$core$IFn$_invoke$arity$13?q.cljs$core$IFn$_invoke$arity$13(a,b,c,d,e,f,g,h,k,l,m,n,p):q.call(null,a,b,c,d,e,f,g,h,k,l,m,n,p)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$14=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q){var r=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return r.cljs$core$IFn$_invoke$arity$14?r.cljs$core$IFn$_invoke$arity$14(a,b,c,d,e,f,g,h,k,l,m,n,p,q):r.call(null,a,b,c,d,e,f,g,h,k,l,m,n,p,q)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$15=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r){var t=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return t.cljs$core$IFn$_invoke$arity$15?t.cljs$core$IFn$_invoke$arity$15(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r):t.call(null,a,b,c,d,e,f,g,h,k,l,m,n,p,q,r)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$16=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t){var 
u=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return u.cljs$core$IFn$_invoke$arity$16?u.cljs$core$IFn$_invoke$arity$16(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t):u.call(null,a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$17=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u){var v=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return v.cljs$core$IFn$_invoke$arity$17?v.cljs$core$IFn$_invoke$arity$17(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u):v.call(null,a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$18=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v){var w=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return w.cljs$core$IFn$_invoke$arity$18?w.cljs$core$IFn$_invoke$arity$18(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v):w.call(null,a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$19=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w){var x=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return x.cljs$core$IFn$_invoke$arity$19?x.cljs$core$IFn$_invoke$arity$19(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w):x.call(null,a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$20=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x){var A=this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null);return A.cljs$core$IFn$_invoke$arity$20?A.cljs$core$IFn$_invoke$arity$20(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x):A.call(null,a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x)}; +cljs.core.Var.prototype.cljs$core$IFn$_invoke$arity$21=function(a,b,c,d,e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x,A){return 
cljs.core.apply.cljs$core$IFn$_invoke$arity$variadic(this.val.cljs$core$IFn$_invoke$arity$0?this.val.cljs$core$IFn$_invoke$arity$0():this.val.call(null),a,b,c,d,cljs.core.prim_seq.cljs$core$IFn$_invoke$arity$2([e,f,g,h,k,l,m,n,p,q,r,t,u,v,w,x,A],0))}; +cljs.core.Var.getBasis=function(){return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"val","val",1769233139,null),new cljs.core.Symbol(null,"sym","sym",195671222,null),new cljs.core.Symbol(null,"_meta","_meta",-1716892533,null)],null)};cljs.core.Var.cljs$lang$type=!0;cljs.core.Var.cljs$lang$ctorStr="cljs.core/Var";cljs.core.Var.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/Var")}; +cljs.core.__GT_Var=function(a,b,c){return new cljs.core.Var(a,b,c)};cljs.core.iterable_QMARK_=function(a){return null!=a?a.cljs$lang$protocol_mask$partition1$&131072||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$IIterable$?!0:a.cljs$lang$protocol_mask$partition1$?!1:cljs.core.native_satisfies_QMARK_(cljs.core.IIterable,a):cljs.core.native_satisfies_QMARK_(cljs.core.IIterable,a)};cljs.core.js_iterable_QMARK_=function(a){return null!=a&&null!=a[cljs.core.ITER_SYMBOL]};cljs.core.clone=function(a){return cljs.core._clone(a)}; +cljs.core.cloneable_QMARK_=function(a){return null!=a?a.cljs$lang$protocol_mask$partition1$&8192||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$ICloneable$?!0:a.cljs$lang$protocol_mask$partition1$?!1:cljs.core.native_satisfies_QMARK_(cljs.core.ICloneable,a):cljs.core.native_satisfies_QMARK_(cljs.core.ICloneable,a)}; +cljs.core.seq=function(a){if(null==a)return null;if(null!=a&&(a.cljs$lang$protocol_mask$partition0$&8388608||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$ISeqable$))return a.cljs$core$ISeqable$_seq$arity$1(null);if(cljs.core.array_QMARK_(a)||"string"===typeof a)return 0===a.length?null:new cljs.core.IndexedSeq(a,0,null);if(cljs.core.js_iterable_QMARK_(a))return 
cljs.core.es6_iterator_seq(goog.object.get(a,cljs.core.ITER_SYMBOL).call(a));if(cljs.core.native_satisfies_QMARK_(cljs.core.ISeqable,a))return cljs.core._seq(a); +throw Error([cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)," is not ISeqable"].join(""));};cljs.core.first=function(a){if(null==a)return null;if(null!=a&&(a.cljs$lang$protocol_mask$partition0$&64||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$ISeq$))return a.cljs$core$ISeq$_first$arity$1(null);a=cljs.core.seq(a);return null==a?null:cljs.core._first(a)}; +cljs.core.rest=function(a){return null!=a?null!=a&&(a.cljs$lang$protocol_mask$partition0$&64||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$ISeq$)?a.cljs$core$ISeq$_rest$arity$1(null):(a=cljs.core.seq(a))?a.cljs$core$ISeq$_rest$arity$1(null):cljs.core.List.EMPTY:cljs.core.List.EMPTY};cljs.core.next=function(a){return null==a?null:null!=a&&(a.cljs$lang$protocol_mask$partition0$&128||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$INext$)?a.cljs$core$INext$_next$arity$1(null):cljs.core.seq(cljs.core.rest(a))}; +cljs.core._EQ_=function(a){switch(arguments.length){case 1:return cljs.core._EQ_.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core._EQ_.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(d=d)return-1;!(0c&&(c+=d,c=0>c?0:c);for(;;)if(cc?d+c:c;for(;;)if(0<=c){if(cljs.core._EQ_.cljs$core$IFn$_invoke$arity$2(cljs.core.nth.cljs$core$IFn$_invoke$arity$2(a,c),b))return c;--c}else return-1};cljs.core._lastIndexOf.cljs$lang$maxFixedArity=3; +cljs.core.IndexedSeqIterator=function(a,b){this.arr=a;this.i=b};cljs.core.IndexedSeqIterator.prototype.hasNext=function(){return this.ia?0:a};cljs.core.IndexedSeq.prototype.cljs$core$IReversible$_rseq$arity$1=function(a){a=this.cljs$core$ICounted$_count$arity$1(null);return 0b)throw Error("Index out of bounds");return cljs.core.linear_traversal_nth.cljs$core$IFn$_invoke$arity$2(a,b)}if(cljs.core.native_satisfies_QMARK_(cljs.core.IIndexed,a))return 
cljs.core._nth.cljs$core$IFn$_invoke$arity$2(a,b);throw Error(["nth not supported on this type ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(cljs.core.type__GT_str(cljs.core.type(a)))].join("")); +}; +cljs.core.nth.cljs$core$IFn$_invoke$arity$3=function(a,b,c){if("number"!==typeof b)throw Error("Index argument to nth must be a number.");if(null==a)return c;if(null!=a&&(a.cljs$lang$protocol_mask$partition0$&16||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$IIndexed$))return a.cljs$core$IIndexed$_nth$arity$3(null,b,c);if(cljs.core.array_QMARK_(a))return-1b?c:cljs.core.linear_traversal_nth.cljs$core$IFn$_invoke$arity$3(a,b,c);if(cljs.core.native_satisfies_QMARK_(cljs.core.IIndexed,a))return cljs.core._nth.cljs$core$IFn$_invoke$arity$3(a,b,c);throw Error(["nth not supported on this type ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(cljs.core.type__GT_str(cljs.core.type(a)))].join(""));};cljs.core.nth.cljs$lang$maxFixedArity=3; +cljs.core.nthrest=function(a,b){for(var c=a;;)if(a=0a:a instanceof goog.math.Integer?a.isNegative():a instanceof module$contents$goog$math$Long_Long?a.isNegative():!1};cljs.core.nat_int_QMARK_=function(a){return cljs.core.integer_QMARK_(a)?!(0>a):a instanceof goog.math.Integer?cljs.core.not(a.isNegative()):a instanceof module$contents$goog$math$Long_Long?cljs.core.not(a.isNegative()):!1};cljs.core.float_QMARK_=function(a){return"number"===typeof a}; +cljs.core.double_QMARK_=function(a){return"number"===typeof a};cljs.core.infinite_QMARK_=function(a){return a===Number.POSITIVE_INFINITY||a===Number.NEGATIVE_INFINITY};cljs.core.contains_QMARK_=function(a,b){return cljs.core.get.cljs$core$IFn$_invoke$arity$3(a,b,cljs.core.lookup_sentinel)===cljs.core.lookup_sentinel?!1:!0}; +cljs.core.find=function(a,b){return cljs.core.ifind_QMARK_(a)?cljs.core._find(a,b):null!=a&&cljs.core.associative_QMARK_(a)&&cljs.core.contains_QMARK_(a,b)?new cljs.core.MapEntry(b,cljs.core.get.cljs$core$IFn$_invoke$arity$2(a,b),null):null}; 
+cljs.core.distinct_QMARK_=function(a){switch(arguments.length){case 1:return cljs.core.distinct_QMARK_.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.distinct_QMARK_.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(dd?1:0===c?0:cljs.core.compare_indexed.cljs$core$IFn$_invoke$arity$4(a,b,c,0)};cljs.core.compare_indexed.cljs$core$IFn$_invoke$arity$4=function(a,b,c,d){for(;;){var e=cljs.core.compare(cljs.core.nth.cljs$core$IFn$_invoke$arity$2(a,d),cljs.core.nth.cljs$core$IFn$_invoke$arity$2(b,d));if(0===e&&d+1b};cljs.core._GT_.cljs$core$IFn$_invoke$arity$variadic=function(a,b,c){for(;;)if(a>b)if(cljs.core.next(c))a=b,b=cljs.core.first(c),c=cljs.core.next(c);else return b>cljs.core.first(c);else return!1};cljs.core._GT_.cljs$lang$applyTo=function(a){var b=cljs.core.first(a),c=cljs.core.next(a);a=cljs.core.first(c);c=cljs.core.next(c);return this.cljs$core$IFn$_invoke$arity$variadic(b,a,c)};cljs.core._GT_.cljs$lang$maxFixedArity=2; +cljs.core._GT__EQ_=function(a){switch(arguments.length){case 1:return cljs.core._GT__EQ_.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core._GT__EQ_.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(d=b};cljs.core._GT__EQ_.cljs$core$IFn$_invoke$arity$variadic=function(a,b,c){for(;;)if(a>=b)if(cljs.core.next(c))a=b,b=cljs.core.first(c),c=cljs.core.next(c);else return b>=cljs.core.first(c);else return!1}; +cljs.core._GT__EQ_.cljs$lang$applyTo=function(a){var b=cljs.core.first(a),c=cljs.core.next(a);a=cljs.core.first(c);c=cljs.core.next(c);return this.cljs$core$IFn$_invoke$arity$variadic(b,a,c)};cljs.core._GT__EQ_.cljs$lang$maxFixedArity=2;cljs.core.dec=function(a){return a-1}; +cljs.core.max=function(a){switch(arguments.length){case 1:return cljs.core.max.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return 
cljs.core.max.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(db?a:b};cljs.core.max.cljs$core$IFn$_invoke$arity$variadic=function(a,b,c){return cljs.core.reduce.cljs$core$IFn$_invoke$arity$3(cljs.core.max,a>b?a:b,c)};cljs.core.max.cljs$lang$applyTo=function(a){var b=cljs.core.first(a),c=cljs.core.next(a);a=cljs.core.first(c);c=cljs.core.next(c);return this.cljs$core$IFn$_invoke$arity$variadic(b,a,c)};cljs.core.max.cljs$lang$maxFixedArity=2; +cljs.core.min=function(a){switch(arguments.length){case 1:return cljs.core.min.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.min.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(d>b};cljs.core.bit_shift_right_zero_fill=function(a,b){return a>>>b};cljs.core.unsigned_bit_shift_right=function(a,b){return a>>>b};cljs.core.bit_count=function(a){a-=a>>1&1431655765;a=(a&858993459)+(a>>2&858993459);return 16843009*(a+(a>>4)&252645135)>>24}; +cljs.core._EQ__EQ_=function(a){switch(arguments.length){case 1:return cljs.core._EQ__EQ_.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core._EQ__EQ_.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(da};cljs.core.nthnext=function(a,b){for(a=cljs.core.seq(a);;)if(a&&0c:b}()))b[c]=a.next(),c+=1;else return cljs.core.chunk_cons(cljs.core.array_chunk.cljs$core$IFn$_invoke$arity$3(b,0,c),cljs.core.chunkIteratorSeq.cljs$core$IFn$_invoke$arity$1?cljs.core.chunkIteratorSeq.cljs$core$IFn$_invoke$arity$1(a):cljs.core.chunkIteratorSeq.call(null,a));else return null},null,null)}; +cljs.core.TransformerIterator=function(a,b,c,d,e,f){this.buffer=a;this._next=b;this.completed=c;this.xf=d;this.sourceIter=e;this.multi=f}; 
+cljs.core.TransformerIterator.prototype.step=function(){if(this._next!==cljs.core.NONE)return!0;for(;;)if(this._next===cljs.core.NONE)if(this.buffer.isEmpty()){if(this.completed)return!1;if(this.sourceIter.hasNext()){if(this.multi)var a=cljs.core.apply.cljs$core$IFn$_invoke$arity$2(this.xf,cljs.core.cons(null,this.sourceIter.next()));else a=this.sourceIter.next(),a=this.xf.cljs$core$IFn$_invoke$arity$2?this.xf.cljs$core$IFn$_invoke$arity$2(null,a):this.xf.call(null,null,a);cljs.core.reduced_QMARK_(a)&& +(this.xf.cljs$core$IFn$_invoke$arity$1?this.xf.cljs$core$IFn$_invoke$arity$1(null):this.xf.call(null,null),this.completed=!0)}else this.xf.cljs$core$IFn$_invoke$arity$1?this.xf.cljs$core$IFn$_invoke$arity$1(null):this.xf.call(null,null),this.completed=!0}else this._next=this.buffer.remove();else return!0};cljs.core.TransformerIterator.prototype.hasNext=function(){return this.step()}; +cljs.core.TransformerIterator.prototype.next=function(){if(this.hasNext()){var a=this._next;this._next=cljs.core.NONE;return a}throw Error("No such element");};cljs.core.TransformerIterator.prototype.remove=function(){return Error("Unsupported operation")}; +cljs.core.TransformerIterator.getBasis=function(){return new cljs.core.PersistentVector(null,6,5,cljs.core.PersistentVector.EMPTY_NODE,[cljs.core.with_meta(new cljs.core.Symbol(null,"buffer","buffer",-2037140571,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),cljs.core.with_meta(new cljs.core.Symbol(null,"_next","_next",101877036,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266), +!0],null)),cljs.core.with_meta(new cljs.core.Symbol(null,"completed","completed",1154475024,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),cljs.core.with_meta(new cljs.core.Symbol(null,"xf","xf",2042434515,null),new cljs.core.PersistentArrayMap(null,1,[new 
cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),new cljs.core.Symbol(null,"sourceIter","sourceIter",1068220306,null),new cljs.core.Symbol(null,"multi","multi", +1450238522,null)],null)};cljs.core.TransformerIterator.cljs$lang$type=!0;cljs.core.TransformerIterator.cljs$lang$ctorStr="cljs.core/TransformerIterator";cljs.core.TransformerIterator.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/TransformerIterator")};cljs.core.__GT_TransformerIterator=function(a,b,c,d,e,f){return new cljs.core.TransformerIterator(a,b,c,d,e,f)};goog.object.set(cljs.core.TransformerIterator.prototype,cljs.core.ITER_SYMBOL,function(){return cljs.core.es6_iterator(this)}); +cljs.core.transformer_iterator=function(a,b,c){var d=new cljs.core.TransformerIterator(cljs.core.EMPTY,cljs.core.NONE,!1,null,b,c);d.xf=function(){var b=function(){var a=null,b=function(a,b){d.buffer=d.buffer.add(b);return a};a=function(a,c){switch(arguments.length){case 0:return null;case 1:return a;case 2:return b.call(this,a,c)}throw Error("Invalid arity: "+arguments.length);};a.cljs$core$IFn$_invoke$arity$0=function(){return null};a.cljs$core$IFn$_invoke$arity$1=function(a){return a};a.cljs$core$IFn$_invoke$arity$2= +b;return a}();return a.cljs$core$IFn$_invoke$arity$1?a.cljs$core$IFn$_invoke$arity$1(b):a.call(null,b)}();return d};cljs.core.TransformerIterator.create=function(a,b){return cljs.core.transformer_iterator(a,b,!1)};cljs.core.TransformerIterator.createMulti=function(a,b){return cljs.core.transformer_iterator(a,new cljs.core.MultiIterator(cljs.core.to_array(b)),!0)}; +cljs.core.sequence=function(a){switch(arguments.length){case 1:return cljs.core.sequence.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.sequence.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(da?0:a-1>>>5<<5};cljs.core.new_path=function(a,b,c){for(;;){if(0===b)return c;var 
d=cljs.core.pv_fresh_node(a);cljs.core.pv_aset(d,0,c);c=d;b-=5}}; +cljs.core.push_tail=function(a,b,c,d){var e=cljs.core.pv_clone_node(c),f=a.cnt-1>>>b&31;5===b?cljs.core.pv_aset(e,f,d):(c=cljs.core.pv_aget(c,f),null!=c?(b-=5,a=cljs.core.push_tail.cljs$core$IFn$_invoke$arity$4?cljs.core.push_tail.cljs$core$IFn$_invoke$arity$4(a,b,c,d):cljs.core.push_tail.call(null,a,b,c,d)):a=cljs.core.new_path(null,b-5,d),cljs.core.pv_aset(e,f,a));return e}; +cljs.core.vector_index_out_of_bounds=function(a,b){throw Error(["No item ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)," in vector of length ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(b)].join(""));};cljs.core.first_array_for_longvec=function(a){var b=a.root;for(a=a.shift;;)if(0=cljs.core.tail_off(a))return a.tail;var c=a.root;for(a=a.shift;;)if(0>>a&31),a-=5;else return c.arr}; +cljs.core.array_for=function(a,b){return 0<=b&&b>>b&31;cljs.core.pv_aset(f,g,function(){var f=b-5,k=cljs.core.pv_aget(c,g);return cljs.core.do_assoc.cljs$core$IFn$_invoke$arity$5?cljs.core.do_assoc.cljs$core$IFn$_invoke$arity$5(a,f,k,d,e):cljs.core.do_assoc.call(null,a,f,k,d,e)}())}return f}; +cljs.core.pop_tail=function(a,b,c){var d=a.cnt-2>>>b&31;if(5=this.cnt?new cljs.core.IndexedSeq(this.tail,0,null):cljs.core.chunked_seq.cljs$core$IFn$_invoke$arity$4(this,cljs.core.first_array_for_longvec(this),0,0)};cljs.core.PersistentVector.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return b===this.meta?this:new cljs.core.PersistentVector(b,this.cnt,this.shift,this.root,this.tail,this.__hash)}; +cljs.core.PersistentVector.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){if(32>this.cnt-cljs.core.tail_off(this)){a=this.tail.length;for(var c=Array(a+1),d=0;;)if(d>>5>1<c)return new cljs.core.PersistentVector(null,c,5,cljs.core.PersistentVector.EMPTY_NODE,a,null);var d=a.slice(0,32);b=32;for(var e=(new 
cljs.core.PersistentVector(null,32,5,cljs.core.PersistentVector.EMPTY_NODE,d,null)).cljs$core$IEditableCollection$_as_transient$arity$1(null);;)if(bb)return null;a=this.start+b;return ab||this.end<=this.start+b?cljs.core.vector_index_out_of_bounds(b,this.end-this.start):cljs.core._nth.cljs$core$IFn$_invoke$arity$2(this.v,this.start+b)};cljs.core.Subvec.prototype.cljs$core$IIndexed$_nth$arity$3=function(a,b,c){return 0>b||this.end<=this.start+b?c:cljs.core._nth.cljs$core$IFn$_invoke$arity$3(this.v,this.start+b,c)}; +cljs.core.Subvec.prototype.cljs$core$IVector$_assoc_n$arity$3=function(a,b,c){var d=this,e=d.start+b;if(0>b||d.end+1<=e)throw Error(["Index ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(b)," out of bounds [0,",cljs.core.str.cljs$core$IFn$_invoke$arity$1(this.cljs$core$ICounted$_count$arity$1(null)),"]"].join(""));return cljs.core.build_subvec(d.meta,cljs.core.assoc.cljs$core$IFn$_invoke$arity$3(d.v,e,c),d.start,function(){var a=d.end,b=e+1;return a>b?a:b}(),null)}; +cljs.core.Subvec.prototype.cljs$core$IIterable$_iterator$arity$1=function(a){return null!=this.v&&cljs.core.PROTOCOL_SENTINEL===this.v.cljs$core$APersistentVector$?cljs.core.ranged_iterator(this.v,this.start,this.end):cljs.core.seq_iter(this)};cljs.core.Subvec.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return this.meta};cljs.core.Subvec.prototype.cljs$core$ICloneable$_clone$arity$1=function(a){return new cljs.core.Subvec(this.meta,this.v,this.start,this.end,this.__hash)}; +cljs.core.Subvec.prototype.cljs$core$ICounted$_count$arity$1=function(a){return this.end-this.start};cljs.core.Subvec.prototype.cljs$core$IStack$_peek$arity$1=function(a){return this.start===this.end?null:cljs.core._nth.cljs$core$IFn$_invoke$arity$2(this.v,this.end-1)};cljs.core.Subvec.prototype.cljs$core$IStack$_pop$arity$1=function(a){if(this.start===this.end)throw Error("Can't pop empty vector");return cljs.core.build_subvec(this.meta,this.v,this.start,this.end-1,null)}; 
+cljs.core.Subvec.prototype.cljs$core$IReversible$_rseq$arity$1=function(a){return this.start!==this.end?new cljs.core.RSeq(this,this.end-this.start-1,null):null};cljs.core.Subvec.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this.__hash;return null!=a?a:this.__hash=a=cljs.core.hash_ordered_coll(this)};cljs.core.Subvec.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return cljs.core.equiv_sequential(this,b)}; +cljs.core.Subvec.prototype.cljs$core$IEmptyableCollection$_empty$arity$1=function(a){return cljs.core._with_meta(cljs.core.PersistentVector.EMPTY,this.meta)};cljs.core.Subvec.prototype.cljs$core$IReduce$_reduce$arity$2=function(a,b){return null!=this.v&&cljs.core.PROTOCOL_SENTINEL===this.v.cljs$core$APersistentVector$?cljs.core.pv_reduce.cljs$core$IFn$_invoke$arity$4(this.v,b,this.start,this.end):cljs.core.ci_reduce.cljs$core$IFn$_invoke$arity$2(this,b)}; +cljs.core.Subvec.prototype.cljs$core$IReduce$_reduce$arity$3=function(a,b,c){return null!=this.v&&cljs.core.PROTOCOL_SENTINEL===this.v.cljs$core$APersistentVector$?cljs.core.pv_reduce.cljs$core$IFn$_invoke$arity$5(this.v,b,c,this.start,this.end):cljs.core.ci_reduce.cljs$core$IFn$_invoke$arity$3(this,b,c)}; +cljs.core.Subvec.prototype.cljs$core$IAssociative$_assoc$arity$3=function(a,b,c){if("number"===typeof b)return this.cljs$core$IVector$_assoc_n$arity$3(null,b,c);throw Error("Subvec's key for assoc must be a number.");};cljs.core.Subvec.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){var b=this;return function e(a){return a===b.end?null:cljs.core.cons(cljs.core._nth.cljs$core$IFn$_invoke$arity$2(b.v,a),new cljs.core.LazySeq(null,function(){return e(a+1)},null,null))}(b.start)}; +cljs.core.Subvec.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return b===this.meta?this:cljs.core.build_subvec(b,this.v,this.start,this.end,this.__hash)};cljs.core.Subvec.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){return 
cljs.core.build_subvec(this.meta,cljs.core._assoc_n(this.v,this.end,b),this.start,this.end+1,null)}; +cljs.core.Subvec.prototype.call=function(){var a=null;a=function(a,c,d){switch(arguments.length){case 2:return this.cljs$core$IIndexed$_nth$arity$2(null,c);case 3:return this.cljs$core$IIndexed$_nth$arity$3(null,c,d)}throw Error("Invalid arity: "+(arguments.length-1));};a.cljs$core$IFn$_invoke$arity$2=function(a,c){return this.cljs$core$IIndexed$_nth$arity$2(null,c)};a.cljs$core$IFn$_invoke$arity$3=function(a,c,d){return this.cljs$core$IIndexed$_nth$arity$3(null,c,d)};return a}(); +cljs.core.Subvec.prototype.apply=function(a,b){return this.call.apply(this,[this].concat(cljs.core.aclone(b)))};cljs.core.Subvec.prototype.cljs$core$IFn$_invoke$arity$1=function(a){return this.cljs$core$IIndexed$_nth$arity$2(null,a)};cljs.core.Subvec.prototype.cljs$core$IFn$_invoke$arity$2=function(a,b){return this.cljs$core$IIndexed$_nth$arity$3(null,a,b)}; +cljs.core.Subvec.getBasis=function(){return new cljs.core.PersistentVector(null,5,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"meta","meta",-1154898805,null),new cljs.core.Symbol(null,"v","v",1661996586,null),new cljs.core.Symbol(null,"start","start",1285322546,null),new cljs.core.Symbol(null,"end","end",1372345569,null),cljs.core.with_meta(new cljs.core.Symbol(null,"__hash","__hash",-1328796629,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable", +"mutable",875778266),!0],null))],null)};cljs.core.Subvec.cljs$lang$type=!0;cljs.core.Subvec.cljs$lang$ctorStr="cljs.core/Subvec";cljs.core.Subvec.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/Subvec")};cljs.core.__GT_Subvec=function(a,b,c,d,e){return new cljs.core.Subvec(a,b,c,d,e)};goog.object.set(cljs.core.Subvec.prototype,cljs.core.ITER_SYMBOL,function(){return cljs.core.es6_iterator(this)}); +cljs.core.build_subvec=function(a,b,c,d,e){for(;;)if(b instanceof 
cljs.core.Subvec)c=b.start+c,d=b.start+d,b=b.v;else{if(!cljs.core.vector_QMARK_(b))throw Error("v must satisfy IVector");if(0>c||dcljs.core.count(b))throw Error("Index out of bounds");return new cljs.core.Subvec(a,b,c,d,e)}}; +cljs.core.subvec=function(a){switch(arguments.length){case 2:return cljs.core.subvec.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return cljs.core.subvec.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs.core.subvec.cljs$core$IFn$_invoke$arity$2=function(a,b){return cljs.core.subvec.cljs$core$IFn$_invoke$arity$3(a,b,cljs.core.count(a))}; +cljs.core.subvec.cljs$core$IFn$_invoke$arity$3=function(a,b,c){if(null==b||null==c)throw Error("Assert failed: (and (not (nil? start)) (not (nil? end)))");return cljs.core.build_subvec(null,a,b|0,c|0,null)};cljs.core.subvec.cljs$lang$maxFixedArity=3;cljs.core.tv_ensure_editable=function(a,b){return a===b.edit?b:new cljs.core.VectorNode(a,cljs.core.aclone(b.arr))};cljs.core.tv_editable_root=function(a){return new cljs.core.VectorNode({},cljs.core.aclone(a.arr))}; +cljs.core.tv_editable_tail=function(a){var b=[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null];cljs.core.array_copy(a,0,b,0,a.length);return b}; +cljs.core.tv_push_tail=function(a,b,c,d){var e=cljs.core.tv_ensure_editable(a.root.edit,c),f=a.cnt-1>>>b&31;cljs.core.pv_aset(e,f,5===b?d:function(){var c=cljs.core.pv_aget(e,f);if(null!=c){var h=b-5;return cljs.core.tv_push_tail.cljs$core$IFn$_invoke$arity$4?cljs.core.tv_push_tail.cljs$core$IFn$_invoke$arity$4(a,h,c,d):cljs.core.tv_push_tail.call(null,a,h,c,d)}return cljs.core.new_path(a.root.edit,b-5,d)}());return e}; +cljs.core.tv_pop_tail=function(a,b,c){c=cljs.core.tv_ensure_editable(a.root.edit,c);var 
d=a.cnt-2>>>b&31;if(5=cljs.core.tail_off(a))return a.tail;var c=a.root,d=c;for(a=a.shift;;)if(0>>a&31)),a-=5;else return d.arr};cljs.core.TransientVector=function(a,b,c,d){this.cnt=a;this.shift=b;this.root=c;this.tail=d;this.cljs$lang$protocol_mask$partition1$=88;this.cljs$lang$protocol_mask$partition0$=275}; +cljs.core.TransientVector.prototype.cljs$core$ITransientCollection$_conj_BANG_$arity$2=function(a,b){if(this.root.edit){if(32>this.cnt-cljs.core.tail_off(this))this.tail[this.cnt&31]=b;else{a=new cljs.core.VectorNode(this.root.edit,this.tail);var c=[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null];c[0]=b;this.tail=c;this.cnt>>>5>1<>>a&31;cljs.core.pv_aset(g,f,h(a-5,cljs.core.pv_aget(g,f)))}return g}(d.shift,d.root)}(),d.root=a),this;if(b===d.cnt)return this.cljs$core$ITransientCollection$_conj_BANG_$arity$2(null, +c);throw Error(["Index ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(b)," out of bounds for TransientVector of length",cljs.core.str.cljs$core$IFn$_invoke$arity$1(d.cnt)].join(""));}throw Error("assoc! 
after persistent!");}; +cljs.core.TransientVector.prototype.cljs$core$ITransientVector$_pop_BANG_$arity$1=function(a){if(this.root.edit){if(0===this.cnt)throw Error("Can't pop empty vector");if(1===this.cnt)this.cnt=0;else if(0<(this.cnt-1&31))--this.cnt;else{a=cljs.core.unchecked_editable_array_for(this,this.cnt-2);var b=cljs.core.tv_pop_tail(this,this.shift,this.root);b=null!=b?b:new cljs.core.VectorNode(this.root.edit,[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null, +null,null,null,null,null,null,null,null,null,null,null,null]);5b?1:0}; +cljs.core.obj_map__GT_hash_map=function(a,b,c){var d=a.keys,e=d.length,f=a.strobj;a=cljs.core.meta(a);for(var g=0,h=cljs.core.transient$(cljs.core.PersistentHashMap.EMPTY);;)if(gcljs.core.ObjMap.HASHMAP_THRESHOLD||this.keys.length>=cljs.core.ObjMap.HASHMAP_THRESHOLD)return cljs.core.obj_map__GT_hash_map(this,b,c);if(null!=cljs.core.scan_array(1,b,this.keys))return a=cljs.core.obj_clone(this.strobj,this.keys),goog.object.set(a,b,c),new cljs.core.ObjMap(this.meta,this.keys,a,this.update_count+1,null);a=cljs.core.obj_clone(this.strobj,this.keys);var d=cljs.core.aclone(this.keys); +goog.object.set(a,b,c);d.push(b);return new cljs.core.ObjMap(this.meta,d,a,this.update_count+1,null)}return cljs.core.obj_map__GT_hash_map(this,b,c)};cljs.core.ObjMap.prototype.cljs$core$IAssociative$_contains_key_QMARK_$arity$2=function(a,b){return goog.isString(b)&&null!=cljs.core.scan_array(1,b,this.keys)?!0:!1}; +cljs.core.ObjMap.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){var b=this;return 0=a)return new cljs.core.PersistentArrayMap(this.meta,this.cnt-1,c,null);cljs.core._EQ_.cljs$core$IFn$_invoke$arity$2(b,this.arr[d])?d+=2:(c[e]=this.arr[d],c[e+1]=this.arr[d+1],e+=2,d+=2)}}else return this}; +cljs.core.PersistentArrayMap.prototype.cljs$core$IAssociative$_assoc$arity$3=function(a,b,c){a=cljs.core.array_map_index_of(this,b);if(-1===a)return this.cnt>>b&31}; 
+cljs.core.clone_and_set=function(a){switch(arguments.length){case 3:return cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);case 5:return cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$5(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3=function(a,b,c){a=cljs.core.aclone(a);a[b]=c;return a};cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$5=function(a,b,c,d,e){a=cljs.core.aclone(a);a[b]=c;a[d]=e;return a};cljs.core.clone_and_set.cljs$lang$maxFixedArity=5;cljs.core.remove_pair=function(a,b){var c=Array(a.length-2);cljs.core.array_copy(a,0,c,0,2*b);cljs.core.array_copy(a,2*(b+1),c,2*b,c.length-2*b);return c}; +cljs.core.bitmap_indexed_node_index=function(a,b){return cljs.core.bit_count(a&b-1)};cljs.core.bitpos=function(a,b){return 1<<(a>>>b&31)}; +cljs.core.edit_and_set=function(a){switch(arguments.length){case 4:return cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4(arguments[0],arguments[1],arguments[2],arguments[3]);case 6:return cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$6(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4],arguments[5]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4=function(a,b,c,d){a=a.ensure_editable(b);a.arr[c]=d;return a};cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$6=function(a,b,c,d,e,f){a=a.ensure_editable(b);a.arr[c]=d;a.arr[e]=f;return a};cljs.core.edit_and_set.cljs$lang$maxFixedArity=6; +cljs.core.inode_kv_reduce=function(a,b,c){for(var d=a.length,e=0,f=c;;)if(eb?4:2*(b+1));cljs.core.array_copy(this.arr,0,c,0,2*b);return new cljs.core.BitmapIndexedNode(a,this.bitmap,c)}; 
+cljs.core.BitmapIndexedNode.prototype.inode_without_BANG_=function(a,b,c,d,e){var f=1<<(c>>>b&31);if(0===(this.bitmap&f))return this;var g=cljs.core.bitmap_indexed_node_index(this.bitmap,f),h=this.arr[2*g],k=this.arr[2*g+1];return null==h?(b=k.inode_without_BANG_(a,b+5,c,d,e),b===k?this:null!=b?cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4(this,a,2*g+1,b):this.bitmap===f?null:this.edit_and_remove_pair(a,f,g)):cljs.core.key_test(d,h)?(e.val=!0,this.edit_and_remove_pair(a,f,g)):this}; +cljs.core.BitmapIndexedNode.prototype.edit_and_remove_pair=function(a,b,c){if(this.bitmap===b)return null;a=this.ensure_editable(a);var d=a.arr,e=d.length;a.bitmap^=b;cljs.core.array_copy(d,2*(c+1),d,2*c,e-2*(c+1));d[e-2]=null;d[e-1]=null;return a};cljs.core.BitmapIndexedNode.prototype.inode_seq=function(){return cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$1(this.arr)};cljs.core.BitmapIndexedNode.prototype.kv_reduce=function(a,b){return cljs.core.inode_kv_reduce(this.arr,a,b)}; +cljs.core.BitmapIndexedNode.prototype.inode_lookup=function(a,b,c,d){var e=1<<(b>>>a&31);if(0===(this.bitmap&e))return d;var f=cljs.core.bitmap_indexed_node_index(this.bitmap,e);e=this.arr[2*f];f=this.arr[2*f+1];return null==e?f.inode_lookup(a+5,b,c,d):cljs.core.key_test(c,e)?f:d}; +cljs.core.BitmapIndexedNode.prototype.inode_assoc_BANG_=function(a,b,c,d,e,f){var g=1<<(c>>>b&31),h=cljs.core.bitmap_indexed_node_index(this.bitmap,g);if(0===(this.bitmap&g)){var k=cljs.core.bit_count(this.bitmap);if(2*k>>b&31]=cljs.core.BitmapIndexedNode.EMPTY.inode_assoc_BANG_(a,b+5,c,d,e,f);for(e=d=0;;)if(32>d)0===(this.bitmap>>>d&1)?d+=1:(h[d]=null!=this.arr[e]?cljs.core.BitmapIndexedNode.EMPTY.inode_assoc_BANG_(a,b+5,cljs.core.hash(this.arr[e]),this.arr[e],this.arr[e+1],f):this.arr[e+1],e+=2,d+=1);else break;return new cljs.core.ArrayNode(a,k+1,h)}b=Array(2*(k+4));cljs.core.array_copy(this.arr,0,b,0,2*h);b[2*h]=d;b[2*h+1]=e;cljs.core.array_copy(this.arr,2*h, 
+b,2*(h+1),2*(k-h));f.val=!0;a=this.ensure_editable(a);a.arr=b;a.bitmap|=g;return a}k=this.arr[2*h];g=this.arr[2*h+1];if(null==k)return k=g.inode_assoc_BANG_(a,b+5,c,d,e,f),k===g?this:cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4(this,a,2*h+1,k);if(cljs.core.key_test(d,k))return e===g?this:cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4(this,a,2*h+1,e);f.val=!0;return cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$6(this,a,2*h,null,2*h+1,cljs.core.create_node.cljs$core$IFn$_invoke$arity$7(a, +b+5,k,g,c,d,e))}; +cljs.core.BitmapIndexedNode.prototype.inode_assoc=function(a,b,c,d,e){var f=1<<(b>>>a&31),g=cljs.core.bitmap_indexed_node_index(this.bitmap,f);if(0===(this.bitmap&f)){var h=cljs.core.bit_count(this.bitmap);if(16<=h){g=[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null];g[b>>>a&31]=cljs.core.BitmapIndexedNode.EMPTY.inode_assoc(a+5,b,c,d,e);for(d=c=0;;)if(32>c)0===(this.bitmap>>>c&1)?c+=1:(g[c]= +null!=this.arr[d]?cljs.core.BitmapIndexedNode.EMPTY.inode_assoc(a+5,cljs.core.hash(this.arr[d]),this.arr[d],this.arr[d+1],e):this.arr[d+1],d+=2,c+=1);else break;return new cljs.core.ArrayNode(null,h+1,g)}a=Array(2*(h+1));cljs.core.array_copy(this.arr,0,a,0,2*g);a[2*g]=c;a[2*g+1]=d;cljs.core.array_copy(this.arr,2*g,a,2*(g+1),2*(h-g));e.val=!0;return new cljs.core.BitmapIndexedNode(null,this.bitmap|f,a)}h=this.arr[2*g];f=this.arr[2*g+1];if(null==h)return h=f.inode_assoc(a+5,b,c,d,e),h===f?this:new cljs.core.BitmapIndexedNode(null, +this.bitmap,cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3(this.arr,2*g+1,h));if(cljs.core.key_test(c,h))return d===f?this:new cljs.core.BitmapIndexedNode(null,this.bitmap,cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3(this.arr,2*g+1,d));e.val=!0;return new 
cljs.core.BitmapIndexedNode(null,this.bitmap,cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$5(this.arr,2*g,null,2*g+1,cljs.core.create_node.cljs$core$IFn$_invoke$arity$6(a+5,h,f,b,c,d)))}; +cljs.core.BitmapIndexedNode.prototype.inode_find=function(a,b,c,d){var e=1<<(b>>>a&31);if(0===(this.bitmap&e))return d;var f=cljs.core.bitmap_indexed_node_index(this.bitmap,e);e=this.arr[2*f];f=this.arr[2*f+1];return null==e?f.inode_find(a+5,b,c,d):cljs.core.key_test(c,e)?new cljs.core.MapEntry(e,f,null):d}; +cljs.core.BitmapIndexedNode.prototype.inode_without=function(a,b,c){var d=1<<(b>>>a&31);if(0===(this.bitmap&d))return this;var e=cljs.core.bitmap_indexed_node_index(this.bitmap,d),f=this.arr[2*e],g=this.arr[2*e+1];return null==f?(a=g.inode_without(a+5,b,c),a===g?this:null!=a?new cljs.core.BitmapIndexedNode(null,this.bitmap,cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3(this.arr,2*e+1,a)):this.bitmap===d?null:new cljs.core.BitmapIndexedNode(null,this.bitmap^d,cljs.core.remove_pair(this.arr, +e))):cljs.core.key_test(c,f)?new cljs.core.BitmapIndexedNode(null,this.bitmap^d,cljs.core.remove_pair(this.arr,e)):this};cljs.core.BitmapIndexedNode.prototype.cljs$core$IIterable$_iterator$arity$1=function(a){return new cljs.core.NodeIterator(this.arr,0,null,null)}; +cljs.core.BitmapIndexedNode.getBasis=function(){return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"edit","edit",-1302639,null),cljs.core.with_meta(new cljs.core.Symbol(null,"bitmap","bitmap",501334601,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),cljs.core.with_meta(new cljs.core.Symbol(null,"arr","arr",2115492975,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null, 
+"mutable","mutable",875778266),!0],null))],null)};cljs.core.BitmapIndexedNode.cljs$lang$type=!0;cljs.core.BitmapIndexedNode.cljs$lang$ctorStr="cljs.core/BitmapIndexedNode";cljs.core.BitmapIndexedNode.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/BitmapIndexedNode")};cljs.core.__GT_BitmapIndexedNode=function(a,b,c){return new cljs.core.BitmapIndexedNode(a,b,c)};cljs.core.BitmapIndexedNode.EMPTY=new cljs.core.BitmapIndexedNode(null,0,[]); +cljs.core.pack_array_node=function(a,b,c){var d=a.arr,e=d.length;a=Array(2*(a.cnt-1));for(var f=0,g=1,h=0;;)if(f>>b&31,g=this.arr[f];if(null==g)return this;b=g.inode_without_BANG_(a,b+5,c,d,e);if(b===g)return this;if(null==b){if(8>=this.cnt)return cljs.core.pack_array_node(this,a,f);a=cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4(this,a,f,b);--a.cnt;return a}return cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4(this,a,f,b)};cljs.core.ArrayNode.prototype.inode_seq=function(){return cljs.core.create_array_node_seq.cljs$core$IFn$_invoke$arity$1(this.arr)}; +cljs.core.ArrayNode.prototype.kv_reduce=function(a,b){for(var c=this.arr.length,d=0;;)if(d>>a&31];return null!=e?e.inode_lookup(a+5,b,c,d):d}; +cljs.core.ArrayNode.prototype.inode_assoc_BANG_=function(a,b,c,d,e,f){var g=c>>>b&31,h=this.arr[g];if(null==h)return a=cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4(this,a,g,cljs.core.BitmapIndexedNode.EMPTY.inode_assoc_BANG_(a,b+5,c,d,e,f)),a.cnt+=1,a;b=h.inode_assoc_BANG_(a,b+5,c,d,e,f);return b===h?this:cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4(this,a,g,b)}; +cljs.core.ArrayNode.prototype.inode_assoc=function(a,b,c,d,e){var f=b>>>a&31,g=this.arr[f];if(null==g)return new cljs.core.ArrayNode(null,this.cnt+1,cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3(this.arr,f,cljs.core.BitmapIndexedNode.EMPTY.inode_assoc(a+5,b,c,d,e)));a=g.inode_assoc(a+5,b,c,d,e);return a===g?this:new 
cljs.core.ArrayNode(null,this.cnt,cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3(this.arr,f,a))}; +cljs.core.ArrayNode.prototype.inode_find=function(a,b,c,d){var e=this.arr[b>>>a&31];return null!=e?e.inode_find(a+5,b,c,d):d}; +cljs.core.ArrayNode.prototype.inode_without=function(a,b,c){var d=b>>>a&31,e=this.arr[d];return null!=e?(a=e.inode_without(a+5,b,c),a===e?this:null==a?8>=this.cnt?cljs.core.pack_array_node(this,null,d):new cljs.core.ArrayNode(null,this.cnt-1,cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3(this.arr,d,a)):new cljs.core.ArrayNode(null,this.cnt,cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3(this.arr,d,a))):this}; +cljs.core.ArrayNode.prototype.cljs$core$IIterable$_iterator$arity$1=function(a){return new cljs.core.ArrayNodeIterator(this.arr,0,null)}; +cljs.core.ArrayNode.getBasis=function(){return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"edit","edit",-1302639,null),cljs.core.with_meta(new cljs.core.Symbol(null,"cnt","cnt",1924510325,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),cljs.core.with_meta(new cljs.core.Symbol(null,"arr","arr",2115492975,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null, +"mutable","mutable",875778266),!0],null))],null)};cljs.core.ArrayNode.cljs$lang$type=!0;cljs.core.ArrayNode.cljs$lang$ctorStr="cljs.core/ArrayNode";cljs.core.ArrayNode.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/ArrayNode")};cljs.core.__GT_ArrayNode=function(a,b,c){return new cljs.core.ArrayNode(a,b,c)};cljs.core.hash_collision_node_find_index=function(a,b,c){b*=2;for(var d=0;;)if(da?d:cljs.core.key_test(c,this.arr[a])?this.arr[a+1]:d}; 
+cljs.core.HashCollisionNode.prototype.inode_assoc_BANG_=function(a,b,c,d,e,f){if(c===this.collision_hash){b=cljs.core.hash_collision_node_find_index(this.arr,this.cnt,d);if(-1===b){if(this.arr.length>2*this.cnt)return a=cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$6(this,a,2*this.cnt,d,2*this.cnt+1,e),f.val=!0,a.cnt+=1,a;b=this.arr.length;c=Array(b+2);cljs.core.array_copy(this.arr,0,c,0,b);c[b]=d;c[b+1]=e;f.val=!0;return this.ensure_editable_array(a,this.cnt+1,c)}return this.arr[b+1]===e?this: +cljs.core.edit_and_set.cljs$core$IFn$_invoke$arity$4(this,a,b+1,e)}return(new cljs.core.BitmapIndexedNode(a,1<<(this.collision_hash>>>b&31),[null,this,null,null])).inode_assoc_BANG_(a,b,c,d,e,f)}; +cljs.core.HashCollisionNode.prototype.inode_assoc=function(a,b,c,d,e){return b===this.collision_hash?(a=cljs.core.hash_collision_node_find_index(this.arr,this.cnt,c),-1===a?(a=2*this.cnt,b=Array(a+2),cljs.core.array_copy(this.arr,0,b,0,a),b[a]=c,b[a+1]=d,e.val=!0,new cljs.core.HashCollisionNode(null,this.collision_hash,this.cnt+1,b)):cljs.core._EQ_.cljs$core$IFn$_invoke$arity$2(this.arr[a+1],d)?this:new cljs.core.HashCollisionNode(null,this.collision_hash,this.cnt,cljs.core.clone_and_set.cljs$core$IFn$_invoke$arity$3(this.arr, +a+1,d))):(new cljs.core.BitmapIndexedNode(null,1<<(this.collision_hash>>>a&31),[null,this])).inode_assoc(a,b,c,d,e)};cljs.core.HashCollisionNode.prototype.ensure_editable_array=function(a,b,c){return a===this.edit?(this.arr=c,this.cnt=b,this):new cljs.core.HashCollisionNode(this.edit,this.collision_hash,b,c)}; +cljs.core.HashCollisionNode.prototype.inode_find=function(a,b,c,d){a=cljs.core.hash_collision_node_find_index(this.arr,this.cnt,c);return 0>a?d:cljs.core.key_test(c,this.arr[a])?new cljs.core.MapEntry(this.arr[a],this.arr[a+1],null):d}; +cljs.core.HashCollisionNode.prototype.inode_without=function(a,b,c){a=cljs.core.hash_collision_node_find_index(this.arr,this.cnt,c);return-1===a?this:1===this.cnt?null:new 
cljs.core.HashCollisionNode(null,this.collision_hash,this.cnt-1,cljs.core.remove_pair(this.arr,cljs.core.quot(a,2)))};cljs.core.HashCollisionNode.prototype.cljs$core$IIterable$_iterator$arity$1=function(a){return new cljs.core.NodeIterator(this.arr,0,null,null)}; +cljs.core.HashCollisionNode.getBasis=function(){return new cljs.core.PersistentVector(null,4,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"edit","edit",-1302639,null),cljs.core.with_meta(new cljs.core.Symbol(null,"collision-hash","collision-hash",-35831342,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),cljs.core.with_meta(new cljs.core.Symbol(null,"cnt","cnt",1924510325,null),new cljs.core.PersistentArrayMap(null, +1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),cljs.core.with_meta(new cljs.core.Symbol(null,"arr","arr",2115492975,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null))],null)};cljs.core.HashCollisionNode.cljs$lang$type=!0;cljs.core.HashCollisionNode.cljs$lang$ctorStr="cljs.core/HashCollisionNode";cljs.core.HashCollisionNode.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/HashCollisionNode")}; +cljs.core.__GT_HashCollisionNode=function(a,b,c,d){return new cljs.core.HashCollisionNode(a,b,c,d)}; +cljs.core.create_node=function(a){switch(arguments.length){case 6:return cljs.core.create_node.cljs$core$IFn$_invoke$arity$6(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4],arguments[5]);case 7:return cljs.core.create_node.cljs$core$IFn$_invoke$arity$7(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4],arguments[5],arguments[6]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.core.create_node.cljs$core$IFn$_invoke$arity$6=function(a,b,c,d,e,f){var g=cljs.core.hash(b);if(g===d)return new 
cljs.core.HashCollisionNode(null,g,2,[b,c,e,f]);var h=new cljs.core.Box(!1);return cljs.core.BitmapIndexedNode.EMPTY.inode_assoc(a,g,b,c,h).inode_assoc(a,d,e,f,h)}; +cljs.core.create_node.cljs$core$IFn$_invoke$arity$7=function(a,b,c,d,e,f,g){var h=cljs.core.hash(c);if(h===e)return new cljs.core.HashCollisionNode(null,h,2,[c,d,f,g]);var k=new cljs.core.Box(!1);return cljs.core.BitmapIndexedNode.EMPTY.inode_assoc_BANG_(a,b,h,c,d,k).inode_assoc_BANG_(a,b,e,f,g,k)};cljs.core.create_node.cljs$lang$maxFixedArity=7; +cljs.core.NodeSeq=function(a,b,c,d,e){this.meta=a;this.nodes=b;this.i=c;this.s=d;this.__hash=e;this.cljs$lang$protocol_mask$partition0$=32374988;this.cljs$lang$protocol_mask$partition1$=0};cljs.core.NodeSeq.prototype.toString=function(){return cljs.core.pr_str_STAR_(this)};cljs.core.NodeSeq.prototype.equiv=function(a){return this.cljs$core$IEquiv$_equiv$arity$2(null,a)}; +cljs.core.NodeSeq.prototype.indexOf=function(){var a=null,b=function(a){return cljs.core._indexOf.cljs$core$IFn$_invoke$arity$3(this,a,0)},c=function(a,b){return cljs.core._indexOf.cljs$core$IFn$_invoke$arity$3(this,a,b)};a=function(a,e){switch(arguments.length){case 1:return b.call(this,a);case 2:return c.call(this,a,e)}throw Error("Invalid arity: "+arguments.length);};a.cljs$core$IFn$_invoke$arity$1=b;a.cljs$core$IFn$_invoke$arity$2=c;return a}(); +cljs.core.NodeSeq.prototype.lastIndexOf=function(){var a=null,b=function(a){return cljs.core._lastIndexOf.cljs$core$IFn$_invoke$arity$3(this,a,cljs.core.count(this))},c=function(a,b){return cljs.core._lastIndexOf.cljs$core$IFn$_invoke$arity$3(this,a,b)};a=function(a,e){switch(arguments.length){case 1:return b.call(this,a);case 2:return c.call(this,a,e)}throw Error("Invalid arity: "+arguments.length);};a.cljs$core$IFn$_invoke$arity$1=b;a.cljs$core$IFn$_invoke$arity$2=c;return a}(); +cljs.core.NodeSeq.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return this.meta};cljs.core.NodeSeq.prototype.cljs$core$INext$_next$arity$1=function(a){return 
null==this.s?cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$3(this.nodes,this.i+2,null):cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$3(this.nodes,this.i,cljs.core.next(this.s))};cljs.core.NodeSeq.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this.__hash;return null!=a?a:this.__hash=a=cljs.core.hash_ordered_coll(this)}; +cljs.core.NodeSeq.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return cljs.core.equiv_sequential(this,b)};cljs.core.NodeSeq.prototype.cljs$core$IEmptyableCollection$_empty$arity$1=function(a){return cljs.core.List.EMPTY};cljs.core.NodeSeq.prototype.cljs$core$IReduce$_reduce$arity$2=function(a,b){return cljs.core.seq_reduce.cljs$core$IFn$_invoke$arity$2(b,this)}; +cljs.core.NodeSeq.prototype.cljs$core$IReduce$_reduce$arity$3=function(a,b,c){return cljs.core.seq_reduce.cljs$core$IFn$_invoke$arity$3(b,c,this)};cljs.core.NodeSeq.prototype.cljs$core$ISeq$_first$arity$1=function(a){return null==this.s?new cljs.core.MapEntry(this.nodes[this.i],this.nodes[this.i+1],null):cljs.core.first(this.s)}; +cljs.core.NodeSeq.prototype.cljs$core$ISeq$_rest$arity$1=function(a){a=null==this.s?cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$3(this.nodes,this.i+2,null):cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$3(this.nodes,this.i,cljs.core.next(this.s));return null!=a?a:cljs.core.List.EMPTY};cljs.core.NodeSeq.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){return this}; +cljs.core.NodeSeq.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return b===this.meta?this:new cljs.core.NodeSeq(b,this.nodes,this.i,this.s,this.__hash)};cljs.core.NodeSeq.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){return cljs.core.cons(b,this)}; +cljs.core.NodeSeq.getBasis=function(){return new cljs.core.PersistentVector(null,5,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"meta","meta",-1154898805,null),new cljs.core.Symbol(null,"nodes","nodes",-459054278,null),new 
cljs.core.Symbol(null,"i","i",253690212,null),new cljs.core.Symbol(null,"s","s",-948495851,null),cljs.core.with_meta(new cljs.core.Symbol(null,"__hash","__hash",-1328796629,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable", +"mutable",875778266),!0],null))],null)};cljs.core.NodeSeq.cljs$lang$type=!0;cljs.core.NodeSeq.cljs$lang$ctorStr="cljs.core/NodeSeq";cljs.core.NodeSeq.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/NodeSeq")};cljs.core.__GT_NodeSeq=function(a,b,c,d,e){return new cljs.core.NodeSeq(a,b,c,d,e)};goog.object.set(cljs.core.NodeSeq.prototype,cljs.core.ITER_SYMBOL,function(){return cljs.core.es6_iterator(this)}); +cljs.core.create_inode_seq=function(a){switch(arguments.length){case 1:return cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 3:return cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$1=function(a){return cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$3(a,0,null)}; +cljs.core.create_inode_seq.cljs$core$IFn$_invoke$arity$3=function(a,b,c){if(null==c)for(c=a.length;;)if(bthis.cnt?cljs.core.count(cljs.core.next(this))+1:this.cnt};cljs.core.PersistentTreeMapSeq.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this.__hash;return null!=a?a:this.__hash=a=cljs.core.hash_ordered_coll(this)};cljs.core.PersistentTreeMapSeq.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return cljs.core.equiv_sequential(this,b)}; +cljs.core.PersistentTreeMapSeq.prototype.cljs$core$IEmptyableCollection$_empty$arity$1=function(a){return cljs.core.List.EMPTY};cljs.core.PersistentTreeMapSeq.prototype.cljs$core$IReduce$_reduce$arity$2=function(a,b){return 
cljs.core.seq_reduce.cljs$core$IFn$_invoke$arity$2(b,this)};cljs.core.PersistentTreeMapSeq.prototype.cljs$core$IReduce$_reduce$arity$3=function(a,b,c){return cljs.core.seq_reduce.cljs$core$IFn$_invoke$arity$3(b,c,this)}; +cljs.core.PersistentTreeMapSeq.prototype.cljs$core$ISeq$_first$arity$1=function(a){return cljs.core.peek(this.stack)};cljs.core.PersistentTreeMapSeq.prototype.cljs$core$ISeq$_rest$arity$1=function(a){a=cljs.core.first(this.stack);a=cljs.core.tree_map_seq_push(this.ascending_QMARK_?a.right:a.left,cljs.core.next(this.stack),this.ascending_QMARK_);return null!=a?new cljs.core.PersistentTreeMapSeq(null,a,this.ascending_QMARK_,this.cnt-1,null):cljs.core.List.EMPTY}; +cljs.core.PersistentTreeMapSeq.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){return this};cljs.core.PersistentTreeMapSeq.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return b===this.meta?this:new cljs.core.PersistentTreeMapSeq(b,this.stack,this.ascending_QMARK_,this.cnt,this.__hash)};cljs.core.PersistentTreeMapSeq.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){return cljs.core.cons(b,this)}; +cljs.core.PersistentTreeMapSeq.getBasis=function(){return new cljs.core.PersistentVector(null,5,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"meta","meta",-1154898805,null),new cljs.core.Symbol(null,"stack","stack",847125597,null),cljs.core.with_meta(new cljs.core.Symbol(null,"ascending?","ascending?",-1938452653,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"tag","tag",-1290361223),new cljs.core.Symbol(null,"boolean","boolean",-278886877,null)],null)), +new cljs.core.Symbol(null,"cnt","cnt",1924510325,null),cljs.core.with_meta(new cljs.core.Symbol(null,"__hash","__hash",-1328796629,null),new cljs.core.PersistentArrayMap(null,1,[new 
cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null))],null)};cljs.core.PersistentTreeMapSeq.cljs$lang$type=!0;cljs.core.PersistentTreeMapSeq.cljs$lang$ctorStr="cljs.core/PersistentTreeMapSeq";cljs.core.PersistentTreeMapSeq.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/PersistentTreeMapSeq")}; +cljs.core.__GT_PersistentTreeMapSeq=function(a,b,c,d,e){return new cljs.core.PersistentTreeMapSeq(a,b,c,d,e)};goog.object.set(cljs.core.PersistentTreeMapSeq.prototype,cljs.core.ITER_SYMBOL,function(){return cljs.core.es6_iterator(this)});cljs.core.create_tree_map_seq=function(a,b,c){return new cljs.core.PersistentTreeMapSeq(null,cljs.core.tree_map_seq_push(a,null,b),b,c,null)}; +cljs.core.balance_left=function(a,b,c,d){return c instanceof cljs.core.RedNode?c.left instanceof cljs.core.RedNode?new cljs.core.RedNode(c.key,c.val,c.left.blacken(),new cljs.core.BlackNode(a,b,c.right,d,null),null):c.right instanceof cljs.core.RedNode?new cljs.core.RedNode(c.right.key,c.right.val,new cljs.core.BlackNode(c.key,c.val,c.left,c.right.left,null),new cljs.core.BlackNode(a,b,c.right.right,d,null),null):new cljs.core.BlackNode(a,b,c,d,null):new cljs.core.BlackNode(a,b,c,d,null)}; +cljs.core.balance_right=function(a,b,c,d){return d instanceof cljs.core.RedNode?d.right instanceof cljs.core.RedNode?new cljs.core.RedNode(d.key,d.val,new cljs.core.BlackNode(a,b,c,d.left,null),d.right.blacken(),null):d.left instanceof cljs.core.RedNode?new cljs.core.RedNode(d.left.key,d.left.val,new cljs.core.BlackNode(a,b,c,d.left.left,null),new cljs.core.BlackNode(d.key,d.val,d.left.right,d.right,null),null):new cljs.core.BlackNode(a,b,c,d,null):new cljs.core.BlackNode(a,b,c,d,null)}; +cljs.core.balance_left_del=function(a,b,c,d){if(c instanceof cljs.core.RedNode)return new cljs.core.RedNode(a,b,c.blacken(),d,null);if(d instanceof cljs.core.BlackNode)return cljs.core.balance_right(a,b,c,d.redden());if(d instanceof cljs.core.RedNode&&d.left instanceof 
cljs.core.BlackNode)return new cljs.core.RedNode(d.left.key,d.left.val,new cljs.core.BlackNode(a,b,c,d.left.left,null),cljs.core.balance_right(d.key,d.val,d.left.right,d.right.redden()),null);throw Error("red-black tree invariant violation"); +}; +cljs.core.balance_right_del=function(a,b,c,d){if(d instanceof cljs.core.RedNode)return new cljs.core.RedNode(a,b,c,d.blacken(),null);if(c instanceof cljs.core.BlackNode)return cljs.core.balance_left(a,b,c.redden(),d);if(c instanceof cljs.core.RedNode&&c.right instanceof cljs.core.BlackNode)return new cljs.core.RedNode(c.right.key,c.right.val,cljs.core.balance_left(c.key,c.val,c.left.redden(),c.right.left),new cljs.core.BlackNode(a,b,c.right.right,d,null),null);throw Error("red-black tree invariant violation");}; +cljs.core.tree_map_kv_reduce=function(a,b,c){var d=null!=a.left?function(){var d=a.left;return cljs.core.tree_map_kv_reduce.cljs$core$IFn$_invoke$arity$3?cljs.core.tree_map_kv_reduce.cljs$core$IFn$_invoke$arity$3(d,b,c):cljs.core.tree_map_kv_reduce.call(null,d,b,c)}():c;if(cljs.core.reduced_QMARK_(d))return d;var e=function(){var c=a.key,e=a.val;return b.cljs$core$IFn$_invoke$arity$3?b.cljs$core$IFn$_invoke$arity$3(d,c,e):b.call(null,d,c,e)}();if(cljs.core.reduced_QMARK_(e))return e;if(null!=a.right){var f= +a.right;return cljs.core.tree_map_kv_reduce.cljs$core$IFn$_invoke$arity$3?cljs.core.tree_map_kv_reduce.cljs$core$IFn$_invoke$arity$3(f,b,e):cljs.core.tree_map_kv_reduce.call(null,f,b,e)}return e};cljs.core.BlackNode=function(a,b,c,d,e){this.key=a;this.val=b;this.left=c;this.right=d;this.__hash=e;this.cljs$lang$protocol_mask$partition0$=166619935;this.cljs$lang$protocol_mask$partition1$=0};cljs.core.BlackNode.prototype.cljs$core$IFind$=cljs.core.PROTOCOL_SENTINEL; +cljs.core.BlackNode.prototype.cljs$core$IFind$_find$arity$2=function(a,b){switch(b){case 0:return new cljs.core.MapEntry(0,this.key,null);case 1:return new cljs.core.MapEntry(1,this.val,null);default:return null}}; 
+cljs.core.BlackNode.prototype.lastIndexOf=function(){var a=null,b=function(a){return cljs.core._lastIndexOf.cljs$core$IFn$_invoke$arity$3(this,a,cljs.core.count(this))},c=function(a,b){return cljs.core._lastIndexOf.cljs$core$IFn$_invoke$arity$3(this,a,b)};a=function(a,e){switch(arguments.length){case 1:return b.call(this,a);case 2:return c.call(this,a,e)}throw Error("Invalid arity: "+arguments.length);};a.cljs$core$IFn$_invoke$arity$1=b;a.cljs$core$IFn$_invoke$arity$2=c;return a}(); +cljs.core.BlackNode.prototype.indexOf=function(){var a=null,b=function(a){return cljs.core._indexOf.cljs$core$IFn$_invoke$arity$3(this,a,0)},c=function(a,b){return cljs.core._indexOf.cljs$core$IFn$_invoke$arity$3(this,a,b)};a=function(a,e){switch(arguments.length){case 1:return b.call(this,a);case 2:return c.call(this,a,e)}throw Error("Invalid arity: "+arguments.length);};a.cljs$core$IFn$_invoke$arity$1=b;a.cljs$core$IFn$_invoke$arity$2=c;return a}();cljs.core.BlackNode.prototype.add_right=function(a){return a.balance_right(this)}; +cljs.core.BlackNode.prototype.redden=function(){return new cljs.core.RedNode(this.key,this.val,this.left,this.right,null)};cljs.core.BlackNode.prototype.blacken=function(){return this};cljs.core.BlackNode.prototype.add_left=function(a){return a.balance_left(this)};cljs.core.BlackNode.prototype.replace=function(a,b,c,d){return new cljs.core.BlackNode(a,b,c,d,null)};cljs.core.BlackNode.prototype.balance_left=function(a){return new cljs.core.BlackNode(a.key,a.val,this,a.right,null)}; +cljs.core.BlackNode.prototype.balance_right=function(a){return new cljs.core.BlackNode(a.key,a.val,a.left,this,null)};cljs.core.BlackNode.prototype.remove_left=function(a){return cljs.core.balance_left_del(this.key,this.val,a,this.right)};cljs.core.BlackNode.prototype.kv_reduce=function(a,b){return cljs.core.tree_map_kv_reduce(this,a,b)};cljs.core.BlackNode.prototype.remove_right=function(a){return cljs.core.balance_right_del(this.key,this.val,this.left,a)}; 
+cljs.core.BlackNode.prototype.cljs$core$ILookup$_lookup$arity$2=function(a,b){return this.cljs$core$IIndexed$_nth$arity$3(null,b,null)};cljs.core.BlackNode.prototype.cljs$core$ILookup$_lookup$arity$3=function(a,b,c){return this.cljs$core$IIndexed$_nth$arity$3(null,b,c)};cljs.core.BlackNode.prototype.cljs$core$IIndexed$_nth$arity$2=function(a,b){if(0===b)return this.key;if(1===b)return this.val;throw Error("Index out of bounds");}; +cljs.core.BlackNode.prototype.cljs$core$IIndexed$_nth$arity$3=function(a,b,c){return 0===b?this.key:1===b?this.val:c};cljs.core.BlackNode.prototype.cljs$core$IVector$_assoc_n$arity$3=function(a,b,c){return(new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key,this.val],null)).cljs$core$IVector$_assoc_n$arity$3(null,b,c)};cljs.core.BlackNode.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return null}; +cljs.core.BlackNode.prototype.cljs$core$ICounted$_count$arity$1=function(a){return 2};cljs.core.BlackNode.prototype.cljs$core$IMapEntry$_key$arity$1=function(a){return this.key};cljs.core.BlackNode.prototype.cljs$core$IMapEntry$_val$arity$1=function(a){return this.val};cljs.core.BlackNode.prototype.cljs$core$IStack$_peek$arity$1=function(a){return this.val}; +cljs.core.BlackNode.prototype.cljs$core$IStack$_pop$arity$1=function(a){return new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key],null)};cljs.core.BlackNode.prototype.cljs$core$IReversible$_rseq$arity$1=function(a){return new cljs.core.IndexedSeq([this.val,this.key],0,null)};cljs.core.BlackNode.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this.__hash;return null!=a?a:this.__hash=a=cljs.core.hash_ordered_coll(this)}; +cljs.core.BlackNode.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return cljs.core.equiv_sequential(this,b)};cljs.core.BlackNode.prototype.cljs$core$IEmptyableCollection$_empty$arity$1=function(a){return 
null};cljs.core.BlackNode.prototype.cljs$core$IReduce$_reduce$arity$2=function(a,b){return cljs.core.ci_reduce.cljs$core$IFn$_invoke$arity$2(this,b)}; +cljs.core.BlackNode.prototype.cljs$core$IReduce$_reduce$arity$3=function(a,b,c){return cljs.core.ci_reduce.cljs$core$IFn$_invoke$arity$3(this,b,c)};cljs.core.BlackNode.prototype.cljs$core$IAssociative$_assoc$arity$3=function(a,b,c){return cljs.core.assoc.cljs$core$IFn$_invoke$arity$3(new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key,this.val],null),b,c)}; +cljs.core.BlackNode.prototype.cljs$core$IAssociative$_contains_key_QMARK_$arity$2=function(a,b){return 0===b||1===b};cljs.core.BlackNode.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){return new cljs.core.IndexedSeq([this.key,this.val],0,null)};cljs.core.BlackNode.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return cljs.core._with_meta(new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key,this.val],null),b)}; +cljs.core.BlackNode.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key,this.val,b],null)}; +cljs.core.BlackNode.prototype.call=function(){var a=null;a=function(a,c,d){switch(arguments.length){case 2:return this.cljs$core$IIndexed$_nth$arity$2(null,c);case 3:return this.cljs$core$IIndexed$_nth$arity$3(null,c,d)}throw Error("Invalid arity: "+(arguments.length-1));};a.cljs$core$IFn$_invoke$arity$2=function(a,c){return this.cljs$core$IIndexed$_nth$arity$2(null,c)};a.cljs$core$IFn$_invoke$arity$3=function(a,c,d){return this.cljs$core$IIndexed$_nth$arity$3(null,c,d)};return a}(); +cljs.core.BlackNode.prototype.apply=function(a,b){return this.call.apply(this,[this].concat(cljs.core.aclone(b)))};cljs.core.BlackNode.prototype.cljs$core$IFn$_invoke$arity$1=function(a){return 
this.cljs$core$IIndexed$_nth$arity$2(null,a)};cljs.core.BlackNode.prototype.cljs$core$IFn$_invoke$arity$2=function(a,b){return this.cljs$core$IIndexed$_nth$arity$3(null,a,b)}; +cljs.core.BlackNode.getBasis=function(){return new cljs.core.PersistentVector(null,5,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"key","key",124488940,null),new cljs.core.Symbol(null,"val","val",1769233139,null),new cljs.core.Symbol(null,"left","left",1241415590,null),new cljs.core.Symbol(null,"right","right",1187949694,null),cljs.core.with_meta(new cljs.core.Symbol(null,"__hash","__hash",-1328796629,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null, +"mutable","mutable",875778266),!0],null))],null)};cljs.core.BlackNode.cljs$lang$type=!0;cljs.core.BlackNode.cljs$lang$ctorStr="cljs.core/BlackNode";cljs.core.BlackNode.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/BlackNode")};cljs.core.__GT_BlackNode=function(a,b,c,d,e){return new cljs.core.BlackNode(a,b,c,d,e)};goog.object.set(cljs.core.BlackNode.prototype,cljs.core.ITER_SYMBOL,function(){return cljs.core.es6_iterator(this)}); +cljs.core.RedNode=function(a,b,c,d,e){this.key=a;this.val=b;this.left=c;this.right=d;this.__hash=e;this.cljs$lang$protocol_mask$partition0$=166619935;this.cljs$lang$protocol_mask$partition1$=0};cljs.core.RedNode.prototype.cljs$core$IFind$=cljs.core.PROTOCOL_SENTINEL;cljs.core.RedNode.prototype.cljs$core$IFind$_find$arity$2=function(a,b){switch(b){case 0:return new cljs.core.MapEntry(0,this.key,null);case 1:return new cljs.core.MapEntry(1,this.val,null);default:return null}}; +cljs.core.RedNode.prototype.lastIndexOf=function(){var a=null,b=function(a){return cljs.core._lastIndexOf.cljs$core$IFn$_invoke$arity$3(this,a,cljs.core.count(this))},c=function(a,b){return cljs.core._lastIndexOf.cljs$core$IFn$_invoke$arity$3(this,a,b)};a=function(a,e){switch(arguments.length){case 1:return b.call(this,a);case 2:return c.call(this,a,e)}throw 
Error("Invalid arity: "+arguments.length);};a.cljs$core$IFn$_invoke$arity$1=b;a.cljs$core$IFn$_invoke$arity$2=c;return a}(); +cljs.core.RedNode.prototype.indexOf=function(){var a=null,b=function(a){return cljs.core._indexOf.cljs$core$IFn$_invoke$arity$3(this,a,0)},c=function(a,b){return cljs.core._indexOf.cljs$core$IFn$_invoke$arity$3(this,a,b)};a=function(a,e){switch(arguments.length){case 1:return b.call(this,a);case 2:return c.call(this,a,e)}throw Error("Invalid arity: "+arguments.length);};a.cljs$core$IFn$_invoke$arity$1=b;a.cljs$core$IFn$_invoke$arity$2=c;return a}(); +cljs.core.RedNode.prototype.add_right=function(a){return new cljs.core.RedNode(this.key,this.val,this.left,a,null)};cljs.core.RedNode.prototype.redden=function(){throw Error("red-black tree invariant violation");};cljs.core.RedNode.prototype.blacken=function(){return new cljs.core.BlackNode(this.key,this.val,this.left,this.right,null)};cljs.core.RedNode.prototype.add_left=function(a){return new cljs.core.RedNode(this.key,this.val,a,this.right,null)}; +cljs.core.RedNode.prototype.replace=function(a,b,c,d){return new cljs.core.RedNode(a,b,c,d,null)}; +cljs.core.RedNode.prototype.balance_left=function(a){return this.left instanceof cljs.core.RedNode?new cljs.core.RedNode(this.key,this.val,this.left.blacken(),new cljs.core.BlackNode(a.key,a.val,this.right,a.right,null),null):this.right instanceof cljs.core.RedNode?new cljs.core.RedNode(this.right.key,this.right.val,new cljs.core.BlackNode(this.key,this.val,this.left,this.right.left,null),new cljs.core.BlackNode(a.key,a.val,this.right.right,a.right,null),null):new cljs.core.BlackNode(a.key,a.val, +this,a.right,null)}; +cljs.core.RedNode.prototype.balance_right=function(a){return this.right instanceof cljs.core.RedNode?new cljs.core.RedNode(this.key,this.val,new cljs.core.BlackNode(a.key,a.val,a.left,this.left,null),this.right.blacken(),null):this.left instanceof cljs.core.RedNode?new cljs.core.RedNode(this.left.key,this.left.val,new 
cljs.core.BlackNode(a.key,a.val,a.left,this.left.left,null),new cljs.core.BlackNode(this.key,this.val,this.left.right,this.right,null),null):new cljs.core.BlackNode(a.key,a.val,a.left, +this,null)};cljs.core.RedNode.prototype.remove_left=function(a){return new cljs.core.RedNode(this.key,this.val,a,this.right,null)};cljs.core.RedNode.prototype.kv_reduce=function(a,b){return cljs.core.tree_map_kv_reduce(this,a,b)};cljs.core.RedNode.prototype.remove_right=function(a){return new cljs.core.RedNode(this.key,this.val,this.left,a,null)};cljs.core.RedNode.prototype.cljs$core$ILookup$_lookup$arity$2=function(a,b){return this.cljs$core$IIndexed$_nth$arity$3(null,b,null)}; +cljs.core.RedNode.prototype.cljs$core$ILookup$_lookup$arity$3=function(a,b,c){return this.cljs$core$IIndexed$_nth$arity$3(null,b,c)};cljs.core.RedNode.prototype.cljs$core$IIndexed$_nth$arity$2=function(a,b){if(0===b)return this.key;if(1===b)return this.val;throw Error("Index out of bounds");};cljs.core.RedNode.prototype.cljs$core$IIndexed$_nth$arity$3=function(a,b,c){return 0===b?this.key:1===b?this.val:c}; +cljs.core.RedNode.prototype.cljs$core$IVector$_assoc_n$arity$3=function(a,b,c){return(new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key,this.val],null)).cljs$core$IVector$_assoc_n$arity$3(null,b,c)};cljs.core.RedNode.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return null};cljs.core.RedNode.prototype.cljs$core$ICounted$_count$arity$1=function(a){return 2};cljs.core.RedNode.prototype.cljs$core$IMapEntry$_key$arity$1=function(a){return this.key}; +cljs.core.RedNode.prototype.cljs$core$IMapEntry$_val$arity$1=function(a){return this.val};cljs.core.RedNode.prototype.cljs$core$IStack$_peek$arity$1=function(a){return this.val};cljs.core.RedNode.prototype.cljs$core$IStack$_pop$arity$1=function(a){return new 
cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key],null)};cljs.core.RedNode.prototype.cljs$core$IReversible$_rseq$arity$1=function(a){return new cljs.core.IndexedSeq([this.val,this.key],0,null)}; +cljs.core.RedNode.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this.__hash;return null!=a?a:this.__hash=a=cljs.core.hash_ordered_coll(this)};cljs.core.RedNode.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return cljs.core.equiv_sequential(this,b)};cljs.core.RedNode.prototype.cljs$core$IEmptyableCollection$_empty$arity$1=function(a){return null}; +cljs.core.RedNode.prototype.cljs$core$IReduce$_reduce$arity$2=function(a,b){return cljs.core.ci_reduce.cljs$core$IFn$_invoke$arity$2(this,b)};cljs.core.RedNode.prototype.cljs$core$IReduce$_reduce$arity$3=function(a,b,c){return cljs.core.ci_reduce.cljs$core$IFn$_invoke$arity$3(this,b,c)}; +cljs.core.RedNode.prototype.cljs$core$IAssociative$_assoc$arity$3=function(a,b,c){return cljs.core.assoc.cljs$core$IFn$_invoke$arity$3(new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key,this.val],null),b,c)};cljs.core.RedNode.prototype.cljs$core$IAssociative$_contains_key_QMARK_$arity$2=function(a,b){return 0===b||1===b};cljs.core.RedNode.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){return new cljs.core.IndexedSeq([this.key,this.val],0,null)}; +cljs.core.RedNode.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return cljs.core._with_meta(new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key,this.val],null),b)};cljs.core.RedNode.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[this.key,this.val,b],null)}; +cljs.core.RedNode.prototype.call=function(){var a=null;a=function(a,c,d){switch(arguments.length){case 2:return this.cljs$core$IIndexed$_nth$arity$2(null,c);case 3:return 
this.cljs$core$IIndexed$_nth$arity$3(null,c,d)}throw Error("Invalid arity: "+(arguments.length-1));};a.cljs$core$IFn$_invoke$arity$2=function(a,c){return this.cljs$core$IIndexed$_nth$arity$2(null,c)};a.cljs$core$IFn$_invoke$arity$3=function(a,c,d){return this.cljs$core$IIndexed$_nth$arity$3(null,c,d)};return a}(); +cljs.core.RedNode.prototype.apply=function(a,b){return this.call.apply(this,[this].concat(cljs.core.aclone(b)))};cljs.core.RedNode.prototype.cljs$core$IFn$_invoke$arity$1=function(a){return this.cljs$core$IIndexed$_nth$arity$2(null,a)};cljs.core.RedNode.prototype.cljs$core$IFn$_invoke$arity$2=function(a,b){return this.cljs$core$IIndexed$_nth$arity$3(null,a,b)}; +cljs.core.RedNode.getBasis=function(){return new cljs.core.PersistentVector(null,5,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"key","key",124488940,null),new cljs.core.Symbol(null,"val","val",1769233139,null),new cljs.core.Symbol(null,"left","left",1241415590,null),new cljs.core.Symbol(null,"right","right",1187949694,null),cljs.core.with_meta(new cljs.core.Symbol(null,"__hash","__hash",-1328796629,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable", +"mutable",875778266),!0],null))],null)};cljs.core.RedNode.cljs$lang$type=!0;cljs.core.RedNode.cljs$lang$ctorStr="cljs.core/RedNode";cljs.core.RedNode.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/RedNode")};cljs.core.__GT_RedNode=function(a,b,c,d,e){return new cljs.core.RedNode(a,b,c,d,e)};goog.object.set(cljs.core.RedNode.prototype,cljs.core.ITER_SYMBOL,function(){return cljs.core.es6_iterator(this)}); +cljs.core.tree_map_add=function(a,b,c,d,e){if(null==b)return new cljs.core.RedNode(c,d,null,null,null);var f=function(){var d=b.key;return a.cljs$core$IFn$_invoke$arity$2?a.cljs$core$IFn$_invoke$arity$2(c,d):a.call(null,c,d)}();if(0===f)return e[0]=b,null;if(0>f)return f=function(){var f=b.left;return 
cljs.core.tree_map_add.cljs$core$IFn$_invoke$arity$5?cljs.core.tree_map_add.cljs$core$IFn$_invoke$arity$5(a,f,c,d,e):cljs.core.tree_map_add.call(null,a,f,c,d,e)}(),null!=f?b.add_left(f):null;f=function(){var f= +b.right;return cljs.core.tree_map_add.cljs$core$IFn$_invoke$arity$5?cljs.core.tree_map_add.cljs$core$IFn$_invoke$arity$5(a,f,c,d,e):cljs.core.tree_map_add.call(null,a,f,c,d,e)}();return null!=f?b.add_right(f):null}; +cljs.core.tree_map_append=function(a,b){if(null==a)return b;if(null==b)return a;if(a instanceof cljs.core.RedNode){if(b instanceof cljs.core.RedNode){var c=function(){var c=a.right,e=b.left;return cljs.core.tree_map_append.cljs$core$IFn$_invoke$arity$2?cljs.core.tree_map_append.cljs$core$IFn$_invoke$arity$2(c,e):cljs.core.tree_map_append.call(null,c,e)}();return c instanceof cljs.core.RedNode?new cljs.core.RedNode(c.key,c.val,new cljs.core.RedNode(a.key,a.val,a.left,c.left,null),new cljs.core.RedNode(b.key, +b.val,c.right,b.right,null),null):new cljs.core.RedNode(a.key,a.val,a.left,new cljs.core.RedNode(b.key,b.val,c,b.right,null),null)}return new cljs.core.RedNode(a.key,a.val,a.left,function(){var c=a.right;return cljs.core.tree_map_append.cljs$core$IFn$_invoke$arity$2?cljs.core.tree_map_append.cljs$core$IFn$_invoke$arity$2(c,b):cljs.core.tree_map_append.call(null,c,b)}(),null)}if(b instanceof cljs.core.RedNode)return new cljs.core.RedNode(b.key,b.val,function(){var c=b.left;return cljs.core.tree_map_append.cljs$core$IFn$_invoke$arity$2? 
+cljs.core.tree_map_append.cljs$core$IFn$_invoke$arity$2(a,c):cljs.core.tree_map_append.call(null,a,c)}(),b.right,null);c=function(){var c=a.right,e=b.left;return cljs.core.tree_map_append.cljs$core$IFn$_invoke$arity$2?cljs.core.tree_map_append.cljs$core$IFn$_invoke$arity$2(c,e):cljs.core.tree_map_append.call(null,c,e)}();return c instanceof cljs.core.RedNode?new cljs.core.RedNode(c.key,c.val,new cljs.core.BlackNode(a.key,a.val,a.left,c.left,null),new cljs.core.BlackNode(b.key,b.val,c.right,b.right, +null),null):cljs.core.balance_left_del(a.key,a.val,a.left,new cljs.core.BlackNode(b.key,b.val,c,b.right,null))}; +cljs.core.tree_map_remove=function(a,b,c,d){if(null!=b){var e=function(){var d=b.key;return a.cljs$core$IFn$_invoke$arity$2?a.cljs$core$IFn$_invoke$arity$2(c,d):a.call(null,c,d)}();if(0===e)return d[0]=b,cljs.core.tree_map_append(b.left,b.right);if(0>e)return e=function(){var e=b.left;return cljs.core.tree_map_remove.cljs$core$IFn$_invoke$arity$4?cljs.core.tree_map_remove.cljs$core$IFn$_invoke$arity$4(a,e,c,d):cljs.core.tree_map_remove.call(null,a,e,c,d)}(),null!=e||null!=d[0]?b.left instanceof cljs.core.BlackNode? 
+cljs.core.balance_left_del(b.key,b.val,e,b.right):new cljs.core.RedNode(b.key,b.val,e,b.right,null):null;e=function(){var e=b.right;return cljs.core.tree_map_remove.cljs$core$IFn$_invoke$arity$4?cljs.core.tree_map_remove.cljs$core$IFn$_invoke$arity$4(a,e,c,d):cljs.core.tree_map_remove.call(null,a,e,c,d)}();return null!=e||null!=d[0]?b.right instanceof cljs.core.BlackNode?cljs.core.balance_right_del(b.key,b.val,b.left,e):new cljs.core.RedNode(b.key,b.val,b.left,e,null):null}return null}; +cljs.core.tree_map_replace=function(a,b,c,d){var e=b.key,f=a.cljs$core$IFn$_invoke$arity$2?a.cljs$core$IFn$_invoke$arity$2(c,e):a.call(null,c,e);return 0===f?b.replace(e,d,b.left,b.right):0>f?b.replace(e,b.val,function(){var e=b.left;return cljs.core.tree_map_replace.cljs$core$IFn$_invoke$arity$4?cljs.core.tree_map_replace.cljs$core$IFn$_invoke$arity$4(a,e,c,d):cljs.core.tree_map_replace.call(null,a,e,c,d)}(),b.right):b.replace(e,b.val,b.left,function(){var e=b.right;return cljs.core.tree_map_replace.cljs$core$IFn$_invoke$arity$4? 
+cljs.core.tree_map_replace.cljs$core$IFn$_invoke$arity$4(a,e,c,d):cljs.core.tree_map_replace.call(null,a,e,c,d)}())};cljs.core.PersistentTreeMap=function(a,b,c,d,e){this.comp=a;this.tree=b;this.cnt=c;this.meta=d;this.__hash=e;this.cljs$lang$protocol_mask$partition0$=418776847;this.cljs$lang$protocol_mask$partition1$=8192};cljs.core.PersistentTreeMap.prototype.cljs$core$IFind$=cljs.core.PROTOCOL_SENTINEL;cljs.core.PersistentTreeMap.prototype.cljs$core$IFind$_find$arity$2=function(a,b){return this.entry_at(b)}; +cljs.core.PersistentTreeMap.prototype.forEach=function(a){for(var b=cljs.core.seq(this),c=null,d=0,e=0;;)if(ec?b.left:b.right}else return null};cljs.core.PersistentTreeMap.prototype.has=function(a){return cljs.core.contains_QMARK_(this,a)}; +cljs.core.PersistentTreeMap.prototype.cljs$core$ILookup$_lookup$arity$2=function(a,b){return this.cljs$core$ILookup$_lookup$arity$3(null,b,null)};cljs.core.PersistentTreeMap.prototype.cljs$core$ILookup$_lookup$arity$3=function(a,b,c){a=this.entry_at(b);return null!=a?a.val:c};cljs.core.PersistentTreeMap.prototype.cljs$core$IKVReduce$_kv_reduce$arity$3=function(a,b,c){return null!=this.tree?cljs.core.unreduced(cljs.core.tree_map_kv_reduce(this.tree,b,c)):c}; +cljs.core.PersistentTreeMap.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return this.meta};cljs.core.PersistentTreeMap.prototype.cljs$core$ICloneable$_clone$arity$1=function(a){return new cljs.core.PersistentTreeMap(this.comp,this.tree,this.cnt,this.meta,this.__hash)};cljs.core.PersistentTreeMap.prototype.cljs$core$ICounted$_count$arity$1=function(a){return this.cnt}; +cljs.core.PersistentTreeMap.prototype.cljs$core$IReversible$_rseq$arity$1=function(a){return 0e?(a=cljs.core.conj.cljs$core$IFn$_invoke$arity$2(a,d),d=d.left):d=d.right:0< +e?(a=cljs.core.conj.cljs$core$IFn$_invoke$arity$2(a,d),d=d.right):d=d.left}else return null==a?null:new cljs.core.PersistentTreeMapSeq(null,a,c,-1,null)}else return 
null};cljs.core.PersistentTreeMap.prototype.cljs$core$ISorted$_entry_key$arity$2=function(a,b){return cljs.core.key(b)};cljs.core.PersistentTreeMap.prototype.cljs$core$ISorted$_comparator$arity$1=function(a){return this.comp}; +cljs.core.PersistentTreeMap.getBasis=function(){return new cljs.core.PersistentVector(null,5,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"comp","comp",-1462482139,null),new cljs.core.Symbol(null,"tree","tree",1444219499,null),new cljs.core.Symbol(null,"cnt","cnt",1924510325,null),new cljs.core.Symbol(null,"meta","meta",-1154898805,null),cljs.core.with_meta(new cljs.core.Symbol(null,"__hash","__hash",-1328796629,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null, +"mutable","mutable",875778266),!0],null))],null)};cljs.core.PersistentTreeMap.cljs$lang$type=!0;cljs.core.PersistentTreeMap.cljs$lang$ctorStr="cljs.core/PersistentTreeMap";cljs.core.PersistentTreeMap.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/PersistentTreeMap")};cljs.core.__GT_PersistentTreeMap=function(a,b,c,d,e){return new cljs.core.PersistentTreeMap(a,b,c,d,e)};cljs.core.PersistentTreeMap.EMPTY=new cljs.core.PersistentTreeMap(cljs.core.compare,null,0,null,cljs.core.empty_unordered_hash); +goog.object.set(cljs.core.PersistentTreeMap.prototype,cljs.core.ITER_SYMBOL,function(){return cljs.core.es6_iterator(this)});cljs.core.hash_map=function(a){for(var b=[],c=arguments.length,d=0;;)if(d(a.cljs$core$IFn$_invoke$arity$1?a.cljs$core$IFn$_invoke$arity$1(c):a.call(null,c))?b:c}; +cljs.core.max_key.cljs$core$IFn$_invoke$arity$variadic=function(a,b,c,d){return cljs.core.reduce.cljs$core$IFn$_invoke$arity$3(function(b,c){return cljs.core.max_key.cljs$core$IFn$_invoke$arity$3(a,b,c)},cljs.core.max_key.cljs$core$IFn$_invoke$arity$3(a,b,c),d)};cljs.core.max_key.cljs$lang$applyTo=function(a){var b=cljs.core.first(a),c=cljs.core.next(a);a=cljs.core.first(c);var 
d=cljs.core.next(c);c=cljs.core.first(d);d=cljs.core.next(d);return this.cljs$core$IFn$_invoke$arity$variadic(b,a,c,d)}; +cljs.core.max_key.cljs$lang$maxFixedArity=3; +cljs.core.min_key=function(a){switch(arguments.length){case 2:return cljs.core.min_key.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return cljs.core.min_key.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:for(var b=[],c=arguments.length,d=0;;)if(d=this.count)throw Error("-drop-first of empty chunk");return new cljs.core.RangeChunk(this.start+this.step,this.step,this.count-1)}; +cljs.core.RangeChunk.getBasis=function(){return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"start","start",1285322546,null),new cljs.core.Symbol(null,"step","step",-1365547645,null),new cljs.core.Symbol(null,"count","count",-514511684,null)],null)};cljs.core.RangeChunk.cljs$lang$type=!0;cljs.core.RangeChunk.cljs$lang$ctorStr="cljs.core/RangeChunk";cljs.core.RangeChunk.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/RangeChunk")}; +cljs.core.__GT_RangeChunk=function(a,b,c){return new cljs.core.RangeChunk(a,b,c)};cljs.core.RangeIterator=function(a,b,c){this.i=a;this.end=b;this.step=c};cljs.core.RangeIterator.prototype.hasNext=function(){return 0this.end};cljs.core.RangeIterator.prototype.next=function(){var a=this.i;this.i+=this.step;return a}; +cljs.core.RangeIterator.getBasis=function(){return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[cljs.core.with_meta(new cljs.core.Symbol(null,"i","i",253690212,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),new cljs.core.Symbol(null,"end","end",1372345569,null),new cljs.core.Symbol(null,"step","step",-1365547645,null)],null)};cljs.core.RangeIterator.cljs$lang$type=!0; 
+cljs.core.RangeIterator.cljs$lang$ctorStr="cljs.core/RangeIterator";cljs.core.RangeIterator.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/RangeIterator")};cljs.core.__GT_RangeIterator=function(a,b,c){return new cljs.core.RangeIterator(a,b,c)};cljs.core.Range=function(a,b,c,d,e,f,g){this.meta=a;this.start=b;this.end=c;this.step=d;this.chunk=e;this.chunk_next=f;this.__hash=g;this.cljs$lang$protocol_mask$partition0$=32375006;this.cljs$lang$protocol_mask$partition1$=140800}; +cljs.core.Range.prototype.toString=function(){return cljs.core.pr_str_STAR_(this)};cljs.core.Range.prototype.equiv=function(a){return this.cljs$core$IEquiv$_equiv$arity$2(null,a)}; +cljs.core.Range.prototype.indexOf=function(){var a=null,b=function(a){return cljs.core._indexOf.cljs$core$IFn$_invoke$arity$3(this,a,0)},c=function(a,b){return cljs.core._indexOf.cljs$core$IFn$_invoke$arity$3(this,a,b)};a=function(a,e){switch(arguments.length){case 1:return b.call(this,a);case 2:return c.call(this,a,e)}throw Error("Invalid arity: "+arguments.length);};a.cljs$core$IFn$_invoke$arity$1=b;a.cljs$core$IFn$_invoke$arity$2=c;return a}(); +cljs.core.Range.prototype.lastIndexOf=function(){var a=null,b=function(a){return cljs.core._lastIndexOf.cljs$core$IFn$_invoke$arity$3(this,a,cljs.core.count(this))},c=function(a,b){return cljs.core._lastIndexOf.cljs$core$IFn$_invoke$arity$3(this,a,b)};a=function(a,e){switch(arguments.length){case 1:return b.call(this,a);case 2:return c.call(this,a,e)}throw Error("Invalid arity: "+arguments.length);};a.cljs$core$IFn$_invoke$arity$1=b;a.cljs$core$IFn$_invoke$arity$2=c;return a}(); +cljs.core.Range.prototype.forceChunk=function(){if(null==this.chunk){var a=this.cljs$core$ICounted$_count$arity$1(null);return 32this.end&&0===this.step)return this.start;throw Error("Index out of bounds");};cljs.core.Range.prototype.cljs$core$IIndexed$_nth$arity$3=function(a,b,c){return 0<=b&&bthis.end&&0===this.step?this.start:c}; 
+cljs.core.Range.prototype.cljs$core$IIterable$_iterator$arity$1=function(a){return new cljs.core.RangeIterator(this.start,this.end,this.step)};cljs.core.Range.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return this.meta};cljs.core.Range.prototype.cljs$core$ICloneable$_clone$arity$1=function(a){return new cljs.core.Range(this.meta,this.start,this.end,this.step,this.chunk,this.chunk_next,this.__hash)}; +cljs.core.Range.prototype.cljs$core$INext$_next$arity$1=function(a){return 0this.end?new cljs.core.Range(null,this.start+this.step,this.end,this.step,null,null,null):null};cljs.core.Range.prototype.cljs$core$ICounted$_count$arity$1=function(a){return Math.ceil((this.end-this.start)/this.step)}; +cljs.core.Range.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this.__hash;return null!=a?a:this.__hash=a=cljs.core.hash_ordered_coll(this)};cljs.core.Range.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return cljs.core.equiv_sequential(this,b)};cljs.core.Range.prototype.cljs$core$IEmptyableCollection$_empty$arity$1=function(a){return cljs.core.List.EMPTY}; +cljs.core.Range.prototype.cljs$core$IReduce$_reduce$arity$2=function(a,b){return cljs.core.ci_reduce.cljs$core$IFn$_invoke$arity$2(this,b)};cljs.core.Range.prototype.cljs$core$IReduce$_reduce$arity$3=function(a,b,c){for(a=this.start;;)if(0this.end){c=b.cljs$core$IFn$_invoke$arity$2?b.cljs$core$IFn$_invoke$arity$2(c,a):b.call(null,c,a);if(cljs.core.reduced_QMARK_(c))return cljs.core.deref(c);a+=this.step}else return c}; +cljs.core.Range.prototype.cljs$core$ISeq$_first$arity$1=function(a){return this.start};cljs.core.Range.prototype.cljs$core$ISeq$_rest$arity$1=function(a){a=this.cljs$core$INext$_next$arity$1(null);return null==a?cljs.core.List.EMPTY:a};cljs.core.Range.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){return this};cljs.core.Range.prototype.cljs$core$IChunkedSeq$_chunked_first$arity$1=function(a){this.forceChunk();return this.chunk}; 
+cljs.core.Range.prototype.cljs$core$IChunkedSeq$_chunked_rest$arity$1=function(a){this.forceChunk();return null==this.chunk_next?cljs.core.List.EMPTY:this.chunk_next};cljs.core.Range.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return b===this.meta?this:new cljs.core.Range(b,this.start,this.end,this.step,this.chunk,this.chunk_next,this.__hash)};cljs.core.Range.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){return cljs.core.cons(b,this)}; +cljs.core.Range.prototype.cljs$core$IChunkedNext$_chunked_next$arity$1=function(a){return cljs.core.seq(this.cljs$core$IChunkedSeq$_chunked_rest$arity$1(null))}; +cljs.core.Range.getBasis=function(){return new cljs.core.PersistentVector(null,7,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"meta","meta",-1154898805,null),new cljs.core.Symbol(null,"start","start",1285322546,null),new cljs.core.Symbol(null,"end","end",1372345569,null),new cljs.core.Symbol(null,"step","step",-1365547645,null),cljs.core.with_meta(new cljs.core.Symbol(null,"chunk","chunk",449371907,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable", +"mutable",875778266),!0],null)),cljs.core.with_meta(new cljs.core.Symbol(null,"chunk-next","chunk-next",-547810434,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null)),cljs.core.with_meta(new cljs.core.Symbol(null,"__hash","__hash",-1328796629,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null))],null)};cljs.core.Range.cljs$lang$type=!0;cljs.core.Range.cljs$lang$ctorStr="cljs.core/Range"; +cljs.core.Range.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write(b,"cljs.core/Range")};cljs.core.__GT_Range=function(a,b,c,d,e,f,g){return new cljs.core.Range(a,b,c,d,e,f,g)};goog.object.set(cljs.core.Range.prototype,cljs.core.ITER_SYMBOL,function(){return cljs.core.es6_iterator(this)}); 
+cljs.core.range=function(a){switch(arguments.length){case 0:return cljs.core.range.cljs$core$IFn$_invoke$arity$0();case 1:return cljs.core.range.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.range.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return cljs.core.range.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.core.range.cljs$core$IFn$_invoke$arity$0=function(){return cljs.core.range.cljs$core$IFn$_invoke$arity$3(0,Number.MAX_VALUE,1)};cljs.core.range.cljs$core$IFn$_invoke$arity$1=function(a){return cljs.core.range.cljs$core$IFn$_invoke$arity$3(0,a,1)};cljs.core.range.cljs$core$IFn$_invoke$arity$2=function(a,b){return cljs.core.range.cljs$core$IFn$_invoke$arity$3(a,b,1)}; +cljs.core.range.cljs$core$IFn$_invoke$arity$3=function(a,b,c){return 0c?b>=a?cljs.core.List.EMPTY:new cljs.core.Range(null,a,b,c,null,null,null):b===a?cljs.core.List.EMPTY:cljs.core.repeat.cljs$core$IFn$_invoke$arity$1(a)};cljs.core.range.cljs$lang$maxFixedArity=3; +cljs.core.take_nth=function(a){switch(arguments.length){case 1:return cljs.core.take_nth.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.take_nth.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.core.take_nth.cljs$core$IFn$_invoke$arity$1=function(a){if("number"!==typeof a)throw Error("Assert failed: (number? 
n)");return function(b){var c=cljs.core.volatile_BANG_(-1);return function(){var d=null,e=function(){return b.cljs$core$IFn$_invoke$arity$0?b.cljs$core$IFn$_invoke$arity$0():b.call(null)},f=function(a){return b.cljs$core$IFn$_invoke$arity$1?b.cljs$core$IFn$_invoke$arity$1(a):b.call(null,a)},g=function(d,e){var f=c.cljs$core$IVolatile$_vreset_BANG_$arity$2(null,c.cljs$core$IDeref$_deref$arity$1(null)+ +1);return 0===cljs.core.rem(f,a)?b.cljs$core$IFn$_invoke$arity$2?b.cljs$core$IFn$_invoke$arity$2(d,e):b.call(null,d,e):d};d=function(a,b){switch(arguments.length){case 0:return e.call(this);case 1:return f.call(this,a);case 2:return g.call(this,a,b)}throw Error("Invalid arity: "+arguments.length);};d.cljs$core$IFn$_invoke$arity$0=e;d.cljs$core$IFn$_invoke$arity$1=f;d.cljs$core$IFn$_invoke$arity$2=g;return d}()}}; +cljs.core.take_nth.cljs$core$IFn$_invoke$arity$2=function(a,b){if("number"!==typeof a)throw Error("Assert failed: (number? n)");return new cljs.core.LazySeq(null,function(){var c=cljs.core.seq(b);return c?cljs.core.cons(cljs.core.first(c),cljs.core.take_nth.cljs$core$IFn$_invoke$arity$2(a,cljs.core.drop.cljs$core$IFn$_invoke$arity$2(a,c))):null},null,null)};cljs.core.take_nth.cljs$lang$maxFixedArity=2; +cljs.core.split_with=function(a,b){return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[cljs.core.take_while.cljs$core$IFn$_invoke$arity$2(a,b),cljs.core.drop_while.cljs$core$IFn$_invoke$arity$2(a,b)],null)}; +cljs.core.partition_by=function(a){switch(arguments.length){case 1:return cljs.core.partition_by.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.partition_by.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.core.partition_by.cljs$core$IFn$_invoke$arity$1=function(a){return function(b){var c=cljs.core.array_list(),d=cljs.core.volatile_BANG_(new 
cljs.core.Keyword("cljs.core","none","cljs.core/none",926646439));return function(){var e=null,f=function(){return b.cljs$core$IFn$_invoke$arity$0?b.cljs$core$IFn$_invoke$arity$0():b.call(null)},g=function(a){if(!cljs.core.truth_(c.isEmpty())){var d=cljs.core.vec(c.toArray());c.clear();a=cljs.core.unreduced(b.cljs$core$IFn$_invoke$arity$2?b.cljs$core$IFn$_invoke$arity$2(a, +d):b.call(null,a,d))}return b.cljs$core$IFn$_invoke$arity$1?b.cljs$core$IFn$_invoke$arity$1(a):b.call(null,a)},h=function(e,f){var g=cljs.core.deref(d),h=a.cljs$core$IFn$_invoke$arity$1?a.cljs$core$IFn$_invoke$arity$1(f):a.call(null,f);cljs.core.vreset_BANG_(d,h);if(cljs.core.keyword_identical_QMARK_(g,new cljs.core.Keyword("cljs.core","none","cljs.core/none",926646439))||cljs.core._EQ_.cljs$core$IFn$_invoke$arity$2(h,g))return c.add(f),e;g=cljs.core.vec(c.toArray());c.clear();e=b.cljs$core$IFn$_invoke$arity$2? +b.cljs$core$IFn$_invoke$arity$2(e,g):b.call(null,e,g);cljs.core.reduced_QMARK_(e)||c.add(f);return e};e=function(a,b){switch(arguments.length){case 0:return f.call(this);case 1:return g.call(this,a);case 2:return h.call(this,a,b)}throw Error("Invalid arity: "+arguments.length);};e.cljs$core$IFn$_invoke$arity$0=f;e.cljs$core$IFn$_invoke$arity$1=g;e.cljs$core$IFn$_invoke$arity$2=h;return e}()}}; +cljs.core.partition_by.cljs$core$IFn$_invoke$arity$2=function(a,b){return new cljs.core.LazySeq(null,function(){var c=cljs.core.seq(b);if(c){var d=cljs.core.first(c),e=a.cljs$core$IFn$_invoke$arity$1?a.cljs$core$IFn$_invoke$arity$1(d):a.call(null,d),f=cljs.core.cons(d,cljs.core.take_while.cljs$core$IFn$_invoke$arity$2(function(b){return cljs.core._EQ_.cljs$core$IFn$_invoke$arity$2(e,a.cljs$core$IFn$_invoke$arity$1?a.cljs$core$IFn$_invoke$arity$1(b):a.call(null,b))},cljs.core.next(c)));return cljs.core.cons(f, +cljs.core.partition_by.cljs$core$IFn$_invoke$arity$2(a,new cljs.core.LazySeq(null,function(){return cljs.core.drop.cljs$core$IFn$_invoke$arity$2(cljs.core.count(f),c)},null,null)))}return 
null},null,null)};cljs.core.partition_by.cljs$lang$maxFixedArity=2; +cljs.core.frequencies=function(a){return cljs.core.persistent_BANG_(cljs.core.reduce.cljs$core$IFn$_invoke$arity$3(function(a,c){return cljs.core.assoc_BANG_.cljs$core$IFn$_invoke$arity$3(a,c,cljs.core.get.cljs$core$IFn$_invoke$arity$3(a,c,0)+1)},cljs.core.transient$(cljs.core.PersistentArrayMap.EMPTY),a))}; +cljs.core.reductions=function(a){switch(arguments.length){case 2:return cljs.core.reductions.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return cljs.core.reductions.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.core.reductions.cljs$core$IFn$_invoke$arity$2=function(a,b){return new cljs.core.LazySeq(null,function(){var c=cljs.core.seq(b);return c?cljs.core.reductions.cljs$core$IFn$_invoke$arity$3(a,cljs.core.first(c),cljs.core.rest(c)):new cljs.core.List(null,a.cljs$core$IFn$_invoke$arity$0?a.cljs$core$IFn$_invoke$arity$0():a.call(null),null,1,null)},null,null)}; +cljs.core.reductions.cljs$core$IFn$_invoke$arity$3=function(a,b,c){return cljs.core.reduced_QMARK_(b)?new cljs.core.List(null,cljs.core.deref(b),null,1,null):cljs.core.cons(b,new cljs.core.LazySeq(null,function(){var d=cljs.core.seq(c);return d?cljs.core.reductions.cljs$core$IFn$_invoke$arity$3(a,function(){var c=cljs.core.first(d);return a.cljs$core$IFn$_invoke$arity$2?a.cljs$core$IFn$_invoke$arity$2(b,c):a.call(null,b,c)}(),cljs.core.rest(d)):null},null,null))}; +cljs.core.reductions.cljs$lang$maxFixedArity=3; +cljs.core.juxt=function(a){switch(arguments.length){case 1:return cljs.core.juxt.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.core.juxt.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return cljs.core.juxt.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:for(var 
b=[],c=arguments.length,d=0;;)if(de?1:e);return e<=b.length?(e=cljs.core.subs.cljs$core$IFn$_invoke$arity$2(b,e),cljs.core.re_seq_STAR_.cljs$core$IFn$_invoke$arity$2?cljs.core.re_seq_STAR_.cljs$core$IFn$_invoke$arity$2(a,e):cljs.core.re_seq_STAR_.call(null,a,e)):null},null,null))}; +cljs.core.re_seq=function(a,b){if("string"===typeof b)return cljs.core.re_seq_STAR_(a,b);throw new TypeError("re-seq must match against a string.");};cljs.core.re_pattern=function(a){if(a instanceof RegExp)return a;var b=cljs.core.re_find(/^\(\?([idmsux]*)\)/,a),c=cljs.core.nth.cljs$core$IFn$_invoke$arity$3(b,0,null);b=cljs.core.nth.cljs$core$IFn$_invoke$arity$3(b,1,null);a=cljs.core.subs.cljs$core$IFn$_invoke$arity$2(a,null==c?0:c.length);c=RegExp;b=cljs.core.truth_(b)?b:"";return new c(a,b)}; +cljs.core.pr_sequential_writer=function(a,b,c,d,e,f,g){var h=cljs.core._STAR_print_level_STAR_;cljs.core._STAR_print_level_STAR_=null==cljs.core._STAR_print_level_STAR_?null:cljs.core._STAR_print_level_STAR_-1;try{if(null!=cljs.core._STAR_print_level_STAR_&&0>cljs.core._STAR_print_level_STAR_)return cljs.core._write(a,"#");cljs.core._write(a,c);if(0===(new cljs.core.Keyword(null,"print-length","print-length",1931866356)).cljs$core$IFn$_invoke$arity$1(f))cljs.core.seq(g)&&cljs.core._write(a,function(){var a= +(new cljs.core.Keyword(null,"more-marker","more-marker",-14717935)).cljs$core$IFn$_invoke$arity$1(f);return cljs.core.truth_(a)?a:"..."}());else{if(cljs.core.seq(g)){var k=cljs.core.first(g);b.cljs$core$IFn$_invoke$arity$3?b.cljs$core$IFn$_invoke$arity$3(k,a,f):b.call(null,k,a,f)}for(var l=cljs.core.next(g),m=(new cljs.core.Keyword(null,"print-length","print-length",1931866356)).cljs$core$IFn$_invoke$arity$1(f)-1;;)if(!l||null!=m&&0===m){cljs.core.seq(l)&&0===m&&(cljs.core._write(a,d),cljs.core._write(a, +function(){var a=(new cljs.core.Keyword(null,"more-marker","more-marker",-14717935)).cljs$core$IFn$_invoke$arity$1(f);return 
cljs.core.truth_(a)?a:"..."}()));break}else{cljs.core._write(a,d);var n=cljs.core.first(l);c=a;g=f;b.cljs$core$IFn$_invoke$arity$3?b.cljs$core$IFn$_invoke$arity$3(n,c,g):b.call(null,n,c,g);var p=cljs.core.next(l);c=m-1;l=p;m=c}}return cljs.core._write(a,e)}finally{cljs.core._STAR_print_level_STAR_=h}}; +cljs.core.write_all=function(a){for(var b=[],c=arguments.length,d=0;;)if(d=Number(c)?a:a=-1Number(a)?"-":0<=b.indexOf("+")?"+":0<=b.indexOf(" ")?" ":"";0<=Number(a)&&(d=f+d);if(isNaN(c)||d.length>=Number(c))return d;d=isNaN(e)?Math.abs(Number(a)).toString():Math.abs(Number(a)).toFixed(e);a=Number(c)-d.length-f.length;0<=b.indexOf("-",0)?d=f+d+goog.string.repeat(" ",a):(b=0<=b.indexOf("0",0)?"0":" ",d=f+goog.string.repeat(b,a)+d);return d}; +goog.string.format.demuxes_.d=function(a,b,c,d,e,f,g,h){return goog.string.format.demuxes_.f(parseInt(a,10),b,c,d,0,f,g,h)};goog.string.format.demuxes_.i=goog.string.format.demuxes_.d;goog.string.format.demuxes_.u=goog.string.format.demuxes_.d;var clojure={string:{}};clojure.string.seq_reverse=function(a){return cljs.core.reduce.call(null,cljs.core.conj,cljs.core.List.EMPTY,a)};clojure.string.re_surrogate_pair=/([\uD800-\uDBFF])([\uDC00-\uDFFF])/g;clojure.string.reverse=function(a){return a.replace(clojure.string.re_surrogate_pair,"$2$1").split("").reverse().join("")}; +clojure.string.replace_all=function(a,b,c){var d=RegExp,e=b.source;var f=cljs.core.truth_(b.ignoreCase)?"gi":"g";f=cljs.core.truth_(b.multiline)?[f,"m"].join(""):f;b=cljs.core.truth_(b.unicode)?[f,"u"].join(""):f;d=new d(e,b);return a.replace(d,c)}; +clojure.string.replace_with=function(a){return function(){var b=function(b){b=cljs.core.drop_last.call(null,2,b);return cljs.core._EQ_.call(null,cljs.core.count.call(null,b),1)?a.call(null,cljs.core.first.call(null,b)):a.call(null,cljs.core.vec.call(null,b))},c=function(a){var c=null;if(0=b||b>=2+cljs.core.count.call(null,a))return 
cljs.core.conj.call(null,cljs.core.vec.call(null,cljs.core.cons.call(null,"",cljs.core.map.call(null,cljs.core.str,cljs.core.seq.call(null,a)))),"");var c=cljs.core._EQ__EQ_;if(cljs.core.truth_(c.call(null,1,b)))return new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[a],null);if(cljs.core.truth_(c.call(null,2,b)))return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE, +["",a],null);b-=2;return cljs.core.conj.call(null,cljs.core.vec.call(null,cljs.core.cons.call(null,"",cljs.core.subvec.call(null,cljs.core.vec.call(null,cljs.core.map.call(null,cljs.core.str,cljs.core.seq.call(null,a))),0,b))),cljs.core.subs.call(null,a,b))}; +clojure.string.split=function(a){switch(arguments.length){case 2:return clojure.string.split.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return clojure.string.split.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};clojure.string.split.cljs$core$IFn$_invoke$arity$2=function(a,b){return clojure.string.split.call(null,a,b,0)}; +clojure.string.split.cljs$core$IFn$_invoke$arity$3=function(a,b,c){return clojure.string.discard_trailing_if_needed.call(null,c,"/(?:)/"===cljs.core.str.cljs$core$IFn$_invoke$arity$1(b)?clojure.string.split_with_empty_regex.call(null,a,c):1>c?cljs.core.vec.call(null,cljs.core.str.cljs$core$IFn$_invoke$arity$1(a).split(b)):function(){for(var d=a,e=c,f=cljs.core.PersistentVector.EMPTY;;){if(1===e)return cljs.core.conj.call(null,f,d);var g=cljs.core.re_find.call(null,b,d);if(null!=g){var h=d.indexOf(g); +g=d.substring(h+cljs.core.count.call(null,g));--e;f=cljs.core.conj.call(null,f,d.substring(0,h));d=g}else return cljs.core.conj.call(null,f,d)}}())};clojure.string.split.cljs$lang$maxFixedArity=3;clojure.string.split_lines=function(a){return 
clojure.string.split.call(null,a,/\n|\r\n/)};clojure.string.trim=function(a){return goog.string.trim(a)};clojure.string.triml=function(a){return goog.string.trimLeft(a)};clojure.string.trimr=function(a){return goog.string.trimRight(a)}; +clojure.string.trim_newline=function(a){for(var b=a.length;;){if(0===b)return"";var c=cljs.core.get.call(null,a,b-1);if("\n"===c||"\r"===c)--b;else return a.substring(0,b)}};clojure.string.blank_QMARK_=function(a){return goog.string.isEmptyOrWhitespace(goog.string.makeSafe(a))}; +clojure.string.escape=function(a,b){for(var c=new goog.string.StringBuffer,d=a.length,e=0;;){if(d===e)return c.toString();var f=a.charAt(e),g=cljs.core.get.call(null,b,f);null!=g?c.append(cljs.core.str.cljs$core$IFn$_invoke$arity$1(g)):c.append(f);e+=1}}; +clojure.string.index_of=function(a){switch(arguments.length){case 2:return clojure.string.index_of.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return clojure.string.index_of.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};clojure.string.index_of.cljs$core$IFn$_invoke$arity$2=function(a,b){a=a.indexOf(b);return 0>a?null:a}; +clojure.string.index_of.cljs$core$IFn$_invoke$arity$3=function(a,b,c){a=a.indexOf(b,c);return 0>a?null:a};clojure.string.index_of.cljs$lang$maxFixedArity=3; +clojure.string.last_index_of=function(a){switch(arguments.length){case 2:return clojure.string.last_index_of.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return clojure.string.last_index_of.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +clojure.string.last_index_of.cljs$core$IFn$_invoke$arity$2=function(a,b){a=a.lastIndexOf(b);return 
0>a?null:a};clojure.string.last_index_of.cljs$core$IFn$_invoke$arity$3=function(a,b,c){a=a.lastIndexOf(b,c);return 0>a?null:a};clojure.string.last_index_of.cljs$lang$maxFixedArity=3;clojure.string.starts_with_QMARK_=function(a,b){return goog.string.startsWith(a,b)};clojure.string.ends_with_QMARK_=function(a,b){return goog.string.endsWith(a,b)}; +clojure.string.includes_QMARK_=function(a,b){return goog.string.contains(a,b)};var bigml={hideo:{}};bigml.hideo.util={};bigml.hideo.util.version={};bigml.hideo.util.version.version_string="0.8.1";bigml.hideo.util.version.version_major=0;bigml.hideo.util.version.version_minor=8;bigml.hideo.util.version.version_micro=1;bigml.hideo.util.version.project_name="dixie"; +bigml.hideo.util.version.version=new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[bigml.hideo.util.version.version_major,bigml.hideo.util.version.version_minor,bigml.hideo.util.version.version_micro],null); +bigml.hideo.util.version.version_EQ_=function(a,b){return cljs.core._EQ_.call(null,cljs.core.truth_(a)?a:new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[0,0,0],null),cljs.core.truth_(b)?b:new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[0,0,0],null))}; +bigml.hideo.util.version.version_LT_=function(a,b){return 0>cljs.core.compare.call(null,cljs.core.truth_(a)?a:new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[0,0,0],null),cljs.core.truth_(b)?b:new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[0,0,0],null))}; +bigml.hideo.util.version.version_LT__EQ_=function(a,b){return!(0cljs.core.compare.call(null,cljs.core.truth_(a)?a:new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[0,0,0],null),cljs.core.truth_(b)?b:new 
cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[0,0,0],null)))};bigml.dixie={};bigml.dixie.flatline={};bigml.dixie.flatline.utils={};bigml.dixie.flatline.utils.version_string=bigml.hideo.util.version.version_string;bigml.dixie.flatline.utils.version=bigml.hideo.util.version.version;bigml.dixie.flatline.utils.registry=cljs.core.atom.call(null,cljs.core.PersistentArrayMap.EMPTY); +bigml.dixie.flatline.utils.deferror_BANG_=function(a){for(var b=[],c=arguments.length,d=0;;)if(d=Math.abs((e-g)/e))return 1-e*Math.exp(b*Math.log(a)-a-bigml.dixie.flatline.utils.log_gamma.call(null,b));h+=1;c=e+(h-b)*c;g=f+(h-b)*d;d=a*g+h*f;var k=g/d;f=(a*c+h*e)/d;g=e;c/=d;d=k;e=f;f=1}};bigml.dixie.flatline.utils.gser=function(a,b){for(var c=1/b,d=1/b,e=1;;){if(c<=1E-4*d)return d*Math.exp(b*Math.log(a)-a-bigml.dixie.flatline.utils.log_gamma.call(null,b));c=c*a/(b+e);d+=c;e+=1}}; +bigml.dixie.flatline.utils.gammacdf=function(a,b){return 0b?cljs.core.str.cljs$core$IFn$_invoke$arity$1(b):"":null].join(""):null}; +bigml.dixie.flatline.types.fn_names=function(a){a=bigml.dixie.flatline.types.fn_desc.call(null,a);return cljs.core.truth_(a)?cljs.core.sequential_QMARK_.call(null,a)?cljs.core.map.call(null,function(a){return bigml.dixie.flatline.types.format_fname.call(null,(new cljs.core.Keyword(null,"name","name",1843675177)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null,"shift","shift",997140064)).cljs$core$IFn$_invoke$arity$1(a))},a):bigml.dixie.flatline.types.format_fname.call(null,(new cljs.core.Keyword(null, +"name","name",1843675177)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null,"shift","shift",997140064)).cljs$core$IFn$_invoke$arity$1(a)):null};bigml.dixie.flatline.types.fn_shifts=function(a){a=bigml.dixie.flatline.types.fn_desc.call(null,a);return cljs.core.truth_(a)?cljs.core.sequential_QMARK_.call(null,a)?cljs.core.map.call(null,new cljs.core.Keyword(null,"shift","shift",997140064),a):(new 
cljs.core.Keyword(null,"shift","shift",997140064)).cljs$core$IFn$_invoke$arity$1(a):null}; +bigml.dixie.flatline.types.bool__GT_str=function(a){return!0===a||!1===a?cljs.core.str.cljs$core$IFn$_invoke$arity$1(a):a}; +bigml.dixie.flatline.types.stringify_booleans=function(a){return cljs.core.with_meta.call(null,cljs.core._EQ_.call(null,new cljs.core.Keyword(null,"boolean","boolean",-1919418404),bigml.dixie.flatline.types.fn_type.call(null,a))?function(b,c){return bigml.dixie.flatline.types.bool__GT_str.call(null,a.call(null,b,c))}:cljs.core.truth_(cljs.core.some.call(null,new cljs.core.PersistentHashSet(null,new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"boolean","boolean",-1919418404),null], +null),null),bigml.dixie.flatline.types.fn_types.call(null,a)))?function(b,c){return cljs.core.mapv.call(null,bigml.dixie.flatline.types.bool__GT_str,a.call(null,b,c))}:a,cljs.core.meta.call(null,a))}; +bigml.dixie.flatline.types.trim_strings=function(a){return cljs.core.with_meta.call(null,cljs.core._EQ_.call(null,new cljs.core.Keyword(null,"string","string",-1989541586),bigml.dixie.flatline.types.fn_type.call(null,a))?function(b,c){return bigml.dixie.flatline.utils.trim.call(null,a.call(null,b,c))}:cljs.core.truth_(cljs.core.some.call(null,new cljs.core.PersistentHashSet(null,new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"string","string",-1989541586),null],null),null),bigml.dixie.flatline.types.fn_types.call(null, +a)))?function(b,c){return cljs.core.mapv.call(null,bigml.dixie.flatline.utils.trim,a.call(null,b,c))}:a,cljs.core.meta.call(null,a))};bigml.dixie.flatline.types.check_numbers=function(a,b,c){0>a||bigml.dixie.flatline.errors.check_arity.call(null,b,a);return cljs.core.doall.call(null,cljs.core.map.call(null,function(a,c){return bigml.dixie.flatline.types.check_type.call(null,b,c+1,a,new 
cljs.core.Keyword(null,"numeric","numeric",-1495594714))},c,cljs.core.range.call(null)))};bigml.dixie.flatline.eval={};bigml.dixie.flatline.eval.max_window_width=100;bigml.dixie.flatline.eval.op_designator=function(a){return"string"===typeof a||a instanceof cljs.core.Symbol?cljs.core.keyword.call(null,a):a instanceof cljs.core.Keyword?a:null}; +if("undefined"===typeof bigml||"undefined"===typeof bigml.dixie||"undefined"===typeof bigml.dixie.flatline||"undefined"===typeof bigml.dixie.flatline.eval||"undefined"===typeof bigml.dixie.flatline.eval.primop)bigml.dixie.flatline.eval.primop=function(){var a=cljs.core.atom.call(null,cljs.core.PersistentArrayMap.EMPTY),b=cljs.core.atom.call(null,cljs.core.PersistentArrayMap.EMPTY),c=cljs.core.atom.call(null,cljs.core.PersistentArrayMap.EMPTY),d=cljs.core.atom.call(null,cljs.core.PersistentArrayMap.EMPTY), +e=cljs.core.get.call(null,cljs.core.PersistentArrayMap.EMPTY,new cljs.core.Keyword(null,"hierarchy","hierarchy",-1053470341),cljs.core.get_global_hierarchy.call(null));return new cljs.core.MultiFn(cljs.core.symbol.call(null,"bigml.dixie.flatline.eval","primop"),function(){var a=function(a,b){return bigml.dixie.flatline.eval.op_designator.call(null,a)},b=function(b,c){var d=null;if(1b%28}; +goog.date.getNumberOfDaysInMonth=function(a,b){switch(b){case goog.date.month.FEB:return goog.date.isLeapYear(a)?29:28;case goog.date.month.JUN:case goog.date.month.SEP:case goog.date.month.NOV:case goog.date.month.APR:return 30}return 31};goog.date.isSameDay=function(a,b){b=b||new Date(goog.now());return a.getDate()==b.getDate()&&goog.date.isSameMonth(a,b)};goog.date.isSameMonth=function(a,b){b=b||new Date(goog.now());return a.getMonth()==b.getMonth()&&goog.date.isSameYear(a,b)}; +goog.date.isSameYear=function(a,b){b=b||new Date(goog.now());return a.getFullYear()==b.getFullYear()};goog.date.getCutOffSameWeek_=function(a,b,c,d,e){a=new Date(a,b,c);d=void 0!==d?d:goog.date.weekDay.THU;e=e||goog.date.weekDay.MON;b=(a.getDay()+6)%7;return 
a.valueOf()+((d-e+7)%7-(b-e+7)%7)*goog.date.MS_PER_DAY}; +goog.date.getWeekNumber=function(a,b,c,d,e){a=goog.date.getCutOffSameWeek_(a,b,c,d,e);b=(new Date((new Date(a)).getFullYear(),0,1)).valueOf();return Math.floor(Math.round((a-b)/goog.date.MS_PER_DAY)/7)+1};goog.date.getYearOfWeek=function(a,b,c,d,e){a=goog.date.getCutOffSameWeek_(a,b,c,d,e);return(new Date(a)).getFullYear()};goog.date.min=function(a,b){return ab?a:b}; +goog.date.setIso8601DateTime=function(a,b){b=goog.string.trim(b);var c=-1==b.indexOf("T")?" ":"T";b=b.split(c);return goog.date.setIso8601DateOnly_(a,b[0])&&(2>b.length||goog.date.setIso8601TimeOnly_(a,b[1]))}; +goog.date.setIso8601DateOnly_=function(a,b){b=b.match(goog.date.splitDateStringRegex_);if(!b)return!1;var c=Number(b[2]),d=Number(b[3]),e=Number(b[4]),f=Number(b[5]),g=Number(b[6])||1;a.setFullYear(Number(b[1]));e?(a.setDate(1),a.setMonth(0),a.add(new goog.date.Interval(goog.date.Interval.DAYS,e-1))):f?goog.date.setDateFromIso8601Week_(a,f,g):(c&&(a.setDate(1),a.setMonth(c-1)),d&&a.setDate(d));return!0}; +goog.date.setDateFromIso8601Week_=function(a,b,c){a.setMonth(0);a.setDate(1);var d=a.getDay()||7;b=new goog.date.Interval(goog.date.Interval.DAYS,(4>=d?1-d:8-d)+(Number(c)+7*(Number(b)-1))-1);a.add(b)}; +goog.date.setIso8601TimeOnly_=function(a,b){var c=b.match(goog.date.splitTimezoneStringRegex_);if(c)if(b=b.substring(0,b.length-c[0].length),"Z"===c[0])var d=0;else d=60*Number(c[2])+Number(c[3]),d*="-"==c[1]?1:-1;b=b.match(goog.date.splitTimeStringRegex_);if(!b)return!1;if(c){goog.asserts.assertNumber(d);c=a.getYear();var e=a.getMonth(),f=a.getDate();b=Date.UTC(c,e,f,Number(b[1]),Number(b[2])||0,Number(b[3])||0,b[4]?1E3*Number(b[4]):0);a.setTime(b+6E4*d)}else a.setHours(Number(b[1])),a.setMinutes(Number(b[2])|| +0),a.setSeconds(Number(b[3])||0),a.setMilliseconds(b[4]?1E3*Number(b[4]):0);return!0}; +goog.date.Interval=function(a,b,c,d,e,f){"string"===typeof 
a?(this.years=a==goog.date.Interval.YEARS?b:0,this.months=a==goog.date.Interval.MONTHS?b:0,this.days=a==goog.date.Interval.DAYS?b:0,this.hours=a==goog.date.Interval.HOURS?b:0,this.minutes=a==goog.date.Interval.MINUTES?b:0,this.seconds=a==goog.date.Interval.SECONDS?b:0):(this.years=a||0,this.months=b||0,this.days=c||0,this.hours=d||0,this.minutes=e||0,this.seconds=f||0)}; +goog.date.Interval.fromIsoString=function(a){a=a.match(goog.date.splitDurationRegex_);if(!a)return null;var b=!(a[6]||a[7]||a[8]);if(b&&!(a[2]||a[3]||a[4])||b&&a[5])return null;b=a[1];var c=parseInt(a[2],10)||0,d=parseInt(a[3],10)||0,e=parseInt(a[4],10)||0,f=parseInt(a[6],10)||0,g=parseInt(a[7],10)||0;a=parseFloat(a[8])||0;return b?new goog.date.Interval(-c,-d,-e,-f,-g,-a):new goog.date.Interval(c,d,e,f,g,a)}; +goog.date.Interval.prototype.toIsoString=function(a){var b=Math.min(this.years,this.months,this.days,this.hours,this.minutes,this.seconds),c=Math.max(this.years,this.months,this.days,this.hours,this.minutes,this.seconds);if(0>b&&0b&&c.push("-");c.push("P");(this.years||a)&&c.push(Math.abs(this.years)+"Y");(this.months||a)&&c.push(Math.abs(this.months)+"M");(this.days||a)&&c.push(Math.abs(this.days)+"D");if(this.hours||this.minutes||this.seconds|| +a)c.push("T"),(this.hours||a)&&c.push(Math.abs(this.hours)+"H"),(this.minutes||a)&&c.push(Math.abs(this.minutes)+"M"),(this.seconds||a)&&c.push(Math.abs(this.seconds)+"S");return c.join("")};goog.date.Interval.prototype.equals=function(a){return a.years==this.years&&a.months==this.months&&a.days==this.days&&a.hours==this.hours&&a.minutes==this.minutes&&a.seconds==this.seconds}; +goog.date.Interval.prototype.clone=function(){return new 
goog.date.Interval(this.years,this.months,this.days,this.hours,this.minutes,this.seconds)};goog.date.Interval.YEARS="y";goog.date.Interval.MONTHS="m";goog.date.Interval.DAYS="d";goog.date.Interval.HOURS="h";goog.date.Interval.MINUTES="n";goog.date.Interval.SECONDS="s";goog.date.Interval.prototype.isZero=function(){return 0==this.years&&0==this.months&&0==this.days&&0==this.hours&&0==this.minutes&&0==this.seconds}; +goog.date.Interval.prototype.getInverse=function(){return this.times(-1)};goog.date.Interval.prototype.times=function(a){return new goog.date.Interval(this.years*a,this.months*a,this.days*a,this.hours*a,this.minutes*a,this.seconds*a)};goog.date.Interval.prototype.getTotalSeconds=function(){goog.asserts.assert(0==this.years&&0==this.months);return 60*(60*(24*this.days+this.hours)+this.minutes)+this.seconds}; +goog.date.Interval.prototype.add=function(a){this.years+=a.years;this.months+=a.months;this.days+=a.days;this.hours+=a.hours;this.minutes+=a.minutes;this.seconds+=a.seconds}; +goog.date.Date=function(a,b,c){"number"===typeof a?(this.date=this.buildDate_(a,b||0,c||1),this.maybeFixDst_(c||1)):goog.isObject(a)?(this.date=this.buildDate_(a.getFullYear(),a.getMonth(),a.getDate()),this.maybeFixDst_(a.getDate())):(this.date=new Date(goog.now()),a=this.date.getDate(),this.date.setHours(0),this.date.setMinutes(0),this.date.setSeconds(0),this.date.setMilliseconds(0),this.maybeFixDst_(a))}; +goog.date.Date.prototype.buildDate_=function(a,b,c){b=new Date(a,b,c);0<=a&&100>a&&b.setFullYear(b.getFullYear()-1900);return b};goog.date.Date.prototype.firstDayOfWeek_=goog.i18n.DateTimeSymbols.FIRSTDAYOFWEEK;goog.date.Date.prototype.firstWeekCutOffDay_=goog.i18n.DateTimeSymbols.FIRSTWEEKCUTOFFDAY;goog.date.Date.prototype.clone=function(){var a=new goog.date.Date(this.date);a.firstDayOfWeek_=this.firstDayOfWeek_;a.firstWeekCutOffDay_=this.firstWeekCutOffDay_;return a}; +goog.date.Date.prototype.getFullYear=function(){return 
this.date.getFullYear()};goog.date.Date.prototype.getYear=function(){return this.getFullYear()};goog.date.Date.prototype.getMonth=function(){return this.date.getMonth()};goog.date.Date.prototype.getDate=function(){return this.date.getDate()};goog.date.Date.prototype.getTime=function(){return this.date.getTime()};goog.date.Date.prototype.getDay=function(){return this.date.getDay()}; +goog.date.Date.prototype.getIsoWeekday=function(){return(this.getDay()+6)%7};goog.date.Date.prototype.getWeekday=function(){return(this.getIsoWeekday()-this.firstDayOfWeek_+7)%7};goog.date.Date.prototype.getUTCFullYear=function(){return this.date.getUTCFullYear()};goog.date.Date.prototype.getUTCMonth=function(){return this.date.getUTCMonth()};goog.date.Date.prototype.getUTCDate=function(){return this.date.getUTCDate()};goog.date.Date.prototype.getUTCDay=function(){return this.date.getDay()}; +goog.date.Date.prototype.getUTCHours=function(){return this.date.getUTCHours()};goog.date.Date.prototype.getUTCMinutes=function(){return this.date.getUTCMinutes()};goog.date.Date.prototype.getUTCIsoWeekday=function(){return(this.date.getUTCDay()+6)%7};goog.date.Date.prototype.getUTCWeekday=function(){return(this.getUTCIsoWeekday()-this.firstDayOfWeek_+7)%7};goog.date.Date.prototype.getFirstDayOfWeek=function(){return this.firstDayOfWeek_};goog.date.Date.prototype.getFirstWeekCutOffDay=function(){return this.firstWeekCutOffDay_}; +goog.date.Date.prototype.getNumberOfDaysInMonth=function(){return goog.date.getNumberOfDaysInMonth(this.getFullYear(),this.getMonth())};goog.date.Date.prototype.getWeekNumber=function(){return goog.date.getWeekNumber(this.getFullYear(),this.getMonth(),this.getDate(),this.firstWeekCutOffDay_,this.firstDayOfWeek_)};goog.date.Date.prototype.getYearOfWeek=function(){return goog.date.getYearOfWeek(this.getFullYear(),this.getMonth(),this.getDate(),this.firstWeekCutOffDay_,this.firstDayOfWeek_)}; +goog.date.Date.prototype.getDayOfYear=function(){for(var 
a=this.getDate(),b=this.getFullYear(),c=this.getMonth()-1;0<=c;c--)a+=goog.date.getNumberOfDaysInMonth(b,c);return a};goog.date.Date.prototype.getTimezoneOffset=function(){return this.date.getTimezoneOffset()};goog.date.Date.prototype.getTimezoneOffsetString=function(){var a=this.getTimezoneOffset();if(0==a)a="Z";else{var b=Math.abs(a)/60,c=Math.floor(b);b=60*(b-c);a=(0b&&(b+=12);var d=goog.date.getNumberOfDaysInMonth(c,b);d=Math.min(d,this.getDate());this.setDate(1);this.setFullYear(c);this.setMonth(b);this.setDate(d)}a.days&&(b=new Date(this.getYear(),this.getMonth(),this.getDate(),12),a=new Date(b.getTime()+864E5*a.days),this.setDate(1),this.setFullYear(a.getFullYear()),this.setMonth(a.getMonth()),this.setDate(a.getDate()), +this.maybeFixDst_(a.getDate()))};goog.date.Date.prototype.toIsoString=function(a,b){return[this.getFullYear(),goog.string.padNumber(this.getMonth()+1,2),goog.string.padNumber(this.getDate(),2)].join(a?"-":"")+(b?this.getTimezoneOffsetString():"")};goog.date.Date.prototype.toUTCIsoString=function(a,b){return[this.getUTCFullYear(),goog.string.padNumber(this.getUTCMonth()+1,2),goog.string.padNumber(this.getUTCDate(),2)].join(a?"-":"")+(b?"Z":"")}; +goog.date.Date.prototype.equals=function(a){return!(!a||this.getYear()!=a.getYear()||this.getMonth()!=a.getMonth()||this.getDate()!=a.getDate())};goog.date.Date.prototype.toString=function(){return this.toIsoString()};goog.date.Date.prototype.maybeFixDst_=function(a){this.getDate()!=a&&(a=this.getDate()=e:null,p=cljs.core.truth_(m)?cljs.core.truth_(e)?cljs.core.truth_(n)?cljs_time.internal.core.year_corrected_dim.call(null,m,e):n:e:cljs.core.truth_(e)?cljs.core.truth_(n)?cljs_time.internal.core.corrected_dim.call(null,e):n:e,q=cljs.core.truth_(f)?cljs.core.truth_(p)?1<=f&&f<=p:1<=f&&31>=f:null,r=cljs.core.truth_(l)?0<=l&&23>=l:null,t=cljs.core.truth_(b)?0<=b&&59>=b:null,u=cljs.core.truth_(h)? 
+0<=h&&60>=h:null,v=cljs.core.truth_(c)?0<=c&&999>=c:null,w=cljs.core.truth_(k)?1<=k&&53>=k:null,x=cljs.core.truth_(d)?1<=d&&7>=d:null;if(cljs.core.every_QMARK_.call(null,cljs.core.true_QMARK_,cljs.core.remove.call(null,cljs.core.nil_QMARK_,new cljs.core.PersistentVector(null,8,5,cljs.core.PersistentVector.EMPTY_NODE,[n,q,r,t,u,v,w,x],null)))){if(cljs.core.not.call(null,function(){var a=cljs.core.truth_(m)?m:cljs.core.truth_(e)?e:f;return cljs.core.truth_(a)?cljs.core.truth_(g)?g:cljs.core.truth_(k)? +k:d:a}()))return a;throw cljs.core.ex_info.call(null,"Mixing year, month, day and week-year week-number fields",new cljs.core.PersistentArrayMap(null,3,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"invalid-date","invalid-date",2030506573),new cljs.core.Keyword(null,"date","date",-1463434462),a,new cljs.core.Keyword(null,"errors","errors",-908790718),cljs.core.PersistentArrayMap.EMPTY],null));}throw cljs.core.ex_info.call(null,"Date is not valid",new cljs.core.PersistentArrayMap(null, +3,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"invalid-date","invalid-date",2030506573),new cljs.core.Keyword(null,"date","date",-1463434462),a,new cljs.core.Keyword(null,"errors","errors",-908790718),function(){var a=cljs.core.PersistentArrayMap.EMPTY;a=!1===n?cljs.core.assoc.call(null,a,new cljs.core.Keyword(null,"months","months",-45571637),e):a;a=!1===q?cljs.core.assoc.call(null,a,new cljs.core.Keyword(null,"days","days",-1394072564),f):a;a=!1===r?cljs.core.assoc.call(null, +a,new cljs.core.Keyword(null,"hours","hours",58380855),l):a;a=!1===t?cljs.core.assoc.call(null,a,new cljs.core.Keyword(null,"minutes","minutes",1319166394),b):a;a=!1===u?cljs.core.assoc.call(null,a,new cljs.core.Keyword(null,"seconds","seconds",-445266194),h):a;a=!1===v?cljs.core.assoc.call(null,a,new cljs.core.Keyword(null,"millis","millis",-1338288387),c):a;a=!1===w?cljs.core.assoc.call(null,a,new 
cljs.core.Keyword(null,"weekyear-week","weekyear-week",795291571),k):a;return!1===x?cljs.core.assoc.call(null, +a,new cljs.core.Keyword(null,"day-of-week","day-of-week",1639326729),d):a}()],null));};cljs_time.internal.core.index_of=function(a,b){return cljs.core.first.call(null,cljs.core.keep_indexed.call(null,function(a,d){return cljs.core.truth_(cljs_time.internal.core._EQ_.call(null,d,b))?a:null},a))}; +cljs_time.internal.core.format=function(a){for(var b=[],c=arguments.length,d=0;;)if(d=a?["0",cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)].join(""):cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)};cljs_time.internal.core.zero_pad.cljs$core$IFn$_invoke$arity$2=function(a,b){return 1>b?cljs.core.str.cljs$core$IFn$_invoke$arity$1(a):[clojure.string.join.call(null,cljs.core.take.call(null,b-cljs.core.str.cljs$core$IFn$_invoke$arity$1(a).length,cljs.core.repeat.call(null,"0"))),cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)].join("")}; +cljs_time.internal.core.zero_pad.cljs$lang$maxFixedArity=2; +cljs_time.internal.core.multiplied_by=function(a,b){var c=function(a){return cljs.core.truth_(a)?a*b:null};return cljs.core.update_in.call(null,cljs.core.update_in.call(null,cljs.core.update_in.call(null,cljs.core.update_in.call(null,cljs.core.update_in.call(null,cljs.core.update_in.call(null,cljs.core.update_in.call(null,cljs.core.update_in.call(null,a,new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"millis","millis",-1338288387)],null),c), +new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"seconds","seconds",-445266194)],null),c),new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"minutes","minutes",1319166394)],null),c),new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"hours","hours",58380855)],null),c),new 
cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE, +[new cljs.core.Keyword(null,"days","days",-1394072564)],null),c),new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"weeks","weeks",1844596125)],null),c),new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"months","months",-45571637)],null),c),new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"years","years",-1298579689)],null),c)}; +cljs_time.internal.core.get_week_year=function(a,b,c){var d=cljs_time.internal.core._EQ_.call(null,b,0),e=cljs_time.internal.core._EQ_.call(null,b,11);b=goog.date.getWeekNumber(a,b,c);return cljs.core.truth_(cljs.core.truth_(d)?52<=b:d)?a-1:cljs.core.truth_(cljs.core.truth_(e)?cljs_time.internal.core._EQ_.call(null,b,1):e)?a+1:a};goog.i18n.ordinalRules={};goog.i18n.ordinalRules.Keyword={ZERO:"zero",ONE:"one",TWO:"two",FEW:"few",MANY:"many",OTHER:"other"};goog.i18n.ordinalRules.defaultSelect_=function(a,b){return goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.decimals_=function(a){a+="";const b=a.indexOf(".");return-1==b?0:a.length-b-1};goog.i18n.ordinalRules.get_vf_=function(a,b){b=void 0===b?Math.min(goog.i18n.ordinalRules.decimals_(a),3):b;const c=Math.pow(10,b);return{v:b,f:(a*c|0)%c}}; +goog.i18n.ordinalRules.get_wt_=function(a,b){if(0===b)return{w:0,t:0};for(;0===b%10;)b/=10,a--;return{w:a,t:b}};goog.i18n.ordinalRules.cySelect_=function(a,b){return 0==a||7==a||8==a||9==a?goog.i18n.ordinalRules.Keyword.ZERO:1==a?goog.i18n.ordinalRules.Keyword.ONE:2==a?goog.i18n.ordinalRules.Keyword.TWO:3==a||4==a?goog.i18n.ordinalRules.Keyword.FEW:5==a||6==a?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.enSelect_=function(a,b){return 
1==a%10&&11!=a%100?goog.i18n.ordinalRules.Keyword.ONE:2==a%10&&12!=a%100?goog.i18n.ordinalRules.Keyword.TWO:3==a%10&&13!=a%100?goog.i18n.ordinalRules.Keyword.FEW:goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.ukSelect_=function(a,b){return 3==a%10&&13!=a%100?goog.i18n.ordinalRules.Keyword.FEW:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.itSelect_=function(a,b){return 11==a||8==a||80==a||800==a?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.neSelect_=function(a,b){return 1<=a&&4>=a?goog.i18n.ordinalRules.Keyword.ONE:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.orSelect_=function(a,b){return 1==a||5==a||7<=a&&9>=a?goog.i18n.ordinalRules.Keyword.ONE:2==a||3==a?goog.i18n.ordinalRules.Keyword.TWO:4==a?goog.i18n.ordinalRules.Keyword.FEW:6==a?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.beSelect_=function(a,b){return 2!=a%10&&3!=a%10||12==a%100||13==a%100?goog.i18n.ordinalRules.Keyword.OTHER:goog.i18n.ordinalRules.Keyword.FEW}; +goog.i18n.ordinalRules.azSelect_=function(a,b){a|=0;return 1==a%10||2==a%10||5==a%10||7==a%10||8==a%10||20==a%100||50==a%100||70==a%100||80==a%100?goog.i18n.ordinalRules.Keyword.ONE:3==a%10||4==a%10||100==a%1E3||200==a%1E3||300==a%1E3||400==a%1E3||500==a%1E3||600==a%1E3||700==a%1E3||800==a%1E3||900==a%1E3?goog.i18n.ordinalRules.Keyword.FEW:0==a||6==a%10||40==a%100||60==a%100||90==a%100?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.kaSelect_=function(a,b){a|=0;return 1==a?goog.i18n.ordinalRules.Keyword.ONE:0==a||2<=a%100&&20>=a%100||40==a%100||60==a%100||80==a%100?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.mrSelect_=function(a,b){return 
1==a?goog.i18n.ordinalRules.Keyword.ONE:2==a||3==a?goog.i18n.ordinalRules.Keyword.TWO:4==a?goog.i18n.ordinalRules.Keyword.FEW:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.svSelect_=function(a,b){return 1!=a%10&&2!=a%10||11==a%100||12==a%100?goog.i18n.ordinalRules.Keyword.OTHER:goog.i18n.ordinalRules.Keyword.ONE};goog.i18n.ordinalRules.kkSelect_=function(a,b){return 6==a%10||9==a%10||0==a%10&&0!=a?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.mkSelect_=function(a,b){a|=0;return 1==a%10&&11!=a%100?goog.i18n.ordinalRules.Keyword.ONE:2==a%10&&12!=a%100?goog.i18n.ordinalRules.Keyword.TWO:7!=a%10&&8!=a%10||17==a%100||18==a%100?goog.i18n.ordinalRules.Keyword.OTHER:goog.i18n.ordinalRules.Keyword.MANY};goog.i18n.ordinalRules.huSelect_=function(a,b){return 1==a||5==a?goog.i18n.ordinalRules.Keyword.ONE:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.frSelect_=function(a,b){return 1==a?goog.i18n.ordinalRules.Keyword.ONE:goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.sqSelect_=function(a,b){return 1==a?goog.i18n.ordinalRules.Keyword.ONE:4==a%10&&14!=a%100?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.caSelect_=function(a,b){return 1==a||3==a?goog.i18n.ordinalRules.Keyword.ONE:2==a?goog.i18n.ordinalRules.Keyword.TWO:4==a?goog.i18n.ordinalRules.Keyword.FEW:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.guSelect_=function(a,b){return 1==a?goog.i18n.ordinalRules.Keyword.ONE:2==a||3==a?goog.i18n.ordinalRules.Keyword.TWO:4==a?goog.i18n.ordinalRules.Keyword.FEW:6==a?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.tkSelect_=function(a,b){return 6==a%10||9==a%10||10==a?goog.i18n.ordinalRules.Keyword.FEW:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.gdSelect_=function(a,b){return 
1==a||11==a?goog.i18n.ordinalRules.Keyword.ONE:2==a||12==a?goog.i18n.ordinalRules.Keyword.TWO:3==a||13==a?goog.i18n.ordinalRules.Keyword.FEW:goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.kwSelect_=function(a,b){return 1<=a&&4>=a||1<=a%100&&4>=a%100||21<=a%100&&24>=a%100||41<=a%100&&44>=a%100||61<=a%100&&64>=a%100||81<=a%100&&84>=a%100?goog.i18n.ordinalRules.Keyword.ONE:5==a||5==a%100?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER}; +goog.i18n.ordinalRules.asSelect_=function(a,b){return 1==a||5==a||7==a||8==a||9==a||10==a?goog.i18n.ordinalRules.Keyword.ONE:2==a||3==a?goog.i18n.ordinalRules.Keyword.TWO:4==a?goog.i18n.ordinalRules.Keyword.FEW:6==a?goog.i18n.ordinalRules.Keyword.MANY:goog.i18n.ordinalRules.Keyword.OTHER};goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_;"af"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"am"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +"ar"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);if("ar_DZ"==goog.LOCALE||"ar-DZ"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;if("ar_EG"==goog.LOCALE||"ar-EG"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;"az"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.azSelect_);"be"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.beSelect_); 
+"bg"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"bn"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.asSelect_);"br"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"bs"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"ca"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.caSelect_);"chr"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +"cs"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"cy"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.cySelect_);"da"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"de"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);if("de_AT"==goog.LOCALE||"de-AT"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_; +if("de_CH"==goog.LOCALE||"de-CH"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;"el"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"en"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_);if("en_AU"==goog.LOCALE||"en-AU"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_;if("en_CA"==goog.LOCALE||"en-CA"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_; +if("en_GB"==goog.LOCALE||"en-GB"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_;if("en_IE"==goog.LOCALE||"en-IE"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_;if("en_IN"==goog.LOCALE||"en-IN"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_;if("en_SG"==goog.LOCALE||"en-SG"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_; 
+if("en_US"==goog.LOCALE||"en-US"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_;if("en_ZA"==goog.LOCALE||"en-ZA"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.enSelect_;"es"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);if("es_419"==goog.LOCALE||"es-419"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;if("es_ES"==goog.LOCALE||"es-ES"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_; +if("es_MX"==goog.LOCALE||"es-MX"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;if("es_US"==goog.LOCALE||"es-US"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;"et"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"eu"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"fa"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +"fi"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"fil"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_);"fr"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_);if("fr_CA"==goog.LOCALE||"fr-CA"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_;"ga"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_); 
+"gl"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"gsw"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"gu"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.guSelect_);"haw"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"he"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"hi"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.guSelect_); +"hr"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"hu"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.huSelect_);"hy"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_);"id"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"in"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"is"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +"it"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.itSelect_);"iw"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"ja"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"ka"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.kaSelect_);"kk"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.kkSelect_);"km"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); 
+"kn"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"ko"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"ky"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"ln"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"lo"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_);"lt"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +"lv"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"mk"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.mkSelect_);"ml"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"mn"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"mo"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_);"mr"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.mrSelect_); +"ms"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_);"mt"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"my"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"nb"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"ne"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.neSelect_);"nl"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); 
+"no"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);if("no_NO"==goog.LOCALE||"no-NO"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;"or"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.orSelect_);"pa"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"pl"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +"pt"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);if("pt_BR"==goog.LOCALE||"pt-BR"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;if("pt_PT"==goog.LOCALE||"pt-PT"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;"ro"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_);"ru"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +"sh"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"si"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"sk"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"sl"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"sq"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.sqSelect_);"sr"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +if("sr_Latn"==goog.LOCALE||"sr-Latn"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;"sv"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.svSelect_);"sw"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"ta"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"te"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); 
+"th"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"tl"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_);"tr"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"uk"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.ukSelect_);"ur"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);"uz"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_); +"vi"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.frSelect_);"zh"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);if("zh_CN"==goog.LOCALE||"zh-CN"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;if("zh_HK"==goog.LOCALE||"zh-HK"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_;if("zh_TW"==goog.LOCALE||"zh-TW"==goog.LOCALE)goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_; +"zu"==goog.LOCALE&&(goog.i18n.ordinalRules.select=goog.i18n.ordinalRules.defaultSelect_);cljs.tools={};cljs.tools.reader={};cljs.tools.reader.impl={};cljs.tools.reader.impl.utils={};cljs.tools.reader.impl.utils.char$=function(a){return null==a?null:cljs.core.char$.call(null,a)};cljs.tools.reader.impl.utils.ex_info_QMARK_=function(a){return a instanceof cljs.core.ExceptionInfo}; +cljs.tools.reader.impl.utils.ReaderConditional=function(a,b,c,d,e){this.splicing_QMARK_=a;this.form=b;this.__meta=c;this.__extmap=d;this.__hash=e;this.cljs$lang$protocol_mask$partition0$=2230716170;this.cljs$lang$protocol_mask$partition1$=139264};cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$ILookup$_lookup$arity$2=function(a,b){return this.cljs$core$ILookup$_lookup$arity$3(null,b,null)}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$ILookup$_lookup$arity$3=function(a,b,c){switch(b instanceof 
cljs.core.Keyword?b.fqn:null){case "splicing?":return this.splicing_QMARK_;case "form":return this.form;default:return cljs.core.get.call(null,this.__extmap,b,c)}}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IKVReduce$_kv_reduce$arity$3=function(a,b,c){return cljs.core.reduce.call(null,function(a,c){var d=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return b.call(null,a,d,c)},c,this)}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IPrintWithWriter$_pr_writer$arity$3=function(a,b,c){return cljs.core.pr_sequential_writer.call(null,b,function(a){return cljs.core.pr_sequential_writer.call(null,b,cljs.core.pr_writer,""," ","",c,a)},"#cljs.tools.reader.impl.utils.ReaderConditional{",", ","}",c,cljs.core.concat.call(null,new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE, +[new cljs.core.Keyword(null,"splicing?","splicing?",-428596366),this.splicing_QMARK_],null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"form","form",-1624062471),this.form],null)],null),this.__extmap))}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IIterable$_iterator$arity$1=function(a){return new cljs.core.RecordIter(0,this,2,new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"splicing?","splicing?",-428596366),new cljs.core.Keyword(null,"form","form",-1624062471)],null),cljs.core.truth_(this.__extmap)?cljs.core._iterator.call(null,this.__extmap):cljs.core.nil_iter.call(null))}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return this.__meta};cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$ICloneable$_clone$arity$1=function(a){return new 
cljs.tools.reader.impl.utils.ReaderConditional(this.splicing_QMARK_,this.form,this.__meta,this.__extmap,this.__hash)};cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$ICounted$_count$arity$1=function(a){return 2+cljs.core.count.call(null,this.__extmap)}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this.__hash;return null!=a?a:this.__hash=a=-209062840^cljs.core.hash_unordered_coll.call(null,this)}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return null!=b&&this.constructor===b.constructor&&cljs.core._EQ_.call(null,this.splicing_QMARK_,b.splicing_QMARK_)&&cljs.core._EQ_.call(null,this.form,b.form)&&cljs.core._EQ_.call(null,this.__extmap,b.__extmap)}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IMap$_dissoc$arity$2=function(a,b){return cljs.core.contains_QMARK_.call(null,new cljs.core.PersistentHashSet(null,new cljs.core.PersistentArrayMap(null,2,[new cljs.core.Keyword(null,"splicing?","splicing?",-428596366),null,new cljs.core.Keyword(null,"form","form",-1624062471),null],null),null),b)?cljs.core.dissoc.call(null,cljs.core._with_meta.call(null,cljs.core.into.call(null,cljs.core.PersistentArrayMap.EMPTY,this),this.__meta), +b):new cljs.tools.reader.impl.utils.ReaderConditional(this.splicing_QMARK_,this.form,this.__meta,cljs.core.not_empty.call(null,cljs.core.dissoc.call(null,this.__extmap,b)),null)}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IAssociative$_assoc$arity$3=function(a,b,c){a=cljs.core.keyword_identical_QMARK_;return cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"splicing?","splicing?",-428596366),b))?new cljs.tools.reader.impl.utils.ReaderConditional(c,this.form,this.__meta,this.__extmap,null):cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"form","form",-1624062471),b))?new cljs.tools.reader.impl.utils.ReaderConditional(this.splicing_QMARK_, 
+c,this.__meta,this.__extmap,null):new cljs.tools.reader.impl.utils.ReaderConditional(this.splicing_QMARK_,this.form,this.__meta,cljs.core.assoc.call(null,this.__extmap,b,c),null)}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){return cljs.core.seq.call(null,cljs.core.concat.call(null,new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.MapEntry(new cljs.core.Keyword(null,"splicing?","splicing?",-428596366),this.splicing_QMARK_,null),new cljs.core.MapEntry(new cljs.core.Keyword(null,"form","form",-1624062471),this.form,null)],null),this.__extmap))}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return new cljs.tools.reader.impl.utils.ReaderConditional(this.splicing_QMARK_,this.form,b,this.__extmap,this.__hash)}; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){return cljs.core.vector_QMARK_.call(null,b)?this.cljs$core$IAssociative$_assoc$arity$3(null,cljs.core._nth.call(null,b,0),cljs.core._nth.call(null,b,1)):cljs.core.reduce.call(null,cljs.core._conj,this,b)}; +cljs.tools.reader.impl.utils.ReaderConditional.getBasis=function(){return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"splicing?","splicing?",1211935161,null),new cljs.core.Symbol(null,"form","form",16469056,null)],null)};cljs.tools.reader.impl.utils.ReaderConditional.cljs$lang$type=!0; +cljs.tools.reader.impl.utils.ReaderConditional.cljs$lang$ctorPrSeq=function(a){return new cljs.core.List(null,"cljs.tools.reader.impl.utils/ReaderConditional",null,1,null)};cljs.tools.reader.impl.utils.ReaderConditional.cljs$lang$ctorPrWriter=function(a,b){return cljs.core._write.call(null,b,"cljs.tools.reader.impl.utils/ReaderConditional")};cljs.tools.reader.impl.utils.__GT_ReaderConditional=function(a,b){return new 
cljs.tools.reader.impl.utils.ReaderConditional(a,b,null,null,null)}; +cljs.tools.reader.impl.utils.map__GT_ReaderConditional=function(a){var b=cljs.core.dissoc.call(null,a,new cljs.core.Keyword(null,"splicing?","splicing?",-428596366),new cljs.core.Keyword(null,"form","form",-1624062471));b=cljs.core.record_QMARK_.call(null,a)?cljs.core.into.call(null,cljs.core.PersistentArrayMap.EMPTY,b):b;return new cljs.tools.reader.impl.utils.ReaderConditional((new cljs.core.Keyword(null,"splicing?","splicing?",-428596366)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null, +"form","form",-1624062471)).cljs$core$IFn$_invoke$arity$1(a),null,cljs.core.not_empty.call(null,b),null)};cljs.tools.reader.impl.utils.reader_conditional_QMARK_=function(a){return a instanceof cljs.tools.reader.impl.utils.ReaderConditional};cljs.tools.reader.impl.utils.reader_conditional=function(a,b){return new cljs.tools.reader.impl.utils.ReaderConditional(b,a,null,null,null)};cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IPrintWithWriter$=cljs.core.PROTOCOL_SENTINEL; +cljs.tools.reader.impl.utils.ReaderConditional.prototype.cljs$core$IPrintWithWriter$_pr_writer$arity$3=function(a,b,c){cljs.core._write.call(null,b,["#?",cljs.core.truth_(this.splicing_QMARK_)?"@":null].join(""));return cljs.core.pr_writer.call(null,this.form,b,c)};cljs.tools.reader.impl.utils.ws_rx=/[\s]/;cljs.tools.reader.impl.utils.whitespace_QMARK_=function(a){return null==a?null:","===a?!0:cljs.tools.reader.impl.utils.ws_rx.test(a)}; +cljs.tools.reader.impl.utils.numeric_QMARK_=function(a){return null==a?null:goog.string.isNumeric(a)};cljs.tools.reader.impl.utils.newline_QMARK_=function(a){return"\n"===a||"\n"===a||null==a}; +cljs.tools.reader.impl.utils.desugar_meta=function(a){return a instanceof cljs.core.Keyword?cljs.core.PersistentArrayMap.createAsIfByAssoc([a,!0]):a instanceof cljs.core.Symbol?new cljs.core.PersistentArrayMap(null,1,[new 
cljs.core.Keyword(null,"tag","tag",-1290361223),a],null):"string"===typeof a?new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"tag","tag",-1290361223),a],null):a};cljs.tools.reader.impl.utils.last_id=cljs.core.atom.call(null,0); +cljs.tools.reader.impl.utils.next_id=function(){return cljs.core.swap_BANG_.call(null,cljs.tools.reader.impl.utils.last_id,cljs.core.inc)}; +cljs.tools.reader.impl.utils.namespace_keys=function(a,b){return function e(b){return new cljs.core.LazySeq(null,function(){for(;;){var d=cljs.core.seq.call(null,b);if(d){if(cljs.core.chunked_seq_QMARK_.call(null,d)){var g=cljs.core.chunk_first.call(null,d),h=cljs.core.count.call(null,g),k=cljs.core.chunk_buffer.call(null,h);return function(){for(var b=0;;)if(bthis.s_pos?(a=this.s.charAt(this.s_pos),this.s_pos+=1,a):null};cljs.tools.reader.reader_types.StringReader.prototype.cljs$tools$reader$reader_types$Reader$peek_char$arity$1=function(a){return this.s_len>this.s_pos?this.s.charAt(this.s_pos):null}; +cljs.tools.reader.reader_types.StringReader.getBasis=function(){return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"s","s",-948495851,null),new cljs.core.Symbol(null,"s-len","s-len",1869978331,null),cljs.core.with_meta(new cljs.core.Symbol(null,"s-pos","s-pos",-540562492,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null))],null)}; +cljs.tools.reader.reader_types.StringReader.cljs$lang$type=!0;cljs.tools.reader.reader_types.StringReader.cljs$lang$ctorStr="cljs.tools.reader.reader-types/StringReader";cljs.tools.reader.reader_types.StringReader.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write.call(null,b,"cljs.tools.reader.reader-types/StringReader")};cljs.tools.reader.reader_types.__GT_StringReader=function(a,b,c){return new cljs.tools.reader.reader_types.StringReader(a,b,c)}; 
+cljs.tools.reader.reader_types.NodeReadableReader=function(a,b){this.readable=a;this.buf=b};cljs.tools.reader.reader_types.NodeReadableReader.prototype.cljs$tools$reader$reader_types$Reader$=cljs.core.PROTOCOL_SENTINEL; +cljs.tools.reader.reader_types.NodeReadableReader.prototype.cljs$tools$reader$reader_types$Reader$read_char$arity$1=function(a){if(cljs.core.truth_(this.buf))return a=this.buf[0],this.buf=null,cljs.tools.reader.impl.utils.char$.call(null,a);a=cljs.core.str.cljs$core$IFn$_invoke$arity$1(this.readable.read(1));return cljs.core.truth_(a)?cljs.tools.reader.impl.utils.char$.call(null,a):null}; +cljs.tools.reader.reader_types.NodeReadableReader.prototype.cljs$tools$reader$reader_types$Reader$peek_char$arity$1=function(a){cljs.core.truth_(this.buf)||(this.buf=cljs.core.str.cljs$core$IFn$_invoke$arity$1(this.readable.read(1)));return cljs.core.truth_(this.buf)?cljs.tools.reader.impl.utils.char$.call(null,this.buf[0]):null}; +cljs.tools.reader.reader_types.NodeReadableReader.getBasis=function(){return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"readable","readable",2113054478,null),cljs.core.with_meta(new cljs.core.Symbol(null,"buf","buf",1426618187,null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"mutable","mutable",875778266),!0],null))],null)};cljs.tools.reader.reader_types.NodeReadableReader.cljs$lang$type=!0; +cljs.tools.reader.reader_types.NodeReadableReader.cljs$lang$ctorStr="cljs.tools.reader.reader-types/NodeReadableReader";cljs.tools.reader.reader_types.NodeReadableReader.cljs$lang$ctorPrWriter=function(a,b,c){return cljs.core._write.call(null,b,"cljs.tools.reader.reader-types/NodeReadableReader")};cljs.tools.reader.reader_types.__GT_NodeReadableReader=function(a,b){return new cljs.tools.reader.reader_types.NodeReadableReader(a,b)}; 
+cljs.tools.reader.reader_types.PushbackReader=function(a,b,c,d){this.rdr=a;this.buf=b;this.buf_len=c;this.buf_pos=d};cljs.tools.reader.reader_types.PushbackReader.prototype.cljs$tools$reader$reader_types$Reader$=cljs.core.PROTOCOL_SENTINEL; +cljs.tools.reader.reader_types.PushbackReader.prototype.cljs$tools$reader$reader_types$Reader$read_char$arity$1=function(a){a=this.buf_posc?'..."':'"';return['"',cljs.core.str.cljs$core$IFn$_invoke$arity$1(b.substring(0,function(){var a=b.length;return cd?"...}":"}")}); +cljs.core._add_method.call(null,cljs.tools.reader.impl.inspect.inspect_STAR_,new cljs.core.Keyword(null,"set","set",304602554),function(a,b){return cljs.tools.reader.impl.inspect.inspect_STAR__col.call(null,a,b,"#{","}")});cljs.core._add_method.call(null,cljs.tools.reader.impl.inspect.inspect_STAR_,new cljs.core.Keyword(null,"vector","vector",1902966158),function(a,b){return cljs.tools.reader.impl.inspect.inspect_STAR__col.call(null,a,b,"[","]")}); +cljs.core._add_method.call(null,cljs.tools.reader.impl.inspect.inspect_STAR_,new cljs.core.Keyword(null,"default","default",-1987822328),function(a,b){return cljs.core.pr_str.call(null,cljs.core.type.call(null,b))}); +cljs.tools.reader.impl.inspect.inspect=function(a){switch(arguments.length){case 1:return cljs.tools.reader.impl.inspect.inspect.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.tools.reader.impl.inspect.inspect.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.tools.reader.impl.inspect.inspect.cljs$core$IFn$_invoke$arity$1=function(a){return cljs.tools.reader.impl.inspect.inspect_STAR_.call(null,!1,a)};cljs.tools.reader.impl.inspect.inspect.cljs$core$IFn$_invoke$arity$2=function(a,b){return cljs.tools.reader.impl.inspect.inspect_STAR_.call(null,a,b)};cljs.tools.reader.impl.inspect.inspect.cljs$lang$maxFixedArity=2;cljs.tools.reader.impl.errors={}; 
+cljs.tools.reader.impl.errors.ex_details=function(a,b){b=new cljs.core.PersistentArrayMap(null,2,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"reader-exception","reader-exception",-1938323098),new cljs.core.Keyword(null,"ex-kind","ex-kind",1581199296),b],null);return cljs.tools.reader.reader_types.indexing_reader_QMARK_.call(null,a)?cljs.core.assoc.call(null,b,new cljs.core.Keyword(null,"file","file",-1269645878),cljs.tools.reader.reader_types.get_file_name.call(null,a), +new cljs.core.Keyword(null,"line","line",212345235),cljs.tools.reader.reader_types.get_line_number.call(null,a),new cljs.core.Keyword(null,"col","col",-1959363084),cljs.tools.reader.reader_types.get_column_number.call(null,a)):b}; +cljs.tools.reader.impl.errors.throw_ex=function(a){for(var b=[],c=arguments.length,d=0;;)if(d=a?"+":"-");a=Math.abs(a);b.push(goog.string.padNumber(Math.floor(a/60)%100,2),":",goog.string.padNumber(a%60,2));return b.join("")};goog.i18n.TimeZone.composePosixTimeZoneID_=function(a){if(0==a)return"Etc/GMT";var b=["Etc/GMT",0>a?"-":"+"];a=Math.abs(a);b.push(Math.floor(a/60)%100);a%=60;0!=a&&b.push(":",goog.string.padNumber(a,2));return b.join("")}; +goog.i18n.TimeZone.composeUTCString_=function(a){if(0==a)return"UTC";var b=["UTC",0>a?"+":"-"];a=Math.abs(a);b.push(Math.floor(a/60)%100);a%=60;0!=a&&b.push(":",a);return b.join("")};goog.i18n.TimeZone.prototype.getTimeZoneData=function(){return{id:this.timeZoneId_,std_offset:-this.standardOffset_,names:goog.array.clone(this.tzNames_),names_ext:goog.object.clone(this.tzNamesExt_),transitions:goog.array.clone(this.transitions_)}}; +goog.i18n.TimeZone.prototype.getDaylightAdjustment=function(a){a=Date.UTC(a.getUTCFullYear(),a.getUTCMonth(),a.getUTCDate(),a.getUTCHours(),a.getUTCMinutes())/goog.i18n.TimeZone.MILLISECONDS_PER_HOUR_;for(var b=0;b=this.transitions_[b];)b+=2;return 0==b?0:this.transitions_[b-1]};goog.i18n.TimeZone.prototype.getGMTString=function(a){return 
goog.i18n.TimeZone.composeGMTString_(this.getOffset(a))};goog.i18n.TimeZone.prototype.getUTCString=function(a){return goog.i18n.TimeZone.composeUTCString_(this.getOffset(a))}; +goog.i18n.TimeZone.prototype.getLongName=function(a){return this.tzNames_[this.isDaylightTime(a)?goog.i18n.TimeZone.NameType.DLT_LONG_NAME:goog.i18n.TimeZone.NameType.STD_LONG_NAME]};goog.i18n.TimeZone.prototype.getOffset=function(a){return this.standardOffset_-this.getDaylightAdjustment(a)};goog.i18n.TimeZone.prototype.getRFCTimeZoneString=function(a){a=-this.getOffset(a);var b=[0>a?"-":"+"];a=Math.abs(a);b.push(goog.string.padNumber(Math.floor(a/60)%100,2),goog.string.padNumber(a%60,2));return b.join("")}; +goog.i18n.TimeZone.prototype.getShortName=function(a){return this.tzNames_[this.isDaylightTime(a)?goog.i18n.TimeZone.NameType.DLT_SHORT_NAME:goog.i18n.TimeZone.NameType.STD_SHORT_NAME]};goog.i18n.TimeZone.prototype.getTimeZoneId=function(){return this.timeZoneId_};goog.i18n.TimeZone.prototype.isDaylightTime=function(a){return 0=cljs.core.count.call(null,b)&&cljs.core.every_QMARK_.call(null,function(b){return cljs.core.contains_QMARK_.call(null,a,b)},b)};bigml.dixie.fields={};bigml.dixie.fields.core={};bigml.dixie.fields.core.summary=new cljs.core.Keyword(null,"summary","summary",380847952);bigml.dixie.fields.core.with_summary=function(a,b){return cljs.core.truth_(b)?cljs.core.assoc.call(null,a,new cljs.core.Keyword(null,"summary","summary",380847952),b):a};bigml.dixie.fields.core.categories=cljs.core.comp.call(null,new cljs.core.Keyword(null,"categories","categories",178386610),bigml.dixie.fields.core.summary); +bigml.dixie.fields.core.with_categories=function(a,b){return cljs.core.assoc_in.call(null,a,new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[bigml.dixie.fields.core.summary,new cljs.core.Keyword(null,"categories","categories",178386610)],null),b)};bigml.dixie.fields.core.missing_count=cljs.core.comp.call(null,new 
cljs.core.Keyword(null,"missing_count","missing_count",-7853302),bigml.dixie.fields.core.summary); +bigml.dixie.fields.core.population=cljs.core.comp.call(null,new cljs.core.Keyword(null,"population","population",-1209901867),bigml.dixie.fields.core.summary);bigml.dixie.fields.core.variance=cljs.core.comp.call(null,new cljs.core.Keyword(null,"variance","variance",1132010827),bigml.dixie.fields.core.summary); +bigml.dixie.fields.core.distribution=function(a){return cljs.core.some.call(null,(new cljs.core.Keyword(null,"summary","summary",380847952)).cljs$core$IFn$_invoke$arity$1(a),new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"categories","categories",178386610),new cljs.core.Keyword(null,"bins","bins",1670395210),new cljs.core.Keyword(null,"counts","counts",234305892)],null))}; +bigml.dixie.fields.core.make_descriptor=function(a){for(var b=[],c=arguments.length,d=0;;)if(d=a)return cljs.core.conj.call(null,b,bigml.dixie.fields.core.digits.call(null,a));var c=cljs.core.quot.call(null,a,16);b=cljs.core.conj.call(null,b,bigml.dixie.fields.core.digits.call(null,cljs.core.mod.call(null,a,16)));a=c}}; +bigml.dixie.fields.core.num__GT_id=function(a){a=bigml.dixie.fields.core.hex_digits.call(null,a,cljs.core.List.EMPTY);return cljs.core.apply.call(null,cljs.core.str,cljs.core.concat.call(null,cljs.core.repeat.call(null,6-cljs.core.count.call(null,a),0),a))};bigml.dixie.fields.core.parent_ids=new cljs.core.Keyword(null,"parent_ids","parent_ids",-1815054092);bigml.dixie.fields.core.parent=cljs.core.comp.call(null,cljs.core.first,bigml.dixie.fields.core.parent_ids); +bigml.dixie.fields.core.with_parent_ids=function(a,b){return cljs.core.empty_QMARK_.call(null,b)?cljs.core.dissoc.call(null,a,new cljs.core.Keyword(null,"parent_ids","parent_ids",-1815054092)):cljs.core.assoc.call(null,a,new cljs.core.Keyword(null,"parent_ids","parent_ids",-1815054092),b)};bigml.dixie.fields.core.child_ids=new 
cljs.core.Keyword(null,"child_ids","child_ids",1297307435); +bigml.dixie.fields.core.with_child_ids=function(a,b){return cljs.core.empty_QMARK_.call(null,b)?cljs.core.dissoc.call(null,a,new cljs.core.Keyword(null,"child_ids","child_ids",1297307435)):cljs.core.assoc.call(null,a,new cljs.core.Keyword(null,"child_ids","child_ids",1297307435),b)}; +bigml.dixie.fields.core.make_child=function(a){for(var b=[],c=arguments.length,d=0;;)if(de?function(){var a=-e-1;return function(b, +c){return cljs.core.nth.call(null,c,a,null)}}():function(a,b){return cljs.core.nth.call(null,a,e,null)};return cljs.core.truth_((new cljs.core.Keyword(null,"txt_","txt_",-1749668228)).cljs$core$IFn$_invoke$arity$1(a))?function(a,b){return c.call(null,cljs.core.nth.call(null,f.call(null,a,b),d,null))}:bigml.dixie.fields.core.regions_QMARK_.call(null,a)?function(a,b){a=cljs.core.nth.call(null,f.call(null,a,b),d,null);return cljs.core.truth_(a)?cljs.core.seq.call(null,"string"===typeof a?c.call(null, +a):a):null}:function(a,b){a=cljs.core.nth.call(null,f.call(null,a,b),d,null);return cljs.core.truth_(a)?"string"===typeof a?c.call(null,a):a:null}};bigml.dixie.flatline.fields.make_getter.cljs$lang$maxFixedArity=3; +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"field","field",-1302436500),function(a,b,c,d){bigml.dixie.flatline.errors.check_arity.call(null,b,1,3);d=bigml.dixie.flatline.fields.find_desc.call(null,b,d,cljs.core.second.call(null,b),!0);a=function(){var a=cljs.core.second.call(null,c);if(cljs.core.truth_(a)){var d=bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,a);a=cljs.core.truth_(d)?a.call(null):d;a=cljs.core.integer_QMARK_.call(null,a)?a: +!1;return cljs.core.truth_(a)?a:bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-type-error","flatline-type-error",845929452),"Field shift must be an int in %s",cljs.core.pr_str.call(null,b))}return null}();var e=cljs.core.assoc.call(null,d,new 
cljs.core.Keyword(null,"shift","shift",997140064),a),f=bigml.dixie.flatline.fields.make_getter.call(null,e,a);d=function(){var a=cljs.core.second.call(null,cljs.core.next.call(null,c));if(cljs.core.truth_(a)){if(cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null, +a))){var b=a.call(null);return function(a,c){a=f.call(null,a,c);return cljs.core.truth_(a)?a:b}}return function(b,c){var d=f.call(null,b,c);return cljs.core.truth_(d)?d:a.call(null,b,c)}}return f}();var g=function(){var a=bigml.dixie.flatline.types.optype__GT_type.call(null,cljs.core.name.call(null,function(){var a=(new cljs.core.Keyword(null,"optype","optype",-1789210098)).cljs$core$IFn$_invoke$arity$1(e);return cljs.core.truth_(a)?a:""}()));return cljs.core.truth_(a)?a:bigml.dixie.flatline.utils.raise.call(null, +new cljs.core.Keyword(null,"flatline-field-not-found","flatline-field-not-found",-1248006758),"No type information for field %s",cljs.core.pr_str.call(null,cljs.core.second.call(null,b)))}();return bigml.dixie.flatline.types.with_type.call(null,d,g,null,e,a)}); +bigml.dixie.flatline.expand.define_syntax_STAR_.call(null,new cljs.core.Symbol(null,"f","f",43394975,null),cljs.core.list(new cljs.core.Symbol(null,"id","id",252129435,null),new cljs.core.Symbol(null,"...","...",-1926939749,null)),cljs.core.list(new cljs.core.Symbol(null,"field","field",338095027,null),new cljs.core.Symbol(null,"id","id",252129435,null),new cljs.core.Symbol(null,"...","...",-1926939749,null))); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"missing","missing",362507769),function(a,b,c,d){bigml.dixie.flatline.errors.check_arity.call(null,b,1,2);var e=bigml.dixie.flatline.eval.primop.call(null,new cljs.core.Keyword(null,"field","field",-1302436500),b,c,d);a=cljs.core.dissoc.call(null,cljs.core.meta.call(null,e),new cljs.core.Keyword(null,"fd","fd",-1524403E3));var f=cljs.core.conj.call(null,cljs.core.set.call(null,(new 
cljs.core.Keyword(null,"missing_tokens", +"missing_tokens",-1726684288)).cljs$core$IFn$_invoke$arity$1(bigml.dixie.flatline.types.fn_desc.call(null,e))),null,"");return bigml.dixie.flatline.types.as_bool.call(null,cljs.core.with_meta.call(null,function(a,b){return cljs.core.contains_QMARK_.call(null,f,e.call(null,a,b))},a))}); +bigml.dixie.flatline.fields.row_types=function(a){a=bigml.dixie.fields.core.linearize_field_descriptors.call(null,a);return cljs.core.map.call(null,cljs.core.comp.call(null,bigml.dixie.flatline.types.optype__GT_type,cljs.core.name,bigml.dixie.fields.core.optype),a)}; +bigml.dixie.flatline.fields.row_getter=function(a){a=bigml.dixie.fields.core.linearize_field_descriptors.call(null,a);return bigml.dixie.flatline.types.with_type.call(null,function(a,c){return cljs.core.first.call(null,a)},bigml.dixie.flatline.fields.row_types.call(null,a),null,a)}; +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"all","all",892129742),function(a,b,c,d){bigml.dixie.flatline.errors.check_arity.call(null,b,0);return bigml.dixie.flatline.fields.row_getter.call(null,d)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"all-but","all-but",683089552),function(a,b,c,d){cljs.core.every_QMARK_.call(null,bigml.dixie.flatline.types.constant_fn_QMARK_,c)||bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-type-error","flatline-type-error",845929452),"all-but's arguments must be constants in %s",b);var e=cljs.core.set.call(null,cljs.core.keep.call(null,function(a){return cljs.core.first.call(null,bigml.dixie.flatline.fields.find_desc_STAR_.call(null, +b,d,a.call(null),!0))},c));a=cljs.core.filter.call(null,function(a){return cljs.core.not.call(null,e.call(null,(new 
cljs.core.Keyword(null,"id","id",-1388402092)).cljs$core$IFn$_invoke$arity$1(a)))},bigml.dixie.fields.core.linearize_field_descriptors.call(null,d));c=bigml.dixie.fields.core.col_positions.call(null,cljs.core.map.call(null,new cljs.core.Keyword(null,"id","id",-1388402092),a),d);var f=cljs.core.apply.call(null,cljs.core.juxt,cljs.core.map.call(null,function(a){return function(b){return cljs.core.nth.call(null, +b,a,null)}},c));c=cljs.core.map.call(null,function(a){return bigml.dixie.flatline.types.optype__GT_type.call(null,cljs.core.name.call(null,(new cljs.core.Keyword(null,"optype","optype",-1789210098)).cljs$core$IFn$_invoke$arity$1(a)))},a);return bigml.dixie.flatline.types.with_type.call(null,function(a,b){return f.call(null,cljs.core.first.call(null,a))},c,null,a)}); +bigml.dixie.flatline.fields.check_kv=function(a,b,c){var d=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);d=cljs.core.first.call(null,bigml.dixie.flatline.fields.find_desc_STAR_.call(null,a,b,d,!0));b=b.call(null,d);b=cljs.core.truth_(b)?bigml.dixie.flatline.utils.field_value.call(null,b,c):null;cljs.core.truth_(b)||bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-type-error","flatline-type-error",845929452),"Invalid value %s for field %s in %s", +cljs.core.pr_str.call(null,c),d,a);return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[d,b],null)}; +bigml.dixie.flatline.fields.idvs__GT_posvs=function(a,b,c){var d=bigml.dixie.fields.core.mappify_field_descriptors.call(null,b);c=cljs.core.keep.call(null,function(b){return bigml.dixie.flatline.fields.check_kv.call(null,a,d,b)},c);b=bigml.dixie.fields.core.col_positions.call(null,cljs.core.map.call(null,cljs.core.first,c),b);return cljs.core.map.call(null,function(a,b){return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[a,cljs.core.second.call(null,b)],null)},b, +c)};bigml.dixie.flatline.fields.not_there=cljs.core.gensym.call(null); 
+bigml.dixie.flatline.fields.follow_keys=function(a,b){for(;;){if(cljs.core.empty_QMARK_.call(null,b))return a;if(!cljs.core.coll_QMARK_.call(null,a)||cljs.core.empty_QMARK_.call(null,a))return null;var c=cljs.core.first.call(null,b),d=cljs.core.get.call(null,a,cljs.core.integer_QMARK_.call(null,c)?c:cljs.core.keyword.call(null,c),bigml.dixie.flatline.fields.not_there);a=cljs.core._EQ_.call(null,bigml.dixie.flatline.fields.not_there,d)?cljs.core.sequential_QMARK_.call(null,a)&&!cljs.core.integer_QMARK_.call(null, +c)?cljs.core.some.call(null,function(a,b,c,d){return function(a){return cljs.core._EQ_.call(null,cljs.core.first.call(null,a),c)?cljs.core.second.call(null,a):null}}(a,b,c,d),a):null:d;b=cljs.core.rest.call(null,b)}}; +bigml.dixie.flatline.fields.find_prop=function(a,b,c,d,e){b=bigml.dixie.flatline.fields.find_desc.call(null,a,b,c,e);b=bigml.dixie.flatline.fields.follow_keys.call(null,b,d);return"number"===typeof b?new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[b,new cljs.core.Keyword(null,"numeric","numeric",-1495594714)],null):"string"===typeof b?new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[b,new cljs.core.Keyword(null,"string","string",-1989541586)], +null):!0===b||!1===b?new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[b,new cljs.core.Keyword(null,"boolean","boolean",-1919418404)],null):cljs.core.truth_(e)?bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-malformed-expression","flatline-malformed-expression",1136890174),"Invalid field path %s in %s",cljs.core.pr_str.call(null,cljs.core.cons.call(null,c,d)),a):null}; +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"all-with-defaults","all-with-defaults",1393857544),function(){var a=function(a,b,e,f,g){cljs.core.every_QMARK_.call(null,bigml.dixie.flatline.types.constant_fn_QMARK_,e)||bigml.dixie.flatline.utils.raise.call(null,new 
cljs.core.Keyword(null,"flatline-type-error","flatline-type-error",845929452),"all-with-default's arguments must be constant in %s",b);cljs.core.odd_QMARK_.call(null,cljs.core.count.call(null, +e))&&bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-invalid-arguments","flatline-invalid-arguments",1649316504),"all-with-default's takes an even number of arguments in %s",b);a=bigml.dixie.fields.core.linearize_field_descriptors.call(null,f);e=cljs.core.partition.call(null,2,cljs.core.map.call(null,function(a){return a.call(null)},e));var c=cljs.core.into.call(null,cljs.core.PersistentArrayMap.EMPTY,bigml.dixie.flatline.fields.idvs__GT_posvs.call(null,b,a,e)),d=cljs.core.range.call(null, +0,cljs.core.count.call(null,a));b=cljs.core.map.call(null,function(a){return bigml.dixie.flatline.types.optype__GT_type.call(null,cljs.core.name.call(null,(new cljs.core.Keyword(null,"optype","optype",-1789210098)).cljs$core$IFn$_invoke$arity$1(a)))},a);return bigml.dixie.flatline.types.with_type.call(null,function(a,b){a=cljs.core.nth.call(null,a,0,null);return cljs.core.mapv.call(null,function(a,b){return null==a?c.call(null,b):a},a,d)},b,null,a)},b=function(b,d,e,f,g){var c=null;if(4a?null:Math.sqrt(a)}.call(null,e.call(null)),new cljs.core.Keyword(null,"numeric","numeric",-1495594714)):bigml.dixie.flatline.types.with_type.call(null, +function(a,b){a=e.call(null,a,b);return cljs.core.truth_(a)?(a=0>a?null:Math.sqrt(a),a):null},new cljs.core.Keyword(null,"numeric","numeric",-1495594714))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"ln","ln",1974894440),function(a,b,c,d){bigml.dixie.flatline.types.check_numbers.call(null,1,b,c);var e=cljs.core.first.call(null,c);return cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,e))?bigml.dixie.flatline.types.constant_fn.call(null,function(a){return 0=a?Math.acos(a):null}.call(null,e.call(null)),new 
cljs.core.Keyword(null,"numeric","numeric",-1495594714)):bigml.dixie.flatline.types.with_type.call(null, +function(a,b){a=e.call(null,a,b);return cljs.core.truth_(a)?(a=-1<=a&&1>=a?Math.acos(a):null,a):null},new cljs.core.Keyword(null,"numeric","numeric",-1495594714))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"asin","asin",1750305199),function(a,b,c,d){bigml.dixie.flatline.types.check_numbers.call(null,1,b,c);var e=cljs.core.first.call(null,c);return cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,e))?bigml.dixie.flatline.types.constant_fn.call(null,function(a){return-1<=a&&1>=a?Math.asin(a):null}.call(null,e.call(null)),new cljs.core.Keyword(null,"numeric","numeric",-1495594714)):bigml.dixie.flatline.types.with_type.call(null, +function(a,b){a=e.call(null,a,b);return cljs.core.truth_(a)?(a=-1<=a&&1>=a?Math.asin(a):null,a):null},new cljs.core.Keyword(null,"numeric","numeric",-1495594714))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"atan","atan",1627885634),function(a,b,c,d){bigml.dixie.flatline.types.check_numbers.call(null,1,b,c);var e=cljs.core.first.call(null,c);return cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,e))?bigml.dixie.flatline.types.constant_fn.call(null,Math.atan(e.call(null)),new cljs.core.Keyword(null,"numeric","numeric",-1495594714)):bigml.dixie.flatline.types.with_type.call(null,function(a, +b){a=e.call(null,a,b);return cljs.core.truth_(a)?Math.atan(a):null},new cljs.core.Keyword(null,"numeric","numeric",-1495594714))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"to-radians","to-radians",1089873499),function(a,b,c,d){bigml.dixie.flatline.types.check_numbers.call(null,1,b,c);var e=cljs.core.first.call(null,c);return 
cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,e))?bigml.dixie.flatline.types.constant_fn.call(null,function(a){return Math.PI/180*a}.call(null,e.call(null)),new cljs.core.Keyword(null,"numeric","numeric",-1495594714)):bigml.dixie.flatline.types.with_type.call(null, +function(a,b){a=e.call(null,a,b);return cljs.core.truth_(a)?Math.PI/180*a:null},new cljs.core.Keyword(null,"numeric","numeric",-1495594714))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"to-degrees","to-degrees",-325601763),function(a,b,c,d){bigml.dixie.flatline.types.check_numbers.call(null,1,b,c);var e=cljs.core.first.call(null,c);return cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,e))?bigml.dixie.flatline.types.constant_fn.call(null,function(a){return 180/Math.PI*a}.call(null,e.call(null)),new cljs.core.Keyword(null,"numeric","numeric",-1495594714)):bigml.dixie.flatline.types.with_type.call(null, +function(a,b){a=e.call(null,a,b);return cljs.core.truth_(a)?180/Math.PI*a:null},new cljs.core.Keyword(null,"numeric","numeric",-1495594714))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"cosh","cosh",691647627),function(a,b,c,d){bigml.dixie.flatline.types.check_numbers.call(null,1,b,c);var e=cljs.core.first.call(null,c);return cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,e))?bigml.dixie.flatline.types.constant_fn.call(null,Math.cosh(e.call(null)),new cljs.core.Keyword(null,"numeric","numeric",-1495594714)):bigml.dixie.flatline.types.with_type.call(null,function(a, +b){a=e.call(null,a,b);return cljs.core.truth_(a)?Math.cosh(a):null},new cljs.core.Keyword(null,"numeric","numeric",-1495594714))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"sinh","sinh",1664012467),function(a,b,c,d){bigml.dixie.flatline.types.check_numbers.call(null,1,b,c);var 
e=cljs.core.first.call(null,c);return cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,e))?bigml.dixie.flatline.types.constant_fn.call(null,Math.sinh(e.call(null)),new cljs.core.Keyword(null,"numeric","numeric",-1495594714)):bigml.dixie.flatline.types.with_type.call(null,function(a, +b){a=e.call(null,a,b);return cljs.core.truth_(a)?Math.sinh(a):null},new cljs.core.Keyword(null,"numeric","numeric",-1495594714))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tanh","tanh",-1160049730),function(a,b,c,d){bigml.dixie.flatline.types.check_numbers.call(null,1,b,c);var e=cljs.core.first.call(null,c);return cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,e))?bigml.dixie.flatline.types.constant_fn.call(null,Math.tanh(e.call(null)),new cljs.core.Keyword(null,"numeric","numeric",-1495594714)):bigml.dixie.flatline.types.with_type.call(null,function(a, +b){a=e.call(null,a,b);return cljs.core.truth_(a)?Math.tanh(a):null},new cljs.core.Keyword(null,"numeric","numeric",-1495594714))});bigml.dixie.flatline.expand.define_syntax_STAR_.call(null,new cljs.core.Symbol(null,"square","square",-1842001092,null),cljs.core.list(new cljs.core.Symbol(null,"x","x",-555367584,null)),cljs.core.list(new cljs.core.Symbol(null,"*","*",345799209,null),new cljs.core.Symbol(null,"x","x",-555367584,null),new cljs.core.Symbol(null,"x","x",-555367584,null))); +bigml.dixie.flatline.expand.define_syntax_STAR_.call(null,new cljs.core.Symbol(null,"even?","even?",-1827825394,null),cljs.core.list(new cljs.core.Symbol(null,"x","x",-555367584,null)),cljs.core.list(new cljs.core.Symbol(null,"\x3d","\x3d",-1501502141,null),0,cljs.core.list(new cljs.core.Symbol(null,"mod","mod",1510044207,null),new cljs.core.Symbol(null,"x","x",-555367584,null),2))); +bigml.dixie.flatline.expand.define_syntax_STAR_.call(null,new cljs.core.Symbol(null,"odd?","odd?",-1458588199,null),cljs.core.list(new 
cljs.core.Symbol(null,"x","x",-555367584,null)),cljs.core.list(new cljs.core.Symbol(null,"\x3d","\x3d",-1501502141,null),1,cljs.core.list(new cljs.core.Symbol(null,"mod","mod",1510044207,null),new cljs.core.Symbol(null,"x","x",-555367584,null),2))); +bigml.dixie.flatline.expand.define_syntax_STAR_.call(null,new cljs.core.Symbol(null,"zero?","zero?",325758897,null),cljs.core.list(new cljs.core.Symbol(null,"x","x",-555367584,null)),cljs.core.list(new cljs.core.Symbol(null,"\x3d","\x3d",-1501502141,null),0,new cljs.core.Symbol(null,"x","x",-555367584,null))); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"rand","rand",908504774),function(a,b,c,d){bigml.dixie.flatline.types.check_numbers.call(null,0,b,null);return bigml.dixie.flatline.types.with_type.call(null,function(){var a=function(a){return cljs.core.rand.call(null)},b=function(b){var c=null;if(0=c?(c=Math.sqrt(c),c=-1<=c&&1>=c?Math.asin(c):null,cljs.core.truth_(c)?2*c:null):null};bigml.dixie.flatline.math.spherical_distance_deg=function(a){for(var b=[],c=arguments.length,d=0;;)if(d=cljs.core.first.call(null,b)?a:a+cljs.core.second.call(null,b)},0,b)/(new cljs.core.Keyword(null,"population","population",-1209901867)).cljs$core$IFn$_invoke$arity$2(a,1)}; +bigml.dixie.flatline.fuzzy_logic.check_field_types=function(a,b){return cljs.core.every_QMARK_.call(null,function(a){return bigml.dixie.flatline.types.numeric_fn_QMARK_.call(null,a)||bigml.dixie.flatline.types.string_fn_QMARK_.call(null,a)},new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[a,b],null))?null:bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-type-error","flatline-type-error",845929452),"Both arguments expected to have type numeric or string")}; +bigml.dixie.flatline.fuzzy_logic.check_logic_values=function(a,b){return cljs.core.truth_(cljs.core.truth_(a)?cljs.core.truth_(b)?0<=a&&1>=a&&0<=b&&1>=b:b:a)?new 
cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[a,b],null):null}; +bigml.dixie.flatline.fuzzy_logic.check_norm_parameter=function(a){for(var b=[],c=arguments.length,d=0;;)if(dbigml.dixie.flatline.fuzzy_logic.out_of_range.call(null, +b)?bigml.dixie.flatline.fields.make_getter.call(null,a,0):bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-invalid-arguments","flatline-invalid-arguments",1649316504),"More than 20%% of the input field values are not between 0 and 1, try to normalize them")):bigml.dixie.flatline.types.numeric_fn_QMARK_.call(null,c)?c:null}; +bigml.dixie.flatline.fuzzy_logic.apply_norm=function(a){for(var b=[],c=arguments.length,d=0;;)if(da?0:a};bigml.dixie.flatline.fuzzy_logic.tnorm_drastic=function(a,b){return 1===a?b:1===b?a:0};bigml.dixie.flatline.fuzzy_logic.tnorm_nilpotent_min=function(a,b){return 1b?a:b}; +bigml.dixie.flatline.fuzzy_logic.tconorm_probabilistic_sum=function(a,b){return a+b-a*b};bigml.dixie.flatline.fuzzy_logic.tconorm_bounded_sum=function(a,b){a+=b;return 1>a?a:1};bigml.dixie.flatline.fuzzy_logic.tconorm_drastic=function(a,b){return 0===a?b:0===b?a:1};bigml.dixie.flatline.fuzzy_logic.tconorm_nilpotent_max=function(a){for(var b=[],c=arguments.length,d=0;;)if(db+a?b>a?b:a:1};bigml.dixie.flatline.fuzzy_logic.tconorm_nilpotent_max.cljs$lang$maxFixedArity=0;bigml.dixie.flatline.fuzzy_logic.tconorm_nilpotent_max.cljs$lang$applyTo=function(a){return this.cljs$core$IFn$_invoke$arity$variadic(cljs.core.seq.call(null,a))}; +bigml.dixie.flatline.fuzzy_logic.tconorm_einstein_sum=function(a,b){return(a+b)/(1+a*b)}; +bigml.dixie.flatline.fuzzy_logic.tnorm_schweizer_sklar=function(a){bigml.dixie.flatline.fuzzy_logic.check_norm_parameter.call(null,a,"schweizer-sklar");return 0>a?function(b,c){return 
bigml.dixie.flatline.utils.pow.call(null,bigml.dixie.flatline.utils.pow.call(null,b,a)+bigml.dixie.flatline.utils.pow.call(null,c,a)+-1,1/a)}:0===a?bigml.dixie.flatline.fuzzy_logic.tnorm_product:0d?0:d}(),1/a)}:null};bigml.dixie.flatline.fuzzy_logic.tnorm_hamacher=function(a){bigml.dixie.flatline.fuzzy_logic.check_norm_parameter.call(null,a,"hamacher",0);var b=function(b,d){return b*d/(a+(1-a)*(b+(d-b*d)))};return 0===a?function(a,d){return a===d&&0===d?0:b.call(null,a,d)}:function(a,d){return b.call(null,a,d)}}; +bigml.dixie.flatline.fuzzy_logic.tnorm_frank=function(a){bigml.dixie.flatline.fuzzy_logic.check_norm_parameter.call(null,a,"frank",0);return 0===a?bigml.dixie.flatline.fuzzy_logic.tnorm_min:1===a?bigml.dixie.flatline.fuzzy_logic.tnorm_product:function(b,c){return bigml.dixie.flatline.utils.log.call(null,a,1+(bigml.dixie.flatline.utils.pow.call(null,a,b)-1)*(bigml.dixie.flatline.utils.pow.call(null,a,c)-1)/(a-1))}}; +bigml.dixie.flatline.fuzzy_logic.tnorm_yager=function(a){bigml.dixie.flatline.fuzzy_logic.check_norm_parameter.call(null,a,"yager",0);return 0===a?bigml.dixie.flatline.fuzzy_logic.tnorm_drastic:function(b,c){b=1-bigml.dixie.flatline.utils.pow.call(null,bigml.dixie.flatline.utils.pow.call(null,1-b,a)+bigml.dixie.flatline.utils.pow.call(null,1-c,a),1/a);return 0>b?0:b}}; +bigml.dixie.flatline.fuzzy_logic.tnorm_aczel_alsina=function(a){bigml.dixie.flatline.fuzzy_logic.check_norm_parameter.call(null,a,"aczel-alsina",0);return 0===a?bigml.dixie.flatline.fuzzy_logic.tnorm_drastic:function(b,c){return bigml.dixie.flatline.utils.exp.call(null,-1*bigml.dixie.flatline.utils.pow.call(null,bigml.dixie.flatline.utils.pow.call(null,bigml.dixie.flatline.utils.abs.call(null,bigml.dixie.flatline.utils.log.call(null,10,b)),a)+bigml.dixie.flatline.utils.pow.call(null,bigml.dixie.flatline.utils.abs.call(null, 
+bigml.dixie.flatline.utils.log.call(null,10,c)),a),1/a))}};bigml.dixie.flatline.fuzzy_logic.tnorm_dombi=function(a){bigml.dixie.flatline.fuzzy_logic.check_norm_parameter.call(null,a,"dombi",0);return 0===a?bigml.dixie.flatline.fuzzy_logic.tnorm_drastic:function(b,c){return 0===b||0===c?0:1/(1+bigml.dixie.flatline.utils.pow.call(null,bigml.dixie.flatline.utils.pow.call(null,(1-b)/b,a)+bigml.dixie.flatline.utils.pow.call(null,(1-c)/c,a),1/a))}}; +bigml.dixie.flatline.fuzzy_logic.tnorm_sugeno_weber=function(a){bigml.dixie.flatline.fuzzy_logic.check_norm_parameter.call(null,a,"sugeno-weber",-1);return cljs.core._EQ_.call(null,a,-1)?bigml.dixie.flatline.fuzzy_logic.tnorm_drastic:function(b,c){b=(b+c+-1+a*b*c)/(1+a);return 0>b?0:b}}; +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-min","tnorm-min",-908787833),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_min)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-product","tnorm-product",-1276477234),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_product)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-lukasiewicz","tnorm-lukasiewicz",-625721216),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_lukasiewicz)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new 
cljs.core.Keyword(null,"tnorm-drastic","tnorm-drastic",1507892347),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_drastic)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-nilpotent-min","tnorm-nilpotent-min",-195877317),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_nilpotent_min)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tconorm-max","tconorm-max",-730874243),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tconorm_max)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tconorm-probabilistic-sum","tconorm-probabilistic-sum",-878905728),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tconorm_probabilistic_sum)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tconorm-bounded-sum","tconorm-bounded-sum",-1654068829),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tconorm_bounded_sum)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tconorm-drastic","tconorm-drastic",-453939904),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return 
bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tconorm_drastic)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tconorm-nilpotent-max","tconorm-nilpotent-max",383516123),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tconorm_nilpotent_max)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tconorm-einstein-sum","tconorm-einstein-sum",1566574325),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,a,c,d,bigml.dixie.flatline.fuzzy_logic.tconorm_einstein_sum)}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-schweizer-sklar","tnorm-schweizer-sklar",1972745550),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);var e=cljs.core.nth.call(null,c,1,null);c=cljs.core.nth.call(null,c,2,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,e,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_schweizer_sklar.call(null,a.call(null)),new cljs.core.Keyword(null,"parametric","parametric",-273569731))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-hamacher","tnorm-hamacher",-908773099),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);var e=cljs.core.nth.call(null,c,1,null);c=cljs.core.nth.call(null,c,2,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,e,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_hamacher.call(null,a.call(null)),new cljs.core.Keyword(null,"parametric","parametric",-273569731))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new 
cljs.core.Keyword(null,"tnorm-frank","tnorm-frank",1093115450),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);var e=cljs.core.nth.call(null,c,1,null);c=cljs.core.nth.call(null,c,2,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,e,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_frank.call(null,a.call(null)),new cljs.core.Keyword(null,"parametric","parametric",-273569731))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-yager","tnorm-yager",949011974),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);var e=cljs.core.nth.call(null,c,1,null);c=cljs.core.nth.call(null,c,2,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,e,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_yager.call(null,a.call(null)),new cljs.core.Keyword(null,"parametric","parametric",-273569731))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-aczel-alsina","tnorm-aczel-alsina",354898993),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);var e=cljs.core.nth.call(null,c,1,null);c=cljs.core.nth.call(null,c,2,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,e,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_aczel_alsina.call(null,a.call(null)),new cljs.core.Keyword(null,"parametric","parametric",-273569731))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"tnorm-dombi","tnorm-dombi",-310984956),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);var e=cljs.core.nth.call(null,c,1,null);c=cljs.core.nth.call(null,c,2,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,e,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_dombi.call(null,a.call(null)),new cljs.core.Keyword(null,"parametric","parametric",-273569731))}); +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new 
cljs.core.Keyword(null,"tnorm-sugeno-weber","tnorm-sugeno-weber",587108434),function(a,b,c,d){a=cljs.core.nth.call(null,c,0,null);var e=cljs.core.nth.call(null,c,1,null);c=cljs.core.nth.call(null,c,2,null);return bigml.dixie.flatline.fuzzy_logic.apply_norm.call(null,b,e,c,d,bigml.dixie.flatline.fuzzy_logic.tnorm_sugeno_weber.call(null,a.call(null)),new cljs.core.Keyword(null,"parametric","parametric",-273569731))});goog.i18n.DateTimeFormat=function(a,b){goog.asserts.assert(void 0!==a,"Pattern must be defined");goog.asserts.assert(void 0!==b||void 0!==goog.i18n.DateTimeSymbols,"goog.i18n.DateTimeSymbols or explicit symbols must be defined");this.patternParts_=[];this.dateTimeSymbols_=b||goog.i18n.DateTimeSymbols;"number"==typeof a?this.applyStandardPattern_(a):this.applyPattern_(a)}; +goog.i18n.DateTimeFormat.Format={FULL_DATE:0,LONG_DATE:1,MEDIUM_DATE:2,SHORT_DATE:3,FULL_TIME:4,LONG_TIME:5,MEDIUM_TIME:6,SHORT_TIME:7,FULL_DATETIME:8,LONG_DATETIME:9,MEDIUM_DATETIME:10,SHORT_DATETIME:11};goog.i18n.DateTimeFormat.TOKENS_=[/^'(?:[^']|'')*('|$)/,/^(?:G+|y+|Y+|M+|k+|S+|E+|a+|h+|K+|H+|c+|L+|Q+|d+|m+|s+|v+|V+|w+|z+|Z+)/,/^[^'GyYMkSEahKHcLQdmsvVwzZ]+/];goog.i18n.DateTimeFormat.PartTypes_={QUOTED_STRING:0,FIELD:1,LITERAL:2}; +goog.i18n.DateTimeFormat.getHours_=function(a){return a.getHours?a.getHours():0}; +goog.i18n.DateTimeFormat.prototype.applyPattern_=function(a){for(goog.i18n.DateTimeFormat.removeRlmInPatterns_&&(a=a.replace(/\u200f/g,""));a;){for(var b=a,c=0;ca)var b=this.dateTimeSymbols_.DATEFORMATS[a];else if(8>a)b=this.dateTimeSymbols_.TIMEFORMATS[a-4];else if(12>a)b=this.dateTimeSymbols_.DATETIMEFORMATS[a-8],b=b.replace("{1}",this.dateTimeSymbols_.DATEFORMATS[a-8]),b=b.replace("{0}",this.dateTimeSymbols_.TIMEFORMATS[a-8]);else{this.applyStandardPattern_(goog.i18n.DateTimeFormat.Format.MEDIUM_DATETIME);return}this.applyPattern_(b)}; +goog.i18n.DateTimeFormat.prototype.localizeNumbers_=function(a){return 
goog.i18n.DateTimeFormat.localizeNumbers(a,this.dateTimeSymbols_)};goog.i18n.DateTimeFormat.enforceAsciiDigits_=!1;goog.i18n.DateTimeFormat.removeRlmInPatterns_=!1;goog.i18n.DateTimeFormat.setEnforceAsciiDigits=function(a){goog.i18n.DateTimeFormat.enforceAsciiDigits_=a;goog.i18n.DateTimeFormat.removeRlmInPatterns_=a};goog.i18n.DateTimeFormat.isEnforceAsciiDigits=function(){return goog.i18n.DateTimeFormat.enforceAsciiDigits_}; +goog.i18n.DateTimeFormat.localizeNumbers=function(a,b){a=String(a);b=b||goog.i18n.DateTimeSymbols;if(void 0===b.ZERODIGIT||goog.i18n.DateTimeFormat.enforceAsciiDigits_)return a;for(var c=[],d=0;d=e?String.fromCharCode(b.ZERODIGIT+e-48):a.charAt(d))}return c.join("")};goog.i18n.DateTimeFormat.prototype.formatEra_=function(a,b){b=0b&&(b=-b);2==a&&(b%=100);return this.localizeNumbers_(goog.string.padNumber(b,a))};goog.i18n.DateTimeFormat.prototype.formatYearOfWeek_=function(a,b){b=goog.date.getYearOfWeek(b.getFullYear(),b.getMonth(),b.getDate(),this.dateTimeSymbols_.FIRSTWEEKCUTOFFDAY,this.dateTimeSymbols_.FIRSTDAYOFWEEK);0>b&&(b=-b);2==a&&(b%=100);return this.localizeNumbers_(goog.string.padNumber(b,a))}; +goog.i18n.DateTimeFormat.prototype.formatMonth_=function(a,b){b=b.getMonth();switch(a){case 5:return this.dateTimeSymbols_.NARROWMONTHS[b];case 4:return this.dateTimeSymbols_.MONTHS[b];case 3:return this.dateTimeSymbols_.SHORTMONTHS[b];default:return this.localizeNumbers_(goog.string.padNumber(b+1,a))}}; +goog.i18n.DateTimeFormat.validateDateHasTime_=function(a){if(!(a.getHours&&a.getSeconds&&a.getMinutes))throw Error("The date to format has no time (probably a goog.date.Date). 
Use Date or goog.date.DateTime, or use a pattern without time fields.");};goog.i18n.DateTimeFormat.prototype.format24Hours_=function(a,b){goog.i18n.DateTimeFormat.validateDateHasTime_(b);b=goog.i18n.DateTimeFormat.getHours_(b)||24;return this.localizeNumbers_(goog.string.padNumber(b,a))}; +goog.i18n.DateTimeFormat.prototype.formatFractionalSeconds_=function(a,b){b=b.getMilliseconds()/1E3;return this.localizeNumbers_(b.toFixed(Math.min(3,a)).substr(2)+(3a?1:0]};goog.i18n.DateTimeFormat.prototype.format1To12Hours_=function(a,b){goog.i18n.DateTimeFormat.validateDateHasTime_(b);b=goog.i18n.DateTimeFormat.getHours_(b)%12||12;return this.localizeNumbers_(goog.string.padNumber(b,a))}; +goog.i18n.DateTimeFormat.prototype.format0To11Hours_=function(a,b){goog.i18n.DateTimeFormat.validateDateHasTime_(b);b=goog.i18n.DateTimeFormat.getHours_(b)%12;return this.localizeNumbers_(goog.string.padNumber(b,a))};goog.i18n.DateTimeFormat.prototype.format0To23Hours_=function(a,b){goog.i18n.DateTimeFormat.validateDateHasTime_(b);b=goog.i18n.DateTimeFormat.getHours_(b);return this.localizeNumbers_(goog.string.padNumber(b,a))}; +goog.i18n.DateTimeFormat.prototype.formatStandaloneDay_=function(a,b){b=b.getDay();switch(a){case 5:return this.dateTimeSymbols_.STANDALONENARROWWEEKDAYS[b];case 4:return this.dateTimeSymbols_.STANDALONEWEEKDAYS[b];case 3:return this.dateTimeSymbols_.STANDALONESHORTWEEKDAYS[b];default:return this.localizeNumbers_(goog.string.padNumber(b,1))}}; +goog.i18n.DateTimeFormat.prototype.formatStandaloneMonth_=function(a,b){b=b.getMonth();switch(a){case 5:return this.dateTimeSymbols_.STANDALONENARROWMONTHS[b];case 4:return this.dateTimeSymbols_.STANDALONEMONTHS[b];case 3:return this.dateTimeSymbols_.STANDALONESHORTMONTHS[b];default:return this.localizeNumbers_(goog.string.padNumber(b+1,a))}};goog.i18n.DateTimeFormat.prototype.formatQuarter_=function(a,b){b=Math.floor(b.getMonth()/3);return 4>a?this.dateTimeSymbols_.SHORTQUARTERS[b]:this.dateTimeSymbols_.QUARTERS[b]}; 
+goog.i18n.DateTimeFormat.prototype.formatDate_=function(a,b){return this.localizeNumbers_(goog.string.padNumber(b.getDate(),a))};goog.i18n.DateTimeFormat.prototype.formatMinutes_=function(a,b){goog.i18n.DateTimeFormat.validateDateHasTime_(b);return this.localizeNumbers_(goog.string.padNumber(b.getMinutes(),a))};goog.i18n.DateTimeFormat.prototype.formatSeconds_=function(a,b){goog.i18n.DateTimeFormat.validateDateHasTime_(b);return this.localizeNumbers_(goog.string.padNumber(b.getSeconds(),a))}; +goog.i18n.DateTimeFormat.prototype.formatWeekOfYear_=function(a,b){b=goog.date.getWeekNumber(b.getFullYear(),b.getMonth(),b.getDate(),this.dateTimeSymbols_.FIRSTWEEKCUTOFFDAY,this.dateTimeSymbols_.FIRSTDAYOFWEEK);return this.localizeNumbers_(goog.string.padNumber(b,a))};goog.i18n.DateTimeFormat.prototype.formatTimeZoneRFC_=function(a,b,c){c=c||goog.i18n.TimeZone.createTimeZone(b.getTimezoneOffset());return 4>a?c.getRFCTimeZoneString(b):this.localizeNumbers_(c.getGMTString(b))}; +goog.i18n.DateTimeFormat.prototype.formatTimeZone_=function(a,b,c){c=c||goog.i18n.TimeZone.createTimeZone(b.getTimezoneOffset());return 4>a?c.getShortName(b):c.getLongName(b)};goog.i18n.DateTimeFormat.prototype.formatTimeZoneId_=function(a,b){b=b||goog.i18n.TimeZone.createTimeZone(a.getTimezoneOffset());return b.getTimeZoneId()};goog.i18n.DateTimeFormat.prototype.formatTimeZoneLocationId_=function(a,b,c){c=c||goog.i18n.TimeZone.createTimeZone(b.getTimezoneOffset());return 2>=a?c.getTimeZoneId():c.getGenericLocation(b)}; +goog.i18n.DateTimeFormat.prototype.formatField_=function(a,b,c,d,e){var f=a.length;switch(a.charAt(0)){case "G":return this.formatEra_(f,c);case "y":return this.formatYear_(f,c);case "Y":return this.formatYearOfWeek_(f,c);case "M":return this.formatMonth_(f,c);case "k":return this.format24Hours_(f,d);case "S":return this.formatFractionalSeconds_(f,d);case "E":return this.formatDayOfWeek_(f,c);case "a":return this.formatAmPm_(f,d);case "h":return this.format1To12Hours_(f,d);case 
"K":return this.format0To11Hours_(f, +d);case "H":return this.format0To23Hours_(f,d);case "c":return this.formatStandaloneDay_(f,c);case "L":return this.formatStandaloneMonth_(f,c);case "Q":return this.formatQuarter_(f,c);case "d":return this.formatDate_(f,c);case "m":return this.formatMinutes_(f,d);case "s":return this.formatSeconds_(f,d);case "v":return this.formatTimeZoneId_(b,e);case "V":return this.formatTimeZoneLocationId_(f,b,e);case "w":return this.formatWeekOfYear_(f,d);case "z":return this.formatTimeZone_(f,b,e);case "Z":return this.formatTimeZoneRFC_(f, +b,e);default:return""}};goog.date.UtcDateTime=function(a,b,c,d,e,f,g){a="number"===typeof a?Date.UTC(a,b||0,c||1,d||0,e||0,f||0,g||0):a?a.getTime():goog.now();this.date=new Date(a)};goog.inherits(goog.date.UtcDateTime,goog.date.DateTime);goog.date.UtcDateTime.fromTimestamp=function(a){var b=new goog.date.UtcDateTime;b.setTime(a);return b};goog.date.UtcDateTime.fromIsoString=function(a){var b=new goog.date.UtcDateTime(2E3);return goog.date.setIso8601DateTime(b,a)?b:null}; +goog.date.UtcDateTime.prototype.clone=function(){var a=new goog.date.UtcDateTime(this.date);a.setFirstDayOfWeek(this.getFirstDayOfWeek());a.setFirstWeekCutOffDay(this.getFirstWeekCutOffDay());return a};goog.date.UtcDateTime.prototype.add=function(a){if(a.years||a.months){var b=new goog.date.Interval(a.years,a.months);goog.date.Date.prototype.add.call(this,b)}a=1E3*(a.seconds+60*(a.minutes+60*(a.hours+24*a.days)));this.date=new Date(this.date.getTime()+a)}; +goog.date.UtcDateTime.prototype.getTimezoneOffset=function(){return 
0};goog.date.UtcDateTime.prototype.getFullYear=goog.date.DateTime.prototype.getUTCFullYear;goog.date.UtcDateTime.prototype.getMonth=goog.date.DateTime.prototype.getUTCMonth;goog.date.UtcDateTime.prototype.getDate=goog.date.DateTime.prototype.getUTCDate;goog.date.UtcDateTime.prototype.getHours=goog.date.DateTime.prototype.getUTCHours;goog.date.UtcDateTime.prototype.getMinutes=goog.date.DateTime.prototype.getUTCMinutes; +goog.date.UtcDateTime.prototype.getSeconds=goog.date.DateTime.prototype.getUTCSeconds;goog.date.UtcDateTime.prototype.getMilliseconds=goog.date.DateTime.prototype.getUTCMilliseconds;goog.date.UtcDateTime.prototype.getDay=goog.date.DateTime.prototype.getUTCDay;goog.date.UtcDateTime.prototype.setFullYear=goog.date.DateTime.prototype.setUTCFullYear;goog.date.UtcDateTime.prototype.setMonth=goog.date.DateTime.prototype.setUTCMonth;goog.date.UtcDateTime.prototype.setDate=goog.date.DateTime.prototype.setUTCDate; +goog.date.UtcDateTime.prototype.setHours=goog.date.DateTime.prototype.setUTCHours;goog.date.UtcDateTime.prototype.setMinutes=goog.date.DateTime.prototype.setUTCMinutes;goog.date.UtcDateTime.prototype.setSeconds=goog.date.DateTime.prototype.setUTCSeconds;goog.date.UtcDateTime.prototype.setMilliseconds=goog.date.DateTime.prototype.setUTCMilliseconds;cljs_time.core={};cljs_time.core.deprecated=function(a){return cljs.core.println.call(null,"DEPRECATION WARNING: ",a)};cljs_time.core._EQ_=cljs_time.internal.core._EQ_;cljs_time.core.DateTimeProtocol=function(){};var cljs_time$core$DateTimeProtocol$year$dyn_29437=function(a){var b=cljs_time.core.year[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.year._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.year",a);}; +cljs_time.core.year=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$year$arity$1?a.cljs_time$core$DateTimeProtocol$year$arity$1(a):cljs_time$core$DateTimeProtocol$year$dyn_29437.call(null,a)};var 
cljs_time$core$DateTimeProtocol$month$dyn_29438=function(a){var b=cljs_time.core.month[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.month._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.month",a);}; +cljs_time.core.month=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$month$arity$1?a.cljs_time$core$DateTimeProtocol$month$arity$1(a):cljs_time$core$DateTimeProtocol$month$dyn_29438.call(null,a)};var cljs_time$core$DateTimeProtocol$day$dyn_29439=function(a){var b=cljs_time.core.day[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.day._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.day",a);}; +cljs_time.core.day=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$day$arity$1?a.cljs_time$core$DateTimeProtocol$day$arity$1(a):cljs_time$core$DateTimeProtocol$day$dyn_29439.call(null,a)}; +var cljs_time$core$DateTimeProtocol$day_of_week$dyn_29440=function(a){var b=cljs_time.core.day_of_week[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.day_of_week._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.day-of-week",a);}; +cljs_time.core.day_of_week=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$day_of_week$arity$1?a.cljs_time$core$DateTimeProtocol$day_of_week$arity$1(a):cljs_time$core$DateTimeProtocol$day_of_week$dyn_29440.call(null,a)}; +var cljs_time$core$DateTimeProtocol$hour$dyn_29441=function(a){var b=cljs_time.core.hour[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.hour._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.hour",a);};cljs_time.core.hour=function(a){return 
null!=a&&null!=a.cljs_time$core$DateTimeProtocol$hour$arity$1?a.cljs_time$core$DateTimeProtocol$hour$arity$1(a):cljs_time$core$DateTimeProtocol$hour$dyn_29441.call(null,a)}; +var cljs_time$core$DateTimeProtocol$minute$dyn_29442=function(a){var b=cljs_time.core.minute[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.minute._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.minute",a);}; +cljs_time.core.minute=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$minute$arity$1?a.cljs_time$core$DateTimeProtocol$minute$arity$1(a):cljs_time$core$DateTimeProtocol$minute$dyn_29442.call(null,a)};var cljs_time$core$DateTimeProtocol$sec$dyn_29443=function(a){var b=cljs_time.core.sec[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.sec._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.sec",a);}; +cljs_time.core.sec=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$sec$arity$1?a.cljs_time$core$DateTimeProtocol$sec$arity$1(a):cljs_time$core$DateTimeProtocol$sec$dyn_29443.call(null,a)};var cljs_time$core$DateTimeProtocol$second$dyn_29444=function(a){var b=cljs_time.core.second[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.second._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.second",a);}; +cljs_time.core.second=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$second$arity$1?a.cljs_time$core$DateTimeProtocol$second$arity$1(a):cljs_time$core$DateTimeProtocol$second$dyn_29444.call(null,a)}; +var cljs_time$core$DateTimeProtocol$milli$dyn_29445=function(a){var b=cljs_time.core.milli[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.milli._;if(null!=b)return b.call(null,a);throw 
cljs.core.missing_protocol.call(null,"DateTimeProtocol.milli",a);};cljs_time.core.milli=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$milli$arity$1?a.cljs_time$core$DateTimeProtocol$milli$arity$1(a):cljs_time$core$DateTimeProtocol$milli$dyn_29445.call(null,a)}; +var cljs_time$core$DateTimeProtocol$equal_QMARK_$dyn_29446=function(a,b){var c=cljs_time.core.equal_QMARK_[goog.typeOf(null==a?null:a)];if(null!=c)return c.call(null,a,b);c=cljs_time.core.equal_QMARK_._;if(null!=c)return c.call(null,a,b);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.equal?",a);}; +cljs_time.core.equal_QMARK_=function(a,b){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$equal_QMARK_$arity$2?a.cljs_time$core$DateTimeProtocol$equal_QMARK_$arity$2(a,b):cljs_time$core$DateTimeProtocol$equal_QMARK_$dyn_29446.call(null,a,b)}; +var cljs_time$core$DateTimeProtocol$after_QMARK_$dyn_29447=function(a,b){var c=cljs_time.core.after_QMARK_[goog.typeOf(null==a?null:a)];if(null!=c)return c.call(null,a,b);c=cljs_time.core.after_QMARK_._;if(null!=c)return c.call(null,a,b);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.after?",a);}; +cljs_time.core.after_QMARK_=function(a,b){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$after_QMARK_$arity$2?a.cljs_time$core$DateTimeProtocol$after_QMARK_$arity$2(a,b):cljs_time$core$DateTimeProtocol$after_QMARK_$dyn_29447.call(null,a,b)}; +var cljs_time$core$DateTimeProtocol$before_QMARK_$dyn_29448=function(a,b){var c=cljs_time.core.before_QMARK_[goog.typeOf(null==a?null:a)];if(null!=c)return c.call(null,a,b);c=cljs_time.core.before_QMARK_._;if(null!=c)return c.call(null,a,b);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.before?",a);}; +cljs_time.core.before_QMARK_=function(a,b){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$before_QMARK_$arity$2?a.cljs_time$core$DateTimeProtocol$before_QMARK_$arity$2(a,b):cljs_time$core$DateTimeProtocol$before_QMARK_$dyn_29448.call(null,a,b)}; +var 
cljs_time$core$DateTimeProtocol$plus_$dyn_29449=function(a,b){var c=cljs_time.core.plus_[goog.typeOf(null==a?null:a)];if(null!=c)return c.call(null,a,b);c=cljs_time.core.plus_._;if(null!=c)return c.call(null,a,b);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.plus-",a);}; +cljs_time.core.plus_=function(a,b){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$plus_$arity$2?a.cljs_time$core$DateTimeProtocol$plus_$arity$2(a,b):cljs_time$core$DateTimeProtocol$plus_$dyn_29449.call(null,a,b)}; +var cljs_time$core$DateTimeProtocol$minus_$dyn_29450=function(a,b){var c=cljs_time.core.minus_[goog.typeOf(null==a?null:a)];if(null!=c)return c.call(null,a,b);c=cljs_time.core.minus_._;if(null!=c)return c.call(null,a,b);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.minus-",a);}; +cljs_time.core.minus_=function(a,b){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$minus_$arity$2?a.cljs_time$core$DateTimeProtocol$minus_$arity$2(a,b):cljs_time$core$DateTimeProtocol$minus_$dyn_29450.call(null,a,b)}; +var cljs_time$core$DateTimeProtocol$first_day_of_the_month_$dyn_29451=function(a){var b=cljs_time.core.first_day_of_the_month_[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.first_day_of_the_month_._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.first-day-of-the-month-",a);}; +cljs_time.core.first_day_of_the_month_=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$first_day_of_the_month_$arity$1?a.cljs_time$core$DateTimeProtocol$first_day_of_the_month_$arity$1(a):cljs_time$core$DateTimeProtocol$first_day_of_the_month_$dyn_29451.call(null,a)}; +var cljs_time$core$DateTimeProtocol$last_day_of_the_month_$dyn_29452=function(a){var b=cljs_time.core.last_day_of_the_month_[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.last_day_of_the_month_._;if(null!=b)return b.call(null,a);throw 
cljs.core.missing_protocol.call(null,"DateTimeProtocol.last-day-of-the-month-",a);}; +cljs_time.core.last_day_of_the_month_=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$last_day_of_the_month_$arity$1?a.cljs_time$core$DateTimeProtocol$last_day_of_the_month_$arity$1(a):cljs_time$core$DateTimeProtocol$last_day_of_the_month_$dyn_29452.call(null,a)}; +var cljs_time$core$DateTimeProtocol$week_number_of_year$dyn_29453=function(a){var b=cljs_time.core.week_number_of_year[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.week_number_of_year._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.week-number-of-year",a);}; +cljs_time.core.week_number_of_year=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$week_number_of_year$arity$1?a.cljs_time$core$DateTimeProtocol$week_number_of_year$arity$1(a):cljs_time$core$DateTimeProtocol$week_number_of_year$dyn_29453.call(null,a)}; +var cljs_time$core$DateTimeProtocol$week_year$dyn_29454=function(a){var b=cljs_time.core.week_year[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.week_year._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"DateTimeProtocol.week-year",a);}; +cljs_time.core.week_year=function(a){return null!=a&&null!=a.cljs_time$core$DateTimeProtocol$week_year$arity$1?a.cljs_time$core$DateTimeProtocol$week_year$arity$1(a):cljs_time$core$DateTimeProtocol$week_year$dyn_29454.call(null,a)};cljs_time.core.InTimeUnitProtocol=function(){}; +var cljs_time$core$InTimeUnitProtocol$in_millis$dyn_29455=function(a){var b=cljs_time.core.in_millis[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.in_millis._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"InTimeUnitProtocol.in-millis",a);}; +cljs_time.core.in_millis=function(a){return 
null!=a&&null!=a.cljs_time$core$InTimeUnitProtocol$in_millis$arity$1?a.cljs_time$core$InTimeUnitProtocol$in_millis$arity$1(a):cljs_time$core$InTimeUnitProtocol$in_millis$dyn_29455.call(null,a)}; +var cljs_time$core$InTimeUnitProtocol$in_seconds$dyn_29456=function(a){var b=cljs_time.core.in_seconds[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.in_seconds._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"InTimeUnitProtocol.in-seconds",a);}; +cljs_time.core.in_seconds=function(a){return null!=a&&null!=a.cljs_time$core$InTimeUnitProtocol$in_seconds$arity$1?a.cljs_time$core$InTimeUnitProtocol$in_seconds$arity$1(a):cljs_time$core$InTimeUnitProtocol$in_seconds$dyn_29456.call(null,a)}; +var cljs_time$core$InTimeUnitProtocol$in_minutes$dyn_29457=function(a){var b=cljs_time.core.in_minutes[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.in_minutes._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"InTimeUnitProtocol.in-minutes",a);}; +cljs_time.core.in_minutes=function(a){return null!=a&&null!=a.cljs_time$core$InTimeUnitProtocol$in_minutes$arity$1?a.cljs_time$core$InTimeUnitProtocol$in_minutes$arity$1(a):cljs_time$core$InTimeUnitProtocol$in_minutes$dyn_29457.call(null,a)}; +var cljs_time$core$InTimeUnitProtocol$in_hours$dyn_29458=function(a){var b=cljs_time.core.in_hours[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.in_hours._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"InTimeUnitProtocol.in-hours",a);}; +cljs_time.core.in_hours=function(a){return null!=a&&null!=a.cljs_time$core$InTimeUnitProtocol$in_hours$arity$1?a.cljs_time$core$InTimeUnitProtocol$in_hours$arity$1(a):cljs_time$core$InTimeUnitProtocol$in_hours$dyn_29458.call(null,a)}; +var cljs_time$core$InTimeUnitProtocol$in_days$dyn_29459=function(a){var b=cljs_time.core.in_days[goog.typeOf(null==a?null:a)];if(null!=b)return 
b.call(null,a);b=cljs_time.core.in_days._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"InTimeUnitProtocol.in-days",a);}; +cljs_time.core.in_days=function(a){return null!=a&&null!=a.cljs_time$core$InTimeUnitProtocol$in_days$arity$1?a.cljs_time$core$InTimeUnitProtocol$in_days$arity$1(a):cljs_time$core$InTimeUnitProtocol$in_days$dyn_29459.call(null,a)}; +var cljs_time$core$InTimeUnitProtocol$in_weeks$dyn_29460=function(a){var b=cljs_time.core.in_weeks[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.in_weeks._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"InTimeUnitProtocol.in-weeks",a);}; +cljs_time.core.in_weeks=function(a){return null!=a&&null!=a.cljs_time$core$InTimeUnitProtocol$in_weeks$arity$1?a.cljs_time$core$InTimeUnitProtocol$in_weeks$arity$1(a):cljs_time$core$InTimeUnitProtocol$in_weeks$dyn_29460.call(null,a)}; +var cljs_time$core$InTimeUnitProtocol$in_months$dyn_29461=function(a){var b=cljs_time.core.in_months[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.in_months._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"InTimeUnitProtocol.in-months",a);}; +cljs_time.core.in_months=function(a){return null!=a&&null!=a.cljs_time$core$InTimeUnitProtocol$in_months$arity$1?a.cljs_time$core$InTimeUnitProtocol$in_months$arity$1(a):cljs_time$core$InTimeUnitProtocol$in_months$dyn_29461.call(null,a)}; +var cljs_time$core$InTimeUnitProtocol$in_years$dyn_29462=function(a){var b=cljs_time.core.in_years[goog.typeOf(null==a?null:a)];if(null!=b)return b.call(null,a);b=cljs_time.core.in_years._;if(null!=b)return b.call(null,a);throw cljs.core.missing_protocol.call(null,"InTimeUnitProtocol.in-years",a);}; +cljs_time.core.in_years=function(a){return 
null!=a&&null!=a.cljs_time$core$InTimeUnitProtocol$in_years$arity$1?a.cljs_time$core$InTimeUnitProtocol$in_years$arity$1(a):cljs_time$core$InTimeUnitProtocol$in_years$dyn_29462.call(null,a)};cljs_time.core.Interval=function(a,b,c,d,e){this.start=a;this.end=b;this.__meta=c;this.__extmap=d;this.__hash=e;this.cljs$lang$protocol_mask$partition0$=2230716170;this.cljs$lang$protocol_mask$partition1$=139264}; +cljs_time.core.Interval.prototype.cljs$core$ILookup$_lookup$arity$2=function(a,b){return this.cljs$core$ILookup$_lookup$arity$3(null,b,null)};cljs_time.core.Interval.prototype.cljs$core$ILookup$_lookup$arity$3=function(a,b,c){switch(b instanceof cljs.core.Keyword?b.fqn:null){case "start":return this.start;case "end":return this.end;default:return cljs.core.get.call(null,this.__extmap,b,c)}}; +cljs_time.core.Interval.prototype.cljs$core$IKVReduce$_kv_reduce$arity$3=function(a,b,c){return cljs.core.reduce.call(null,function(a,c){var d=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return b.call(null,a,d,c)},c,this)}; +cljs_time.core.Interval.prototype.cljs$core$IPrintWithWriter$_pr_writer$arity$3=function(a,b,c){return cljs.core.pr_sequential_writer.call(null,b,function(a){return cljs.core.pr_sequential_writer.call(null,b,cljs.core.pr_writer,""," ","",c,a)},"#cljs-time.core.Interval{",", ","}",c,cljs.core.concat.call(null,new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"start", +"start",-355208981),this.start],null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"end","end",-268185958),this.end],null)],null),this.__extmap))}; +cljs_time.core.Interval.prototype.cljs$core$IIterable$_iterator$arity$1=function(a){return new cljs.core.RecordIter(0,this,2,new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new 
cljs.core.Keyword(null,"start","start",-355208981),new cljs.core.Keyword(null,"end","end",-268185958)],null),cljs.core.truth_(this.__extmap)?cljs.core._iterator.call(null,this.__extmap):cljs.core.nil_iter.call(null))};cljs_time.core.Interval.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return this.__meta}; +cljs_time.core.Interval.prototype.cljs$core$ICloneable$_clone$arity$1=function(a){return new cljs_time.core.Interval(this.start,this.end,this.__meta,this.__extmap,this.__hash)};cljs_time.core.Interval.prototype.cljs$core$ICounted$_count$arity$1=function(a){return 2+cljs.core.count.call(null,this.__extmap)};cljs_time.core.Interval.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this.__hash;return null!=a?a:this.__hash=a=534314193^cljs.core.hash_unordered_coll.call(null,this)}; +cljs_time.core.Interval.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return null!=b&&this.constructor===b.constructor&&cljs.core._EQ_.call(null,this.start,b.start)&&cljs.core._EQ_.call(null,this.end,b.end)&&cljs.core._EQ_.call(null,this.__extmap,b.__extmap)}; +cljs_time.core.Interval.prototype.cljs$core$IMap$_dissoc$arity$2=function(a,b){return cljs.core.contains_QMARK_.call(null,new cljs.core.PersistentHashSet(null,new cljs.core.PersistentArrayMap(null,2,[new cljs.core.Keyword(null,"start","start",-355208981),null,new cljs.core.Keyword(null,"end","end",-268185958),null],null),null),b)?cljs.core.dissoc.call(null,cljs.core._with_meta.call(null,cljs.core.into.call(null,cljs.core.PersistentArrayMap.EMPTY,this),this.__meta),b):new cljs_time.core.Interval(this.start, +this.end,this.__meta,cljs.core.not_empty.call(null,cljs.core.dissoc.call(null,this.__extmap,b)),null)}; +cljs_time.core.Interval.prototype.cljs$core$IAssociative$_assoc$arity$3=function(a,b,c){a=cljs.core.keyword_identical_QMARK_;return cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"start","start",-355208981),b))?new 
cljs_time.core.Interval(c,this.end,this.__meta,this.__extmap,null):cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"end","end",-268185958),b))?new cljs_time.core.Interval(this.start,c,this.__meta,this.__extmap,null):new cljs_time.core.Interval(this.start,this.end,this.__meta, +cljs.core.assoc.call(null,this.__extmap,b,c),null)};cljs_time.core.Interval.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){return cljs.core.seq.call(null,cljs.core.concat.call(null,new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.MapEntry(new cljs.core.Keyword(null,"start","start",-355208981),this.start,null),new cljs.core.MapEntry(new cljs.core.Keyword(null,"end","end",-268185958),this.end,null)],null),this.__extmap))}; +cljs_time.core.Interval.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return new cljs_time.core.Interval(this.start,this.end,b,this.__extmap,this.__hash)};cljs_time.core.Interval.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){return cljs.core.vector_QMARK_.call(null,b)?this.cljs$core$IAssociative$_assoc$arity$3(null,cljs.core._nth.call(null,b,0),cljs.core._nth.call(null,b,1)):cljs.core.reduce.call(null,cljs.core._conj,this,b)}; +cljs_time.core.Interval.getBasis=function(){return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"start","start",1285322546,null),new cljs.core.Symbol(null,"end","end",1372345569,null)],null)};cljs_time.core.Interval.cljs$lang$type=!0;cljs_time.core.Interval.cljs$lang$ctorPrSeq=function(a){return new cljs.core.List(null,"cljs-time.core/Interval",null,1,null)}; +cljs_time.core.Interval.cljs$lang$ctorPrWriter=function(a,b){return cljs.core._write.call(null,b,"cljs-time.core/Interval")};cljs_time.core.__GT_Interval=function(a,b){return new cljs_time.core.Interval(a,b,null,null,null)}; +cljs_time.core.map__GT_Interval=function(a){var b=cljs.core.dissoc.call(null,a,new 
cljs.core.Keyword(null,"start","start",-355208981),new cljs.core.Keyword(null,"end","end",-268185958));b=cljs.core.record_QMARK_.call(null,a)?cljs.core.into.call(null,cljs.core.PersistentArrayMap.EMPTY,b):b;return new cljs_time.core.Interval((new cljs.core.Keyword(null,"start","start",-355208981)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null,"end","end",-268185958)).cljs$core$IFn$_invoke$arity$1(a), +null,cljs.core.not_empty.call(null,b),null)};cljs_time.core.interval=function(a,b){if(!(a.getTime()<=b.getTime()))throw Error("Assert failed: (\x3c\x3d (.getTime start) (.getTime end))");return cljs_time.core.__GT_Interval.call(null,a,b)}; +cljs_time.core.Period=function(a,b,c,d,e,f,g,h,k,l,m){this.years=a;this.months=b;this.weeks=c;this.days=d;this.hours=e;this.minutes=f;this.seconds=g;this.millis=h;this.__meta=k;this.__extmap=l;this.__hash=m;this.cljs$lang$protocol_mask$partition0$=2230716170;this.cljs$lang$protocol_mask$partition1$=139264};cljs_time.core.Period.prototype.cljs$core$ILookup$_lookup$arity$2=function(a,b){return this.cljs$core$ILookup$_lookup$arity$3(null,b,null)}; +cljs_time.core.Period.prototype.cljs$core$ILookup$_lookup$arity$3=function(a,b,c){switch(b instanceof cljs.core.Keyword?b.fqn:null){case "years":return this.years;case "months":return this.months;case "weeks":return this.weeks;case "days":return this.days;case "hours":return this.hours;case "minutes":return this.minutes;case "seconds":return this.seconds;case "millis":return this.millis;default:return cljs.core.get.call(null,this.__extmap,b,c)}}; +cljs_time.core.Period.prototype.cljs$core$IKVReduce$_kv_reduce$arity$3=function(a,b,c){return cljs.core.reduce.call(null,function(a,c){var d=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return b.call(null,a,d,c)},c,this)}; +cljs_time.core.Period.prototype.cljs$core$IPrintWithWriter$_pr_writer$arity$3=function(a,b,c){return cljs.core.pr_sequential_writer.call(null,b,function(a){return 
cljs.core.pr_sequential_writer.call(null,b,cljs.core.pr_writer,""," ","",c,a)},"#cljs-time.core.Period{",", ","}",c,cljs.core.concat.call(null,new cljs.core.PersistentVector(null,8,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"years","years", +-1298579689),this.years],null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"months","months",-45571637),this.months],null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"weeks","weeks",1844596125),this.weeks],null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"days","days",-1394072564),this.days],null),new cljs.core.PersistentVector(null, +2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"hours","hours",58380855),this.hours],null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"minutes","minutes",1319166394),this.minutes],null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"seconds","seconds",-445266194),this.seconds],null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE, +[new cljs.core.Keyword(null,"millis","millis",-1338288387),this.millis],null)],null),this.__extmap))}; +cljs_time.core.Period.prototype.cljs$core$IIterable$_iterator$arity$1=function(a){return new cljs.core.RecordIter(0,this,8,new cljs.core.PersistentVector(null,8,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"years","years",-1298579689),new cljs.core.Keyword(null,"months","months",-45571637),new cljs.core.Keyword(null,"weeks","weeks",1844596125),new cljs.core.Keyword(null,"days","days",-1394072564),new cljs.core.Keyword(null,"hours","hours",58380855),new cljs.core.Keyword(null, 
+"minutes","minutes",1319166394),new cljs.core.Keyword(null,"seconds","seconds",-445266194),new cljs.core.Keyword(null,"millis","millis",-1338288387)],null),cljs.core.truth_(this.__extmap)?cljs.core._iterator.call(null,this.__extmap):cljs.core.nil_iter.call(null))};cljs_time.core.Period.prototype.cljs$core$IMeta$_meta$arity$1=function(a){return this.__meta}; +cljs_time.core.Period.prototype.cljs$core$ICloneable$_clone$arity$1=function(a){return new cljs_time.core.Period(this.years,this.months,this.weeks,this.days,this.hours,this.minutes,this.seconds,this.millis,this.__meta,this.__extmap,this.__hash)};cljs_time.core.Period.prototype.cljs$core$ICounted$_count$arity$1=function(a){return 8+cljs.core.count.call(null,this.__extmap)}; +cljs_time.core.Period.prototype.cljs$core$IHash$_hash$arity$1=function(a){a=this.__hash;return null!=a?a:this.__hash=a=1393857022^cljs.core.hash_unordered_coll.call(null,this)}; +cljs_time.core.Period.prototype.cljs$core$IEquiv$_equiv$arity$2=function(a,b){return null!=b&&this.constructor===b.constructor&&cljs.core._EQ_.call(null,this.years,b.years)&&cljs.core._EQ_.call(null,this.months,b.months)&&cljs.core._EQ_.call(null,this.weeks,b.weeks)&&cljs.core._EQ_.call(null,this.days,b.days)&&cljs.core._EQ_.call(null,this.hours,b.hours)&&cljs.core._EQ_.call(null,this.minutes,b.minutes)&&cljs.core._EQ_.call(null,this.seconds,b.seconds)&&cljs.core._EQ_.call(null,this.millis,b.millis)&& +cljs.core._EQ_.call(null,this.__extmap,b.__extmap)}; +cljs_time.core.Period.prototype.cljs$core$IMap$_dissoc$arity$2=function(a,b){return cljs.core.contains_QMARK_.call(null,new cljs.core.PersistentHashSet(null,new cljs.core.PersistentArrayMap(null,8,[new cljs.core.Keyword(null,"months","months",-45571637),null,new cljs.core.Keyword(null,"days","days",-1394072564),null,new cljs.core.Keyword(null,"seconds","seconds",-445266194),null,new cljs.core.Keyword(null,"hours","hours",58380855),null,new cljs.core.Keyword(null,"years","years",-1298579689),null, +new 
cljs.core.Keyword(null,"minutes","minutes",1319166394),null,new cljs.core.Keyword(null,"weeks","weeks",1844596125),null,new cljs.core.Keyword(null,"millis","millis",-1338288387),null],null),null),b)?cljs.core.dissoc.call(null,cljs.core._with_meta.call(null,cljs.core.into.call(null,cljs.core.PersistentArrayMap.EMPTY,this),this.__meta),b):new cljs_time.core.Period(this.years,this.months,this.weeks,this.days,this.hours,this.minutes,this.seconds,this.millis,this.__meta,cljs.core.not_empty.call(null, +cljs.core.dissoc.call(null,this.__extmap,b)),null)}; +cljs_time.core.Period.prototype.cljs$core$IAssociative$_assoc$arity$3=function(a,b,c){a=cljs.core.keyword_identical_QMARK_;return cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"years","years",-1298579689),b))?new cljs_time.core.Period(c,this.months,this.weeks,this.days,this.hours,this.minutes,this.seconds,this.millis,this.__meta,this.__extmap,null):cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"months","months",-45571637),b))?new cljs_time.core.Period(this.years,c,this.weeks,this.days, +this.hours,this.minutes,this.seconds,this.millis,this.__meta,this.__extmap,null):cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"weeks","weeks",1844596125),b))?new cljs_time.core.Period(this.years,this.months,c,this.days,this.hours,this.minutes,this.seconds,this.millis,this.__meta,this.__extmap,null):cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"days","days",-1394072564),b))?new cljs_time.core.Period(this.years,this.months,this.weeks,c,this.hours,this.minutes,this.seconds,this.millis, +this.__meta,this.__extmap,null):cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"hours","hours",58380855),b))?new cljs_time.core.Period(this.years,this.months,this.weeks,this.days,c,this.minutes,this.seconds,this.millis,this.__meta,this.__extmap,null):cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"minutes","minutes",1319166394),b))?new 
cljs_time.core.Period(this.years,this.months,this.weeks,this.days,this.hours,c,this.seconds,this.millis,this.__meta,this.__extmap,null):cljs.core.truth_(a.call(null, +new cljs.core.Keyword(null,"seconds","seconds",-445266194),b))?new cljs_time.core.Period(this.years,this.months,this.weeks,this.days,this.hours,this.minutes,c,this.millis,this.__meta,this.__extmap,null):cljs.core.truth_(a.call(null,new cljs.core.Keyword(null,"millis","millis",-1338288387),b))?new cljs_time.core.Period(this.years,this.months,this.weeks,this.days,this.hours,this.minutes,this.seconds,c,this.__meta,this.__extmap,null):new cljs_time.core.Period(this.years,this.months,this.weeks,this.days, +this.hours,this.minutes,this.seconds,this.millis,this.__meta,cljs.core.assoc.call(null,this.__extmap,b,c),null)}; +cljs_time.core.Period.prototype.cljs$core$ISeqable$_seq$arity$1=function(a){return cljs.core.seq.call(null,cljs.core.concat.call(null,new cljs.core.PersistentVector(null,8,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.MapEntry(new cljs.core.Keyword(null,"years","years",-1298579689),this.years,null),new cljs.core.MapEntry(new cljs.core.Keyword(null,"months","months",-45571637),this.months,null),new cljs.core.MapEntry(new cljs.core.Keyword(null,"weeks","weeks",1844596125),this.weeks,null), +new cljs.core.MapEntry(new cljs.core.Keyword(null,"days","days",-1394072564),this.days,null),new cljs.core.MapEntry(new cljs.core.Keyword(null,"hours","hours",58380855),this.hours,null),new cljs.core.MapEntry(new cljs.core.Keyword(null,"minutes","minutes",1319166394),this.minutes,null),new cljs.core.MapEntry(new cljs.core.Keyword(null,"seconds","seconds",-445266194),this.seconds,null),new cljs.core.MapEntry(new cljs.core.Keyword(null,"millis","millis",-1338288387),this.millis,null)],null),this.__extmap))}; +cljs_time.core.Period.prototype.cljs$core$IWithMeta$_with_meta$arity$2=function(a,b){return new 
cljs_time.core.Period(this.years,this.months,this.weeks,this.days,this.hours,this.minutes,this.seconds,this.millis,b,this.__extmap,this.__hash)}; +cljs_time.core.Period.prototype.cljs$core$ICollection$_conj$arity$2=function(a,b){return cljs.core.vector_QMARK_.call(null,b)?this.cljs$core$IAssociative$_assoc$arity$3(null,cljs.core._nth.call(null,b,0),cljs.core._nth.call(null,b,1)):cljs.core.reduce.call(null,cljs.core._conj,this,b)}; +cljs_time.core.Period.getBasis=function(){return new cljs.core.PersistentVector(null,8,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Symbol(null,"years","years",341951838,null),new cljs.core.Symbol(null,"months","months",1594959890,null),new cljs.core.Symbol(null,"weeks","weeks",-809839644,null),new cljs.core.Symbol(null,"days","days",246458963,null),new cljs.core.Symbol(null,"hours","hours",1698912382,null),new cljs.core.Symbol(null,"minutes","minutes",-1335269375,null),new cljs.core.Symbol(null, +"seconds","seconds",1195265333,null),new cljs.core.Symbol(null,"millis","millis",302243140,null)],null)};cljs_time.core.Period.cljs$lang$type=!0;cljs_time.core.Period.cljs$lang$ctorPrSeq=function(a){return new cljs.core.List(null,"cljs-time.core/Period",null,1,null)};cljs_time.core.Period.cljs$lang$ctorPrWriter=function(a,b){return cljs.core._write.call(null,b,"cljs-time.core/Period")}; +cljs_time.core.__GT_Period=function(a,b,c,d,e,f,g,h){return new cljs_time.core.Period(a,b,c,d,e,f,g,h,null,null,null)}; +cljs_time.core.map__GT_Period=function(a){var b=cljs.core.dissoc.call(null,a,new cljs.core.Keyword(null,"years","years",-1298579689),new cljs.core.Keyword(null,"months","months",-45571637),new cljs.core.Keyword(null,"weeks","weeks",1844596125),new cljs.core.Keyword(null,"days","days",-1394072564),new cljs.core.Keyword(null,"hours","hours",58380855),new cljs.core.Keyword(null,"minutes","minutes",1319166394),new cljs.core.Keyword(null,"seconds","seconds",-445266194),new cljs.core.Keyword(null,"millis", 
+"millis",-1338288387));b=cljs.core.record_QMARK_.call(null,a)?cljs.core.into.call(null,cljs.core.PersistentArrayMap.EMPTY,b):b;return new cljs_time.core.Period((new cljs.core.Keyword(null,"years","years",-1298579689)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null,"months","months",-45571637)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null,"weeks","weeks",1844596125)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null,"days","days",-1394072564)).cljs$core$IFn$_invoke$arity$1(a), +(new cljs.core.Keyword(null,"hours","hours",58380855)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null,"minutes","minutes",1319166394)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null,"seconds","seconds",-445266194)).cljs$core$IFn$_invoke$arity$1(a),(new cljs.core.Keyword(null,"millis","millis",-1338288387)).cljs$core$IFn$_invoke$arity$1(a),null,cljs.core.not_empty.call(null,b),null)}; +cljs_time.core.period=function(a){switch(arguments.length){case 2:return cljs_time.core.period.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(db.getTime()};goog.date.UtcDateTime.prototype.cljs_time$core$DateTimeProtocol$before_QMARK_$arity$2=function(a,b){return this.getTime()b.getTime()};goog.date.DateTime.prototype.cljs_time$core$DateTimeProtocol$before_QMARK_$arity$2=function(a,b){return this.getTime()cljs_time.core.compare_local_dates.call(null,this,b)};goog.date.Date.prototype.cljs_time$core$DateTimeProtocol$plus_$arity$2=function(a,b){return cljs_time.core.period_fn.call(null,b).call(null,cljs.core._PLUS_,this)};goog.date.Date.prototype.cljs_time$core$DateTimeProtocol$minus_$arity$2=function(a,b){return cljs_time.core.period_fn.call(null,b).call(null,cljs.core._,this)}; +goog.date.Date.prototype.cljs_time$core$DateTimeProtocol$first_day_of_the_month_$arity$1=function(a){return new 
goog.date.Date(this.getYear(),this.getMonth(),1)};goog.date.Date.prototype.cljs_time$core$DateTimeProtocol$last_day_of_the_month_$arity$1=function(a){return cljs_time.core.minus_.call(null,new goog.date.Date(this.getYear(),this.getMonth()+1,1),cljs_time.core.period.call(null,new cljs.core.Keyword(null,"days","days",-1394072564),1))}; +goog.date.Date.prototype.cljs_time$core$DateTimeProtocol$week_number_of_year$arity$1=function(a){return goog.date.getWeekNumber(this.getYear(),this.getMonth(),this.getDate())};goog.date.Date.prototype.cljs_time$core$DateTimeProtocol$week_year$arity$1=function(a){return cljs_time.internal.core.get_week_year.call(null,this.getYear(),this.getMonth(),this.getDate())}; +cljs_time.core.utc={id:"UTC",std_offset:0,names:new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,["UTC"],null),transitions:cljs.core.PersistentVector.EMPTY};cljs_time.core.default_ms_fn=function(){return function(){return(new goog.date.UtcDateTime).getTime()}};cljs_time.core.offset_ms_fn=function(a){return function(){return(new goog.date.UtcDateTime).getTime()+a}};cljs_time.core.static_ms_fn=function(a){return function(){return a}};cljs_time.core._STAR_ms_fn_STAR_=cljs_time.core.default_ms_fn.call(null); +cljs_time.core.now=function(){var a=new goog.date.UtcDateTime;a.setTime(cljs_time.core._STAR_ms_fn_STAR_.call(null));return a};cljs_time.core.time_now=function(){var a=new goog.date.DateTime;a.setTime(cljs_time.core._STAR_ms_fn_STAR_.call(null));return a};cljs_time.core.at_midnight=function(a){a=a.clone();a.setHours(0);a.setMinutes(0);a.setSeconds(0);a.setMilliseconds(0);return a};cljs_time.core.today_at_midnight=function(){return cljs_time.core.at_midnight.call(null,cljs_time.core.now.call(null))}; +cljs_time.core.epoch=function(){var a=new goog.date.UtcDateTime;a.setTime(0);return a}; +cljs_time.core.date_midnight=function(a){switch(arguments.length){case 1:return cljs_time.core.date_midnight.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 
2:return cljs_time.core.date_midnight.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return cljs_time.core.date_midnight.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.core.date_midnight.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.date_midnight.call(null,a,1,1)};cljs_time.core.date_midnight.cljs$core$IFn$_invoke$arity$2=function(a,b){return cljs_time.core.date_midnight.call(null,a,b,1)};cljs_time.core.date_midnight.cljs$core$IFn$_invoke$arity$3=function(a,b,c){return new goog.date.UtcDateTime(a,b-1,c)};cljs_time.core.date_midnight.cljs$lang$maxFixedArity=3; +cljs_time.core.date_time=function(a){switch(arguments.length){case 1:return cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);case 4:return cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$4(arguments[0],arguments[1],arguments[2],arguments[3]);case 5:return cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$5(arguments[0], +arguments[1],arguments[2],arguments[3],arguments[4]);case 6:return cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$6(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4],arguments[5]);case 7:return cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$7(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4],arguments[5],arguments[6]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.date_time.call(null,a,1,1,0,0,0,0)};cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$2=function(a,b){return 
cljs_time.core.date_time.call(null,a,b,1,0,0,0,0)};cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$3=function(a,b,c){return cljs_time.core.date_time.call(null,a,b,c,0,0,0,0)}; +cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$4=function(a,b,c,d){return cljs_time.core.date_time.call(null,a,b,c,d,0,0,0)};cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$5=function(a,b,c,d,e){return cljs_time.core.date_time.call(null,a,b,c,d,e,0,0)};cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$6=function(a,b,c,d,e,f){return cljs_time.core.date_time.call(null,a,b,c,d,e,f,0)}; +cljs_time.core.date_time.cljs$core$IFn$_invoke$arity$7=function(a,b,c,d,e,f,g){return new goog.date.UtcDateTime(a,b-1,c,d,e,f,g)};cljs_time.core.date_time.cljs$lang$maxFixedArity=7; +cljs_time.core.local_date_time=function(a){switch(arguments.length){case 1:return cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);case 4:return cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$4(arguments[0],arguments[1],arguments[2],arguments[3]); +case 5:return cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$5(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4]);case 6:return cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$6(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4],arguments[5]);case 7:return cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$7(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4],arguments[5],arguments[6]);default:throw Error(["Invalid arity: ", +cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$1=function(a){return 
cljs_time.core.local_date_time.call(null,a,1,1,0,0,0,0)};cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$2=function(a,b){return cljs_time.core.local_date_time.call(null,a,b,1,0,0,0,0)};cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$3=function(a,b,c){return cljs_time.core.local_date_time.call(null,a,b,c,0,0,0,0)}; +cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$4=function(a,b,c,d){return cljs_time.core.local_date_time.call(null,a,b,c,d,0,0,0)};cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$5=function(a,b,c,d,e){return cljs_time.core.local_date_time.call(null,a,b,c,d,e,0,0)};cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$6=function(a,b,c,d,e,f){return cljs_time.core.local_date_time.call(null,a,b,c,d,e,f,0)}; +cljs_time.core.local_date_time.cljs$core$IFn$_invoke$arity$7=function(a,b,c,d,e,f,g){return new goog.date.DateTime(a,b-1,c,d,e,f,g)};cljs_time.core.local_date_time.cljs$lang$maxFixedArity=7;cljs_time.core.local_date=function(a,b,c){return new goog.date.Date(a,b-1,c)};cljs_time.core.today=function(){return new goog.date.Date(new Date(cljs_time.core._STAR_ms_fn_STAR_.call(null)))}; +cljs_time.core.time_zone_for_offset=function(a){switch(arguments.length){case 1:return cljs_time.core.time_zone_for_offset.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.core.time_zone_for_offset.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.core.time_zone_for_offset.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.time_zone_for_offset.call(null,a,null)}; +cljs_time.core.time_zone_for_offset.cljs$core$IFn$_invoke$arity$2=function(a,b){var c=0>a?new cljs.core.Keyword(null,"-","-",-2112348439):new 
cljs.core.Keyword(null,"+","+",1913524883),d=["UTC%s%02d",cljs.core.truth_(b)?":%02d":null].join("");a=0>a?-1*a:a;d=cljs.core.truth_(b)?cljs_time.internal.core.format.call(null,d,cljs.core.name.call(null,c),a,b):cljs_time.internal.core.format.call(null,d,cljs.core.name.call(null,c),a);return cljs.core.with_meta.call(null,new cljs.core.PersistentArrayMap(null, +4,[new cljs.core.Keyword(null,"id","id",-1388402092),d,new cljs.core.Keyword(null,"offset","offset",296498311),new cljs.core.PersistentVector(null,4,5,cljs.core.PersistentVector.EMPTY_NODE,[c,a,cljs.core.truth_(b)?b:0,0],null),new cljs.core.Keyword(null,"rules","rules",1198912366),"-",new cljs.core.Keyword(null,"names","names",-1943074658),new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[d],null)],null),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null, +"type","type",1174270348),new cljs.core.Keyword("cljs-time.core","time-zone","cljs-time.core/time-zone",751963705)],null))};cljs_time.core.time_zone_for_offset.cljs$lang$maxFixedArity=2;cljs_time.core.default_time_zone=function(){var a=new goog.date.DateTime;a.setTime(cljs_time.core._STAR_ms_fn_STAR_.call(null));a=-1*a.getTimezoneOffset()/60;return cljs_time.core.time_zone_for_offset.call(null,a|0,cljs.core.mod.call(null,a,1))};cljs_time.core.to_default_time_zone=function(a){return new goog.date.DateTime(a)}; +cljs_time.core.to_utc_time_zone=function(a){return goog.date.UtcDateTime.fromTimestamp(a.getTime())};cljs_time.core.from_default_time_zone=function(a){return new goog.date.DateTime(a.getYear(),a.getMonth(),a.getDate(),a.getHours(),a.getMinutes(),a.getSeconds(),a.getMilliseconds())}; +cljs_time.core.from_utc_time_zone=function(a){var b=a.getYear(),c=a.getMonth(),d=a.getDate();return cljs.core.truth_(cljs_time.core._EQ_.call(null,goog.date.Date,cljs.core.type.call(null,a)))?new goog.date.UtcDateTime(b,c,d):new goog.date.UtcDateTime(b,c,d,a.getHours(),a.getMinutes(),a.getSeconds(),a.getMilliseconds())}; 
+cljs_time.core.years=function(a){switch(arguments.length){case 0:return cljs_time.core.years.cljs$core$IFn$_invoke$arity$0();case 1:return cljs_time.core.years.cljs$core$IFn$_invoke$arity$1(arguments[0]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs_time.core.years.cljs$core$IFn$_invoke$arity$0=function(){return cljs_time.core.years.call(null,null)}; +cljs_time.core.years.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.period.call(null,new cljs.core.Keyword(null,"years","years",-1298579689),a)};cljs_time.core.years.cljs$lang$maxFixedArity=1; +cljs_time.core.months=function(a){switch(arguments.length){case 0:return cljs_time.core.months.cljs$core$IFn$_invoke$arity$0();case 1:return cljs_time.core.months.cljs$core$IFn$_invoke$arity$1(arguments[0]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs_time.core.months.cljs$core$IFn$_invoke$arity$0=function(){return cljs_time.core.months.call(null,null)}; +cljs_time.core.months.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.period.call(null,new cljs.core.Keyword(null,"months","months",-45571637),a)};cljs_time.core.months.cljs$lang$maxFixedArity=1; +cljs_time.core.weeks=function(a){switch(arguments.length){case 0:return cljs_time.core.weeks.cljs$core$IFn$_invoke$arity$0();case 1:return cljs_time.core.weeks.cljs$core$IFn$_invoke$arity$1(arguments[0]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs_time.core.weeks.cljs$core$IFn$_invoke$arity$0=function(){return cljs_time.core.weeks.call(null,null)}; +cljs_time.core.weeks.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.period.call(null,new cljs.core.Keyword(null,"weeks","weeks",1844596125),a)};cljs_time.core.weeks.cljs$lang$maxFixedArity=1; +cljs_time.core.days=function(a){switch(arguments.length){case 0:return 
cljs_time.core.days.cljs$core$IFn$_invoke$arity$0();case 1:return cljs_time.core.days.cljs$core$IFn$_invoke$arity$1(arguments[0]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs_time.core.days.cljs$core$IFn$_invoke$arity$0=function(){return cljs_time.core.days.call(null,null)}; +cljs_time.core.days.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.period.call(null,new cljs.core.Keyword(null,"days","days",-1394072564),a)};cljs_time.core.days.cljs$lang$maxFixedArity=1; +cljs_time.core.hours=function(a){switch(arguments.length){case 0:return cljs_time.core.hours.cljs$core$IFn$_invoke$arity$0();case 1:return cljs_time.core.hours.cljs$core$IFn$_invoke$arity$1(arguments[0]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs_time.core.hours.cljs$core$IFn$_invoke$arity$0=function(){return cljs_time.core.hours.call(null,null)}; +cljs_time.core.hours.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.period.call(null,new cljs.core.Keyword(null,"hours","hours",58380855),a)};cljs_time.core.hours.cljs$lang$maxFixedArity=1; +cljs_time.core.minutes=function(a){switch(arguments.length){case 0:return cljs_time.core.minutes.cljs$core$IFn$_invoke$arity$0();case 1:return cljs_time.core.minutes.cljs$core$IFn$_invoke$arity$1(arguments[0]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs_time.core.minutes.cljs$core$IFn$_invoke$arity$0=function(){return cljs_time.core.minutes.call(null,null)}; +cljs_time.core.minutes.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.period.call(null,new cljs.core.Keyword(null,"minutes","minutes",1319166394),a)};cljs_time.core.minutes.cljs$lang$maxFixedArity=1; +cljs_time.core.seconds=function(a){switch(arguments.length){case 0:return cljs_time.core.seconds.cljs$core$IFn$_invoke$arity$0();case 1:return 
cljs_time.core.seconds.cljs$core$IFn$_invoke$arity$1(arguments[0]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs_time.core.seconds.cljs$core$IFn$_invoke$arity$0=function(){return cljs_time.core.seconds.call(null,null)}; +cljs_time.core.seconds.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.period.call(null,new cljs.core.Keyword(null,"seconds","seconds",-445266194),a)};cljs_time.core.seconds.cljs$lang$maxFixedArity=1; +cljs_time.core.millis=function(a){switch(arguments.length){case 0:return cljs_time.core.millis.cljs$core$IFn$_invoke$arity$0();case 1:return cljs_time.core.millis.cljs$core$IFn$_invoke$arity$1(arguments[0]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}};cljs_time.core.millis.cljs$core$IFn$_invoke$arity$0=function(){return cljs_time.core.millis.call(null,null)}; +cljs_time.core.millis.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.core.period.call(null,new cljs.core.Keyword(null,"millis","millis",-1338288387),a)};cljs_time.core.millis.cljs$lang$maxFixedArity=1; +cljs_time.core.plus=function(a){switch(arguments.length){case 2:return cljs_time.core.plus.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:for(var b=[],c=arguments.length,d=0;;)if(d=b?new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[parseInt(cljs.core.apply.call(null,cljs.core.str,cljs.core.take.call(null,c,a))),cljs.core.concat.call(null,cljs.core.drop.call(null, +c,a),d)],null):new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[parseInt(cljs.core.apply.call(null,cljs.core.str,a)),d],null)};cljs_time.internal.parse.parse_number.cljs$lang$maxFixedArity=3; +cljs_time.internal.parse.parse_period=function(a){switch(arguments.length){case 3:return 
cljs_time.internal.parse.parse_period.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);case 4:return cljs_time.internal.parse.parse_period.cljs$core$IFn$_invoke$arity$4(arguments[0],arguments[1],arguments[2],arguments[3]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_period.cljs$core$IFn$_invoke$arity$3=function(a,b,c){return cljs_time.internal.parse.parse_period.call(null,a,b,1,c)}; +cljs_time.internal.parse.parse_period.cljs$core$IFn$_invoke$arity$4=function(a,b,c,d){c=cljs_time.internal.parse.parse_number.call(null,a,c,d);a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[b,a],null),c],null)};cljs_time.internal.parse.parse_period.cljs$lang$maxFixedArity=4; +cljs_time.internal.parse.parse_year=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_year.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_year.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_year.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_year.call(null,1,a)};cljs_time.internal.parse.parse_year.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"years","years",-1298579689),a,b)}};cljs_time.internal.parse.parse_year.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_weekyear=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_weekyear.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return 
cljs_time.internal.parse.parse_weekyear.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_weekyear.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_year.call(null,1,a)};cljs_time.internal.parse.parse_weekyear.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"weekyear","weekyear",-74064500),a,b)}};cljs_time.internal.parse.parse_weekyear.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_weekyear_week=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_weekyear_week.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_weekyear_week.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_weekyear_week.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_year.call(null,1,a)};cljs_time.internal.parse.parse_weekyear_week.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"weekyear-week","weekyear-week",795291571),a,b)}};cljs_time.internal.parse.parse_weekyear_week.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_month=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_month.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_month.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; 
+cljs_time.internal.parse.parse_month.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_month.call(null,1,a)};cljs_time.internal.parse.parse_month.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"months","months",-45571637),a,b)}};cljs_time.internal.parse.parse_month.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_day=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_day.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_day.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_day.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_day.call(null,1,a)};cljs_time.internal.parse.parse_day.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"days","days",-1394072564),a,b)}};cljs_time.internal.parse.parse_day.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_day_of_week=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_day_of_week.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_day_of_week.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_day_of_week.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_day.call(null,1,a)};cljs_time.internal.parse.parse_day_of_week.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new 
cljs.core.Keyword(null,"day-of-week","day-of-week",1639326729),a,b)}};cljs_time.internal.parse.parse_day_of_week.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_hours=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_hours.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_hours.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_hours.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_hours.call(null,1,a)};cljs_time.internal.parse.parse_hours.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"hours","hours",58380855),a,b)}};cljs_time.internal.parse.parse_hours.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_HOURS=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_HOURS.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_HOURS.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_HOURS.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_HOURS.call(null,1,a)};cljs_time.internal.parse.parse_HOURS.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"HOURS","HOURS",-1611068963),a,b)}};cljs_time.internal.parse.parse_HOURS.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_minutes=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_minutes.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return 
cljs_time.internal.parse.parse_minutes.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_minutes.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_minutes.call(null,1,a)};cljs_time.internal.parse.parse_minutes.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"minutes","minutes",1319166394),a,b)}};cljs_time.internal.parse.parse_minutes.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_seconds=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_seconds.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_seconds.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_seconds.cljs$core$IFn$_invoke$arity$1=function(a){return cljs_time.internal.parse.parse_seconds.call(null,1,a)};cljs_time.internal.parse.parse_seconds.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"seconds","seconds",-445266194),a,b)}};cljs_time.internal.parse.parse_seconds.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.parse_millis=function(a){switch(arguments.length){case 1:return cljs_time.internal.parse.parse_millis.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs_time.internal.parse.parse_millis.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs_time.internal.parse.parse_millis.cljs$core$IFn$_invoke$arity$1=function(a){return 
cljs_time.internal.parse.parse_millis.call(null,1,a)};cljs_time.internal.parse.parse_millis.cljs$core$IFn$_invoke$arity$2=function(a,b){return function(c){return cljs_time.internal.parse.parse_period.call(null,c,new cljs.core.Keyword(null,"millis","millis",-1338288387),a,b)}};cljs_time.internal.parse.parse_millis.cljs$lang$maxFixedArity=2; +cljs_time.internal.parse.timezone_adj=function(a,b,c){b=parseInt(b,10);c=parseInt(c,10);c=60*b+c;a=cljs.core._EQ_.call(null,a,"+")?cljs.core._:cljs.core._PLUS_;return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"timezone","timezone",1831928099),new goog.date.Interval(goog.date.Interval.MINUTES,a.call(null,c))],null)}; +cljs_time.internal.parse.parse_timezone=function(a){return function(b){var c=cljs.core.seq.call(null,b),d=cljs.core.first.call(null,c),e=cljs.core.next.call(null,c);c=function(){return cljs.core.ex_info.call(null,["Invalid timezone format: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(b)].join(""),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"parse-error","parse-error",255902478)],null))};var f=function(a){var b=clojure.string.join.call(null, +cljs.core.take.call(null,4,e)),c=cljs.core.re_find.call(null,/^(\d{2})(\d{2})/,b);return cljs.core.truth_(c)?(cljs.core.nth.call(null,c,0,null),b=cljs.core.nth.call(null,c,1,null),c=cljs.core.nth.call(null,c,2,null),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[cljs_time.internal.parse.timezone_adj.call(null,a,b,c),cljs.core.drop.call(null,4,e)],null)):null},g=function(a){var b=clojure.string.join.call(null,cljs.core.take.call(null,5,e)),c=cljs.core.re_find.call(null, +/^(\d{2}):(\d{2})/,b);return cljs.core.truth_(c)?(cljs.core.nth.call(null,c,0,null),b=cljs.core.nth.call(null,c,1,null),c=cljs.core.nth.call(null,c,2,null),new 
cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[cljs_time.internal.parse.timezone_adj.call(null,a,b,c),cljs.core.drop.call(null,5,e)],null)):null};if(cljs.core.truth_((new cljs.core.PersistentHashSet(null,new cljs.core.PersistentArrayMap(null,2,["+",null,"-",null],null),null)).call(null,d))){var h=a instanceof cljs.core.Keyword? +a.fqn:null;switch(h){case "dddd":f=f.call(null,d);if(cljs.core.truth_(f))return f;d=g.call(null,d);if(cljs.core.truth_(d))return d;throw c.call(null);case "long":f=f.call(null,d);if(cljs.core.truth_(f))return f;d=g.call(null,d);if(cljs.core.truth_(d))return d;throw c.call(null);default:throw Error(["No matching clause: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(h)].join(""));}}else{if(cljs.core._EQ_.call(null,d,"Z"))return new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE, +[new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"timezone","timezone",1831928099),cljs_time.internal.parse.timezone_adj.call(null,cljs.core._PLUS_,"0","0")],null)],null);switch(a instanceof cljs.core.Keyword?a.fqn:null){case "abbr":d=cljs.core.take.call(null,3,b);d=cljs_time.internal.parse.read_while.call(null,function(a){return cljs.core.re_find.call(null,/[A-Z]/,a)},d);g=cljs.core.nth.call(null,d,0,null);cljs.core.nth.call(null,d,1,null); +if(cljs.core._EQ_.call(null,cljs.core.count.call(null,g),3))return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"timezone","timezone",1831928099),clojure.string.join.call(null,g)],null),cljs.core.drop.call(null,3,b)],null);throw c.call(null);case "full":throw cljs.core.ex_info.call(null,["Cannot parse long form timezone:",cljs.core.str.cljs$core$IFn$_invoke$arity$1(b)].join(""), +new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new 
cljs.core.Keyword(null,"parse-error","parse-error",255902478)],null));default:throw c.call(null);}}}}; +cljs_time.internal.parse.parse_meridiem=function(){return function(a){var b=cljs.core.split_at.call(null,2,a),c=cljs.core.nth.call(null,b,0,null);a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);b=cljs.core.nth.call(null,b,1,null);var d=[cljs.core.str.cljs$core$IFn$_invoke$arity$1(a),cljs.core.str.cljs$core$IFn$_invoke$arity$1(c)].join("");if(cljs.core.truth_((new cljs.core.PersistentHashSet(null,new cljs.core.PersistentArrayMap(null,4,["AM",null,"am",null,"pm",null,"PM",null], +null),null)).call(null,d)))a=new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[d,b],null);else if(cljs.core.truth_((new cljs.core.PersistentHashSet(null,new cljs.core.PersistentArrayMap(null,2,["a",null,"p",null],null),null)).call(null,a)))a=new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[(new cljs.core.PersistentArrayMap(null,2,["a","am","p","pm"],null)).call(null,a),cljs.core.cons.call(null,c,b)],null);else if(cljs.core.truth_((new cljs.core.PersistentHashSet(null, +new cljs.core.PersistentArrayMap(null,2,["A",null,"P",null],null),null)).call(null,a)))a=new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[(new cljs.core.PersistentArrayMap(null,2,["A","am","P","pm"],null)).call(null,a),cljs.core.cons.call(null,c,b)],null);else throw cljs.core.ex_info.call(null,["Invalid meridiem format: ",d].join(""),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"parse-error", +"parse-error",255902478)],null));c=a;a=cljs.core.nth.call(null,c,0,null);c=cljs.core.nth.call(null,c,1,null);return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new 
cljs.core.Keyword(null,"meridiem","meridiem",1668960617),cljs.core.keyword.call(null,a)],null),clojure.string.join.call(null,c)],null)}}; +cljs_time.internal.parse.parse_period_name=function(a,b,c,d){c=cljs.core.concat.call(null,c,cljs.core.map.call(null,function(a){return cljs.core.subs.call(null,a,0,3)},c));var e=cljs.core.first.call(null,cljs.core.remove.call(null,cljs.core.comp.call(null,cljs.core.partial.call(null,cljs.core._EQ_,a),cljs.core.second),cljs.core.map.call(null,function(b){return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[b,cljs_time.internal.parse.replace.call(null,a,cljs.core.re_pattern.call(null, +["^",cljs.core.str.cljs$core$IFn$_invoke$arity$1(b)].join("")),"")],null)},c)));d=cljs.core.nth.call(null,e,0,null);e=cljs.core.nth.call(null,e,1,null);if(cljs.core.truth_(d))return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[b,cljs.core.mod.call(null,cljs_time.internal.core.index_of.call(null,c,d),12)],null),e],null);throw cljs.core.ex_info.call(null,["Could not parse ",cljs.core.name.call(null, +b)," name"].join(""),new cljs.core.PersistentArrayMap(null,4,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"parse-error","parse-error",255902478),new cljs.core.Keyword(null,"sub-type","sub-type",-997954412),new cljs.core.Keyword(null,"period-match-erroro","period-match-erroro",1058444722),new cljs.core.Keyword(null,"period","period",-352129191),b,new cljs.core.Keyword(null,"in","in",-1531184865),e],null));}; +cljs_time.internal.parse.parse_month_name=function(a){return function(b){return cljs.core.update_in.call(null,cljs_time.internal.parse.parse_period_name.call(null,b,new cljs.core.Keyword(null,"months","months",-45571637),cljs_time.internal.core.months,a),new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[0,1],null),cljs.core.inc)}}; 
+cljs_time.internal.parse.parse_day_name=function(a){return function(b){return cljs_time.internal.parse.parse_period_name.call(null,b,new cljs.core.Keyword(null,"days","days",-1394072564),cljs_time.internal.core.days,a)}}; +cljs_time.internal.parse.parse_quoted=function(a){var b=cljs.core.re_pattern.call(null,cljs.core.apply.call(null,cljs.core.str,"^",a));return function(c){c=clojure.string.join.call(null,c);var d=cljs_time.internal.parse.replace.call(null,c,b,"");if(cljs.core._EQ_.call(null,c,d))throw cljs.core.ex_info.call(null,"Quoted text not found",new cljs.core.PersistentArrayMap(null,2,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"parse-error","parse-error",255902478),new cljs.core.Keyword(null, +"where","where",-2044795965),new cljs.core.Keyword(null,"parse-quoted","parse-quoted",1180570118)],null));return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"quoted","quoted",2117344952),a],null),d],null)}}; +cljs_time.internal.parse.parse_ordinal_suffix=function(){return function(a){var b=cljs_time.internal.parse.parse_match.call(null,a,new cljs.core.Keyword(null,"ordinal-suffix","ordinal-suffix",-1311843199),"st");if(cljs.core.truth_(b))return b;b=cljs_time.internal.parse.parse_match.call(null,a,new cljs.core.Keyword(null,"ordinal-suffix","ordinal-suffix",-1311843199),"nd");if(cljs.core.truth_(b))return b;b=cljs_time.internal.parse.parse_match.call(null,a,new cljs.core.Keyword(null,"ordinal-suffix", +"ordinal-suffix",-1311843199),"rd");return cljs.core.truth_(b)?b:cljs_time.internal.parse.parse_match.call(null,a,new cljs.core.Keyword(null,"ordinal-suffix","ordinal-suffix",-1311843199),"th")}}; +cljs_time.internal.parse.lookup=function(a){var b=cljs.core.nth.call(null,a,0,null);a=cljs.core.nth.call(null,a,1,null);if(cljs.core._EQ_.call(null,b,new 
cljs.core.Keyword(null,"token","token",-1211463215)))switch(a){case "S":return cljs_time.internal.parse.parse_millis.call(null,1,2);case "SSS":return cljs_time.internal.parse.parse_millis.call(null,3,3);case "s":return cljs_time.internal.parse.parse_seconds.call(null,1,2);case "ss":return cljs_time.internal.parse.parse_seconds.call(null,2,2);case "m":return cljs_time.internal.parse.parse_minutes.call(null, +1,2);case "mm":return cljs_time.internal.parse.parse_minutes.call(null,2,2);case "h":return cljs_time.internal.parse.parse_hours.call(null,1,2);case "hh":return cljs_time.internal.parse.parse_hours.call(null,2,2);case "H":return cljs_time.internal.parse.parse_HOURS.call(null,1,2);case "HH":return cljs_time.internal.parse.parse_HOURS.call(null,2,2);case "d":return cljs_time.internal.parse.parse_day.call(null,1,2);case "dd":return cljs_time.internal.parse.parse_day.call(null,2,2);case "D":return cljs_time.internal.parse.parse_day.call(null, +1,3);case "DD":return cljs_time.internal.parse.parse_day.call(null,2,3);case "DDD":return cljs_time.internal.parse.parse_day.call(null,3,3);case "M":return cljs_time.internal.parse.parse_month.call(null,1,2);case "MM":return cljs_time.internal.parse.parse_month.call(null,1,2);case "MMM":return cljs_time.internal.parse.parse_month_name.call(null,!0);case "MMMM":return cljs_time.internal.parse.parse_month_name.call(null,!1);case "y":return cljs_time.internal.parse.parse_year.call(null,1,4);case "yy":return cljs_time.internal.parse.parse_year.call(null, +2,2);case "yyyy":return cljs_time.internal.parse.parse_year.call(null,4,4);case "Y":return cljs_time.internal.parse.parse_year.call(null,1,4);case "YY":return cljs_time.internal.parse.parse_year.call(null,2,2);case "YYYY":return cljs_time.internal.parse.parse_year.call(null,4,4);case "x":return cljs_time.internal.parse.parse_weekyear.call(null,1,4);case "xx":return cljs_time.internal.parse.parse_weekyear.call(null,2,2);case "xxxx":return 
cljs_time.internal.parse.parse_weekyear.call(null,4,4);case "w":return cljs_time.internal.parse.parse_weekyear_week.call(null, +1,2);case "ww":return cljs_time.internal.parse.parse_weekyear_week.call(null,2,2);case "E":return cljs_time.internal.parse.parse_day_name.call(null,!0);case "EEE":return cljs_time.internal.parse.parse_day_name.call(null,!0);case "EEEE":return cljs_time.internal.parse.parse_day_name.call(null,!1);case "e":return cljs_time.internal.parse.parse_day_of_week.call(null,1,2);case "a":return cljs_time.internal.parse.parse_meridiem.call(null);case "A":return cljs_time.internal.parse.parse_meridiem.call(null); +case "Z":return cljs_time.internal.parse.parse_timezone.call(null,new cljs.core.Keyword(null,"dddd","dddd",217016228));case "ZZ":return cljs_time.internal.parse.parse_timezone.call(null,new cljs.core.Keyword(null,"long","long",-171452093));case "ZZZ":return cljs_time.internal.parse.parse_timezone.call(null,new cljs.core.Keyword(null,"abbr","abbr",2088591884));case "ZZZZ":return cljs_time.internal.parse.parse_timezone.call(null,new cljs.core.Keyword(null,"abbr","abbr",2088591884));case "z":return cljs_time.internal.parse.parse_timezone.call(null, +new cljs.core.Keyword(null,"abbr","abbr",2088591884));case "zz":return cljs_time.internal.parse.parse_timezone.call(null,new cljs.core.Keyword(null,"abbr","abbr",2088591884));case "zzz":return cljs_time.internal.parse.parse_timezone.call(null,new cljs.core.Keyword(null,"abbr","abbr",2088591884));case "zzzz":return cljs_time.internal.parse.parse_timezone.call(null,new cljs.core.Keyword(null,"full","full",436801220));case "o":return cljs_time.internal.parse.parse_ordinal_suffix.call(null);default:throw cljs.core.ex_info.call(null, +["Illegal pattern component: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)].join(""),new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"illegal-pattern","illegal-pattern",-1810990520)],null));}else 
return cljs_time.internal.parse.parse_quoted.call(null,a)}; +cljs_time.internal.parse.parse=function(a,b){a=cljs.core.map.call(null,cljs_time.internal.parse.lookup,cljs_time.internal.parse.read_pattern.call(null,a));for(var c=cljs.core.seq.call(null,a),d=cljs.core.first.call(null,c),e=cljs.core.next.call(null,c),f=cljs.core.PersistentVector.EMPTY,g=b,h=a,k=f;;){var l=g,m=h,n=cljs.core.seq.call(null,m),p=cljs.core.first.call(null,n),q=cljs.core.next.call(null,n),r=p,t=q,u=k;g=function(a,c,d,e,f,g,h,k,l,m,n,p,q,r,u,t,J,L,M,N){return function(){return cljs.core.ex_info.call(null, +["Invalid format: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(b)," is malformed at ",cljs.core.pr_str.call(null,e)].join(""),new cljs.core.PersistentArrayMap(null,2,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"parse-error","parse-error",255902478),new cljs.core.Keyword(null,"sub-type","sub-type",-997954412),new cljs.core.Keyword(null,"invalid-format","invalid-format",-72676108)],null))}}(g,h,k,l,m,n,p,q,r,t,u,b,a,a,c,d,e,d,e,f);if(cljs.core.seq.call(null,l)){if(null== +r)throw g.call(null);h=r.call(null,l);g=cljs.core.nth.call(null,h,0,null);h=cljs.core.nth.call(null,h,1,null);u=cljs.core.conj.call(null,u,g);g=h;h=t;k=u}else{if(cljs.core.truth_(r))throw g.call(null);return u}}};cljs_time.internal.parse.infer_years=function(a,b){var c=(new goog.date.Date).getYear(),d=c-30;c-=cljs.core.mod.call(null,c,100);a=cljs.core.truth_(a)?a:cljs.core.truth_(b)?b:0;return ad?"AM":"PM":12>d?"am":"pm";return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[[cljs.core.str.cljs$core$IFn$_invoke$arity$1(b),d].join(""),c],null)}}; +cljs_time.internal.unparse.unparse_timezone=function(){return function(a,b){return b instanceof goog.date.UtcDateTime?new 
cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[[cljs.core.str.cljs$core$IFn$_invoke$arity$1(a),cljs.core.str.cljs$core$IFn$_invoke$arity$1(b.getTimezoneOffsetString())].join(""),b],null):new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[a,b],null)}}; +cljs_time.internal.unparse.unparse_ordinal_suffix=function(a){return function(b,c){var d=a.call(null,c),e=function(){switch(d){case 1:return"st";case 2:return"nd";case 3:return"rd";case 21:return"st";case 22:return"nd";case 23:return"rd";case 31:return"st";default:return"th"}}();return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[[cljs.core.str.cljs$core$IFn$_invoke$arity$1(b),cljs.core.str.cljs$core$IFn$_invoke$arity$1(e)].join(""),c],null)}}; +cljs_time.internal.unparse.lookup=function(a){var b=cljs.core.nth.call(null,a,0,null);a=cljs.core.nth.call(null,a,1,null);if(cljs.core._EQ_.call(null,b,new cljs.core.Keyword(null,"token","token",-1211463215)))switch(a){case "S":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"millis","millis",-1338288387),1,2],null);case "SSS":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null, +"millis","millis",-1338288387),3,3],null);case "s":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"seconds","seconds",-445266194),1,2],null);case "ss":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"seconds","seconds",-445266194),2,2],null);case "m":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"minutes", +"minutes",1319166394),1,2],null);case "mm":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new 
cljs.core.Keyword(null,"minutes","minutes",1319166394),2,2],null);case "h":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"hours","hours",58380855),1,2],null);case "hh":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"hours","hours",58380855), +2,2],null);case "H":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"HOURS","HOURS",-1611068963),1,2],null);case "HH":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"HOURS","HOURS",-1611068963),2,2],null);case "d":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"day","day",-274800446),1,2],null);case "dd":return new cljs.core.PersistentVector(null, +3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"day","day",-274800446),2,2],null);case "D":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"day","day",-274800446),1,3],null);case "DD":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"day","day",-274800446),2,3],null);case "DDD":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE, +[new cljs.core.Keyword(null,"day","day",-274800446),3,3],null);case "M":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"month","month",-1960248533),1,2],null);case "MM":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"month","month",-1960248533),2,2],null);case "MMM":return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null, 
+"month-name","month-name",-605509534),!0],null);case "MMMM":return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"month-name","month-name",-605509534),!1],null);case "y":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"year","year",335913393),1,4],null);case "yy":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null, +"year","year",335913393),2,2],null);case "yyyy":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"year","year",335913393),4,4],null);case "Y":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"year","year",335913393),1,4],null);case "YY":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"year","year",335913393), +2,2],null);case "YYYY":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"year","year",335913393),4,4],null);case "x":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"weekyear","weekyear",-74064500),1,4],null);case "xx":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"weekyear","weekyear",-74064500),2,2],null); +case "xxxx":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"weekyear","weekyear",-74064500),4,4],null);case "w":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"weekyear-week","weekyear-week",795291571),1,2],null);case "ww":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new 
cljs.core.Keyword(null,"weekyear-week","weekyear-week",795291571), +2,2],null);case "e":return new cljs.core.PersistentVector(null,3,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"day-of-week","day-of-week",1639326729),1,1],null);case "E":return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"day-name","day-name",1806125744),!0],null);case "EEE":return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"day-name","day-name",1806125744), +!0],null);case "EEEE":return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"day-name","day-name",1806125744),!1],null);case "a":return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"meridiem","meridiem",1668960617),!1],null);case "A":return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"meridiem","meridiem",1668960617), +!0],null);case "Z":return new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"timezone","timezone",1831928099)],null);case "ZZ":return new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"timezone","timezone",1831928099)],null);case "o":return new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"ordinal-suffix","ordinal-suffix",-1311843199)], +null);default:throw Error(["No matching clause: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)].join(""));}else return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"quoted","quoted",2117344952),a],null)}; +cljs_time.internal.unparse.lookup_getter=function(a){a=a instanceof cljs.core.Keyword?a.fqn:null;switch(a){case "millis":return 
function(a){return a.getMilliseconds()};case "seconds":return function(a){return a.getSeconds()};case "minutes":return function(a){return a.getMinutes()};case "hours":return function(a){return a.getHours()};case "HOURS":return function(a){return a.getHours()};case "day":return function(a){return a.getDate()};case "month":return function(a){return a.getMonth()};case "year":return function(a){return a.getYear()}; +default:throw Error(["No matching clause: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)].join(""));}}; +cljs_time.internal.unparse.lookup_fn=function(a,b,c){var d=cljs.core.seq.call(null,c);c=cljs.core.first.call(null,d);d=cljs.core.next.call(null,d);c=c instanceof cljs.core.Keyword?c.fqn:null;switch(c){case "millis":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_millis,d);case "seconds":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_seconds,d);case "minutes":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_minutes,d);case "hours":return cljs.core.apply.call(null, +cljs_time.internal.unparse.unparse_hours,d);case "HOURS":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_HOURS,d);case "day":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_day,d);case "month":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_month,d);case "month-name":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_month_name,d);case "year":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_year,d);case "weekyear":return cljs.core.apply.call(null, +cljs_time.internal.unparse.unparse_weekyear,d);case "weekyear-week":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_weekyear_week,d);case "day-name":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_day_name,d);case "day-of-week":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_day_of_week,d);case "meridiem":return 
cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_meridiem,d);case "timezone":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_timezone, +d);case "ordinal-suffix":return a=a.call(null,b-1),a=cljs.core.nth.call(null,a,0,null),cljs_time.internal.unparse.unparse_ordinal_suffix.call(null,cljs_time.internal.unparse.lookup_getter.call(null,a));case "quoted":return cljs.core.apply.call(null,cljs_time.internal.unparse.unparse_quoted,d);default:throw Error(["No matching clause: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(c)].join(""));}}; +cljs_time.internal.unparse.unparse=function(a,b){a=cljs.core.mapv.call(null,cljs_time.internal.unparse.lookup,cljs_time.internal.parse.read_pattern.call(null,a));var c=cljs.core.map_indexed.call(null,cljs.core.partial.call(null,cljs_time.internal.unparse.lookup_fn,a),a);a=cljs.core.seq.call(null,c);cljs.core.first.call(null,a);cljs.core.next.call(null,a);a=b;for(b="";;){var d=cljs.core.seq.call(null,c);c=cljs.core.first.call(null,d);var e=cljs.core.next.call(null,d);d=c;c=e;if(null==d)return b;a= +d.call(null,b,a);b=cljs.core.nth.call(null,a,0,null);a=cljs.core.nth.call(null,a,1,null)}};goog.i18n.currency={};goog.i18n.currency.PRECISION_MASK_=7;goog.i18n.currency.POSITION_FLAG_=16;goog.i18n.currency.SPACE_FLAG_=32;goog.i18n.currency.tier2Enabled_=!1;goog.i18n.currency.isAvailable=function(a){return a in goog.i18n.currency.CurrencyInfo};goog.i18n.currency.addTier2Support=function(){if(!goog.i18n.currency.tier2Enabled_){for(const a in goog.i18n.currency.CurrencyInfoTier2)goog.i18n.currency.CurrencyInfo[a]=goog.i18n.currency.CurrencyInfoTier2[a];goog.i18n.currency.tier2Enabled_=!0}}; +goog.i18n.currency.getGlobalCurrencyPattern=function(a){const b=goog.i18n.currency.CurrencyInfo[a],c=b[0];return a==b[1]?goog.i18n.currency.getCurrencyPattern_(c,b[1]):a+" "+goog.i18n.currency.getCurrencyPattern_(c,b[1])};goog.i18n.currency.getGlobalCurrencySign=function(a){const b=goog.i18n.currency.CurrencyInfo[a];return 
a==b[1]?a:a+" "+b[1]};goog.i18n.currency.getGlobalCurrencySignWithFallback=function(a){var b=goog.i18n.currency.CurrencyInfo[a];return b?a==b[1]?a:a+" "+b[1]:a}; +goog.i18n.currency.getLocalCurrencyPattern=function(a){a=goog.i18n.currency.CurrencyInfo[a];return goog.i18n.currency.getCurrencyPattern_(a[0],a[1])};goog.i18n.currency.getLocalCurrencySign=function(a){return goog.i18n.currency.CurrencyInfo[a][1]};goog.i18n.currency.getLocalCurrencySignWithFallback=function(a){return a in goog.i18n.currency.CurrencyInfo?goog.i18n.currency.CurrencyInfo[a][1]:a}; +goog.i18n.currency.getPortableCurrencyPattern=function(a){a=goog.i18n.currency.CurrencyInfo[a];return goog.i18n.currency.getCurrencyPattern_(a[0],a[2])};goog.i18n.currency.getPortableCurrencySign=function(a){return goog.i18n.currency.CurrencyInfo[a][2]};goog.i18n.currency.isValid=function(a){if(!a||3!==a.length)return!1;for(let b=0;3>b;b++){const c=a[b];if("A">c||"Z"c||"z"this.negativePrefix_.length?d=!1:this.positivePrefix_.length=p)m+=p,e=!0;else if(n==h.charAt(0)){if(c|| +d)break;m+=".";c=!0}else if(n==k.charAt(0)&&(" "!=k.charAt(0)||b[0]+1a||0==a&&0>1/a;b.push(d?this.negativePrefix_:this.positivePrefix_);isFinite(a)?(a=a*(d?-1:1)*this.multiplier_,this.useExponentialNotation_?this.subformatExponential_(a,b):this.subformatFixed_(a,this.minimumIntegerDigits_, +b)):b.push(this.getNumberFormatSymbols_().INFINITY);b.push(d?this.negativeSuffix_:this.positiveSuffix_);b.push(c.suffix);return b.join("")}; +goog.i18n.NumberFormat.prototype.roundNumber_=function(a){var b=goog.i18n.NumberFormat.decimalShift_,c=b(a,this.maximumFractionDigits_);0this.maximumFractionDigits_)throw Error("Min value must be less than max value");c||(c=[]);a=this.roundNumber_(a);var 
d=a.intValue,e=a.fracValue,f=0==d?0:this.intLog10_(d)+1,g=0b.length&&(b="1"+goog.string.repeat("0",this.maximumFractionDigits_-b.length)+b);for(e=b.length;"0"==b.charAt(e-1)&&e>a+1;)e--;for(d=1;da?(a=-a,b.push(this.getNumberFormatSymbols_().MINUS_SIGN)):this.useSignForPositiveExponent_&&b.push(this.getNumberFormatSymbols_().PLUS_SIGN);a=""+a;for(var c=this.getNumberFormatSymbols_().ZERO_DIGIT,d=a.length;dthis.minimumIntegerDigits_?(d=c%this.maximumIntegerDigits_,0>d&&(d=this.maximumIntegerDigits_+d),a=goog.i18n.NumberFormat.decimalShift_(a,d),c-=d,d=1):1>this.minimumIntegerDigits_? +(c++,a=goog.i18n.NumberFormat.decimalShift_(a,-1)):(c-=this.minimumIntegerDigits_-1,a=goog.i18n.NumberFormat.decimalShift_(a,this.minimumIntegerDigits_-1));this.subformatFixed_(a,d,b);this.addExponentPart_(c,b)}};goog.i18n.NumberFormat.prototype.getDigit_=function(a){a=a.charCodeAt(0);if(48<=a&&58>a)return a-48;var b=this.getNumberFormatSymbols_().ZERO_DIGIT.charCodeAt(0);return b<=a&&ac&&g++;break;case goog.i18n.NumberFormat.PATTERN_ZERO_DIGIT_:if(0c&&g++;break;case goog.i18n.NumberFormat.PATTERN_GROUPING_SEPARATOR_:0 +d+e||1>this.minExponentDigits_)throw Error('Malformed exponential pattern "'+a+'"');k=!1;break;default:b[0]--,k=!1}0==e&&0c&&0d+e)||0==g)throw Error('Malformed pattern "'+a+'"');a=d+e+f;this.maximumFractionDigits_=0<=c?a-c:0;0<=c&&(this.minimumFractionDigits_=d+e-c,0>this.minimumFractionDigits_&&(this.minimumFractionDigits_=0));this.minimumIntegerDigits_=(0<=c?c:a)-d;this.useExponentialNotation_&&(this.maximumIntegerDigits_=d+this.minimumIntegerDigits_, +0==this.maximumFractionDigits_&&0==this.minimumIntegerDigits_&&(this.minimumIntegerDigits_=1));this.groupingArray_.push(Math.max(0,g));this.decimalSeparatorAlwaysShown_=0==c||c==a};goog.i18n.NumberFormat.NULL_UNIT_={prefix:"",suffix:"",divisorBase:0}; +goog.i18n.NumberFormat.prototype.getUnitFor_=function(a,b){var 
c=this.compactStyle_==goog.i18n.NumberFormat.CompactStyle.SHORT?goog.i18n.CompactNumberFormatSymbols.COMPACT_DECIMAL_SHORT_PATTERN:goog.i18n.CompactNumberFormatSymbols.COMPACT_DECIMAL_LONG_PATTERN;null==c&&(c=goog.i18n.CompactNumberFormatSymbols.COMPACT_DECIMAL_SHORT_PATTERN);if(3>a)return goog.i18n.NumberFormat.NULL_UNIT_;var d=goog.i18n.NumberFormat.decimalShift_;a=Math.min(14,a);var e=c[d(1,a)];for(--a;!e&&3<=a;)e=c[d(1,a)],a--;if(!e)return goog.i18n.NumberFormat.NULL_UNIT_; +b=e[b];return b&&"0"!=b?(b=/([^0]*)(0+)(.*)/.exec(b))?{prefix:b[1],suffix:b[3],divisorBase:a+1-(b[2].length-1)}:goog.i18n.NumberFormat.NULL_UNIT_:goog.i18n.NumberFormat.NULL_UNIT_}; +goog.i18n.NumberFormat.prototype.getUnitAfterRounding_=function(a,b){if(this.compactStyle_==goog.i18n.NumberFormat.CompactStyle.NONE)return goog.i18n.NumberFormat.NULL_UNIT_;a=Math.abs(a);b=Math.abs(b);var c=this.pluralForm_(a),d=1>=a?0:this.intLog10_(a);c=this.getUnitFor_(d,c).divisorBase;b=goog.i18n.NumberFormat.decimalShift_(b,-c);b=this.roundNumber_(b);a=goog.i18n.NumberFormat.decimalShift_(a,-c);a=this.roundNumber_(a);b=this.pluralForm_(b.intValue+b.fracValue);return this.getUnitFor_(c+this.intLog10_(a.intValue), +b)};goog.i18n.NumberFormat.prototype.intLog10_=function(a){if(!isFinite(a))return 0=a%10||9==a%10)&&(10>a%100||19a%100||79a%100||99=c%10&&(12>c%100||14=a.f%10&&(12>a.f%100||14=a?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.csSelect_=function(a,b){const c=a|0;a=goog.i18n.pluralRules.get_vf_(a,b);return 1==c&&0==a.v?goog.i18n.pluralRules.Keyword.ONE:2<=c&&4>=c&&0==a.v?goog.i18n.pluralRules.Keyword.FEW:0!=a.v?goog.i18n.pluralRules.Keyword.MANY:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.plSelect_=function(a,b){const c=a|0;a=goog.i18n.pluralRules.get_vf_(a,b);return 
1==c&&0==a.v?goog.i18n.pluralRules.Keyword.ONE:0==a.v&&2<=c%10&&4>=c%10&&(12>c%100||14=c%10||0==a.v&&5<=c%10&&9>=c%10||0==a.v&&12<=c%100&&14>=c%100?goog.i18n.pluralRules.Keyword.MANY:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.shiSelect_=function(a,b){return 0==(a|0)||1==a?goog.i18n.pluralRules.Keyword.ONE:2<=a&&10>=a?goog.i18n.pluralRules.Keyword.FEW:goog.i18n.pluralRules.Keyword.OTHER};goog.i18n.pluralRules.lvSelect_=function(a,b){b=goog.i18n.pluralRules.get_vf_(a,b);return 0==a%10||11<=a%100&&19>=a%100||2==b.v&&11<=b.f%100&&19>=b.f%100?goog.i18n.pluralRules.Keyword.ZERO:1==a%10&&11!=a%100||2==b.v&&1==b.f%10&&11!=b.f%100||2!=b.v&&1==b.f%10?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.iuSelect_=function(a,b){return 1==a?goog.i18n.pluralRules.Keyword.ONE:2==a?goog.i18n.pluralRules.Keyword.TWO:goog.i18n.pluralRules.Keyword.OTHER};goog.i18n.pluralRules.heSelect_=function(a,b){const c=a|0;b=goog.i18n.pluralRules.get_vf_(a,b);return 1==c&&0==b.v?goog.i18n.pluralRules.Keyword.ONE:2==c&&0==b.v?goog.i18n.pluralRules.Keyword.TWO:0==b.v&&(0>a||10=a%100?goog.i18n.pluralRules.Keyword.FEW:11<=a%100&&19>=a%100?goog.i18n.pluralRules.Keyword.MANY:goog.i18n.pluralRules.Keyword.OTHER};goog.i18n.pluralRules.siSelect_=function(a,b){const c=a|0;b=goog.i18n.pluralRules.get_vf_(a,b);return 0==a||1==a||0==c&&1==b.f?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.cySelect_=function(a,b){return 0==a?goog.i18n.pluralRules.Keyword.ZERO:1==a?goog.i18n.pluralRules.Keyword.ONE:2==a?goog.i18n.pluralRules.Keyword.TWO:3==a?goog.i18n.pluralRules.Keyword.FEW:6==a?goog.i18n.pluralRules.Keyword.MANY:goog.i18n.pluralRules.Keyword.OTHER};goog.i18n.pluralRules.daSelect_=function(a,b){const c=a|0;b=goog.i18n.pluralRules.get_vf_(a,b);b=goog.i18n.pluralRules.get_wt_(b.v,b.f);return 1==a||0!=b.t&&(0==c||1==c)?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER}; 
+goog.i18n.pluralRules.ruSelect_=function(a,b){const c=a|0;a=goog.i18n.pluralRules.get_vf_(a,b);return 0==a.v&&1==c%10&&11!=c%100?goog.i18n.pluralRules.Keyword.ONE:0==a.v&&2<=c%10&&4>=c%10&&(12>c%100||14=c%10||0==a.v&&11<=c%100&&14>=c%100?goog.i18n.pluralRules.Keyword.MANY:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.gvSelect_=function(a,b){const c=a|0;a=goog.i18n.pluralRules.get_vf_(a,b);return 0==a.v&&1==c%10?goog.i18n.pluralRules.Keyword.ONE:0==a.v&&2==c%10?goog.i18n.pluralRules.Keyword.TWO:0!=a.v||0!=c%100&&20!=c%100&&40!=c%100&&60!=c%100&&80!=c%100?0!=a.v?goog.i18n.pluralRules.Keyword.MANY:goog.i18n.pluralRules.Keyword.OTHER:goog.i18n.pluralRules.Keyword.FEW}; +goog.i18n.pluralRules.beSelect_=function(a,b){return 1==a%10&&11!=a%100?goog.i18n.pluralRules.Keyword.ONE:2<=a%10&&4>=a%10&&(12>a%100||14=a%10||11<=a%100&&14>=a%100?goog.i18n.pluralRules.Keyword.MANY:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.gaSelect_=function(a,b){return 1==a?goog.i18n.pluralRules.Keyword.ONE:2==a?goog.i18n.pluralRules.Keyword.TWO:3<=a&&6>=a?goog.i18n.pluralRules.Keyword.FEW:7<=a&&10>=a?goog.i18n.pluralRules.Keyword.MANY:goog.i18n.pluralRules.Keyword.OTHER};goog.i18n.pluralRules.esSelect_=function(a,b){return 1==a?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.dsbSelect_=function(a,b){const c=a|0;a=goog.i18n.pluralRules.get_vf_(a,b);return 0==a.v&&1==c%100||1==a.f%100?goog.i18n.pluralRules.Keyword.ONE:0==a.v&&2==c%100||2==a.f%100?goog.i18n.pluralRules.Keyword.TWO:0==a.v&&3<=c%100&&4>=c%100||3<=a.f%100&&4>=a.f%100?goog.i18n.pluralRules.Keyword.FEW:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.lagSelect_=function(a,b){b=a|0;return 0==a?goog.i18n.pluralRules.Keyword.ZERO:0!=b&&1!=b||0==a?goog.i18n.pluralRules.Keyword.OTHER:goog.i18n.pluralRules.Keyword.ONE};goog.i18n.pluralRules.mkSelect_=function(a,b){const c=a|0;a=goog.i18n.pluralRules.get_vf_(a,b);return 
0==a.v&&1==c%10&&11!=c%100||1==a.f%10&&11!=a.f%100?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.isSelect_=function(a,b){const c=a|0;a=goog.i18n.pluralRules.get_vf_(a,b);a=goog.i18n.pluralRules.get_wt_(a.v,a.f);return 0==a.t&&1==c%10&&11!=c%100||0!=a.t?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER};goog.i18n.pluralRules.kshSelect_=function(a,b){return 0==a?goog.i18n.pluralRules.Keyword.ZERO:1==a?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.roSelect_=function(a,b){const c=a|0;b=goog.i18n.pluralRules.get_vf_(a,b);return 1==c&&0==b.v?goog.i18n.pluralRules.Keyword.ONE:0!=b.v||0==a||2<=a%100&&19>=a%100?goog.i18n.pluralRules.Keyword.FEW:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.arSelect_=function(a,b){return 0==a?goog.i18n.pluralRules.Keyword.ZERO:1==a?goog.i18n.pluralRules.Keyword.ONE:2==a?goog.i18n.pluralRules.Keyword.TWO:3<=a%100&&10>=a%100?goog.i18n.pluralRules.Keyword.FEW:11<=a%100&&99>=a%100?goog.i18n.pluralRules.Keyword.MANY:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.gdSelect_=function(a,b){return 1==a||11==a?goog.i18n.pluralRules.Keyword.ONE:2==a||12==a?goog.i18n.pluralRules.Keyword.TWO:3<=a&&10>=a||13<=a&&19>=a?goog.i18n.pluralRules.Keyword.FEW:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.slSelect_=function(a,b){const c=a|0;a=goog.i18n.pluralRules.get_vf_(a,b);return 0==a.v&&1==c%100?goog.i18n.pluralRules.Keyword.ONE:0==a.v&&2==c%100?goog.i18n.pluralRules.Keyword.TWO:0==a.v&&3<=c%100&&4>=c%100||0!=a.v?goog.i18n.pluralRules.Keyword.FEW:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.ltSelect_=function(a,b){b=goog.i18n.pluralRules.get_vf_(a,b);return 1==a%10&&(11>a%100||19=a%10&&(11>a%100||19=a||11<=a&&99>=a?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.enSelect_=function(a,b){const 
c=a|0;a=goog.i18n.pluralRules.get_vf_(a,b);return 1==c&&0==a.v?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER}; +goog.i18n.pluralRules.kwSelect_=function(a,b){return 0==a?goog.i18n.pluralRules.Keyword.ZERO:1==a?goog.i18n.pluralRules.Keyword.ONE:2==a%100||22==a%100||42==a%100||62==a%100||82==a%100||0==a%1E3&&(1E3<=a%1E5&&2E4>=a%1E5||4E4==a%1E5||6E4==a%1E5||8E4==a%1E5)||0!=a&&1E5==a%1E6?goog.i18n.pluralRules.Keyword.TWO:3==a%100||23==a%100||43==a%100||63==a%100||83==a%100?goog.i18n.pluralRules.Keyword.FEW:1==a||1!=a%100&&21!=a%100&&41!=a%100&&61!=a%100&&81!=a%100?goog.i18n.pluralRules.Keyword.OTHER:goog.i18n.pluralRules.Keyword.MANY}; +goog.i18n.pluralRules.akSelect_=function(a,b){return 0<=a&&1>=a?goog.i18n.pluralRules.Keyword.ONE:goog.i18n.pluralRules.Keyword.OTHER};goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_;"af"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"am"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.hiSelect_);"ar"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.arSelect_); +if("ar_DZ"==goog.LOCALE||"ar-DZ"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.arSelect_;if("ar_EG"==goog.LOCALE||"ar-EG"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.arSelect_;"az"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"be"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.beSelect_);"bg"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_); 
+"bn"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.hiSelect_);"br"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.brSelect_);"bs"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.srSelect_);"ca"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);"chr"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"cs"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.csSelect_); +"cy"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.cySelect_);"da"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.daSelect_);"de"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);if("de_AT"==goog.LOCALE||"de-AT"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_;if("de_CH"==goog.LOCALE||"de-CH"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_; +"el"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"en"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);if("en_AU"==goog.LOCALE||"en-AU"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_;if("en_CA"==goog.LOCALE||"en-CA"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_;if("en_GB"==goog.LOCALE||"en-GB"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_; +if("en_IE"==goog.LOCALE||"en-IE"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_;if("en_IN"==goog.LOCALE||"en-IN"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_;if("en_SG"==goog.LOCALE||"en-SG"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_;if("en_US"==goog.LOCALE||"en-US"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_; 
+if("en_ZA"==goog.LOCALE||"en-ZA"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_;"es"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);if("es_419"==goog.LOCALE||"es-419"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_;if("es_ES"==goog.LOCALE||"es-ES"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_;if("es_MX"==goog.LOCALE||"es-MX"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_; +if("es_US"==goog.LOCALE||"es-US"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_;"et"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);"eu"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"fa"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.hiSelect_);"fi"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);"fil"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.filSelect_); +"fr"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.frSelect_);if("fr_CA"==goog.LOCALE||"fr-CA"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.frSelect_;"ga"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.gaSelect_);"gl"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);"gsw"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"gu"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.hiSelect_); 
+"haw"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"he"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.heSelect_);"hi"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.hiSelect_);"hr"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.srSelect_);"hu"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"hy"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.frSelect_); +"id"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_);"in"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_);"is"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.isSelect_);"it"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);"iw"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.heSelect_);"ja"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_); +"ka"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"kk"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"km"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_);"kn"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.hiSelect_);"ko"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_);"ky"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_); 
+"ln"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.akSelect_);"lo"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_);"lt"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.ltSelect_);"lv"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.lvSelect_);"mk"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.mkSelect_);"ml"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_); +"mn"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"mo"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.roSelect_);"mr"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"ms"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_);"mt"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.mtSelect_);"my"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_); +"nb"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"ne"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"nl"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);"no"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);if("no_NO"==goog.LOCALE||"no-NO"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_;"or"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_); +"pa"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.akSelect_);"pl"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.plSelect_);"pt"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.ptSelect_);if("pt_BR"==goog.LOCALE||"pt-BR"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.ptSelect_;if("pt_PT"==goog.LOCALE||"pt-PT"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_; 
+"ro"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.roSelect_);"ru"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.ruSelect_);"sh"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.srSelect_);"si"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.siSelect_);"sk"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.csSelect_);"sl"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.slSelect_); +"sq"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"sr"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.srSelect_);if("sr_Latn"==goog.LOCALE||"sr-Latn"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.srSelect_;"sv"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);"sw"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_);"ta"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_); +"te"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"th"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_);"tl"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.filSelect_);"tr"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"uk"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.ruSelect_);"ur"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.enSelect_); +"uz"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.esSelect_);"vi"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_);"zh"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_);if("zh_CN"==goog.LOCALE||"zh-CN"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_;if("zh_HK"==goog.LOCALE||"zh-HK"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_; 
+if("zh_TW"==goog.LOCALE||"zh-TW"==goog.LOCALE)goog.i18n.pluralRules.select=goog.i18n.pluralRules.defaultSelect_;"zu"==goog.LOCALE&&(goog.i18n.pluralRules.select=goog.i18n.pluralRules.hiSelect_);goog.i18n.MessageFormat=function(a){this.pattern_=a;this.parsedPattern_=this.literals_=this.initialLiterals_=null;this.numberFormatter_=goog.i18n.MessageFormat.getNumberFormatter_()};goog.i18n.MessageFormat.numberFormatterSymbols_=null;goog.i18n.MessageFormat.compactNumberFormatterSymbols_=null;goog.i18n.MessageFormat.numberFormatter_=null;goog.i18n.MessageFormat.LITERAL_PLACEHOLDER_="﷟_";goog.i18n.MessageFormat.Element_={STRING:0,BLOCK:1}; +goog.i18n.MessageFormat.BlockType_={PLURAL:0,ORDINAL:1,SELECT:2,SIMPLE:3,STRING:4,UNKNOWN:5};goog.i18n.MessageFormat.OTHER_="other";goog.i18n.MessageFormat.REGEX_LITERAL_=/'([{}#].*?)'/g;goog.i18n.MessageFormat.REGEX_DOUBLE_APOSTROPHE_=/''/g; +goog.i18n.MessageFormat.getNumberFormatter_=function(){var a=goog.i18n.NumberFormatSymbols,b=goog.i18n.CompactNumberFormatSymbols;if(goog.i18n.MessageFormat.numberFormatterSymbols_!==a||goog.i18n.MessageFormat.compactNumberFormatterSymbols_!==b)goog.i18n.MessageFormat.numberFormatterSymbols_=a,goog.i18n.MessageFormat.compactNumberFormatterSymbols_=b,goog.i18n.MessageFormat.numberFormatter_=new goog.i18n.NumberFormat(goog.i18n.NumberFormat.Format.DECIMAL);return goog.i18n.MessageFormat.numberFormatter_}; +goog.i18n.MessageFormat.prototype.format=function(a){return this.format_(a,!1)};goog.i18n.MessageFormat.prototype.formatIgnoringPound=function(a){return this.format_(a,!0)}; +goog.i18n.MessageFormat.prototype.format_=function(a,b){this.init_();if(!this.parsedPattern_||0==this.parsedPattern_.length)return"";this.literals_=goog.array.clone(this.initialLiterals_);var c=[];this.formatBlock_(this.parsedPattern_,a,b,c);a=c.join("");for(b||goog.asserts.assert(-1==a.search("#"),"Not all # were replaced.");0=a)return 
d;a=cljs.core.second.call(null,cljs.core.first.call(null,cljs.core.rsubseq.call(null,f,cljs.core._LT_,e*a)));return cljs.core.truth_(a)? +a:d}}; +bigml.dixie.flatline.population.cdf=function(a){var b=null!=a&&(a.cljs$lang$protocol_mask$partition0$&64||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$ISeq$)?cljs.core.apply.call(null,cljs.core.hash_map,a):a;a=cljs.core.get.call(null,b,new cljs.core.Keyword(null,"bins","bins",1670395210));b=cljs.core.get.call(null,b,new cljs.core.Keyword(null,"counts","counts",234305892));b=cljs.core.sort_by.call(null,cljs.core.first,cljs.core._LT_,cljs.core.empty_QMARK_.call(null,b)?a:b);a=cljs.core.map.call(null,cljs.core.first, +b);var c=cljs.core.last.call(null,a),d=cljs.core.first.call(null,a);b=cljs.core.map.call(null,cljs.core.second,b);var e=cljs.core.reduce.call(null,cljs.core._PLUS_,b);b=cljs.core.map.call(null,function(a){return a/e},b);b=cljs.core.reductions.call(null,cljs.core._PLUS_,b);var f=cljs.core.into.call(null,cljs.core.sorted_map.call(null),cljs.core.map.call(null,cljs.core.vector,a,b));return function(a){if(a>=c)return 1;if(af?d:c.call(null,b)}}; +bigml.dixie.flatline.args.choose_type=function(a){return cljs.core.juxt.call(null,bigml.dixie.flatline.types.fn_type,bigml.dixie.flatline.types.fn_desc).call(null,function(){var b=cljs.core.some.call(null,function(a){return bigml.dixie.flatline.types.var_fn_QMARK_.call(null,a)?a:null},a);return cljs.core.truth_(b)?b:cljs.core.first.call(null,a)}())}; +bigml.dixie.flatline.args.check_args=function(a){for(var b=[],c=arguments.length,d=0;;)if(d>16)+(b>>16)+(c>>16)<<16|c&65535}function c(a,b){return a>>>b|a<<32-b}a=function(a){a=a.replace(/\r\n/g,"\n");for(var b="",c=0;cd?b+=String.fromCharCode(d):(127d?b+=String.fromCharCode(d>>6|192):(b+=String.fromCharCode(d>>12|224),b+=String.fromCharCode(d>>6&63|128)),b+=String.fromCharCode(d&63|128))}return b}(a);return function(a){for(var b="",c=0;c<4*a.length;c++)b+= 
+"0123456789abcdef".charAt(a[c>>2]>>8*(3-c%4)+4&15)+"0123456789abcdef".charAt(a[c>>2]>>8*(3-c%4)&15);return b}(function(a,e){var d=[1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372, +1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298],g=[1779033703,3144134277,1013904242,2773480762,1359893119,2600822924,528734635,1541459225],h=Array(64),k,l;a[e>>5]|=128<<24-e%32;a[(e+64>>9<<4)+15]=e;for(k=0;kl;l++){if(16>l)h[l]=a[l+k];else{var v=l;var w=h[l-2];w=c(w,17)^c(w,19)^w>>>10;w=b(w,h[l-7]);var x=h[l-15];x=c(x,7)^c(x,18)^x>>>3;h[v]=b(b(w,x),h[l-16])}v=q;v=c(v,6)^c(v,11)^c(v,25);v=b(b(b(b(u,v),q&r^~q&t),d[l]),h[l]);u=e;u=c(u,2)^c(u,13)^c(u,22);w=b(u,e&m^e&n^m&n);u=t;t=r;r=q;q=b(p,v);p=n;n=m;m=e;e=b(v,w)}g[0]=b(e,g[0]);g[1]=b(m,g[1]);g[2]=b(n,g[2]);g[3]=b(p,g[3]);g[4]=b(q,g[4]);g[5]=b(r,g[5]);g[6]=b(t,g[6]);g[7]=b(u, +g[7])}return g}(function(a){for(var b=[],c=0;c<8*a.length;c+=8)b[c>>5]|=(a.charCodeAt(c/8)&255)<<24-c%32;return b}(a),8*a.length))};bigml.dixie.flatline.text={};bigml.dixie.flatline.text.term_stream=function(a){return"string"===typeof a?cljs.core.keep.call(null,cljs.core.not_empty,clojure.string.split.call(null,a,/[\W_]/)):null};bigml.dixie.flatline.text.term_stemmer=function(a){return cljs.core.identity};bigml.dixie.flatline.text.detect_language=cljs.core.constantly.call(null,new cljs.core.Keyword(null,"en","en",88457073)); 
+cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"occurrences","occurrences",295025356),function(){var a=function(a,b,e,f){a=cljs.core.nth.call(null,e,0,null);f=cljs.core.nth.call(null,e,1,null);var c=cljs.core.nth.call(null,e,2,null);e=cljs.core.nth.call(null,e,3,null);bigml.dixie.flatline.errors.check_arity.call(null,b,2,4);e=cljs.core.truth_(e)?e:bigml.dixie.flatline.types.constant_fn.call(null,"none","string");c=cljs.core.truth_(c)?c:bigml.dixie.flatline.types.constant_fn.call(null, +!1);bigml.dixie.flatline.types.check_types.call(null,b,new cljs.core.PersistentVector(null,4,5,cljs.core.PersistentVector.EMPTY_NODE,[a,f,c,e],null),new cljs.core.PersistentVector(null,4,5,cljs.core.PersistentVector.EMPTY_NODE,[new cljs.core.Keyword(null,"string","string",-1989541586),new cljs.core.Keyword(null,"string","string",-1989541586),new cljs.core.Keyword(null,"boolean","boolean",-1919418404),new cljs.core.Keyword(null,"string","string",-1989541586)],null));cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null, +c))||bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-type-error","flatline-type-error",845929452),"Case flag must be a constant in %s",b);cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,e))||bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-type-error","flatline-type-error",845929452),"Language must be a constant in %s",b);var d=bigml.dixie.flatline.text.term_stemmer.call(null,e.call(null)),k=cljs.core.truth_(d)? 
+cljs.core.truth_(c.call(null))?cljs.core.comp.call(null,d,clojure.string.lower_case):d:null;cljs.core.truth_(k)||bigml.dixie.flatline.utils.raise.call(null,new cljs.core.Keyword(null,"flatline-invalid-arguments","flatline-invalid-arguments",1649316504),"Unknown language %s in %s",e.call(null),b);if(cljs.core.truth_(bigml.dixie.flatline.types.constant_fn_QMARK_.call(null,f))){var l=k.call(null,f.call(null)),m=cljs.core.comp.call(null,cljs.core.PersistentHashSet.createAsIfByAssoc([l]),k);b=function(a){return cljs.core.truth_(a)? +(cljs.core._EQ_.call(null,l,k.call(null,a))?1:0)+cljs.core.count.call(null,cljs.core.filter.call(null,m,bigml.dixie.flatline.text.term_stream.call(null,a))):null};return bigml.dixie.flatline.eval.make_primop.call(null,b,new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[a],null),new cljs.core.Keyword(null,"numeric","numeric",-1495594714))}b=function(a,b){if(cljs.core.truth_(cljs.core.truth_(a)?b:a)){b=k.call(null,b);var c=cljs.core.comp.call(null,cljs.core.PersistentHashSet.createAsIfByAssoc([b]), +k);c=cljs.core.filter.call(null,c,bigml.dixie.flatline.text.term_stream.call(null,a));return(cljs.core._EQ_.call(null,b,a)?1:0)+cljs.core.count.call(null,c)}return null};return bigml.dixie.flatline.eval.make_primop.call(null,b,new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[a,f],null),new cljs.core.Keyword(null,"numeric","numeric",-1495594714))},b=function(b,d,e,f){var c=null;if(3b)&&1>=b}); +cljs.core._add_method.call(null,bigml.hideo.error.valid.validate_STAR_,new cljs.core.Keyword(null,"posint","posint",-2069727394),function(a,b){return cljs.core.integer_QMARK_.call(null,b)&&0b)});cljs.core._add_method.call(null,bigml.hideo.error.valid.validate_STAR_,new cljs.core.Keyword(null,"non-negnum","non-negnum",-1832609773),function(a,b){return"number"===typeof b&&!(0>b)}); +cljs.core._add_method.call(null,bigml.hideo.error.valid.validate_STAR_,new 
cljs.core.Keyword(null,"negint","negint",-1697065950),function(a,b){return cljs.core.integer_QMARK_.call(null,b)&&0>b});cljs.core._add_method.call(null,bigml.hideo.error.valid.validate_STAR_,new cljs.core.Keyword(null,"negnum","negnum",1224917298),function(a,b){return"number"===typeof b&&0>b}); +cljs.core._add_method.call(null,bigml.hideo.error.valid.validate_STAR_,new cljs.core.Keyword(null,"percentage","percentage",-1610213650),function(a,b){return"number"===typeof b&&!(0>b)&&100>=b});cljs.core._add_method.call(null,bigml.hideo.error.valid.validate_STAR_,new cljs.core.Keyword(null,"numseq","numseq",-2000947142),function(a,b){return bigml.hideo.error.valid.is_array_QMARK_.call(null,b)&&!cljs.core.empty_QMARK_.call(null,b)&&cljs.core.every_QMARK_.call(null,cljs.core.number_QMARK_,b)}); +cljs.core._add_method.call(null,bigml.hideo.error.valid.validate_STAR_,new cljs.core.Keyword(null,"posintseq","posintseq",417757074),function(a,b){return bigml.hideo.error.valid.is_array_QMARK_.call(null,b)?cljs.core.empty_QMARK_.call(null,b)?!1:cljs.core.apply.call(null,bigml.hideo.error.valid.valid_QMARK_,new cljs.core.Keyword(null,"posint","posint",-2069727394),b):!1}); +cljs.core._add_method.call(null,bigml.hideo.error.valid.validate_STAR_,new cljs.core.Keyword(null,"string","string",-1989541586),function(a,b){return"string"===typeof b});cljs.core._add_method.call(null,bigml.hideo.error.valid.validate_STAR_,new cljs.core.Keyword(null,"nestring","nestring",-1735528560),function(a,b){return"string"===typeof b&&0cljs.core.nth.call(null,a,1)&&cljs.core.nth.call(null,a,4)>cljs.core.nth.call(null,a,2)}; +bigml.dixie.fields.regions.bounding_box_QMARK_=cljs.core.comp.call(null,cljs.core.empty_QMARK_,cljs.core.first);bigml.dixie.fields.regions.make_bounding_box=function(a,b){return new cljs.core.PersistentVector(null,5,5,cljs.core.PersistentVector.EMPTY_NODE,["",0,0,a,b],null)};bigml.dixie.fields.regions.regions_QMARK_=function(a){return 
cljs.core.sequential_QMARK_.call(null,a)&&cljs.core.every_QMARK_.call(null,bigml.dixie.fields.regions.region_QMARK_,a)};bigml.dixie.fields.regions.regions__GT_str=cljs.core.pr_str;bigml.dixie.flatline.regions={}; +cljs.core._add_method.call(null,bigml.dixie.flatline.eval.primop,new cljs.core.Keyword(null,"region?","region?",1275253817),function(){var a=function(a,b,e,f){a=cljs.core.nth.call(null,e,0,null);bigml.dixie.flatline.errors.check_arity.call(null,b,1);return bigml.dixie.flatline.eval.make_primop.call(null,bigml.dixie.fields.regions.region_QMARK_,new cljs.core.PersistentVector(null,1,5,cljs.core.PersistentVector.EMPTY_NODE,[a],null),new cljs.core.Keyword(null,"boolean","boolean",-1919418404))},b=function(b, +d,e,f){var c=null;if(3>>32-g,c)}function d(a,c,d,e,f,g,h){a=b(a,b(b(c&e|d&~e,f),h));return b(a<>>32-g,c)}function e(a,c,d,e,f,g,h){a=b(a,b(b(c^d^e,f),h));return b(a<>>32-g,c)}function f(a,c,d,e,f,g,h){a=b(a, +b(b(d^(c|~e),f),h));return b(a<>>32-g,c)}function g(a){var b="",c;for(c=0;3>=c;c++){var d=a>>>8*c&255;d="0"+d.toString(16);b+=d.substr(d.length-2,2)}return b}var h=[];a=function(a){a=a.replace(/\r\n/g,"\n");for(var b="",c=0;cd?b+=String.fromCharCode(d):(127d?b+=String.fromCharCode(d>>6|192):(b+=String.fromCharCode(d>>12|224),b+=String.fromCharCode(d>>6&63|128)),b+=String.fromCharCode(d&63|128))}return b}(a);h=function(a){var b=a.length;var c= +b+8;for(var d=16*((c-c%64)/64+1),e=Array(d-1),f,g=0;g>>29;return e}(a);var k=1732584193;var l=4023233417;var m=2562383102;var n=271733878;for(a=0;a>>32-b}function c(a){var b="",c;for(c=7;0<=c;c--){var d=a>>>4*c&15;b+=d.toString(16)}return b}var d,e=Array(80),f=1732584193,g=4023233417,h=2562383102,k=271733878,l=3285377520;a=function(a){a=a.replace(/\r\n/g,"\n");for(var b="",c=0;cd?b+=String.fromCharCode(d):(127d?b+=String.fromCharCode(d>>6|192):(b+=String.fromCharCode(d>>12|224),b+=String.fromCharCode(d>>6&63|128)),b+=String.fromCharCode(d& +63|128))}return b}(a);var m=a.length;var 
n=[];for(d=0;d>>29);n.push(m<<3&4294967295);for(a=0;ad;d++)e[d]=n[a+d];for(d=16;79>=d;d++)e[d]=b(e[d-3]^e[d-8]^e[d-14]^e[d-16],1);p=f;m=g;var q=h;var r=k;var t=l;for(d=0;19>=d;d++){var u=b(p,5)+(m&q|~m&r)+t+e[d]+1518500249&4294967295;t=r;r=q;q=b(m,30);m=p;p=u}for(d=20;39>=d;d++)u=b(p,5)+(m^q^r)+t+e[d]+1859775393&4294967295,t=r,r=q,q=b(m,30),m=p,p=u;for(d=40;59>=d;d++)u=b(p,5)+(m&q|m&r|q&r)+t+e[d]+2400959708&4294967295,t=r,r=q,q=b(m,30),m=p,p=u;for(d=60;79>=d;d++)u=b(p,5)+(m^q^r)+t+e[d]+3395469782&4294967295,t=r,r=q,q=b(m,30),m=p,p=u;f=f+p&4294967295; +g=g+m&4294967295;h=h+q&4294967295;k=k+r&4294967295;l=l+t&4294967295}u=c(f)+c(g)+c(h)+c(k)+c(l);return u.toLowerCase()};bigml.dixie.flatline.strings={};bigml.dixie.flatline.strings.deep_merge_with=function(a){for(var b=[],c=arguments.length,d=0;;)if(d=parseInt(a,b)}; +cljs.tools.reader.read_char_STAR_=function(a,b,c,d){b=cljs.tools.reader.reader_types.read_char.call(null,a);if(null!=b){b=cljs.tools.reader.macro_terminating_QMARK_.call(null,b)||cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null,b)?cljs.core.str.cljs$core$IFn$_invoke$arity$1(b):cljs.tools.reader.read_token.call(null,a,new cljs.core.Keyword(null,"character","character",380652989),b);c=b.length;if(1===c)return b.charAt(0);if(cljs.core._EQ_.call(null,b,"newline"))return"\n";if(cljs.core._EQ_.call(null, +b,"space"))return" ";if(cljs.core._EQ_.call(null,b,"tab"))return"\t";if(cljs.core._EQ_.call(null,b,"backspace"))return"\b";if(cljs.core._EQ_.call(null,b,"formfeed"))return"\f";if(cljs.core._EQ_.call(null,b,"return"))return"\r";if(goog.string.startsWith(b,"u"))return b=cljs.tools.reader.read_unicode_char.call(null,b,1,4,16),c=b.charCodeAt(0),c>cljs.tools.reader.upper_limit&&cc?c:!1):null};cljs.tools.reader.check_reserved_features=function(a,b){return cljs.core.truth_(cljs.core.get.call(null,cljs.tools.reader.RESERVED_FEATURES,b))?cljs.tools.reader.impl.errors.reader_error.call(null,a,"Feature name ",b," is reserved"):null}; 
+cljs.tools.reader.check_invalid_read_cond=function(a,b,c){return a===cljs.tools.reader.READ_FINISHED?0>c?cljs.tools.reader.impl.errors.reader_error.call(null,b,"read-cond requires an even number of forms"):cljs.tools.reader.impl.errors.reader_error.call(null,b,"read-cond starting on line ",c," requires an even number of forms"):null}; +cljs.tools.reader.read_suppress=function(a,b,c,d){var e=cljs.tools.reader._STAR_suppress_read_STAR_;cljs.tools.reader._STAR_suppress_read_STAR_=!0;try{var f=cljs.tools.reader.read_STAR_.call(null,b,!1,cljs.tools.reader.READ_EOF,")",c,d);cljs.tools.reader.check_eof_error.call(null,f,b,a);return f===cljs.tools.reader.READ_FINISHED?cljs.tools.reader.READ_FINISHED:null}finally{cljs.tools.reader._STAR_suppress_read_STAR_=e}}; +if("undefined"===typeof cljs||"undefined"===typeof cljs.tools||"undefined"===typeof cljs.tools.reader||"undefined"===typeof cljs.tools.reader.NO_MATCH)cljs.tools.reader.NO_MATCH={}; +cljs.tools.reader.match_feature=function(a,b,c,d){var e=cljs.tools.reader.read_STAR_.call(null,b,!1,cljs.tools.reader.READ_EOF,")",c,d);cljs.tools.reader.check_eof_error.call(null,e,b,a);if(cljs.core._EQ_.call(null,e,cljs.tools.reader.READ_FINISHED))return cljs.tools.reader.READ_FINISHED;cljs.tools.reader.check_reserved_features.call(null,b,e);if(cljs.tools.reader.has_feature_QMARK_.call(null,b,e,c))return c=cljs.tools.reader.read_STAR_.call(null,b,!1,cljs.tools.reader.READ_EOF,")",c,d),cljs.tools.reader.check_eof_error.call(null, +c,b,a),cljs.tools.reader.check_invalid_read_cond.call(null,c,b,a),c;a=cljs.tools.reader.read_suppress.call(null,a,b,c,d);return cljs.core.truth_(a)?a:cljs.tools.reader.NO_MATCH}; +cljs.tools.reader.read_cond_delimited=function(a,b,c,d){var e=cljs.tools.reader.reader_types.indexing_reader_QMARK_.call(null,a)?cljs.tools.reader.reader_types.get_line_number.call(null,a):-1;a:for(var 
f=cljs.tools.reader.NO_MATCH,g=null;;)if(f===cljs.tools.reader.NO_MATCH)if(f=cljs.tools.reader.match_feature.call(null,e,a,c,d),f===cljs.tools.reader.READ_FINISHED){c=cljs.tools.reader.READ_FINISHED;break a}else g=null;else if(g!==cljs.tools.reader.READ_FINISHED)g=cljs.tools.reader.read_suppress.call(null, +e,a,c,d);else{c=f;break a}return c===cljs.tools.reader.READ_FINISHED?a:cljs.core.truth_(b)?null!=c&&(c.cljs$lang$protocol_mask$partition0$&16777216||cljs.core.PROTOCOL_SENTINEL===c.cljs$core$ISequential$)?(goog.array.insertArrayAt(d,cljs.core.to_array.call(null,c),0),a):cljs.tools.reader.impl.errors.reader_error.call(null,a,"Spliced form list in read-cond-splicing must implement ISequential"):c}; +cljs.tools.reader.read_cond=function(a,b,c,d){if(cljs.core.not.call(null,cljs.core.truth_(c)?(new cljs.core.PersistentHashSet(null,new cljs.core.PersistentArrayMap(null,2,[new cljs.core.Keyword(null,"preserve","preserve",1276846509),null,new cljs.core.Keyword(null,"allow","allow",-1857325745),null],null),null)).call(null,(new cljs.core.Keyword(null,"read-cond","read-cond",1056899244)).cljs$core$IFn$_invoke$arity$1(c)):c))throw cljs.core.ex_info.call(null,"Conditional read not allowed",new cljs.core.PersistentArrayMap(null, +1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"runtime-exception","runtime-exception",-1495664514)],null));var e=cljs.tools.reader.reader_types.read_char.call(null,a);if(cljs.core.truth_(e))if(e=(b=cljs.core._EQ_.call(null,e,"@"))?cljs.tools.reader.reader_types.read_char.call(null,a):e,b&&(cljs.core.truth_(cljs.tools.reader._STAR_read_delim_STAR_)||cljs.tools.reader.impl.errors.reader_error.call(null,a,"cond-splice not in list")),e=cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null, +e)?cljs.tools.reader.impl.commons.read_past.call(null,cljs.tools.reader.impl.utils.whitespace_QMARK_,a):e,cljs.core.truth_(e)){if(cljs.core.not_EQ_.call(null,e,"("))throw cljs.core.ex_info.call(null,"read-cond body must be a 
list",new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"runtime-exception","runtime-exception",-1495664514)],null));var f=cljs.tools.reader._STAR_suppress_read_STAR_;var g=cljs.tools.reader._STAR_suppress_read_STAR_; +g=cljs.core.truth_(g)?g:cljs.core._EQ_.call(null,new cljs.core.Keyword(null,"preserve","preserve",1276846509),(new cljs.core.Keyword(null,"read-cond","read-cond",1056899244)).cljs$core$IFn$_invoke$arity$1(c));cljs.tools.reader._STAR_suppress_read_STAR_=g;try{return cljs.core.truth_(cljs.tools.reader._STAR_suppress_read_STAR_)?cljs.tools.reader.impl.utils.reader_conditional.call(null,cljs.tools.reader.read_list.call(null,a,e,c,d),b):cljs.tools.reader.read_cond_delimited.call(null,a,b,c,d)}finally{cljs.tools.reader._STAR_suppress_read_STAR_= +f}}else return cljs.tools.reader.impl.errors.throw_eof_in_character.call(null,a);else return cljs.tools.reader.impl.errors.throw_eof_in_character.call(null,a)};cljs.tools.reader.arg_env=null;cljs.tools.reader.garg=function(a){return cljs.core.symbol.call(null,[-1===a?"rest":["p",cljs.core.str.cljs$core$IFn$_invoke$arity$1(a)].join(""),"__",cljs.core.str.cljs$core$IFn$_invoke$arity$1(cljs.tools.reader.impl.utils.next_id.call(null)),"#"].join(""))}; +cljs.tools.reader.read_fn=function(a,b,c,d){if(cljs.core.truth_(cljs.tools.reader.arg_env))throw cljs.core.ex_info.call(null,"Nested #()s are not allowed",new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"illegal-state","illegal-state",-1519851182)],null));b=cljs.tools.reader.arg_env;var e=cljs.core.sorted_map.call(null);cljs.tools.reader.arg_env=e;try{var f=cljs.tools.reader.read_STAR_.call(null,function(){cljs.tools.reader.reader_types.unread.call(null, +a,"(");return a}(),!0,null,c,d),g=cljs.core.rseq.call(null,cljs.tools.reader.arg_env),h=g?function(){var 
a=cljs.core.key.call(null,cljs.core.first.call(null,g)),b=function(){for(var b=1,c=cljs.core.transient$.call(null,cljs.core.PersistentVector.EMPTY);;){if(b>a)return cljs.core.persistent_BANG_.call(null,c);var d=b+1;c=cljs.core.conj_BANG_.call(null,c,function(){var a=cljs.core.get.call(null,cljs.tools.reader.arg_env,b);return cljs.core.truth_(a)?a:cljs.tools.reader.garg.call(null,b)}());b=d}}(); +return cljs.core.truth_(cljs.tools.reader.arg_env.call(null,-1))?cljs.core.conj.call(null,b,new cljs.core.Symbol(null,"\x26","\x26",-2144855648,null),cljs.tools.reader.arg_env.call(null,-1)):b}():cljs.core.PersistentVector.EMPTY;return new cljs.core.List(null,new cljs.core.Symbol(null,"fn*","fn*",-752876845,null),new cljs.core.List(null,h,new cljs.core.List(null,f,null,1,null),2,null),3,null)}finally{cljs.tools.reader.arg_env=b}}; +cljs.tools.reader.register_arg=function(a){if(cljs.core.truth_(cljs.tools.reader.arg_env)){var b=cljs.tools.reader.arg_env.call(null,a);if(cljs.core.truth_(b))return b;b=cljs.tools.reader.garg.call(null,a);cljs.tools.reader.arg_env=cljs.core.assoc.call(null,cljs.tools.reader.arg_env,a,b);return b}throw cljs.core.ex_info.call(null,"Arg literal not in #()",new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"illegal-state","illegal-state", +-1519851182)],null));}; +cljs.tools.reader.read_arg=function(a,b,c,d){if(null==cljs.tools.reader.arg_env)return cljs.tools.reader.read_symbol.call(null,a,b);b=cljs.tools.reader.reader_types.peek_char.call(null,a);if(cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null,b)||cljs.tools.reader.macro_terminating_QMARK_.call(null,b)||null==b)return cljs.tools.reader.register_arg.call(null,1);if(cljs.core._EQ_.call(null,b,"\x26"))return cljs.tools.reader.reader_types.read_char.call(null,a),cljs.tools.reader.register_arg.call(null,-1); +a=cljs.tools.reader.read_STAR_.call(null,a,!0,null,c,d);if(cljs.core.integer_QMARK_.call(null,a))return 
cljs.tools.reader.register_arg.call(null,a);throw cljs.core.ex_info.call(null,"Arg literal must be %, %\x26 or %integer",new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"illegal-state","illegal-state",-1519851182)],null));};cljs.tools.reader.gensym_env=null; +cljs.tools.reader.read_unquote=function(a,b,c,d){b=cljs.tools.reader.reader_types.peek_char.call(null,a);return cljs.core.truth_(b)?cljs.core._EQ_.call(null,"@",b)?cljs.tools.reader.wrapping_reader.call(null,new cljs.core.Symbol("clojure.core","unquote-splicing","clojure.core/unquote-splicing",-552003150,null)).call(null,function(){cljs.tools.reader.reader_types.read_char.call(null,a);return a}(),"@",c,d):cljs.tools.reader.wrapping_reader.call(null,new cljs.core.Symbol("clojure.core","unquote","clojure.core/unquote", +843087510,null)).call(null,a,"~",c,d):null};cljs.tools.reader.unquote_splicing_QMARK_=function(a){return cljs.core.seq_QMARK_.call(null,a)&&cljs.core._EQ_.call(null,cljs.core.first.call(null,a),new cljs.core.Symbol("clojure.core","unquote-splicing","clojure.core/unquote-splicing",-552003150,null))}; +cljs.tools.reader.unquote_QMARK_=function(a){return cljs.core.seq_QMARK_.call(null,a)&&cljs.core._EQ_.call(null,cljs.core.first.call(null,a),new cljs.core.Symbol("clojure.core","unquote","clojure.core/unquote",843087510,null))}; +cljs.tools.reader.expand_list=function(a){a=cljs.core.seq.call(null,a);for(var b=cljs.core.transient$.call(null,cljs.core.PersistentVector.EMPTY);;)if(a){var c=cljs.core.first.call(null,a);b=cljs.core.conj_BANG_.call(null,b,cljs.tools.reader.unquote_QMARK_.call(null,c)?new cljs.core.List(null,new cljs.core.Symbol("clojure.core","list","clojure.core/list",-1119203325,null),new cljs.core.List(null,cljs.core.second.call(null,c),null,1,null),2,null):cljs.tools.reader.unquote_splicing_QMARK_.call(null, +c)?cljs.core.second.call(null,c):new cljs.core.List(null,new 
cljs.core.Symbol("clojure.core","list","clojure.core/list",-1119203325,null),new cljs.core.List(null,cljs.tools.reader.syntax_quote_STAR_.call(null,c),null,1,null),2,null));a=cljs.core.next.call(null,a)}else return cljs.core.seq.call(null,cljs.core.persistent_BANG_.call(null,b))}; +cljs.tools.reader.flatten_map=function(a){a=cljs.core.seq.call(null,a);for(var b=cljs.core.transient$.call(null,cljs.core.PersistentVector.EMPTY);;)if(a){var c=cljs.core.first.call(null,a);a=cljs.core.next.call(null,a);b=cljs.core.conj_BANG_.call(null,cljs.core.conj_BANG_.call(null,b,cljs.core.key.call(null,c)),cljs.core.val.call(null,c))}else return cljs.core.seq.call(null,cljs.core.persistent_BANG_.call(null,b))}; +cljs.tools.reader.register_gensym=function(a){if(cljs.core.not.call(null,cljs.tools.reader.gensym_env))throw cljs.core.ex_info.call(null,"Gensym literal not in syntax-quote",new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"illegal-state","illegal-state",-1519851182)],null));var b=cljs.core.get.call(null,cljs.tools.reader.gensym_env,a);if(cljs.core.truth_(b))return b;b=cljs.core.symbol.call(null,[cljs.core.subs.call(null,cljs.core.name.call(null, +a),0,cljs.core.name.call(null,a).length-1),"__",cljs.core.str.cljs$core$IFn$_invoke$arity$1(cljs.tools.reader.impl.utils.next_id.call(null)),"__auto__"].join(""));cljs.tools.reader.gensym_env=cljs.core.assoc.call(null,cljs.tools.reader.gensym_env,a,b);return b}; +cljs.tools.reader.add_meta=function(a,b){return null!=a&&(a.cljs$lang$protocol_mask$partition0$&262144||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$IWithMeta$)&&cljs.core.seq.call(null,cljs.core.dissoc.call(null,cljs.core.meta.call(null,a),new cljs.core.Keyword(null,"line","line",212345235),new cljs.core.Keyword(null,"column","column",2078222095),new cljs.core.Keyword(null,"end-line","end-line",1837326455),new cljs.core.Keyword(null,"end-column","end-column",1425389514),new cljs.core.Keyword(null,"file", 
+"file",-1269645878),new cljs.core.Keyword(null,"source","source",-433931539)))?new cljs.core.List(null,new cljs.core.Symbol("cljs.core","with-meta","cljs.core/with-meta",749126446,null),new cljs.core.List(null,b,new cljs.core.List(null,cljs.tools.reader.syntax_quote_STAR_.call(null,cljs.core.meta.call(null,a)),null,1,null),2,null),3,null):b}; +cljs.tools.reader.syntax_quote_coll=function(a,b){b=new cljs.core.List(null,new cljs.core.Symbol("cljs.core","sequence","cljs.core/sequence",1908459032,null),new cljs.core.List(null,cljs.core.cons.call(null,new cljs.core.Symbol("cljs.core","concat","cljs.core/concat",-1133584918,null),cljs.tools.reader.expand_list.call(null,b)),null,1,null),2,null);return cljs.core.truth_(a)?new cljs.core.List(null,new cljs.core.Symbol("cljs.core","apply","cljs.core/apply",1757277831,null),new cljs.core.List(null, +a,new cljs.core.List(null,b,null,1,null),2,null),3,null):b};cljs.tools.reader.map_func=function(a){return 16<=cljs.core.count.call(null,a)?new cljs.core.Symbol("cljs.core","hash-map","cljs.core/hash-map",303385767,null):new cljs.core.Symbol("cljs.core","array-map","cljs.core/array-map",-1519210683,null)};cljs.tools.reader.bool_QMARK_=function(a){return a instanceof Boolean||!0===a||!1===a}; +cljs.tools.reader.resolve_symbol=function(a){throw cljs.core.ex_info.call(null,"resolve-symbol is not implemented",new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"sym","sym",-1444860305),a],null));}; +cljs.tools.reader.syntax_quote_STAR_=function(a){return cljs.tools.reader.add_meta.call(null,a,cljs.core.special_symbol_QMARK_.call(null,a)?new cljs.core.List(null,new cljs.core.Symbol(null,"quote","quote",1377916282,null),new cljs.core.List(null,a,null,1,null),2,null):a instanceof cljs.core.Symbol?new cljs.core.List(null,new cljs.core.Symbol(null,"quote","quote",1377916282,null),new cljs.core.List(null,cljs.core.not.call(null,cljs.core.namespace.call(null,a))&&goog.string.endsWith(cljs.core.name.call(null, 
+a),"#")?cljs.tools.reader.register_gensym.call(null,a):function(){var b=cljs.core.str.cljs$core$IFn$_invoke$arity$1(a);return goog.string.endsWith(b,".")?(b=cljs.core.symbol.call(null,cljs.core.subs.call(null,b,0,b.length-1)),cljs.core.symbol.call(null,[cljs.core.str.cljs$core$IFn$_invoke$arity$1(cljs.tools.reader.resolve_symbol.call(null,b)),"."].join(""))):cljs.tools.reader.resolve_symbol.call(null,a)}(),null,1,null),2,null):cljs.tools.reader.unquote_QMARK_.call(null,a)?cljs.core.second.call(null, +a):cljs.tools.reader.unquote_splicing_QMARK_.call(null,a)?function(){throw cljs.core.ex_info.call(null,"unquote-splice not in list",new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"illegal-state","illegal-state",-1519851182)],null));}():cljs.core.coll_QMARK_.call(null,a)?null!=a&&(a.cljs$lang$protocol_mask$partition0$&67108864||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$IRecord$)?a:cljs.core.map_QMARK_.call(null,a)?cljs.tools.reader.syntax_quote_coll.call(null, +cljs.tools.reader.map_func.call(null,a),cljs.tools.reader.flatten_map.call(null,a)):cljs.core.vector_QMARK_.call(null,a)?new cljs.core.List(null,new cljs.core.Symbol("cljs.core","vec","cljs.core/vec",307622519,null),new cljs.core.List(null,cljs.tools.reader.syntax_quote_coll.call(null,null,a),null,1,null),2,null):cljs.core.set_QMARK_.call(null,a)?cljs.tools.reader.syntax_quote_coll.call(null,new cljs.core.Symbol("cljs.core","hash-set","cljs.core/hash-set",1130426749,null),a):cljs.core.seq_QMARK_.call(null, +a)||cljs.core.list_QMARK_.call(null,a)?function(){var b=cljs.core.seq.call(null,a);return b?cljs.tools.reader.syntax_quote_coll.call(null,null,b):cljs.core.list(new cljs.core.Symbol("cljs.core","list","cljs.core/list",-1331406371,null))}():function(){throw cljs.core.ex_info.call(null,"Unknown Collection type",new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new 
cljs.core.Keyword(null,"unsupported-operation","unsupported-operation",1890540953)],null));}(): +a instanceof cljs.core.Keyword||"number"===typeof a||"string"===typeof a||null==a||cljs.tools.reader.bool_QMARK_.call(null,a)||a instanceof RegExp?a:new cljs.core.List(null,new cljs.core.Symbol(null,"quote","quote",1377916282,null),new cljs.core.List(null,a,null,1,null),2,null))}; +cljs.tools.reader.read_syntax_quote=function(a,b,c,d){b=cljs.tools.reader.gensym_env;cljs.tools.reader.gensym_env=cljs.core.PersistentArrayMap.EMPTY;try{return cljs.tools.reader.syntax_quote_STAR_.call(null,cljs.tools.reader.read_STAR_.call(null,a,!0,null,c,d))}finally{cljs.tools.reader.gensym_env=b}}; +cljs.tools.reader.read_namespaced_map=function(a,b,c,d){var e=cljs.tools.reader.read_token.call(null,a,new cljs.core.Keyword(null,"namespaced-map","namespaced-map",1235665380),cljs.tools.reader.reader_types.read_char.call(null,a));b=cljs.core._EQ_.call(null,e,":")?cljs.core.ns_name.call(null,cljs.core._STAR_ns_STAR_):cljs.core._EQ_.call(null,":",cljs.core.first.call(null,e))?function(){var a=null==e?null:cljs.core.subs.call(null,e,1);a=null==a?null:cljs.tools.reader.impl.commons.parse_symbol.call(null, +a);a=null==a?null:cljs.tools.reader.impl.utils.second_SINGLEQUOTE_.call(null,a);a=null==a?null:cljs.core.symbol.call(null,a);return null==a?null:cljs.tools.reader.resolve_ns.call(null,a)}():function(){var a=null==e?null:cljs.tools.reader.impl.commons.parse_symbol.call(null,e);return null==a?null:cljs.tools.reader.impl.utils.second_SINGLEQUOTE_.call(null,a)}();return cljs.core.truth_(b)?"{"===cljs.tools.reader.impl.commons.read_past.call(null,cljs.tools.reader.impl.utils.whitespace_QMARK_,a)?(c=cljs.tools.reader.read_delimited.call(null, +new 
cljs.core.Keyword(null,"namespaced-map","namespaced-map",1235665380),"}",a,c,d),cljs.core.odd_QMARK_.call(null,cljs.core.count.call(null,c))&&cljs.tools.reader.impl.errors.throw_odd_map.call(null,a,null,null,c),d=cljs.tools.reader.impl.utils.namespace_keys.call(null,cljs.core.str.cljs$core$IFn$_invoke$arity$1(b),cljs.core.take_nth.call(null,2,c)),c=cljs.core.take_nth.call(null,2,cljs.core.rest.call(null,c)),cljs.core._EQ_.call(null,cljs.core.count.call(null,cljs.core.set.call(null,d)),cljs.core.count.call(null, +d))||cljs.tools.reader.impl.errors.throw_dup_keys.call(null,a,new cljs.core.Keyword(null,"namespaced-map","namespaced-map",1235665380),d),cljs.core.zipmap.call(null,d,c)):cljs.tools.reader.impl.errors.throw_ns_map_no_map.call(null,a,e):cljs.tools.reader.impl.errors.throw_bad_ns.call(null,a,e)}; +cljs.tools.reader.macros=function(a){switch(a){case '"':return cljs.tools.reader.read_string_STAR_;case ":":return cljs.tools.reader.read_keyword;case ";":return cljs.tools.reader.impl.commons.read_comment;case "'":return cljs.tools.reader.wrapping_reader.call(null,new cljs.core.Symbol(null,"quote","quote",1377916282,null));case "@":return cljs.tools.reader.wrapping_reader.call(null,new cljs.core.Symbol("clojure.core","deref","clojure.core/deref",188719157,null));case "^":return cljs.tools.reader.read_meta; +case "`":return cljs.tools.reader.read_syntax_quote;case "~":return cljs.tools.reader.read_unquote;case "(":return cljs.tools.reader.read_list;case ")":return cljs.tools.reader.read_unmatched_delimiter;case "[":return cljs.tools.reader.read_vector;case "]":return cljs.tools.reader.read_unmatched_delimiter;case "{":return cljs.tools.reader.read_map;case "}":return cljs.tools.reader.read_unmatched_delimiter;case "\\":return cljs.tools.reader.read_char_STAR_;case "%":return cljs.tools.reader.read_arg; +case "#":return cljs.tools.reader.read_dispatch;default:return null}}; +cljs.tools.reader.dispatch_macros=function(a){switch(a){case "^":return 
cljs.tools.reader.read_meta;case "'":return cljs.tools.reader.wrapping_reader.call(null,new cljs.core.Symbol(null,"var","var",870848730,null));case "(":return cljs.tools.reader.read_fn;case "{":return cljs.tools.reader.read_set;case "\x3c":return cljs.tools.reader.impl.commons.throwing_reader.call(null,"Unreadable form");case "\x3d":return cljs.tools.reader.impl.commons.throwing_reader.call(null,"read-eval not supported");case '"':return cljs.tools.reader.read_regex; +case "!":return cljs.tools.reader.impl.commons.read_comment;case "_":return cljs.tools.reader.read_discard;case "?":return cljs.tools.reader.read_cond;case ":":return cljs.tools.reader.read_namespaced_map;case "#":return cljs.tools.reader.read_symbolic_value;default:return null}}; +cljs.tools.reader.read_tagged=function(a,b,c,d){b=cljs.tools.reader.read_STAR_.call(null,a,!0,null,c,d);b instanceof cljs.core.Symbol||cljs.tools.reader.impl.errors.throw_bad_reader_tag.call(null,a,b);if(cljs.core.truth_(cljs.tools.reader._STAR_suppress_read_STAR_))return cljs.core.tagged_literal.call(null,b,cljs.tools.reader.read_STAR_.call(null,a,!0,null,c,d));var e=cljs.tools.reader._STAR_data_readers_STAR_.call(null,b);e=cljs.core.truth_(e)?e:cljs.tools.reader.default_data_readers.call(null,b); +if(cljs.core.truth_(e))return e.call(null,cljs.tools.reader.read_STAR_.call(null,a,!0,null,c,d));e=cljs.tools.reader._STAR_default_data_reader_fn_STAR_;return cljs.core.truth_(e)?e.call(null,b,cljs.tools.reader.read_STAR_.call(null,a,!0,null,c,d)):cljs.tools.reader.impl.errors.throw_unknown_reader_tag.call(null,a,b)};cljs.tools.reader._STAR_data_readers_STAR_=cljs.core.PersistentArrayMap.EMPTY;cljs.tools.reader._STAR_default_data_reader_fn_STAR_=null;cljs.tools.reader._STAR_suppress_read_STAR_=!1; +cljs.tools.reader.default_data_readers=cljs.core.PersistentArrayMap.EMPTY; 
+cljs.tools.reader.read_STAR__internal=function(a,b,c,d,e,f){for(;;){if(cljs.tools.reader.reader_types.source_logging_reader_QMARK_.call(null,a)&&!cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null,cljs.tools.reader.reader_types.peek_char.call(null,a)))return cljs.tools.reader.reader_types.log_source_STAR_.call(null,a,function(){for(;;)if(goog.array.isEmpty(f)){var g=cljs.tools.reader.reader_types.read_char.call(null,a);if(!cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null,g)){if(null== +g)return b?cljs.tools.reader.impl.errors.throw_eof_error.call(null,a,null):c;if(g===d)return cljs.tools.reader.READ_FINISHED;if(cljs.tools.reader.impl.commons.number_literal_QMARK_.call(null,a,g))return cljs.tools.reader.read_number.call(null,a,g);var h=cljs.tools.reader.macros.call(null,g);if(null!=h){if(g=h.call(null,a,g,e,f),g!==a)return g}else return cljs.tools.reader.read_symbol.call(null,a,g)}}else return g=f[0],goog.array.removeAt(f,0),g});if(goog.array.isEmpty(f)){var g=cljs.tools.reader.reader_types.read_char.call(null, +a);if(!cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null,g)){if(null==g)return b?cljs.tools.reader.impl.errors.throw_eof_error.call(null,a,null):c;if(g===d)return cljs.tools.reader.READ_FINISHED;if(cljs.tools.reader.impl.commons.number_literal_QMARK_.call(null,a,g))return cljs.tools.reader.read_number.call(null,a,g);var h=cljs.tools.reader.macros.call(null,g);if(null!=h){if(g=h.call(null,a,g,e,f),g!==a)return g}else return cljs.tools.reader.read_symbol.call(null,a,g)}}else return g=f[0],goog.array.removeAt(f, +0),g}};cljs.tools.reader.read_STAR_=function(a){switch(arguments.length){case 5:return cljs.tools.reader.read_STAR_.cljs$core$IFn$_invoke$arity$5(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4]);case 6:return cljs.tools.reader.read_STAR_.cljs$core$IFn$_invoke$arity$6(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4],arguments[5]);default:throw Error(["Invalid arity: 
",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.tools.reader.read_STAR_.cljs$core$IFn$_invoke$arity$5=function(a,b,c,d,e){return cljs.tools.reader.read_STAR_.call(null,a,b,c,null,d,e)}; +cljs.tools.reader.read_STAR_.cljs$core$IFn$_invoke$arity$6=function(a,b,c,d,e,f){try{return cljs.tools.reader.read_STAR__internal.call(null,a,b,c,d,e,f)}catch(g){if(g instanceof Error){b=g;if(cljs.tools.reader.impl.utils.ex_info_QMARK_.call(null,b)){c=cljs.core.ex_data.call(null,b);if(cljs.core._EQ_.call(null,new cljs.core.Keyword(null,"reader-exception","reader-exception",-1938323098),(new cljs.core.Keyword(null,"type","type",1174270348)).cljs$core$IFn$_invoke$arity$1(c)))throw b;throw cljs.core.ex_info.call(null, +b.message,cljs.core.merge.call(null,new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"reader-exception","reader-exception",-1938323098)],null),c,cljs.tools.reader.reader_types.indexing_reader_QMARK_.call(null,a)?new cljs.core.PersistentArrayMap(null,3,[new cljs.core.Keyword(null,"line","line",212345235),cljs.tools.reader.reader_types.get_line_number.call(null,a),new cljs.core.Keyword(null,"column","column",2078222095),cljs.tools.reader.reader_types.get_column_number.call(null, +a),new cljs.core.Keyword(null,"file","file",-1269645878),cljs.tools.reader.reader_types.get_file_name.call(null,a)],null):null),b);}throw cljs.core.ex_info.call(null,b.message,cljs.core.merge.call(null,new cljs.core.PersistentArrayMap(null,1,[new cljs.core.Keyword(null,"type","type",1174270348),new cljs.core.Keyword(null,"reader-exception","reader-exception",-1938323098)],null),cljs.tools.reader.reader_types.indexing_reader_QMARK_.call(null,a)?new cljs.core.PersistentArrayMap(null,3,[new cljs.core.Keyword(null, +"line","line",212345235),cljs.tools.reader.reader_types.get_line_number.call(null,a),new 
cljs.core.Keyword(null,"column","column",2078222095),cljs.tools.reader.reader_types.get_column_number.call(null,a),new cljs.core.Keyword(null,"file","file",-1269645878),cljs.tools.reader.reader_types.get_file_name.call(null,a)],null):null),b);}throw g;}};cljs.tools.reader.read_STAR_.cljs$lang$maxFixedArity=6; +cljs.tools.reader.read=function(a){switch(arguments.length){case 1:return cljs.tools.reader.read.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return cljs.tools.reader.read.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);case 3:return cljs.tools.reader.read.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.tools.reader.read.cljs$core$IFn$_invoke$arity$1=function(a){return cljs.tools.reader.read.call(null,a,!0,null)}; +cljs.tools.reader.read.cljs$core$IFn$_invoke$arity$2=function(a,b){a=null!=a&&(a.cljs$lang$protocol_mask$partition0$&64||cljs.core.PROTOCOL_SENTINEL===a.cljs$core$ISeq$)?cljs.core.apply.call(null,cljs.core.hash_map,a):a;var c=cljs.core.get.call(null,a,new cljs.core.Keyword(null,"eof","eof",-489063237),new cljs.core.Keyword(null,"eofthrow","eofthrow",-334166531));return cljs.tools.reader.read_STAR_.call(null,b,cljs.core._EQ_.call(null,c,new cljs.core.Keyword(null,"eofthrow","eofthrow",-334166531)), +c,null,a,cljs.core.to_array.call(null,cljs.core.PersistentVector.EMPTY))};cljs.tools.reader.read.cljs$core$IFn$_invoke$arity$3=function(a,b,c){return cljs.tools.reader.read_STAR_.call(null,a,b,c,null,cljs.core.PersistentArrayMap.EMPTY,cljs.core.to_array.call(null,cljs.core.PersistentVector.EMPTY))};cljs.tools.reader.read.cljs$lang$maxFixedArity=3; +cljs.tools.reader.read_string=function(a){switch(arguments.length){case 1:return cljs.tools.reader.read_string.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 2:return 
cljs.tools.reader.read_string.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.tools.reader.read_string.cljs$core$IFn$_invoke$arity$1=function(a){return cljs.tools.reader.read_string.call(null,cljs.core.PersistentArrayMap.EMPTY,a)};cljs.tools.reader.read_string.cljs$core$IFn$_invoke$arity$2=function(a,b){return cljs.core.truth_(cljs.core.truth_(b)?""!==b:b)?cljs.tools.reader.read.call(null,a,cljs.tools.reader.reader_types.string_push_back_reader.call(null,b)):null};cljs.tools.reader.read_string.cljs$lang$maxFixedArity=2; +cljs.tools.reader.read_PLUS_string=function(a){switch(arguments.length){case 1:return cljs.tools.reader.read_PLUS_string.cljs$core$IFn$_invoke$arity$1(arguments[0]);case 3:return cljs.tools.reader.read_PLUS_string.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);case 2:return cljs.tools.reader.read_PLUS_string.cljs$core$IFn$_invoke$arity$2(arguments[0],arguments[1]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join("")); +}};cljs.tools.reader.read_PLUS_string.cljs$core$IFn$_invoke$arity$1=function(a){return cljs.tools.reader.read_PLUS_string.call(null,a,!0,null)}; +cljs.tools.reader.read_PLUS_string.cljs$core$IFn$_invoke$arity$3=function(a,b,c){var d=function(b){return cljs.core.str.cljs$core$IFn$_invoke$arity$1((new cljs.core.Keyword(null,"buffer","buffer",617295198)).cljs$core$IFn$_invoke$arity$1(cljs.core.deref.call(null,a.frames)))},e=d.call(null,a).length,f=cljs.tools.reader.reader_types.source_logging_reader_QMARK_.call(null,a)&&!cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null,cljs.tools.reader.reader_types.peek_char.call(null,a))?cljs.tools.reader.reader_types.log_source_STAR_.call(null, +a,function(){return 
cljs.tools.reader.read.call(null,a,b,c)}):cljs.tools.reader.read.call(null,a,b,c);d=cljs.core.subs.call(null,d.call(null,a),e).trim();return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[f,d],null)}; +cljs.tools.reader.read_PLUS_string.cljs$core$IFn$_invoke$arity$2=function(a,b){var c=function(a){return cljs.core.str.cljs$core$IFn$_invoke$arity$1((new cljs.core.Keyword(null,"buffer","buffer",617295198)).cljs$core$IFn$_invoke$arity$1(cljs.core.deref.call(null,b.frames)))},d=c.call(null,b).length,e=cljs.tools.reader.reader_types.source_logging_reader_QMARK_.call(null,b)&&!cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null,cljs.tools.reader.reader_types.peek_char.call(null,b))?cljs.tools.reader.reader_types.log_source_STAR_.call(null, +b,function(){return cljs.tools.reader.read.call(null,a,b)}):cljs.tools.reader.read.call(null,a,b);c=cljs.core.subs.call(null,c.call(null,b),d).trim();return new cljs.core.PersistentVector(null,2,5,cljs.core.PersistentVector.EMPTY_NODE,[e,c],null)};cljs.tools.reader.read_PLUS_string.cljs$lang$maxFixedArity=3;cljs.tools.reader.edn={};cljs.tools.reader.edn.macro_terminating_QMARK_=function(a){return"#"!==a?"'"!==a?":"!==a?cljs.tools.reader.edn.macros.call(null,a):!1:!1:!1};cljs.tools.reader.edn.not_constituent_QMARK_=function(a){return"@"===a||"`"===a||"~"===a}; +cljs.tools.reader.edn.read_token=function(a){switch(arguments.length){case 3:return cljs.tools.reader.edn.read_token.cljs$core$IFn$_invoke$arity$3(arguments[0],arguments[1],arguments[2]);case 4:return cljs.tools.reader.edn.read_token.cljs$core$IFn$_invoke$arity$4(arguments[0],arguments[1],arguments[2],arguments[3]);default:throw Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.tools.reader.edn.read_token.cljs$core$IFn$_invoke$arity$3=function(a,b,c){return cljs.tools.reader.edn.read_token.call(null,a,b,c,!0)}; 
+cljs.tools.reader.edn.read_token.cljs$core$IFn$_invoke$arity$4=function(a,b,c,d){if(cljs.core.not.call(null,c))return cljs.tools.reader.impl.errors.throw_eof_at_start.call(null,a,b);if(cljs.core.truth_(cljs.core.truth_(d)?cljs.tools.reader.edn.not_constituent_QMARK_.call(null,c):d))return cljs.tools.reader.impl.errors.throw_bad_char.call(null,a,b,c);d=new goog.string.StringBuffer;for(cljs.tools.reader.reader_types.unread.call(null,a,c);;){if(cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null, +c)||cljs.tools.reader.edn.macro_terminating_QMARK_.call(null,c)||null==c)return cljs.core.str.cljs$core$IFn$_invoke$arity$1(d);if(cljs.tools.reader.edn.not_constituent_QMARK_.call(null,c))return cljs.tools.reader.impl.errors.throw_bad_char.call(null,a,b,c);d.append(cljs.tools.reader.reader_types.read_char.call(null,a));c=cljs.tools.reader.reader_types.peek_char.call(null,a)}};cljs.tools.reader.edn.read_token.cljs$lang$maxFixedArity=4; +cljs.tools.reader.edn.read_dispatch=function(a,b,c){var d=cljs.tools.reader.reader_types.read_char.call(null,a);if(cljs.core.truth_(d)){b=cljs.tools.reader.edn.dispatch_macros.call(null,d);if(cljs.core.truth_(b))return b.call(null,a,d,c);c=cljs.tools.reader.edn.read_tagged.call(null,function(){cljs.tools.reader.reader_types.unread.call(null,a,d);return a}(),d,c);return cljs.core.truth_(c)?c:cljs.tools.reader.impl.errors.throw_no_dispatch.call(null,a,d)}return cljs.tools.reader.impl.errors.throw_eof_at_dispatch.call(null, +a)};cljs.tools.reader.edn.read_unmatched_delimiter=function(a,b,c){return cljs.tools.reader.impl.errors.throw_unmatch_delimiter.call(null,a,b)}; +cljs.tools.reader.edn.read_unicode_char=function(a){switch(arguments.length){case 4:return cljs.tools.reader.edn.read_unicode_char.cljs$core$IFn$_invoke$arity$4(arguments[0],arguments[1],arguments[2],arguments[3]);case 5:return cljs.tools.reader.edn.read_unicode_char.cljs$core$IFn$_invoke$arity$5(arguments[0],arguments[1],arguments[2],arguments[3],arguments[4]);default:throw 
Error(["Invalid arity: ",cljs.core.str.cljs$core$IFn$_invoke$arity$1(arguments.length)].join(""));}}; +cljs.tools.reader.edn.read_unicode_char.cljs$core$IFn$_invoke$arity$4=function(a,b,c,d){c=b+c;cljs.core.count.call(null,a)!==c&&cljs.tools.reader.impl.errors.throw_invalid_unicode_literal.call(null,null,a);for(var e=0;;){if(b===c)return String.fromCharCode(e);var f=cljs.tools.reader.impl.utils.char_code.call(null,cljs.core.nth.call(null,a,b),d);if(-1===f)return cljs.tools.reader.impl.errors.throw_invalid_unicode_digit_in_token.call(null,null,cljs.core.nth.call(null,a,b),a);e=f+e*d;b+=1}}; +cljs.tools.reader.edn.read_unicode_char.cljs$core$IFn$_invoke$arity$5=function(a,b,c,d,e){for(var f=1,g=cljs.tools.reader.impl.utils.char_code.call(null,b,c);;){if(-1===g)return cljs.tools.reader.impl.errors.throw_invalid_unicode_digit.call(null,a,b);if(f!==d){var h=cljs.tools.reader.reader_types.peek_char.call(null,a);if(cljs.core.truth_(function(){var a=cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null,h);if(a)return a;a=cljs.tools.reader.edn.macros.call(null,h);return cljs.core.truth_(a)? 
+a:null==h}()))return cljs.core.truth_(e)?cljs.tools.reader.impl.errors.throw_invalid_unicode_len.call(null,a,f,d):String.fromCharCode(g);var k=cljs.tools.reader.impl.utils.char_code.call(null,h,c);cljs.tools.reader.reader_types.read_char.call(null,a);if(-1===k)return cljs.tools.reader.impl.errors.throw_invalid_unicode_digit.call(null,a,h);g=k+g*c;f+=1}else return String.fromCharCode(g)}};cljs.tools.reader.edn.read_unicode_char.cljs$lang$maxFixedArity=5;cljs.tools.reader.edn.upper_limit=55295; +cljs.tools.reader.edn.lower_limit=57344; +cljs.tools.reader.edn.read_char_STAR_=function(a,b,c){b=cljs.tools.reader.reader_types.read_char.call(null,a);if(null!=b){b=cljs.tools.reader.edn.macro_terminating_QMARK_.call(null,b)||cljs.tools.reader.edn.not_constituent_QMARK_.call(null,b)||cljs.tools.reader.impl.utils.whitespace_QMARK_.call(null,b)?cljs.core.str.cljs$core$IFn$_invoke$arity$1(b):cljs.tools.reader.edn.read_token.call(null,a,new cljs.core.Keyword(null,"character","character",380652989),b,!1);c=b.length;if(1===c)return cljs.core.nth.call(null, +b,0);if("newline"===b)return"\n";if("space"===b)return" ";if("tab"===b)return"\t";if("backspace"===b)return"\b";if("formfeed"===b)return"\f";if("return"===b)return"\r";if(goog.string.startsWith(b,"u"))return b=cljs.tools.reader.edn.read_unicode_char.call(null,b,1,4,16),c=b.charCodeAt(),c>cljs.tools.reader.edn.upper_limit&&cc?a:c}(),function(){var a=cljs.core.apply.call(null,cljs.core.max,c),b=bigml.dixie.flatline.eval.max_window_width;return a 0: + prediction /= float(total_weight) + if compact: + output = [prediction] + else: + output = {"prediction": prediction} + else: + output = votes.combine_to_distribution(normalize=True) + if not compact: + output = [{'category': class_name, + 'probability': probability} + for class_name, probability in + zip(self.class_names, output)] + + return output + + def predict_confidence(self, input_data, + missing_strategy=LAST_PREDICTION, + compact=False): + + """For classification models, 
Predicts a confidence for + each possible output class, based on input values. The input + fields must be a dictionary keyed by field name or field ID. + + For regressions, the output is a single element + containing the prediction and the associated confidence. + + WARNING: Only decision-tree based models in the Fusion object will + have an associated confidence, so the result for fusions that don't + contain such models can be None. + + :param input_data: Input data to be predicted + :param missing_strategy: LAST_PREDICTION|PROPORTIONAL missing strategy + for missing fields + :param compact: If False, prediction is returned as a list of maps, one + per class, with the keys "prediction" and "confidence" + mapped to the name of the class and it's confidence, + respectively. If True, returns a list of confidences + ordered by the sorted order of the class names. + """ + if not self.missing_numerics: + check_no_missing_numerics(input_data, self.model_fields) + + predictions = [] + weights = [] + for models_split in self.models_splits: + models = [] + for model in models_split: + model_type = get_resource_type(model) + if model_type == "fusion": + models.append(Fusion(model, api=self.api)) + else: + models.append(SupervisedModel(model, api=self.api)) + votes_split = [] + for model in models: + try: + kwargs = {"compact": False} + if model_type in ["model", "ensemble", "fusion"]: + kwargs.update({"missing_strategy": missing_strategy}) + prediction = model.predict_confidence( \ + input_data, **kwargs) + except Exception as exc: + # logistic regressions can raise this error if they + # have missing_numerics=False and some numeric missings + # are found and Linear Regressions have no confidence + continue + predictions.append(prediction) + weights.append(self.weights[self.model_ids.index( + model.resource_id)]) + if self.regression: + prediction = prediction["prediction"] + if self.regression: + prediction = 0 + confidence = 0 + total_weight = sum(weights) + for index, 
pred in enumerate(predictions): + prediction += pred.get("prediction") * weights[index] + confidence += pred.get("confidence") + if total_weight > 0: + prediction /= float(total_weight) + confidence /= float(len(predictions)) + if compact: + output = [prediction, confidence] + else: + output = {"prediction": prediction, "confidence": confidence} + else: + output = self._combine_confidences(predictions) + if not compact: + output = [{'category': class_name, + 'confidence': confidence} + for class_name, confidence in + zip(self.class_names, output)] + return output + + def _combine_confidences(self, predictions): + """Combining the confidences per class of classification models""" + output = [] + count = float(len(predictions)) + for class_name in self.class_names: + confidence = 0 + for prediction in predictions: + for category_info in prediction: + if category_info["category"] == class_name: + confidence += category_info.get("confidence") + break + output.append(round(confidence / count, DECIMALS)) + return output + + def weigh(self, prediction, model_id): + """Weighs the prediction according to the weight associated to the + current model in the fusion. + + """ + if isinstance(prediction, list): + for index, probability in enumerate(prediction): + probability *= self.weights[ \ + self.model_ids.index(model_id)] + prediction[index] = probability + else: + prediction *= self.weights[self.model_ids.index(model_id)] + + return prediction + + def predict(self, input_data, missing_strategy=LAST_PREDICTION, + operating_point=None, full=False): + """Makes a prediction based on a number of field values. + + input_data: Input data to be predicted + missing_strategy: LAST_PREDICTION|PROPORTIONAL missing strategy for + missing fields + operating_point: In classification models, this is the point of the + ROC curve where the model will be used at. 
The + operating point can be defined in terms of: + - the positive_class, the class that is important to + predict accurately + - the threshold, + the value that is stablished + as minimum for the positive_class to be predicted. + - the kind of measure used to set a threshold: + probability or confidence (if available) + The operating_point is then defined as a map with + two attributes, e.g.: + {"positive_class": "Iris-setosa", + "threshold": 0.5, + "kind": "probability"} + full: Boolean that controls whether to include the prediction's + attributes. By default, only the prediction is produced. If set + to True, the rest of available information is added in a + dictionary format. The dictionary keys can be: + - prediction: the prediction value + - probability: prediction's probability + - unused_fields: list of fields in the input data that + are not being used in the model + """ + + # Checks and cleans input_data leaving the fields used in the model + unused_fields = [] + new_data = self.filter_input_data( \ + input_data, + add_unused_fields=full) + if full: + input_data, unused_fields = new_data + else: + input_data = new_data + + if not self.missing_numerics: + check_no_missing_numerics(input_data, self.model_fields) + + # Strips affixes for numeric values and casts to the final field type + cast(input_data, self.fields) + + full_prediction = self._predict( \ + input_data, missing_strategy=missing_strategy, + operating_point=operating_point, + unused_fields=unused_fields) + if full: + return dict((key, value) for key, value in \ + full_prediction.items() if value is not None) + + return full_prediction['prediction'] + + def _predict(self, input_data, missing_strategy=LAST_PREDICTION, + operating_point=None, unused_fields=None): + """Makes a prediction based on a number of field values. Please, + note that this function does not check the types for the input + provided, so it's unsafe to use it directly without prior checking. 
+ + """ + # When operating_point is used, we need the probabilities + # of all possible classes to decide, so se use + # the `predict_probability` method + if operating_point is None and self.operation_settings is not None: + operating_point = self.operation_settings.get("operating_point") + + if operating_point: + if self.regression: + raise ValueError("The operating_point argument can only be" + " used in classifications.") + prediction = self.predict_operating( \ + input_data, + missing_strategy=missing_strategy, + operating_point=operating_point) + return prediction + result = self.predict_probability( \ + input_data, + missing_strategy=missing_strategy, + compact=False) + confidence_result = self.predict_confidence( \ + input_data, + missing_strategy=missing_strategy, + compact=False) + + if not self.regression: + try: + for index, value in enumerate(result): + result[index].update( + {"confidence": confidence_result[index]["confidence"]}) + except Exception as exc: + pass + result = sorted(result, key=lambda x: - x["probability"])[0] + result["prediction"] = result["category"] + del result["category"] + else: + result.update( + {"confidence": confidence_result["confidence"]}) + + # adding unused fields, if any + if unused_fields: + result.update({'unused_fields': unused_fields}) + + return result + + def predict_operating(self, input_data, + missing_strategy=LAST_PREDICTION, + operating_point=None): + """Computes the prediction based on a user-given operating point. 
+ + """ + if operating_point is None and self.operation_settings is not None: + operating_point = self.operation_settings.get("operating_point") + + # only probability is allowed as operating kind + operating_point.update({"kind": "probability"}) + kind, threshold, positive_class = parse_operating_point( \ + operating_point, OPERATING_POINT_KINDS, self.class_names, + self.operation_settings) + predictions = self.predict_probability(input_data, + missing_strategy, False) + + position = self.class_names.index(positive_class) + if predictions[position][kind] > threshold: + prediction = predictions[position] + else: + # if the threshold is not met, the alternative class with + # highest probability or confidence is returned + predictions.sort( \ + key=cmp_to_key( \ + lambda a, b: self._sort_predictions(a, b, kind))) + prediction = predictions[0: 2] + if prediction[0]["category"] == positive_class: + prediction = prediction[1] + else: + prediction = prediction[0] + prediction["prediction"] = prediction["category"] + del prediction["category"] + return prediction + + #pylint: disable=locally-disabled,invalid-name + def _sort_predictions(self, a, b, criteria): + """Sorts the categories in the predicted node according to the + given criteria + + """ + if a[criteria] == b[criteria]: + return sort_categories(a, b, self.objective_categories) + return 1 if b[criteria] > a[criteria] else -1 + + def dump(self, output=None, cache_set=None): + """Uses msgpack to serialize the resource object + If cache_set is filled with a cache set method, the method is called + + """ + self_vars = vars(self) + del self_vars["api"] + dump(self_vars, output=output, cache_set=cache_set) + + def dumps(self): + """Uses msgpack to serialize the resource object to a string + + """ + self_vars = vars(self) + del self_vars["api"] + dumps(self_vars) diff --git a/bigml/generators/__init__.py b/bigml/generators/__init__.py new file mode 100644 index 00000000..e69de29b diff --git 
a/bigml/generators/boosted_tree.py b/bigml/generators/boosted_tree.py new file mode 100644 index 00000000..14bbf2be --- /dev/null +++ b/bigml/generators/boosted_tree.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Tree level output for python +This module defines functions that generate python code to make local +predictions +""" + +from bigml.tree_utils import COMPOSED_FIELDS, INDENT +from bigml.predict_utils.common import missing_branch, \ + none_value, get_node, get_predicate, mintree_split +from bigml.generators.tree_common import value_to_print, map_data, \ + missing_prefix_code, filter_nodes, split_condition_code +from bigml.util import NUMERIC + + +MISSING_OPERATOR = { + "=": "is", + "!=": "is not" +} + + +def missing_check_code(tree, offsets, fields, + field, depth, input_map, cmv): + """Builds the code to predict when the field is missing + """ + node = get_node(tree) + code = "%sif (%s is None):\n" % \ + (INDENT * depth, + map_data(fields[field]['slug'], input_map, True)) + value = value_to_print(node[offsets["output"]], NUMERIC) + code += "%sreturn {\"prediction\":%s" % (INDENT * (depth + 1), + value) + code += "}\n" + cmv.append(fields[field]['slug']) + return code + + +def boosted_plug_in_body(tree, offsets, fields, objective_id, regression, + depth=1, cmv=None, input_map=False, + ids_path=None, subtree=True): + """Translate the model into a set of "if" python statements. 
+ `depth` controls the size of indentation. As soon as a value is missing + that node is returned without further evaluation. + """ + if cmv is None: + cmv = [] + body = "" + term_analysis_fields = [] + item_analysis_fields = [] + + + node = get_node(tree) + children = [] if node[offsets["children#"]] == 0 else \ + node[offsets["children"]] + children = filter_nodes(children, offsets, ids=ids_path, subtree=subtree) + + if children: + + # field used in the split + field = mintree_split(children) + + has_missing_branch = (missing_branch(children) or + none_value(children)) + # the missing is singled out as a special case only when there's + # no missing branch in the children list + one_branch = not has_missing_branch or \ + fields[field]['optype'] in COMPOSED_FIELDS + if (one_branch and not fields[field]['slug'] in cmv): + body += missing_check_code(tree, offsets, fields, + field, depth, input_map, cmv) + + for child in children: + [_, field, value, _, _] = get_predicate(child) + pre_condition = "" + # code when missing_splits has been used + if has_missing_branch and value is not None: + pre_condition = missing_prefix_code(child, fields, field, + input_map, cmv) + + # complete split condition code + body += split_condition_code( \ + child, fields, + depth, input_map, pre_condition, + term_analysis_fields, item_analysis_fields, cmv) + + # value to be determined in next node + next_level = boosted_plug_in_body( \ + child, offsets, fields, objective_id, regression, depth + 1, + cmv=cmv[:], input_map=input_map, ids_path=ids_path, + subtree=subtree) + + body += next_level[0] + term_analysis_fields.extend(next_level[1]) + item_analysis_fields.extend(next_level[2]) + else: + value = value_to_print(node[offsets["output"]], NUMERIC) + body = "%sreturn {\"prediction\":%s" % (INDENT * depth, value) + body += "}\n" + + return body, term_analysis_fields, item_analysis_fields diff --git a/bigml/generators/model.py b/bigml/generators/model.py new file mode 100644 index 
00000000..51c65e92 --- /dev/null +++ b/bigml/generators/model.py @@ -0,0 +1,1057 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +""" +Functions used to generate or write output from the decision tree models + +""" +import sys +import os +import math +import keyword + + +from functools import reduce, partial + +from bigml.path import Path, BRIEF +from bigml.basemodel import print_importance +from bigml.io import UnicodeWriter +from bigml.util import markdown_cleanup, prefix_as_comment, utf8, NUMERIC +from bigml.predicate import Predicate +from bigml.model import PYTHON_CONV +from bigml.predict_utils.common import missing_branch, \ + none_value, get_node, get_predicate +from bigml.predicate_utils.utils import predicate_to_rule, \ + EQ, NE, to_lisp_rule, INVERSE_OP +from bigml.tree_utils import MAX_ARGS_LENGTH, tableau_string, slugify, \ + sort_fields, TM_TOKENS, TM_ALL, TM_FULL_TERM, TERM_OPTIONS, ITEM_OPTIONS, \ + PYTHON_OPERATOR +from bigml.generators.tree import plug_in_body +from bigml.generators.boosted_tree import boosted_plug_in_body +from bigml.generators.tree import filter_nodes + + +# templates for static Python +BIGML_SCRIPT = os.path.dirname(__file__) + +TERM_TEMPLATE = "%s/static/term_analysis.txt" % BIGML_SCRIPT +ITEMS_TEMPLATE = "%s/static/items_analysis.txt" % BIGML_SCRIPT +HADOOP_CSV_TEMPLATE = "%s/static/python_hadoop_csv.txt" % \ + BIGML_SCRIPT +HADOOP_NEXT_TEMPLATE = 
"%s/static/python_hadoop_next.txt" % \ + BIGML_SCRIPT +HADOOP_REDUCER_TEMPLATE = "%s/static/python_hadoop_reducer.txt" % \ + BIGML_SCRIPT + +DEFAULT_IMPURITY = 0.2 + +INDENT = ' ' + +DFT_ATTR = "output" + + +MISSING_OPERATOR = { + EQ: "is", + NE: "is not" +} + +T_MISSING_OPERATOR = { + EQ: "ISNULL(", + NE: "NOT ISNULL(" +} + + +def print_distribution(distribution, out=sys.stdout): + """Prints distribution data + + """ + total = reduce(lambda x, y: x + y, + [group[1] for group in distribution]) + for group in distribution: + out.write(utf8( + " %s: %.2f%% (%d instance%s)\n" % ( + group[0], + round(group[1] * 1.0 / total, 4) * 100, + group[1], + "" if group[1] == 1 else "s"))) + + +def list_fields(model, out=sys.stdout): + """Prints descriptions of the fields for this model. + + """ + out.write(utf8('<%-32s : %s>\n' % ( + model.fields[model.objective_id]['name'], + model.fields[model.objective_id]['optype']))) + out.flush() + + if hasattr(model, "model_fields"): + fields = model.model_fields + else: + fields = model.fields + for field in [(val['name'], val['optype']) for key, val in + sort_fields(fields) + if key != model.objective_id]: + out.write(utf8('[%-32s : %s]\n' % (field[0], field[1]))) + out.flush() + return model.fields + + +def gini_impurity(distribution, count): + """Returns the gini impurity score associated to the distribution + in the node + + """ + purity = 0.0 + if distribution is None: + return None + for _, instances in distribution: + purity += math.pow(instances / float(count), 2) + return 1.0 - purity + + +def get_leaves(model, path=None, filter_function=None): + """Returns a list that includes all the leaves of the tree. 
+ + """ + + leaves = [] + + if path is None: + path = [] + + offsets = model.offsets + + def get_tree_leaves(tree, fields, path, filter_function=None): + + leaves = [] + node = get_node(tree) + predicate = get_predicate(tree) + if isinstance(predicate, list): + [operator, field, value, term, missing] = get_predicate(tree) + path.append(to_lisp_rule(operator, field, value, term, missing, + fields[field])) + + children_number = node[offsets["children#"]] + children = [] if children_number == 0 else node[offsets["children"]] + + if children: + for child in children: + + leaves += get_tree_leaves(child, fields, + path[:], + filter_function=filter_function) + else: + leaf = { + 'id': node[offsets["id"]], + 'confidence': node[offsets["confidence"]], + 'count': node[offsets["count"]], + 'distribution': node[offsets["distribution"]], + 'impurity': gini_impurity(node[offsets["distribution"]], + node[offsets["count"]]), + 'output': node[offsets["output"]], + 'path': path} + if 'weighted_distribution' in offsets: + leaf.update( \ + {"weighted_distribution": node[offsets[ \ + "weighted_distribution"]], + "weight": node[offsets["weight"]]}) + if (not hasattr(filter_function, '__call__') + or filter_function(leaf)): + leaves += [leaf] + return leaves + return get_tree_leaves(model.tree, model.fields, path, + filter_function) + + +def impure_leaves(model, impurity_threshold=DEFAULT_IMPURITY): + """Returns a list of leaves that are impure + + """ + if model.regression or model.boosting: + raise AttributeError("This method is available for non-boosting" + " categorization models only.") + def is_impure(node, impurity_threshold=impurity_threshold): + """Returns True if the gini impurity of the node distribution + goes above the impurity threshold. 
+ + """ + return node.get('impurity') > impurity_threshold + + is_impure = partial(is_impure, impurity_threshold=impurity_threshold) + return get_leaves(model, filter_function=is_impure) + + +def docstring(model): + """Returns the docstring describing the model. + + """ + objective_name = model.fields[model.objective_id]['name'] if \ + not model.boosting else \ + model.fields[model.boosting["objective_field"]]['name'] + docstring_cmt = ("Predictor for %s from %s\n" % ( + objective_name, + model.resource_id)) + model.description = ( + str( + markdown_cleanup(model.description).strip()) or + 'Predictive model by BigML - Machine Learning Made Easy') + docstring_cmt += "\n" + INDENT * 2 + ( + "%s" % prefix_as_comment(INDENT * 2, model.description)) + return docstring_cmt + + +def build_ids_map(tree, offsets, ids_map, parent_id=None): + """Builds a map for the tree from each node id to its parent + + """ + node = get_node(tree) + node_id = node[offsets["id"]] + ids_map[node_id] = parent_id + children_number = node[offsets["children#"]] + children = [] if children_number == 0 else node[offsets["children"]] + for child in children: + build_ids_map(child, offsets, ids_map, node_id) + + +def fill_ids_map(model): + """Filling the parent, child map + + """ + + if not (hasattr(model, "ids_map") and model.ids_map): + model.ids_map = {} + build_ids_map(model.tree, model.offsets, model.ids_map) + return model + + +def get_ids_path(model, filter_id): + """Builds the list of ids that go from a given id to the tree root + + """ + model = fill_ids_map(model) + + ids_path = [] + if filter_id is not None and model.tree[model.offsets["id"]] is not None: + if filter_id not in model.ids_map: + raise ValueError("The given id does not exist.") + ids_path = [filter_id] + last_id = filter_id + while model.ids_map[last_id] is not None: + ids_path.append(model.ids_map[last_id]) + last_id = model.ids_map[last_id] + return ids_path + + +def generate_rules(tree, offsets, objective_id, fields, + 
depth=0, ids_path=None, subtree=True): + """Translates a tree model into a set of IF-THEN rules. + + """ + rules_str = "" + + node = get_node(tree) + children_number = node[offsets["children#"]] + children = [] if children_number == 0 else node[offsets["children"]] + children = filter_nodes(children, offsets, ids=ids_path, + subtree=subtree) + if children: + for child in children: + predicate = get_predicate(child) + if isinstance(predicate, list): + [operator, field, value, term, missing] = predicate + child_node = get_node(child) + rules_str += ("%s IF %s %s\n" % + (INDENT * depth, + predicate_to_rule(operator, fields[field], + value, term, missing, + label='slug'), + "AND" if child_node[offsets["children#"]] > 0 + else "THEN")) + rules_str += generate_rules(child, offsets, objective_id, fields, + depth + 1, ids_path=ids_path, + subtree=subtree) + else: + rules_str += ("%s %s = %s\n" % + (INDENT * depth, + (fields[objective_id]['slug'] + if objective_id else "Prediction"), + node[offsets["output"]])) + return rules_str + + +def rules(model, out=sys.stdout, filter_id=None, subtree=True): + """Returns a IF-THEN rule set that implements the model. + + `out` is file descriptor to write the rules. + + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + ids_path = get_ids_path(model, filter_id) + + def tree_rules(tree, offsets, objective_id, fields, + out, ids_path=None, subtree=True): + """Prints out an IF-THEN rule version of the tree. 
+ + """ + for field in sort_fields(fields): + + slug = slugify(fields[field[0]]['name']) + fields[field[0]].update(slug=slug) + out.write(utf8(generate_rules(tree, offsets, objective_id, + fields, + ids_path=ids_path, + subtree=subtree))) + out.flush() + + return tree_rules(model.tree, model.offsets, model.objective_id, + model.fields, out, + ids_path=ids_path, subtree=subtree) + + +def python(model, out=sys.stdout, hadoop=False, + filter_id=None, subtree=True): + """Returns a basic python function that implements the model. + + `out` is file descriptor to write the python code. + + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + ids_path = get_ids_path(model, filter_id) + if hadoop: + return (hadoop_python_mapper(model, out=out, + ids_path=ids_path, + subtree=subtree) or + hadoop_python_reducer(out=out)) + return tree_python(model.tree, model.offsets, model.fields, + model.objective_id, model.boosting, out, + docstring(model), ids_path=ids_path, subtree=subtree) + +def hadoop_python_mapper(model, out=sys.stdout, ids_path=None, + subtree=True): + """Generates a hadoop mapper header to make predictions in python + + """ + input_fields = [(value, key) for (key, value) in + sorted(list(model.inverted_fields.items()), + key=lambda x: x[1])] + parameters = [value for (key, value) in + input_fields if key != model.objective_id] + args = [] + for field in input_fields: + slug = slugify(model.fields[field[0]]['name']) + model.fields[field[0]].update(slug=slug) + if field[0] != model.objective_id: + args.append("\"" + model.fields[field[0]]['slug'] + "\"") + + with open(HADOOP_CSV_TEMPLATE) as template_handler: + output = template_handler.read() % ",".join(parameters) + + output += "\n%sself.INPUT_FIELDS = [%s]\n" % \ + ((INDENT * 3), (",\n " + INDENT * 8).join(args)) + + input_types = [] + prefixes = [] + suffixes = [] + count = 0 + fields = model.fields + for key in [field[0] for field in input_fields + if 
field[0] != model.objective_id]: + input_type = ('None' if not fields[key]['datatype'] in + PYTHON_CONV + else PYTHON_CONV[fields[key]['datatype']]) + input_types.append(input_type) + if 'prefix' in fields[key]: + prefixes.append("%s: %s" % (count, + repr(fields[key]['prefix']))) + if 'suffix' in fields[key]: + suffixes.append("%s: %s" % (count, + repr(fields[key]['suffix']))) + count += 1 + static_content = "%sself.INPUT_TYPES = [" % (INDENT * 3) + formatter = ",\n%s" % (" " * len(static_content)) + output += "\n%s%s%s" % (static_content, + formatter.join(input_types), + "]\n") + static_content = "%sself.PREFIXES = {" % (INDENT * 3) + formatter = ",\n%s" % (" " * len(static_content)) + output += "\n%s%s%s" % (static_content, + formatter.join(prefixes), + "}\n") + static_content = "%sself.SUFFIXES = {" % (INDENT * 3) + formatter = ",\n%s" % (" " * len(static_content)) + output += "\n%s%s%s" % (static_content, + formatter.join(suffixes), + "}\n") + + with open(HADOOP_NEXT_TEMPLATE) as template_handler: + output += template_handler.read() + + out.write(output) + out.flush() + + tree_python(model.tree, model.offsets, model.fields, model.objective_id, + False if not hasattr(model, "boosting") else model.boosting, + out, docstring(model), ids_path=ids_path, subtree=subtree) + + output = \ +""" +csv = CSVInput() +for values in csv: + if not isinstance(values, bool): + print u'%%s\\t%%s' %% (repr(values), repr(predict_%s(values))) +\n\n +""" % fields[model.objective_id]['slug'] + out.write(utf8(output)) + out.flush() + +def hadoop_python_reducer(out=sys.stdout): + """Generates a hadoop reducer to make predictions in python + + """ + + with open(HADOOP_REDUCER_TEMPLATE) as template_handler: + output = template_handler.read() + out.write(utf8(output)) + out.flush() + +def tree_python(tree, offsets, fields, objective_id, boosting, + out, docstring_str, input_map=False, + ids_path=None, subtree=True): + """Writes a python function that implements the model. 
+ + """ + args = [] + args_tree = [] + parameters = sort_fields(fields) + if not input_map: + input_map = len(parameters) > MAX_ARGS_LENGTH + reserved_keywords = keyword.kwlist if not input_map else None + prefix = "_" if not input_map else "" + for field in parameters: + field_name_to_show = fields[field[0]]['name'].strip() + if field_name_to_show == "": + field_name_to_show = field[0] + slug = slugify(field_name_to_show, + reserved_keywords=reserved_keywords, prefix=prefix) + fields[field[0]].update(slug=slug) + if not input_map: + if field[0] != objective_id: + args.append("%s=None" % (slug)) + args_tree.append("%s=%s" % (slug, slug)) + if input_map: + args.append("data={}") + args_tree.append("data=data") + + function_name = fields[objective_id]['slug'] if \ + not boosting else fields[boosting["objective_field"]]['slug'] + if prefix == "_" and function_name[0] == prefix: + function_name = function_name[1:] + if function_name == "": + function_name = "field_" + objective_id + python_header = "# -*- coding: utf-8 -*-\n" + predictor_definition = ("def predict_%s" % + function_name) + depth = len(predictor_definition) + 1 + predictor = "%s(%s):\n" % (predictor_definition, + (",\n" + " " * depth).join(args)) + + predictor_doc = (INDENT + "\"\"\" " + docstring_str + + "\n" + INDENT + "\"\"\"\n") + body_fn = boosted_plug_in_body if boosting else plug_in_body + body, term_analysis_predicates, item_analysis_predicates = \ + body_fn(tree, offsets, fields, objective_id, + fields[objective_id]["optype"] == NUMERIC, + input_map=input_map, + ids_path=ids_path, subtree=subtree) + terms_body = "" + if term_analysis_predicates or item_analysis_predicates: + terms_body = term_analysis_body(fields, + term_analysis_predicates, + item_analysis_predicates) + predictor = python_header + predictor + \ + predictor_doc + terms_body + body + + predictor_model = "def predict" + depth = len(predictor_model) + 1 + predictor += "\n\n%s(%s):\n" % (predictor_model, + (",\n" + " " * 
depth).join(args)) + predictor += "%sprediction = predict_%s(%s)\n" % ( \ + INDENT, function_name, ", ".join(args_tree)) + + if boosting is not None: + predictor += "%sprediction.update({\"weight\": %s})\n" % \ + (INDENT, boosting.get("weight")) + if boosting.get("objective_class") is not None: + predictor += "%sprediction.update({\"class\": \"%s\"})\n" % \ + (INDENT, boosting.get("objective_class")) + predictor += "%sreturn prediction" % INDENT + + out.write(utf8(predictor)) + out.flush() + + +def term_analysis_body(fields, term_analysis_predicates, + item_analysis_predicates): + """ Writes auxiliary functions to handle the term and item + analysis fields + + """ + body = """ + import re +""" + # static content + + if term_analysis_predicates: + body += """ + tm_tokens = '%s' + tm_full_term = '%s' + tm_all = '%s' + +""" % (TM_TOKENS, TM_FULL_TERM, TM_ALL) + with open(TERM_TEMPLATE) as template_handler: + body += template_handler.read() + + term_analysis_options = {predicate[0] for predicate in + term_analysis_predicates} + term_analysis_predicates = set(term_analysis_predicates) + body += """ + term_analysis = {""" + for field_id in term_analysis_options: + field = fields[field_id] + body += """ + \"%s\": {""" % field['slug'] + options = sorted(field['term_analysis'].keys()) + for option in options: + if option in TERM_OPTIONS: + body += """ + \"%s\": %s,""" % (option, repr(field['term_analysis'][option])) + body += """ + },""" + body += """ + }""" + body += """ + term_forms = {""" + term_forms = {} + for field_id, term in term_analysis_predicates: + alternatives = [] + field = fields[field_id] + if field['slug'] not in term_forms: + term_forms[field['slug']] = {} + all_forms = field['summary'].get('term_forms', {}) + if all_forms: + alternatives = all_forms.get(term, []) + if alternatives: + terms = [term] + terms.extend(all_forms.get(term, [])) + term_forms[field['slug']][term] = terms + for field, field_term_forms in term_forms.items(): + body += """ + \"%s\": 
{""" % field + terms = sorted(field_term_forms.keys()) + for term in terms: + body += """ + \"%s\": %s,""" % (term, field_term_forms[term]) + body += """ + },""" + body += """ + } + +""" + if item_analysis_predicates: + with open(ITEMS_TEMPLATE) as template_handler: + body += template_handler.read() + + item_analysis_options = {predicate[0] for predicate in + item_analysis_predicates} + item_analysis_predicates = set(item_analysis_predicates) + body += """ + item_analysis = {""" + for field_id in item_analysis_options: + field = fields[field_id] + body += """ + \"%s\": {""" % field['slug'] + for option in field['item_analysis']: + if option in ITEM_OPTIONS: + body += """ + \"%s\": %s,""" % (option, repr(field['item_analysis'][option])) + body += """ + },""" + body += """ + } + +""" + + return body + + +def tableau(model, out=sys.stdout, hadoop=False, + filter_id=None, subtree=True, attr=DFT_ATTR): + """Returns a basic tableau function that implements the model. + + `out` is file descriptor to write the tableau code. + + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + ids_path = get_ids_path(model, filter_id) + if hadoop: + return "Hadoop output not available." + response = tree_tableau(model.tree, model.offsets, model.fields, + model.objective_id, + out, ids_path=ids_path, + subtree=subtree, attr=attr) + if response: + out.write("END\n") + else: + out.write("\nThis function cannot be represented " + "in Tableau syntax.\n") + out.flush() + return None + + + +def tableau_body(tree, offsets, fields, objective_id, + body="", conditions=None, cmv=None, + ids_path=None, subtree=True, attr=DFT_ATTR): + """Translate the model into a set of "if" statements in Tableau syntax + + `depth` controls the size of indentation. As soon as a value is missing + that node is returned without further evaluation. 
+ + """ + + if cmv is None: + cmv = [] + if body: + alternate = "ELSEIF" + else: + if conditions is None: + conditions = [] + alternate = "IF" + + node = get_node(tree) + children_number = node[offsets["children#"]] + children = [] if children_number == 0 else node[offsets["children"]] + children = filter_nodes(children, offsets, ids=ids_path, + subtree=subtree) + if children: + [_, field, _, _, _] = get_predicate(children[0]) + has_missing_branch = (missing_branch(children) or + none_value(children)) + # the missing is singled out as a special case only when there's + # no missing branch in the children list + if (not has_missing_branch and + fields[field]['name'] not in cmv): + conditions.append("ISNULL([%s])" % fields[field]['name']) + body += ("%s %s THEN " % + (alternate, " AND ".join(conditions))) + if fields[objective_id]['optype'] == 'numeric': + value = node[offsets[attr]] + else: + value = tableau_string(node[offsets[attr]]) + body += ("%s\n" % value) + cmv.append(fields[field]['name']) + alternate = "ELSEIF" + del conditions[-1] + + for child in children: + pre_condition = "" + post_condition = "" + [operator, field, ch_value, _, missing] = get_predicate(child) + if has_missing_branch and ch_value is not None: + negation = "" if missing else "NOT " + connection = "OR" if missing else "AND" + pre_condition = ( + "(%sISNULL([%s]) %s " % ( + negation, fields[field]['name'], connection)) + if not missing: + cmv.append(fields[field]['name']) + post_condition = ")" + optype = fields[field]['optype'] + if ch_value is None: + value = "" + elif optype in ['text', 'items']: + return "" + elif optype == 'numeric': + value = ch_value + else: + value = repr(ch_value) + + operator = ("" if ch_value is None else + PYTHON_OPERATOR[operator]) + if ch_value is None: + pre_condition = ( + T_MISSING_OPERATOR[operator]) + post_condition = ")" + + conditions.append("%s[%s]%s%s%s" % ( + pre_condition, + fields[field]['name'], + operator, + value, + post_condition)) + body = 
tableau_body(child, offsets, fields, objective_id, + body, conditions[:], cmv=cmv[:], + ids_path=ids_path, subtree=subtree, attr=attr) + del conditions[-1] + else: + if fields[objective_id]['optype'] == 'numeric': + value = tree[offsets[attr]] + else: + value = tableau_string(node[offsets[attr]]) + body += ( + "%s %s THEN" % (alternate, " AND ".join(conditions))) + body += " %s\n" % value + + return body + +def tree_tableau(tree, offsets, fields, objective_id, + out, ids_path=None, subtree=True, attr=DFT_ATTR): + """Writes a Tableau function that implements the model. + + """ + body = tableau_body(tree, offsets, fields, objective_id, + ids_path=ids_path, subtree=subtree, attr=attr) + if not body: + return False + out.write(utf8(body)) + out.flush() + return True + + +def group_prediction(model): + """Groups in categories or bins the predicted data + + dict - contains a dict grouping counts in 'total' and 'details' lists. + 'total' key contains a 3-element list. + - common segment of the tree for all instances + - data count + - predictions count + 'details' key contains a list of elements. 
Each element is a + 3-element list: + - complete path of the tree from the root to the leaf + - leaf predictions count + - confidence + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + groups = {} + tree = model.tree + node = get_node(tree) + offsets = model.offsets + distribution = node[offsets["distribution"]] + + for group in distribution: + groups[group[0]] = {'total': [[], group[1], 0], + 'details': []} + path = [] + + def add_to_groups(groups, output, path, count, confidence, + impurity=None): + """Adds instances to groups array + + """ + group = output + if output not in groups: + groups[group] = {'total': [[], 0, 0], + 'details': []} + groups[group]['details'].append([path, count, confidence, + impurity]) + groups[group]['total'][2] += count + + def depth_first_search(tree, path): + """Search for leafs' values and instances + + """ + node = get_node(tree) + predicate = get_predicate(tree) + if isinstance(predicate, list): + [operation, field, value, term, _] = predicate + operator = INVERSE_OP[operation] + path.append(Predicate(operator, field, value, term)) + if term: + if field not in model.terms: + model.terms[field] = [] + if term not in model.terms[field]: + model.terms[field].append(term) + + if node[offsets["children#"]] == 0: + add_to_groups(groups, node[offsets["output"]], + path, node[offsets["count"]], + node[offsets["confidence"]], + gini_impurity(node[offsets["distribution"]], + node[offsets["count"]])) + return node[offsets["count"]] + children = node[offsets["children"]][:] + children.reverse() + + children_sum = 0 + for child in children: + children_sum += depth_first_search(child, path[:]) + if children_sum < node[offsets["count"]]: + add_to_groups(groups, node[offsets["output"]], path, + node[offsets["count"]] - children_sum, + node[offsets["confidence"]], + gini_impurity(node[offsets["distribution"]], + node[offsets["count"]])) + return node[offsets["count"]] + + 
depth_first_search(tree, path) + + return groups + + +def get_data_distribution(model): + """Returns training data distribution + + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + node = get_node(model.tree) + + distribution = node[model.offsets["distribution"]] + + return sorted(distribution, key=lambda x: x[0]) + + +def get_prediction_distribution(model, groups=None): + """Returns model predicted distribution + + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + if groups is None: + groups = group_prediction(model) + + predictions = [[group, groups[group]['total'][2]] for group in groups] + # remove groups that are not predicted + predictions = [prediction for prediction in predictions \ + if prediction[1] > 0] + + return sorted(predictions, key=lambda x: x[0]) + + +#pylint: disable=locally-disabled,redefined-builtin +def summarize(model, out=sys.stdout, format=BRIEF): + """Prints summary grouping distribution as class header and details + + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + tree = model.tree + + def extract_common_path(groups): + """Extracts the common segment of the prediction path for a group + + """ + for group in groups: + details = groups[group]['details'] + common_path = [] + if len(details) > 0: + mcd_len = min([len(x[0]) for x in details]) + for i in range(0, mcd_len): + test_common_path = details[0][0][i] + for subgroup in details: + if subgroup[0][i] != test_common_path: + i = mcd_len + break + if i < mcd_len: + common_path.append(test_common_path) + groups[group]['total'][0] = common_path + if len(details) > 0: + groups[group]['details'] = sorted(details, + key=lambda x: x[1], + reverse=True) + + def confidence_error(value, impurity=None): + """Returns confidence for categoric objective fields + and error for numeric objective fields + """ + if value is None: + 
return "" + impurity_literal = "" + if impurity is not None and impurity > 0: + impurity_literal = "; impurity: %.2f%%" % (round(impurity, 4)) + objective_type = model.fields[model.objective_id]['optype'] + if objective_type == 'numeric': + return " [Error: %s]" % value + return " [Confidence: %.2f%%%s]" % (round(value, 4) * 100, + impurity_literal) + + distribution = get_data_distribution(model) + + out.write(utf8("Data distribution:\n")) + print_distribution(distribution, out=out) + out.write(utf8("\n\n")) + + groups = group_prediction(model) + predictions = get_prediction_distribution(model, groups) + + out.write(utf8("Predicted distribution:\n")) + print_distribution(predictions, out=out) + out.write(utf8("\n\n")) + + if model.field_importance: + out.write(utf8("Field importance:\n")) + print_importance(model, out=out) + + extract_common_path(groups) + + out.write(utf8("\n\nRules summary:")) + + node = get_node(tree) + count = node[model.offsets["count"]] + for group in [x[0] for x in predictions]: + details = groups[group]['details'] + path = Path(groups[group]['total'][0]) + data_per_group = groups[group]['total'][1] * 1.0 / count + pred_per_group = groups[group]['total'][2] * 1.0 / count + out.write(utf8("\n\n%s : (data %.2f%% / prediction %.2f%%) %s" % + (group, + round(data_per_group, 4) * 100, + round(pred_per_group, 4) * 100, + path.to_rules(model.fields, format=format)))) + + if len(details) == 0: + out.write(utf8("\n The model will never predict this" + " class\n")) + elif len(details) == 1: + subgroup = details[0] + out.write(utf8("%s\n" % confidence_error( + subgroup[2], impurity=subgroup[3]))) + else: + out.write(utf8("\n")) + for subgroup in details: + pred_per_sgroup = subgroup[1] * 1.0 / \ + groups[group]['total'][2] + path = Path(subgroup[0]) + path_chain = path.to_rules(model.fields, format=format) if \ + path.predicates else "(root node)" + out.write(utf8(" · %.2f%%: %s%s\n" % + (round(pred_per_sgroup, 4) * 100, + path_chain, + 
confidence_error(subgroup[2], + impurity=subgroup[3])))) + + out.flush() + + +def get_nodes_info(model, headers, leaves_only=False): + """Generator that yields the nodes information in a row format + + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + + def get_tree_nodes_info(tree, offsets, regression, fields, objective_id, + headers=None, leaves_only=False): + """Yields the information associated to each of the tree nodes + + """ + row = [] + node = get_node(tree) + if not regression: + category_dict = dict(node[offsets["distribution"]]) + for header in headers: + if header == fields[objective_id]['name']: + row.append(node[offsets["output"]]) + continue + if header in ['confidence', 'error']: + row.append(node[offsets["confidence"]]) + continue + if header == 'impurity': + row.append(gini_impurity(node[offsets["distribution"]], + node[offsets["count"]])) + continue + if regression and header.startswith('bin'): + for bin_value, bin_instances in node[offsets["distribution"]]: + row.append(bin_value) + row.append(bin_instances) + break + if not regression: + row.append(category_dict.get(header)) + while len(row) < len(headers): + row.append(None) + if not leaves_only or not tree.children: + yield row + + if node[offsets["children#"]] > 0: + for child in node[offsets["children"]]: + for row in get_tree_nodes_info(child, offsets, regression, + fields, objective_id, headers, + leaves_only=leaves_only): + yield row + + return get_tree_nodes_info(model.tree, + model.offsets, + model.regression, + model.fields, + model.objective_id, + headers, leaves_only=leaves_only) + + +def tree_csv(model, file_name=None, leaves_only=False): + """Outputs the node structure to a CSV file or array + + """ + if model.boosting: + raise AttributeError("This method is not available for boosting" + " models.") + headers_names = [] + if model.regression: + headers_names.append( + model.fields[model.objective_id]['name']) + 
headers_names.append("error") + max_bins = get_node(model.tree)[model.offsets["max_bins"]] + for index in range(0, max_bins): + headers_names.append("bin%s_value" % index) + headers_names.append("bin%s_instances" % index) + else: + headers_names.append( + model.fields[model.objective_id]['name']) + headers_names.append("confidence") + headers_names.append("impurity") + node = get_node(model.tree) + for category, _ in node[model.offsets["distribution"]]: + headers_names.append(category) + + nodes_generator = get_nodes_info(model, headers_names, + leaves_only=leaves_only) + if file_name is not None: + with UnicodeWriter(file_name) as writer: + writer.writerow([utf8(header) + for header in headers_names]) + for row in nodes_generator: + writer.writerow([item if not isinstance(item, str) + else utf8(item) + for item in row]) + return file_name + rows = [] + rows.append(headers_names) + for row in nodes_generator: + rows.append(row) + return rows diff --git a/bigml/generators/static/items_analysis.txt b/bigml/generators/static/items_analysis.txt new file mode 100644 index 00000000..3e2c5321 --- /dev/null +++ b/bigml/generators/static/items_analysis.txt @@ -0,0 +1,20 @@ + def item_matches(text, field_name, item): + """ Counts the number of occurrences of item in text + + """ + options = item_analysis[field_name] + separator = options.get('separator', ' ') + regexp = options.get('separator_regexp') + if regexp is None: + regexp = r"%s" % re.escape(separator) + return count_items_matches(text, item, regexp) + + + def count_items_matches(text, item, regexp): + """ Counts the number of occurrences of the item in the text + + """ + expression = r'(^|%s)%s($|%s)' % (regexp, item, regexp) + pattern = re.compile(expression, flags=re.U) + matches = re.findall(pattern, text) + return len(matches) diff --git a/bigml/generators/static/python_haddop_csv.txt b/bigml/generators/static/python_haddop_csv.txt new file mode 100644 index 00000000..19c7a04a --- /dev/null +++ 
b/bigml/generators/static/python_haddop_csv.txt @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +import csv +import locale +locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') + + +class CSVInput(object): + """Reads and parses csv input from stdin + + Expects a data section (without headers) with the following fields: + %s + + Data is processed to fall into the corresponding input type by applying + INPUT_TYPES, and per field PREFIXES and SUFFIXES are removed. You can + also provide strings to be considered as no content markers in + MISSING_TOKENS. + """ + def __init__(self, input=sys.stdin): + """ Opens stdin and defines parsing constants + + """ + try: + self.reader = csv.reader(input, delimiter=',', quotechar='\"') diff --git a/bigml/generators/static/python_hadoop_next.txt b/bigml/generators/static/python_hadoop_next.txt new file mode 100644 index 00000000..325d56f9 --- /dev/null +++ b/bigml/generators/static/python_hadoop_next.txt @@ -0,0 +1,62 @@ + self.MISSING_TOKENS = ['?'] + except Exception, exc: + sys.stderr.write(\"Cannot read csv\" + \" input. 
%s\\n\" % str(exc)) + + def __iter__(self): + """ Iterator method + + """ + return self + + def next(self): + """ Returns processed data in a list structure + + """ + def normalize(value): + """Transforms to unicode and cleans missing tokens + """ + value = unicode(value.decode('utf-8')) + return \"\" if value in self.MISSING_TOKENS else value + + def cast(function_value): + """Type related transformations + """ + function, value = function_value + if not len(value): + return None + if function is None: + return value + else: + return function(value) + + try: + values = self.reader.next() + except StopIteration: + raise StopIteration() + if len(values) < len(self.INPUT_FIELDS): + sys.stderr.write(\"Found %s fields when %s were expected.\\n\" % + (len(values), len(self.INPUT_FIELDS))) + raise StopIteration() + else: + values = values[0:len(self.INPUT_FIELDS)] + try: + values = map(normalize, values) + for key in self.PREFIXES: + prefix_len = len(self.PREFIXES[key]) + if values[key][0:prefix_len] == self.PREFIXES[key]: + values[key] = values[key][prefix_len:] + for key in self.SUFFIXES: + suffix_len = len(self.SUFFIXES[key]) + if values[key][-suffix_len:] == self.SUFFIXES[key]: + values[key] = values[key][0:-suffix_len] + function_tuples = zip(self.INPUT_TYPES, values) + values = map(cast, function_tuples) + data = {} + for i in range(len(values)): + data.update({self.INPUT_FIELDS[i]: values[i]}) + return data + except Exception, exc: + sys.stderr.write(\"Error in data transformations. 
%s\\n\" % str(exc)) + return False +\n\n diff --git a/bigml/generators/static/python_hadoop_reducer.txt b/bigml/generators/static/python_hadoop_reducer.txt new file mode 100644 index 00000000..dca65d4f --- /dev/null +++ b/bigml/generators/static/python_hadoop_reducer.txt @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys + +count = 0 +previous = None + +def print_result(values, prediction, count): + """Prints input data and predicted value as an ordered list. + + """ + result = \"[%s, %s]\" % (values, prediction) + print u\"%s\\t%s\" % (result, count) + +for line in sys.stdin: + values, prediction = line.strip().split('\\t') + if previous is None: + previous = (values, prediction) + if values != previous[0]: + print_result(previous[0], previous[1], count) + previous = (values, prediction) + count = 0 + count += 1 +if count > 0: + print_result(previous[0], previous[1], count) diff --git a/bigml/generators/static/term_analysis.txt b/bigml/generators/static/term_analysis.txt new file mode 100644 index 00000000..7a416544 --- /dev/null +++ b/bigml/generators/static/term_analysis.txt @@ -0,0 +1,52 @@ + def term_matches(text, field_name, term): + """ Counts the number of occurences of term and its variants in text + + """ + forms_list = term_forms[field_name].get(term, [term]) + options = term_analysis[field_name] + token_mode = options.get('token_mode', tm_tokens) + case_sensitive = options.get('case_sensitive', False) + first_term = forms_list[0] + if token_mode == tm_full_term: + return full_term_match(text, first_term, case_sensitive) + else: + # In token_mode='all' we will match full terms using equals and + # tokens using contains + if token_mode == tm_all and len(forms_list) == 1: + pattern = re.compile(r'^.+\b.+$', re.U) + if re.match(pattern, first_term): + return full_term_match(text, first_term, case_sensitive) + return term_matches_tokens(text, forms_list, case_sensitive) + + + def full_term_match(text, full_term, case_sensitive): + 
"""Counts the match for full terms according to the case_sensitive + option + + """ + if not case_sensitive: + text = text.lower() + full_term = full_term.lower() + return 1 if text == full_term else 0 + + def get_tokens_flags(case_sensitive): + """Returns flags for regular expression matching depending on text + analysis options + + """ + flags = re.U + if not case_sensitive: + flags = (re.I | flags) + return flags + + + def term_matches_tokens(text, forms_list, case_sensitive): + """ Counts the number of occurrences of the words in forms_list in + the text + + """ + flags = get_tokens_flags(case_sensitive) + expression = r'(\b|_)%s(\b|_)' % '(\\b|_)|(\\b|_)'.join(forms_list) + pattern = re.compile(expression, flags=flags) + matches = re.findall(pattern, text) + return len(matches) diff --git a/bigml/generators/tree.py b/bigml/generators/tree.py new file mode 100644 index 00000000..95d7200e --- /dev/null +++ b/bigml/generators/tree.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +"""Tree level output for python +This module defines functions that generate python code to make local +predictions +""" + +from bigml.tree_utils import INDENT, COMPOSED_FIELDS + +from bigml.predict_utils.common import missing_branch, \ + none_value, get_node, get_predicate, mintree_split +from bigml.generators.tree_common import value_to_print, map_data, \ + missing_prefix_code, filter_nodes, split_condition_code + + +MISSING_OPERATOR = { + "=": "is", + "!=": "is not" +} + + +def missing_check_code(tree, offsets, fields, objective_id, + field, depth, input_map, cmv, metric): + """Builds the code to predict when the field is missing + """ + code = "%sif (%s is None):\n" % \ + (INDENT * depth, + map_data(fields[field]['slug'], input_map, True)) + node = get_node(tree) + value = value_to_print(node[offsets["output"]], + fields[objective_id]['optype']) + code += "%sreturn {\"prediction\": %s," \ + " \"%s\": %s}\n" % \ + (INDENT * (depth + 1), value, metric, node[offsets["confidence"]]) + cmv.append(fields[field]['slug']) + return code + + +def plug_in_body(tree, offsets, fields, objective_id, regression, + depth=1, cmv=None, input_map=False, + ids_path=None, subtree=True): + """Translate the model into a set of "if" python statements. + `depth` controls the size of indentation. As soon as a value is missing + that node is returned without further evaluation. 
+ """ + # label for the confidence measure and initialization + metric = "error" if regression else "confidence" + if cmv is None: + cmv = [] + body = "" + term_analysis_fields = [] + item_analysis_fields = [] + + node = get_node(tree) + children = [] if node[offsets["children#"]] == 0 else \ + node[offsets["children"]] + children = filter_nodes(children, offsets, ids=ids_path, + subtree=subtree) + if children: + + # field used in the split + field = mintree_split(children) + + has_missing_branch = (missing_branch(children) or + none_value(children)) + # the missing is singled out as a special case only when there's + # no missing branch in the children list + one_branch = not has_missing_branch or \ + fields[field]['optype'] in COMPOSED_FIELDS + if (one_branch and + not fields[field]['slug'] in cmv): + body += missing_check_code(tree, offsets, fields, objective_id, + field, depth, input_map, cmv, metric) + + for child in children: + [_, field, value, _, _] = get_predicate(child) + pre_condition = "" + # code when missing_splits has been used + if has_missing_branch and value is not None: + pre_condition = missing_prefix_code(child, fields, field, + input_map, cmv) + + # complete split condition code + body += split_condition_code( \ + child, fields, depth, input_map, pre_condition, + term_analysis_fields, item_analysis_fields, cmv) + + # value to be determined in next node + next_level = plug_in_body(child, offsets, fields, objective_id, + regression, depth + 1, cmv=cmv[:], + input_map=input_map, ids_path=ids_path, + subtree=subtree) + + body += next_level[0] + term_analysis_fields.extend(next_level[1]) + item_analysis_fields.extend(next_level[2]) + else: + value = value_to_print(node[offsets["output"]], + fields[objective_id]['optype']) + body = "%sreturn {\"prediction\":%s, \"%s\":%s}\n" % ( \ + INDENT * depth, value, metric, node[offsets["confidence"]]) + + return body, term_analysis_fields, item_analysis_fields diff --git a/bigml/generators/tree_common.py 
b/bigml/generators/tree_common.py new file mode 100644 index 00000000..4a46b8e6 --- /dev/null +++ b/bigml/generators/tree_common.py @@ -0,0 +1,133 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Tree level output for python +This module defines functions that generate python code to make local +predictions +""" + +from bigml.tree_utils import ( + INDENT, PYTHON_OPERATOR, NUMERIC_VALUE_FIELDS) +from bigml.predict_utils.common import \ + get_node, get_predicate, MISSING_OFFSET + +MISSING_OPERATOR = { + "=": "is", + "!=": "is not" +} + + +def value_to_print(value, optype): + """String of code that represents a value according to its type + """ + # the value is numeric for these fields + if (optype in NUMERIC_VALUE_FIELDS or value is None): + return value + return "\"%s\"" % value.replace('"', '\\"') + + +def map_data(field, input_map=False, missing=False): + """Returns the subject of the condition in map format when + more than MAX_ARGS_LENGTH arguments are used. 
+ """ + if input_map: + if missing: + return "data.get('%s')" % field + return "data['%s']" % field + return field + + +def missing_prefix_code(tree, fields, field, input_map, cmv): + """Part of the condition that checks for missings when missing_splits + has been used + """ + + predicate = get_predicate(tree) + missing = predicate[MISSING_OFFSET] + negation = "" if missing else " not" + connection = "or" if missing else "and" + if not missing: + cmv.append(fields[field]['slug']) + return "%s is%s None %s " % (map_data(fields[field]['slug'], + input_map, + True), + negation, + connection) + + +def split_condition_code(tree, fields, depth, input_map, + pre_condition, term_analysis_fields, + item_analysis_fields, cmv): + """Condition code for the split + """ + + predicate = get_predicate(tree) + [operation, field, value, term, _] = predicate + optype = fields[field]['optype'] + value = value_to_print(value, optype) + + if optype in ['text', 'items']: + if optype == 'text': + term_analysis_fields.append((field, term)) + matching_function = "term_matches" + else: + item_analysis_fields.append((field, term)) + matching_function = "item_matches" + + return "%sif (%s%s(%s, \"%s\", %s%s) %s " \ + "%s):\n" % \ + (INDENT * depth, pre_condition, matching_function, + map_data(fields[field]['slug'], + input_map, + False), + fields[field]['slug'], + 'u' if isinstance(term, str) else '', + value_to_print(term, 'categorical'), + PYTHON_OPERATOR[operation], + value) + + operator = (MISSING_OPERATOR[operation] if + value is None else + PYTHON_OPERATOR[operation]) + if value is None: + cmv.append(fields[field]['slug']) + return "%sif (%s%s %s %s):\n" % \ + (INDENT * depth, pre_condition, + map_data(fields[field]['slug'], input_map, + False), + operator, + value) + + +def filter_nodes(trees_list, offsets, ids=None, subtree=True): + """Filters the contents of a trees_list. If any of the nodes is in the + ids list, the rest of nodes are removed. 
If none is in the ids list + we include or exclude the nodes depending on the subtree flag. + + """ + if not trees_list: + return None + trees = trees_list[:] + if ids is not None: + for tree in trees: + node = get_node(tree) + node_id = node[offsets["id"]] + if node_id in ids: + trees = [tree] + return trees + if not subtree: + trees = [] + return trees diff --git a/bigml/images/__init__.py b/bigml/images/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bigml/images/featurizers.py b/bigml/images/featurizers.py new file mode 100644 index 00000000..d6919ed1 --- /dev/null +++ b/bigml/images/featurizers.py @@ -0,0 +1,467 @@ +# -*- coding: utf-8 -*- +#pylint: disable=invalid-name +# +# Copyright 2022-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Image Featurizers + +This module defines the classes that produce the features extracted from +images in BigML. They are used in Modefields to extend the original input +data provided for local predictions. 
+ +""" +import os +import math +import numpy as np + + +from PIL import Image +from sensenet.models.wrappers import create_image_feature_extractor +from bigml.featurizer import Featurizer, expand_date +from bigml.constants import IMAGE + +TOP_SIZE = 512 +N_BINS = 16 +INTENSITY_RANGE = 256 +BIN_WIDTH = INTENSITY_RANGE / N_BINS +HOG_BINS = 9 +HOG_BIN_WIDTH = np.pi / HOG_BINS +DECOMPS = ["horizontal", "diagonal", "vertical"] + +PRETRAINED = "pretrained_cnn" +WAVELET = "wavelet_subbands" + +def resize_to(image, top_size=TOP_SIZE): + """Resizing the image to a maximum width or height """ + width, height = image.size + if width > top_size or height > top_size: + if width > height: + ratio = height / width + image = image.resize((top_size , int(ratio * top_size)), + Image.BICUBIC) + else: + ratio = width / height + image = image.resize((int(ratio * top_size), top_size), + Image.BICUBIC) + return image + + +def grid_coords(image_a, grid_size): + """ getting the start and end positions for each grid """ + try: + height, width, _ = image_a.shape + except ValueError: + height, width = image_a.shape + f_grid_size = float(grid_size) + h_step = height / f_grid_size + w_step = width / f_grid_size + coords = [] + for h in range(0, grid_size): + for w in range(0, grid_size): + h_start = int(max([0, math.floor(h * h_step)])) + w_start = int(max([0, math.floor(w * w_step)])) + h_end = int(min([height, math.ceil((h + 1) * h_step)])) + w_end = int(min([width, math.ceil((w + 1) * w_step)])) + coords.append([h_start, w_start, h_end, w_end]) + return coords + + +def dimensions_extractor(image_file): + """Returns the features related to the image dimensions: + file size, width, height, aspect ratio + """ + file_size = os.stat(image_file).st_size + image = Image.open(image_file) + width, height = image.size + aspect_ratio = width / float(height) + return [file_size, width, height, aspect_ratio] + + +def average_pixels_extractor(image_file): + """ Averaging pixels for the entire image, 3x3 
and 4x4 grids + The image passed as argument should already be resized to 512 max + """ + image = Image.open(image_file) + image = resize_to(image) + image_a = np.array(image) + avg_pixels = [np.average(image_a[:, :, n]) for n in range(0, 3)] + coords = grid_coords(image_a, 3) + coords.extend(grid_coords(image_a, 4)) + for h_start, w_start, h_end, w_end in coords: + avg_pixels.extend( + [np.average(image_a[h_start: h_end, w_start: w_end, n]) + for n in range(0, 3)]) + return avg_pixels + + +def get_bin(value, bin_width): + """Returns the bin where a value falls in.""" + return math.floor(value / bin_width) + + +def get_luminance(image_a): + """Getting the Y coordinate in YUV in terms of the RGB channel info.""" + r = image_a[:, :, 0] + g = image_a[:, :, 1] + b = image_a[:, :, 2] + + image_l = 0.299 * r + 0.587 * g + 0.114 * b + image_l = image_l.astype('d') + return image_l + +def level_histogram_extractor(image_file): + """Level histogram feature extractor.""" + image = Image.open(image_file) + image = resize_to(image) + image_a = np.array(image) + height, width, _ = image_a.shape + pixels_per_channel = width * height + output = [0] * 3 * N_BINS + for c in range(0, 3): + offset = N_BINS * c + for h in range(0, height): + for w in range(0, width): + bin_index = get_bin(image_a[h][w][c], BIN_WIDTH) + output[bin_index + offset] += 1 + for index, _ in enumerate(output): + output[index] /= pixels_per_channel + + return output + + +def HOG_transform(image_a): + """Histogram of Gradients transformation.""" + image_l = get_luminance(image_a) + height, width = image_l.shape + if height > 2 and width > 2: + trans_image = np.empty(((height - 2), (width - 2), 2)) + trans_image.astype('d') + for y in range(0, (height - 2)): + for x in range(0, (width - 2)): + py = y + 1 + px = x + 1 + x_edge = image_l[py][x] - image_l[py][px + 1] + y_edge = image_l[y][px] - image_l[py + 1][px] + + trans_image[y][x][0] = math.sqrt( + x_edge * x_edge + y_edge * y_edge) + + # Convert to zero - pi 
radians + if x_edge == 0: + if y_edge > 0: + trans_image[y][x][1] = np.pi + elif y_edge < 0: + trans_image[y][x][1] = 0 + else: + trans_image[y][x][1] = np.nan + else: + trans_image[y][x][1] = math.atan( + y_edge / x_edge) + (np.pi / 2) + else: + trans_image = np.empty((height, width, 2)) + for y in range(0, height): + for x in range(0, width): + trans_image[y][x][0] = 0 + trans_image[y][x][1] = np.nan + + return trans_image + + +def HOG_aggregate(trans_image, grid_size): + """Histogram of Gradients aggregation.""" + # Laplace correction to avoid zero norm; kind of arbitrary + features = np.ones(((grid_size * grid_size), HOG_BINS)) + + bounds = grid_coords(trans_image, grid_size) + for index, bound in enumerate(bounds): + h_start, w_start, h_end, w_end = bound + for y in range(h_start, h_end): + for x in range(w_start, w_end): + mag = trans_image[y][x][0] + angle = trans_image[y][x][1] + + if mag > 0: + if angle >= np.pi: + low = HOG_BINS - 1 + else: + low = get_bin(angle, HOG_BIN_WIDTH) + high = (low + 1) % HOG_BINS + + high_weight = ( + angle - low * HOG_BIN_WIDTH) / HOG_BIN_WIDTH + low_weight = 1 - high_weight + + # Split vote between adjacent bins + features[index][low] += mag * low_weight + features[index][high] += mag * high_weight + norm = np.linalg.norm(features[index]) + features[index] = features[index] / norm + return features + + +def HOG_extractor(image_file): + """Histogram of Gradients Feature extractor""" + image = Image.open(image_file) + image = image.convert('RGB') + image = resize_to(image) + image_a = np.array(image) + transform = HOG_transform(image_a) + features = HOG_aggregate(transform, 1) + features3x3 = HOG_aggregate(transform, 3) + features4x4 = HOG_aggregate(transform, 4) + features_list = list(features.reshape(-1)) + features_list.extend(list(features3x3.reshape(-1))) + features_list.extend(list(features4x4.reshape(-1))) + return features_list + + +def energy_parameters(values, coords): + """Energy parameters computation.""" + if 
def haar1Ds(signal):
    """1-dimensional Haar components of a sequence of values.

    Returns a (2, max(1, len(signal) // 2)) array. Row 0 holds the mean of
    each consecutive pair of values and row 1 the absolute value of their
    difference. The last value of an odd-length signal is not used. A single
    value is returned as its own mean, with a zero detail.
    """
    values = np.asarray(signal)
    length = len(values)
    output = np.empty((2, max(1, length // 2)))

    if length > 1:
        # pair up values (0, 1), (2, 3), ... with slices instead of a
        # Python loop; the arithmetic per pair is unchanged
        evens = values[0:length - 1:2]
        odds = values[1:length:2]
        output[0] = (evens + odds) / 2
        output[1] = np.abs(evens - odds)
    else:
        output[0][0] = values[0]
        output[1][0] = 0

    return output


def haar1D(image, vertical):
    """1-dimensional Haar decomposition of every row of a 2D array.

    When `vertical` is set, the array is transposed before decomposing and
    both components are transposed back afterwards, so that columns are
    decomposed instead of rows. Returns the (means, details) pair.
    """
    if vertical:
        image = image.transpose()

    output = np.empty((2, len(image), max([1, int(len(image[0]) / 2)])))

    for i, cell in enumerate(image):
        row_decomp = haar1Ds(cell)
        output[0][i] = row_decomp[0]
        output[1][i] = row_decomp[1]

    if vertical:
        output = np.array([output[0].transpose(),
                           output[1].transpose()])

    return output


def haar2D(image):
    """2-dimensional Haar components.

    Decomposes rows first and then the columns of both results. Returns an
    array stacking (average, horizontal, diagonal, vertical).
    """
    h_mean, h_detail = haar1D(image, False)
    average, vertical = haar1D(h_mean, True)
    horizontal, diagonal = haar1D(h_detail, True)

    return np.array([average, horizontal, diagonal, vertical])


def wavelet_subbands_aggregate(trans_image, grid_size):
    """Wavelet subbands aggregation.

    `trans_image` is the list built by `wavelet_subbands_transform`. For
    every cell of the `grid_size` x `grid_size` grid and every sub-band, the
    two values returned by `energy_parameters` are stored consecutively in a
    flat double precision array.
    """
    n_subbands = (len(trans_image) - 1) * len(DECOMPS) + 1
    # np.empty is given the dtype directly: the former `features.astype('d')`
    # call discarded its result, so it had no effect
    features = np.empty((n_subbands * grid_size * grid_size * 2,), dtype='d')

    # all the sub-bands in a level share shape, so the first one sets the grid
    bounds = [grid_coords(level[0], grid_size) for level in trans_image]

    index = 0
    for cell_index in range(grid_size * grid_size):
        for i, row in enumerate(trans_image):
            for cell in row:
                params = energy_parameters(
                    cell, bounds[i][cell_index])
                features[index] = params[0]
                features[index + 1] = params[1]
                # NOTE(review): indentation is lost in the reviewed text;
                # advancing once per sub-band is the placement that fills
                # exactly the array allocated above -- confirm
                index += len(params)

    return features


def wavelet_subbands_transform(image_a, levels):
    """Haar Wavelet subbands transformation.

    Applies `haar2D` to the luminance of the image `levels` times, feeding
    each level with the average component of the previous one. Returns one
    list of detail components per level followed by a one-element list that
    holds the last average.
    """
    image_l = get_luminance(image_a)

    output = []

    for _ in range(0, levels):
        decomp = haar2D(image_l)
        # decomp[0] is the average; the detail components follow it
        output.append([decomp[j + 1] for j in range(0, len(DECOMPS))])
        image_l = decomp[0]

    output.append([image_l])

    return output


def wavelet_subbands_extractor(image_file, levels):
    """Wavelet subbands feature extractor.

    Concatenates the features aggregated over the whole image and over its
    2x2 grid, in that order.
    """
    image = Image.open(image_file)
    image = image.convert('RGB')
    image = resize_to(image)
    image_a = np.array(image)
    transform = wavelet_subbands_transform(image_a, levels)
    features = wavelet_subbands_aggregate(transform, 1)
    features2x2 = wavelet_subbands_aggregate(transform, 2)
    features_list = list(features.reshape(-1))
    features_list.extend(list(features2x2.reshape(-1)))
    return features_list
def expand_image(res_object, parent_id, image_file):
    """ Retrieves all the values of the subfields generated from
    a parent image field

    The values produced by every generator registered for the parent field
    are concatenated and paired, in order, with the parent's "child_ids".
    """
    child_ids = res_object.fields[parent_id]["child_ids"]
    values = []
    for generator in res_object.generators[parent_id]:
        values.extend(generator(image_file))
    return dict(zip(child_ids, values))


class ImageFeaturizer(Featurizer):
    """This class provides methods for image Feature extraction."""

    def __init__(self, fields, input_fields, selected_fields=None,
                 preferred_only=True):
        self.fields = fields
        self.input_fields = input_fields
        self.subfields = {}
        self.generators = {}
        self.preferred_only = preferred_only
        # subfields are computed before delegating to the parent constructor
        self.selected_fields = self.add_subfields(
            selected_fields, preferred_only=preferred_only)
        super().__init__(fields, input_fields, selected_fields, preferred_only)

    def _add_subfield(self, field_id, field):
        """Adding a subfield and the corresponding generator """
        parent_id = field["parent_ids"][0]
        subfield = {field_id: field["datatype"]}
        if parent_id in self.subfields:
            # parent already registered: only the new subfield is added
            self.subfields[parent_id].update(subfield)
            return

        if self.fields[parent_id]["optype"] == IMAGE:
            expand_fn_list = get_image_extractors(self, parent_id)
        else:
            expand_fn_list = [expand_date]
        self.selected_fields[parent_id] = self.fields[parent_id]
        self.subfields[parent_id] = subfield
        self.generators[parent_id] = expand_fn_list

    def add_subfields(self, selected_fields=None, preferred_only=True):
        """Adding the subfields information in the fields structure and the
        generating functions for the subfields values.
        """
        # filling preferred fields with preferred input fields
        candidates = selected_fields or self.fields

        # NOTE(review): indentation is lost in the reviewed text; the filter
        # is assumed to run only when no selection was given -- confirm
        if selected_fields is None:
            selected_fields = {}
            for field_id, field in candidates.items():
                if field_id not in self.input_fields:
                    continue
                if preferred_only and not self.fields[field_id].get(
                        "preferred", True):
                    continue
                selected_fields[field_id] = field
        self.selected_fields = selected_fields

        # computing the generated subfields; a copy is iterated because
        # _add_subfield inserts the parent fields in the same dictionary
        for fid, finfo in list(self.selected_fields.items()):
            if finfo.get('parent_optype', False) == 'datetime' or \
                    finfo.get('provenance', False) in IMAGE_PROVENANCE:
                # datetime and image subfields
                self._add_subfield(fid, finfo)

        return self.selected_fields

    def extend_input(self, input_data):
        """Computing the values for the generated subfields and adding them
        to the original input data. Parent fields will be removed.
        """
        extended = {}
        for f_id, value in list(input_data.items()):
            if f_id not in self.generators:
                # plain field: copied as is
                extended[f_id] = value
                continue

            # parent fields are only kept when preferred_only is off
            if not self.preferred_only:
                extended[f_id] = value
            if self.fields[f_id]["optype"] == IMAGE:
                generated = expand_image(self, f_id, input_data[f_id])
            else:
                generated = self.generators[f_id][0](
                    self, f_id, input_data[f_id])
            extended.update(generated)
        return extended
def resize_to(image, top_size=TOP_SIZE):
    """Resizing the image to a maximum width or height

    Images whose sides are both within `top_size` are returned untouched.
    Otherwise the longest side is set to `top_size` and the other one is
    scaled by the aspect ratio, with a minimum of one pixel.
    """
    width, height = image.size
    if width > top_size or height > top_size:
        if width > height:
            ratio = height / width
            # max(1, ...) keeps extreme aspect ratios from asking for a
            # zero-pixel side
            new_size = (top_size, max(1, int(ratio * top_size)))
        else:
            ratio = width / height
            new_size = (max(1, int(ratio * top_size)), top_size)
        image = image.resize(new_size, Image.BICUBIC)
    return image


def to_relative_coordinates(image_file, regions_list):
    """Transforms predictions with regions having absolute pixels regions
    to the relative format used remotely and rounds to the same precision.

    The dictionaries in `regions_list` are updated in place: "box" holds
    [xmin, ymin, xmax, ymax] divided by the image width or height and
    "score" is rounded, both to DECIMALS digits. The same list is returned.
    """
    if regions_list:
        # only the size is needed, so the file is closed right away
        with Image.open(image_file) as image_obj:
            width, height = image_obj.size
        for region in regions_list:
            xmin, ymin, xmax, ymax = region["box"]
            region["box"] = [round(xmin / width, DECIMALS),
                             round(ymin / height, DECIMALS),
                             round(xmax / width, DECIMALS),
                             round(ymax / height, DECIMALS)]
            region["score"] = round(region["score"], DECIMALS)
    return regions_list


def remote_preprocess(image_file):
    """Emulating the preprocessing of images done in the backend to
    get closer results in local predictions

    Returns the name of a new temporary JPEG file. The file is not deleted
    by this function.
    """
    # the file is created in TEMP_DIR when that directory exists and in the
    # default temporary directory otherwise. Only its name is kept: the
    # image is written once the handle is closed
    temp_dir = TEMP_DIR if os.path.isdir(TEMP_DIR) else None
    with tempfile.NamedTemporaryFile(
            suffix=".jpg", dir=temp_dir, delete=False) as temp_fp:
        tmp_file_name = temp_fp.name

    with Image.open(image_file) as source_image:
        image = source_image
        # converting to jpg
        if not image_file.lower().endswith((".jpg", ".jpeg")):
            image = image.convert('RGB')
        # resizing to top size: resize_to returns the resized image, so its
        # result has to be kept
        image = resize_to(image)
        # compressing to 90%
        image.save(tmp_file_name, quality=90)
    return tmp_file_name
class UnicodeReader():
    """Adapter to read files

    Iterates over the rows of a CSV file. It can be used as a context
    manager, which opens the file on entry and closes it on exit.
    """
    def __init__(self, filename, dialect=csv.excel,
                 encoding="utf-8", **kwargs):
        """Constructor method for the reader

        `filename` is the path of the file to read, or an object whose class
        is named UTF8Recoder, which is read from directly. The rest of the
        keyword arguments are handed to `csv.reader`.
        """
        self.filename = filename
        self.dialect = dialect
        self.encoding = encoding
        self.kwargs = kwargs
        self.file_handler = None
        self.reader = None

    def _is_recoder(self):
        """Checks whether `filename` is an already built UTF8Recoder"""
        return self.filename.__class__.__name__ == 'UTF8Recoder'

    def open_reader(self):
        """Opening the file

        Returns the reader itself, ready to be iterated.
        """
        if self._is_recoder():
            self.file_handler = self.filename
        else:
            self.file_handler = open(self.filename, 'rt',
                                     encoding=self.encoding, newline='')
        self.reader = csv.reader(self.file_handler, dialect=self.dialect,
                                 **self.kwargs)
        return self

    def __enter__(self):
        """Opening files

        """
        return self.open_reader()

    def __exit__(self, ftype, value, traceback):
        """Closing on exit

        """
        self.close_reader()

    def __next__(self):
        """Reading records

        """
        return next(self.reader)

    def __iter__(self):
        """Iterator

        """
        return self

    def close_reader(self):
        """Closing the file

        Nothing is done if the file was never opened. UTF8Recoder objects
        are left open.
        """
        if self.file_handler is None:
            # open_reader was never called: there is nothing to close
            return
        if not self._is_recoder():
            self.file_handler.close()
ftype, value, traceback): + """Closing on exit + + """ + self.close_writer() + + def writerow(self, row): + """Writer emulating CSV writerow + + """ + self.writer.writerow(row) + + def writerows(self, rows): + """Writer emulating CSV writerows + + """ + for row in rows: + self.writerow(row) diff --git a/bigml/iris_ensemble/ensemble_5f580eb0e84f942429000c22 b/bigml/iris_ensemble/ensemble_5f580eb0e84f942429000c22 new file mode 100644 index 00000000..91039cd1 --- /dev/null +++ b/bigml/iris_ensemble/ensemble_5f580eb0e84f942429000c22 @@ -0,0 +1 @@ +{"code": 200, "resource": "ensemble/5f580eb0e84f942429000c22", "location": "https://bigml.io/andromeda/ensemble/5f580eb0e84f942429000c22", "object": {"boosting": null, "category": 0, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-09-08T23:07:28.350000", "creator": "mmartin", "credits": 0.01735687255859375, "credits_per_prediction": 0.0, "dataset": "dataset/5f580e962fb31c516d000f0a", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "distributions": [{"importance": [["000003", 0.65193], ["100004", 0.3272], ["000001", 0.02087]], "predictions": {"categories": [["Iris-setosa", 48], ["Iris-versicolor", 48], ["Iris-virginica", 54]]}, "training": {"categories": [["Iris-setosa", 48], ["Iris-versicolor", 47], ["Iris-virginica", 55]]}}, {"importance": [["000003", 0.9548], ["000001", 0.03384], ["100004", 0.01137]], "predictions": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 51], ["Iris-virginica", 49]]}, "training": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]]}}, {"importance": [["000003", 0.88756], ["100004", 0.08608], ["000000", 0.02636]], "predictions": {"categories": [["Iris-setosa", 46], ["Iris-versicolor", 49], ["Iris-virginica", 55]]}, "training": {"categories": [["Iris-setosa", 46], 
["Iris-versicolor", 50], ["Iris-virginica", 54]]}}, {"importance": [["000003", 0.69065], ["100004", 0.29616], ["000001", 0.01319]], "predictions": {"categories": [["Iris-setosa", 48], ["Iris-versicolor", 50], ["Iris-virginica", 52]]}, "training": {"categories": [["Iris-setosa", 48], ["Iris-versicolor", 51], ["Iris-virginica", 51]]}}, {"importance": [["000003", 0.92673], ["100004", 0.05445], ["000000", 0.01882]], "predictions": {"categories": [["Iris-setosa", 54], ["Iris-versicolor", 46], ["Iris-virginica", 50]]}, "training": {"categories": [["Iris-setosa", 54], ["Iris-versicolor", 47], ["Iris-virginica", 49]]}}], "ensemble": {"fields": {"000000": {"column_number": 0, "datatype": "double", "generated": false, "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], 
"start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "order": 2, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "order": 3, "preferred": true, "provenance": "flatline", "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "100005": {"column_number": 
4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "provenance": "flatline", "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}}, "ensemble_sample": {"rate": 1.0, "replacement": true, "seed": "f0864448cf4447869a965d1ca580946c"}, "error_models": 0, "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "finished_models": 5, "focus_field": null, "focus_field_name": null, "importance": {"000000": 0.00904, "000001": 0.01358, "000003": 0.82233, "100004": 0.15505}, "input_fields": ["000000", "000001", "000003", "100004"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "models": ["model/5f580eb2440ca135f602213e", "model/5f580eb2440ca135f6022140", "model/5f580eb2440ca135f6022142", "model/5f580eb3440ca135f6022144", "model/5f580eb3440ca135f6022146"], "name": "iris [extended]", "name_options": "bootstrap decision forest, 10-node, 5-model, pruned, deterministic order", "node_threshold": 10, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_models": 5, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "100005", "objective_field_details": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4}, "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["100005"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/5f5670e85299633dc000fbd8", "randomize": false, "range": null, "replacement": false, "resource": "ensemble/5f580eb0e84f942429000c22", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4550, "source": "source/5f5671b1946b3047cc009818", "source_status": true, 
"split_candidates": 32, "split_field": null, "split_field_name": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 1630, "message": "The ensemble has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-09-08T23:07:31.227000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/iris_ensemble/model_5f580eb2440ca135f602213e b/bigml/iris_ensemble/model_5f580eb2440ca135f602213e new file mode 100644 index 00000000..03505b40 --- /dev/null +++ b/bigml/iris_ensemble/model_5f580eb2440ca135f602213e @@ -0,0 +1 @@ +{"code": 200, "resource": "model/5f580eb2440ca135f602213e", "location": "https://bigml.io/andromeda/model/5f580eb2440ca135f602213e", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-09-08T23:07:30.454000", "creator": "mmartin", "credits": 0.0, "credits_per_prediction": 0.0, "dataset": "dataset/5f580e962fb31c516d000f0a", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "5f580eb0e84f942429000c22", "ensemble_index": 0, "excluded_fields": [], "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000003", "100004"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 48], ["Iris-versicolor", 48], ["Iris-virginica", 54]]}, "training": {"categories": [["Iris-setosa", 48], ["Iris-versicolor", 47], ["Iris-virginica", 55]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "generated": false, "name": "sepal length", 
"optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "order": 2, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, 
"width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "order": 3, "preferred": true, "provenance": "flatline", "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "provenance": "flatline", "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000003", 0.65193], ["100004", 0.3272], ["000001", 0.02087]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "preferred": true}, 
"000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "preferred": true}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "preferred": true, "provenance": "flatline"}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "preferred": true, "provenance": "flatline", "term_analysis": {"enabled": true}}}, "node_threshold": 10, "root": {"children": [{"children": [{"children": [{"confidence": 0.91799, "count": 43, "id": 3, "objective_summary": {"categories": [["Iris-virginica", 43]]}, "output": "Iris-virginica", "predicate": {"field": "100004", "operator": ">", "value": 5.05}}, {"children": [{"children": [{"confidence": 0.34237, "count": 2, "id": 6, "objective_summary": {"categories": [["Iris-versicolor", 2]]}, "output": "Iris-versicolor", "predicate": {"field": "000001", "operator": ">", "value": 3.1}}, {"confidence": 0.74116, "count": 11, "id": 7, "objective_summary": {"categories": [["Iris-virginica", 11]]}, "output": "Iris-virginica", "predicate": {"field": "000001", "operator": "<=", "value": 3.1}}], "confidence": 0.57765, "count": 13, "id": 5, "objective_summary": {"categories": [["Iris-virginica", 11], ["Iris-versicolor", 2]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 1.75}}, {"confidence": 0.43849, "count": 3, "id": 8, "objective_summary": {"categories": [["Iris-versicolor", 3]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.75}}], "confidence": 0.44404, "count": 16, "id": 4, "objective_summary": {"categories": [["Iris-virginica", 11], ["Iris-versicolor", 5]]}, "output": "Iris-virginica", "predicate": {"field": "100004", "operator": "<=", "value": 5.05}}], "confidence": 0.81648, "count": 59, "id": 2, 
"objective_summary": {"categories": [["Iris-virginica", 54], ["Iris-versicolor", 5]]}, "output": "Iris-virginica", "predicate": {"field": "100004", "operator": ">", "value": 4.75}}, {"confidence": 0.87941, "count": 43, "id": 9, "objective_summary": {"categories": [["Iris-versicolor", 42], ["Iris-virginica", 1]]}, "output": "Iris-versicolor", "predicate": {"field": "100004", "operator": "<=", "value": 4.75}}], "confidence": 0.44282, "count": 102, "id": 1, "objective_summary": {"categories": [["Iris-virginica", 55], ["Iris-versicolor", 47]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 0.8}}, {"confidence": 0.9259, "count": 48, "id": 10, "objective_summary": {"categories": [["Iris-setosa", 48]]}, "output": "Iris-setosa", "predicate": {"field": "000003", "operator": "<=", "value": 0.8}}], "confidence": 0.29377, "count": 150, "id": 0, "objective_summary": {"categories": [["Iris-virginica", 55], ["Iris-setosa", 48], ["Iris-versicolor", 47]]}, "output": "Iris-virginica", "predicate": true}}, "name": "iris [extended] - 0", "name_options": "10-node, pruned, deterministic order", "node_threshold": 10, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "100005", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["100005"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/5f5670e85299633dc000fbd8", "randomize": false, "range": null, "replacement": false, "resource": "model/5f580eb2440ca135f602213e", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4550, "source": "source/5f5671b1946b3047cc009818", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 1}, 
"subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-09-08T23:08:29.441000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/iris_ensemble/model_5f580eb2440ca135f6022140 b/bigml/iris_ensemble/model_5f580eb2440ca135f6022140 new file mode 100644 index 00000000..7a755a91 --- /dev/null +++ b/bigml/iris_ensemble/model_5f580eb2440ca135f6022140 @@ -0,0 +1 @@ +{"code": 200, "resource": "model/5f580eb2440ca135f6022140", "location": "https://bigml.io/andromeda/model/5f580eb2440ca135f6022140", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-09-08T23:07:30.728000", "creator": "mmartin", "credits": 0.0, "credits_per_prediction": 0.0, "dataset": "dataset/5f580e962fb31c516d000f0a", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "5f580eb0e84f942429000c22", "ensemble_index": 1, "excluded_fields": [], "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000003", "100004"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 51], ["Iris-virginica", 49]]}, "training": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "generated": false, "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 
6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "order": 2, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 
0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "order": 3, "preferred": true, "provenance": "flatline", "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "provenance": "flatline", "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000003", 0.9548], ["000001", 0.03384], ["100004", 0.01137]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "preferred": true}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "preferred": true}, "100004": {"column_number": 3, "datatype": 
"double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "preferred": true, "provenance": "flatline"}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "preferred": true, "provenance": "flatline", "term_analysis": {"enabled": true}}}, "node_threshold": 10, "root": {"children": [{"children": [{"confidence": 0.9197, "count": 44, "id": 2, "objective_summary": {"categories": [["Iris-virginica", 44]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 1.75}}, {"children": [{"children": [{"children": [{"confidence": 0.09453, "count": 2, "id": 6, "objective_summary": {"categories": [["Iris-versicolor", 1], ["Iris-virginica", 1]]}, "output": "Iris-versicolor", "predicate": {"field": "100004", "operator": ">", "value": 5.05}}, {"confidence": 0.60966, "count": 6, "id": 7, "objective_summary": {"categories": [["Iris-versicolor", 6]]}, "output": "Iris-versicolor", "predicate": {"field": "100004", "operator": "<=", "value": 5.05}}], "confidence": 0.52911, "count": 8, "id": 5, "objective_summary": {"categories": [["Iris-versicolor", 7], ["Iris-virginica", 1]]}, "output": "Iris-versicolor", "predicate": {"field": "000001", "operator": ">", "value": 2.6}}, {"confidence": 0.56551, "count": 5, "id": 8, "objective_summary": {"categories": [["Iris-virginica", 5]]}, "output": "Iris-virginica", "predicate": {"field": "000001", "operator": "<=", "value": 2.6}}], "confidence": 0.29143, "count": 13, "id": 4, "objective_summary": {"categories": [["Iris-versicolor", 7], ["Iris-virginica", 6]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": ">", "value": 1.45}}, {"confidence": 0.91799, "count": 43, "id": 9, "objective_summary": {"categories": [["Iris-versicolor", 43]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.45}}], "confidence": 
0.78531, "count": 56, "id": 3, "objective_summary": {"categories": [["Iris-versicolor", 50], ["Iris-virginica", 6]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.75}}], "confidence": 0.40383, "count": 100, "id": 1, "objective_summary": {"categories": [["Iris-versicolor", 50], ["Iris-virginica", 50]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": ">", "value": 0.8}}, {"confidence": 0.92865, "count": 50, "id": 10, "objective_summary": {"categories": [["Iris-setosa", 50]]}, "output": "Iris-setosa", "predicate": {"field": "000003", "operator": "<=", "value": 0.8}}], "confidence": 0.26289, "count": 150, "id": 0, "objective_summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]]}, "output": "Iris-setosa", "predicate": true}}, "name": "iris [extended] - 1", "name_options": "10-node, pruned, deterministic order", "node_threshold": 10, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "100005", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["100005"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/5f5670e85299633dc000fbd8", "randomize": false, "range": null, "replacement": false, "resource": "model/5f580eb2440ca135f6022140", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4550, "source": "source/5f5671b1946b3047cc009818", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-09-08T23:08:30.072000", "white_box": false}, "error": null} \ No newline at end of file diff --git 
a/bigml/iris_ensemble/model_5f580eb2440ca135f6022142 b/bigml/iris_ensemble/model_5f580eb2440ca135f6022142 new file mode 100644 index 00000000..63dbfe47 --- /dev/null +++ b/bigml/iris_ensemble/model_5f580eb2440ca135f6022142 @@ -0,0 +1 @@ +{"code": 200, "resource": "model/5f580eb2440ca135f6022142", "location": "https://bigml.io/andromeda/model/5f580eb2440ca135f6022142", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-09-08T23:07:30.866000", "creator": "mmartin", "credits": 0.0, "credits_per_prediction": 0.0, "dataset": "dataset/5f580e962fb31c516d000f0a", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "5f580eb0e84f942429000c22", "ensemble_index": 2, "excluded_fields": [], "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000003", "100004"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 46], ["Iris-versicolor", 49], ["Iris-virginica", 55]]}, "training": {"categories": [["Iris-setosa", 46], ["Iris-versicolor", 50], ["Iris-virginica", 54]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "generated": false, "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], 
[7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "order": 2, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal 
length", "optype": "numeric", "order": 3, "preferred": true, "provenance": "flatline", "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "provenance": "flatline", "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000003", 0.88756], ["100004", 0.08608], ["000000", 0.02636]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000000": {"column_number": 0, "datatype": "double", "generated": false, "name": "sepal length", "optype": "numeric", "preferred": true}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "preferred": true}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "preferred": true, "provenance": "flatline"}, "100005": {"column_number": 4, 
"datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "preferred": true, "provenance": "flatline", "term_analysis": {"enabled": true}}}, "node_threshold": 10, "root": {"children": [{"children": [{"children": [{"confidence": 0.91433, "count": 41, "id": 3, "objective_summary": {"categories": [["Iris-virginica", 41]]}, "output": "Iris-virginica", "predicate": {"field": "100004", "operator": ">", "value": 5.05}}, {"children": [{"confidence": 0.34237, "count": 2, "id": 5, "objective_summary": {"categories": [["Iris-versicolor", 2]]}, "output": "Iris-versicolor", "predicate": {"field": "000000", "operator": ">", "value": 6.5}}, {"confidence": 0.62264, "count": 11, "id": 6, "objective_summary": {"categories": [["Iris-virginica", 10], ["Iris-versicolor", 1]]}, "output": "Iris-virginica", "predicate": {"field": "000000", "operator": "<=", "value": 6.5}}], "confidence": 0.49743, "count": 13, "id": 4, "objective_summary": {"categories": [["Iris-virginica", 10], ["Iris-versicolor", 3]]}, "output": "Iris-virginica", "predicate": {"field": "100004", "operator": "<=", "value": 5.05}}], "confidence": 0.84893, "count": 54, "id": 2, "objective_summary": {"categories": [["Iris-virginica", 51], ["Iris-versicolor", 3]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 1.55}}, {"children": [{"confidence": 0.43849, "count": 3, "id": 8, "objective_summary": {"categories": [["Iris-virginica", 3]]}, "output": "Iris-virginica", "predicate": {"field": "100004", "operator": ">", "value": 4.95}}, {"confidence": 0.92444, "count": 47, "id": 9, "objective_summary": {"categories": [["Iris-versicolor", 47]]}, "output": "Iris-versicolor", "predicate": {"field": "100004", "operator": "<=", "value": 4.95}}], "confidence": 0.83783, "count": 50, "id": 7, "objective_summary": {"categories": [["Iris-versicolor", 47], ["Iris-virginica", 3]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", 
"operator": "<=", "value": 1.55}}], "confidence": 0.42424, "count": 104, "id": 1, "objective_summary": {"categories": [["Iris-virginica", 54], ["Iris-versicolor", 50]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 0.75}}, {"confidence": 0.92292, "count": 46, "id": 10, "objective_summary": {"categories": [["Iris-setosa", 46]]}, "output": "Iris-setosa", "predicate": {"field": "000003", "operator": "<=", "value": 0.75}}], "confidence": 0.28756, "count": 150, "id": 0, "objective_summary": {"categories": [["Iris-virginica", 54], ["Iris-versicolor", 50], ["Iris-setosa", 46]]}, "output": "Iris-virginica", "predicate": true}}, "name": "iris [extended] - 2", "name_options": "10-node, pruned, deterministic order", "node_threshold": 10, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "100005", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["100005"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/5f5670e85299633dc000fbd8", "randomize": false, "range": null, "replacement": false, "resource": "model/5f580eb2440ca135f6022142", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4550, "source": "source/5f5671b1946b3047cc009818", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-09-08T23:08:30.597000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/iris_ensemble/model_5f580eb3440ca135f6022144 b/bigml/iris_ensemble/model_5f580eb3440ca135f6022144 new file mode 100644 index 00000000..1143259a --- /dev/null +++ 
b/bigml/iris_ensemble/model_5f580eb3440ca135f6022144 @@ -0,0 +1 @@ +{"code": 200, "resource": "model/5f580eb3440ca135f6022144", "location": "https://bigml.io/andromeda/model/5f580eb3440ca135f6022144", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-09-08T23:07:31.009000", "creator": "mmartin", "credits": 0.0, "credits_per_prediction": 0.0, "dataset": "dataset/5f580e962fb31c516d000f0a", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "5f580eb0e84f942429000c22", "ensemble_index": 3, "excluded_fields": [], "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000003", "100004"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 48], ["Iris-versicolor", 50], ["Iris-virginica", 52]]}, "training": {"categories": [["Iris-setosa", 48], ["Iris-versicolor", 51], ["Iris-virginica", 51]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "generated": false, "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": 
-0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "order": 2, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "order": 3, "preferred": true, "provenance": "flatline", "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], 
[1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "provenance": "flatline", "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000003", 0.69065], ["100004", 0.29616], ["000001", 0.01319]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "preferred": true}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "preferred": true}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "preferred": true, "provenance": "flatline"}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "preferred": true, "provenance": "flatline", "term_analysis": 
{"enabled": true}}}, "node_threshold": 10, "root": {"children": [{"children": [{"children": [{"confidence": 0.92292, "count": 46, "id": 3, "objective_summary": {"categories": [["Iris-virginica", 46]]}, "output": "Iris-virginica", "predicate": {"field": "100004", "operator": ">", "value": 5.05}}, {"confidence": 0.20765, "count": 3, "id": 4, "objective_summary": {"categories": [["Iris-virginica", 2], ["Iris-versicolor", 1]]}, "output": "Iris-virginica", "predicate": {"field": "100004", "operator": "<=", "value": 5.05}}], "confidence": 0.89306, "count": 49, "id": 2, "objective_summary": {"categories": [["Iris-virginica", 48], ["Iris-versicolor", 1]]}, "output": "Iris-virginica", "predicate": {"field": "100004", "operator": ">", "value": 4.95}}, {"children": [{"children": [{"confidence": 0.34237, "count": 2, "id": 7, "objective_summary": {"categories": [["Iris-versicolor", 2]]}, "output": "Iris-versicolor", "predicate": {"field": "000001", "operator": ">", "value": 3.1}}, {"confidence": 0.43849, "count": 3, "id": 8, "objective_summary": {"categories": [["Iris-virginica", 3]]}, "output": "Iris-virginica", "predicate": {"field": "000001", "operator": "<=", "value": 3.1}}], "confidence": 0.23072, "count": 5, "id": 6, "objective_summary": {"categories": [["Iris-virginica", 3], ["Iris-versicolor", 2]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 1.7}}, {"confidence": 0.9259, "count": 48, "id": 9, "objective_summary": {"categories": [["Iris-versicolor", 48]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.7}}], "confidence": 0.8463, "count": 53, "id": 5, "objective_summary": {"categories": [["Iris-versicolor", 50], ["Iris-virginica", 3]]}, "output": "Iris-versicolor", "predicate": {"field": "100004", "operator": "<=", "value": 4.95}}], "confidence": 0.40474, "count": 102, "id": 1, "objective_summary": {"categories": [["Iris-versicolor", 51], ["Iris-virginica", 51]]}, "output": 
"Iris-versicolor", "predicate": {"field": "000003", "operator": ">", "value": 0.8}}, {"confidence": 0.9259, "count": 48, "id": 10, "objective_summary": {"categories": [["Iris-setosa", 48]]}, "output": "Iris-setosa", "predicate": {"field": "000003", "operator": "<=", "value": 0.8}}], "confidence": 0.26903, "count": 150, "id": 0, "objective_summary": {"categories": [["Iris-versicolor", 51], ["Iris-virginica", 51], ["Iris-setosa", 48]]}, "output": "Iris-versicolor", "predicate": true}}, "name": "iris [extended] - 3", "name_options": "10-node, pruned, deterministic order", "node_threshold": 10, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "100005", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["100005"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/5f5670e85299633dc000fbd8", "randomize": false, "range": null, "replacement": false, "resource": "model/5f580eb3440ca135f6022144", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4550, "source": "source/5f5671b1946b3047cc009818", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-09-08T23:08:31.225000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/iris_ensemble/model_5f580eb3440ca135f6022146 b/bigml/iris_ensemble/model_5f580eb3440ca135f6022146 new file mode 100644 index 00000000..34e0f4dc --- /dev/null +++ b/bigml/iris_ensemble/model_5f580eb3440ca135f6022146 @@ -0,0 +1 @@ +{"code": 200, "resource": "model/5f580eb3440ca135f6022146", "location": "https://bigml.io/andromeda/model/5f580eb3440ca135f6022146", 
"object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-09-08T23:07:31.125000", "creator": "mmartin", "credits": 0.0, "credits_per_prediction": 0.0, "dataset": "dataset/5f580e962fb31c516d000f0a", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "5f580eb0e84f942429000c22", "ensemble_index": 4, "excluded_fields": [], "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000003", "100004"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 54], ["Iris-versicolor", 46], ["Iris-virginica", 50]]}, "training": {"categories": [["Iris-setosa", 54], ["Iris-versicolor", 47], ["Iris-virginica", 49]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "generated": false, "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, 
"variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "order": 2, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "order": 3, "preferred": true, "provenance": "flatline", "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 
9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "provenance": "flatline", "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000003", 0.92673], ["100004", 0.05445], ["000000", 0.01882]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000000": {"column_number": 0, "datatype": "double", "generated": false, "name": "sepal length", "optype": "numeric", "preferred": true}, "000001": {"column_number": 1, "datatype": "double", "generated": false, "name": "sepal width", "optype": "numeric", "preferred": true}, "000003": {"column_number": 2, "datatype": "double", "generated": false, "name": "petal width", "optype": "numeric", "preferred": true}, "100004": {"column_number": 3, "datatype": "double", "description": "", "generated": true, "label": "", "name": "petal length", "optype": "numeric", "preferred": true, "provenance": "flatline"}, "100005": {"column_number": 4, "datatype": "string", "description": "", "generated": true, "label": "", "name": "species", "optype": "categorical", "preferred": true, "provenance": "flatline", "term_analysis": {"enabled": true}}}, "node_threshold": 10, "root": {"children": 
[{"children": [{"children": [{"confidence": 0.92135, "count": 45, "id": 3, "objective_summary": {"categories": [["Iris-virginica", 45]]}, "output": "Iris-virginica", "predicate": {"field": "000000", "operator": ">", "value": 5.95}}, {"confidence": 0.20654, "count": 1, "id": 4, "objective_summary": {"categories": [["Iris-versicolor", 1]]}, "output": "Iris-versicolor", "predicate": {"field": "000000", "operator": "<=", "value": 5.95}}], "confidence": 0.88664, "count": 46, "id": 2, "objective_summary": {"categories": [["Iris-virginica", 45], ["Iris-versicolor", 1]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 1.75}}, {"children": [{"children": [{"confidence": 0.5101, "count": 4, "id": 7, "objective_summary": {"categories": [["Iris-versicolor", 4]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": ">", "value": 1.65}}, {"confidence": 0.37553, "count": 5, "id": 8, "objective_summary": {"categories": [["Iris-virginica", 4], ["Iris-versicolor", 1]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": "<=", "value": 1.65}}], "confidence": 0.26665, "count": 9, "id": 6, "objective_summary": {"categories": [["Iris-versicolor", 5], ["Iris-virginica", 4]]}, "output": "Iris-versicolor", "predicate": {"field": "100004", "operator": ">", "value": 4.95}}, {"confidence": 0.91433, "count": 41, "id": 9, "objective_summary": {"categories": [["Iris-versicolor", 41]]}, "output": "Iris-versicolor", "predicate": {"field": "100004", "operator": "<=", "value": 4.95}}], "confidence": 0.81161, "count": 50, "id": 5, "objective_summary": {"categories": [["Iris-versicolor", 46], ["Iris-virginica", 4]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.75}}], "confidence": 0.41196, "count": 96, "id": 1, "objective_summary": {"categories": [["Iris-virginica", 49], ["Iris-versicolor", 47]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", 
"value": 0.8}}, {"confidence": 0.93358, "count": 54, "id": 10, "objective_summary": {"categories": [["Iris-setosa", 54]]}, "output": "Iris-setosa", "predicate": {"field": "000003", "operator": "<=", "value": 0.8}}], "confidence": 0.28756, "count": 150, "id": 0, "objective_summary": {"categories": [["Iris-setosa", 54], ["Iris-virginica", 49], ["Iris-versicolor", 47]]}, "output": "Iris-setosa", "predicate": true}}, "name": "iris [extended] - 4", "name_options": "10-node, pruned, deterministic order", "node_threshold": 10, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "100005", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["100005"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/5f5670e85299633dc000fbd8", "randomize": false, "range": null, "replacement": false, "resource": "model/5f580eb3440ca135f6022146", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4550, "source": "source/5f5671b1946b3047cc009818", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-09-08T23:08:31.730000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/item.py b/bigml/item.py new file mode 100644 index 00000000..3314507a --- /dev/null +++ b/bigml/item.py @@ -0,0 +1,187 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
class Item():
    """Object encapsulating an Association resource item as described in
    https://bigml.com/developers/associations

    """

    def __init__(self, index, item_info, fields):
        """Copies the item attributes found in `item_info`.

        index: position of the item, stored as given
        item_info: dict describing the item; absent keys are stored as None
                   (`complement` defaults to False)
        fields: dict keyed by field ID, used as the default source for
                `field_info`. Note that `fields[field_id]` is always
                evaluated, even when `item_info` carries its own
                `field_info`.
        """
        self.index = index
        self.complement = item_info.get('complement', False)
        self.complement_index = item_info.get('complement_index')
        self.count = item_info.get('count')
        self.description = item_info.get('description')
        self.field_id = item_info.get('field_id')
        self.field_info = item_info.get('field_info', fields[self.field_id])
        self.name = item_info.get('name')
        self.bin_end = item_info.get('bin_end')
        self.bin_start = item_info.get('bin_start')

    def out_format(self, language="JSON"):
        """Transforming the item structure to a string in the required format

        Returns the result of the matching `to_<language>` method when
        `language` is listed in SUPPORTED_LANGUAGES, and the item itself
        otherwise.
        """
        if language in SUPPORTED_LANGUAGES:
            # The conversion methods are named in lower case (to_json,
            # to_csv) while the default language is spelled "JSON".
            return getattr(self, "to_%s" % language.lower())()
        return self

    def to_csv(self):
        """Transforming the item to CSV formats

        Returns the list of values that form the item's CSV row.
        """
        output = [self.complement, self.complement_index, self.count,
                  self.description, self.field_info['name'], self.name,
                  self.bin_end, self.bin_start]
        return output

    def to_json(self):
        """Transforming the item relevant information to JSON

        Returns a copy of the instance attributes without `field_info`,
        `complement_index` and `index`.
        """
        item_dict = {}
        item_dict.update(self.__dict__)
        del item_dict["field_info"]
        del item_dict["complement_index"]
        del item_dict["index"]
        return item_dict

    def to_lisp_rule(self):
        """Returns the LISP flatline expression to filter this item

        An empty string is returned for field types that are not handled.
        """
        flatline = ""
        if self.name is None:
            return "(missing? (f %s))" % self.field_id
        field_type = self.field_info['optype']
        if field_type == "numeric":
            # a complement item swaps the limits of the interval
            start = self.bin_end if self.complement else \
                self.bin_start
            end = self.bin_start if self.complement else \
                self.bin_end
            if start is not None and end is not None:
                if start < end:
                    flatline = "(and (< %s (f %s)) (<= (f %s) %s))" % \
                        (start, self.field_id, self.field_id, end)
                else:
                    flatline = "(or (> (f %s) %s) (<= (f %s) %s))" % \
                        (self.field_id, start, self.field_id, end)
            elif start is not None:
                flatline = "(> (f %s) %s)" % (self.field_id, start)
            else:
                flatline = "(<= (f %s) %s)" % (self.field_id, end)
        elif field_type == "categorical":
            operator = "!=" if self.complement else "="
            flatline = "(%s (f %s) %s)" % (
                operator, self.field_id, self.name)
        elif field_type == "text":
            operator = "=" if self.complement else ">"
            options = self.field_info['term_analysis']
            case_insensitive = not options.get('case_sensitive', False)
            case_insensitive = 'true' if case_insensitive else 'false'
            language = options.get('language')
            language = "" if language is None else " %s" % language
            flatline = "(%s (occurrences (f %s) %s %s%s) 0)" % (
                operator, self.field_id, self.name,
                case_insensitive, language)
        elif field_type == 'items':
            operator = "!" if self.complement else ""
            flatline = "(%s (contains-items? %s %s))" % (
                operator, self.field_id, self.name)
        return flatline

    def describe(self):
        """Human-readable description of a item_dict

        """
        description = ""
        if self.name is None:
            return "%s is %smissing" % (
                self.field_info['name'], "not " if self.complement else "")
        field_name = self.field_info['name']
        field_type = self.field_info['optype']
        if field_type == "numeric":
            # a complement item swaps the limits of the interval
            start = self.bin_end if self.complement else \
                self.bin_start
            end = self.bin_start if self.complement else \
                self.bin_end
            if start is not None and end is not None:
                if start < end:
                    description = "%s < %s <= %s" % (start,
                                                     field_name,
                                                     end)
                else:
                    description = "%s > %s or <= %s" % (field_name,
                                                        start,
                                                        end)
            elif start is not None:
                description = "%s > %s" % (field_name, start)
            else:
                description = "%s <= %s" % (field_name, end)
        elif field_type == "categorical":
            operator = "!=" if self.complement else "="
            description = "%s %s %s" % (field_name, operator, self.name)
        elif field_type in ["text", "items"]:
            operator = "excludes" if self.complement else "includes"
            description = "%s %s %s" % (field_name, operator, self.name)
        else:
            description = self.name
        return description

    def matches(self, value):
        """ Checks whether the value is in a range for numeric fields or
        matches a category for categorical fields.

        A None value only matches the item that represents missing values
        (the one whose name is None). Items that cannot be evaluated
        (a numeric item without bin limits or an unhandled field type)
        are treated as not matching, like `describe` and `to_lisp_rule`
        tolerate them, and the complement flag is applied on top of that.
        """
        field_type = self.field_info['optype']
        if value is None:
            return self.name is None
        # default for the cases not covered below; previously `result`
        # was left unbound and the final lines raised UnboundLocalError
        result = False
        if field_type == "numeric" and (
                self.bin_end is not None or self.bin_start is not None):
            if self.bin_start is not None and self.bin_end is not None:
                result = self.bin_start <= value <= self.bin_end
            elif self.bin_end is not None:
                result = value <= self.bin_end
            else:
                result = value >= self.bin_start
        elif field_type == 'categorical':
            result = self.name == value
        elif field_type == 'text':
            # for text fields, the item.name or the related term_forms should
            # be in the considered value
            all_forms = self.field_info['summary'].get('term_forms', {})
            term_forms = all_forms.get(self.name, [])
            terms = [self.name]
            terms.extend(term_forms)
            options = self.field_info['term_analysis']
            result = term_matches(value, terms, options) > 0
        elif field_type == 'items':
            # for item fields, the item.name should be in the considered value
            # surrounded by separators or regexp
            options = self.field_info['item_analysis']
            result = item_matches(value, self.name, options) > 0
        if self.complement:
            result = not result
        return result
+LARGE_EXP = 512 + +EPSILON = 1e-4 + +# Parameters that can appear in the layers of models +MATRIX_PARAMS = [ + 'weights' +] + +VEC_PARAMS = [ + 'mean', + 'variance', + 'offset', + 'scale', + 'stdev' +] + +# Model search parameters +VALIDATION_FRAC = 0.15 +MAX_VALIDATION_ROWS = 4096 +LEARN_INCREMENT = 8 +MAX_QUEUE = LEARN_INCREMENT * 4 +N_CANDIDATES = MAX_QUEUE * 64 + +# Activation constants +ALPHA = 1.6732632423543772848170429916717 +LAMBDA = 1.0507009873554804934193349852946 +LEAKY_RELU_CONST = 0.1 diff --git a/bigml/laminar/numpy_ops.py b/bigml/laminar/numpy_ops.py new file mode 100644 index 00000000..85c21ea4 --- /dev/null +++ b/bigml/laminar/numpy_ops.py @@ -0,0 +1,192 @@ +# -*- coding: utf-8 -*- +#pylint: disable=invalid-name,missing-function-docstring +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Activation functions and helpers in numpy + +Here are most of the vector operations and helper functions we use in +numpy. 
def softplus(xs):
    """Softplus activation, log(1 + exp(x)), applied element-wise.

    Entries at or above LARGE_EXP are returned unchanged. The rest are
    computed with np.logaddexp(0, x), which equals log(exp(0) + exp(x))
    but does not overflow: the direct np.exp call produced `inf` for
    float32 arrays (the type used for list inputs) well below LARGE_EXP.

    xs: array-like of numbers
    Returns a new numpy array; the input is not modified.
    """
    x_cpy = to_numpy_array(xs)
    small = x_cpy < LARGE_EXP
    x_cpy[small] = np.logaddexp(0, x_cpy[small])
    return x_cpy
def to_width(mat, width):
    """Repeats the columns of `mat` until it has exactly `width` columns.

    The matrix is tiled horizontally as many times as needed and the
    result is cut at `width` columns, so a wider matrix is truncated.
    """
    n_columns = len(mat[0])
    repeats = 1
    if width > n_columns:
        repeats = int(np.ceil(width / float(n_columns)))
    tiled = np.tile(mat, (1, repeats))
    return tiled[:, :width]
#pylint: disable=locally-disabled,unused-argument
def binarize(vector, zero, one):
    """Maps a numeric vector to 0/1 flags, in place.

    Entries equal to `one` become 1.0 and every other entry (NaN
    included) becomes 0.0.

    The mask is computed once, before any value is overwritten. The
    previous code rewrote the vector in two passes, so when `one` was 0.0
    the entries that were originally 1.0 could not be told apart from the
    freshly written flags and were left as 1.0.

    vector: numpy array, modified in place
    zero: value encoded as 0 (accepted for symmetry, not used)
    one: value encoded as 1
    Returns the same `vector` object.
    """
    is_one = vector == one
    vector[is_one] = 1.0
    vector[~is_one] = 0.0

    return vector
def tree_predict(tree, point):
    """Walks a nested-list tree and returns the value stored in the leaf.

    A node whose last element is None is a leaf and its first element is
    the prediction. Any other node is read as
    [input index, threshold, left branch, right branch]: the left branch
    is followed when point[input index] <= threshold, the right branch
    otherwise.
    """
    current = tree[:]

    while True:
        if current[-1] is None:
            return current[0]
        goes_left = point[current[0]] <= current[1]
        current = current[2] if goes_left else current[3]
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A local Predictive Linear Regression. + +This module defines a Linear Regression to make predictions locally or +embedded into your application without needing to send requests to +BigML.io. + +This module can help you enormously to +reduce the latency for each prediction and let you use your linear +regressions offline. + +Example usage (assuming that you have previously set up the BIGML_USERNAME +and BIGML_API_KEY environment variables and that you own the +linearregression/id below): + +from bigml.api import BigML +from bigml.linear import LinearRegression + +api = BigML() + +linear_regression = LinearRegression( + 'linearregression/5026965515526876630001b2') +linear_regression.predict({"petal length": 3, "petal width": 1, + "sepal length": 1}) + +""" +import logging +import math + +try: + import numpy as np + from scipy.stats import t as student_t + STATS = True +except ImportError: + STATS = False + + +from bigml.api import FINISHED +from bigml.api import get_status, get_api_connection, get_linear_regression_id +from bigml.util import cast, check_no_training_missings, flatten, \ + use_cache, load, dump, dumps, get_data_transformations, NUMERIC +from bigml.basemodel import get_resource_dict, extract_objective +from bigml.modelfields import ModelFields +from bigml.constants import DECIMALS + +try: + from bigml.laminar.numpy_ops import dot +except ImportError: + from bigml.laminar.math_ops import dot + + +LOGGER = logging.getLogger('BigML') + +EXPANSION_ATTRIBUTES = {"categorical": "categories", "text": "tag_clouds", + "items": "items"} + 
def get_terms_array(terms, unique_terms, field_id):
    """ Returns an array that represents the frequency of terms as ordered
    in the reference `terms` parameter.

    terms: reference list of terms; fixes the length and order of the
           output
    unique_terms: dict keyed by field ID whose values are iterables of
                  (term, frequency) pairs
    field_id: ID of the field whose terms are expanded

    Terms that are not in the reference list are skipped. Previously the
    first unknown term stopped the loop and the frequencies of all the
    terms that followed it were lost.
    """
    input_terms = unique_terms.get(field_id, [])
    terms_array = [0] * len(terms)
    for term, frequency in input_terms:
        try:
            index = terms.index(term)
        except ValueError:
            # term unknown to the reference list: nothing to fill in
            continue
        terms_array[index] = frequency
    return terms_array
linear_regression.get('name') + self.description = linear_regression.get('description') + try: + self.input_fields = linear_regression.get("input_fields", []) + self.default_numeric_value = linear_regression.get( \ + "default_numeric_value") + self.dataset_field_types = linear_regression.get( + "dataset_field_types", {}) + self.weight_field = linear_regression.get("weight_field") + objective_field = linear_regression['objective_fields'] if \ + linear_regression['objective_fields'] else \ + linear_regression['objective_field'] + except KeyError: + raise ValueError("Failed to find the linear regression expected " + "JSON structure. Check your arguments.") + if 'linear_regression' in linear_regression and \ + isinstance(linear_regression['linear_regression'], dict): + status = get_status(linear_regression) + if 'code' in status and status['code'] == FINISHED: + linear_regression_info = linear_regression[ \ + 'linear_regression'] + fields = linear_regression_info.get('fields', {}) + + if not self.input_fields: + self.input_fields = [ \ + field_id for field_id, _ in + sorted(list(fields.items()), + key=lambda x: x[1].get("column_number"))] + self.coeff_ids = self.input_fields[:] + self.coefficients = linear_regression_info.get( \ + 'coefficients', []) + self.bias = linear_regression_info.get('bias', True) + self.field_codings = linear_regression_info.get( \ + 'field_codings', {}) + self.number_of_parameters = linear_regression_info.get( \ + "number_of_parameters") + missing_tokens = linear_regression_info.get("missing_tokens") + + objective_id = extract_objective(objective_field) + ModelFields.__init__( + self, fields, + objective_id=objective_id, categories=True, + numerics=True, missing_tokens=missing_tokens) + self.field_codings = linear_regression_info.get( \ + 'field_codings', {}) + self.format_field_codings() + for field_id in self.field_codings: + if field_id not in fields and \ + field_id in self.inverted_fields: + self.field_codings.update( \ + 
{self.inverted_fields[field_id]: \ + self.field_codings[field_id]}) + del self.field_codings[field_id] + stats = linear_regression_info["stats"] + if STATS and stats is not None and \ + stats.get("xtx_inverse") is not None: + self.xtx_inverse = stats["xtx_inverse"][:] + self.mean_squared_error = stats["mean_squared_error"] + self.number_of_samples = stats["number_of_samples"] + # to be used in predictions + self.t_crit = student_t.interval( \ + CONFIDENCE, + self.number_of_samples - self.number_of_parameters)[1] + self.xtx_inverse = list( \ + np.linalg.inv(np.array(self.xtx_inverse))) + + else: + raise Exception("The linear regression isn't finished yet") + else: + raise Exception("Cannot create the LinearRegression instance." + " Could not find the 'linear_regression' key" + " in the resource:\n\n%s" % + linear_regression) + + def expand_input(self, input_data, unique_terms, compact=False): + """ Creates an input array with the values in input_data and + unique_terms and the following rules: + - fields are ordered as input_fields + - numeric fields contain the value or 0 if missing + - categorial fields are one-hot encoded and classes are sorted as + they appear in the field summary. If missing_count > 0 a last + missing element is added set to 1 if the field is missing and 0 + otherwise + - text and items fields are expanded into their elements as found + in the corresponding summmary information and their values treated + as numerics. 
+ """ + input_array = [] + for field_id in self.coeff_ids: + field = self.fields[field_id] + optype = field["optype"] + missing = False + new_inputs = [] + if optype == NUMERIC: + if field_id in input_data: + value = input_data.get(field_id, 0) + else: + missing = True + value = 0 + new_inputs = [value] + else: + terms = getattr(self, EXPANSION_ATTRIBUTES[optype])[field_id] + length = len(terms) + if field_id in unique_terms: + new_inputs = get_terms_array( \ + terms, unique_terms, field_id) + else: + new_inputs = [0] * length + missing = True + + if field["summary"]["missing_count"] > 0 or \ + (optype == CATEGORICAL and \ + self.field_codings[field_id].get(DUMMY) is None): + new_inputs.append(int(missing)) + + if optype == CATEGORICAL: + new_inputs = self.categorical_encoding( \ + new_inputs, field_id, compact) + + input_array.extend(new_inputs) + + if self.bias or not compact: + input_array.append(1) + + return input_array + + def categorical_encoding(self, inputs, field_id, compact): + """Returns the result of combining the encoded categories + according to the field_codings projections + + The result is the components generated by the categorical field + """ + + new_inputs = inputs[:] + + projections = self.field_codings[field_id].get( \ + CONTRAST, self.field_codings[field_id].get(OTHER)) + if projections is not None: + new_inputs = flatten(dot(projections, [new_inputs])) + + if compact and self.field_codings[field_id].get(DUMMY) is not None: + dummy_class = self.field_codings[field_id][DUMMY] + index = self.categories[field_id].index(dummy_class) + cat_new_inputs = new_inputs[0: index] + if len(new_inputs) > (index + 1): + cat_new_inputs.extend(new_inputs[index + 1 :]) + new_inputs = cat_new_inputs + + return new_inputs + + def predict(self, input_data, full=False): + """Returns the prediction and the confidence intervals + + input_data: Input data to be predicted + full: Boolean that controls whether to include the prediction's + attributes. 
By default, only the prediction is produced. If set + to True, the rest of available information is added in a + dictionary format. The dictionary keys can be: + - prediction: the prediction value + - unused_fields: list of fields in the input data that + are not being used in the model + + """ + + # Checks and cleans input_data leaving the fields used in the model + unused_fields = [] + norm_input_data = self.filter_input_data( \ + input_data, + add_unused_fields=full) + if full: + norm_input_data, unused_fields = norm_input_data + + # Strips affixes for numeric values and casts to the final field type + cast(norm_input_data, self.fields) + + # In case that the training data has no missings, input data shouldn't + check_no_training_missings(norm_input_data, self.model_fields, + self.weight_field, + self.objective_id) + + # Computes text and categorical field expansion + unique_terms = self.get_unique_terms(norm_input_data) + + # Creates an input vector with the values for all expanded fields. 
+ input_array = self.expand_input(norm_input_data, unique_terms) + compact_input_array = self.expand_input(norm_input_data, unique_terms, + True) + + prediction = dot([flatten(self.coefficients)], [input_array])[0][0] + + result = { + "prediction": round(prediction, DECIMALS)} + if self.xtx_inverse: + result.update({"confidence_bounds": self.confidence_bounds( \ + compact_input_array)}) + + if full: + result.update({"unused_fields": unused_fields}) + else: + result = result["prediction"] + + return result + + + def predict_probability(self, input_data, compact=False): + """Method to homogeinize predictions in fusions and composites + + """ + + prediction = self.predict(input_data, full=not compact) + + if compact: + output = [prediction] + else: + output = prediction + + return output + + + def confidence_bounds(self, input_array): + """Computes the confidence interval for the prediction + + """ + product = dot(dot([input_array], self.xtx_inverse), + [input_array])[0][0] + valid = True + try: + confidence_interval = self.t_crit * math.sqrt( \ + self.mean_squared_error * product) + prediction_interval = self.t_crit * math.sqrt( \ + self.mean_squared_error * (product + 1)) + valid = True + except ValueError: + valid = False + confidence_interval, prediction_interval = (0, 0) + + return {"confidence_interval": confidence_interval, + "prediction_interval": prediction_interval, + "valid": valid} + + def format_field_codings(self): + """ Changes the field codings format to the dict notation + + """ + if isinstance(self.field_codings, list): + self.field_codings_list = self.field_codings[:] + field_codings = self.field_codings[:] + self.field_codings = {} + for element in field_codings: + field_id = element['field'] + if element["coding"] == DUMMY: + self.field_codings[field_id] = {\ + element["coding"]: element['dummy_class']} + else: + self.field_codings[field_id] = {\ + element["coding"]: element['coefficients']} + + def data_transformations(self): + """Returns the 
def dumps(self):
    """Uses msgpack to serialize the resource object to a string

    :return: the value produced by the module-level `dumps` serializer for
             the instance attributes. The previous version computed it but
             dropped it, so callers always received None.
    """
    # `vars` hands back the instance dictionary itself, so the row
    # conversion below is applied in place on the object.
    self_vars = vars(self)
    xtx = self_vars["xtx_inverse"]
    # every row is turned into a plain list before serializing;
    # an unset matrix is left as is
    if xtx is not None:
        for index, elem in enumerate(xtx):
            self_vars["xtx_inverse"][index] = list(elem)
    # inside the class body this name resolves to the module-level
    # serializer helper, not to this method
    return dumps(self_vars)
def extract_id(model, api):
    """Extract the resource id from:
        - a resource ID string
        - a list of resources (ensemble + models)
        - a resource structure
        - the name of the file that contains a resource structure

    :param model: resource ID string, path to a JSON file, resource
                  structure or list of resources
    :param api: connection object. When `model` is a readable JSON file,
                its `storage` attribute is set to the file's directory.
    :return: tuple (resource_id, model), where `model` is the JSON content
             when a file path was given and the original argument otherwise
    :raises ValueError: the file is not valid JSON, or the structure does
                        not contain a valid BigML resource
    :raises IOError: the string is neither a readable file nor a resource ID
    :raises Exception: the string looks like a resource ID of a known type
                       but is not a valid one
    """
    # the string can be a path to a JSON file
    if isinstance(model, str):
        # `model` is rebound to the file contents below, so the original
        # string is kept for lookups and error messages
        source = model
        try:
            path = os.path.dirname(os.path.abspath(source))
            with open(source) as model_file:
                model = json.load(model_file)
        except IOError:
            # if it is not a path, it can be a model id
            resource_id = get_resource_id(source)
            if resource_id is None:
                for resource_type in MODEL_CLASSES:
                    if source.find("%s/" % resource_type) > -1:
                        raise Exception(
                            api.error_message(source,
                                              resource_type=resource_type,
                                              method="get"))
                raise IOError("Failed to open the expected JSON file"
                              " at %s." % source)
        except ValueError:
            # the message used to be raised with its placeholder unfilled
            raise ValueError("Failed to interpret %s."
                             " JSON file expected." % source)
        else:
            # checked outside the `try` so that this more specific message
            # is not replaced by the generic JSON one
            resource_id = get_resource_id(model)
            if resource_id is None:
                raise ValueError("The JSON file does not seem"
                                 " to contain a valid BigML resource"
                                 " representation.")
            api.storage = path
    if isinstance(model, list):
        resource_id = get_ensemble_id(model[0])
        if resource_id is None:
            raise ValueError("The first argument does not contain a valid"
                             " BigML model structure.")
    else:
        resource_id = get_resource_id(model)
        if resource_id is None:
            raise ValueError("The first argument does not contain a valid"
                             " BigML model structure.")
    return resource_id, model
def batch_predict(self, input_data_list, outputs=None, **kwargs):
    """Creates a batch prediction for a list of inputs using the local
    BigML model. Allows to define some output settings to
    decide the fields to be added to the input_data (prediction,
    probability, etc.) and the name that we want to assign to these new
    fields. The outputs argument accepts a dictionary with keys
    "output_fields", to contain a list of the prediction properties to add
    (["prediction", "probability"] by default) and "output_headers", to
    contain a list of the headers to be used when adding them (identical
    to "output_fields" list, by default).

    :param input_data_list: List of input data to be predicted
    :type input_data_list: list or Panda's dataframe
    :param dict outputs: properties that define the headers and fields to
                         be added to the input data
    :return: the list of input data plus the predicted values
    :rtype: list or Panda's dataframe depending on the input type in
            input_data_list
    :raises ValueError: if the wrapped model is an Association or a
                        TimeSeries
    """
    if isinstance(self.local_model, (Association, TimeSeries)):
        raise ValueError("The method is not available for Associations or "
                         "TimeSeries.")
    if self.supervised:
        if outputs is None:
            outputs = {}
        new_fields = outputs.get(OUT_NEW_FIELDS, DFT_OUTPUTS)
        # work on a copy so that neither the caller's list nor the
        # module-level defaults are modified
        new_headers = list(outputs.get(OUT_NEW_HEADERS, new_fields))
        if len(new_fields) > len(new_headers):
            # missing headers default to the field names; this used to
            # call the non-existent list method `expand`
            new_headers.extend(new_fields[len(new_headers):])
        else:
            new_headers = new_headers[0: len(new_fields)]
        data_format = get_data_format(input_data_list)
        inner_data_list = get_formatted_data(input_data_list, INTERNAL)
        # the full prediction is needed to pick the requested properties
        kwargs.update({"full": True})
        for input_data in inner_data_list:
            prediction = self.predict(input_data, **kwargs)
            for index, key in enumerate(new_fields):
                try:
                    input_data[new_headers[index]] = prediction[key]
                except KeyError:
                    # properties that the prediction does not provide
                    # are skipped
                    pass
        if data_format != INTERNAL:
            return format_data(inner_data_list, out_format=data_format)
        return inner_data_list
    return self.local_model.batch_predict(input_data_list,
                                          outputs=outputs, **kwargs)
def balance_input(input_data, fields):
    """Scales in place the numeric values found in input_data, using the
    mean and standard deviation stored in each field's summary.

    :param input_data: dict of values keyed by field; modified in place
    :param fields: dict of field descriptions providing 'optype' and
                   'summary' for every key of input_data
    """
    for field_id in input_data:
        field_info = fields[field_id]
        if field_info['optype'] != NUMERIC:
            continue
        mean = field_info['summary'].get('mean', 0)
        stddev = field_info['summary'].get('standard_deviation', 0)
        if mean is None:
            mean = 0
        if stddev is None:
            stddev = 0
        centered = input_data[field_id] - mean
        # if stddev is not positive, we only subtract the mean
        if stddev <= 0:
            input_data[field_id] = centered
        else:
            input_data[field_id] = centered / stddev
+ :param operation_settings: Dict object that contains operating options + """ + + if use_cache(cache_get): + # using a cache to store the model attributes + self.__dict__ = load(get_logistic_regression_id( \ + logistic_regression), cache_get) + self.operation_settings = self._add_operation_settings( + operation_settings) + return + + self.resource_id = None + self.name = None + self.description = None + self.parent_id = None + self.class_names = None + self.input_fields = [] + self.term_forms = {} + self.tag_clouds = {} + self.term_analysis = {} + self.items = {} + self.item_analysis = {} + self.categories = {} + self.coefficients = {} + self.data_field_types = {} + self.field_codings = {} + self.numeric_fields = {} + self.default_numeric_value = None + self.bias = None + self.missing_numerics = None + self.c = None + self.eps = None + self.lr_normalize = None + self.balance_fields = None + self.regularization = None + self.flat_coefficients = None + api = get_api_connection(api) + + old_coefficients = False + + self.resource_id, logistic_regression = get_resource_dict( \ + logistic_regression, "logisticregression", api=api) + + if 'object' in logistic_regression and \ + isinstance(logistic_regression['object'], dict): + logistic_regression = logistic_regression['object'] + self.parent_id = logistic_regression.get('dataset') + self.name = logistic_regression.get("name") + self.description = logistic_regression.get("description") + try: + self.input_fields = logistic_regression.get("input_fields", []) + self.default_numeric_value = logistic_regression.get( + "default_numeric_value") + self.dataset_field_types = logistic_regression.get( + "dataset_field_types", {}) + self.weight_field = logistic_regression.get("weight_field") + objective_field = logistic_regression['objective_fields'] if \ + logistic_regression['objective_fields'] else \ + logistic_regression['objective_field'] + except KeyError: + raise ValueError("Failed to find the logistic regression expected " 
+ "JSON structure. Check your arguments.") + if 'logistic_regression' in logistic_regression and \ + isinstance(logistic_regression['logistic_regression'], dict): + status = get_status(logistic_regression) + if 'code' in status and status['code'] == FINISHED: + logistic_regression_info = logistic_regression[ \ + 'logistic_regression'] + fields = logistic_regression_info.get('fields', {}) + + if not self.input_fields: + self.input_fields = [ \ + field_id for field_id, _ in + sorted(list(fields.items()), + key=lambda x: x[1].get("column_number"))] + self.coefficients.update(logistic_regression_info.get( \ + 'coefficients', [])) + if not isinstance(list(self.coefficients.values())[0][0], list): + old_coefficients = True + self.bias = logistic_regression_info.get('bias', True) + self.c = logistic_regression_info.get('c') + self.eps = logistic_regression_info.get('eps') + self.lr_normalize = logistic_regression_info.get('normalize') + self.balance_fields = logistic_regression_info.get( \ + 'balance_fields') + self.regularization = logistic_regression_info.get( \ + 'regularization') + self.field_codings = logistic_regression_info.get( \ + 'field_codings', {}) + # old models have no such attribute, so we set it to False in + # this case + self.missing_numerics = logistic_regression_info.get( \ + 'missing_numerics', False) + objective_id = extract_objective(objective_field) + missing_tokens = logistic_regression_info.get("missing_tokens") + ModelFields.__init__( + self, fields, + objective_id=objective_id, categories=True, + numerics=True, missing_tokens=missing_tokens, + operation_settings=operation_settings) + self.field_codings = logistic_regression_info.get( \ + 'field_codings', {}) + self.format_field_codings() + for field_id in self.field_codings: + if field_id not in self.fields and \ + field_id in self.inverted_fields: + self.field_codings.update( \ + {self.inverted_fields[field_id]: \ + self.field_codings[field_id]}) + del self.field_codings[field_id] + if 
def predict_probability(self, input_data, compact=False):
    """Predicts a probability for each possible output class,
    based on input values. The input fields must be a dictionary
    keyed by field name or field ID.

    :param input_data: Input data to be predicted
    :param compact: If False, prediction is returned as a list of maps, one
                    per class, with the keys "category" and "probability"
                    mapped to the name of the class and its probability,
                    respectively. If True, returns a list of probabilities
                    ordered by the sorted order of the class names.
    """
    def class_name(entry):
        return entry['category']

    full_prediction = self.predict(input_data, full=True)
    per_class = full_prediction['distribution']
    # sorted in place, alphabetically by class name
    per_class.sort(key=class_name)

    if not compact:
        return per_class
    return [entry['probability'] for entry in per_class]
def predict_operating(self, input_data,
                      operating_point=None):
    """Computes the prediction based on a user-given operating point.

    :param input_data: Input data to be predicted
    :param operating_point: operating point description, parsed by
                            `parse_operating_point`
    :return: dict for the chosen class, where the "category" key has been
             renamed to "prediction" and "confidence" mirrors "probability"
    """
    kind, threshold, positive_class = parse_operating_point(
        operating_point, ["probability"],
        self.class_names, self.operation_settings)
    candidates = self.predict_probability(input_data, False)
    positive = candidates[self.class_names.index(positive_class)]
    if positive[kind] > threshold:
        chosen = positive
    else:
        # if the threshold is not met, the alternative class with
        # highest probability or confidence is returned
        def by_kind(left, right):
            return self._sort_predictions(left, right, kind)

        candidates.sort(key=cmp_to_key(by_kind))
        best_two = candidates[0: 2]
        if best_two[0]["category"] == positive_class:
            chosen = best_two[1]
        else:
            chosen = best_two[0]
    chosen["prediction"] = chosen.pop("category")
    chosen['confidence'] = chosen['probability']
    return chosen
#pylint: disable=locally-disabled,consider-using-dict-items
def predict(self, input_data,
            operating_point=None, operating_kind=None,
            full=False):
    """Returns the class prediction and the probability distribution

    input_data: Input data to be predicted
    operating_point: In classification models, this is the point of the
                     ROC curve where the model will be used at. The
                     operating point can be defined in terms of:
                     - the positive_class, the class that is important to
                       predict accurately
                     - the probability_threshold,
                       the probability that is established
                       as minimum for the positive_class to be predicted.
                     The operating_point is then defined as a map with
                     two attributes, e.g.:
                       {"positive_class": "Iris-setosa",
                        "probability_threshold": 0.5}
    operating_kind: "probability". Sets the
                    property that decides the prediction. Used only if
                    no operating_point is used
    full: Boolean that controls whether to include the prediction's
          attributes. By default, only the prediction is produced. If set
          to True, the rest of available information is added in a
          dictionary format. The dictionary keys can be:
              - prediction: the prediction value
              - probability: prediction's probability
              - confidence: same value as the probability
              - distribution: distribution of probabilities for each
                              of the objective field classes
              - unused_fields: list of fields in the input data that
                               are not being used in the model

    When an operating point or operating kind applies (given as argument
    or found in self.operation_settings), the dictionary built by
    predict_operating / predict_operating_kind is returned as is.
    """

    # Checks and cleans input_data leaving the fields used in the model
    unused_fields = []
    norm_input_data = self.filter_input_data( \
        input_data,
        add_unused_fields=full)
    if full:
        norm_input_data, unused_fields = norm_input_data

    # Strips affixes for numeric values and casts to the final field type
    cast(norm_input_data, self.fields)

    # When operating_point is used, we need the probabilities
    # of all possible classes to decide, so we use
    # the `predict_probability` method
    # NOTE(review): predict_operating and predict_operating_kind call
    # predict_probability, which calls this method again with full=True.
    # When the operating point/kind comes from self.operation_settings it
    # is picked up again on re-entry -- confirm this cannot recurse.
    if operating_point is None and self.operation_settings is not None:
        operating_point = self.operation_settings.get("operating_point")
    if operating_kind is None and self.operation_settings is not None:
        operating_kind = self.operation_settings.get("operating_kind")

    if operating_point:
        return self.predict_operating( \
            norm_input_data, operating_point=operating_point)
    if operating_kind:
        return self.predict_operating_kind( \
            norm_input_data, operating_kind=operating_kind)

    # In case that missing_numerics is False, checks that all numeric
    # fields are present in input data.
    if not self.missing_numerics and self.default_numeric_value is None:
        check_no_missing_numerics(norm_input_data, self.model_fields,
                                  self.weight_field)

    if self.balance_fields:
        balance_input(norm_input_data, self.fields)

    # Computes text and categorical field expansion
    unique_terms = self.get_unique_terms(norm_input_data)

    probabilities = {}
    total = 0
    objective_categories = self.categories[self.objective_id]
    # Computes the contributions for each category
    for category in self.coefficients:
        probability = self.category_probability( \
            norm_input_data, unique_terms, category)
        try:
            order = objective_categories.index(category)
        except ValueError:
            # categories that are not in the objective field (like the
            # '' class) go last; `order` used to stay unbound or keep
            # the previous category's value for any non-empty one
            order = len(objective_categories)
        probabilities[category] = {"category": category,
                                   "probability": probability,
                                   "order": order}
        total += probabilities[category]["probability"]
    # Normalizes the contributions to get a probability
    for category in probabilities:
        probabilities[category]["probability"] /= total
        probabilities[category]["probability"] = round( \
            probabilities[category]["probability"], PRECISION)

    # Chooses the most probable category as prediction; ties are broken
    # by the position of the category in the objective field
    predictions = sorted(list(probabilities.items()),
                         key=lambda x: (x[1]["probability"],
                                        - x[1]["order"]), reverse=True)
    for prediction, probability in predictions:
        del probability['order']
    prediction, probability = predictions[0]

    result = {
        "prediction": prediction,
        "probability": probability["probability"],
        "distribution": [{"category": category,
                          "probability": probability["probability"]}
                         for category, probability in predictions]}

    if full:
        result.update({'unused_fields': unused_fields, 'confidence':
                       result['probability']})
    else:
        result = result["prediction"]

    return result
+ coefficients = self.get_coefficients(category, field_id) + probability += coefficients[0] * numeric_inputs[field_id] + if self.lr_normalize: + norm2 += math.pow(numeric_inputs[field_id], 2) + + # text, items and categories + for field_id in unique_terms: + if field_id in self.input_fields: + coefficients = self.get_coefficients(category, field_id) + for term, occurrences in unique_terms[field_id]: + try: + one_hot = True + if field_id in self.tag_clouds: + index = self.tag_clouds[field_id].index(term) + elif field_id in self.items: + index = self.items[field_id].index(term) + elif field_id in self.categories and ( \ + not field_id in self.field_codings or \ + list(self.field_codings[field_id].keys())[0] == \ + "dummy"): + index = self.categories[field_id].index(term) + elif field_id in self.categories: + one_hot = False + index = self.categories[field_id].index(term) + coeff_index = 0 + for contribution in \ + list(self.field_codings[field_id].values())[0]: + probability += \ + coefficients[coeff_index] * \ + contribution[index] * occurrences + coeff_index += 1 + if one_hot: + probability += coefficients[index] * \ + occurrences + norm2 += math.pow(occurrences, 2) + except ValueError: + pass + + # missings + for field_id in self.input_fields: + contribution = False + coefficients = self.get_coefficients(category, field_id) + if field_id in self.numeric_fields and \ + field_id not in numeric_inputs: + probability += coefficients[1] + contribution = True + elif field_id in self.tag_clouds and (field_id not in \ + unique_terms \ + or not unique_terms[field_id]): + probability += coefficients[ \ + len(self.tag_clouds[field_id])] + contribution = True + elif field_id in self.items and (field_id not in \ + unique_terms \ + or not unique_terms[field_id]): + probability += coefficients[len(self.items[field_id])] + contribution = True + elif field_id in self.categories and \ + field_id != self.objective_id and \ + field_id not in unique_terms: + if field_id not in 
self.field_codings or \ + list(self.field_codings[field_id].keys())[0] == "dummy": + probability += coefficients[ \ + len(self.categories[field_id])] + else: + coeff_index = 0 + for contribution in \ + list(self.field_codings[field_id].values())[0]: + probability += coefficients[coeff_index] * \ + contribution[-1] + coeff_index += 1 + contribution = True + if contribution and self.lr_normalize: + norm2 += 1 + + # the bias term is the last in the coefficients list + probability += self.coefficients[category][\ + len(self.coefficients[category]) - 1][0] + + if self.bias: + norm2 += 1 + if self.lr_normalize: + try: + probability /= math.sqrt(norm2) + except ZeroDivisionError: + # this should never happen + probability = float('NaN') + + try: + probability = 1 / (1 + math.exp(-probability)) + except OverflowError: + probability = 0 if probability < 0 else 1 + # truncate probability to 5 digits, as in the backend + probability = round(probability, 5) + return probability + + def map_coefficients(self): + """ Maps each field to the corresponding coefficients subarray + + """ + field_ids = [ \ + field_id for field_id in self.input_fields + if field_id != self.objective_id] + shift = 0 + for field_id in field_ids: + optype = self.fields[field_id]['optype'] + if optype in list(EXPANSION_ATTRIBUTES.keys()): + # text and items fields have one coefficient per + # text plus a missing terms coefficient plus a bias + # coefficient + # categorical fields too, unless they use a non-default + # field coding. 
def group_coefficients(self):
    """ Rebuilds the per-field coefficient groups from the flat array used
    in old formats. The flat version is kept in `flat_coefficients` and
    `coefficients` gets one sub-list per input field plus a final
    one-element list holding the last flat coefficient.

    """
    flat = copy.deepcopy(self.coefficients)
    self.flat_coefficients = flat
    for category, flat_values in flat.items():
        grouped = []
        for field_id in self.input_fields:
            field = self.fields[field_id]
            start = field['coefficients_shift']
            stop = start + field['coefficients_length']
            grouped.append(flat_values[start:stop])
        # the last flat coefficient is stored as its own group
        grouped.append([flat_values[len(flat_values) - 1]])
        self.coefficients[category] = grouped
else: + self.field_codings[field_id] = {\ + element["coding"]: element['coefficients']} + + def data_transformations(self): + """Returns the pipeline transformations previous to the modeling + step as a pipeline, so that they can be used in local predictions. + Avoiding to set it in a Mixin to maintain the current dump function. + """ + return get_data_transformations(self.resource_id, self.parent_id) diff --git a/bigml/model.py b/bigml/model.py new file mode 100644 index 00000000..560d5c37 --- /dev/null +++ b/bigml/model.py @@ -0,0 +1,802 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2013-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A local Predictive Model. + +This module defines a Model to make predictions locally or +embedded into your application without needing to send requests to +BigML.io. + +This module can help you enormously to +reduce the latency for each prediction and let you use your models +offline. + +You can also visualize your predictive model in IF-THEN rule format +and even generate a python function that implements the model. 
# Conversion expressions, kept as source strings, keyed by the datatype of
# the objective field.
# we use the atof conversion for integers to include integers written as
# 10.0
# Python 3 has a single unbounded `int` type and no `long` builtin, so the
# int64 entry uses `int` like the rest of the integer types. The previous
# "long(...)" expression evaluated to a lambda that raised NameError as soon
# as it was called.
PYTHON_CONV = {
    "double": "locale.atof",
    "float": "locale.atof",
    "integer": "lambda x: int(locale.atof(x))",
    "int8": "lambda x: int(locale.atof(x))",
    "int16": "lambda x: int(locale.atof(x))",
    "int32": "lambda x: int(locale.atof(x))",
    "int64": "lambda x: int(locale.atof(x))",
    "day": "lambda x: int(locale.atof(x))",
    "month": "lambda x: int(locale.atof(x))",
    "year": "lambda x: int(locale.atof(x))",
    "hour": "lambda x: int(locale.atof(x))",
    "minute": "lambda x: int(locale.atof(x))",
    "second": "lambda x: int(locale.atof(x))",
    "millisecond": "lambda x: int(locale.atof(x))",
    "day-of-week": "lambda x: int(locale.atof(x))",
    "day-of-month": "lambda x: int(locale.atof(x))"}


# Callable counterparts of the expressions above, keyed by the same
# datatypes. `eval` is applied only to the constant strings defined in
# PYTHON_CONV, never to external input.
#pylint: disable=locally-disabled,eval-used
PYTHON_FUNC = {numtype: eval(function)
               for numtype, function in PYTHON_CONV.items()}
#pylint: disable=locally-disabled,invalid-name,redefined-outer-name
def sort_categories(a, b, categories_list):
    """Comparison function for dictionaries that have a `category` key.

    Elements are ordered by the position of their category in
    `categories_list`. If neither category is in the list, alphabetic order
    is used. A category that is not in the list gets position -1, so it
    sorts before any listed category.

    :param a: first dictionary, must contain the "category" key
    :param b: second dictionary, must contain the "category" key
    :param categories_list: list of categories that defines the order
    :return: 1 if `a` goes after `b`, -1 if it goes before, 0 if equal
    """

    def position(category):
        # list.index raises ValueError for a missing element instead of
        # returning a negative number, so the "not found" fallback below
        # was unreachable and unknown categories crashed the sort.
        try:
            return categories_list.index(category)
        except ValueError:
            return -1

    index_a = position(a["category"])
    index_b = position(b["category"])
    if index_a < 0 and index_b < 0:
        # neither category is known: fall back to comparing the names
        index_a = a['category']
        index_b = b['category']
    if index_b < index_a:
        return 1
    if index_b > index_a:
        return -1
    return 0
def average_confidence(model):
    """Average for the confidence of the predictions resulting from
    running the training data through the model

    The average is weighted by the number of instances (`count`) found in
    each entry of the `details` list of every group returned by
    `model.group_prediction()`.

    :param model: object exposing a `boosting` attribute and a
                  `group_prediction()` method
    :return: the weighted average confidence, or NaN when no instance is
             found
    :raises AttributeError: if the model is a boosting model
    """
    if model.boosting:
        raise AttributeError("This method is not available for boosting"
                             " models.")
    total = 0.0
    cumulative_confidence = 0.0
    groups = model.group_prediction()
    for predictions in groups.values():
        for _, count, confidence in predictions['details']:
            cumulative_confidence += count * confidence
            total += count
    # the accumulated value is a weighted sum: it has to be divided by the
    # number of instances to become the average (it used to be returned
    # undivided)
    return float('nan') if total == 0.0 else cumulative_confidence / total
+ """ + + if missing_strategy == PROPORTIONAL: + if tree_type == REGRESSION: + return r.regression_proportional_predict(tree, weighted, fields, + input_data) + + if tree_type == CLASSIFICATION: + # classification + return c.classification_proportional_predict(tree, weighted, + fields, + input_data) + # boosting + return b.boosting_proportional_predict(tree, fields, input_data) + + if tree_type == REGRESSION: + # last prediction missing strategy + return r.regression_last_predict(tree, weighted, fields, input_data) + if tree_type == CLASSIFICATION: + return c.classification_last_predict(tree, weighted, fields, + input_data) + # boosting + return b.boosting_last_predict(tree, fields, input_data) + + +def laplacian_term(root_dist, weighted): + """Correction term based on the training dataset distribution + + """ + + if weighted: + category_map = {category[0]: 0.0 for category in root_dist} + else: + total = float(sum([category[1] for category in root_dist])) + category_map = {category[0]: category[1] / total + for category in root_dist} + return category_map + + +class Model(BaseModel): + """ A lightweight wrapper around a Tree model. + + Uses a BigML remote model to build a local version that can be used + to generate predictions locally. 
+ + """ + + def __init__(self, model, api=None, fields=None, cache_get=None, + operation_settings=None): + """The Model constructor can be given as first argument: + - a model structure + - a model id + - a path to a JSON file containing a model structure + + :param model: The model info or reference + :param api: Connection object that will be used to download the deepnet + info if not locally available + :param cache_get: Get function that handles memory-cached objects + :param operation_settings: Dict object that contains operating options + + The operation_settings will depend on the type of ML problem: + - regressions: no operation_settings allowed + - classifications: operating_point, operating_kind + + """ + + if use_cache(cache_get): + # using a cache to store the model attributes + self.__dict__ = load(get_model_id(model), cache_get) + return + + self.resource_id = None + self.name = None + self.description = None + self.parent_id = None + self.ids_map = {} + self.terms = {} + self.regression = False + self.boosting = None + self.class_names = None + self.default_numeric_value = None + api = get_api_connection(api) + # retrieving model information from + self.resource_id, model = get_resource_dict( \ + model, "model", api=api, no_check_fields=fields is not None) + if 'object' in model and isinstance(model['object'], dict): + model = model['object'] + try: + self.parent_id = model.get('dataset') + self.name = model.get('name') + self.description = model.get('description') + except AttributeError: + raise ValueError("Failed to find the expected " + "JSON structure. 
Check your arguments.") + if 'model' in model and isinstance(model['model'], dict): + status = get_status(model) + if 'code' in status and status['code'] == FINISHED: + # fill boosting info before creating modelfields + if model.get("boosted_ensemble"): + self.boosting = model.get('boosting', False) + if self.boosting == {}: + self.boosting = False + + self.default_numeric_value = model.get('default_numeric_value') + self.input_fields = model["input_fields"] + BaseModel.__init__(self, model, api=api, fields=fields, + operation_settings=operation_settings) + + try: + root = model['model']['root'] + except KeyError: + raise NoRootDecisionTree("Model %s has no `root` element" + " and cannot be used" + % self.resource_id) + self.weighted = "weighted_objective_summary" in root + + terms = {} + + if self.boosting: + # build boosted tree + self.tree = b.build_boosting_tree( \ + model['model']['root'], terms=terms) + elif self.regression: + self.root_distribution = model['model'][ \ + 'distribution']['training'] + # build regression tree + self.tree = r.build_regression_tree(root, \ + distribution=self.root_distribution, \ + weighted=self.weighted, terms=terms) + else: + # build classification tree + self.root_distribution = model['model'][\ + 'distribution']['training'] + self.laplacian_term = laplacian_term( \ + extract_distribution(self.root_distribution)[1], + self.weighted) + self.tree = c.build_classification_tree( \ + model['model']['root'], \ + distribution=self.root_distribution, \ + weighted=self.weighted, terms=terms) + self.class_names = sorted( \ + [category[0] for category in \ + self.root_distribution["categories"]]) + self.objective_categories = [category for \ + category, _ in self.fields[self.objective_id][ \ + "summary"]["categories"]] + + if not hasattr(self, "tag_clouds"): + self.tag_clouds = {} + if not hasattr(self, "items"): + self.items = {} + + if terms: + # only the terms used in the model are kept + for field_id, field_terms in terms.items(): + 
def predict_confidence(self, input_data, missing_strategy=LAST_PREDICTION,
                       compact=False):
    """For classification models, Predicts a one-vs.-rest confidence value
    for each possible output class, based on input values. This
    confidence value is a lower confidence bound on the predicted
    probability of the given class. The input fields must be a
    dictionary keyed by field name or field ID.

    For regressions, the output is a single element list containing the
    prediction when `compact` is True, and a dictionary with the
    prediction and its confidence otherwise.

    :param input_data: Input data to be predicted
    :param missing_strategy: LAST_PREDICTION|PROPORTIONAL missing strategy
                             for missing fields
    :param compact: If False, prediction is returned as a list of maps, one
                    per class, with the keys "category" and "confidence"
                    mapped to the name of the class and its confidence,
                    respectively. If True, returns a list of confidences
                    ordered by the sorted order of the class names.
    :raises AttributeError: for boosting classification models
    """
    if self.regression:
        prediction = self.predict(input_data,
                                  missing_strategy=missing_strategy,
                                  full=not compact)

        if compact:
            output = [prediction]
        else:
            output = cast_prediction(prediction, to=DICTIONARY,
                                     confidence=True)
        return output

    if self.boosting:
        raise AttributeError("This method is available for non-boosting"
                             " models only.")

    # Every class starts with a 0.0 confidence. The map used to be seeded
    # by iterating self.root_distribution, which is a dictionary: that
    # yields its keys and `category[0]` is just their first character.
    category_map = {name: 0.0 for name in self.class_names}
    prediction = self.predict(input_data,
                              missing_strategy=missing_strategy,
                              full=True)

    distribution = prediction['distribution']
    population = prediction['count']

    # only the classes found in the predicted node get a confidence value
    for class_info in distribution:
        name = class_info[0]
        category_map[name] = ws_confidence(name, distribution,
                                           ws_n=population)

    return self._to_output(category_map, compact, "confidence")
If True, returns a list of probabilities + ordered by the sorted order of the class names. + """ + if self.regression or self.boosting: + prediction = self.predict(input_data, + missing_strategy=missing_strategy, + full=not compact) + + if compact: + output = [prediction] + else: + output = prediction + else: + + prediction = self.predict(input_data, + missing_strategy=missing_strategy, + full=True) + category_map = self._probabilities(prediction['distribution']) + output = self._to_output(category_map, compact, "probability") + + return output + + def predict_operating(self, input_data, + missing_strategy=LAST_PREDICTION, + operating_point=None): + """Computes the prediction based on a user-given operating point. + + """ + + kind, threshold, positive_class = parse_operating_point( \ + operating_point, OPERATING_POINT_KINDS, self.class_names, + self.operation_settings) + if kind == "probability": + predictions = self.predict_probability(input_data, + missing_strategy, False) + else: + predictions = self.predict_confidence(input_data, + missing_strategy, False) + + position = self.class_names.index(positive_class) + if predictions[position][kind] > threshold: + prediction = predictions[position] + else: + # if the threshold is not met, the alternative class with + # highest probability or confidence is returned + predictions.sort( \ + key=cmp_to_key( \ + lambda a, b: self._sort_predictions(a, b, kind))) + prediction = predictions[0: 2] + if prediction[0]["category"] == positive_class: + prediction = prediction[1] + else: + prediction = prediction[0] + prediction["prediction"] = prediction["category"] + del prediction["category"] + return prediction + + #pylint: disable=locally-disabled,invalid-name,redefined-outer-name + def _sort_predictions(self, a, b, criteria): + """Sorts the categories in the predicted node according to the + given criteria + + """ + if a[criteria] == b[criteria]: + return sort_categories(a, b, self.objective_categories) + return 1 if 
def predict(self, input_data, missing_strategy=LAST_PREDICTION,
            operating_point=None, operating_kind=None, full=False):
    """Makes a prediction based on a number of field values.

    input_data: Input data to be predicted
    missing_strategy: LAST_PREDICTION|PROPORTIONAL missing strategy for
                      missing fields
    operating_point: In classification models, this is the point of the
                     ROC curve where the model will be used at. The
                     operating point is a map that must contain
                     three attributes:
                       - kind: "probability" or "confidence", the
                         property compared to the threshold
                       - threshold: value in the 0 to 1 range that
                         the property of the positive class must
                         exceed for that class to be predicted
                       - positive_class: the class that is important
                         to predict accurately. It must be one of the
                         objective field classes.
                     e.g.:
                       {"kind": "probability",
                        "positive_class": "Iris-setosa",
                        "threshold": 0.5}
                     If not set, the operating point stored in the
                     model's operation settings (if any) is used.
    operating_kind: "probability" or "confidence". Sets the
                    property that decides the prediction. Used only if
                    no operating_point is used. If not set, the
                    operating kind stored in the model's operation
                    settings (if any) is used.
    full: Boolean that controls whether to include the prediction's
          attributes. By default, only the prediction is produced. If set
          to True, the rest of available information is added in a
          dictionary format. The dictionary keys can be:
              - prediction: the prediction value
              - confidence: prediction's confidence
              - probability: prediction's probability
              - path: rules that lead to the prediction
              - count: number of training instances supporting the
                       prediction
              - next: field to check in the next split
              - min: minimum value of the training instances in the
                     predicted node
              - max: maximum value of the training instances in the
                     predicted node
              - median: median of the values of the training instances
                        in the predicted node
              - unused_fields: list of fields in the input data that
                               are not being used in the model
          Attributes whose value is None are left out.
    """

    # Checks and cleans input_data leaving the fields used in the model
    unused_fields = []
    norm_input_data = self.filter_input_data( \
        input_data,
        add_unused_fields=full)
    if full:
        # in this case, the filter returns the unused fields too
        norm_input_data, unused_fields = norm_input_data

    # Strips affixes for numeric values and casts to the final field type
    cast(norm_input_data, self.fields)

    # arguments given by the user override the stored operation settings
    if operating_point is None and self.operation_settings is not None:
        operating_point = self.operation_settings.get("operating_point")
    if operating_kind is None and self.operation_settings is not None:
        operating_kind = self.operation_settings.get("operating_kind")

    full_prediction = self._predict( \
        norm_input_data, missing_strategy=missing_strategy,
        operating_point=operating_point, operating_kind=operating_kind,
        unused_fields=unused_fields)
    if self.regression:
        full_prediction['prediction'] = round(
            full_prediction['prediction'], DECIMALS)
    if full:
        return {key: value for key, value in full_prediction.items()
                if value is not None}
    return full_prediction['prediction']
missing_strategy=LAST_PREDICTION, + operating_point=None, operating_kind=None, + unused_fields=None): + """Makes a prediction based on a number of field values. Please, + note that this function does not check the types for the input + provided, so it's unsafe to use it directly without prior checking. + + """ + # When operating_point is used, we need the probabilities + # (or confidences) of all possible classes to decide, so se use + # the `predict_probability` or `predict_confidence` methods + if operating_point: + if self.regression: + raise ValueError("The operating_point argument can only be" + " used in classifications.") + prediction = self.predict_operating( \ + input_data, + missing_strategy=missing_strategy, + operating_point=operating_point) + return prediction + + if operating_kind: + if self.regression: + raise ValueError("The operating_kind argument can only be" + " used in classifications.") + prediction = self.predict_operating_kind( \ + input_data, + missing_strategy=missing_strategy, + operating_kind=operating_kind) + return prediction + + prediction = tree_predict( \ + self.tree, self.tree_type, self.weighted, self.fields, + input_data, missing_strategy=missing_strategy) + + if self.boosting and missing_strategy == PROPORTIONAL: + # output has to be recomputed and comes in a different format + g_sum, h_sum, population, path = prediction + prediction = Prediction( \ + - g_sum / (h_sum + self.boosting.get("lambda", 1)), + path, + None, + distribution=None, + count=population, + median=None, + distribution_unit=None) + + result = vars(prediction) + # changing key name to prediction + result['prediction'] = result['output'] + del result['output'] + # next + field = (None if len(prediction.children) == 0 else + prediction.children[0][FIELD_OFFSET]) + if field is not None and field in self.model_fields: + field = self.model_fields[field]['name'] + result.update({'next': field}) + del result['children'] + if not self.regression and not self.boosting: + 
probabilities = self._probabilities(result['distribution']) + result['probability'] = probabilities[result['prediction']] + # adding unused fields, if any + if unused_fields: + result.update({'unused_fields': unused_fields}) + + return result + + def data_transformations(self): + """Returns the pipeline transformations previous to the modeling + step as a pipeline, so that they can be used in local predictions. + Avoiding to set it in a Mixin to maintain the current dump function. + """ + return get_data_transformations(self.resource_id, self.parent_id) diff --git a/bigml/modelfields.py b/bigml/modelfields.py new file mode 100644 index 00000000..964015f0 --- /dev/null +++ b/bigml/modelfields.py @@ -0,0 +1,434 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2013-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A ModelFields resource. + +This module defines a ModelFields class to hold the information associated +to the fields of the model resource in BigML. +It becomes the starting point for the Model class, that +is used for local predictions. 
def check_resource_fields(resource):
    """Checks the resource structure to see whether it contains the required
    fields information

    :param resource: dictionary that contains the resource information
    :return: True if the fields information needed by the resource is
             available, False otherwise
    """
    inner_key = FIELDS_PARENT.get(get_resource_type(resource), 'model')
    if check_resource_structure(resource, inner_key):
        resource = resource.get('object', resource)
        fields = resource.get("fields",
                              resource.get(inner_key, {}).get('fields'))
        input_fields = resource.get("input_fields")
        # models only need model_fields to work. The rest of resources will
        # need all fields to work
        model_fields = list(resource.get(inner_key, {}).get( \
            'model_fields', {}).keys())
        # fusions don't have input fields
        if input_fields is None and inner_key != "fusion":
            return False
        if not model_fields:
            fields_meta = resource.get('fields_meta', \
                resource.get(inner_key, {}).get('fields_meta', {}))
            try:
                return fields_meta['count'] == fields_meta['total']
            except KeyError:
                # stored old models will not have the fields_meta info, so
                # we return True to avoid failing in this case
                return True
        else:
            if fields is None:
                return False
            # membership is tested on the dictionary itself instead of
            # building a new list of keys for every model field
            return all(field_id in fields for field_id in model_fields)
    return False
+ + """ + + extend_forms = {} + for term, forms in list(term_forms.items()): + for form in forms: + extend_forms[form] = term + extend_forms[term] = term + terms_set = {} + for term in terms: + if term in tag_cloud: + if term not in terms_set: + terms_set[term] = 0 + terms_set[term] += 1 + elif term in extend_forms: + term = extend_forms[term] + if term not in terms_set: + terms_set[term] = 0 + terms_set[term] += 1 + return list(terms_set.items()) + + +class ModelFields: + """ A lightweight wrapper of the field information in the model, cluster + or anomaly objects + + """ + #pylint: disable=locally-disabled,no-member,access-member-before-definition + def __init__(self, fields, objective_id=None, data_locale=None, + missing_tokens=None, categories=False, + numerics=False, operation_settings=None, model_fields=None): + if isinstance(fields, dict): + tmp_fields = copy.deepcopy(fields) + try: + self.objective_id = objective_id + self.uniquify_varnames(tmp_fields) + self.inverted_fields = invert_dictionary(tmp_fields) + self.fields = tmp_fields + if not (hasattr(self, "input_fields") and self.input_fields): + self.input_fields = [field_id for field_id, field in \ + sorted(list(self.fields.items()), + key=lambda x: x[1].get("column_number")) \ + if not self.objective_id or \ + field_id != self.objective_id] + if model_fields is not None: + self.model_fields = model_fields + else: + self.model_fields = {} + self.model_fields.update( + {field_id: field for field_id, field \ + in self.fields.items() if field_id in \ + self.input_fields and self.fields[field_id].get( + "preferred", True)}) + self.data_locale = data_locale + self.missing_tokens = missing_tokens + if self.data_locale is None: + self.data_locale = DEFAULT_LOCALE + if self.missing_tokens is None: + self.missing_tokens = DEFAULT_MISSING_TOKENS + # adding text and items information to handle terms + # expansion + self.term_forms = [] + self.tag_clouds = {} + self.term_analysis = {} + self.items = {} + 
def add_terms(self, categories=False, numerics=False):
    """Adds the terms information of text and items fields

    The tag cloud of text fields and the items of items fields are moved
    from the field summary to the `tag_clouds` and `items` attributes,
    and their analysis options are copied to `term_analysis` and
    `item_analysis`.

    :param categories: if True, the categories of the categorical fields
                       are stored in the `categories` attribute
    :param numerics: if True, and both the `missing_numerics` and
                     `numeric_fields` attributes exist, numeric fields are
                     flagged in `numeric_fields` when `missing_numerics`
                     is set
    """
    for field_id, field in list(self.fields.items()):
        if field['optype'] == 'text' and \
                self.fields[field_id]['summary'].get('tag_cloud'):
            self.term_forms.append(field_id)
            self.tag_clouds[field_id] = [tag for [tag, _] in field[
                'summary']['tag_cloud']]
            del self.fields[field_id]["summary"]["tag_cloud"]
            self.term_analysis[field_id] = {}
            self.term_analysis[field_id].update(
                field['term_analysis'])
        if field['optype'] == 'items' and \
                self.fields[field_id]["summary"].get("items"):
            self.items[field_id] = [item for item, _ in \
                field['summary']['items']]
            del self.fields[field_id]["summary"]["items"]
            self.item_analysis[field_id] = {}
            self.item_analysis[field_id].update(
                field['item_analysis'])
        # .get is used, as in the text and items cases, so that a
        # categorical field without categories is skipped instead of
        # raising a KeyError
        if categories and field['optype'] == 'categorical' and \
                self.fields[field_id]["summary"].get("categories"):
            self.categories[field_id] = [category for \
                [category, _] in field['summary']['categories']]
        if field['optype'] == 'datetime' and \
                hasattr(self, "coeff_ids"):
            # datetime fields are removed from the coefficient IDs. The
            # filtered list used to be stored in a misspelled `coeff_id`
            # attribute, so `coeff_ids` was never updated.
            self.coeff_ids = [coeff_id for coeff_id in self.coeff_ids \
                if coeff_id != field_id]
        if numerics and hasattr(self, "missing_numerics") and \
                self.missing_numerics and field['optype'] == 'numeric' \
                and hasattr(self, "numeric_fields"):
            self.numeric_fields[field_id] = True
+ if self.objective_id: + unique_names = [fields[self.objective_id]['name']] + else: + unique_names = [] + + field_ids = sorted([field_id for field_id in fields + if field_id != self.objective_id]) + for field_id in field_ids: + new_name = fields[field_id]['name'] + if new_name in unique_names: + new_name = "{0}{1}".format(fields[field_id]['name'], + fields[field_id]['column_number']) + if new_name in unique_names: + new_name = "{0}_{1}".format(new_name, field_id) + fields[field_id]['name'] = new_name + unique_names.append(new_name) + + def normalize(self, value): + """Transforms to unicode and cleans missing tokens + + """ + if isinstance(value, str) and not isinstance(value, str): + value = str(value, "utf-8") + return None if hasattr(self, "missing_tokens") and \ + value in self.missing_tokens else value + + def fill_numeric_defaults(self, input_data): + """Fills the value set as default for numeric missing fields if user + created the model with the default_numeric_value option + + """ + if hasattr(self, "default_numeric_value") and \ + self.default_numeric_value is not None: + for key in self.fields: + if key in self.model_fields and \ + (self.objective_id is None or \ + key != self.objective_id) and \ + self.fields[key]["optype"] == NUMERIC and \ + input_data.get(key) is None: + input_data[key] = self.fields[key]["summary"].get( \ + self.default_numeric_value, 0) + return input_data + + def filter_input_data(self, input_data, + add_unused_fields=False): + """Filters the keys given in input_data checking against model fields. + If `add_unused_fields` is set to True, it also + provides information about the ones that are not used. 
+ + """ + unused_fields = [] + new_input = {} + tmp_input = {} + tmp_input.update(input_data) + if isinstance(tmp_input, dict): + # remove all missing values + for key, value in list(tmp_input.items()): + value = self.normalize(value) + if value is None: + del tmp_input[key] + for key, value in list(tmp_input.items()): + if key not in self.fields: + key = self.inverted_fields.get(key, key) + # only the fields that are listed in input_fields and appear + # as preferred are used in predictions + if key in self.model_fields and \ + (self.objective_id is None or \ + key != self.objective_id): + new_input[key] = value + else: + unused_fields.append(key) + # Feature generation (datetime and image features) is now done + # when a Pipeline is created for the model, so no features are + # added any more at this point. + # We fill the input with the chosen default, if selected + new_input = self.fill_numeric_defaults(new_input) + final_input = {} + for key, value in new_input.items(): + if key in self.model_fields: + final_input.update({key: value}) + result = (final_input, unused_fields) if add_unused_fields else \ + final_input + return result + LOGGER.error("Failed to read input data in the expected" + " {field:value} format.") + return ({}, []) if add_unused_fields else {} + + def get_unique_terms(self, input_data): + """Parses the input data to find the list of unique terms in the + tag cloud + + """ + unique_terms = {} + for field_id in self.term_forms: + if field_id in input_data: + input_data_field = input_data.get(field_id, '') + if isinstance(input_data_field, str): + case_sensitive = self.term_analysis[field_id].get( + 'case_sensitive', True) + token_mode = self.term_analysis[field_id].get( + 'token_mode', 'all') + if token_mode != TM_FULL_TERM: + terms = parse_terms(input_data_field, + case_sensitive=case_sensitive) + else: + terms = [] + full_term = input_data_field if case_sensitive \ + else input_data_field.lower() + # We add full_term if needed. 
Note that when there's + # only one term in the input_data, full_term and term are + # equal. Then full_term will not be added to avoid + # duplicated counters for the term. + if token_mode == TM_FULL_TERM or \ + (token_mode == TM_ALL and (len(terms) == 0 or + terms[0] != full_term)): + terms.append(full_term) + unique_terms[field_id] = get_unique_terms( + terms, self.fields[field_id]["summary"]["term_forms"], + self.tag_clouds.get(field_id, [])) + else: + unique_terms[field_id] = [(input_data_field, 1)] + del input_data[field_id] + # the same for items fields + #pylint: disable=locally-disabled,consider-using-dict-items + for field_id in self.item_analysis: + if field_id in input_data: + input_data_field = input_data.get(field_id, '') + if isinstance(input_data_field, str): + # parsing the items in input_data + separator = self.item_analysis[field_id].get( + 'separator', ' ') + regexp = self.item_analysis[field_id].get( + 'separator_regexp') + if regexp is None: + regexp = r'%s' % re.escape(separator) + terms = parse_items(input_data_field, regexp) + unique_terms[field_id] = get_unique_terms( + terms, {}, + self.items.get(field_id, [])) + else: + unique_terms[field_id] = [(input_data_field, 1)] + del input_data[field_id] + + if hasattr(self, "categories") and self.categories: + for field_id in self.categories: + if field_id in input_data: + input_data_field = input_data.get(field_id, '') + unique_terms[field_id] = [(input_data_field, 1)] + del input_data[field_id] + return unique_terms + + def dump(self, output=None, cache_set=None): + """Uses msgpack to serialize the resource object + If cache_set is filled with a cache set method, the method is called + + """ + self_vars = vars(self) + dump(self_vars, output=output, cache_set=cache_set) + + def dumps(self): + """Uses msgpack to serialize the resource object to a string + + """ + self_vars = vars(self) + return dumps(self_vars) diff --git a/bigml/multimodel.py b/bigml/multimodel.py new file mode 100644 index 
00000000..85e7eb9e --- /dev/null +++ b/bigml/multimodel.py @@ -0,0 +1,336 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2012-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A Multiple Local Predictive Model. + +This module defines a Multiple Model to make predictions locally using multiple +local models. + +This module can help you enormously to +reduce the latency for each prediction and let you use your models +offline. + +from bigml.api import BigML +from bigml.multimodel import MultiModel + +api = BigML() + +model = MultiModel([api.get_model(model['resource']) for model in + api.list_models(query_string="tags__in=my_tag") + ['objects']]) + +model.predict({"petal length": 3, "petal width": 1}) + +""" +import logging +import ast + +from functools import partial + +from bigml.exceptions import NoRootDecisionTree +from bigml.model import Model, cast_prediction, to_prediction +from bigml.model import LAST_PREDICTION +from bigml.util import get_predictions_file_name +from bigml.multivote import MultiVote +from bigml.multivote import PLURALITY_CODE, CONFIDENCE_CODE, PROBABILITY_CODE +from bigml.multivotelist import MultiVoteList +from bigml.io import UnicodeWriter, UnicodeReader + + +LOGGER = logging.getLogger('BigML') + + +def read_votes(votes_files, to_prediction_fn, data_locale=None): + """Reads the votes found in the votes' files. + + Returns a list of MultiVote objects containing the list of predictions. 
class MultiModel():
    """A multiple local model.

    Uses a number of BigML remote models to build a local version that can be
    used to generate predictions locally.

    """

    def __init__(self, models, api=None, fields=None, class_names=None,
                 cache_get=None, operation_settings=None):
        """Builds the list of local models

        `models` can be a list of Model objects (used as is), a list of
        values accepted by the Model constructor, or a single such value.
        The models raising NoRootDecisionTree are not added.

        """
        self.models = []
        self.class_names = class_names

        if isinstance(models, list) and \
                all(isinstance(model, Model) for model in models):
            self.models = models
            return
        candidates = models if isinstance(models, list) else [models]
        for model in candidates:
            # some models have no root info and should not be added
            try:
                self.models.append(Model(
                    model, api=api, fields=fields,
                    cache_get=cache_get,
                    operation_settings=operation_settings))
            except NoRootDecisionTree:
                pass

    def list_models(self):
        """Lists all the model/ids that compound the multi model.

        """
        return [model.resource() for model in self.models]

    def predict(self, input_data, method=PLURALITY_CODE, options=None,
                missing_strategy=LAST_PREDICTION, full=False):
        """Makes a prediction based on the prediction made by every model.

        The method parameter is a numeric key to the following combination
        methods in classifications/regressions:
              0 - majority vote (plurality)/ average: PLURALITY_CODE
              1 - confidence weighted majority vote / error weighted:
                  CONFIDENCE_CODE
              2 - probability weighted majority vote / average:
                  PROBABILITY_CODE
              3 - threshold filtered vote / doesn't apply:
                  THRESHOLD_CODE

        When `full` is True the result is a dictionary that also contains
        `unused_fields`: the input keys that every model reported as unused.
        """

        votes = self.generate_votes(input_data,
                                    missing_strategy=missing_strategy)

        result = votes.combine(method=method, options=options, full=full)
        if full:
            # a field is unused only if none of the models used it
            unused_fields = set(input_data.keys())
            for prediction in votes.predictions:
                unused_fields = unused_fields.intersection(
                    set(prediction.get("unused_fields", [])))
            if not isinstance(result, dict):
                result = {"prediction": result}
            result['unused_fields'] = list(unused_fields)

        return result

    @staticmethod
    def _append_vote(votes, model, prediction_info):
        """Adds the boosting information (if any) to the prediction of a
        model and appends it to the votes.

        """
        if model.boosting is not None:
            votes.boosting = True
            prediction_info.update(
                {"weight": model.boosting.get("weight")})
            if model.boosting.get("objective_class") is not None:
                prediction_info.update(
                    {"class": model.boosting.get("objective_class")})
        votes.append(prediction_info)

    def generate_votes(self, input_data,
                       missing_strategy=LAST_PREDICTION):
        """ Generates a MultiVote object that contains the predictions
            made by each of the models.
        """
        votes = MultiVote([])
        for model in self.models:
            prediction_info = model.predict(
                input_data, missing_strategy=missing_strategy, full=True)
            self._append_vote(votes, model, prediction_info)

        return votes

    #pylint: disable=locally-disabled,protected-access
    def _generate_votes(self, input_data, missing_strategy=LAST_PREDICTION,
                        unused_fields=None):
        """ Generates a MultiVote object that contains the predictions
            made by each of the models. Please note that this function
            calls a _predict method which assumes input data has been
            properly checked against the model fields. Only casting
            to the correct type will be applied.
        """
        votes = MultiVote([])
        for model in self.models:
            prediction_info = model._predict(
                input_data,
                missing_strategy=missing_strategy, unused_fields=unused_fields)
            self._append_vote(votes, model, prediction_info)

        return votes

    def generate_votes_distribution(self,
                                    input_data,
                                    missing_strategy=LAST_PREDICTION,
                                    method=PROBABILITY_CODE):
        """Generates a MultiVoteList object to contain the predictions
           of a list of models as the list of classes and their predicted
           probabilities or confidence.

           For PLURALITY_CODE each model votes with 1.0 in the position of
           its predicted class in `class_names` and 0.0 in the rest.
        """
        votes = []
        for model in self.models:
            model.class_names = self.class_names
            if method == PLURALITY_CODE:
                prediction_info = [0.0] * len(self.class_names)
                prediction = model.predict(
                    input_data,
                    missing_strategy=missing_strategy,
                    full=False)
                prediction_info[self.class_names.index(prediction)] = 1.0
            else:
                predict_method = model.predict_confidence \
                    if method == CONFIDENCE_CODE \
                    else model.predict_probability
                prediction_info = predict_method(
                    input_data,
                    compact=True,
                    missing_strategy=missing_strategy)
            votes.append(prediction_info)

        return MultiVoteList(votes)

    def batch_predict(self, input_data_list, output_file_path=None,
                      reuse=False,
                      missing_strategy=LAST_PREDICTION, headers=None,
                      to_file=True, use_median=False):
        """Makes predictions for a list of input data.

           When the to_file argument is set to True, the predictions
           generated for each model are stored in an output
           file. The name of the file will use the following syntax:
                model_[id of the model]__predictions.csv
           For instance, when using model/50c0de043b563519830001c2 to predict,
           the output file name will be
                model_50c0de043b563519830001c2__predictions.csv
            On the contrary, if it is False, the function returns a list
            of MultiVote objects with the model's predictions.

            An empty `input_data_list` produces no predictions. A
            ValueError is raised when the rows are neither dictionaries nor
            lists that match the given `headers`. If `reuse` is True, the
            models whose predictions file can be opened are skipped.
        """
        add_headers = False
        # the first row decides the format; nothing to check if empty
        if input_data_list:
            first_row = input_data_list[0]
            add_headers = (isinstance(first_row, list) and
                           headers is not None and
                           len(headers) == len(first_row))
            if not add_headers and not isinstance(first_row, dict):
                raise ValueError("Input data list is not a dictionary or the"
                                 " headers and input data information are not"
                                 " consistent.")
        votes = []

        for model in self.models:
            out = None
            if to_file:
                output_file = get_predictions_file_name(model.resource_id,
                                                        output_file_path)
                if reuse:
                    try:
                        with open(output_file):
                            continue
                    except IOError:
                        pass
                try:
                    out = UnicodeWriter(output_file)
                except IOError as exc:
                    raise Exception("Cannot find %s directory." %
                                    output_file_path) from exc

            if out:
                out.open_writer()
            try:
                for index, input_data in enumerate(input_data_list):
                    if add_headers:
                        input_data = dict(list(zip(headers, input_data)))
                    prediction = model.predict(
                        input_data,
                        missing_strategy=missing_strategy,
                        full=True)
                    if model.regression:
                        # if median is to be used, we just replace the
                        # prediction
                        if use_median:
                            prediction["prediction"] = prediction["median"]
                    if to_file:
                        prediction = cast_prediction(prediction, to="list",
                                                     confidence=True,
                                                     distribution=True,
                                                     count=True)
                        out.writerow(prediction)
                    else:
                        if len(votes) <= index:
                            votes.append(MultiVote([]))
                        votes[index].append(prediction)
            finally:
                # the writer is closed even when a prediction fails
                if out:
                    out.close_writer()
        if not to_file:
            return votes
        return output_file_path

    def batch_votes(self, predictions_file_path, data_locale=None):
        """Adds the votes for predictions generated by the models.

           Returns a list of MultiVote objects each of which contains a list
           of predictions. The list is empty when there are no models.
        """
        if not self.models:
            return []
        votes_files = [
            get_predictions_file_name(model.resource_id,
                                      predictions_file_path)
            for model in self.models]
        # the first model is the one used to cast the values read
        return read_votes(
            votes_files, partial(to_prediction, self.models[0]),
            data_locale=data_locale)
def softmax(predictions):
    """Returns the softmax values from a distribution given as a dictionary
    like:
        {"category": {"probability": probability, "order": order}}

    The result has the same structure, with the probabilities replaced by
    their softmax. NaN is returned for an empty distribution.

    """
    if not predictions:
        return float('nan')
    # Scores are shifted by their maximum before exponentiating. The result
    # is mathematically the same, but math.exp can no longer overflow and
    # the total is never zero.
    offset = max(cat_info["probability"]
                 for cat_info in predictions.values())
    total = 0.0
    normalized = {}
    for category, cat_info in predictions.items():
        normalized[category] = {
            "probability": math.exp(cat_info["probability"] - offset),
            "order": cat_info["order"]}
        total += normalized[category]["probability"]
    return float('nan') if total == 0 else \
        {category: {"probability": cat_info["probability"] / total,
                    "order": cat_info["order"]}
         for category, cat_info in normalized.items()}
@classmethod
def avg(cls, instance, full=False):
    """Returns the average of a list of numeric values.

    If full is True, the combined confidence (as the
    average of confidences of the multivote predictions) is also
    returned, together with the grouped distribution, the total count
    and, when available, the median, min and max values.

    Raises an Exception if full is True and some prediction has no
    confidence key.
    """
    if (instance.predictions and full and
            not all(CONFIDENCE_W in prediction
                    for prediction in instance.predictions)):
        raise Exception("Not enough data to use the selected "
                        "prediction method. Try creating your"
                        " model anew.")
    total = len(instance.predictions)
    result = 0.0
    median_result = 0.0
    confidence = 0.0
    instances = 0
    missing_confidence = 0
    d_min = float('Inf')
    d_max = float('-Inf')
    for prediction in instance.predictions:
        result += prediction['prediction']
        if full:
            if 'median' in prediction:
                median_result += prediction['median']
            # some buggy models don't produce a valid confidence value
            if prediction[CONFIDENCE_W] is not None and \
                    prediction[CONFIDENCE_W] > 0:
                confidence += prediction[CONFIDENCE_W]
            else:
                missing_confidence += 1
            instances += prediction['count']
            if 'min' in prediction and prediction['min'] < d_min:
                d_min = prediction['min']
            if 'max' in prediction and prediction['max'] > d_max:
                d_max = prediction['max']
    if not full:
        return result / total if total > 0 else float('nan')

    output = {'prediction': result / total if total > 0 else
                            float('nan')}
    # some strange models have no confidence: only valid confidences are
    # averaged, and 0 is used when none of them is valid (dividing by
    # total - missing_confidence would fail in that case)
    valid_confidences = total - missing_confidence
    output.update(
        {'confidence': round(confidence / valid_confidences, PRECISION)
                       if valid_confidences > 0 else 0})
    output.update(cls.grouped_distribution(instance))
    output.update({'count': instances})
    # NOTE(review): the median is only reported when the sum of medians is
    # positive, so zero or negative medians are dropped. Confirm whether
    # that is intended.
    if median_result > 0:
        output.update({
            'median': median_result / total if
                      total > 0 else float('nan')})
    if d_min < float('Inf'):
        output.update({'min': d_min})
    if d_max > float('-Inf'):
        output.update({'max': d_max})
    return output
@classmethod
def normalize_error(cls, instance, top_range):
    """Normalizes error to a [0, top_range] and builds probabilities

    Stores the computed weight in the `_error_weight` key of each
    prediction and returns the sum to be used as normalization factor.
    Returns 0, and sets no weights, when no prediction has a non-null
    confidence.

    Raises an Exception if some prediction has no confidence key.
    """
    if instance.predictions and not all(CONFIDENCE_W in prediction
                                        for prediction
                                        in instance.predictions):
        raise Exception("Not enough data to use the selected "
                        "prediction method. Try creating your"
                        " model anew.")

    error_values = [prediction[CONFIDENCE_W]
                    for prediction in instance.predictions
                    if prediction[CONFIDENCE_W] is not None]
    if not error_values:
        # max() and min() fail on an empty list. Callers check for a zero
        # normalization factor, so that's the value used for "no data".
        return 0
    max_error = max(error_values)
    min_error = min(error_values)
    error_range = 1.0 * (max_error - min_error)
    normalize_factor = 0
    if error_range > 0:
        # Shifts and scales predictions errors to [0, top_range].
        # Then builds e^-[scaled error] and returns the normalization
        # factor to fit them between [0, 1]
        # NOTE(review): a null confidence in any prediction still fails
        # here. Confirm how such votes should be weighted.
        for prediction in instance.predictions:
            delta = (min_error - prediction[CONFIDENCE_W])
            prediction['_error_weight'] = math.exp(delta / error_range *
                                                   top_range)
            normalize_factor += prediction['_error_weight']
    else:
        # all the errors are equal, so every vote weighs the same
        for prediction in instance.predictions:
            prediction['_error_weight'] = 1
        normalize_factor = len(error_values)
    return normalize_factor
def combine(self, method=DEFAULT_METHOD, options=None, full=False):
    """Reduces a number of predictions voting for classification and
    averaging predictions for regression.

    method will determine the voting method (plurality, confidence
    weighted, probability weighted or threshold).
    If full is true, the combined confidence (as a weighted
    average of the confidences of votes for the combined prediction)
    will also be given.

    Raises an Exception if there are no predictions or if they lack the
    keys that the selected method needs.
    """
    # there must be at least one prediction to be combined
    if not self.predictions:
        raise Exception("No predictions to be combined.")

    # unknown codes fall back to the default method
    method = COMBINER_MAP.get(method, COMBINER_MAP[DEFAULT_METHOD])
    keys = WEIGHT_KEYS.get(method, None)
    # and all predictions should have the weight-related keys
    if keys is not None:
        for key in keys:
            if not all(key in prediction for prediction
                       in self.predictions):
                raise Exception("Not enough data to use the selected "
                                "prediction method. Try creating your"
                                " model anew.")
    if self.boosting:
        for prediction in self.predictions:
            if prediction[COMBINATION_WEIGHTS[BOOSTING]] is None:
                prediction[COMBINATION_WEIGHTS[BOOSTING]] = 0
        if self.is_regression():
            # sum all gradients weighted by their "weight" plus the
            # boosting offset
            return weighted_sum(self.predictions, weight="weight") + \
                self.boosting_offsets
        return self.classification_boosting_combiner(
            options, full=full)
    if self.is_regression():
        for prediction in self.predictions:
            # Null confidences are replaced by zero. Votes that carry no
            # confidence key at all are left untouched: a plain average
            # doesn't need it, and the combiners that do need it check
            # for the key themselves.
            if CONFIDENCE_W in prediction and \
                    prediction[CONFIDENCE_W] is None:
                prediction[CONFIDENCE_W] = 0
        function = NUMERICAL_COMBINATION_METHODS.get(method,
                                                     self.__class__.avg)
        return function(self, full=full)
    if method == THRESHOLD:
        if options is None:
            options = {}
        predictions = self.single_out_category(options)
    elif method == PROBABILITY:
        predictions = MultiVote([])
        predictions.predictions = self.probability_weight()
    else:
        predictions = self
    return predictions.combine_categorical(
        COMBINATION_WEIGHTS.get(method, None),
        full=full)
int): + raise Exception("Probability weighting is not available " + "because distribution seems to have %s " + "as number of instances in a node" % total) + order = prediction_info['order'] + for prediction, instances in prediction_info['distribution']: + predictions.append({ \ + 'prediction': prediction, + 'probability': round(float(instances) / total, PRECISION), + 'count': instances, + 'order': order}) + return predictions + + def combine_distribution(self, weight_label='probability'): + """Builds a distribution based on the predictions of the MultiVote + + Given the array of predictions, we build a set of predictions with + them and associate the sum of weights (the weight being the + contents of the weight_label field of each prediction) + """ + if not all(weight_label in prediction + for prediction in self.predictions): + raise Exception("Not enough data to use the selected " + "prediction method. Try creating your" + " model anew.") + distribution = {} + total = 0 + for prediction in self.predictions: + if prediction['prediction'] not in distribution: + distribution[prediction['prediction']] = 0.0 + distribution[prediction['prediction']] += prediction[weight_label] + total += prediction['count'] + if total > 0: + distribution = [[key, value] for key, value in + list(distribution.items())] + else: + distribution = [] + return distribution, total + + def combine_categorical(self, weight_label=None, full=False): + """Returns the prediction combining votes by using the given weight: + + weight_label can be set as: + None: plurality (1 vote per prediction) + 'confidence': confidence weighted (confidence as a vote value) + 'probability': probability weighted (probability as a vote value) + + If full is true, the combined confidence (as a weighted + average of the confidences of the votes for the combined + prediction) will also be given. 
+ """ + mode = {} + instances = 0 + if weight_label is None: + weight = 1 + for prediction in self.predictions: + if weight_label is not None: + if weight_label not in list(COMBINATION_WEIGHTS.values()): + raise Exception("Wrong weight_label value.") + if weight_label not in prediction: + raise Exception("Not enough data to use the selected " + "prediction method. Try creating your" + " model anew.") + weight = prediction[weight_label] + category = prediction['prediction'] + if full: + instances += prediction['count'] + if category in mode: + mode[category] = {"count": mode[category]["count"] + weight, + "order": mode[category]["order"]} + else: + mode[category] = {"count": weight, + "order": prediction['order']} + prediction = sorted(list(mode.items()), key=lambda x: (x[1]['count'], + -x[1]['order'], + x[0]), + reverse=True)[0][0] + if full: + output = {'prediction': prediction} + if 'confidence' in self.predictions[0]: + prediction, combined_confidence = self.weighted_confidence( + prediction, weight_label) + # if prediction had no confidence, compute it from distribution + else: + if 'probability' in self.predictions[0]: + combined_distribution = self.combine_distribution() + distribution, count = combined_distribution + combined_confidence = ws_confidence(prediction, + distribution, + ws_n=count) + output.update({'confidence': + round(combined_confidence, PRECISION)}) + if 'probability' in self.predictions[0]: + for prediction in self.predictions: + if prediction['prediction'] == output['prediction']: + output['probability'] = prediction['probability'] + if 'distribution' in self.predictions[0]: + output.update(self.__class__.grouped_distribution(self)) + output.update({'count': instances}) + return output + return prediction + + def weighted_confidence(self, combined_prediction, weight_label): + """Compute the combined weighted confidence from a list of predictions + + """ + predictions = [prediction for prediction in self.predictions \ + if 
prediction['prediction'] == combined_prediction] + if (weight_label is not None and + (not isinstance(weight_label, str) or + any(not CONFIDENCE_W or weight_label not in prediction + for prediction in predictions))): + raise ValueError("Not enough data to use the selected " + "prediction method. Lacks %s information." % + weight_label) + final_confidence = 0.0 + total_weight = 0.0 + weight = 1 + for prediction in predictions: + if weight_label is not None: + weight = prediction[weight_label] + final_confidence += weight * prediction[CONFIDENCE_W] + total_weight += weight + final_confidence = (final_confidence / total_weight + if total_weight > 0 else float('nan')) + return combined_prediction, final_confidence + + def classification_boosting_combiner(self, options, full=False): + """Combines the predictions for a boosted classification ensemble + Applies the regression boosting combiner, but per class. Tie breaks + use the order of the categories in the ensemble summary to decide. + + """ + grouped_predictions = {} + for prediction in self.predictions: + if prediction.get(BOOSTING_CLASS) is not None: + objective_class = prediction.get(BOOSTING_CLASS) + if grouped_predictions.get(objective_class) is None: + grouped_predictions[objective_class] = [] + grouped_predictions[objective_class].append(prediction) + categories = options.get("categories", []) + predictions = {key: { \ + "probability": weighted_sum(value, weight="weight") + \ + self.boosting_offsets.get(key, 0), + "order": categories.index(key)} for + key, value in list(grouped_predictions.items())} + predictions = softmax(predictions) + predictions = sorted( \ + list(predictions.items()), key=lambda x: \ + (- x[1]["probability"], x[1]["order"])) + prediction, prediction_info = predictions[0] + confidence = round(prediction_info["probability"], PRECISION) + if full: + return {"prediction": prediction, + "probability": confidence, \ + "probabilities": [ \ + {"category": prediction, + "probability": 
round(prediction_info["probability"], + PRECISION)} + for prediction, prediction_info in predictions]} + return prediction + + def append(self, prediction_info): + """Adds a new prediction into a list of predictions + + prediction_info should contain at least: + - prediction: whose value is the predicted category or value + + for instance: + {'prediction': 'Iris-virginica'} + + it may also contain the keys: + - confidence: whose value is the confidence/error of the prediction + - distribution: a list of [category/value, instances] pairs + describing the distribution at the prediction node + - count: the total number of instances of the training set in the + node + """ + if isinstance(prediction_info, dict): + if 'prediction' in prediction_info: + order = self.next_order() + prediction_info['order'] = order + self.predictions.append(prediction_info) + else: + LOGGER.warning("Failed to add the prediction.\n" + "The minimal key for the prediction is " + "'prediction': " + "\n{'prediction': 'Iris-virginica'") + + def single_out_category(self, options): + """Singles out the votes for a chosen category and returns a prediction + for this category iff the number of votes reaches at least the given + threshold. + + """ + if options is None or any(option not in options for option in + ["threshold", "category"]): + raise Exception("No category and threshold information was" + " found. Add threshold and category info." + " E.g. {\"threshold\": 6, \"category\":" + " \"Iris-virginica\"}.") + length = len(self.predictions) + if options["threshold"] > length: + raise Exception("You cannot set a threshold value larger than " + "%s. The ensemble has not enough models to use" + " this threshold value." 
% length) + if options["threshold"] < 1: + raise Exception("The threshold must be a positive value") + category_predictions = [] + rest_of_predictions = [] + for prediction in self.predictions: + if prediction['prediction'] == options["category"]: + category_predictions.append(prediction) + else: + rest_of_predictions.append(prediction) + if len(category_predictions) >= options["threshold"]: + return MultiVote(category_predictions) + return MultiVote(rest_of_predictions) + + def append_row(self, prediction_row, + prediction_headers=PREDICTION_HEADERS): + """Adds a new prediction into a list of predictions + + prediction_headers should contain the labels for the prediction_row + values in the same order. + + prediction_headers should contain at least the following string + - 'prediction': whose associated value in prediction_row + is the predicted category or value + + for instance: + prediction_row = ['Iris-virginica'] + prediction_headers = ['prediction'] + + it may also contain the following headers and values: + - 'confidence': whose associated value in prediction_row + is the confidence/error of the prediction + - 'distribution': a list of [category/value, instances] pairs + describing the distribution at the prediction node + - 'count': the total number of instances of the training set in the + node + """ + + if (isinstance(prediction_row, list) and + isinstance(prediction_headers, list) and + len(prediction_row) == len(prediction_headers) and + 'prediction' in prediction_headers): + order = self.next_order() + try: + index = prediction_headers.index('order') + prediction_row[index] = order + except ValueError: + prediction_headers.append('order') + prediction_row.append(order) + prediction_info = {} + for i, prediction_row_item in enumerate(prediction_row): + prediction_info.update({prediction_headers[i]: + prediction_row_item}) + self.predictions.append(prediction_info) + else: + LOGGER.error("WARNING: failed to add the prediction.\n" + "The row must have 
label 'prediction' at least.") + + def extend(self, predictions_info): + """Given a list of predictions, extends the list with another list of + predictions and adds the order information. For instance, + predictions_info could be: + + [{'prediction': 'Iris-virginica', 'confidence': 0.3}, + {'prediction': 'Iris-versicolor', 'confidence': 0.8}] + where the expected prediction keys are: prediction (compulsory), + confidence, distribution and count. + """ + if isinstance(predictions_info, list): + order = self.next_order() + for i, prediction in enumerate(predictions_info): + if isinstance(prediction, dict): + prediction['order'] = order + i + self.append(prediction) + else: + LOGGER.error("WARNING: failed to add the prediction.\n" + "Only dict like predictions are expected.") + else: + LOGGER.error("WARNING: failed to add the predictions.\n" + "Only a list of dict-like predictions are expected.") + + def extend_rows(self, predictions_rows, + prediction_headers=PREDICTION_HEADERS): + """Given a list of predictions, extends the list with a list of + predictions and adds the order information. For instance, + predictions_info could be: + + [['Iris-virginica', 0.3], + ['Iris-versicolor', 0.8]] + and their respective labels are extracted from predition_headers, + that for this example would be: + ['prediction', 'confidence'] + + The expected prediction elements are: prediction (compulsory), + confidence, distribution and count. 
+ """ + order = self.next_order() + try: + index = prediction_headers.index('order') + except ValueError: + index = len(prediction_headers) + prediction_headers.append('order') + if isinstance(predictions_rows, list): + for i, prediction in enumerate(predictions_rows): + if isinstance(prediction, list): + if index == len(prediction): + prediction.append(order + i) + else: + prediction[index] = order + i + self.append_row(prediction, prediction_headers) + else: + LOGGER.error("WARNING: failed to add the prediction.\n" + "Only row-like predictions are expected.") + else: + LOGGER.error("WARNING: failed to add the predictions.\n" + "Only a list of row-like predictions are expected.") + +NUMERICAL_COMBINATION_METHODS = { + PLURALITY: MultiVote.avg, + CONFIDENCE: MultiVote.error_weighted, + PROBABILITY: MultiVote.avg} diff --git a/bigml/multivotelist.py b/bigml/multivotelist.py new file mode 100644 index 00000000..72f2cb56 --- /dev/null +++ b/bigml/multivotelist.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +"""Auxiliar class for lists of predictions combination. + +""" +import logging + +from bigml.util import PRECISION + +LOGGER = logging.getLogger('BigML') + + +class MultiVoteList(): + """A multiple vote prediction in compact format + + Uses a number of predictions to generate a combined prediction. 
+ The input should be an ordered list of probability, counts or confidences + for each of the classes in the objective field. + + """ + + def __init__(self, predictions): + """Init method, builds a MultiVoteList with a list of predictions + The constuctor expects a list of well formed predictions like: + [0.2, 0.34, 0.48] which might correspond to confidences of + three different classes in the objective field. + """ + if isinstance(predictions, list): + self.predictions = predictions + else: + raise ValueError("Expected a list of values to create a" + "MultiVoteList. Found %s instead" % predictions) + + def extend(self, predictions_list): + """Extending the extend method in lists + + """ + if isinstance(predictions_list, MultiVoteList): + predictions_list = predictions_list.predictions + self.predictions.extend(predictions_list) + + def append(self, prediction): + """Extending the append method in lists + + """ + self.predictions.append(prediction) + + def combine_to_distribution(self, normalize=True): + """Receives a list of lists. Each element is the list of probabilities + or confidences + associated to each class in the ensemble, as described in the + `class_names` attribute and ordered in the same sequence. Returns the + probability obtained by adding these predictions into a single one + by adding their probabilities and normalizing. 
+ """ + total = 0.0 + output = [0.0] * len(self.predictions[0]) + + for distribution in self.predictions: + for i, vote_value in enumerate(distribution): + output[i] += vote_value + total += vote_value + if not normalize: + total = len(self.predictions) + + for i, value in enumerate(output): + output[i] = round(value / total, PRECISION) + + return output diff --git a/bigml/path.py b/bigml/path.py new file mode 100644 index 00000000..e85a2ac3 --- /dev/null +++ b/bigml/path.py @@ -0,0 +1,261 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Path structure based on Predicates for the BigML local Model + +This module defines an auxiliary Path structure that is used +to store the predicates' info. 
+ +""" +from bigml.predicate import Predicate + + +EXTENDED = 0 +BRIEF = 1 +NUMERIC = 'numeric' +CATEGORICAL = 'categorical' +TEXT = 'text' +DATETIME = 'datetime' +ITEMS = 'items' + +REVERSE_OP = {'<': '>', '>': '<'} + + +def reverse(operator): + """Reverses the unequality operators + + """ + return "%s%s" % (REVERSE_OP[operator[0]], operator[1:]) + + +def merge_rules(list_of_predicates, fields, label='name'): + """Summarizes the predicates referring to the same field + + """ + if list_of_predicates: + field_id = list_of_predicates[0].field + field_type = fields[field_id]['optype'] + missing_flag = None + name = fields[field_id][label] + last_predicate = list_of_predicates[-1] + # if the last predicate is "is missing" forget about the rest + if last_predicate.operator == "=" and last_predicate.value is None: + return "%s is missing" % name + # if the last predicate is "is not missing" + if last_predicate.operator[0] in ["!", "/"] and \ + last_predicate.value is None: + if len(list_of_predicates) == 1: + # if there's only one predicate, then write "is not missing" + return "%s is not missing" % name + list_of_predicates = list_of_predicates[0: -1] + missing_flag = False + if last_predicate.missing: + missing_flag = True + + if field_type == NUMERIC: + return merge_numeric_rules( \ + list_of_predicates, fields, label=label, + missing_flag=missing_flag) + + if field_type == TEXT: + return merge_text_rules( \ + list_of_predicates, fields, label=label) + + if field_type == CATEGORICAL: + return merge_categorical_rules( \ + list_of_predicates, fields, label=label, + missing_flag=missing_flag) + + return " and ".join( + [predicate.to_rule(fields, label=label).strip() for + predicate in list_of_predicates]) + return "" + + +def merge_numeric_rules(list_of_predicates, fields, label='name', + missing_flag=None): + """ Summarizes the numeric predicates for the same field + + """ + minor = (None, float('-inf')) + major = (None, float('inf')) + equal = None + + for predicate in 
list_of_predicates: + if (predicate.operator.startswith('>') and + predicate.value > minor[1]): + minor = (predicate, predicate.value) + if (predicate.operator.startswith('<') and + predicate.value < major[1]): + major = (predicate, predicate.value) + if predicate.operator[0] in ['!', '=', '/', 'i']: + equal = predicate + break + if equal is not None: + return equal.to_rule(fields, label=label, missing=missing_flag) + rule = '' + field_id = list_of_predicates[0].field + name = fields[field_id][label] + + if minor[0] is not None and major[0] is not None: + predicate, value = minor + rule = "%s %s " % (value, reverse(predicate.operator)) + rule += name + predicate, value = major + rule += " %s %s " % (predicate.operator, value) + if missing_flag: + rule += " or missing" + else: + predicate = minor[0] if minor[0] is not None else major[0] + rule = predicate.to_rule(fields, label=label, missing=missing_flag) + return rule + + +def merge_text_rules(list_of_predicates, fields, label='name'): + """ Summarizes the text predicates for the same field + + """ + contains = [] + not_contains = [] + for predicate in list_of_predicates: + if ((predicate.operator == '<' and predicate.value <= 1) or + (predicate.operator == '<=' and predicate.value == 0)): + not_contains.append(predicate) + else: + contains.append(predicate) + rules = [] + rules_not = [] + if contains: + rules.append(contains[0].to_rule(fields, label=label).strip()) + for predicate in contains[1:]: + if predicate.term not in rules: + rules.append(predicate.term) + rule = " and ".join(rules) + if not_contains: + if not rules: + rules_not.append( + not_contains[0].to_rule(fields, label=label).strip()) + else: + rules_not.append( + " and %s" % \ + not_contains[0].to_rule(fields, label=label).strip()) + for predicate in not_contains[1:]: + if predicate.term not in rules_not: + rules_not.append(predicate.term) + rule += " or ".join(rules_not) + return rule + + +def merge_categorical_rules(list_of_predicates, + fields, 
label='name', missing_flag=None): + """ Summarizes the categorical predicates for the same field + + """ + equal = [] + not_equal = [] + + for predicate in list_of_predicates: + if predicate.operator.startswith("!"): + not_equal.append(predicate) + else: + equal.append(predicate) + rules = [] + rules_not = [] + if equal: + rules.append(equal[0].to_rule( \ + fields, label=label, missing=False).strip()) + for predicate in equal[1:]: + if not predicate.value in rules: + rules.append(predicate.value) + rule = " and ".join(rules) + if not_equal and not rules: + rules_not.append(not_equal[0].to_rule( \ + fields, label=label, missing=False).strip()) + for predicate in not_equal[1:]: + if predicate.value not in rules_not: + rules_not.append(predicate.value) + if rules_not: + connector = " and " if rule else "" + rule += connector + " or ".join(rules_not) + if missing_flag: + rule += " or missing" + return rule + + +class Path(): + """A Path as a list of Predicates + + """ + def __init__(self, predicates=None): + """ Path instance constructor accepts only lists of Predicate objects + + """ + if not predicates: + self.predicates = [] + elif isinstance(predicates, list) and \ + isinstance(predicates[0], Predicate): + self.predicates = predicates + else: + raise ValueError("The Path constructor accepts a list of Predicate" + " objects. Please check the arguments for the" + " constructor.") + + #pylint: disable=locally-disabled,redefined-builtin + def to_rules(self, fields, label='name', format=EXTENDED): + """ Builds rules string from a list lf predicates in different formats + + """ + if format == EXTENDED: + return self.to_extended_rules(fields, label=label) + if format == BRIEF: + return self.to_brief_rules(fields, label=label) + raise ValueError("Invalid format. 
The list of valid formats are 0 " + "(extended) or 1 (brief).") + + def to_extended_rules(self, fields, label='name'): + """ Builds rules string in ordered and extended format + + """ + list_of_rules = [] + for predicate in self.predicates: + list_of_rules.append( + predicate.to_rule(fields, label=label).strip()) + return " and ".join(list_of_rules) + + def to_brief_rules(self, fields, label='name'): + """ Builds rules string in brief format (grouped and unordered) + + """ + groups_of_rules = {} + list_of_fields = [] + for predicate in self.predicates: + if predicate.field not in groups_of_rules: + groups_of_rules[predicate.field] = [] + list_of_fields.append(predicate.field) + groups_of_rules[predicate.field].append(predicate) + + lines = [] + for field in list_of_fields: + lines.append( + merge_rules(groups_of_rules[field], + fields, label=label)) + return " and ".join(lines) + + def append(self, predicate): + """ Adds new predicate to the path + + """ + self.predicates.append(predicate) diff --git a/bigml/pca.py b/bigml/pca.py new file mode 100644 index 00000000..22eb37c8 --- /dev/null +++ b/bigml/pca.py @@ -0,0 +1,376 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A local Principal Component Analysis. + +This module defines a PCA to make projections locally or +embedded into your application without needing to send requests to +BigML.io.
+ +This module can help you enormously to +reduce the latency for each prediction and let you use your PCAs offline. + +Example usage (assuming that you have previously set up the BIGML_USERNAME +and BIGML_API_KEY environment variables and that you own the +pca/id below): + +from bigml.api import BigML +from bigml.pca import PCA + +api = BigML() + +pca = PCA( + 'pca/5026965515526876630001b2') +pca.projection({"petal length": 3, "petal width": 1, + "sepal length": 1, "sepal width": 0.5}) + +""" +import logging +import math + + +from bigml.api import FINISHED +from bigml.api import get_status, get_api_connection, get_pca_id +from bigml.util import cast, use_cache, load, NUMERIC, get_data_format, \ + get_formatted_data, format_data, get_data_transformations +from bigml.basemodel import get_resource_dict +from bigml.modelfields import ModelFields +from bigml.constants import OUT_NEW_FIELDS, OUT_NEW_HEADERS, INTERNAL + + +try: + from bigml.laminar.numpy_ops import dot +except ImportError: + from bigml.laminar.math_ops import dot + +LOGGER = logging.getLogger('BigML') + +EXPANSION_ATTRIBUTES = {"categorical": "categories", "text": "tag_clouds", + "items": "items"} + +CATEGORICAL = "categorical" + + +def get_terms_array(terms, unique_terms, field, field_id): + """ Returns an array that represents the frequency of terms as ordered + in the reference `terms` parameter. + + """ + input_terms = unique_terms.get(field_id, []) + terms_array = [0] * len(terms) + if field['optype'] == CATEGORICAL and \ + field["summary"].get("missing_count", 0) > 0: + terms_array.append(int(field_id not in unique_terms)) + try: + for term, frequency in input_terms: + index = terms.index(term) + terms_array[index] = frequency + except ValueError: + pass + return terms_array + + +class PCA(ModelFields): + """ A lightweight wrapper around a PCA. + + Uses a BigML remote PCA to build a local version + that can be used to generate projections locally.
+ + """ + + def __init__(self, pca, api=None, cache_get=None): + + if use_cache(cache_get): + # using a cache to store the model attributes + self.__dict__ = load(get_pca_id(pca), cache_get) + return + + self.resource_id = None + self.name = None + self.description = None + self.parent_id = None + self.input_fields = [] + self.default_numeric_value = None + self.term_forms = {} + self.tag_clouds = {} + self.dataset_field_types = {} + self.term_analysis = {} + self.categories = {} + self.categories_probabilities = {} + self.items = {} + self.fields = {} + self.item_analysis = {} + self.standardize = None + self.famd_j = 1 + api = get_api_connection(api) + + self.resource_id, pca = get_resource_dict( \ + pca, "pca", api=api) + + if 'object' in pca and \ + isinstance(pca['object'], dict): + pca = pca['object'] + try: + self.parent_id = pca.get('dataset') + self.name = pca.get("name") + self.description = pca.get("description") + self.input_fields = pca.get("input_fields", []) + self.default_numeric_value = pca.get("default_numeric_value") + self.dataset_field_types = pca.get("dataset_field_types", {}) + self.famd_j = 1 if (self.dataset_field_types['categorical'] != \ + self.dataset_field_types['total']) else \ + self.dataset_field_types['categorical'] + except (AttributeError, KeyError): + raise ValueError("Failed to find the pca expected " + "JSON structure. 
Check your arguments.") + if 'pca' in pca and \ + isinstance(pca['pca'], dict): + status = get_status(pca) + if 'code' in status and status['code'] == FINISHED: + pca_info = pca[ \ + 'pca'] + fields = pca_info.get('fields', {}) + self.fields = fields + if not self.input_fields: + self.input_fields = [ \ + field_id for field_id, _ in + sorted(list(self.fields.items()), + key=lambda x: x[1].get("column_number"))] + missing_tokens = pca_info.get("missing_tokens") + for field_id, field in fields.items(): + if field["optype"] == "categorical": + probabilities = [probability for _, probability in \ + field["summary"]["categories"]] + if field["summary"].get("missing_count", 0) > 0: + probabilities.append( + field["summary"]["missing_count"]) + total = float(sum(probabilities)) + if total > 0: + probabilities = [probability / total for probability \ + in probabilities] + self.categories_probabilities[field_id] = probabilities + ModelFields.__init__( + self, fields, + objective_id=None, categories=True, + numerics=False, missing_tokens=missing_tokens) + + self.components = pca_info.get('components') + self.eigenvectors = pca_info.get('eigenvectors') + self.cumulative_variance = pca_info.get('cumulative_variance') + self.text_stats = pca_info.get('text_stats') + self.standardized = pca_info.get('standardized') + self.variance = pca_info.get('variance') + + else: + raise Exception("The pca isn't finished yet") + else: + raise Exception("Cannot create the PCA instance." 
+ " Could not find the 'pca' key" + " in the resource:\n\n%s" % + pca) + + + def projection(self, input_data, max_components=None, + variance_threshold=None, full=False): + """Returns the projection of input data in the new components + + input_data: Input data to be projected + + """ + + norm_input_data = self.filter_input_data( \ + input_data, + add_unused_fields=False) + + # Strips affixes for numeric values and casts to the final field type + cast(norm_input_data, self.fields) + + # Computes text and categorical field expansion into an input array of + # terms and frequencies + unique_terms = self.get_unique_terms(norm_input_data) + + + # Creates an input vector with the values for all expanded fields. + # The input mask marks the non-missing or categorical fields + # The `missings` variable is a boolean indicating whether there's + # non-categorical fields missing + input_array, missings, input_mask = self.expand_input(norm_input_data, + unique_terms) + components = self.eigenvectors[:] + if max_components is not None: + components = components[0: max_components] + if variance_threshold is not None: + for index, cumulative in enumerate(self.cumulative_variance): + if cumulative > variance_threshold: + components = components[0: index + 1] + + result = [value[0] for value in dot(components, [input_array])] + + # if non-categorical fields values are missing in input data + # there's an additional normalization + if missings: + missing_sums = self.missing_factors(input_mask) + for index, value in enumerate(result): + result[index] = value / missing_sums[index] \ + if missing_sums[index] > 0 else value + if full: + result = dict(list(zip(["PC%s" % index \ + for index in range(1, len(components) + 1)], result))) + return result + + + def missing_factors(self, input_mask): + """Returns the factors to divide the PCA values when input + data has missings + + """ + + sum_eigenvectors = [] + for row in self.eigenvectors: + eigenvector = [a * b for a, b in 
zip(input_mask, row)] + sum_eigenvectors.append(dot([eigenvector], [eigenvector])[0][0]) + return sum_eigenvectors + + + def _get_mean_stdev(self, field, field_id=None, index=None): + """Returns the quantities to be used as mean and stddev to normalize + + """ + if field['optype'] == CATEGORICAL and index is not None: + mean = self.categories_probabilities[field_id][index] + stdev = self.famd_j * math.sqrt(mean * self.famd_j) + return mean, stdev + if field['optype'] == NUMERIC: + return field["summary"]["mean"], \ + field["summary"]["standard_deviation"] + return self.text_stats[field_id]['means'][index], \ + self.text_stats[field_id]['standard_deviations'][index] + + + def expand_input(self, input_data, unique_terms): + """ Creates an input array with the values in input_data and + unique_terms and the following rules: + - fields are ordered as input_fields + - numeric fields contain the value or 0 if missing + - categorial fields are one-hot encoded and classes are sorted as + they appear in the field summary. If missing_count > 0 a last + missing element is added set to 1 if the field is missing and o + otherwise + - text and items fields are expanded into their elements as found + in the corresponding summmary information and their values treated + as numerics. 
+ """ + input_array = [] + input_mask = [] + missings = False + for field_id in self.input_fields: + field = self.fields[field_id] + optype = field["optype"] + if optype == NUMERIC: + input_mask.append(int(field_id in input_data)) + if field_id in input_data: + value = input_data.get(field_id, 0) + if self.standardized: + mean, stdev = self._get_mean_stdev(field) + value -= mean + if stdev > 0: + value /= stdev + else: + missings = True + value = 0 + input_array.append(value) + else: + terms = getattr(self, EXPANSION_ATTRIBUTES[optype])[field_id] + if field_id in unique_terms: + new_inputs = get_terms_array( \ + terms, unique_terms, field, field_id) + input_mask.extend( \ + [1] * len(new_inputs)) + else: + new_inputs = [0] * len(terms) + if optype != CATEGORICAL: + missings = True + input_mask.extend([0] * len(terms)) + else: + input_mask.extend([1] * len(terms)) + if field["summary"]["missing_count"] > 0: + new_inputs.append(1) + input_mask.append(1) + + if self.standardized: + for index2, frequency in enumerate(new_inputs): + mean, stdev = self._get_mean_stdev( \ + field, field_id, index2) + new_inputs[index2] = frequency - mean + if stdev > 0: + new_inputs[index2] /= stdev + # indexes of non-missing values + input_array.extend(new_inputs) + + return input_array, missings, input_mask + + def predict(self, input_data, max_components=None, + variance_threshold=None, full=False): + """Method to homogeneize the local models interface for all BigML + models. It returns the projection method result. + """ + return self.projection(input_data, max_components=max_components, + variance_threshold=variance_threshold, full=full) + + def batch_predict(self, input_data_list, outputs=None, **kwargs): + """Creates a batch projection for a list of inputs using the local + topic model. Allows to define some output settings to + decide the fields to be added to the input_data (prediction, + probability, etc.) and the name that we want to assign to these new + fields. 
The outputs argument accepts a dictionary with keys + "output_fields", to contain a list of the prediction properties to add + (all principal components by default) and "output_headers", to + contain a list of the headers to be used when adding them (identical + to "output_fields" list, by default). + + :param input_data_list: List of input data to be predicted + :type input_data_list: list or pandas DataFrame + :param dict outputs: properties that define the headers and fields to + be added to the input data + :return: the list of input data plus the predicted values + :rtype: list or pandas DataFrame depending on the input type in + input_data_list + """ + if outputs is None: + outputs = {} + new_fields = outputs.get(OUT_NEW_FIELDS, ["PC%s" % index + for index in range(1, len(self.eigenvectors) + 1)]) + new_headers = outputs.get(OUT_NEW_HEADERS, new_fields) + if len(new_fields) > len(new_headers): + new_headers = [*new_headers, *new_fields[len(new_headers):]] + else: + new_headers = new_headers[0: len(new_fields)] + data_format = get_data_format(input_data_list) + inner_data_list = get_formatted_data(input_data_list, INTERNAL) + for input_data in inner_data_list: + kwargs.update({"full": True}) + prediction = self.projection(input_data, **kwargs) + for index, key in enumerate(new_fields): + input_data[new_headers[index]] = prediction[key] + if data_format != INTERNAL: + return format_data(inner_data_list, out_format=data_format) + return inner_data_list + + def data_transformations(self): + """Returns the pipeline transformations previous to the modeling + step as a pipeline, so that they can be used in local predictions.
+ """ + return get_data_transformations(self.resource_id, self.parent_id) diff --git a/bigml/pipeline/__init__.py b/bigml/pipeline/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bigml/pipeline/pipeline.py b/bigml/pipeline/pipeline.py new file mode 100644 index 00000000..20cbb8b9 --- /dev/null +++ b/bigml/pipeline/pipeline.py @@ -0,0 +1,417 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,cyclic-import +# +# Copyright 2022-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +""" +Pipeline: Classes that encapsulate the information needed to add the new +fields and predictions defined in a sequence of transformations or models. +The arguments to create a Pipeline are its name and the list of +datasets and models (and/or anomaly dectectors, clusters, +etc.) that describe the input data processing to be used. 
+ +""" + +import os +import zipfile + +from datetime import datetime + +from bigml.api import get_api_connection, get_resource_id, get_resource_type +from bigml.util import use_cache, load, check_dir, get_data_format, \ + format_data, save_json, fs_cache_get, fs_cache_set, \ + dump, asciify +from bigml.constants import STORAGE +from bigml.dataset import Dataset +from bigml.supervised import SupervisedModel +from bigml.cluster import Cluster +from bigml.anomaly import Anomaly +from bigml.pca import PCA +from bigml.pipeline.transformer import BMLDataTransformer, DataTransformer + +try: + from bigml.topicmodel import TopicModel + NO_TOPIC = False +except ImportError: + NO_TOPIC = True + + +if NO_TOPIC: + LOCAL_CLASSES = { + "dataset": Dataset, + "cluster": Cluster, + "anomaly": Anomaly, + "pca": PCA, + } +else: + LOCAL_CLASSES = { + "dataset": Dataset, + "cluster": Cluster, + "anomaly": Anomaly, + "topicmodel": TopicModel, + "pca": PCA, + } + + +def get_datasets_chain(dataset, dataset_list=None): + """Builds recursively the chain of datasets leading to a dataset """ + if dataset_list is None: + dataset_list = [] + dataset_list.append(dataset) + if dataset.origin_dataset is None: + return dataset_list + + return get_datasets_chain(dataset.origin_dataset, dataset_list) + + +def get_datasets_dict(dataset, dataset_dict=None): + """Stores a dictionary dataset_id -> Dataset for the chain of datasets """ + if dataset_dict is None: + dataset_dict = {} + dataset_dict.update({dataset.resource_id: dataset}) + if dataset.origin_dataset is None: + return dataset_dict + + return get_datasets_dict(dataset.origin_dataset, dataset_dict) + + +def check_in_path(path, resource_list): + """Checks whether a list of resources is stored in a folder """ + for resource_id in resource_list: + if not os.path.exists(os.path.join( + path, resource_id.replace("/", "_"))): + return False + return True + + +class Pipeline(DataTransformer): + """Class to define sequential transformations. 
The transformations can + come from BigML resources or be defined as Pipe steps defined as functions + to be applied to DataFrame pipes, scikit pipelines + + """ + def __init__(self, name, steps=None, resource_id=None, description=None): + """Builds a Pipeline from the list of steps provided in the `steps` + argument. It is compulsory to assign a name that will be used as + reference + :param name: Reference name for the pipeline + :type name: str + :param steps: List of DataTransformers. All of them need to offer a + `.transform` method + :type steps: list + :param description: Description of the transformations in the pipeline + :type description: str + """ + super().__init__(None, # no generator is provided + None, # no data format is assumed + resource_id or name, + name, + description) + + self.steps = [] + self.extend(steps) + + def extend(self, steps=None): + """Adding new transformations to the Pipeline steps""" + if steps is None: + steps = [] + for step in steps: + if not hasattr(step, "transform"): + raise ValueError("Failed to find the .transform method in " + "all the Pipeline steps.") + self.steps.extend(steps) + + def transform(self, input_data_list, out_format=None): + """Applying the Pipeline transformations and predictions on the + list of input data. `out_format` forces the output format + to either a DataFrame or a list of dictionaries. 
+ + """ + result = self.data_transform(input_data_list) + if out_format is not None: + current_format = get_data_format(result) + if current_format != out_format: + return format_data(result, out_format) + return result + + def data_transform(self, input_data_list): + """Delegates transformation to each DataTransformer step""" + current_format = get_data_format(input_data_list) + if len(self.steps) == 0: + return input_data_list + inner_data_list = input_data_list + for index, step in enumerate(self.steps[:-1]): + try: + inner_data_list = step.transform(inner_data_list) + except Exception as exc: + raise ValueError( + "Failed to apply step number %s in pipeline %s: %s" % + (index, self.name, exc)) + try: + inner_data_list = self.steps[-1].transform( + inner_data_list, out_format=current_format) + if hasattr(self.steps[-1], "add_input") and \ + self.steps[-1].add_input: + self.steps[-1].merge_input_data( + input_data_list, inner_data_list, + out_format=current_format) + except Exception as exc: + raise ValueError("Failed to apply the last step: %s" % exc) + return inner_data_list + + +class BMLPipeline(Pipeline): + """The class represents the sequential transformations (and predictions) + that the input data goes through in a prediction workflow. + Reproduces the pre-modeling steps that need to be applied before + the application of the model predict (centroid, anomaly score, etc.) + method to add the final prediction. The mandatory arguments for the class + are: + - name: Each pipeline needs to be identified with a unique name + - resource_list: A list of resource IDs. Only datasets and supervised + or unsupervised model resources are allowed. + + When a dataset is provided, only the chain of transformations leading to + that dataset structure is applied. When a model is provided, the input + data is pre-modeled using that chain of transformations and the result + is used as input for the predict-like method of the model, that adds the + prediction to the result. 
If the pipeline is expected to use strictly + the resources in the original resource_list, you can use the last_step + argument + + """ + def __init__(self, name, resource_list=None, description=None, api=None, + cache_get=None, init_settings=None, execution_settings=None, + last_step=False): + """The pipeline needs + :param name: A unique name that will be used when caching the + resources it needs to be executed. + :type name: str + :param resource_list: A dataset/model ID or a list of them + to define the transformations and predictions + to be added to the input data. + :type resource_list: list + Optionally, it can receive: + :param description: A description of the pipeline procedure + :type description: str + :param api: A BigML API connection object + :type api: BigML + :param cache_get: A cache_get function to retrieve cached resources + :type cache_get: function + :param init_settings: A dictionary describing the optional arguments + added when instantiating the local model + (one per model ID) + e.g.: + {"deepnet/111111111111111111": { + "operation_settings": { + "region_score_threshold": 0.6}}, + "deepnet/222222222222222222": { + "operation_settings": { + "region_score_threshold": 0.7}}} + :type init_settings: dict + :param execution_settings: A dictionary describing the optional + arguments added when creating the + predictions. 
+ e.g.: + {"model/111111111111111111": { + "missing_strategy": 1}, + "model/222222222222222222": { + "operating_kind": "confidence"}} + :type execution_settings: dict + + """ + + if resource_list is None and use_cache(cache_get): + self.__dict__ = load(name, cache_get) + else: + super().__init__(name, description=description) + + # API related attributes + if resource_list is None: + resource_list = [] + self.resource_list = resource_list + if isinstance(resource_list, str): + self.resource_list = [resource_list] + for item in self.resource_list: + resource_id = get_resource_id(item) + if resource_id is None: + raise ValueError("Only resource IDs are allowed as first " + "argument.") + self.init_settings = init_settings or {} + self.execution_settings = execution_settings or {} + self._api = get_api_connection(api) + if self._api.storage is None: + self._api.storage = self._get_pipeline_storage() + self._cache_get = cache_get + self.steps = [] + self.extend(self.__retrieve_steps(last_step)) + + def __retrieve_steps(self, last_step): + """Retrieving the steps that need to be used to reproduce the + transformations leading to the resources given in the original list + """ + local_resources = [] + init_settings = self.init_settings.copy() + execution_settings = self.execution_settings.copy() + datasets = {} + steps = [] + + kwargs = {} + if self._api is not None: + kwargs["api"] = self._api + if self._cache_get is not None: + kwargs["cache_get"] = self._cache_get + + for resource_id in self.resource_list: + init_settings[resource_id] = init_settings.get( + resource_id, {}) + init_settings[resource_id].update(kwargs) + + for index, resource in enumerate(self.resource_list): + resource_id = get_resource_id(resource) + resource_type = get_resource_type(resource_id) + local_class = LOCAL_CLASSES.get(resource_type, SupervisedModel) + kwargs = init_settings.get(resource_id, {}) + local_resource = local_class(resource, **kwargs) + if isinstance(local_resource, 
SupervisedModel): + execution_settings[resource_id] = \ + execution_settings.get( + resource_id, {}) + execution_settings[resource_id].update({"full": True}) + local_resources.append([local_resource]) + if (hasattr(local_resource, "parent_id") and \ + get_resource_type(local_resource.parent_id) == "dataset"): + if local_resource.parent_id in datasets: + dataset = datasets[local_resource.parent_id] + else: + dataset = Dataset(local_resource.parent_id, + api=self._api) + datasets = get_datasets_dict(dataset, datasets) + if not last_step: + dataset_chain = get_datasets_chain(dataset) + local_resources[index].extend(dataset_chain) + local_resources[index].reverse() + + try: + new_resources = local_resources[0][:] + except IndexError: + new_resources = [] + for index, resources in enumerate(local_resources): + if index < 1: + continue + for resource in resources: + if resource not in new_resources: + new_resources.append(resource) + local_resources = new_resources + for local_resource in local_resources: + # non-flatline datasets will not add transformations + if isinstance(local_resource, Dataset) and \ + local_resource.origin_dataset is not None and \ + local_resource.transformations is None: + continue + execution_settings = self.execution_settings.get( + local_resource.resource_id, {}) + steps.append(BMLDataTransformer( + local_resource, **execution_settings)) + return steps + + def _get_pipeline_storage(self): + """ Creating a separate folder inside the given storage folder to + contain the pipeline related models based on the pipeline name. + If the folder already exists, first we check that all the resources + in the resources list are already stored there. If that's not the + case, we rename the folder by adding a datetime suffix and create a + new pipeline folder to store them. 
+ """ + if self._api.storage is None: + self._api.storage = STORAGE + path = os.path.join(self._api.storage, self.name) + if os.path.exists(path): + if check_in_path(path, self.resource_list): + return path + # adding a suffix to store old pipeline version + datetime_str = str(datetime.now()).replace(" ", "_") + bck_path = f"{path}_{datetime_str}_bck" + os.rename(path, bck_path) + check_dir(path) + return path + + def export(self, output_directory=None): + """Exports all the resources needed in the pipeline to the user-given + output directory. The entire pipeline folder is exported and its name + is used as filename. + """ + def zipdir(path, ziph): + # ziph is zipfile handle + for root, _, files in os.walk(path): + for file in files: + ziph.write(os.path.join(root, file), + os.path.relpath(os.path.join(root, file), + os.path.join(path, '..'))) + + if output_directory is None: + output_directory = os.getcwd() + check_dir(output_directory) + name = asciify(self.name) + out_filename = os.path.join(output_directory, f"{name}.zip") + + # write README file with the information that describes the Pipeline + name = self.name + description = self.description or "" + resources = ", ".join(self.resource_list) + readme = (f"Pipeline name: {name}\n{description}\n\n" + f"Built from: {resources}") + with open(os.path.join(self._api.storage, "README.txt"), "w", + encoding="utf-8") as readme_handler: + readme_handler.write(readme) + # write JSON file describing the pipeline resources + pipeline_vars = vars(self) + stored_vars = {} + for key, value in pipeline_vars.items(): + if not key.startswith("_") and not key == "steps": + stored_vars.update({key: value}) + pipeline_filename = os.path.join(self._api.storage, asciify(self.name)) + save_json(stored_vars, pipeline_filename) + with zipfile.ZipFile(out_filename, 'w', zipfile.ZIP_DEFLATED) as zipf: + zipdir(self._api.storage, zipf) + + def dump(self, output_dir=None, cache_set=None): + """Uses msgpack to serialize the resource 
object and all its steps + If cache_set is filled with a cache set method, the method is called + to store the serialized value + """ + pipeline_vars = vars(self) + stored_vars = {} + for key, value in pipeline_vars.items(): + if not key.startswith("_") and not key == "steps": + stored_vars.update({key: value}) + if output_dir is not None: + check_dir(output_dir) + cache_set = cache_set or fs_cache_set(output_dir) + dump(stored_vars, output=None, cache_set=cache_set) + for step in self.steps: + step.dump(cache_set=cache_set) + + @classmethod + def load(cls, name, dump_dir): + """Restores the information of the pipeline and its steps from a + previously dumped pipeline file. The objects used in each step + of the pipeline are expected to be in the same + """ + if dump_dir is not None and name is not None: + return cls(name, + None, + cache_get=fs_cache_get(dump_dir)) + return None diff --git a/bigml/pipeline/transformer.py b/bigml/pipeline/transformer.py new file mode 100644 index 00000000..3b983cd8 --- /dev/null +++ b/bigml/pipeline/transformer.py @@ -0,0 +1,275 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2022-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +""" +DataTransformer classes that handle the transformations generated on input +data by Feature Engineering, Models, Anomaly Detectors, etc. +The BMLDataTransformer will take care of transformations that use BigML +objects as transformation generators. 
Other libraries, like Pandas +(DFDataTransfomer) and scikit-learn (SKDataTransformer) +will need their own DataTransformer subclasses to define +their own transformations. + +""" + +import types + +from datetime import datetime + +from bigml.constants import INTERNAL, DATAFRAME, OUT_NEW_HEADERS +from bigml.util import get_formatted_data, format_data, get_data_format + +try: + from pandas import DataFrame, concat + PANDAS_READY = True +except ImportError: + PANDAS_READY = False + + +class DataTransformer(): + """Base class to handle transformations. It offers a transform method + that can handle list of dictionaries or Pandas DataFrames a inputs and + delegates to the `data_transform` method the actual transformations to + be applied and should be implemented in the classes derived from it. + """ + + def __init__(self, generator, data_format, resource_id=None, name=None, + description=None): + """Adds initial attributes: + - generator: object, function or list of functions that will be + doing the transformation + - data_format: whether to accept a DataFrame or a list of dictionaries + as inputs for the generator + - resource_id: unique identifier for the data transformer object + - name: name for the data transformer + - description: description for the transformations in the data + transformer + """ + self.generator = generator + self.data_format = data_format + self.resource_id = resource_id + self.name = name + self.description = description + + def _formatted_input(self, input_data_list): + """Returns a copy of the input data list in the expected format """ + return get_formatted_data(input_data_list, self.data_format) + + def transform(self, input_data_list, out_format=None): + """Returns a new input_data_list where the transformations defined + in the generator have been applied. It handles format transformation + if needed before applying the generator function. 
+ """ + data_format = get_data_format(input_data_list) + inner_data_list = self._formatted_input(input_data_list) + result = self.data_transform(inner_data_list) + if self.data_format != data_format and out_format is None: + return format_data(result, data_format) + if self.data_format != out_format: + return format_data(result, out_format) + return result + + def data_transform(self, input_data_list): + """Method to be re-implemented in each of the data transformers. The + base class implementation raises NotImplementedError.""" + raise NotImplementedError("This method needs to be implemented") + + + class BMLDataTransformer(DataTransformer): + """Transformer wrapper for BigML resources.""" + def __init__(self, local_resource, outputs=None, **kwargs): + """Receives a local resource (Dataset, SupervisedModel, Cluster...) + and creates a `DataTransformer` from it to apply the corresponding + transformations. + - for Datasets, Flatline transformations (if any) are applied + - for models, a batch prediction (scoring, topic distribution, etc.) is + applied and added to the original input. + + Optional arguments are: + :param outputs: dictionary of output fields and headers + :type outputs: dict + :param kwargs: dictionary of runtime settings for batch predictions + (e.g. missing_strategy, operating_point, etc.)
+ :type kwargs: dict + """ + try: + generator = local_resource.transform + self.add_input = False + except AttributeError: + if hasattr(local_resource, "batch_predict"): + generator = lambda x : \ + local_resource.batch_predict(x, outputs=outputs, **kwargs) + self.add_input = True + else: + raise ValueError("The local resource needs to provide " + "a transform, or batch_predict " + "method to generate transformations.") + super().__init__(generator, + INTERNAL, + local_resource.resource_id, + local_resource.name, + local_resource.description) + self.local_resource = local_resource + self.dump = local_resource.dump + + def data_transform(self, input_data_list): + """Returns a list of dictionaries with the generated transformations. + The input list is expected to be a list of dictionaries""" + return self.generator(input_data_list) + + def merge_input_data(self, input_data_list, output_data_list, + out_format=None): + """Adding input data to the output """ + data_format = get_data_format(input_data_list) + input_data_list = self._formatted_input(input_data_list) + output_data_list = self._formatted_input(output_data_list) + for index, input_data in enumerate(input_data_list): + for key, value in input_data.items(): + if key not in output_data_list[index]: + output_data_list[index].update({key: value}) + if self.data_format != out_format: + return format_data(output_data_list, data_format) + return output_data_list + + +class DFDataTransformer(DataTransformer): + """DataTransformer wrapper for DataFrames """ + def __init__(self, generator, resource_id=None, name=None, + description=None): + """Receives the function or list of functions to be applied on + the input DataFrame + Optional parameters are: + :param resource_id: unique ID for the DataTransformer + :type resource_id: str + :param name: DataTransformer name + :type name: str + :param description: Description for the transformations. 
+ :type description: str + """ + if not isinstance(generator, list): + generator = [generator] + for index, item in enumerate(generator): + if not isinstance(item, tuple) and isinstance( + item, types.FunctionType): + generator[index] = (item, [], {}) + elif isinstance(item, tuple) and isinstance( + item[0], types.FunctionType): + try: + args = item[1] + if not isinstance(args, list): + raise ValueError("The syntax of the first argument is " + " function or (function, list, dict)") + except IndexError: + args = [] + try: + kwargs = item[2] + if not isinstance(kwargs, dict): + raise ValueError("The syntax of the first argument is " + " function or (function, list, dict)") + except IndexError: + kwargs = {} + + generator[index] = (item[0], args, kwargs) + else: + raise ValueError("Only functions or tuples of functions are " + "allowed as first argument.") + + super().__init__(generator, + DATAFRAME, + resource_id or "dftrans_%s" % + str(datetime.now()).replace(" ", "_"), + name, + description) + + def data_transform(self, input_data_list): + """Calling the corresponding method in the generator. + The input_data_list is expected to be a Dataframe. + + """ + result = input_data_list.copy() + for function, args, kwargs in self.generator: + result = result.pipe(function, *args, **kwargs) + return result + + +class SKDataTransformer(DataTransformer): + """DataTransformer wrapper for scikit learn pipelines or transformations """ + def __init__(self, generator, resource_id=None, name=None, + description=None, output=None): + """Receives the pipeline or transformation to be applied on + the input DataFrame + Optional parameters are: + :param resource_id: unique ID for the DataTransformer + :type resource_id: str + :param name: DataTransformer name + :type name: str + :param description: Description for the transformations. + :type description: str + :param output: Dictionary containing the headers to be used for the + new fields generated in the transformation. 
+ :type output: dict + """ + + try: + generator_fn = generator.transform + self.add_input = False + except AttributeError: + try: + generator_fn = generator.predict + self.add_input = True + except AttributeError: + try: + generator_fn = generator.score + self.add_input = True + except AttributeError: + raise ValueError("Failed to find a .transform, .predict " + "or .score method in the first argument " + "object.") + + super().__init__(generator_fn, + DATAFRAME, + resource_id or "sktrans_%s" % + str(datetime.now()).replace(" ", "_"), + name, + description) + self.output = output or {} + try: + self.output_headers = generator.get_feature_names_out() + except AttributeError: + self.output_headers = self.output.get(OUT_NEW_HEADERS) + + def data_transform(self, input_data_list): + """Calling the corresponding method in the generator. + The input_data_list is expected to be a Dataframe. + + """ + result = self.generator(input_data_list) + try: + result = result.toarray() + except AttributeError: + pass + df_kwargs = {"index": input_data_list.index} + if self.output_headers is not None: + df_kwargs.update({"columns": self.output_headers}) + result = DataFrame(result, **df_kwargs) + if not self.add_input: + return result + return concat([input_data_list, result], axis=1) + + @staticmethod + def merge_input_data(input_data_list, output_data_list, out_format=None): + """Adding input data to the output (out_format is accepted, unused)""" + return concat([input_data_list, output_data_list], axis=1) diff --git a/bigml/predicate.py b/bigml/predicate.py new file mode 100644 index 00000000..ed6ec690 --- /dev/null +++ b/bigml/predicate.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2013-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Predicate structure for the BigML local Model +This module defines an auxiliary Predicate structure that is used in the Tree +to save the node's predicate info. +""" + +import re + +from bigml.predicate_utils.utils import TM_TOKENS, TM_FULL_TERM, TM_ALL, \ + FULL_TERM_PATTERN, OPERATOR_CODE +from bigml.predicate_utils.utils import apply_predicate +from bigml.util import plural + +RELATIONS = { + '<=': 'no more than %s %s', + '>=': '%s %s at least', + '>': 'more than %s %s', + '<': 'less than %s %s' +} + +class Predicate(): + """A predicate to be evaluated in a tree's node. + """ + def __init__(self, operation, field, value, term=None): + self.operator = operation + self.missing = False + + if self.operator.endswith("*"): + self.operator = self.operator[0: -1] + self.missing = True + elif operation == 'in' and None in value: + self.missing = True + + self.field = field + self.value = value + self.term = term + + def is_full_term(self, fields): + """Returns a boolean showing if a term is considered as a full_term + """ + if self.term is not None: + # new optype has to be handled in tokens + if fields[self.field]['optype'] == 'items': + return False + options = fields[self.field]['term_analysis'] + token_mode = options.get('token_mode', TM_TOKENS) + if token_mode == TM_FULL_TERM: + return True + if token_mode == TM_ALL: + return re.match(FULL_TERM_PATTERN, self.term) + return False + + def to_rule(self, fields, label='name', missing=None): + """Builds rule string from a predicate + """ + # externally forcing missing to True or False depending on the path
+ if missing is None: + missing = self.missing + if label is not None: + name = fields[self.field][label] + else: + name = "" + full_term = self.is_full_term(fields) + relation_missing = " or missing" if missing else "" + if self.term is not None: + relation_suffix = '' + if ((self.operator == '<' and self.value <= 1) or + (self.operator == '<=' and self.value == 0)): + relation_literal = ('is not equal to' if full_term + else 'does not contain') + else: + relation_literal = 'is equal to' if full_term else 'contains' + if not full_term: + if self.operator != '>' or self.value != 0: + relation_suffix = (RELATIONS[self.operator] % + (self.value, + plural('time', self.value))) + return "%s %s %s %s%s" % (name, relation_literal, + self.term, relation_suffix, + relation_missing) + if self.value is None: + return "%s %s" % (name, + "is missing" if self.operator == '=' + else "is not missing") + return "%s %s %s%s" % (name, + self.operator, + self.value, + relation_missing) + + def to_lisp_rule(self, fields): + """Builds rule string in LISP from a predicate + """ + if self.term is not None: + if fields[self.field]['optype'] == 'text': + options = fields[self.field]['term_analysis'] + case_insensitive = not options.get('case_sensitive', False) + case_insensitive = 'true' if case_insensitive else 'false' + language = options.get('language') + language = "" if language is None else " %s" % language + return "(%s (occurrences (f %s) %s %s%s) %s)" % ( + self.operator, self.field, self.term, + case_insensitive, language, self.value) + + if fields[self.field]['optype'] == 'items': + return "(%s (if (contains-items? %s %s) 1 0) %s)" % ( + self.operator, self.field, self.term, + self.value) + if self.value is None: + negation = "" if self.operator == "=" else "not " + return "(%s missing? %s)" % (negation, self.field) + rule = "(%s (f %s) %s)" % (self.operator, + self.field, + self.value) + if self.missing: + rule = "(or (missing? 
%s) %s)" % (self.field, rule) + return rule + + def apply(self, input_data, fields): + """Applies the operators defined in the predicate as strings to + the provided input data + """ + + return apply_predicate(OPERATOR_CODE.get(self.operator), self.field, + self.value, self.term, self.missing, input_data, + fields[self.field]) diff --git a/bigml/predicate_utils/__init__.py b/bigml/predicate_utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bigml/predicate_utils/utils.py b/bigml/predicate_utils/utils.py new file mode 100644 index 00000000..7239d01e --- /dev/null +++ b/bigml/predicate_utils/utils.py @@ -0,0 +1,308 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +""" +Common auxiliary functions to be used in the node predicate evaluation +""" +import operator +import re + +from bigml.util import plural + +# Operator Codes +LT = 0 +LE = 1 +EQ = 2 +NE = 3 +GE = 4 +GT = 5 +IN = 6 + +# Map operator string to its corresponding code +OPERATOR_CODE = {"<": LT, + "<=": LE, + "=": EQ, + "!=": NE, + "/=": NE, + ">=": GE, + ">": GT, + "in": IN} + +# Map operator code to its corresponding function +OPERATOR = [operator.lt, + operator.le, + operator.eq, + operator.ne, + operator.ge, + operator.gt, + operator.contains] + +INVERSE_OP = dict(zip(OPERATOR_CODE.values(), OPERATOR_CODE.keys())) + +RELATIONS = { + '<=': 'no more than %s %s', + '>=': '%s %s at least', + '>': 'more than %s %s', + '<': 'less than %s %s' +} + +TM_TOKENS = 'tokens_only' +TM_FULL_TERM = 'full_terms_only' +TM_ALL = 'all' +FULL_TERM_PATTERN = re.compile(r'^.+\b.+$', re.U) + +OPERATION_OFFSET = 2 +FIELD_OFFSET = 3 +VALUE_OFFSET = 4 +TERM_OFFSET = 5 +MISSING_OFFSET = 6 + +PREDICATE_INFO_LENGTH = 5 + + +def term_matches(text, forms_list, options): + """ Counts the number of occurences of the words in forms_list in the text + The terms in forms_list can either be tokens or full terms. The + matching for tokens is contains and for full terms is equals.
+ """ + token_mode = options.get('token_mode', TM_TOKENS) + case_sensitive = options.get('case_sensitive', False) + first_term = forms_list[0] + if token_mode == TM_FULL_TERM: + return full_term_match(text, first_term, case_sensitive) + + return term_matches_tokens(text, forms_list, case_sensitive) + + +def is_full_term(term, field): + """Returns a boolean showing if a term is considered as a full_term + """ + if term is not None: + # new optype has to be handled in tokens + if field['optype'] == 'items': + return False + options = field['term_analysis'] + token_mode = options.get('token_mode', TM_TOKENS) + if token_mode == TM_FULL_TERM: + return True + if token_mode == TM_ALL: + return re.match(FULL_TERM_PATTERN, term) + return False + + +def full_term_match(text, full_term, case_sensitive): + """Counts the match for full terms according to the case_sensitive option + """ + if not case_sensitive: + text = text.lower() + full_term = full_term.lower() + + return 1 if text == full_term else 0 + + +def get_tokens_flags(case_sensitive): + """Returns flags for regular expression matching depending on text analysis + options + """ + flags = re.U + if not case_sensitive: + flags = (re.I | flags) + + return flags + + +def term_matches_tokens(text, forms_list, case_sensitive): + """Counts the number of occurences of the words in forms_list in the text + """ + flags = get_tokens_flags(case_sensitive) + + expression = r'(\b|_)%s(\b|_)' % '(\\b|_)|(\\b|_)'.join([re.escape(term) \ + for term in forms_list]) + pattern = re.compile(expression, flags=flags) + matches = re.findall(pattern, text) + return len(matches) + + +def item_matches(text, item, options): + """Counts the number of occurences of the item in the text + The matching considers the separator or + the separating regular expression. 
+ """ + separator = options.get('separator', ' ') + regexp = options.get('separator_regexp') + if regexp is None: + regexp = r"%s" % re.escape(separator) + + return count_items_matches(text, item, regexp) + + +def count_items_matches(text, item, regexp): + """Counts the number of occurences of the item in the text.""" + expression = r'(^|%s)%s($|%s)' % (regexp, re.escape(item), regexp) + pattern = re.compile(expression, flags=re.U) + matches = re.findall(pattern, text) + + return len(matches) + +def apply_predicates(node, input_data, fields, normalize_repeats=False): + """Evaluates the predicate for a particular input data.""" + shift = 1 if normalize_repeats else 0 + num_predicates = node[1 + shift] + + predicates_ok = 0 + + for i in range(num_predicates): + operation = node[OPERATION_OFFSET + (PREDICATE_INFO_LENGTH * i) + shift] + field = node[FIELD_OFFSET + (PREDICATE_INFO_LENGTH * i) + shift] + value = node[VALUE_OFFSET + (PREDICATE_INFO_LENGTH * i) + shift] + term = node[TERM_OFFSET + (PREDICATE_INFO_LENGTH * i) + shift] + missing = node[MISSING_OFFSET + (PREDICATE_INFO_LENGTH * i) + shift] + + predicate_ok = apply_predicate(operation, field, value, term, missing, + input_data, fields[field]) + if predicate_ok: + predicates_ok += 1 + + return predicates_ok + +def apply_predicate(operation, field, value, term, missing, input_data, + field_info): + """Applies the operators defined in the predicate as strings to + the provided input data + """ + # for missing operators + if input_data.get(field) is None: + # text and item fields will treat missing values by following the + # doesn't contain branch + if term is None: + return missing or ( + operation == EQ and value is None) + elif operation == NE and value is None: + return True + + if term is not None: + if field_info['optype'] == 'text': + all_forms = field_info['summary'].get('term_forms', {}) + term_forms = all_forms.get(term, []) + terms = [term] + terms.extend(term_forms) + options = 
field_info['term_analysis'] + input_terms = term_matches(input_data.get(field, ""), terms, + options) + return OPERATOR[operation](input_terms, value) + # new items optype + options = field_info['item_analysis'] + input_items = item_matches(input_data.get(field, ""), term, + options) + return OPERATOR[operation](input_items, value) + if operation == IN: + return OPERATOR[operation](value, input_data[field]) + return OPERATOR[operation](input_data[field], value) + + +def pack_predicate(predicate): + """Compacts the predicate condition + + """ + node = [] + if predicate and predicate is not True: + operation = predicate.get('operator') + value = predicate.get('value') + missing = False + if operation.endswith("*"): + operation = operation[0: -1] + missing = True + elif operation == 'in' and None in value: + missing = True + + node.append(OPERATOR_CODE.get(operation)) + node.append(predicate.get('field')) + node.append(value) + node.append(predicate.get('term')) + node.append(missing) + else: + node.append(True) + return node + + +def predicate_to_rule(operation, field_info, value, term, + missing, label='name'): + """Predicate condition string + + """ + # externally forcing missing to True or False depending on the path + if missing is None: + missing = False + if label is not None: + name = field_info[label] + else: + name = "" + operation = INVERSE_OP[operation] + full_term = is_full_term(term, field_info) + relation_missing = " or missing" if missing else "" + if term is not None: + relation_suffix = '' + if ((operation == '<' and value <= 1) or + (operation == '<=' and value == 0)): + relation_literal = ('is not equal to' if full_term + else 'does not contain') + else: + relation_literal = 'is equal to' if full_term else 'contains' + if not full_term: + if operation != '>' or value != 0: + relation_suffix = (RELATIONS[operation] % + (value, + plural('time', value))) + return "%s %s %s %s%s" % (name, relation_literal, + term, relation_suffix, + relation_missing) + 
if value is None: + return "%s %s" % (name, + "is missing" if operation == '=' + else "is not missing") + return "%s %s %s%s" % (name, + operation, + value, + relation_missing) + + +def to_lisp_rule(operation, field, value, term, + missing, field_info): + """Builds rule string in LISP from a predicate + + """ + if term is not None: + if field_info['optype'] == 'text': + options = field_info['term_analysis'] + case_insensitive = not options.get('case_sensitive', False) + case_insensitive = 'true' if case_insensitive else 'false' + language = options.get('language') + language = "" if language is None else " %s" % language + return "(%s (occurrences (f %s) %s %s%s) %s)" % ( + operation, field, term, + case_insensitive, language, value) + if field_info['optype'] == 'items': + return "(%s (if (contains-items? %s %s) 1 0) %s)" % ( + operation, field, term, value) + if value is None: + negation = "" if operation == "=" else "not " + return "(%s missing? %s)" % (negation, field) + rule = "(%s (f %s) %s)" % (operation, + field, + value) + if missing: + rule = "(or (missing? %s) %s)" % (field, rule) + return rule diff --git a/bigml/predicates.py b/bigml/predicates.py new file mode 100644 index 00000000..54537858 --- /dev/null +++ b/bigml/predicates.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
"""Predicates structure for the BigML local AnomalyTree

This module defines an auxiliary Predicates structure that is used in the
AnomalyTree to save the node's predicates info.

"""
from bigml.predicate import Predicate


class Predicates():
    """Holds the predicates attached to a node of an anomaly tree.

    Each element of `predicates` is either the literal `True` or a
    `Predicate` built from the `op`, `field`, `value` and `term` keys of
    the corresponding input dict.
    """
    def __init__(self, predicates_list):
        self.predicates = [
            True if info is True else Predicate(info.get('op'),
                                                info.get('field'),
                                                info.get('value'),
                                                info.get('term'))
            for info in predicates_list]

    def to_rule(self, fields, label='name'):
        """Joins with " and " the rule strings of the stored predicates,
        skipping the boolean placeholders.

        """
        rules = []
        for predicate in self.predicates:
            if isinstance(predicate, bool):
                continue
            rules.append(predicate.to_rule(fields, label=label))
        return " and ".join(rules)

    def apply(self, input_data, fields):
        """Checks that every stored `Predicate` holds for the input data.
        Elements that are not `Predicate` instances are ignored.

        """
        real_predicates = (predicate for predicate in self.predicates
                           if isinstance(predicate, Predicate))
        return all(predicate.apply(input_data, fields)
                   for predicate in real_predicates)


# diff --git a/bigml/predict_utils/__init__.py b/bigml/predict_utils/__init__.py
# new file mode 100644
# index 00000000..e69de29b
# diff --git a/bigml/predict_utils/boosting.py b/bigml/predict_utils/boosting.py
# new file mode 100644
# index 00000000..1380e96d
# --- /dev/null
# +++ b/bigml/predict_utils/boosting.py
# @@ -0,0 +1,165 @@
# -*- coding: utf-8 -*-
#
# Copyright 2020-2025 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the
# License for the specific language governing permissions and limitations
# under the License.

"""Predict utilities for boosting models

"""
from bigml.predict_utils.common import (one_branch, get_node, get_predicate,
                                        FIELD_OFFSET)
from bigml.predicate_utils.utils import (predicate_to_rule, apply_predicate,
                                         pack_predicate)
from bigml.prediction import Prediction


# Positions of the node properties once the predicate has been removed
OFFSETS = {
    "id": 0,
    "output": 1,
    "count": 2,
    "g_sum": 3,
    "h_sum": 4,
    "children#": 5,
    "children": 6}


def build_boosting_tree(node_dict, node=None, terms=None):
    """Compresses the tree structure into nested lists.

    Every node is a list that starts with its packed predicate (see
    `pack_predicate`) followed by:
        [id, output, count, g_sum, h_sum, #children, children_nodes_list*]
    The children list is only appended when the node has children.

    :param node_dict: dict describing the node
    :param node: list with the packed predicate to be extended. When empty
                 or None, the predicate in `node_dict` is packed.
    :param terms: dict updated in place with the terms seen per field in
                  the children predicates
    """
    if terms is None:
        terms = {}
    own_predicate = node_dict.get('predicate', True)
    packed = node if node else list(pack_predicate(own_predicate))
    child_dicts = node_dict.get("children", [])
    packed.extend([node_dict.get(key)
                   for key in ("id", "output", "count", "g_sum", "h_sum")])
    packed.append(len(child_dicts))
    packed_children = []
    for child_dict in child_dicts:
        child_predicate = child_dict.get('predicate')
        field_terms = terms.setdefault(child_predicate.get("field"), [])
        child_term = child_predicate.get("term")
        if child_term not in field_terms:
            field_terms.append(child_term)
        child_node = pack_predicate(child_predicate)
        build_boosting_tree(child_dict, node=child_node, terms=terms)
        packed_children.append(child_node)
    if packed_children:
        packed.append(packed_children)

    return packed


#pylint: disable=locally-disabled,inconsistent-return-statements
def boosting_proportional_predict(tree, fields, input_data, path=None,
                                  missing_found=False):
    """Predicts by adding up the information of all the leaves in the
    subtree that the input data can reach.

    When the field used in a split has no value in the input data, all the
    branches of the split are followed and their results are added.
    The function returns a tuple (g_sum, h_sum, count, path), where path is
    the list of rules followed before the first missing value is found.
    If a single branch must be followed and no child predicate holds,
    nothing is returned.
    """

    if path is None:
        path = []

    node = get_node(tree)
    if node[OFFSETS["children#"]] == 0:
        children = []
    else:
        children = node[OFFSETS["children"]]
    node_g_sum = node[OFFSETS["g_sum"]]
    node_h_sum = node[OFFSETS["h_sum"]]
    node_count = node[OFFSETS["count"]]

    if not children:
        return (node_g_sum, node_h_sum, node_count, path)

    single_branch = one_branch(children, input_data) or \
        fields[children[0][FIELD_OFFSET]]["optype"] in ["text", "items"]

    if not single_branch:
        # missing value found, the unique path stops
        g_total = 0.0
        h_total = 0.0
        population = 0
        for child in children:
            child_g, child_h, child_count, _ = \
                boosting_proportional_predict(
                    child, fields, input_data, path, True)
            g_total += child_g
            h_total += child_h
            population += child_count
        return (g_total, h_total, population, path)

    for child in children:
        [operator, field, value, term, missing] = get_predicate(child)
        if not apply_predicate(operator, field, value, term, missing,
                               input_data, fields[field]):
            continue
        rule = predicate_to_rule(operator, fields[field], value,
                                 term, missing)
        if not missing_found and rule not in path:
            path.append(rule)
        return boosting_proportional_predict(
            child, fields, input_data, path, missing_found)
    return None


def boosting_last_predict(tree, fields, input_data, path=None):
    """Predicts with the last node that the input data can reach.

    The tree is followed while some child predicate holds. The returned
    `Prediction` stores the output, count and children of the last node and
    the rules followed to reach it.
    """

    if path is None:
        path = []
    node = get_node(tree)

    if node[OFFSETS["children#"]] == 0:
        children = []
    else:
        children = node[OFFSETS["children"]]
    node_count = node[OFFSETS["count"]]

    for child in children:
        [operator, field, value, term, missing] = get_predicate(child)
        if apply_predicate(operator, field, value, term, missing,
                           input_data, fields[field]):
            path.append(predicate_to_rule(operator, fields[field],
                                          value, term, missing))
            return boosting_last_predict(child, fields,
                                         input_data, path=path)

    return Prediction(
        node[OFFSETS["output"]],
        path,
        None,
        distribution=None,
        count=node_count,
        median=None,
        distribution_unit=None,
        children=children,
        d_min=None,
        d_max=None)


# diff --git a/bigml/predict_utils/classification.py b/bigml/predict_utils/classification.py
# new file mode 100644
# index 00000000..862b32c7
# --- /dev/null
# +++ b/bigml/predict_utils/classification.py
# @@ -0,0 +1,124 @@
# -*- coding: utf-8 -*-
#
# Copyright 2020-2025 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""Predict utilities for classifications

"""
from bigml.predict_utils.common import last_prediction_predict, \
    proportional_predict, extract_distribution
from bigml.predicate_utils.utils import pack_predicate
from bigml.prediction import Prediction
from bigml.multivote import ws_confidence


# Positions of the node properties once the predicate has been removed.
# The key is str(weighted).
OFFSETS = {
    "False": {"id": 0,
              "output": 1,
              "count": 2,
              "confidence": 3,
              "distribution": 4,
              "children#": 5,
              "children": 6},
    "True": {"id": 0,
             "output": 1,
             "count": 2,
             "confidence": 3,
             "distribution": 4,
             "wdistribution": 5,
             "weight": 6,
             "children#": 7,
             "children": 8}}


def build_classification_tree(node_dict, node=None, distribution=None,
                              weighted=False, terms=None):
    """Builds a compressed version of the tree structure as a list of
    lists.

    Every node is a list that starts with its packed predicate (see
    `pack_predicate`) followed by the properties described in `OFFSETS`:
        [id, output, count, confidence, distribution,
         (wdistribution, weight,)  # only in weighted trees
         #children, children_nodes_list*]
    The children list is only appended when the node has children.

    :param node_dict: dict describing the node
    :param node: list with the packed predicate to be extended. When empty
                 or None, the predicate in `node_dict` is packed.
    :param distribution: summary to be used instead of the node's
                         `objective_summary`
    :param weighted: whether the weighted information must be stored
    :param terms: dict updated in place with the terms seen per field in
                  the children predicates
    """
    if terms is None:
        terms = {}
    predicate = node_dict.get('predicate', True)
    outer = node if node else list(pack_predicate(predicate))
    outer.append(node_dict.get("id"))
    outer.append(node_dict.get("output"))
    outer.append(node_dict.get("count"))
    outer.append(node_dict.get("confidence"))
    distribution = distribution if distribution is not None else \
        node_dict.get("objective_summary")
    _, distribution = extract_distribution(distribution)
    outer.append(distribution)
    if weighted:
        _, wdistribution = extract_distribution(
            node_dict.get("weighted_objective_summary"))
        outer.append(wdistribution)
        outer.append(node_dict.get("weight"))
    children = node_dict.get("children", [])
    outer.append(len(children))
    children_list = []
    for child in children:
        predicate = child.get('predicate')
        field = predicate.get("field")
        if field not in terms:
            terms[field] = []
        term = predicate.get("term")
        if term not in terms[field]:
            terms[field].append(term)
        inner = pack_predicate(predicate)
        build_classification_tree(child, node=inner, weighted=weighted,
                                  terms=terms)
        children_list.append(inner)
    if children_list:
        outer.append(children_list)
    return outer


def classification_proportional_predict(tree, weighted, fields, input_data):
    """Prediction for classification using proportional strategy

    The predicted category is the most frequent one in the merged
    distribution (ties are broken by category order).
    """
    offset = OFFSETS[str(weighted)]
    (final_distribution, _, _, last_node, population,
     _, path) = proportional_predict(
         tree, offset, fields, input_data, path=None)

    distribution = [list(element) for element in
                    sorted(list(final_distribution.items()),
                           key=lambda x: (-x[1], x[0]))]
    # Keyword arguments are used on purpose: Prediction declares
    # `distribution_unit` before `median`, so the former positional
    # `None, 'categories'` stored 'categories' as the median and left the
    # distribution unit unset.
    return Prediction(
        distribution[0][0],
        path,
        ws_confidence(distribution[0][0], final_distribution,
                      ws_n=population),
        distribution=distribution,
        count=population,
        median=None,
        distribution_unit='categories',
        children=[] if last_node[offset["children#"]] == 0 else
        last_node[offset["children"]])


def classification_last_predict(tree, weighted, fields, input_data):
    """Predict for classification and last prediction missing strategy

    """
    return last_prediction_predict(tree, OFFSETS[str(weighted)], fields,
                                   input_data)


# diff --git a/bigml/predict_utils/common.py b/bigml/predict_utils/common.py
# new file mode 100644
# index 00000000..6b967f52
# --- /dev/null
# +++ b/bigml/predict_utils/common.py
# @@ -0,0 +1,215 @@
# -*- coding: utf-8 -*-
#
# Copyright 2020-2025 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Common predict utilities

"""
from bigml.predicate_utils.utils import apply_predicate, predicate_to_rule
from bigml.prediction import Prediction

from bigml.multivote import merge_distributions

# Positions of the predicate information at the start of a packed node
OPERATION_OFFSET = 0
FIELD_OFFSET = 1
VALUE_OFFSET = 2
TERM_OFFSET = 3
MISSING_OFFSET = 4

PREDICATE_INFO_LENGTH = 5

DISTRIBUTION_GROUPS = ['bins', 'counts', 'categories']


def mintree_split(children):
    """Returns the field ID for the split

    The field is read from the predicate of the first child.
    """
    return children[0][FIELD_OFFSET]


def one_branch(children, input_data):
    """Check if there's only one branch to be followed

    That happens when the split field is in the input data, when some
    child accepts missings or when some child predicate value is None.
    """
    no_missing = mintree_split(children) in input_data
    return (no_missing or missing_branch(children)
            or none_value(children))


def missing_branch(children):
    """Checks if the missing values are assigned to a special branch

    """
    return any(child[MISSING_OFFSET] for child in children)


def none_value(children):
    """Checks if the predicate has a None value

    """
    return any(child[VALUE_OFFSET] is None for child in children)


def extract_distribution(summary):
    """Extracts the distribution info from the objective_summary structure
    in any of its grouping units: bins, counts or categories

    Returns a (unit, distribution) tuple, or (None, []) when the summary is
    None or has none of the known groups.
    """
    if summary is None:
        # nodes lacking the summary are handled like summaries with no
        # known group instead of failing on the membership test
        return None, []
    for group in DISTRIBUTION_GROUPS:
        if group in summary:
            return group, summary.get(group)
    return None, []


def last_prediction_predict(tree, offsets, fields, input_data, path=None):
    """ Predictions for last prediction missing strategy

    The tree is followed while some child predicate holds, and the
    `Prediction` is built from the last node reached.

    :param tree: packed node (predicate followed by the node properties)
    :param offsets: dict with the position of every node property
    :param fields: dict of field information keyed by field ID
    :param input_data: dict of inputs keyed by field ID
    :param path: list of rules followed so far (extended in place)
    """

    if path is None:
        path = []

    node = get_node(tree)

    children_number = node[offsets["children#"]]
    children = [] if children_number == 0 else node[offsets["children"]]

    for child in children:
        [operator, field, value, term, missing] = get_predicate(child)
        if apply_predicate(operator, field, value, term, missing,
                           input_data, fields[field]):
            new_rule = predicate_to_rule(operator, fields[field], value,
                                         term, missing)
            path.append(new_rule)
            return last_prediction_predict(child,
                                           offsets, fields,
                                           input_data, path=path)

    if "wdistribution" in offsets:
        output_distribution = node[offsets["wdistribution"]]
        # check the same key that is read afterwards
        output_unit = 'categories' if "wdistribution_unit" not in offsets \
            else node[offsets["wdistribution_unit"]]
    else:
        output_distribution = node[offsets["distribution"]]
        output_unit = 'categories' if "distribution_unit" not in offsets \
            else node[offsets["distribution_unit"]]

    return Prediction(
        node[offsets["output"]],
        path,
        node[offsets["confidence"]],
        distribution=output_distribution,
        count=node[offsets["count"]],
        median=None if offsets.get("median") is None else
        node[offsets["median"]],
        distribution_unit=output_unit,
        children=[] if node[offsets["children#"]] == 0 else
        node[offsets["children"]],
        d_min=None if offsets.get("min") is None else
        node[offsets["min"]],
        d_max=None if offsets.get("max") is None else
        node[offsets["max"]])


#pylint: disable=locally-disabled,inconsistent-return-statements
def proportional_predict(tree, offsets, fields, input_data, path=None,
                         missing_found=False, median=False, parent=None):
    """Makes a prediction based on a number of field values averaging
    the predictions of the leaves that fall in a subtree.

    Each time a splitting field has no value assigned, we consider
    both branches of the split to be true, merging their
    predictions. The function returns the merged distribution and the
    last node reached by a unique path.

    The returned tuple is:
        (distribution, min, max, last node, population, parent node, path)
    If a single branch must be followed and no child predicate holds,
    nothing is returned.
    """

    if path is None:
        path = []

    node = get_node(tree)

    final_distribution = {}
    children_number = node[offsets["children#"]]
    if "wdistribution" in offsets:
        distribution = node[offsets["wdistribution"]]
    else:
        distribution = node[offsets["distribution"]]
    children = [] if children_number == 0 else node[offsets["children"]]
    t_min = None if offsets.get("min") is None else node[offsets["min"]]
    t_max = None if offsets.get("max") is None else node[offsets["max"]]
    count = node[offsets["count"]]

    if children_number == 0:
        return (merge_distributions({}, dict((x[0], x[1])
                                             for x in distribution)),
                t_min, t_max, node, count, parent, path)
    if one_branch(children, input_data) or \
            fields[children[0][FIELD_OFFSET]]["optype"] in \
            ["text", "items"]:
        for child in children:
            [operator, field, value, term, missing] = get_predicate(child)
            if apply_predicate(operator, field, value, term, missing,
                               input_data, fields[field]):
                new_rule = predicate_to_rule(operator, fields[field], value,
                                             term, missing)
                # rules are only stored while the path is unique
                if new_rule not in path and not missing_found:
                    path.append(new_rule)
                return proportional_predict(
                    child, offsets, fields,
                    input_data, path,
                    missing_found, median, parent=node)
    else:
        # missing value found, the unique path stops
        missing_found = True
        minimums = []
        maximums = []
        population = 0
        for child in children:
            (subtree_distribution, subtree_min,
             subtree_max, _, subtree_pop, _, path) = \
                proportional_predict(
                    child, offsets, fields,
                    input_data, path, missing_found, median, parent=node)
            if subtree_min is not None:
                minimums.append(subtree_min)
            if subtree_max is not None:
                maximums.append(subtree_max)
            population += subtree_pop
            final_distribution = merge_distributions(
                final_distribution, subtree_distribution)
        return (final_distribution,
                min(minimums) if minimums else None,
                max(maximums) if maximums else None, node, population,
                parent, path)


def get_node(tree):
    """Extracts the properties of the node

    The leading predicate information is dropped: a single `True` element
    or PREDICATE_INFO_LENGTH elements otherwise.
    """
    if isinstance(tree[0], bool) and tree[0]:  # predicate is True
        return tree[1:]
    return tree[PREDICATE_INFO_LENGTH:]


def get_predicate(tree):
    """Extracts the predicate for the node

    Returns `True` when the node starts with `True` and the first
    PREDICATE_INFO_LENGTH elements otherwise.
    """
    if isinstance(tree[0], bool) and tree[0]:
        return True
    return tree[0: PREDICATE_INFO_LENGTH]


# diff --git a/bigml/predict_utils/regression.py b/bigml/predict_utils/regression.py
# new file mode 100644
# index 00000000..4c291f05
# --- /dev/null
# +++ b/bigml/predict_utils/regression.py
# @@ -0,0 +1,269 @@
# -*- coding: utf-8 -*-
#
# Copyright 2020-2025 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
+ +"""Predict utilities for regressions + +""" +import numbers +import math + + +from scipy import stats + +from bigml.predict_utils.common import last_prediction_predict, \ + proportional_predict, extract_distribution +from bigml.predicate_utils.utils import pack_predicate +from bigml.util import PRECISION +from bigml.prediction import Prediction +from bigml.multivote import BINS_LIMIT, merge_bins + + +OFFSETS = { \ + "False": {"id": 0, + "output": 1, + "count": 2, + "confidence": 3, + "distribution": 4, + "distribution_unit": 5, + "max_bins": 6, + "max": 7, + "min": 8, + "median": 9, + "children#": 10, + "children": 11}, + "True": {"id": 0, + "output": 1, + "count": 2, + "confidence": 3, + "distribution": 4, + "distribution_unit": 5, + "max_bins": 6, + "max": 7, + "min": 8, + "median": 9, + "wdistribution": 10, + "wdistribution_unit": 11, + "weight": 12, + "children#": 13, + "children": 14}} + + +def dist_median(distribution, count): + """Returns the median value for a distribution + + """ + counter = 0 + previous_value = None + for value, instances in distribution: + counter += instances + if counter > count / 2.0: + if (not count % 2 and (counter - 1) == (count / 2) and + previous_value is not None): + return (value + previous_value) / 2.0 + return value + previous_value = value + return None + + +def mean(distribution): + """Computes the mean of a distribution in the [[point, instances]] syntax + + """ + addition = 0.0 + count = 0.0 + for point, instances in distribution: + addition += point * instances + count += instances + if count > 0: + return addition / count + return float('nan') + + +def unbiased_sample_variance(distribution, distribution_mean=None): + """Computes the standard deviation of a distribution in the + [[point, instances]] syntax + + """ + addition = 0.0 + count = 0.0 + if (distribution_mean is None or not + isinstance(distribution_mean, numbers.Number)): + distribution_mean = mean(distribution) + for point, instances in distribution: + 
addition += ((point - distribution_mean) ** 2) * instances + count += instances + if count > 1: + return addition / (count - 1) + return float('nan') + + +def regression_error(distribution_variance, population, r_z=1.96): + """Computes the variance error + + """ + if population > 0: + chi_distribution = stats.chi2(population) + ppf = chi_distribution.ppf(1 - math.erf(r_z / math.sqrt(2))) + if ppf != 0: + error = distribution_variance * (population - 1) / ppf + error = error * ((math.sqrt(population) + r_z) ** 2) + return math.sqrt(error / population) + return float('nan') + + +def build_regression_tree(node_dict, node=None, distribution=None, + weighted=False, terms=None): + """Builds a compressed version of the tree structure as an list of + lists. Starting from the root node, that is represented by a list: + [weight, #predicates, op-code, field, value, term, missing...] + + And each child is represented by a list whose elements are: + [#children, id, output, count, confidence, output, distribution, + distribution_unit, max_bins, max. 
def regression_proportional_predict(tree, weighted, fields, input_data):
    """Proportional missing-strategy prediction for regression trees.

    Delegates the tree traversal to `proportional_predict` and then builds
    a `Prediction` object from the aggregated distribution.

    :param tree: packed tree structure, as consumed by
                 `proportional_predict`
    :param weighted: whether the tree was built with weights; selects the
                     offsets table through `OFFSETS[str(weighted)]`
    :param fields: fields structure forwarded to `proportional_predict`
    :param input_data: input data forwarded to `proportional_predict`
    :return: a `Prediction` object
    """

    offset = OFFSETS[str(weighted)]
    (final_distribution, d_min, d_max, last_node, population,
     parent_node, path) = proportional_predict(
         tree, offset, fields, input_data, path=None)
    # singular case:
    # when the prediction is the one given in a 1-instance node
    if len(list(final_distribution.items())) == 1:
        prediction, instances = list(final_distribution.items())[0]
        if instances == 1:
            return Prediction(
                last_node[offset["output"]],
                path,
                last_node[offset["confidence"]],
                distribution=last_node[offset["distribution"]]
                if not weighted else
                last_node[offset["wdistribution"]],
                count=instances,
                median=last_node[offset["median"]],
                distribution_unit=last_node[offset["distribution_unit"]],
                children=[] if last_node[offset["children#"]] == 0 else
                last_node[offset["children"]],
                d_min=last_node[offset["min"]],
                d_max=last_node[offset["max"]])
    # when there's more instances, sort elements by their mean
    distribution = [list(element) for element in
                    sorted(list(final_distribution.items()),
                           key=lambda x: x[0])]
    distribution_unit = ('bins' if len(distribution) > BINS_LIMIT
                         else 'counts')
    distribution = merge_bins(distribution, BINS_LIMIT)
    total_instances = sum([instances
                           for _, instances in distribution])
    if len(distribution) == 1:
        # where there's only one bin, there will be no error, but
        # we use a correction derived from the parent's error
        prediction = distribution[0][0]
        if total_instances < 2:
            total_instances = 1
        try:
            # some strange models can have nodes with no confidence
            confidence = round(parent_node[offset["confidence"]] /
                               math.sqrt(total_instances),
                               PRECISION)
        except (AttributeError, TypeError):
            # A missing confidence (None) or a missing parent node fails
            # with TypeError on division/subscripting, so it has to be
            # caught here for the fallback to ever apply.
            confidence = None
    else:
        prediction = mean(distribution)
        # weighted trees use the unweighted population to
        # compute the associated error
        confidence = round(regression_error(
            unbiased_sample_variance(distribution, prediction),
            population), PRECISION)
    return Prediction(
        prediction,
        path,
        confidence,
        distribution=distribution,
        count=total_instances,
        median=dist_median(distribution, total_instances),
        distribution_unit=distribution_unit,
        children=[] if last_node[offset["children#"]] == 0 else
        last_node[offset["children"]],
        d_min=d_min,
        d_max=d_max)
class Prediction():
    """Container for the information attached to a tree prediction.

    Holds either the info of the predicted node or the grouped info of a
    subtree (proportional missing strategy).

    """
    def __init__(self, output, path, confidence,
                 distribution=None, count=None, distribution_unit=None,
                 median=None, children=None, d_max=None, d_min=None):
        # Resolve the defaults first; attributes are then stored in a
        # fixed order.
        if distribution is None:
            distribution = []
        if count is None:
            # with no explicit count, add up the instances per bin
            count = sum(instances for _, instances in distribution)
        if distribution_unit is None:
            distribution_unit = 'categorical'
        if children is None:
            children = []

        self.output = output
        self.path = path
        self.confidence = confidence
        self.distribution = distribution
        self.count = count
        self.distribution_unit = distribution_unit
        self.median = median
        self.children = children
        self.min = d_min
        self.max = d_max
class ShapWrapper():
    """A lightweight wrapper around any supervised model that offers
    prediction methods adapted to the syntax expected by Shap Explainer.

    """

    def __init__(self, model, api=None, cache_get=None,
                 operation_settings=None):
        """Builds the local model and the fields helpers used to convert
        numpy arrays into input data.

        :param model: resource ID, resource structure, list of resources
                      or JSON file name, as accepted by `extract_id`
        :param api: optional connection object; `get_api_connection`
                    resolves the one that is actually used
        :param cache_get: forwarded to the local model constructor
        :param operation_settings: forwarded to the local model constructor
        """
        self.api = get_api_connection(api)
        resource_id, model = extract_id(model, self.api)
        resource_type = get_resource_type(resource_id)
        model_class = Fusion if resource_type == "fusion" else SupervisedModel
        # Pass the resolved connection (not the raw `api` argument):
        # extract_id may have pointed its storage at the directory of the
        # JSON file, and that setting must reach the local model.
        self.local_model = model_class(model, api=self.api,
                                       cache_get=cache_get,
                                       operation_settings=operation_settings)
        objective_id = getattr(self.local_model, "objective_id", None)
        self.fields = Fields(self.local_model.fields,
                             objective_field=objective_id)
        self.objective_categories = self.local_model.objective_categories
        self.x_headers = [self.fields.field_name(field_id) for field_id in
                          self.fields.sorted_field_ids()]
        self.y_header = self.fields.field_name(self.fields.objective_field)

    def predict(self, x_test, **kwargs):
        """Prediction method that interfaces with the Shap library.

        :param x_test: numpy array of inputs, converted with
                       `Fields.from_numpy`
        :param kwargs: extra arguments forwarded to `batch_predict`
        :return: flat (reshaped to -1) numpy array with the predictions
        """
        input_data_list = self.fields.from_numpy(x_test)
        batch_prediction = self.local_model.batch_predict(
            input_data_list, outputs={"output_fields": ["prediction"],
                                      "output_headers": [self.y_header]},
            all_fields=False, **kwargs)
        objective_field = self.fields.objective_field_info()
        pred_fields = Fields(objective_field)
        return pred_fields.to_numpy(batch_prediction,
                                    objective=True).reshape(-1)

    def predict_proba(self, x_test):
        """Probabilities prediction method that interfaces with the Shap
        library.

        :param x_test: numpy array of inputs, converted with
                       `Fields.from_numpy`
        :return: numpy array with one row per input and one column per
                 objective category
        :raises ValueError: if the local model is a regression
        """
        if self.local_model.regression:
            raise ValueError("This method is only available for classification"
                             " models.")
        input_data_list = self.fields.from_numpy(x_test)
        np_list = np.empty(shape=(len(input_data_list),
                                  len(self.objective_categories)))
        for index, input_data in enumerate(input_data_list):
            prediction = self.local_model.predict_probability(
                input_data, compact=True)
            np_list[index] = np.asarray([prediction])
        return np_list
def extract_id(model, api):
    """Extract the resource id from:
        - a resource ID string
        - a list of resources (ensemble + models)
        - a resource structure
        - the name of the file that contains a resource structure

    When a readable JSON file is given, `api.storage` is set to the
    directory that contains it.

    :param model: any of the representations listed above
    :param api: connection object; only `storage` and `error_message`
                are used here
    :return: tuple (resource_id, model) where model is the parsed
             structure if a file name was given
    :raises ValueError: if the file contents or the structure cannot be
                        interpreted as a supervised model
    :raises IOError: if the string is neither a readable file nor a
                     resource ID
    """
    # the string can be a path to a JSON file
    if isinstance(model, str):
        # `model` is rebound to the parsed JSON below, so the original
        # string is kept apart to be used in error messages
        source = model
        try:
            path = os.path.dirname(os.path.abspath(source))
            with open(source) as model_file:
                model = json.load(model_file)
                resource_id = get_resource_id(model)
                if resource_id is None:
                    raise ValueError("The JSON file does not seem"
                                     " to contain a valid BigML resource"
                                     " representation.")
                api.storage = path
        except IOError:
            # if it is not a path, it can be a model id
            resource_id = get_resource_id(model)
            if resource_id is None:
                for resource_type in COMPONENT_CLASSES.keys():
                    if model.find("%s/" % resource_type) > -1:
                        raise Exception(
                            api.error_message(model,
                                              resource_type=resource_type,
                                              method="get"))
                raise IOError("Failed to open the expected JSON file"
                              " at %s." % model)
        except ValueError as exc:
            # covers both JSON decoding errors and the check above
            raise ValueError("Failed to interpret %s."
                             " JSON file expected." % source) from exc
    if isinstance(model, list):
        resource_id = get_ensemble_id(model[0])
        if resource_id is None:
            raise ValueError("The first argument does not contain a valid"
                             " supervised model structure.")
    else:
        resource_id = get_resource_id(model)
        if resource_id is None:
            raise ValueError("The first argument does not contain a valid"
                             " supervised model structure.")
    return resource_id, model
"""Delegating method to local model object""" + new_kwargs = {} + new_kwargs.update(kwargs) + try: + return self.local_model.predict_probability(*args, **new_kwargs) + except TypeError: + del new_kwargs["missing_strategy"] + return self.local_model.predict_probability(*args, **new_kwargs) + + def predict_confidence(self, *args, **kwargs): + """Delegating method to local model object""" + new_kwargs = {} + new_kwargs.update(kwargs) + try: + return self.local_model.predict_confidence(*args, **new_kwargs) + except TypeError: + del new_kwargs["missing_strategy"] + return self.local_model.predict_confidence(*args, **new_kwargs) + + def data_transformations(self): + """Returns the pipeline transformations previous to the modeling + step as a pipeline, so that they can be used in local predictions. + """ + return self.local_model.data_transformations() + + def batch_predict(self, input_data_list, outputs=None, all_fields=True, + **kwargs): + """Creates a batch prediction for a list of inputs using the local + supervised model. Allows to define some output settings to + decide the fields to be added to the input_data (prediction, + probability, etc.) and the name that we want to assign to these new + fields. The outputs argument accepts a dictionary with keys + "output_fields", to contain a list of the prediction properties to add + (["prediction", "probability"] by default) and "output_headers", to + contain a list of the headers to be used when adding them (identical + to "output_fields" list, by default). 
+ + :param input_data_list: List of input data to be predicted + :type input_data_list: list or Panda's dataframe + :param dict outputs: properties that define the headers and fields to + be added to the input data + :param boolean all_fields: whether all the fields in the input data + should be part of the response + :return: the list of input data plus the predicted values + :rtype: list or Panda's dataframe depending on the input type in + input_data_list + """ + if outputs is None: + outputs = {} + new_fields = outputs.get(OUT_NEW_FIELDS, DFT_OUTPUTS) + new_headers = outputs.get(OUT_NEW_HEADERS, new_fields) + if len(new_fields) > len(new_headers): + new_headers.expand(new_fields[len(new_headers):]) + else: + new_headers = new_headers[0: len(new_fields)] + data_format = get_data_format(input_data_list) + inner_data_list = get_formatted_data(input_data_list, INTERNAL) + predictions_list = [] + kwargs.update({"full": True}) + for input_data in inner_data_list: + prediction = self.predict(input_data, **kwargs) + prediction_data = {} + if all_fields: + prediction_data.update(input_data) + for index, key in enumerate(new_fields): + try: + prediction_data[new_headers[index]] = prediction[key] + except KeyError: + pass + predictions_list.append(prediction_data) + if data_format != INTERNAL: + return format_data(predictions_list, out_format=data_format) + return predictions_list + + #pylint: disable=locally-disabled,arguments-differ + def dump(self, **kwargs): + """Delegate to local model""" + self.local_model.dump(**kwargs) + + def dumps(self): + """Delegate to local model""" + return self.local_model.dumps() diff --git a/bigml/tests/__init__.py b/bigml/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bigml/tests/compare_dataset_steps.py b/bigml/tests/compare_dataset_steps.py new file mode 100644 index 00000000..04bc9110 --- /dev/null +++ b/bigml/tests/compare_dataset_steps.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +#pylint: 
def the_transformed_data_is(step, input_data, output_data):
    """Checks the row produced by the local dataset transformation.

    Both arguments are JSON strings (None is read as an empty object).
    Every key of the transformed row is compared to the expected value.
    """
    source_row = json.loads("{}" if input_data is None else input_data)
    expected_row = json.loads("{}" if output_data is None else output_data)
    local_dataset = step.bigml["local_dataset"]
    produced_row = local_dataset.transform([source_row])[0]
    for field, produced_value in produced_row.items():
        eq_(expected_row.get(field), produced_value)
def the_local_forecast_is(step, local_forecasts):
    """Step: the local forecast matches the expected forecasts

    `local_forecasts` is a JSON string keyed by field ID. For every field
    the number of forecasts is compared first; list-valued attributes are
    then compared element by element with precision 5 and the rest
    are compared for equality.
    """
    expected_by_field = json.loads(local_forecasts)
    for field_id in expected_by_field:
        produced = step.bigml["local_forecast"][field_id]
        expected = expected_by_field[field_id]
        eq_(len(produced), len(expected), msg="forecast: %s" % produced)
        for position, produced_item in enumerate(produced):
            for attr in ("point_forecast", "model"):
                produced_value = produced_item[attr]
                if not isinstance(produced_value, list):
                    eq_(produced_value, expected[position][attr])
                    continue
                for offset, number in enumerate(produced_value):
                    approx_(expected[position][attr][offset],
                            number, precision=5)
def i_expand_file_with_models_list(step, pipeline_file, models_list):
    """Extracts the models stored in a zip file.

    `models_list` is a JSON list of resource IDs. Each ID, with "/"
    replaced by "_", must be the base name of a member of the zip.
    The zip is then expanded in the directory where it lives.
    """
    expected_names = [resource_id.replace("/", "_")
                      for resource_id in json.loads(models_list)]
    archive_path = res_filename(pipeline_file)
    target_dir = os.path.dirname(archive_path)
    with zipfile.ZipFile(archive_path, 'r') as archive:
        stored_names = [os.path.basename(member)
                        for member in archive.namelist()]
        missing = [name for name in expected_names
                   if name not in stored_names]
        ok_(not missing)
        archive.extractall(target_dir)
def the_pipeline_result_key_is(step, input_data, key, value, precision=None):
    """Checks one property of the row transformed by the local pipeline.

    `input_data` is a JSON string (None is read as an empty object). When
    a precision is given and the expected value is not a string, both
    values are rounded before being compared as strings.
    """
    source_row = json.loads("{}" if input_data is None else input_data)
    local_pipeline = step.bigml["local_pipeline"]
    produced = local_pipeline.transform([source_row])[0].get(key)
    expected = value
    needs_rounding = precision is not None and not isinstance(value, str)
    if needs_rounding:
        produced = round(produced, precision)
        expected = round(expected, precision)
    eq_(str(expected), str(produced))
def extract_zip(input_zip):
    """Reads every member of a zip file.

    Returns a dictionary that maps each member name to its contents
    as bytes.
    """
    contents = {}
    with ZipFile(input_zip) as archive:
        for member in archive.namelist():
            contents[member] = archive.read(member)
    return contents
def i_create_a_local_deepnet_from_zip_file(step, deepnet_file,
                                           operation_settings=None):
    """Step: I create a local deepnet from a zip file

    The first member of the zip is parsed as JSON and used to build the
    Deepnet that is stored as the step's local model.
    """
    members = extract_zip(res_filename(deepnet_file))
    first_member = list(members.values())[0]
    local_deepnet = Deepnet(json.loads(first_member),
                            operation_settings=operation_settings)
    step.bigml["local_model"] = local_deepnet
def the_local_bigml_prediction_is(step, value, prediction_type=None, key=None,
                                  precision=None):
    """Step: the local BigML model prediction is the expected value

    :param step: test step whose `bigml` dictionary stores the result
                 under "local_<prediction_type>"
    :param value: expected value
    :param prediction_type: kind of result to check; defaults to
                            "prediction", the key that
                            i_create_a_local_bigml_model_prediction uses
                            when no type is given
    :param key: optional key to select inside the stored result
    :param precision: forwarded to the comparison helper
    """
    if prediction_type is None:
        # same default as the step that stores the result; otherwise
        # the lookup would be done on "local_None"
        prediction_type = "prediction"
    prediction = step.bigml["local_%s" % prediction_type]
    if key is not None:
        prediction = prediction[key]
    eq_(value, prediction, precision=precision)
def i_create_a_local_deepnet_prediction(step, data=None, image_fields=None,
                                        full=False):
    """Step: I create a local deepnet prediction for some input data

    `data` is a JSON string (None is read as an empty object). The values
    of the fields listed in `image_fields` are replaced by the
    corresponding resource file names before predicting.
    """
    input_data = json.loads("{}" if data is None else data)
    for image_field in ([] if image_fields is None else image_fields):
        if image_field not in input_data:
            continue
        input_data[image_field] = res_filename(input_data[image_field])
    local_model = step.bigml["local_model"]
    step.bigml["local_prediction"] = local_model.predict(input_data,
                                                         full=full)
def i_create_a_local_centroid(step, data=None, pre_model=None):
    """Step: I create a local centroid for some input data

    `data` is a JSON string (None is read as an empty object). Fields
    whose value is the empty string are dropped and, if a `pre_model` is
    given, its transformation is applied before computing the centroid.
    """
    parsed = json.loads("{}" if data is None else data)
    input_data = {field: content for field, content in parsed.items()
                  if not content == ""}
    if pre_model is not None:
        input_data = pre_model.transform([input_data])[0]
    local_cluster = step.bigml["local_cluster"]
    step.bigml["local_centroid"] = local_cluster.centroid(input_data)
def i_create_a_local_anomaly_score(step, input_data, pre_model=None):
    """Step: I create a local anomaly score for some input data

    `input_data` is a JSON string. If a `pre_model` is given, its
    transformation is applied before scoring.
    """
    row = json.loads(input_data)
    if pre_model is not None:
        row = pre_model.transform([row])[0]
    local_anomaly = step.bigml["local_anomaly"]
    step.bigml["local_anomaly_score"] = local_anomaly.anomaly_score(row)
step.bigml["local_model"].batch_predict(data, + to_file=False) + +def the_batch_mm_predictions_are(step, predictions): + """Step: the predictions are """ + if predictions is None: + predictions = "[{}]" + predictions = json.loads(predictions) + for index, prediction in enumerate(predictions): + multivote = step.bigml["local_prediction"][index] + for mv_prediction in multivote.predictions: + eq_(mv_prediction['prediction'], prediction) + + +def the_multiple_local_prediction_is(step, prediction): + """Step: the multiple local prediction is """ + local_prediction = step.bigml["local_prediction"] + prediction = json.loads(prediction) + eq_(local_prediction, prediction) + + +def the_local_prediction_confidence_is(step, confidence): + """Step: the local prediction's confidence is """ + if isinstance(step.bigml["local_prediction"], (list, tuple)): + local_confidence = step.bigml["local_prediction"][1] + else: + local_confidence = step.bigml["local_prediction"].get('confidence', \ + step.bigml["local_prediction"].get('probability')) + local_confidence = round(float(local_confidence), 4) + confidence = round(float(confidence), 4) + eq_(local_confidence, confidence) + + +def the_highest_local_prediction_confidence_is( + step, input_data, confidence, missing_strategy=None): + """Step: the highest local prediction's confidence for is + """ + input_data = json.loads(input_data) + kwargs = {} + if missing_strategy is not None: + kwargs.update({"missing_strategy": missing_strategy}) + local_confidence = step.bigml["local_model"].predict_confidence(input_data, + **kwargs) + if isinstance(local_confidence, dict): + local_confidence = round(float(local_confidence["confidence"]), 4) + else: + local_confidence = round(float(max([pred["confidence"] for pred in local_confidence])), 4) + confidence = round(float(confidence), 4) + eq_(local_confidence, confidence) + + +def the_local_prediction_is(step, prediction, precision=4): + """Step: the local prediction is """ + if 
isinstance(step.bigml["local_prediction"], (list, tuple)): + local_prediction = step.bigml["local_prediction"][0] + elif isinstance(step.bigml["local_prediction"], dict): + local_prediction = step.bigml["local_prediction"]['prediction'] + else: + local_prediction = step.bigml["local_prediction"] + if hasattr(world, "local_ensemble") and step.bigml["local_ensemble"] is not None: + step.bigml["local_model"] = step.bigml["local_ensemble"] + if (hasattr(step.bigml["local_model"], "regression") and \ + step.bigml["local_model"].regression) or \ + (isinstance(step.bigml["local_model"], MultiModel) and \ + step.bigml["local_model"].models[0].regression): + local_prediction = round(float(local_prediction), precision) + prediction = round(float(prediction), precision) + approx_(local_prediction, float(prediction), precision=precision) + else: + if isinstance(local_prediction, str): + eq_(local_prediction, prediction) + else: + if isinstance(prediction, str): + prediction = float(prediction) + eq_(round(local_prediction, precision), + round(float(prediction), precision)) + + +def the_local_regions_prediction_is(step, prediction): + """Step: the local regions prediction is """ + prediction = json.loads(prediction) + eq_(prediction, step.bigml["local_prediction"]) + + +def the_local_probabilities_are(step, prediction): + """Step: the local probabilities are """ + local_probabilities = step.bigml["local_probabilities"] + expected_probabilities = [float(p) for p in json.loads(prediction)] + + for local, expected in zip(local_probabilities, expected_probabilities): + approx_(local, expected, precision=4) + + +def the_local_proba_prediction_is(step, proba_prediction): + """Step: the local probabilities prediction is """ + local_probabilities = step.bigml["local_probabilities"] + + for local, expected in zip(local_probabilities, proba_prediction): + approx_(local, expected, precision=4) + + +def the_local_ensemble_prediction_is(step, prediction): + """Step: the local ensemble 
prediction is """ + if isinstance(step.bigml["local_prediction"], (list, tuple)): + local_prediction = step.bigml["local_prediction"][0] + elif isinstance(step.bigml["local_prediction"], dict): + local_prediction = step.bigml["local_prediction"]['prediction'] + else: + local_prediction = step.bigml["local_prediction"] + if step.bigml["local_ensemble"].regression: + approx_(local_prediction, float(prediction), precision=5) + else: + eq_(local_prediction, prediction) + + +def the_local_probability_is(step, probability): + """Step: the local probability is """ + local_probability = step.bigml["local_prediction"]["probability"] + if isinstance(probability, str): + probability = float(probability) + eq_(local_probability, probability, precision=4) + + +def the_local_confidence_is(step, confidence): + """Step: the local confidence is """ + local_confidence = step.bigml["local_prediction"]["confidence"] + if isinstance(confidence, str): + confidence = float(confidence) + eq_(local_confidence, confidence, precision=4) + + +def eq_local_and_remote_probability(step): + """Step: check local and remote probability""" + local_probability = round(step.bigml["local_prediction"]["probability"], 3) + remote_probability = round(world.prediction["probability"], 3) + approx_(local_probability, remote_probability) + + +def i_create_a_local_multi_model(step): + """Step: I create a local multi model""" + step.bigml["local_model"] = MultiModel(world.list_of_models) + step.bigml["local_ensemble"] = None + + +def i_create_a_batch_prediction(step, input_data_list, directory): + """Step: I create a batch prediction for and save it + in + """ + if len(directory) > 0 and not os.path.exists(directory): + os.makedirs(directory) + input_data_list = json.loads(input_data_list) + ok_(isinstance(input_data_list, list)) + step.bigml["local_model"].batch_predict(input_data_list, directory) + + +def i_combine_the_votes(step, directory): + """Step: I combine the votes in """ + world.votes = 
step.bigml["local_model"].batch_votes(directory) + + +def the_plurality_combined_prediction(step, predictions): + """Step: the plurality combined predictions are """ + predictions = json.loads(predictions) + for i, votes_row in enumerate(world.votes): + combined_prediction = votes_row.combine() + check_prediction(combined_prediction, predictions[i]) + + +def the_confidence_weighted_prediction(step, predictions): + """Step: the confidence weighted predictions are """ + predictions = json.loads(predictions) + for i, votes_row in enumerate(world.votes): + combined_prediction = votes_row.combine(1) + eq_(combined_prediction, predictions[i]) + + +def i_create_a_local_logistic_model(step, pre_model=False): + """Step: I create a local logistic regression model""" + step.bigml["local_model"] = LogisticRegression(world.logistic_regression) + if pre_model: + step.bigml["local_pipeline"] = step.bigml[ + "local_model"].data_transformations() + if hasattr(world, "local_ensemble"): + step.bigml["local_ensemble"] = None + + +def i_create_a_local_deepnet(step): + """Step: I create a local deepnet model""" + step.bigml["local_model"] = Deepnet({"resource": world.deepnet['resource'], + "object": world.deepnet}) + if hasattr(world, "local_ensemble"): + step.bigml["local_ensemble"] = None + + +def i_create_a_local_topic_model(step): + """Step: I create a local topic model""" + step.bigml["local_topic_model"] = TopicModel(world.topic_model) + + +def the_topic_distribution_is(step, distribution): + """Step: the topic distribution is """ + eq_(json.loads(distribution), + world.topic_distribution['topic_distribution']['result']) + + +def the_local_topic_distribution_is(step, distribution): + """Step: the local topic distribution is """ + distribution = json.loads(distribution) + for index, topic_dist in enumerate(step.bigml["local_topic_distribution"]): + approx_(topic_dist["probability"], distribution[index]) + + +def the_association_set_is_like_file(step, filename): + """Step: the 
association set is like file """ + filename = res_filename(filename) + result = world.association_set.get("association_set",{}).get("result", []) + """ Uncomment if different text settings are used + with open(filename, "w") as filehandler: + json.dump(result, filehandler) + """ + with open(filename) as filehandler: + file_result = json.load(filehandler) + eq_(result, file_result) + + +def i_create_a_local_association_set(step, data, pre_model=None): + """Step: I create a local association set""" + data = json.loads(data) + if pre_model is not None: + data = pre_model.transform([data])[0] + step.bigml["local_association_set"] = step.bigml["local_association"].association_set( \ + data) + + +def the_local_association_set_is_like_file(step, filename): + """Step: the local association set is like file """ + filename = res_filename(filename) + """ Uncomment if different text settings are used + with open(filename, "w") as filehandler: + json.dump(result, filehandler) + """ + with open(filename) as filehandler: + file_result = json.load(filehandler) + for index, result in enumerate(file_result): + approx_(result['score'], step.bigml["local_association_set"][ + index]['score']) + eq_(result['rules'], + step.bigml["local_association_set"][index]['rules']) + + +def i_create_a_local_prediction_op_kind(step, data=None, operating_kind=None): + """Step: I create a local prediction for in operating kind + + """ + if data is None: + data = "{}" + ok_(operating_kind is not None) + data = json.loads(data) + step.bigml["local_prediction"] = step.bigml["local_model"].predict( \ + data, operating_kind=operating_kind) + + +def i_create_a_local_ensemble_prediction_op_kind( \ + step, data=None, operating_kind=None): + """Step: I create a local ensemble prediction for in operating + kind """ + if data is None: + data = "{}" + ok_(operating_kind is not None) + data = json.loads(data) + step.bigml["local_prediction"] = step.bigml["local_ensemble"].predict( \ + data, 
operating_kind=operating_kind) + + +def i_create_a_local_deepnet_prediction_op_kind( \ + step, data=None, operating_kind=None): + """Step: I create a local deepnet for in operating kind + + """ + if data is None: + data = "{}" + ok_(operating_kind is not None) + data = json.loads(data) + step.bigml["local_prediction"] = step.bigml["local_model"].predict( \ + data, operating_kind=operating_kind) + + +def i_create_a_local_logistic_prediction_op_kind( \ + step, data=None, operating_kind=None): + """Step: I create a local logistic regression for in operating + kind + """ + if data is None: + data = "{}" + ok_(operating_kind is not None) + data = json.loads(data) + step.bigml["local_prediction"] = step.bigml["local_model"].predict( \ + data, operating_kind=operating_kind) + + +def create_local_pca(step, pre_model=False): + """Step: I create a local PCA""" + step.bigml["local_pca"] = PCA(world.pca["resource"]) + if pre_model: + step.bigml["local_pipeline"] = step.bigml["local_pca"].data_transformations() + + +def i_create_a_local_linear(step): + """Step: I create a local linear regression""" + step.bigml["local_model"] = LinearRegression(world.linear_regression["resource"]) + + +def i_create_a_local_projection(step, data=None, pre_model=None): + """Step: I create a local projection for """ + if data is None: + data = "{}" + data = json.loads(data) + if pre_model is not None: + data = pre_model.transform([data])[0] + for key, value in list(data.items()): + if value == "": + del data[key] + step.bigml["local_projection"] = step.bigml["local_pca"].projection(data, full=True) + for name, value in list(step.bigml["local_projection"].items()): + step.bigml["local_projection"][name] = round(value, 5) + + +def i_create_a_local_linear_prediction(step, data=None): + """Step: I create a local linear regression prediction for """ + if data is None: + data = "{}" + data = json.loads(data) + for key, value in list(data.items()): + if value == "": + del data[key] + 
step.bigml["local_prediction"] = step.bigml["local_model"].predict(data, full=True) + for name, value in list(step.bigml["local_prediction"].items()): + if isinstance(value, float): + step.bigml["local_prediction"][name] = round(value, 5) + + +def the_local_projection_is(step, projection): + """Step: checking the local projection""" + if projection is None: + projection = "{}" + projection = json.loads(projection) + eq_(len(list(projection.keys())), len(list(step.bigml["local_projection"].keys()))) + for name, _ in list(projection.items()): + eq_(step.bigml["local_projection"][name], projection[name], + msg="local: %s, %s - expected: %s" % ( \ + name, step.bigml["local_projection"][name], projection[name])) diff --git a/bigml/tests/compute_lda_prediction_steps.py b/bigml/tests/compute_lda_prediction_steps.py new file mode 100644 index 00000000..5ec5f6e8 --- /dev/null +++ b/bigml/tests/compute_lda_prediction_steps.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2016-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
from bigml.topicmodel import TopicModel

from .world import eq_, approx_


def i_make_a_prediction(step, model, text, expected):
    """Step: predict the topic distribution for the text <text>

    Builds a local TopicModel from `model`, computes its distribution for
    `text` and checks it against `expected`: first that both have the same
    number of entries, then that each pair of 'probability' values matches
    through `approx_` with precision=6.
    """
    computed = TopicModel(model).distribution(text)

    # the same message is attached to every assertion below
    details = (str(computed), str(expected))
    failure_msg = \
        "Computed distribution is %s, but expected distribution is %s" % \
        details

    eq_(len(computed), len(expected), msg=failure_msg)

    for computed_topic, expected_topic in zip(computed, expected):
        approx_(computed_topic['probability'],
                expected_topic['probability'],
                precision=6, msg=failure_msg)
# diff --git a/bigml/tests/compute_multivote_prediction_steps.py b/bigml/tests/compute_multivote_prediction_steps.py
# new file mode 100644
# index 00000000..251423c1
# --- /dev/null
# +++ b/bigml/tests/compute_multivote_prediction_steps.py
# @@ -0,0 +1,90 @@
# -*- coding: utf-8 -*-
#pylint: disable=locally-disabled,unused-argument,no-member
#
# Copyright 2012, 2015-2025 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import json

from bigml.multivote import MultiVote

from .world import world, res_filename, eq_, ok_

DIGITS = 5


def i_create_a_multivote(step, predictions_file):
    """Step: I create a MultiVote for the set of predictions in file
    <predictions_file>

    The file name is resolved with `res_filename`, its JSON content is
    loaded and wrapped in a MultiVote stored in `world.multivote`. An
    IOError while reading is reported as a failed assertion.
    """
    predictions_path = res_filename(predictions_file)
    try:
        # open the resolved path (not the raw argument) and keep the handle
        # in its own name so the failure message still shows the path
        with open(predictions_path, 'r') as predictions_handler:
            world.multivote = MultiVote(json.load(predictions_handler))
    except IOError:
        ok_(False, "Failed to read %s" % predictions_path)


def compute_prediction(step, method):
    """Step: I compute the prediction with confidence using method
    <method>

    Stores the combined result in `world.combined_prediction` and
    `world.combined_confidence`.
    """
    try:
        prediction = world.multivote.combine(int(method), full=True)
        world.combined_prediction = prediction["prediction"]
        world.combined_confidence = prediction["confidence"]
    except ValueError:
        ok_(False, "Incorrect method")


def compute_prediction_no_confidence(step, method):
    """Step: I compute the prediction without confidence using method
    <method>

    Stores the combined result in `world.combined_prediction_nc`.
    """
    try:
        world.combined_prediction_nc = world.multivote.combine(int(method))
    except ValueError:
        ok_(False, "Incorrect method")


def check_combined_prediction(step, prediction):
    """Step: the combined prediction is <prediction>"""
    if world.multivote.is_regression():
        try:
            eq_(round(world.combined_prediction, DIGITS),
                round(float(prediction), DIGITS))
        except ValueError as exc:
            ok_(False, str(exc))
    else:
        eq_(world.combined_prediction, prediction)


def check_combined_prediction_no_confidence(step, prediction):
    """Step: the combined prediction without confidence is <prediction>"""
    if world.multivote.is_regression():
        try:
            eq_(round(world.combined_prediction_nc, DIGITS),
                round(float(prediction), DIGITS))
        except ValueError as exc:
            ok_(False, str(exc))
    else:
        # check the value produced by compute_prediction_no_confidence,
        # the same attribute the regression branch reads
        eq_(world.combined_prediction_nc, prediction)


def check_combined_confidence(step, confidence):
    """Step: the confidence for the combined prediction is <confidence>"""
    try:
        eq_(round(world.combined_confidence, DIGITS),
            round(float(confidence), DIGITS))
    except ValueError as exc:
        ok_(False, str(exc))
# diff --git a/bigml/tests/create_anomaly_steps.py b/bigml/tests/create_anomaly_steps.py
# new file mode 100644
# index 00000000..f0b18d3a
# --- /dev/null
# +++ b/bigml/tests/create_anomaly_steps.py
# @@ -0,0 +1,174 @@
# -*- coding: utf-8 -*-
#pylint: disable=locally-disabled,unused-argument,no-member
#
# Copyright 2014-2025 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import json

from bigml.api import HTTP_CREATED
from bigml.api import FINISHED, FAULTY
from bigml.anomaly import Anomaly

from .world import world, res_filename, eq_, ok_
from .read_resource_steps import wait_until_status_code_is


def i_check_anomaly_datasets_and_datasets_ids(step):
    """Step: I check the anomaly detector stems from the original dataset
    list
    """
    anomaly = world.anomaly
    # .get() keeps the message buildable when 'datasets' is missing, so the
    # failure is reported by ok_ instead of surfacing as a KeyError
    ok_('datasets' in anomaly and
        anomaly['datasets'] == step.bigml["dataset_ids"],
        ("The anomaly detector contains only %s and the dataset ids are %s" %
         (",".join(anomaly.get('datasets', [])),
          ",".join(step.bigml["dataset_ids"]))))


def i_check_anomaly_dataset_and_datasets_ids(step):
    """Step: I check the anomaly detector stems from the original dataset"""
    anomaly = world.anomaly
    # .get() keeps the message buildable when 'dataset' is missing
    ok_('dataset' in anomaly and anomaly['dataset'] == world.dataset[
        'resource'],
        ("The anomaly detector contains only %s and the dataset id is %s" %
         (anomaly.get('dataset'), world.dataset['resource'])))


def i_create_an_anomaly(step, shared=None):
    """Step: I create an anomaly detector"""
    i_create_an_anomaly_from_dataset(step, shared=shared)


def clone_anomaly(step, anomaly):
    """Step: I clone anomaly"""
    resource = world.api.clone_anomaly(anomaly,
                                       {'project': world.project_id})
    # update status
    world.status = resource['code']
    world.location = resource['location']
    world.anomaly = resource['object']
    # save reference
    world.anomalies.append(resource['resource'])


def the_cloned_anomaly_is(step, anomaly):
    """Checking expected cloned anomaly"""
    eq_(world.anomaly["origin"], anomaly)


def i_create_an_anomaly_from_dataset(step, shared=None):
    """Step: I create an anomaly detector from a dataset

    Nothing is created when `shared` names an entry already stored in
    world.shared["anomaly"].
    """
    if shared is None or world.shared.get("anomaly", {}).get(shared) is None:
        dataset = world.dataset.get('resource')
        resource = world.api.create_anomaly(dataset, {'seed': 'BigML'})
        world.status = resource['code']
        eq_(world.status, HTTP_CREATED)
        world.location = resource['location']
        world.anomaly = resource['object']
        world.anomalies.append(resource['resource'])


def i_create_an_anomaly_with_top_n_from_dataset(step, top_n):
    """Step: I create an anomaly detector with <top_n> anomalies from
    a dataset
    """
    dataset = world.dataset.get('resource')
    resource = world.api.create_anomaly(
        dataset, {'seed': 'BigML', 'top_n': int(top_n)})
    world.status = resource['code']
    eq_(world.status, HTTP_CREATED,
        "Expected: %s, found: %s" % (HTTP_CREATED, world.status))
    world.location = resource['location']
    world.anomaly = resource['object']
    world.anomalies.append(resource['resource'])


def i_create_an_anomaly_with_params(step, parms=None):
    """Step: I create an anomaly detector with <parms> from a dataset

    `parms` is a JSON string (or None); the 'seed' key is always set to
    'BigML' before the creation call.
    """
    dataset = world.dataset.get('resource')
    if parms is not None:
        parms = json.loads(parms)
    else:
        parms = {}
    parms.update({"seed": 'BigML'})
    resource = world.api.create_anomaly(
        dataset, parms)
    world.status = resource['code']
    eq_(world.status, HTTP_CREATED,
        "Expected: %s, found: %s" % (HTTP_CREATED, world.status))
    world.location = resource['location']
    world.anomaly = resource['object']
    world.anomalies.append(resource['resource'])


def i_create_an_anomaly_from_dataset_list(step):
    """Step: I create an anomaly detector from a dataset list"""
    resource = world.api.create_anomaly(step.bigml["dataset_ids"],
                                        {'seed': 'BigML'})
    world.status = resource['code']
    eq_(world.status, HTTP_CREATED)
    world.location = resource['location']
    world.anomaly = resource['object']
    world.anomalies.append(resource['resource'])


def wait_until_anomaly_status_code_is(step, code1, code2, secs):
    """Step: I wait until the anomaly detector status code is either
    <code1> or <code2> less than <secs>
    """
    world.anomaly = wait_until_status_code_is(
        code1, code2, secs, world.anomaly)


def the_anomaly_is_finished_in_less_than(step, secs, shared=None):
    """Step: I wait until the anomaly detector is ready less than <secs>

    When `shared` names an entry already stored in world.shared["anomaly"],
    that entry is reused instead of waiting; otherwise the finished anomaly
    is stored under that name (if a name was given).
    """
    if shared is None or world.shared.get("anomaly", {}).get(shared) is None:
        wait_until_anomaly_status_code_is(step, FINISHED, FAULTY, secs)
        if shared is not None:
            if "anomaly" not in world.shared:
                world.shared["anomaly"] = {}
            world.shared["anomaly"][shared] = world.anomaly
    else:
        world.anomaly = world.shared["anomaly"][shared]
        print("Reusing %s" % world.anomaly["resource"])


def create_dataset_with_anomalies(step):
    """Step: I create a dataset with only the anomalies"""
    local_anomalies = Anomaly(world.anomaly['resource'])
    world.dataset = world.api.create_dataset(
        world.dataset['resource'],
        {"lisp_filter": local_anomalies.anomalies_filter()})
    world.datasets.append(world.dataset['resource'])


def the_dataset_has_n_rows(step, rows):
    """Step: I check that the dataset has <rows> rows"""
    eq_(world.dataset['rows'], int(rows))


def i_export_anomaly(step, filename):
    """Step: I export the anomaly"""
    world.api.export(world.anomaly.get('resource'),
                     filename=res_filename(filename))


def i_create_local_anomaly_from_file(step, export_file):
    """Step: I create a local anomaly from file <export_file>"""
    step.bigml["local_anomaly"] = Anomaly(res_filename(export_file))


def check_anomaly_id_local_id(step):
    """Step: the anomaly ID and the local anomaly ID match"""
    eq_(step.bigml["local_anomaly"].resource_id, world.anomaly["resource"])
# diff --git a/bigml/tests/create_association_steps.py b/bigml/tests/create_association_steps.py
# new file mode 100644
# index 00000000..b54cd9be
# --- /dev/null
# +++ b/bigml/tests/create_association_steps.py
# @@ -0,0 +1,157 @@
# -*- coding: utf-8 -*-
#pylint: disable=locally-disabled,unused-argument,no-member
#
# Copyright 2014-2025 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import json

from bigml.api import HTTP_CREATED, HTTP_ACCEPTED
from bigml.api import FINISHED, FAULTY
from bigml.association import Association

from .read_resource_steps import wait_until_status_code_is
from .world import world, res_filename, eq_


def i_check_association_name(step, name):
    """Step: the association name is <name>"""
    association_name = world.association['name']
    eq_(name, association_name)


def i_create_an_association_from_dataset(step, shared=None):
    """Step: I create an association from a dataset

    Nothing is created when `shared` names an entry already stored in
    world.shared["association"].
    """
    # look up the caller-provided name, not the literal string "shared",
    # so an association cached under that name is actually found
    if shared is None or \
            world.shared.get("association", {}).get(shared) is None:
        dataset = world.dataset.get('resource')
        resource = world.api.create_association(
            dataset, {'name': 'new association'})
        world.status = resource['code']
        eq_(world.status, HTTP_CREATED)
        world.location = resource['location']
        world.association = resource['object']
        world.associations.append(resource['resource'])


def i_create_an_association_from_dataset_with_params(step, parms=None):
    """Step: I create an association from a dataset with params <parms>

    `parms` is a JSON string (or None); the 'name' key is always set to
    'new association' before the creation call.
    """
    dataset = world.dataset.get('resource')
    if parms is not None:
        parms = json.loads(parms)
    else:
        parms = {}
    parms.update({'name': 'new association'})
    resource = world.api.create_association(dataset, parms)
    world.status = resource['code']
    eq_(world.status, HTTP_CREATED)
    world.location = resource['location']
    world.association = resource['object']
    world.associations.append(resource['resource'])


def i_create_an_association_with_strategy_from_dataset(step, strategy):
    """Step: I create an association with search strategy <strategy>
    from a dataset
    """
    dataset = world.dataset.get('resource')
    resource = world.api.create_association(
        dataset, {'name': 'new association', 'search_strategy': strategy})
    world.status = resource['code']
    eq_(world.status, HTTP_CREATED)
    world.location = resource['location']
    world.association = resource['object']
    world.associations.append(resource['resource'])


def i_update_association_name(step, name):
    """Step: I update the association name to <name>"""
    resource = world.api.update_association(world.association['resource'],
                                            {'name': name})
    world.status = resource['code']
    eq_(world.status, HTTP_ACCEPTED)
    world.location = resource['location']
    world.association = resource['object']


def wait_until_association_status_code_is(step, code1, code2, secs):
    """Step: I wait until the association status code is either <code1> or
    <code2> less than <secs>
    """
    world.association = wait_until_status_code_is(
        code1, code2, secs, world.association)


def the_association_is_finished_in_less_than(step, secs, shared=None):
    """Step: I wait until the association is ready less than <secs>

    When `shared` names an entry already stored in
    world.shared["association"], that entry is reused instead of waiting;
    otherwise the finished association is stored under that name (if a name
    was given).
    """
    if shared is None or \
            world.shared.get("association", {}).get(shared) is None:
        wait_until_association_status_code_is(step, FINISHED, FAULTY, secs)
        if shared is not None:
            if "association" not in world.shared:
                world.shared["association"] = {}
            world.shared["association"][shared] = world.association
    else:
        world.association = world.shared["association"][shared]
        print("Reusing %s" % world.association["resource"])


def i_create_a_local_association(step):
    """Step: I create a local association"""
    step.bigml["local_association"] = Association(world.association)


def i_get_rules_for_item_list(step, item_list):
    """Step: I get the rules for <item_list>"""
    world.association_rules = step.bigml["local_association"].get_rules(
        item_list=item_list)


def the_first_rule_is(step, rule):
    """Step: the first rule is <rule>"""
    found_rules = [a_rule.to_json() for a_rule in world.association_rules]
    eq_(rule, found_rules[0])


def i_export_association(step, filename):
    """Step: I export the association"""
    world.api.export(world.association.get('resource'),
                     filename=res_filename(filename))


def i_create_local_association_from_file(step, export_file):
    """Step: I create a local association from file <export_file>"""
    step.bigml["local_association"] = Association(res_filename(export_file))


def check_association_id_local_id(step):
    """Step: the association ID and the local association ID match"""
    eq_(step.bigml["local_association"].resource_id,
        world.association["resource"])


def clone_association(step, association):
    """Step: I clone association"""
    resource = world.api.clone_association(association,
                                           {'project': world.project_id})
    # update status
    world.status = resource['code']
    world.location = resource['location']
    world.association = resource['object']
    # save reference
    world.associations.append(resource['resource'])


def the_cloned_association_is(step, association):
    """The association is a clone"""
    eq_(world.association["origin"], association)
# diff --git a/bigml/tests/create_batch_prediction_steps.py b/bigml/tests/create_batch_prediction_steps.py
# new file mode 100644
# index 00000000..7988a3f9
# --- /dev/null
# +++ b/bigml/tests/create_batch_prediction_steps.py
# @@ -0,0 +1,230 @@
# -*- coding: utf-8 -*-
#pylint: disable=locally-disabled,unused-argument,no-member
#
# Copyright 2012-2025 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
+ +from bigml.api import HTTP_CREATED +from bigml.api import FINISHED, FAULTY +from bigml.io import UnicodeReader + +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_, ok_ + + +def i_create_a_batch_prediction(step): + """Step: I create a batch prediction for the dataset with the model""" + dataset = world.dataset.get('resource') + model = world.model.get('resource') + resource = world.api.create_batch_prediction(model, dataset) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.batch_prediction = resource['object'] + world.batch_predictions.append(resource['resource']) + + +def i_create_a_batch_prediction_ensemble(step, params=None): + """Step: I create a batch prediction for the dataset with the ensemble and + """ + if params is None: + params = {} + dataset = world.dataset.get('resource') + ensemble = world.ensemble.get('resource') + resource = world.api.create_batch_prediction(ensemble, dataset, params) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.batch_prediction = resource['object'] + world.batch_predictions.append(resource['resource']) + + +def wait_until_batch_prediction_status_code_is(step, code1, code2, secs): + """Step: I wait until the batch prediction status code is either + or less than """ + world.batch_prediction = wait_until_status_code_is( + code1, code2, secs, world.batch_prediction) + + +def wait_until_batch_centroid_status_code_is(step, code1, code2, secs): + """Step: I wait until the batch centroid status code is either or + less than """ + world.batch_centroid = wait_until_status_code_is( + code1, code2, secs, world.batch_centroid) + + +def wait_until_batch_anomaly_score_status_code_is(step, code1, code2, secs): + """Step: I wait until the batch anomaly score status code is either + or less than """ + world.batch_anomlay_score = wait_until_status_code_is( + 
code1, code2, secs, world.batch_anomaly_score) + + +def the_batch_prediction_is_finished_in_less_than(step, secs): + """Step: I wait until the batch prediction is ready less than """ + wait_until_batch_prediction_status_code_is(step, FINISHED, FAULTY, secs) + + +def the_batch_centroid_is_finished_in_less_than(step, secs): + """Step: I wait until the batch centroid is ready less than """ + wait_until_batch_centroid_status_code_is(step, FINISHED, FAULTY, secs) + + +def the_batch_anomaly_score_is_finished_in_less_than(step, secs): + """Step: I wait until the batch anomaly score is ready less than """ + wait_until_batch_anomaly_score_status_code_is(step, FINISHED, FAULTY, secs) + + +def i_download_predictions_file(step, filename): + """Step: I download the created predictions file to """ + file_object = world.api.download_batch_prediction( + world.batch_prediction, filename=res_filename(filename)) + ok_(file_object is not None) + world.output = file_object + + +def i_download_centroid_file(step, filename): + """Step: I download the created centroid file to """ + file_object = world.api.download_batch_centroid( + world.batch_centroid, filename=res_filename(filename)) + ok_(file_object is not None) + world.output = file_object + + +def i_download_anomaly_score_file(step, filename): + """Step: I download the created anomaly score file to """ + file_object = world.api.download_batch_anomaly_score( + world.batch_anomaly_score, filename=res_filename(filename)) + ok_(file_object is not None) + world.output = file_object + + +def check_rows(prediction_rows, test_rows): + """Checking rows identity""" + row_num = 0 + for row in prediction_rows: + check_row = next(test_rows) + row_num += 1 + eq_(len(check_row), len (row)) + for index, cell in enumerate(row): + dot = cell.find(".") + if dot > 0: + try: + decs = min(len(cell), len(check_row[index])) - dot - 1 + cell = round(float(cell), decs) + check_row[index] = round(float(check_row[index]), decs) + except ValueError: + pass + 
eq_(check_row[index], cell, + "Got: %s/ Expected: %s in line %s" % (row, check_row, row_num)) + + +def i_check_predictions(step, check_file): + """Step: the downloaded predictions file is like `check_file`""" + with UnicodeReader(world.output) as prediction_rows: + with UnicodeReader(res_filename(check_file)) as test_rows: + check_rows(prediction_rows, test_rows) + + +def i_check_batch_centroid(step, check_file): + """Step: the batch centroid file is like """ + i_check_predictions(step, check_file) + + +def i_check_batch_anomaly_score(step, check_file): + """Step: the batch anomaly score file is like """ + i_check_predictions(step, check_file) + + +def i_check_batch_centroid_is_ok(step): + """Step: I check the batch centroid is ok""" + ok_(world.api.ok(world.batch_centroid)) + + +def i_check_batch_anomaly_score_is_ok(step): + """Step: I check the batch anomaly score is ok""" + ok_(world.api.ok(world.batch_anomaly_score)) + + +def i_create_a_batch_prediction_with_cluster(step): + """Step: I create a batch centroid for the dataset""" + dataset = world.dataset.get('resource') + cluster = world.cluster.get('resource') + resource = world.api.create_batch_centroid(cluster, dataset) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.batch_centroid = resource['object'] + world.batch_centroids.append(resource['resource']) + + +def i_create_a_batch_prediction_with_anomaly(step): + """Step: I create a batch anomaly score""" + dataset = world.dataset.get('resource') + anomaly = world.anomaly.get('resource') + resource = world.api.create_batch_anomaly_score(anomaly, dataset) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.batch_anomaly_score = resource['object'] + world.batch_anomaly_scores.append(resource['resource']) + + +def i_create_a_linear_batch_prediction(step): + """Step: I create a linear batch prediction""" + dataset =
world.dataset.get('resource') + linear_regression = world.linear_regression.get('resource') + resource = world.api.create_batch_prediction(linear_regression, dataset) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.batch_prediction = resource['object'] + world.batch_predictions.append(resource['resource']) + + +def i_create_a_source_from_batch_prediction(step): + """Step: I create a source from the batch prediction""" + batch_prediction = world.batch_prediction.get('resource') + resource = world.api.source_from_batch_prediction(batch_prediction) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.source = resource['object'] + world.sources.append(resource['resource']) + + +def i_create_a_batch_prediction_logistic_model(step): + """Step: I create a batch prediction for the dataset with the logistic + regression + """ + dataset = world.dataset.get('resource') + logistic = world.logistic_regression.get('resource') + resource = world.api.create_batch_prediction(logistic, dataset) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.batch_prediction = resource['object'] + world.batch_predictions.append(resource['resource']) + + +def i_create_a_batch_prediction_fusion(step): + """Step: I create a batch prediction for the dataset with the fusion""" + dataset = world.dataset.get('resource') + fusion = world.fusion.get('resource') + resource = world.api.create_batch_prediction(fusion, dataset) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.batch_prediction = resource['object'] + world.batch_predictions.append(resource['resource']) diff --git a/bigml/tests/create_batch_projection_steps.py b/bigml/tests/create_batch_projection_steps.py new file mode 100644 index 00000000..d18debf7 --- /dev/null +++ 
b/bigml/tests/create_batch_projection_steps.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2018-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +from bigml.api import HTTP_CREATED +from bigml.api import FINISHED, FAULTY +from bigml.io import UnicodeReader + +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_, ok_ + + +def i_create_a_batch_projection(step): + """Step: I create a batch projection for the dataset with the PCA""" + dataset = world.dataset.get('resource') + pca = world.pca.get('resource') + resource = world.api.create_batch_projection(pca, dataset) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.batch_projection = resource['object'] + world.batch_projections.append(resource['resource']) + + +def wait_until_batch_projection_status_code_is(step, code1, code2, secs): + """Step: I wait until the batch projection status code is either + or less than + """ + world.batch_projection = wait_until_status_code_is( + code1, code2, secs, world.batch_projection) + + +def the_batch_projection_is_finished_in_less_than(step, secs): + """Step: I wait until the batch projection is ready less than """ + wait_until_batch_projection_status_code_is(step, FINISHED, FAULTY, secs) + + +def i_download_projections_file(step, filename): + """Step: I download the created 
projections file to """ + file_object = world.api.download_batch_projection( + world.batch_projection, filename=res_filename(filename)) + ok_(file_object is not None) + world.output = file_object + + +def i_check_projections(step, check_file): + """Step: the batch projection file is like """ + with UnicodeReader(world.output) as projection_rows: + with UnicodeReader(res_filename(check_file)) as test_rows: + check_csv_rows(projection_rows, test_rows) + + +def check_csv_rows(projections, expected): + """Checking expected projections""" + for projection in projections: + eq_(projection, next(expected)) diff --git a/bigml/tests/create_cluster_steps.py b/bigml/tests/create_cluster_steps.py new file mode 100644 index 00000000..f6c9e002 --- /dev/null +++ b/bigml/tests/create_cluster_steps.py @@ -0,0 +1,172 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2012-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+import json +import os + +from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY +from bigml.api import get_status +from bigml.cluster import Cluster + +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_ + + +def i_create_a_cluster(step, shared=None): + """Step: I create a cluster""" + if shared is None or world.shared.get("cluster", {}).get(shared) is None: + dataset = world.dataset.get('resource') + resource = world.api.create_cluster( + dataset, {'seed': 'BigML', + 'cluster_seed': 'BigML', + 'k': 8}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.cluster = resource['object'] + world.clusters.append(resource['resource']) + + +def i_create_a_cluster_from_dataset_list(step): + """Step: I create a cluster from a dataset list""" + resource = world.api.create_cluster(step.bigml["dataset_ids"]) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.cluster = resource['object'] + world.clusters.append(resource['resource']) + + +def i_create_a_cluster_with_options(step, options): + """Step: I create a cluster with options """ + dataset = world.dataset.get('resource') + options = json.loads(options) + options.update({'seed': 'BigML', + 'cluster_seed': 'BigML', + 'k': 8}) + resource = world.api.create_cluster( + dataset, options) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.cluster = resource['object'] + world.clusters.append(resource['resource']) + + +def wait_until_cluster_status_code_is(step, code1, code2, secs): + """Step: I wait until the cluster status code is either or + less than """ + world.cluster = wait_until_status_code_is( + code1, code2, secs, world.cluster) + + +def the_cluster_is_finished_in_less_than(step, secs, shared=None): + """Step: I wait until the cluster is ready 
less than """ + if shared is None or world.shared.get("cluster", {}).get(shared) is None: + wait_until_cluster_status_code_is(step, FINISHED, FAULTY, secs) + if shared is not None: + if "cluster" not in world.shared: + world.shared["cluster"] = {} + world.shared["cluster"][shared] = world.cluster + else: + world.cluster = world.shared["cluster"][shared] + print("Reusing %s" % world.cluster["resource"]) + + +def make_the_cluster_shared(step): + """Step: I make the cluster shared""" + resource = world.api.update_cluster(world.cluster['resource'], + {'shared': True}) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + world.location = resource['location'] + world.cluster = resource['object'] + + +def get_sharing_info(step): + """Step: I get the cluster sharing info""" + world.shared_hash = world.cluster['shared_hash'] + world.sharing_key = world.cluster['sharing_key'] + + +def cluster_from_shared_url(step): + """Step: I check the cluster status using the cluster's shared url""" + world.cluster = world.api.get_cluster("shared/cluster/%s" % world.shared_hash) + eq_(get_status(world.cluster)['code'], FINISHED) + + +def cluster_from_shared_key(step): + """Step: I check the cluster status using the cluster's shared key""" + username = os.environ.get("BIGML_USERNAME") + world.cluster = world.api.get_cluster(world.cluster['resource'], + shared_username=username, shared_api_key=world.sharing_key) + eq_(get_status(world.cluster)['code'], FINISHED) + + +def closest_in_cluster(step, reference, closest): + """Step: the data point in the cluster closest to is """ + local_cluster = step.bigml["local_cluster"] + reference = json.loads(reference) + closest = json.loads(closest) + result = local_cluster.closest_in_cluster( \ + reference, number_of_points=1)["closest"][0] + result = json.loads(json.dumps(result)) + eq_(closest, result) + + +def closest_centroid_in_cluster(step, reference, closest_id): + """Step: the centroid in the cluster closest to is + + """ +
local_cluster = step.bigml["local_cluster"] + reference = json.loads(reference) + result = local_cluster.sorted_centroids( \ + reference) + result = result["centroids"][0]["centroid_id"] + eq_(closest_id, result) + +def i_export_cluster(step, filename): + """Step: I export the cluster""" + world.api.export(world.cluster.get('resource'), + filename=res_filename(filename)) + + +def i_create_local_cluster_from_file(step, export_file): + """Step: I create a local cluster from file """ + step.bigml["local_cluster"] = Cluster(res_filename(export_file)) + + +def check_cluster_id_local_id(step): + """Step: the cluster ID and the local cluster ID match""" + eq_(step.bigml["local_cluster"].resource_id, world.cluster["resource"]) + + +def clone_cluster(step, cluster): + """Step: I clone cluster""" + resource = world.api.clone_cluster(cluster, + {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.cluster = resource['object'] + # save reference + world.clusters.append(resource['resource']) + + +def the_cloned_cluster_is(step, cluster): + """Checking the cluster is a clone""" + eq_(world.cluster["origin"], cluster) diff --git a/bigml/tests/create_configuration_steps.py b/bigml/tests/create_configuration_steps.py new file mode 100644 index 00000000..5116986d --- /dev/null +++ b/bigml/tests/create_configuration_steps.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY + +from .world import world, eq_ +from .read_resource_steps import wait_until_status_code_is + + +def i_create_configuration(step, configurations): + """Step: I create a configuration""" + resource = world.api.create_configuration( + configurations, {"name": "configuration"}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.configuration = resource['object'] + world.configurations.append(resource['resource']) + + +def i_update_configuration(step, changes): + """Step: I update a configuration""" + resource = world.api.update_configuration( + world.configuration["resource"], changes) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + world.location = resource['location'] + world.configuration = resource['object'] + + +def wait_until_configuration_status_code_is(step, code1, code2, secs): + """Step: I wait until the configuration status code is either or + less than + """ + world.configuration = wait_until_status_code_is( + code1, code2, secs, world.configuration) + + +def the_configuration_is_finished_in_less_than(step, secs): + """Step: I wait until the configuration is ready less than """ + wait_until_configuration_status_code_is(step, FINISHED, FAULTY, secs) + + +def i_check_configuration_name(step, name): + """Step: the configuration name is """ + eq_(world.configuration["name"], name["name"]) + + +def i_check_configuration_conf(step, confs): + """Step: the configuration contents are """ + eq_(world.configuration["configurations"], confs) diff --git a/bigml/tests/create_correlation_steps.py b/bigml/tests/create_correlation_steps.py new file mode 100644 index 00000000..c5421c6b --- /dev/null +++ b/bigml/tests/create_correlation_steps.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- 
+#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY + +from .read_resource_steps import wait_until_status_code_is +from .world import world, eq_ + +def i_check_correlation_name(step, name): + """Step: the correlation name is """ + correlation_name = world.correlation['name'] + eq_(name, correlation_name) + +def i_create_a_correlation_from_dataset(step): + """Step: I create a correlation from a dataset""" + dataset = world.dataset.get('resource') + resource = world.api.create_correlation(dataset, {'name': 'new correlation'}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.correlation = resource['object'] + world.correlations.append(resource['resource']) + + +def i_update_correlation_name(step, name): + """Step: I update the correlation name to """ + resource = world.api.update_correlation(world.correlation['resource'], + {'name': name}) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + world.location = resource['location'] + world.correlation = resource['object'] + + +def wait_until_correlation_status_code_is(step, code1, code2, secs): + """Step: I wait until the correlation status code is either + or less than + """ + world.correlation = wait_until_status_code_is( + code1, code2, secs, 
world.correlation) + + +def the_correlation_is_finished_in_less_than(step, secs): + """Step: I wait until the correlation is ready less than """ + wait_until_correlation_status_code_is(step, FINISHED, FAULTY, secs) diff --git a/bigml/tests/create_dataset_steps.py b/bigml/tests/create_dataset_steps.py new file mode 100644 index 00000000..b341ba51 --- /dev/null +++ b/bigml/tests/create_dataset_steps.py @@ -0,0 +1,237 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2012-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import json + +from bigml.api import HTTP_CREATED, HTTP_OK, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY +from bigml.api import get_status + +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_ + + +def i_create_a_dataset(step, shared=None): + """Step: I create a dataset""" + if shared is None or world.shared.get("dataset", {}).get(shared) is None: + resource = world.api.create_dataset(world.source['resource']) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.dataset = resource['object'] + world.datasets.append(resource['resource']) + + +def i_export_a_dataset(step, local_file): + """Step: I download the dataset file to """ + world.api.download_dataset(world.dataset['resource'], + filename=res_filename(local_file)) + + +def files_equal(step, local_file, data): + """Step: file is like file """ + with open(res_filename(local_file)) as handler: + contents_local_file = handler.read() + with open(res_filename(data)) as handler: + contents_data = handler.read() + eq_(contents_local_file, contents_data) + + +def i_create_a_dataset_with(step, data="{}"): + """Step: I create a dataset with """ + resource = world.api.create_dataset(world.source['resource'], + json.loads(data)) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.dataset = resource['object'] + world.datasets.append(resource['resource']) + + +def wait_until_dataset_status_code_is(step, code1, code2, secs): + """Step: I wait until the dataset status code is either or + less than + """ + world.dataset = wait_until_status_code_is( + code1, code2, secs, world.dataset) + + +def the_dataset_is_finished_in_less_than(step, secs, shared=None): + """Step: I wait until the dataset is ready less than """ + if shared is None or world.shared.get("dataset", {}).get(shared) is None: + wait_until_dataset_status_code_is(step, FINISHED, 
FAULTY, secs) + if shared is not None: + if "dataset" not in world.shared: + world.shared["dataset"] = {} + world.shared["dataset"][shared]= world.dataset + else: + world.dataset = world.shared["dataset"][shared] + print("Reusing %s" % world.dataset["resource"]) + + +def make_the_dataset_public(step): + """Step: I make the dataset public""" + resource = world.api.update_dataset(world.dataset['resource'], + {'private': False}) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + world.location = resource['location'] + world.dataset = resource['object'] + + +def build_local_dataset_from_public_url(step): + """Step: I get the dataset status using the dataset's public url""" + world.dataset = world.api.get_dataset("public/%s" % + world.dataset['resource']) + +def dataset_status_finished(step): + """Step: the dataset's status is FINISHED""" + eq_(get_status(world.dataset)['code'], FINISHED) + + +def i_create_a_split_dataset(step, rate): + """Step: I create a dataset extracting a sample""" + world.origin_dataset = world.dataset + resource = world.api.create_dataset(world.dataset['resource'], + {'sample_rate': float(rate)}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.dataset = resource['object'] + world.datasets.append(resource['resource']) + + +def i_create_a_multidataset(step, ranges): + """Step: I create a multidataset with ranges """ + ranges = json.loads(ranges) + datasets = world.datasets[-len(ranges):] + world.origin_dataset = world.dataset + resource = world.api.create_dataset( \ + datasets, + {'sample_rates': dict(list(zip(datasets, ranges)))}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.dataset = resource['object'] + world.datasets.append(resource['resource']) + + +def i_create_a_multidataset_mixed_format(step, ranges): + """Step: I create a multi-dataset with same datasets and the first sample + rate + 
""" + ranges = json.loads(ranges) + dataset = world.dataset['resource'] + origins = [] + for value in ranges: + if value == 1: + origins.append(dataset) + else: + origins.append({"id": dataset, + "sample_rate": value}) + world.origin_dataset = world.dataset + resource = world.api.create_dataset( \ + origins) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.dataset = resource['object'] + world.datasets.append(resource['resource']) + + +def i_compare_datasets_instances(step): + """Step: I compare the datasets' instances""" + world.datasets_instances = (world.dataset['rows'], + world.origin_dataset['rows']) + + +def proportion_datasets_instances(step, rate): + """Step: the proportion of instances between datasets is """ + eq_(int(world.datasets_instances[1] * float(rate)), + world.datasets_instances[0]) + + +def i_create_a_dataset_from_cluster(step, centroid_id): + """Step: I create a dataset associated to centroid """ + resource = world.api.create_dataset( + world.cluster['resource'], + args={'centroid': centroid_id}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.dataset = resource['object'] + world.datasets.append(resource['resource']) + + +def i_create_a_dataset_from_cluster_centroid(step): + """Step: I create a dataset from the cluster and the centroid""" + i_create_a_dataset_from_cluster(step, world.centroid['centroid_id']) + + +def is_associated_to_centroid_id(step, centroid_id): + """Step: the dataset is associated to the centroid + of the cluster + """ + cluster = world.api.get_cluster(world.cluster['resource']) + world.status = cluster['code'] + eq_(world.status, HTTP_OK) + eq_("dataset/%s" % (cluster['object']['cluster_datasets'][centroid_id]), + world.dataset['resource']) + + +def i_check_dataset_from_cluster_centroid(step): + """Step: I check that the dataset is created for the cluster and the + centroid + """ + 
is_associated_to_centroid_id(step, world.centroid['centroid_id']) + + +def i_update_dataset_with(step, data="{}"): + """Step: I update the dataset with params """ + resource = world.api.update_dataset(world.dataset.get('resource'), + json.loads(data)) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + + +def clone_dataset(step, dataset): + """Step: I clone dataset""" + resource = world.api.clone_dataset(dataset, {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.dataset = resource['object'] + # save reference + world.datasets.append(resource['resource']) + + +def the_cloned_dataset_is(step, dataset): + """Checking the dataset is a clone""" + eq_(world.dataset["origin"], dataset) + + +def check_annotations(step, annotations_field, annotations_num): + """Checking the dataset contains a number of annotations""" + annotations_num = int(annotations_num) + field = world.dataset["fields"][annotations_field] + if field["optype"] == "regions": + count = field["summary"]["regions"]["sum"] + else: + count = 0 + for _, num in field["summary"]["categories"]: + count += num + eq_(count, annotations_num) diff --git a/bigml/tests/create_ensemble_steps.py b/bigml/tests/create_ensemble_steps.py new file mode 100644 index 00000000..7113dfde --- /dev/null +++ b/bigml/tests/create_ensemble_steps.py @@ -0,0 +1,204 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member,broad-except +# +# Copyright 2012-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +import json +import os + +from bigml.api import HTTP_CREATED +from bigml.api import FINISHED, FAULTY +from bigml.ensemble import Ensemble +from bigml.ensemblepredictor import EnsemblePredictor +from bigml.model import Model +from bigml.supervised import SupervisedModel +from bigml.local_model import LocalModel + +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_ + +NO_MISSING_SPLITS = {'missing_splits': False} +ENSEMBLE_SAMPLE = {'seed': 'BigML', + 'ensemble_sample': {"rate": 0.7, "seed": 'BigML'}} + + +def i_create_an_ensemble(step, number_of_models=2, shared=None): + """Step: I create an ensemble of models""" + if shared is None or world.shared.get("ensemble", {}).get(shared) is None: + dataset = world.dataset.get('resource') + try: + number_of_models = int(number_of_models) + # tlp is no longer used + args = {'number_of_models': number_of_models} + except Exception: + args = {} + args.update(NO_MISSING_SPLITS) + args.update(ENSEMBLE_SAMPLE) + resource = world.api.create_ensemble(dataset, args=args) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.ensemble = resource['object'] + world.ensemble_id = resource['resource'] + world.ensembles.append(resource['resource']) + + +def wait_until_ensemble_status_code_is(step, code1, code2, secs): + """Step: I wait until the ensemble status code is either or + less than + """ + world.ensemble = wait_until_status_code_is( + code1, code2, secs, world.ensemble) + + +def the_ensemble_is_finished_in_less_than(step, secs, shared=None): + """Step: I wait until the ensemble is ready less than """ + if shared is None or world.shared.get("ensemble", {}).get(shared) is None: + wait_until_ensemble_status_code_is(step, FINISHED, FAULTY, secs) + if shared is not None: + if "ensemble" not in world.shared: + 
world.shared["ensemble"] = {} + world.shared["ensemble"][shared] = world.ensemble + else: + world.ensemble = world.shared["ensemble"][shared] + world.ensemble_id = world.ensemble["resource"] + print("Reusing %s" % world.ensemble["resource"]) + + +def create_local_ensemble(step, path=None): + """Step: I create a local Ensemble""" + if path is None: + step.bigml["local_ensemble"] = Ensemble(world.ensemble_id, world.api) + step.bigml["local_model"] = Model( + step.bigml["local_ensemble"].model_ids[0], world.api) + else: + step.bigml["local_ensemble"] = Ensemble(res_filename(path)) + step.bigml["local_model"] = step.bigml[ + "local_ensemble"].multi_model.models[0] + + +def create_local_supervised_ensemble(step): + """Step: I create a local SupervisedModel for the ensemble""" + step.bigml["local_ensemble"] = SupervisedModel(world.ensemble_id, world.api) + step.bigml["local_model"] = Model(step.bigml[ + "local_ensemble"].model_ids[0], world.api) + + +def create_local_bigml_ensemble(step): + """Step: I create a local LocalModel for the ensemble""" + step.bigml["local_ensemble"] = LocalModel(world.ensemble_id, world.api) + step.bigml["local_model"] = Model(step.bigml[ + "local_ensemble"].model_ids[0], world.api) + +def create_local_ensemble_predictor(step, directory): + """Step: I create a local EnsemblePredictor from """ + directory_path = res_filename(directory) + with open(os.path.join(directory_path, "ensemble.json")) as file_handler: + ensemble = json.load(file_handler) + step.bigml["local_ensemble"] = EnsemblePredictor(ensemble, directory) + + +def load_full_ensemble(step, directory): + """Step: Given I load the full ensemble information from """ + model_list = [] + directory_path = res_filename(directory) + with open(os.path.join(directory_path, "ensemble.json")) as file_handler: + ensemble = json.load(file_handler) + model_list.append(ensemble) + for model_id in ensemble["object"]["models"]: + with open(os.path.join(directory_path, model_id.replace("/", "_"))) \ + as file_handler: + model =
json.load(file_handler) + model_list.append(model) + return model_list + + +def create_local_ensemble_with_list(step, number_of_models): + """Step: I create a local Ensemble with the last + models + """ + step.bigml["local_ensemble"] = Ensemble(world.models[-int(number_of_models):], + world.api) + + +def create_local_ensemble_from_list(step, model_list): + """Step: I create a local ensemble from the ensemble + models list + """ + step.bigml["local_ensemble"] = Ensemble(model_list) + + +def create_local_ensemble_with_list_of_local_models(step, number_of_models): + """Step: I create a local Ensemble with the last + local models""" + local_models = [Model(model) for model in + world.models[-int(number_of_models):]] + step.bigml["local_ensemble"] = Ensemble(local_models, world.api) + + +def field_importance_print(step, field_importance): + """Step: the field importance text is """ + field_importance_data = step.bigml["local_ensemble"].field_importance_data()[0] + eq_(field_importance_data, json.loads(field_importance)) + + +def i_create_an_ensemble_with_params(step, params): + """Step: I create an ensemble with """ + dataset = world.dataset.get('resource') + try: + args = json.loads(params) + except Exception: + args = {} + args.update(ENSEMBLE_SAMPLE) + resource = world.api.create_ensemble(dataset, args=args) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.ensemble = resource['object'] + world.ensemble_id = resource['resource'] + world.ensembles.append(resource['resource']) + + +def i_export_ensemble(step, filename): + """Step: I export the ensemble""" + world.api.export(world.ensemble.get('resource'), + filename=res_filename(filename)) + + +def i_create_local_ensemble_from_file(step, export_file): + """Step: I create a local ensemble from file """ + step.bigml["local_ensemble"] = Ensemble(res_filename(export_file)) + + +def check_ensemble_id_local_id(step): + """Step: the ensemble ID and the local 
ensemble ID match""" + eq_(step.bigml["local_ensemble"].resource_id, world.ensemble["resource"]) + + +def clone_ensemble(step, ensemble): + """Step: I clone ensemble""" + resource = world.api.clone_ensemble(ensemble, + {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.ensemble = resource['object'] + # save reference + world.ensembles.append(resource['resource']) + + +def the_cloned_ensemble_is(step, ensemble): + """Checking the ensemble is a clone""" + eq_(world.ensemble["origin"], ensemble) diff --git a/bigml/tests/create_evaluation_steps.py b/bigml/tests/create_evaluation_steps.py new file mode 100644 index 00000000..c7412a38 --- /dev/null +++ b/bigml/tests/create_evaluation_steps.py @@ -0,0 +1,125 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2012, 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 

import json

from bigml.api import HTTP_CREATED
from bigml.api import FINISHED, FAULTY
from bigml.evaluation import Evaluation

from .read_resource_steps import wait_until_status_code_is
from .world import world, eq_, ok_, res_filename, approx_


def _create_evaluation_for(world_attr, *extra_args):
    """Create an evaluation of ``world.<world_attr>`` against ``world.dataset``.

    ``extra_args`` are forwarded positionally to
    ``world.api.create_evaluation`` after the evaluated resource id and the
    dataset id. The response code is checked against HTTP_CREATED; status,
    location and object are stored in ``world`` and the new resource id is
    appended to ``world.evaluations``.
    """
    dataset_id = world.dataset.get('resource')
    evaluated_id = getattr(world, world_attr).get('resource')
    response = world.api.create_evaluation(
        evaluated_id, dataset_id, *extra_args)
    world.status = response['code']
    eq_(world.status, HTTP_CREATED)
    world.location = response['location']
    world.evaluation = response['object']
    world.evaluations.append(response['resource'])


def i_create_an_evaluation(step, shared=None):
    """Step: I create an evaluation for the model with the dataset.

    ``shared`` is accepted for signature compatibility and is not used.
    """
    _create_evaluation_for("model")


def i_create_an_evaluation_ensemble(step, params=None):
    """Step: I create an evaluation for the ensemble with the dataset.

    ``params`` is passed as the creation arguments; ``None`` becomes ``{}``.
    """
    creation_args = {} if params is None else params
    _create_evaluation_for("ensemble", creation_args)


def i_create_an_evaluation_logistic(step):
    """Step: I create an evaluation for the logistic regression with
    the dataset.
    """
    _create_evaluation_for("logistic_regression")


def i_create_an_evaluation_deepnet(step):
    """Step: I create an evaluation for the deepnet with the dataset."""
    _create_evaluation_for("deepnet")


def i_create_an_evaluation_fusion(step):
    """Step: I create an evaluation for the fusion with the dataset."""
    _create_evaluation_for("fusion")


def wait_until_evaluation_status_code_is(step, code1, code2, secs):
    """Step: I wait until the evaluation status code is either <code1> or
    <code2> less than <secs>.

    Stores whatever ``wait_until_status_code_is`` returns back into
    ``world.evaluation``.
    """
    refreshed = wait_until_status_code_is(
        code1, code2, secs, world.evaluation)
    world.evaluation = refreshed


def the_evaluation_is_finished_in_less_than(step, secs):
    """Step: I wait until the evaluation is ready less than <secs>."""
    wait_until_evaluation_status_code_is(step, FINISHED, FAULTY, secs)


def the_measured_measure_is_value(step, measure, value):
    """Step: the measured <measure> is <value>."""
    # Adding 0.0 keeps the original numeric coercion of the stored measure.
    measured = world.evaluation['result']['model'][measure] + 0.0
    expected = float(value)
    failure_text = "The %s is: %s and %s is expected" % (
        measure, measured, expected)
    eq_(measured, expected, failure_text)


def the_measured_measure_is_greater_value(step, measure, value):
    """Step: the measured <measure> is greater than <value>."""
    measured = float(world.evaluation['result']['model'][measure])
    threshold = float(value)
    ok_(measured > threshold)


def i_create_a_local_evaluation(step, filename):
    """Step: I create an Evaluation from the <filename> JSON file.

    The path is resolved with ``res_filename`` and the resulting local
    Evaluation is stored in ``step.bigml["local_evaluation"]``.
    """
    json_path = res_filename(filename)
    with open(json_path) as json_file:
        evaluation_info = json.load(json_file)
    step.bigml["local_evaluation"] = Evaluation(evaluation_info)


def the_local_metric_is_value(step, metric, value):
    """Step: the <metric> in the local evaluation is <value>.

    Compared through ``approx_`` with ``precision=4``.
    """
    local_evaluation = step.bigml["local_evaluation"]
    approx_(getattr(local_evaluation, metric), value, precision=4)
diff --git a/bigml/tests/create_execution_steps.py b/bigml/tests/create_execution_steps.py new file mode 100644 index 00000000..6d4d69a6 --- /dev/null +++ b/bigml/tests/create_execution_steps.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY +from bigml.execution import Execution + +from .read_resource_steps import wait_until_status_code_is +from .world import world, eq_ + + +def the_execution_and_attributes(step, param, param_value, result): + """Step: the script id is correct, the value of is + and the result is + """ + eq_(world.script['resource'], world.execution['script']) + eq_(world.execution['execution']['results'][0], result) + res_param_value = world.execution[param] + eq_(res_param_value, param_value, + ("The execution %s is %s and the expected %s is %s" % + (param, param_value, param, param_value))) + + +def the_execution_ids_and_attributes(step, number_of_scripts, + param, param_value, result): + """Step: the script ids are correct, the value of is + and the result is + """ + scripts = world.scripts[-number_of_scripts:] + eq_(scripts, world.execution['scripts']) + eq_(world.execution['execution']['results'], result) + res_param_value = world.execution[param] + eq_(res_param_value, param_value, + ("The execution %s is %s and the 
expected %s is %s" % + (param, param_value, param, param_value))) + + +def i_create_an_execution(step): + """Step: I create a whizzml execution from an existing script""" + resource = world.api.create_execution(world.script['resource'], + {"project": world.project_id}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.execution = resource['object'] + world.executions.append(resource['resource']) + + +def i_create_an_execution_from_list(step, number_of_scripts=2): + """Step: I create a whizzml execution from the last two scripts""" + scripts = world.scripts[-number_of_scripts:] + resource = world.api.create_execution(scripts, + {"project": world.project_id}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.execution = resource['object'] + world.executions.append(resource['resource']) + + +def i_update_an_execution(step, param, param_value): + """Step: I update the execution with , """ + resource = world.api.update_execution(world.execution['resource'], + {param: param_value}) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + world.location = resource['location'] + world.execution = resource['object'] + + +def wait_until_execution_status_code_is(step, code1, code2, secs): + """Step: I wait until the execution status code is either or + less than """ + world.execution = wait_until_status_code_is( + code1, code2, secs, world.execution) + + +def the_execution_is_finished(step, secs): + """Steps: I wait until the script is ready less than """ + wait_until_execution_status_code_is(step, FINISHED, FAULTY, secs) + + +def create_local_execution(step): + """Step: I create a local execution""" + step.bigml["local_execution"] = Execution(world.execution) + + +def the_local_execution_result_is(step, result): + """Step: And the local execution result is """ + eq_(step.bigml["local_execution"].result, result) diff --git 
a/bigml/tests/create_external_steps.py b/bigml/tests/create_external_steps.py new file mode 100644 index 00000000..08bb6f22 --- /dev/null +++ b/bigml/tests/create_external_steps.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2020-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import json + +from bigml.api import HTTP_ACCEPTED +from bigml.api import FINISHED +from bigml.api import FAULTY + +from .read_resource_steps import wait_until_status_code_is +from .world import world, eq_, ok_ + + +def i_create_external_connector(step): + """Step: I create an external connector""" + resource = world.api.create_external_connector(None, \ + {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.external_connector = resource['object'] + # save reference + world.external_connectors.append(resource['resource']) + + +def wait_until_external_connector_status_code_is(step, code1, code2, secs): + """Step: I wait until the external connector status code is either + or less than + """ + world.external_connector = wait_until_status_code_is( + code1, code2, secs, world.external_connector) + + +def the_external_connector_is_finished(step, secs): + """Step: I wait until the external_connector is ready less than """ + wait_until_external_connector_status_code_is(step, FINISHED, FAULTY, secs) + + +def 
i_update_external_connector_with(step, data="{}"): + """Step: I update the external_connector with params """ + resource = world.api.update_external_connector( \ + world.external_connector.get('resource'), json.loads(data)) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + + +def external_connector_has_args(step, args="{}"): + """Step: the external connector exists and has args """ + args = json.loads(args) + for key, value in list(args.items()): + if key in world.external_connector: + eq_(world.external_connector[key], value, + "Expected key %s: %s. Found %s" % (key, value, world.external_connector[key])) + else: + ok_(False, "No key %s in external connector." % key) diff --git a/bigml/tests/create_forecast_steps.py b/bigml/tests/create_forecast_steps.py new file mode 100644 index 00000000..15a922b8 --- /dev/null +++ b/bigml/tests/create_forecast_steps.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import json + +from bigml.api import HTTP_CREATED + +from .world import world, eq_ + + +def i_create_a_forecast(step, data=None): + """Creating forecast """ + if data is None: + data = "{}" + time_series = world.time_series['resource'] + data = json.loads(data) + resource = world.api.create_forecast(time_series, data) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.forecast = resource['object'] + world.forecasts.append(resource['resource']) + + +def the_forecast_is(step, predictions): + """Checking forecast""" + predictions = json.loads(predictions) + attrs = ["point_forecast", "model"] + for field_id in predictions: + forecast = world.forecast['forecast']['result'][field_id] + prediction = predictions[field_id] + eq_(len(forecast), len(prediction), "forecast: %s" % forecast) + for index, item in enumerate(forecast): + for attr in attrs: + eq_(item[attr], prediction[index][attr]) diff --git a/bigml/tests/create_lda_steps.py b/bigml/tests/create_lda_steps.py new file mode 100644 index 00000000..cd06ac96 --- /dev/null +++ b/bigml/tests/create_lda_steps.py @@ -0,0 +1,185 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2012-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 

import json
import os

from bigml.api import HTTP_CREATED
from bigml.api import HTTP_ACCEPTED
from bigml.api import FINISHED
from bigml.api import FAULTY
from bigml.api import get_status
from bigml.topicmodel import TopicModel

from .world import world, res_filename, eq_
from .read_resource_steps import wait_until_status_code_is


def _record_created_topic_model(response):
    """Store a topic model creation response in ``world``.

    Asserts the response code is HTTP_CREATED, keeps location and object,
    and appends the resource id to ``world.topic_models``.
    """
    world.status = response['code']
    eq_(world.status, HTTP_CREATED)
    world.location = response['location']
    world.topic_model = response['object']
    world.topic_models.append(response['resource'])


def i_create_a_topic_model(step):
    """Step: I create a Topic Model from ``world.dataset`` with fixed seeds."""
    dataset_id = world.dataset.get('resource')
    seeds = {'seed': 'BigML', 'topicmodel_seed': 'BigML'}
    _record_created_topic_model(
        world.api.create_topic_model(dataset_id, seeds))


def i_create_a_topic_model_from_dataset_list(step):
    """Step: I create a topic model from the list in
    ``step.bigml["dataset_ids"]``.
    """
    _record_created_topic_model(
        world.api.create_topic_model(step.bigml["dataset_ids"]))


def i_create_a_topic_model_with_options(step, options):
    """Step: I create a topic model with options <options>.

    ``options`` is a JSON string; the fixed seeds are written over it
    before the request is sent.
    """
    dataset_id = world.dataset.get('resource')
    creation_args = json.loads(options)
    creation_args.update(seed='BigML', topicmodel_seed='BigML')
    _record_created_topic_model(
        world.api.create_topic_model(dataset_id, creation_args))


def i_update_topic_model_name(step, name):
    """Step: I update the topic model name to <name>.

    The response must report HTTP_ACCEPTED.
    """
    new_attributes = {'name': name}
    response = world.api.update_topic_model(
        world.topic_model['resource'], new_attributes)
    world.status = response['code']
    eq_(world.status, HTTP_ACCEPTED)
    world.location = response['location']
    world.topic_model = response['object']


+def wait_until_topic_model_status_code_is(step, code1, code2, secs): + """Step: I wait until the topic model status code is either + or less than + """ + world.topic_model = wait_until_status_code_is( + code1, code2, secs, world.topic_model) + + +def the_topic_model_is_finished_in_less_than(step, secs): + """Steps: I wait until the topic model is ready less than """ + wait_until_topic_model_status_code_is(step, FINISHED, FAULTY, secs) + + +def make_the_topic_model_shared(step): + """Step: I make the topic model shared """ + resource = world.api.update_topic_model(world.topic_model['resource'], + {'shared': True}) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + world.location = resource['location'] + world.topic_model = resource['object'] + + +def get_sharing_info(step): + """Step: I get the topic_model sharing info""" + world.shared_hash = world.topic_model['shared_hash'] + world.sharing_key = world.topic_model['sharing_key'] + + +def topic_model_from_shared_url(step): + """Step: I check the topic model status using the topic model\'s + shared url + """ + world.topic_model = world.api.get_topic_model("shared/topicmodel/%s" % + world.shared_hash) + eq_(get_status(world.topic_model)['code'], FINISHED) + + +def topic_model_from_shared_key(step): + """Step: I check the topic model status using the topic model\'s + shared key + """ + username = os.environ.get("BIGML_USERNAME") + world.topic_model = world.api.get_topic_model( \ + world.topic_model['resource'], + shared_username=username, shared_api_key=world.sharing_key) + eq_(get_status(world.topic_model)['code'], FINISHED) + + +def i_check_topic_model_name(step, name): + """Step: the topic model name is """ + topic_model_name = world.topic_model['name'] + eq_(name, topic_model_name) + + +def i_create_a_topic_distribution(step, data=None): + """Step: Create topic distribution """ + if data is None: + data = "{}" + topic_model = world.topic_model['resource'] + data = json.loads(data) + resource = 
world.api.create_topic_distribution(topic_model, data) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.topic_distribution = resource['object'] + world.topic_distributions.append(resource['resource']) + + +def i_create_a_local_topic_distribution(step, data=None): + """Step: I create a local topic distribution""" + step.bigml["local_topic_distribution"] = \ + step.bigml["local_topic_model"].distribution(json.loads(data)) + + +def i_export_topic_model(step, filename): + """Step: I export the topic model""" + world.api.export(world.topic_model.get('resource'), + filename=res_filename(filename)) + + +def i_create_local_topic_model_from_file(step, export_file): + """Step: I create a local topic model from file """ + step.bigml["local_topic_model"] = TopicModel(res_filename(export_file)) + + +def check_topic_model_id_local_id(step): + """Step: the topic model ID and the local topic model ID match""" + eq_(step.bigml["local_topic_model"].resource_id, + world.topic_model["resource"]) + + +def clone_topic_model(step, topic_model): + """Step: I clone topic model""" + resource = world.api.clone_topic_model(topic_model, + {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.topic_model = resource['object'] + # save reference + world.topic_models.append(resource['resource']) + + +def the_cloned_topic_model_is(step, topic_model): + """Check cloned topic model""" + eq_(world.topic_model["origin"], topic_model) diff --git a/bigml/tests/create_library_steps.py b/bigml/tests/create_library_steps.py new file mode 100644 index 00000000..dd8cb5d2 --- /dev/null +++ b/bigml/tests/create_library_steps.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in 
compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY + +from .read_resource_steps import wait_until_status_code_is +from .world import world, eq_ + + +def the_library_code_and_attributes(step, source_code, param, param_value): + """Step: the library code is and the value of + is + """ + res_param_value = world.library[param] + eq_(res_param_value, param_value, + ("The library %s is %s and the expected %s is %s" % + (param, param_value, param, param_value))) + + +def i_create_a_library(step, source_code): + """Step: I create a whizzml library from a excerpt of code """ + resource = world.api.create_library(source_code, + {"project": world.project_id}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.library = resource['object'] + world.libraries.append(resource['resource']) + + +def i_update_a_library(step, param, param_value): + """Step: I update the library with , """ + resource = world.api.update_library(world.library['resource'], + {param: param_value}) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + world.location = resource['location'] + world.library = resource['object'] + + +def wait_until_library_status_code_is(step, code1, code2, secs): + """Step: I wait until the library status code is either or + less than + """ + world.library = wait_until_status_code_is( + code1, code2, secs, world.library) + + +def the_library_is_finished(step, secs): + """Step: I wait until the library is ready less than 
""" + wait_until_library_status_code_is(step, FINISHED, FAULTY, secs) diff --git a/bigml/tests/create_linear_steps.py b/bigml/tests/create_linear_steps.py new file mode 100644 index 00000000..88fae1b9 --- /dev/null +++ b/bigml/tests/create_linear_steps.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2019-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import json + +from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY + +from .read_resource_steps import wait_until_status_code_is +from .world import world, eq_ + + +def i_check_linear_name(step, name): + """Step: the linear name is """ + linear_name = world.linear_regression['name'] + eq_(name, linear_name) + + +def i_create_a_linear_regression_from_dataset(step, shared=None): + """Step: I create a Linear Regression from a dataset""" + if shared is None or \ + world.shared.get("linear_regression", {}).get(shared) is None: + dataset = world.dataset.get('resource') + resource = world.api.create_linear_regression( + dataset, {'name': 'new linear regression'}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.linear_regression = resource['object'] + world.linear_regressions.append(resource['resource']) + + +def i_create_a_linear_regression_with_params(step, params): + """Step: I create a Linear Regression from a dataset""" + 
i_create_a_linear_regression_with_objective_and_params(step, None, params) + + +def i_create_a_linear_regression_with_objective_and_params( + step, objective=None, params=None): + """Step: I create a Linear Regression with objective and params """ + if params is not None: + params = json.loads(params) + else: + params = {} + if objective is not None: + params.update({"objective_field": objective}) + dataset = world.dataset.get('resource') + resource = world.api.create_linear_regression(dataset, params) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.linear_regression = resource['object'] + world.linear_regressions.append(resource['resource']) + + +def i_create_a_linear_regression(step, shared=None): + """Creating linear regression from dataset """ + i_create_a_linear_regression_from_dataset(step, shared=shared) + + +def i_update_linear_regression_name(step, name): + """Step: I update the linear regression name to """ + resource = world.api.update_linear_regression( \ + world.linear_regression['resource'], + {'name': name}) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + world.location = resource['location'] + world.linear_regression = resource['object'] + + +def wait_until_linear_regression_status_code_is(step, code1, code2, secs): + """Step: I wait until the linear regression status code is either + or less than + """ + world.linear_regression = wait_until_status_code_is( + code1, code2, secs, world.linear_regression) + + +def the_linear_regression_is_finished_in_less_than(step, secs, shared=None): + """#Step: I wait until the linear is ready less than """ + if shared is None or \ + world.shared.get("linear_regression", {}).get(shared) is None: + wait_until_linear_regression_status_code_is(step, FINISHED, FAULTY, secs) + if shared is not None: + if "linear_regression" not in world.shared: + world.shared["linear_regression"] = {} + world.shared["linear_regression"][shared] = 
world.linear_regression + else: + world.linear_regression = world.shared["linear_regression"][shared] + print("Reusing %s" % world.linear_regression["resource"]) + + +def clone_linear_regression(step, linear_regression): + """Step: I clone linear regression""" + resource = world.api.clone_linear_regression( + linear_regression, {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.linear_regression = resource['object'] + # save reference + world.linear_regressions.append(resource['resource']) + +def the_cloned_linear_regression_is(step, linear_regression): + """Checking linear regression is a clone""" + eq_(world.linear_regression["origin"], linear_regression) diff --git a/bigml/tests/create_model_steps.py b/bigml/tests/create_model_steps.py new file mode 100644 index 00000000..811daf30 --- /dev/null +++ b/bigml/tests/create_model_steps.py @@ -0,0 +1,698 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2012-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import json +import os + +from bigml.api import HTTP_OK +from bigml.api import HTTP_CREATED +from bigml.api import HTTP_ACCEPTED +from bigml.api import FINISHED +from bigml.api import FAULTY +from bigml.api import get_status +from bigml.api import BigML +from bigml.model import Model +from bigml.logistic import LogisticRegression +from bigml.linear import LinearRegression +from bigml.deepnet import Deepnet +from bigml.fusion import Fusion +from bigml.ensemble import Ensemble +from bigml.generators.model import get_leaves + + +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_, ok_ + + +NO_MISSING_SPLITS = {'missing_splits': False} + + +def i_create_a_model(step, shared=None): + """Step: I create a model""" + if shared is None or world.shared.get("model", {}).get(shared) is None: + dataset = world.dataset.get('resource') + resource = world.api.create_model(dataset, args=NO_MISSING_SPLITS) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.model = resource['object'] + world.models.append(resource['resource']) + + +def i_export_model(step, pmml, filename): + """Step: I export the model to file """ + world.api.export(world.model["resource"], res_filename(filename), pmml) + + +def i_export_tags_model(step, filename, tag): + """Step: I export the last model""" + world.api.export_last(tag, + filename=res_filename(filename)) + + +def i_create_a_balanced_model(step): + """Step: I create a balanced model""" + dataset = world.dataset.get('resource') + args = {} + args.update(NO_MISSING_SPLITS) + args.update({"balance_objective": True}) + resource = world.api.create_model(dataset, args=args) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.model = resource['object'] + world.models.append(resource['resource']) + + +def i_create_a_model_from_dataset_list(step): + """Step: I create a model from 
a dataset list""" + resource = world.api.create_model(step.bigml["dataset_ids"], + args=NO_MISSING_SPLITS) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.model = resource['object'] + world.models.append(resource['resource']) + + +def wait_until_model_status_code_is(step, code1, code2, secs): + """Step: I wait until the model status code is either + or less than + """ + wait_until_status_code_is(code1, code2, secs, world.model) + + +def the_model_is_finished_in_less_than(step, secs, shared=None): + """Step: I wait until the model is ready less than """ + if shared is None or world.shared.get("model", {}).get(shared) is None: + wait_until_model_status_code_is(step, FINISHED, FAULTY, secs) + if shared is not None: + if "model" not in world.shared: + world.shared["model"] = {} + world.shared["model"][shared] = world.model + print("New %s" % world.model["resource"]) + else: + world.model = world.shared["model"][shared] + print("Reusing %s" % world.model["resource"]) + + +def i_create_a_model_with(step, data="{}"): + """Step: I create a model with """ + args = json.loads(data) + if not 'missing_splits' in args: + args.update(NO_MISSING_SPLITS) + resource = world.api.create_model(world.dataset.get('resource'), + args=args) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.model = resource['object'] + world.models.append(resource['resource']) + + +def i_create_a_model_with_missing_splits(step): + """Step: I create a model with missing splits""" + i_create_a_model_with(step, data='{"missing_splits": true}') + + +def i_create_a_weighted_model_with_missing_splits(step): + """Step: I create a model with missing splits""" + i_create_a_model_with(step, data='{"missing_splits": true, "balance_objective": true}') + + +def make_the_model_public(step): + """Step: I make the model public""" + resource = world.api.update_model(world.model['resource'], 
+ {'private': False, 'white_box': True}) + world.status = resource['code'] + if world.status != HTTP_ACCEPTED: + print("unexpected status: %s" % world.status) + eq_(world.status, HTTP_ACCEPTED) + world.location = resource['location'] + world.model = resource['object'] + + +def model_from_public_url(step): + """Step: I check the model status using the model''s public url""" + world.model = world.api.get_model("public/%s" % world.model['resource']) + eq_(get_status(world.model)['code'], FINISHED) + + +def make_the_model_shared(step, cloneable=False): + """Step: I make the model shared""" + shared = {'shared': True} + if cloneable: + shared.update({"shared_clonable": True}) + resource = world.api.update_model(world.model['resource'], + shared) + world.api.ok(resource) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + world.location = resource['location'] + world.model = resource['object'] + + +def get_sharing_info(step): + """Step: I get the model sharing info""" + world.shared_hash = world.model['shared_hash'] + world.sharing_key = world.model['sharing_key'] + + +def model_from_shared_url(step): + """Step: I check the model status using the model's shared url""" + world.model = world.api.get_model("shared/model/%s" % world.shared_hash) + eq_(get_status(world.model)['code'], FINISHED) + + +def model_from_shared_key(step): + """Step: I check the model status using the model's shared key""" + username = os.environ.get("BIGML_USERNAME") + world.model = world.api.get_model(world.model['resource'], + shared_username=username, shared_api_key=world.sharing_key) + eq_(get_status(world.model)['code'], FINISHED) + + +def field_name_to_new_name(step, field_id, new_name): + """Step: field's name is changed to """ + eq_(step.bigml["local_model"].fields[field_id]['name'], new_name) + + +def i_create_a_model_from_cluster(step, centroid_id): + """Step: I create a model associated to centroid """ + resource = world.api.create_model( + world.cluster['resource'], + 
args={'centroid': centroid_id}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.model = resource['object'] + world.models.append(resource['resource']) + + +def is_associated_to_centroid_id(step, centroid_id): + """Step: the model is associated to the centroid of the + cluster + """ + cluster = world.api.get_cluster(world.cluster['resource']) + world.status = cluster['code'] + eq_(world.status, HTTP_OK) + eq_("model/%s" % (cluster['object']['cluster_models'][centroid_id]), + world.model['resource']) + + +def i_create_a_logistic_model(step, shared=None): + """Step: I create a logistic regression model""" + if shared is None or world.shared.get("logistic", {}).get(shared) is None: + dataset = world.dataset.get('resource') + resource = world.api.create_logistic_regression(dataset) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.logistic_regression = resource['object'] + world.logistic_regressions.append(resource['resource']) + + +def i_create_a_logistic_model_with_objective_and_parms(step, objective=None, + parms=None): + """Step: I create a logistic regression model with objective + and parms + """ + dataset = world.dataset.get('resource') + if parms is None: + parms = {} + else: + parms = json.loads(parms) + if objective is not None: + parms.update({"objective_field": objective}) + resource = world.api.create_logistic_regression( \ + dataset, parms) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.logistic_regression = resource['object'] + world.logistic_regressions.append(resource['resource']) + +def wait_until_logistic_model_status_code_is(step, code1, code2, secs): + """Step: I wait until the logistic regression model status code is either + or less than + """ + world.logistic_regression = wait_until_status_code_is( + code1, code2, secs, world.logistic_regression) 
+ + +def the_logistic_model_is_finished_in_less_than(step, secs, shared=None): + """Step: I wait until the logistic regression model is ready less than + + """ + if shared is None or world.shared.get("logistic", {}).get(shared) is None: + wait_until_logistic_model_status_code_is(step, FINISHED, FAULTY, secs) + if shared is not None: + if "logistic" not in world.shared: + world.shared["logistic"] = {} + world.shared["logistic"][shared] = world.logistic_regression + else: + world.logistic_regression = world.shared["logistic"][shared] + print("Reusing %s" % world.logistic_regression["resource"]) + + +def i_create_a_deepnet(step, shared=None): + """Step: I create a deepnet model""" + if shared is None or world.shared.get("deepnet", {}).get(shared) is None: + dataset = world.dataset.get('resource') + resource = world.api.create_deepnet(dataset) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.deepnet = resource['object'] + world.deepnets.append(resource['resource']) + + +def i_create_a_quick_deepnet(step): + """Step: I create a quick deepnet""" + dataset = world.dataset.get('resource') + resource = world.api.create_deepnet(dataset, {"max_training_time": 100}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.deepnet = resource['object'] + world.deepnets.append(resource['resource']) + + +def i_create_a_no_suggest_deepnet(step, shared=None): + """Step: I create a non-suggested deepnet model""" + if shared is None or \ + world.shared.get("deepnet", {}).get(shared) is None: + dataset = world.dataset.get('resource') + resource = world.api.create_deepnet(dataset, {"suggest_structure": False, + "max_iterations": 100, + "deepnet_seed": "bigml"}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.deepnet = resource['object'] + world.deepnets.append(resource['resource']) + + +def 
i_create_a_deepnet_with_objective_and_params(step, objective=None, parms=None): + """Step: I create a deepnet model with objective and parms + + """ + dataset = world.dataset.get('resource') + if parms is None: + parms = {} + else: + parms = json.loads(parms) + if objective is not None: + parms.update({"objective_field": objective}) + resource = world.api.create_deepnet(dataset, parms) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.deepnet = resource['object'] + world.deepnets.append(resource['resource']) + + +def wait_until_deepnet_model_status_code_is(step, code1, code2, secs): + """Step: I wait until the deepnet model status code is either + or less than + """ + world.deepnet = wait_until_status_code_is(code1, code2, secs, world.deepnet) + + +def the_deepnet_is_finished_in_less_than(step, secs, shared=None): + """Step: wait until the deepnet model is ready less than """ + if shared is None or world.shared.get("deepnet", {}).get(shared) is None: + wait_until_deepnet_model_status_code_is(step, FINISHED, FAULTY, secs) + if shared is not None: + if "deepnete" not in world.shared: + world.shared["deepnet"] = {} + world.shared["deepnet"][shared] = world.deepnet + else: + world.deepnet = world.shared["deepnet"][shared] + print("Reusing %s" % world.deepnet["resource"]) + + +def i_check_model_stored(step, filename, pmml): + """Step: I check the model is stored in file in """ + with open(res_filename(filename)) as file_handler: + content = file_handler.read() + model_id = world.model["resource"][ \ + (world.model["resource"].index("/") + 1):] + ok_(content.index(model_id) > -1) + + +def i_read_model_file(step, filename): + """Step: I read model from file """ + with open(res_filename(filename)) as file_handler: + content = file_handler.read() + world.model = json.loads(content) + + +def i_create_an_optiml(step): + """Step: I create an optiml""" + dataset = world.dataset.get('resource') + resource = 
world.api.create_optiml(dataset) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.optiml = resource['object'] + world.optimls.append(resource['resource']) + + +def i_create_an_optiml_with_objective_and_params(step, objective=None, parms=None): + """Step: I create an optiml model with objective and parms + + """ + dataset = world.dataset.get('resource') + if parms is None: + parms = {} + else: + parms = json.loads(parms) + if objective is not None: + parms.update({"objective_field": objective}) + resource = world.api.create_optiml(dataset, parms) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.optiml = resource['object'] + world.optimls.append(resource['resource']) + + +def wait_until_optiml_status_code_is(step, code1, code2, secs): + """Step: I wait until the optiml status code is either or + less than + """ + world.optiml = wait_until_status_code_is(code1, code2, secs, world.optiml) + + +def the_optiml_is_finished_in_less_than(step, secs): + """Step: I wait until the optiml is ready less than """ + wait_until_optiml_status_code_is(step, FINISHED, FAULTY, secs) + + +def i_update_optiml_name(step, name): + """Step: I update the optiml name to """ + resource = world.api.update_optiml(world.optiml['resource'], + {'name': name}) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + world.location = resource['location'] + world.optiml = resource['object'] + + +def i_check_optiml_name(step, name): + """Step: the optiml name is """ + optiml_name = world.optiml['name'] + eq_(name, optiml_name) + + +def i_create_a_fusion(step): + """Step: I create a fusion""" + resource = world.api.create_fusion(world.list_of_models, + {"project": world.project_id}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.fusion = resource['object'] + 
world.fusions.append(resource['resource']) + + +def i_create_a_fusion_with_weights(step, weights=None): + """Step: I create a fusion with weights""" + if weights is None: + weights = list(range(1, len(world.list_of_models))) + else: + weights = json.loads(weights) + models = [] + try: + for index, model in enumerate(world.list_of_models): + models.append({"id": model["resource"], "weight": weights[index]}) + except IndexError: + pass + resource = world.api.create_fusion(models, + {"project": world.project_id}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.fusion = resource['object'] + world.fusions.append(resource['resource']) + + +def i_create_a_fusion_with_objective_and_params(step, objective, parms=None): + """Step: I create a fusion with objective and parms """ + models = world.list_models + if parms is None: + parms = {} + else: + parms = json.loads(parms) + parms.update({"objective_field": objective, "project": world.project_id}) + resource = world.api.create_fusion(models, parms) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.fusion = resource['object'] + world.fusions.append(resource['resource']) + + +def wait_until_fusion_status_code_is(step, code1, code2, secs): + """Step: I wait until the fusion status code is either or + less than + """ + world.fusion = wait_until_status_code_is(code1, code2, secs, world.fusion) + + +def the_fusion_is_finished_in_less_than(step, secs): + """Step: I wait until the fusion is ready less than """ + wait_until_fusion_status_code_is(step, FINISHED, FAULTY, secs) + + +def i_update_fusion_name(step, name): + """Step: I update the fusion name to """ + resource = world.api.update_fusion(world.fusion['resource'], + {'name': name}) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + world.location = resource['location'] + world.fusion = resource['object'] + + +def 
i_check_fusion_name(step, name): + """Step: the fusion name is """ + fusion_name = world.fusion['name'] + eq_(name, fusion_name) + + +def i_create_local_model_from_file(step, export_file): + """Step: I create a local model from file """ + step.bigml["local_model"] = Model( \ + res_filename(export_file), + api=BigML("wrong-user", "wrong-api-key")) + + +def check_model_id_local_id(step): + """Step: the model ID and the local model ID match""" + eq_(step.bigml["local_model"].resource_id, world.model["resource"]) + + +def i_export_ensemble(step, filename): + """Step: I export the ensemble""" + world.api.export(world.ensemble.get('resource'), + filename=res_filename(filename)) + + +def i_create_local_ensemble_from_file(step, export_file): + """Step: I create a local ensemble from file """ + step.bigml["local_ensemble"] = Ensemble( \ + res_filename(export_file), + api=BigML("wrong-user", "wrong-api-key")) + + +def check_ensemble_id_local_id(step): + """Step: the ensemble ID and the local ensemble ID match""" + eq_(step.bigml["local_ensemble"].resource_id, world.ensemble["resource"]) + + +def i_export_logistic_regression(step, filename): + """Step: I export the logistic regression""" + world.api.export(world.logistic_regression.get('resource'), + filename=res_filename(filename)) + + +def i_create_local_logistic_regression_from_file(step, export_file): + """Step: I create a local logistic regressin from file """ + step.bigml["local_logistic"] = LogisticRegression( \ + res_filename(export_file), + api=BigML("wrong-user", "wrong-api-key")) + + +def check_logistic_regression_id_local_id(step): + """Step: the logistic ID and the local logistic ID match""" + eq_(step.bigml["local_logistic"].resource_id, world.logistic_regression["resource"]) + + +def i_export_deepnet(step, filename): + """Step: I export the deepnet""" + world.api.export(world.deepnet.get('resource'), + filename=res_filename(filename)) + + +def i_create_local_deepnet_from_file(step, export_file): + """Step: I 
create a local deepnet from file """ + step.bigml["local_deepnet"] = Deepnet(res_filename(export_file), + api=BigML("wrong-user", "wrong-api-key")) + + +def i_export_fusion(step, filename): + """Step: I export the fusion""" + world.api.export(world.fusion.get('resource'), + filename=res_filename(filename)) + + +def i_create_local_fusion_from_file(step, export_file): + """Step: I create a local fusion from file """ + step.bigml["local_fusion"] = Fusion( \ + res_filename(export_file), api=BigML("wrong-user", "wrong-api-key")) + + +def check_fusion_id_local_id(step): + """Step: the fusion ID and the local fusion ID match""" + eq_(step.bigml["local_fusion"].resource_id, world.fusion["resource"]) + + +def i_export_linear_regression(step, filename): + """Step: I export the linear regression""" + world.api.export(world.linear_regression.get('resource'), + filename=res_filename(filename)) + + +def i_create_local_linear_regression_from_file(step, export_file): + """Step: I create a local linear regression from file """ + step.bigml["local_linear_regression"] = LinearRegression( \ + res_filename(export_file), api=BigML("wrong-user", "wrong-api-key")) + + +def check_linear_regression_id_local_id(step): + """Step: the linear regression ID and the local linear regression ID + match + """ + eq_(step.bigml["local_linear_regression"].resource_id, + world.linear_regression["resource"]) + + +def local_logistic_prediction_is(step, input_data, prediction): + """Checking local logistic prediction""" + eq_(step.bigml["local_logistic"].predict(input_data), prediction) + + +def local_linear_prediction_is(step, input_data, prediction): + """Checking local linear prediction""" + eq_(step.bigml["local_linear_regression"].predict(input_data), + prediction, precision=5) + +def local_deepnet_prediction_is(step, input_data, prediction): + """Checking local deepnet prediction""" + eq_(step.bigml["local_deepnet"].predict(input_data), prediction, precision=4) + + +def 
local_ensemble_prediction_is(step, input_data, prediction): + """Checking local ensemble prediction""" + eq_(step.bigml["local_ensemble"].predict(input_data), prediction, precision=5) + + +def local_model_prediction_is(step, input_data, prediction): + """Checking local model prediction""" + eq_(step.bigml["local_model"].predict(input_data), prediction, precision=5) + + +def local_cluster_prediction_is(step, input_data, prediction): + """Checking local cluster prediction""" + eq_(step.bigml["local_cluster"].centroid(input_data), prediction) + + +def local_anomaly_prediction_is(step, input_data, prediction): + """Checking local anomaly prediction""" + eq_(step.bigml["local_anomaly"].anomaly_score(input_data), prediction) + + +def local_association_prediction_is(step, input_data, prediction): + """Checking local association prediction""" + eq_(step.bigml["local_association"].association_set(input_data), prediction) + + +def local_time_series_prediction_is(step, input_data, prediction): + """Checking local time series prediction""" + eq_(step.bigml["local_time_series"].centroid(input_data), prediction) + + +def clone_model(step, model): + """Step: I clone model + """ + resource = world.api.clone_model(model, {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.model = resource['object'] + # save reference + world.models.append(resource['resource']) + + +def the_cloned_model_is(step, model): + """Checking the model is a clone""" + eq_(world.model["origin"], model) + + +def clone_deepnet(step, deepnet): + """Step: I clone deepnet""" + resource = world.api.clone_deepnet(deepnet, {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.deepnet = resource['object'] + # save reference + world.deepnets.append(resource['resource']) + + +def the_cloned_deepnet_is(step, deepnet): + """Checking the deepnet is a clone""" + 
eq_(world.deepnet["origin"], deepnet) + + +def clone_logistic_regression(step, logistic_regression): + """Step: I clone logistic regression""" + resource = world.api.clone_logistic_regression( + logistic_regression, {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.logistic_regression = resource['object'] + # save reference + world.logistic_regressions.append(resource['resource']) + + +def the_cloned_logistic_regression_is(step, logistic_regression): + """Checking logistic regression is a clone""" + eq_(world.logistic_regression["origin"], logistic_regression) + + +def check_deepnet_id_local_id(step): + """Checking that deepnet ID and local deepnet ID match""" + eq_(world.deepnet["resource"], step.bigml["local_deepnet"].resource_id) + + +def check_leaves_number(step, leaves_number): + """Checking the number of leaves in a tree local model""" + eq_(len(get_leaves(step.bigml["local_model"])), leaves_number) diff --git a/bigml/tests/create_multimodel_steps.py b/bigml/tests/create_multimodel_steps.py new file mode 100644 index 00000000..7fe82a82 --- /dev/null +++ b/bigml/tests/create_multimodel_steps.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
from .world import world, ok_


def i_store_dataset_id(step):
    """Step: I store the dataset id in a list

    Appends ``world.dataset['resource']`` to ``step.bigml["dataset_ids"]``.
    The list is created first when the entry is missing or is ``None``.
    """
    if step.bigml.get("dataset_ids") is None:
        step.bigml["dataset_ids"] = []
    step.bigml["dataset_ids"].append(world.dataset['resource'])


def i_check_model_datasets_and_datasets_ids(step):
    """Step: I check the model stems from the original dataset list

    Passes when ``world.model`` has a ``datasets`` entry equal to
    ``step.bigml["dataset_ids"]``; otherwise ``ok_`` receives a message
    listing both sides.

    Raises ``KeyError`` if no dataset id was ever stored in
    ``step.bigml["dataset_ids"]``.
    """
    expected_ids = step.bigml["dataset_ids"]
    # Read the entry once with .get(): the message below is evaluated before
    # ok_ is called, so indexing model['datasets'] there would raise KeyError
    # for a model without that entry instead of reporting the failed check.
    model_datasets = world.model.get('datasets')
    ok_(model_datasets is not None and model_datasets == expected_ids,
        ("The model contains only %s and the dataset ids are %s" %
         (",".join(model_datasets or []), ",".join(expected_ids))))


# The rest of this physical line in the flattened patch is the header and
# licence banner of the next file in the diff; it is kept below as comments.
#
# diff --git a/bigml/tests/create_pca_steps.py b/bigml/tests/create_pca_steps.py
# new file mode 100644
# index 00000000..c5a8ff09
# --- /dev/null
# +++ b/bigml/tests/create_pca_steps.py
# @@ -0,0 +1,106 @@
#
# -*- coding: utf-8 -*-
#pylint: disable=locally-disabled,unused-argument,no-member
#
# Copyright 2018-2025 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import json

from bigml.api import HTTP_CREATED, HTTP_ACCEPTED
from bigml.api import FINISHED, FAULTY

from .read_resource_steps import wait_until_status_code_is
from .world import world, eq_


def _track_created_pca(response):
    """Record a PCA creation response in ``world``.

    Stores the response code, asserts it equals ``HTTP_CREATED``, then stores
    the location and object and appends the resource id to ``world.pcas``.
    """
    world.status = response['code']
    eq_(world.status, HTTP_CREATED)
    world.location = response['location']
    world.pca = response['object']
    world.pcas.append(response['resource'])


def i_check_pca_name(step, name):
    """Step: the pca name is <name>"""
    eq_(name, world.pca['name'])


def i_create_a_pca_from_dataset(step, shared=None):
    """Step: I create a PCA from a dataset

    Creates a PCA named 'new PCA' from ``world.dataset``. Nothing is created
    when ``shared`` is given and ``world.shared["pca"]`` already holds an
    entry under that key.
    """
    if shared is not None and \
            world.shared.get("pca", {}).get(shared) is not None:
        return
    dataset_id = world.dataset.get('resource')
    _track_created_pca(world.api.create_pca(dataset_id, {'name': 'new PCA'}))


def i_create_a_pca_with_params(step, params):
    """Step: I create a PCA from a dataset, passing the JSON-encoded
    <params> as creation arguments
    """
    creation_args = json.loads(params)
    dataset_id = world.dataset.get('resource')
    _track_created_pca(world.api.create_pca(dataset_id, creation_args))


def i_create_a_pca(step, shared=None):
    """Creating a PCA (delegates to ``i_create_a_pca_from_dataset``)"""
    i_create_a_pca_from_dataset(step, shared=shared)


def i_update_pca_name(step, name):
    """Step: I update the PCA name to <name>

    Asserts the update response code equals ``HTTP_ACCEPTED`` and stores the
    updated object in ``world.pca``.
    """
    changes = {'name': name}
    response = world.api.update_pca(world.pca['resource'], changes)
    world.status = response['code']
    eq_(world.status, HTTP_ACCEPTED)
    world.location = response['location']
    world.pca = response['object']


def wait_until_pca_status_code_is(step, code1, code2, secs):
    """Step: I wait until the PCA status code is either <code1> or
    <code2> less than <secs>

    ``world.pca`` is replaced by the value ``wait_until_status_code_is``
    returns.
    """
    refreshed = wait_until_status_code_is(code1, code2, secs, world.pca)
    world.pca = refreshed


def the_pca_is_finished_in_less_than(step, secs, shared=None):
    """Step: I wait until the PCA is ready less than <secs>

    When ``shared`` is given and a PCA is already cached under that key in
    ``world.shared["pca"]``, that PCA becomes ``world.pca`` and no waiting
    happens. Otherwise the step waits and, if ``shared`` is given, caches the
    resulting ``world.pca`` under that key.
    """
    cached = None
    if shared is not None:
        cached = world.shared.get("pca", {}).get(shared)

    if cached is not None:
        # Reuse path: the cache already holds a PCA for this key.
        world.pca = cached
        print("Reusing %s" % world.pca["resource"])
        return

    wait_until_pca_status_code_is(step, FINISHED, FAULTY, secs)
    if shared is None:
        return
    if "pca" not in world.shared:
        world.shared["pca"] = {}
    world.shared["pca"][shared] = world.pca


def clone_pca(step, pca):
    """Step: I clone pca

    Clones ``pca`` into the project ``world.project_id`` and records the
    response in ``world``; the response code is stored but not asserted.
    """
    response = world.api.clone_pca(pca, {'project': world.project_id})
    # update status
    world.status = response['code']
    world.location = response['location']
    world.pca = response['object']
    # save reference
    world.pcas.append(response['resource'])


def the_cloned_pca_is(step, pca):
    """Checking that the ``origin`` of ``world.pca`` is ``pca``"""
    eq_(world.pca["origin"], pca)


# The rest of this physical line in the flattened patch is the header and
# licence banner of the next file in the diff; it is kept below as comments.
#
# diff --git a/bigml/tests/create_prediction_steps.py b/bigml/tests/create_prediction_steps.py
# new file mode 100644
# index 00000000..978d577c
# --- /dev/null
# +++ b/bigml/tests/create_prediction_steps.py
# @@ -0,0 +1,459 @@
#
# -*- coding: utf-8 -*-
#pylint: disable=locally-disabled,unused-argument,no-member
#
# Copyright 2015-2025 BigML
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
+ +import json + +from bigml.api import HTTP_CREATED +from bigml.api import FINISHED, FAULTY + +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_, ok_, approx_ + + +def i_create_a_prediction(step, data=None): + """Creating prediction""" + if data is None: + data = "{}" + model = world.model['resource'] + data = json.loads(data) + resource = world.api.create_prediction(model, data) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.prediction = resource['object'] + world.predictions.append(resource['resource']) + + +def i_create_a_prediction_op(step, data=None, operating_point=None): + """Creating prediction with operating point""" + if data is None: + data = "{}" + ok_(operating_point is not None) + model = world.model['resource'] + data = json.loads(data) + resource = world.api.create_prediction( \ + model, data, {"operating_point": operating_point}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.prediction = resource['object'] + world.predictions.append(resource['resource']) + + +def i_create_an_ensemble_prediction_op(step, data=None, operating_point=None): + """Creating prediction from ensemble with operating point""" + if data is None: + data = "{}" + ok_(operating_point is not None) + ensemble = world.ensemble['resource'] + data = json.loads(data) + resource = world.api.create_prediction( \ + ensemble, data, {"operating_point": operating_point}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.prediction = resource['object'] + world.predictions.append(resource['resource']) + + +def i_create_a_fusion_prediction_op(step, data=None, operating_point=None): + """Create prediction from fusion with operating point""" + if data is None: + data = "{}" + ok_(operating_point is not None) + fusion = 
world.fusion['resource'] + data = json.loads(data) + resource = world.api.create_prediction( \ + fusion, data, {"operating_point": operating_point}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.prediction = resource['object'] + world.predictions.append(resource['resource']) + + +def i_create_a_centroid(step, data=None): + """Create centroid""" + if data is None: + data = "{}" + cluster = world.cluster['resource'] + data = json.loads(data) + resource = world.api.create_centroid(cluster, data) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.centroid = resource['object'] + world.centroids.append(resource['resource']) + + +def i_create_a_proportional_prediction(step, data=None): + """Create prediction using proportional strategy for missings""" + if data is None: + data = "{}" + model = world.model['resource'] + data = json.loads(data) + resource = world.api.create_prediction(model, data, + args={'missing_strategy': 1}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.prediction = resource['object'] + world.predictions.append(resource['resource']) + + +def check_prediction(got, expected, precision=4): + """Checking prediction is as expected""" + if not isinstance(got, str): + approx_(got, float(expected), precision=precision) + else: + eq_(got, expected) + + +def the_prediction_is(step, objective, prediction, precision=4): + """Checking the prediction for objective field""" + check_prediction(world.prediction['prediction'][objective], prediction, + precision=precision) + + +def the_median_prediction_is(step, objective, prediction, precision=4): + """Checking the prediction using median""" + check_prediction(world.prediction['prediction_path'][ + 'objective_summary']['median'], prediction, precision=precision) + + +def the_centroid_is_with_distance(step, centroid, 
distance): + """Checking expected centroid and distance""" + check_prediction(world.centroid['centroid_name'], centroid) + check_prediction(world.centroid['distance'], distance) + + +def the_centroid_is(step, centroid): + """Checking centroid""" + check_prediction(world.centroid['centroid_name'], centroid) + + +def the_centroid_is_ok(step): + """Checking centroid is ready""" + ok_(world.api.ok(world.centroid)) + + +def the_confidence_is(step, confidence): + """Checking confidence""" + local_confidence = world.prediction.get('confidence', \ + world.prediction.get('probability')) + approx_(float(local_confidence), float(confidence), precision=4) + + +def i_create_an_ensemble_prediction(step, data=None): + """Creating prediction from ensemble""" + if data is None: + data = "{}" + ensemble = world.ensemble['resource'] + data = json.loads(data) + resource = world.api.create_prediction(ensemble, data) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.prediction = resource['object'] + world.predictions.append(resource['resource']) + + +def i_create_an_ensemble_proportional_prediction(step, data=None, params=None): + """Creating prediction from ensemble using proportional strategy for + missings + """ + if data is None: + data = "{}" + if params is None: + params = {} + ensemble = world.ensemble['resource'] + data = json.loads(data) + args = {"missing_strategy": 1} + args.update(params) + resource = world.api.create_prediction(ensemble, data, args) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.prediction = resource['object'] + world.predictions.append(resource['resource']) + + +def wait_until_prediction_status_code_is(step, code1, code2, secs): + """Waiting for prediction and storing result""" + world.prediction = wait_until_status_code_is( + code1, code2, secs, world.prediction) + + +def the_prediction_is_finished_in_less_than(step, 
secs): + """Checking wait time""" + wait_until_prediction_status_code_is(step, FINISHED, FAULTY, secs) + + +def create_local_ensemble_prediction_add_confidence(step, input_data): + """Creating prediction from local ensemble with confidence""" + step.bigml["local_prediction"] = step.bigml["local_ensemble"].predict( + json.loads(input_data), full=True) + + +def create_local_ensemble_prediction(step, input_data): + """Creating prediction from local ensemble""" + step.bigml["local_prediction"] = step.bigml["local_ensemble"].predict(json.loads(input_data)) + + +def create_local_ensemble_prediction_probabilities(step, input_data): + """Creating prediction from local ensemble with probabilities""" + step.bigml["local_prediction"] = step.bigml["local_ensemble"].predict( \ + json.loads(input_data), full=True) + step.bigml["local_probabilities"] = step.bigml[ + "local_ensemble"].predict_probability( \ + json.loads(input_data), compact=True) + + +def create_local_ensemble_proportional_prediction_with_confidence( \ + step, input_data, params=None): + """Creating prediction from local ensemble with confidence""" + if params is None: + params = {} + kwargs = {"full": True, "missing_strategy": 1} + kwargs.update(params) + step.bigml["local_prediction"] = step.bigml["local_ensemble"].predict( \ + json.loads(input_data), **kwargs) + +def create_local_ensemble_prediction_using_median_with_confidence( \ + step, input_data): + """Creating prediction from local ensemble using median with confidence""" + step.bigml["local_prediction"] = step.bigml["local_ensemble"].predict( \ + json.loads(input_data), full=True) + + +def i_create_an_anomaly_score(step, data=None): + """Creating anomaly score""" + if data is None: + data = "{}" + anomaly = world.anomaly['resource'] + data = json.loads(data) + resource = world.api.create_anomaly_score(anomaly, data) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.anomaly_score = 
resource['object'] + world.anomaly_scores.append(resource['resource']) + + +def i_create_an_association_set(step, data=None): + """Creating association set""" + if data is None: + data = "{}" + association = world.association['resource'] + data = json.loads(data) + resource = world.api.create_association_set(association, data) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.association_set = resource['object'] + world.association_sets.append(resource['resource']) + + +def the_anomaly_score_is(step, score): + """Checking the expected anomaly score""" + check_prediction(world.anomaly_score['score'], score) + + +def the_logistic_prediction_is(step, prediction): + """Checking the expected logistic regression prediction""" + check_prediction(world.prediction['output'], prediction) + + +def the_fusion_prediction_is(step, prediction): + """Checking the expected fusion prediction """ + the_logistic_prediction_is(step, prediction) + + +def i_create_a_logistic_prediction(step, data=None): + """Creating a prediction from a logistic regression""" + if data is None: + data = "{}" + model = world.logistic_regression['resource'] + data = json.loads(data) + resource = world.api.create_prediction(model, data) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.prediction = resource['object'] + world.predictions.append(resource['resource']) + + +def i_create_a_deepnet_prediction(step, data=None, image_fields=None): + """Creating a prediction from a deepnet""" + if data is None: + data = "{}" + if image_fields is None: + image_fields = [] + deepnet = world.deepnet['resource'] + data = json.loads(data) + data_image_fields = [] + for field in image_fields: + if field in data: + data[field] = res_filename(data[field]) + data_image_fields.append(field) + resource = world.api.create_prediction(deepnet, data) + world.status = resource['code'] + 
eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.prediction = resource['object'] + for field in data_image_fields: + world.sources.append(world.prediction["input_data"][field]) + world.predictions.append(resource['resource']) + + +def i_create_a_deepnet_prediction_with_op(step, data=None, + operating_point=None): + """Creating a prediction from a deepnet with operating point""" + if data is None: + data = "{}" + deepnet = world.deepnet['resource'] + data = json.loads(data) + resource = world.api.create_prediction( \ + deepnet, data, {"operating_point": operating_point}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.prediction = resource['object'] + world.predictions.append(resource['resource']) + + +def i_create_a_logistic_prediction_with_op(step, data=None, + operating_point=None): + """Creating a prediction from a logistic regression with operating point""" + if data is None: + data = "{}" + logistic_regression = world.logistic_regression['resource'] + data = json.loads(data) + resource = world.api.create_prediction( \ + logistic_regression, data, {"operating_point": operating_point}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.prediction = resource['object'] + world.predictions.append(resource['resource']) + + +#pylint: disable=locally-disabled,undefined-loop-variable +def the_logistic_probability_is(step, probability): + """Checking the logistic regression prediction probability""" + for [prediction, remote_probability] in world.prediction['probabilities']: + if prediction == world.prediction['output']: + break + approx_(float(remote_probability), float(probability), precision=4) + + +def the_fusion_probability_is(step, probability): + """Checking the fusion prediction probability""" + the_logistic_probability_is(step, probability) + + +def i_create_a_prediction_op_kind(step, data=None, 
operating_kind=None): + """Creating a prediction with operating kind""" + if data is None: + data = "{}" + ok_(operating_kind is not None) + model = world.model['resource'] + data = json.loads(data) + resource = world.api.create_prediction( \ + model, data, {"operating_kind": operating_kind}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.prediction = resource['object'] + world.predictions.append(resource['resource']) + + +def i_create_an_ensemble_prediction_op_kind( + step, data=None, operating_kind=None): + """Creating a prediction from an ensemble with operating kind""" + if data is None: + data = "{}" + ok_(operating_kind is not None) + ensemble = world.ensemble['resource'] + data = json.loads(data) + resource = world.api.create_prediction( \ + ensemble, data, {"operating_kind": operating_kind}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.prediction = resource['object'] + world.predictions.append(resource['resource']) + + +def i_create_a_deepnet_prediction_op_kind(step, data=None, + operating_kind=None): + """Creating a prediction from a deepnet with operating kind""" + if data is None: + data = "{}" + deepnet = world.deepnet['resource'] + data = json.loads(data) + resource = world.api.create_prediction( \ + deepnet, data, {"operating_kind": operating_kind}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.prediction = resource['object'] + world.predictions.append(resource['resource']) + + +def i_create_a_logistic_prediction_with_op_kind(step, data=None, + operating_kind=None): + """Creating a prediction from a logistic regression with operating kind""" + if data is None: + data = "{}" + logistic_regression = world.logistic_regression['resource'] + data = json.loads(data) + resource = world.api.create_prediction( \ + logistic_regression, data, 
{"operating_kind": operating_kind}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.prediction = resource['object'] + world.predictions.append(resource['resource']) + + +def i_create_a_fusion_prediction(step, data=None): + """Creating a prediction from a fusion""" + if data is None: + data = "{}" + fusion = world.fusion['resource'] + data = json.loads(data) + resource = world.api.create_prediction(fusion, data) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.prediction = resource['object'] + world.predictions.append(resource['resource']) + + +def i_create_a_linear_prediction(step, data=None): + """Creating a prediction from a linear regression""" + if data is None: + data = "{}" + linear_regression = world.linear_regression['resource'] + data = json.loads(data) + resource = world.api.create_prediction(linear_regression, data) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.prediction = resource['object'] + world.predictions.append(resource['resource']) diff --git a/bigml/tests/create_project_steps.py b/bigml/tests/create_project_steps.py new file mode 100644 index 00000000..3d997bfe --- /dev/null +++ b/bigml/tests/create_project_steps.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +from bigml.api import HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY + +from .read_resource_steps import wait_until_status_code_is +from .world import world, eq_ + + +def i_create_project(step, name): + """Creating projects """ + resource = world.api.create_project({"name": name}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.project = resource['object'] + # save reference + world.projects.append(resource['resource']) + + +def the_project_is_finished(step, secs): + """Waiting for project to be finished and storing the refreshed project""" + world.project = wait_until_status_code_is(FINISHED, FAULTY, secs, world.project) + + +def i_update_project_name_with(step, name=""): + """Updating project name""" + resource = world.api.update_project(world.project.get('resource'), + {"name": name}) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + world.project = resource['object'] + + +def i_check_project_name(step, name=""): + """Checking project name""" + updated_name = world.project.get("name", "") + eq_(updated_name, name) diff --git a/bigml/tests/create_projection_steps.py b/bigml/tests/create_projection_steps.py new file mode 100644 index 00000000..92df6cb7 --- /dev/null +++ b/bigml/tests/create_projection_steps.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument +# +# Copyright 2018-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +import json +from bigml.api import HTTP_CREATED +from bigml.api import FINISHED, FAULTY + +from .world import world, eq_ +from .read_resource_steps import wait_until_status_code_is + + +#pylint: disable=locally-disabled,no-member +def i_create_a_projection(step, data=None): + """Creating Projection""" + if data is None: + data = "{}" + pca = world.pca['resource'] + data = json.loads(data) + resource = world.api.create_projection(pca, data) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.projection = resource['object'] + world.projections.append(resource['resource']) + + +def the_projection_is(step, projection): + """Checking projection""" + if projection is None: + projection = "{}" + projection = json.loads(projection) + eq_(len(list(projection.keys())), + len(list(world.projection['projection']['result'].keys()))) + for name, value in list(projection.items()): + eq_(world.projection['projection']['result'][name], value, + "remote: %s, %s - expected: %s" % ( \ + name, world.projection['projection']['result'][name], + value)) + + +def wait_until_projection_status_code_is(step, code1, code2, secs): + """Checking status code""" + world.projection = wait_until_status_code_is( + code1, code2, secs, world.projection) + + +def the_projection_is_finished_in_less_than(step, secs): + """Wait for completion""" + wait_until_projection_status_code_is(step, FINISHED, FAULTY, secs) diff --git a/bigml/tests/create_sample_steps.py b/bigml/tests/create_sample_steps.py new file mode 100644 index 00000000..8f451f4b --- /dev/null +++ b/bigml/tests/create_sample_steps.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in 
compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY + +from .read_resource_steps import wait_until_status_code_is +from .world import world, eq_ + + +def i_check_sample_name(step, name): + """Step: the sample name is """ + sample_name = world.sample['name'] + eq_(name, sample_name) + + +def i_create_a_sample_from_dataset(step): + """Step: I create a sample from a dataset""" + dataset = world.dataset.get('resource') + resource = world.api.create_sample(dataset, {'name': 'new sample'}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.sample = resource['object'] + world.samples.append(resource['resource']) + + +def i_update_sample_name(step, name): + """Step: I update the sample name to """ + resource = world.api.update_sample(world.sample['resource'], + {'name': name}) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + world.location = resource['location'] + world.sample = resource['object'] + + +def the_sample_is_finished_in_less_than(step, secs): + """Step: I wait until the sample is ready less than """ + world.sample = wait_until_status_code_is( + FINISHED, FAULTY, secs, world.sample) diff --git a/bigml/tests/create_script_steps.py b/bigml/tests/create_script_steps.py new file mode 100644 index 00000000..cb7ab4ed --- /dev/null +++ b/bigml/tests/create_script_steps.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2015-2025 BigML +# 
+# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY +from bigml.util import is_url + +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_ + + +def the_script_code_and_attributes(step, source_code, param, param_value): + """Step: the script code is and the value of is + + """ + res_param_value = world.script[param] + eq_(res_param_value, param_value, + ("The script %s is %s and the expected %s is %s" % + (param, res_param_value, param, param_value))) + + +def i_create_a_script(step, source_code): + """Step: I create a whizzml script from a excerpt of code """ + resource = world.api.create_script(source_code, + {"project": world.project_id}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.script = resource['object'] + world.scripts.append(resource['resource']) + + +def i_create_a_script_from_file_or_url(step, source_code): + """Step: I create a whizzml script from file """ + if not is_url(source_code): + source_code = res_filename(source_code) + resource = world.api.create_script(source_code, + {"project": world.project_id}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.script = resource['object'] + world.scripts.append(resource['resource']) + + +def i_update_a_script(step, param, param_value): + 
"""Step: I update the script with , """ + resource = world.api.update_script(world.script['resource'], + {param: param_value}) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + world.location = resource['location'] + world.script = resource['object'] + + +def the_script_is_finished(step, secs): + """Step: I wait until the script is ready less than """ + world.script = wait_until_status_code_is( + FINISHED, FAULTY, secs, world.script) diff --git a/bigml/tests/create_source_steps.py b/bigml/tests/create_source_steps.py new file mode 100644 index 00000000..3eac296a --- /dev/null +++ b/bigml/tests/create_source_steps.py @@ -0,0 +1,223 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2012, 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import json +import csv + +from bigml.api import HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY + +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_, ok_ + + +def i_upload_a_file(step, filename, shared=None): + """Step: I create a data source uploading a file""" + + if shared is None or world.shared.get("source", {}).get(shared) is None: + resource = world.api.create_source(res_filename(filename), \ + {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.source = resource['object'] + # save reference + world.sources.append(resource['resource']) + + +def i_upload_a_file_with_project_conn(step, filename): + """Step: I create a data source uploading a file using + a project + """ + resource = world.api.create_source(res_filename(filename)) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.source = resource['object'] + # save reference + world.sources.append(resource['resource']) + + +def i_upload_a_file_from_stdin(step, filename): + """Step: I create a data source from stdin uploading a file """ + file_name = res_filename(filename) + with open(file_name, 'rb') as file_handler: + resource = world.api.create_source(file_handler, \ + {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.source = resource['object'] + # save reference + world.sources.append(resource['resource']) + + +def i_upload_a_file_with_args(step, filename, args): + """Step: I create a data source uploading a file with args + + """ + args = json.loads(args) + args.update({'project': world.project_id}) + resource = world.api.create_source(res_filename(filename), args) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.source = resource['object'] + # save reference + 
world.sources.append(resource['resource']) + + +def i_create_using_url(step, url): + """Step: I create a data source using the url """ + resource = world.api.create_source(url, {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.source = resource['object'] + # save reference + world.sources.append(resource['resource']) + + +def i_create_using_connector(step, connector): + """Step: I create a data source using the connection """ + resource = world.api.create_source(connector, {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.source = resource['object'] + # save reference + world.sources.append(resource['resource']) + + +def i_create_composite(step, sources): + """Step: I create from list of sources """ + resource = world.api.create_source(sources, {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.source = resource['object'] + # save reference + world.composites.append(resource['resource']) + + +def the_composite_contains(step, sources): + """Checking source in composite""" + eq_(world.source["sources"], sources) + + +def clone_source(step, source): + """Step: I clone source""" + resource = world.api.clone_source(source, {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.source = resource['object'] + # save reference + world.sources.append(resource['resource']) + + +def the_cloned_source_origin_is(step, source): + """Checking cloned source""" + eq_(world.source["origin"], source) + + +def i_create_annotated_source(step, directory, args=None): + """Creating annotated source""" + if args is None: + args = {} + args.update({'project': world.project_id}) + resource = world.api.create_annotated_source(res_filename(directory), + args) + # update status + 
world.status = resource['code'] + world.location = resource['location'] + world.source = resource['object'] + # save reference + world.composites.append(resource['resource']) + + +#pylint: disable=locally-disabled,unnecessary-comprehension +def i_create_using_dict_data(step, data): + """Step: I create a data source from inline data slurped from """ + # slurp CSV file to local variable + mode = 'rt' + with open(res_filename(data), mode) as fid: + reader = csv.DictReader(fid) + dict_data = [row for row in reader] + # create source + resource = world.api.create_source(dict_data, + {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.source = resource['object'] + # save reference + world.sources.append(resource['resource']) + + +def i_upload_a_file_async(step, filename): + """Step: I create a data source uploading a file in + asynchronous mode + """ + resource = world.api.create_source(res_filename(filename), + {'project': world.project_id}, + async_load=True) + world.resource = resource + + +def the_source_has_been_created_async(step, secs): + """Step: I wait until the source has been created less than secs""" + world.source = wait_until_status_code_is( + FINISHED, FAULTY, secs, world.source) + + +def wait_until_source_status_code_is(step, code1, code2, secs): + """Step: I wait until the source status code is either + or less than + """ + world.source = wait_until_status_code_is(code1, code2, secs, world.source) + + +def the_source_is_finished(step, secs, shared=None): + """Step: I wait until the source is ready less than """ + if shared is None or world.shared.get("source", {}).get(shared) is None: + wait_until_source_status_code_is(step, FINISHED, FAULTY, secs) + if shared is not None: + if world.shared.get("source") is None: + world.shared["source"] = {} + world.shared["source"][shared] = world.source + else: + world.source = world.shared["source"][shared] + print("Reusing %s" % 
world.source["resource"]) + + +def i_update_source_with(step, data="{}"): + """Step: I update the source with params """ + resource = world.api.update_source(world.source.get('resource'), json.loads(data)) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + + +def source_has_args(step, args="{}"): + """Step: the source exists and has args """ + args = json.loads(args) + for key, value in list(args.items()): + if key in world.source: + eq_(world.source[key], value, + "Expected key %s: %s. Found %s" % (key, value, world.source[key])) + else: + ok_(False, "No key %s in source." % key) diff --git a/bigml/tests/create_statistical_tst_steps.py b/bigml/tests/create_statistical_tst_steps.py new file mode 100644 index 00000000..44e76dd4 --- /dev/null +++ b/bigml/tests/create_statistical_tst_steps.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY + +from .read_resource_steps import wait_until_status_code_is +from .world import world, eq_ + + +def i_check_tst_name(step, name): + """Step: the statistical test name is """ + statistical_test_name = world.statistical_test['name'] + eq_(name, statistical_test_name) + + +def i_create_a_tst_from_dataset(step): + """Step: I create an statistical test from a dataset""" + dataset = world.dataset.get('resource') + resource = world.api.create_statistical_test(dataset, \ + {'name': 'new statistical test'}) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.statistical_test = resource['object'] + world.statistical_tests.append(resource['resource']) + + +def i_update_tst_name(step, name): + """Step: I update the statistical test name to """ + resource = world.api.update_statistical_test( \ + world.statistical_test['resource'], {'name': name}) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + world.location = resource['location'] + world.statistical_test = resource['object'] + + +def wait_until_tst_status_code_is(step, code1, code2, secs): + """Step: I wait until the statistical test status code is either + code1 or code2 less than """ + world.statistical_test = wait_until_status_code_is( + code1, code2, secs, world.statistical_test) + + +def the_tst_is_finished_in_less_than(step, secs): + """Step: I wait until the statistical test is ready less than """ + wait_until_tst_status_code_is(step, FINISHED, FAULTY, secs) diff --git a/bigml/tests/create_time_series_steps.py b/bigml/tests/create_time_series_steps.py new file mode 100644 index 00000000..d12fc2c8 --- /dev/null +++ b/bigml/tests/create_time_series_steps.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 
(the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import json + +from bigml.api import HTTP_CREATED, HTTP_ACCEPTED +from bigml.api import FINISHED, FAULTY +from bigml.timeseries import TimeSeries + +from .read_resource_steps import wait_until_status_code_is +from .world import world, res_filename, eq_ + + +def i_create_a_time_series(step): + """Step: I create a time series""" + dataset = world.dataset.get('resource') + resource = world.api.create_time_series(dataset) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.time_series = resource['object'] + world.time_series_set.append(resource['resource']) + + +def i_create_a_time_series_with_params(step, data="{}"): + """Step: I create a time series with params """ + args = json.loads(data) + resource = world.api.create_time_series(world.dataset.get('resource'), + args=args) + world.status = resource['code'] + eq_(world.status, HTTP_CREATED) + world.location = resource['location'] + world.time_series = resource['object'] + world.time_series_set.append(resource['resource']) + + +def the_time_series_is_finished_in_less_than(step, secs): + """Step: I wait until the time series is ready less than """ + world.time_series = wait_until_status_code_is( + FINISHED, FAULTY, secs, world.time_series) + + +def create_local_time_series(step): + """Step: I create a local TimeSeries""" + step.bigml["local_time_series"] = TimeSeries(world.time_series["resource"], + world.api) + + +def i_update_time_series_name(step, name): + 
"""Step: I update the time series name to """ + resource = world.api.update_time_series(world.time_series['resource'], + {'name': name}) + world.status = resource['code'] + eq_(world.status, HTTP_ACCEPTED) + world.location = resource['location'] + world.time_series = resource['object'] + + +def i_check_time_series_name(step, name): + """Step: the time series name is """ + time_series_name = world.time_series['name'] + eq_(name, time_series_name) + + +def i_export_time_series(step, filename): + """Step: I export the time series""" + world.api.export(world.time_series.get('resource'), + filename=res_filename(filename)) + + +def i_create_local_time_series_from_file(step, export_file): + """Step: I create a local time series from file """ + step.bigml["local_time_series"] = TimeSeries( + res_filename(export_file)) + + +def check_time_series_id_local_id(step): + """Step: the time series ID and the local time series ID match""" + eq_(step.bigml["local_time_series"].resource_id, + world.time_series["resource"]) + + +def clone_time_series(step, time_series): + """Step: I clone time series""" + resource = world.api.clone_time_series(time_series, + {'project': world.project_id}) + # update status + world.status = resource['code'] + world.location = resource['location'] + world.time_series = resource['object'] + # save reference + world.time_series_set.append(resource['resource']) + + +def the_cloned_time_series_is(step, time_series): + """Checking the time series is a clone""" + eq_(world.time_series["origin"], time_series) diff --git a/bigml/tests/delete_project_steps.py b/bigml/tests/delete_project_steps.py new file mode 100644 index 00000000..49d6ddb6 --- /dev/null +++ b/bigml/tests/delete_project_steps.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member,broad-except +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance 
with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import time +from datetime import datetime, timedelta, timezone + +from bigml.api import HTTP_NO_CONTENT, HTTP_OK, HTTP_NOT_FOUND + +from .world import world, eq_, ok_ + + +def i_delete_the_project(step): + """Deleting project""" + resource = world.api.delete_project(world.project['resource']) + world.status = resource['code'] + eq_(world.status, HTTP_NO_CONTENT) + + +def wait_until_project_deleted(step, secs): + """Waiting for delete; fails once more than secs seconds have elapsed""" + start = datetime.now(timezone.utc) + project_id = world.project['resource'] + resource = world.api.get_project(project_id) + while resource['code'] == HTTP_OK: + time.sleep(3) + ok_(datetime.now(timezone.utc) - start < timedelta(seconds=int(secs))) + resource = world.api.get_project(project_id) + eq_(resource['code'], HTTP_NOT_FOUND) + world.projects.remove(project_id) diff --git a/bigml/tests/fields_steps.py b/bigml/tests/fields_steps.py new file mode 100644 index 00000000..59336ea5 --- /dev/null +++ b/bigml/tests/fields_steps.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +from bigml.fields import Fields, get_resource_type +from bigml.io import UnicodeReader + +from .world import world, res_filename, eq_, ok_ + + +def create_fields(step, objective_column): + """Step: I create a Fields object from the source with objective column + + """ + world.fields = Fields(world.source, objective_field=int(objective_column), + objective_field_present=True) + + +def create_fields_from_dataset(step, objective_column): + """Step: I create a Fields object from the dataset with objective column + objective_column + """ + world.fields = Fields(world.dataset, objective_field=int(objective_column), + objective_field_present=True) + + +def check_objective(step, objective_id): + """Step: the object id is """ + found_id = world.fields.field_id(world.fields.objective_field) + eq_(found_id, objective_id) + + +def import_summary_file(step, summary_file): + """#Step: I import a summary fields file as a fields + structure + """ + world.fields_struct = world.fields.new_fields_structure( \ + csv_attributes_file=res_filename(summary_file)) + + +def check_field_type(step, field_id, field_type): + """Step: I check the new field structure has field as + + """ + ok_(field_id in list(world.fields_struct['fields'].keys())) + eq_(world.fields_struct['fields'][field_id]["optype"], field_type) + + +def generate_summary(step, summary_file): + """Step: I export a summary fields file """ + world.fields.summary_csv(res_filename(summary_file)) + + +def check_summary_like_expected(step, summary_file, expected_file): + """Step: I check that the fields summary file is like """ + summary_contents = [] + expected_contents = [] + with UnicodeReader(res_filename(summary_file)) as summary_handler: + for line in summary_handler: + summary_contents.append(line) + with UnicodeReader(res_filename(expected_file)) as expected_handler: + for line in expected_handler: + 
expected_contents.append(line) + eq_(summary_contents, expected_contents) + + +def update_with_summary_file(step, resource, summary_file): + """Step: I update the with the file """ + if get_resource_type(resource) == "source": + # We need to download the source again, as it could have been closed + resource = world.api.get_source(resource) + if resource.get("object", {}).get("closed", False): + resource = world.api.clone_source(resource) + world.api.ok(resource) + fields = Fields(resource) + changes = fields.filter_fields_update( \ + fields.new_fields_structure(res_filename(summary_file))) + resource_type = get_resource_type(resource) + resource = world.api.updaters[resource_type](resource, changes) + world.api.ok(resource) + setattr(world, resource_type, resource) + + +def check_resource_field_type(step, resource, field_id, optype): + """Step: I check the source has field as """ + eq_(resource["object"]["fields"][field_id]["optype"], optype) diff --git a/bigml/tests/inspect_model_steps.py b/bigml/tests/inspect_model_steps.py new file mode 100644 index 00000000..a13c90ac --- /dev/null +++ b/bigml/tests/inspect_model_steps.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,unused-argument,no-member +# +# Copyright 2012, 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import io +import json + +import bigml.generators.model as g + +from bigml.tests.world import res_filename +from bigml.predict_utils.common import extract_distribution +from bigml.util import utf8 + +from .world import world, eq_ + + +#pylint: disable=locally-disabled,invalid-name +def i_translate_the_tree_into_IF_THEN_rules(step): + """Step: I translate the tree into IF-THEN rules""" + output = io.StringIO() + g.rules(step.bigml["local_model"], out=output) + world.output = output.getvalue() + +def i_check_the_data_distribution(step, filename): + """Step: I check data distribution with file""" + distribution = g.get_data_distribution(step.bigml["local_model"]) + + distribution_str = '' + for bin_value, bin_instances in distribution: + distribution_str += "[%s,%s]\n" % (bin_value, bin_instances) + world.output = utf8(distribution_str) + i_check_if_the_output_is_like_expected_file(step, filename) + + +def i_check_the_predictions_distribution(step, filename): + """Step: I check the predictions distribution with file""" + predictions = g.get_prediction_distribution(step.bigml["local_model"]) + + distribution_str = '' + for group, instances in predictions: + distribution_str += "[%s,%s]\n" % (group, instances) + + world.output = utf8(distribution_str) + + i_check_if_the_output_is_like_expected_file(step, filename) + + +def i_check_the_model_summary_with(step, filename): + """Step: I check the model summary with file""" + output = io.StringIO() + g.summarize(step.bigml["local_model"], out=output) + world.output = output.getvalue() + i_check_if_the_output_is_like_expected_file(step, filename) + + +def i_check_if_the_output_is_like_expected_file(step, expected_file): + """Step: I check the output is like expected file""" + with open(res_filename(expected_file), "r") as handler: + expected_content = handler.read() + eq_(world.output.strip(), expected_content.strip()) + + +def i_check_print_distribution(step, filename): + """Step: I check the distribution print with 
file""" + output = io.StringIO() + _, distribution = extract_distribution( + step.bigml["local_model"].root_distribution) + g.print_distribution(distribution, output) + world.output = output.getvalue() + if world.debug: + backup = "%s.bck" % filename + with open(backup, "w") as bck_file: + bck_file.write(world.output) + i_check_if_the_output_is_like_expected_file(step, filename) + + +def i_list_fields(step, filename): + """Step: I check the list fields print with file""" + output = io.StringIO() + g.list_fields(step.bigml["local_model"], output) + world.output = output.getvalue() + if world.debug: + backup = "%s.bck" % filename + with open(backup, "w") as bck_file: + bck_file.write(world.output) + i_check_if_the_output_is_like_expected_file(step, filename) + + +def i_create_tree_csv(step, filename): + """Step: I check the tree csv print with file""" + rows = g.tree_csv(step.bigml["local_model"]) + world.output = json.dumps(rows) + if world.debug: + backup = "%s.bck" % filename + with open(backup, "w") as bck_file: + bck_file.write(world.output) + i_check_if_the_output_is_like_expected_file(step, filename) + +def update_content(filename, content): + """Step: I check the tree csv print with file""" + with open(res_filename(filename), "w") as file_handler: + file_handler.write(content) diff --git a/bigml/tests/mlflow_ensemble/ensemble.json b/bigml/tests/mlflow_ensemble/ensemble.json new file mode 100644 index 00000000..fc312d49 --- /dev/null +++ b/bigml/tests/mlflow_ensemble/ensemble.json @@ -0,0 +1 @@ +{"code": 200, "resource": "ensemble/62605abc0c11da5783002915", "location": "https://bigml.io/andromeda/ensemble/62605abc0c11da5783002915", "object": {"boosting": null, "category": 0, "code": 200, "columns": 9, "configuration": null, "configuration_status": false, "created": "2022-04-20T19:10:52.806000", "creator": "mmartin", "dataset": "dataset/62605ab1049fde5d990028f1", "dataset_field_types": {"categorical": 1, "datetime": 0, "image": 0, "items": 0, "numeric": 8, 
"path": 0, "preferred": 9, "regions": 0, "text": 0, "total": 9}, "dataset_status": true, "depth_threshold": 512, "description": "", "distributions": [{"importance": [["000001", 0.39328], ["000005", 0.19841], ["000006", 0.16783], ["000007", 0.08845], ["000003", 0.04986], ["000002", 0.0447], ["000000", 0.0369], ["000004", 0.02058]], "predictions": {"categories": [["false", 417], ["true", 197]]}, "training": {"categories": [["false", 416], ["true", 198]]}}, {"importance": [["000001", 0.33357], ["000005", 0.2589], ["000006", 0.16331], ["000000", 0.10221], ["000007", 0.04527], ["000002", 0.04284], ["000004", 0.03004], ["000003", 0.02386]], "predictions": {"categories": [["false", 410], ["true", 204]]}, "training": {"categories": [["false", 410], ["true", 204]]}}], "ensemble": {"fields": {"000000": {"column_number": 0, "datatype": "int8", "name": "pregnancies", "optype": "numeric", "order": 0, "preferred": true, "summary": {"counts": [[0, 86], [1, 110], [2, 80], [3, 56], [4, 58], [5, 45], [6, 40], [7, 37], [8, 35], [9, 22], [10, 16], [11, 9], [12, 9], [13, 8], [14, 1], [15, 1], [17, 1]], "exact_histogram": {"populations": [196, 136, 103, 77, 57, 25, 17, 2, 1], "start": 0, "width": 2}, "kurtosis": 0.14808, "maximum": 17, "mean": 3.89088, "median": 3, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 0.88555, "standard_deviation": 3.38254, "sum": 2389, "sum_squares": 16309, "variance": 11.44158}}, "000001": {"column_number": 1, "datatype": "int16", "name": "plasma glucose", "optype": "numeric", "order": 1, "preferred": true, "summary": {"bins": [[0, 5], [44, 1], [57, 2], [61.5, 2], [65, 1], [67.75, 4], [73.21429, 14], [79.52941, 17], [84.2381, 21], [88.74074, 27], [92, 19], [95.81818, 33], [100.86275, 51], [105.67647, 34], [109.38235, 34], [113.77143, 35], [118.775, 40], [124.14634, 41], [128.93548, 31], [133.46154, 13], [137.32, 25], [141.95652, 23], [146.4, 20], [152.86957, 23], [158, 14], [162.41667, 12], [166.66667, 15], [172.92857, 14], [180.77778, 18], 
[187.90909, 11], [193, 4], [196.7, 10]], "exact_histogram": {"populations": [5, 0, 0, 0, 1, 2, 7, 21, 48, 74, 93, 75, 78, 46, 43, 36, 28, 19, 23, 15], "start": 0, "width": 10}, "kurtosis": 0.68455, "maximum": 199, "mean": 121.11401, "median": 117, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 0.08447, "standard_deviation": 32.75167, "sum": 74364, "sum_squares": 9664070, "variance": 1072.67214}}, "000002": {"column_number": 2, "datatype": "int8", "name": "blood pressure", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[0, 28], [30, 2], [39, 2], [44.66667, 6], [49.625, 16], [52, 9], [55.11111, 18], [58, 19], [60, 29], [62, 23], [64.87097, 62], [68, 38], [70, 43], [72, 37], [74.84524, 84], [78, 34], [80, 34], [82, 24], [84.85366, 41], [88, 17], [90, 19], [92, 4], [95, 9], [98, 3], [100, 2], [102, 1], [104, 1], [106, 3], [108, 2], [110, 2], [114, 1], [122, 1]], "exact_histogram": {"populations": [28, 0, 0, 0, 0, 0, 2, 1, 5, 5, 29, 30, 84, 68, 125, 73, 79, 37, 27, 8, 4, 5, 3, 0, 1], "start": 0, "width": 5}, "kurtosis": 5.12315, "maximum": 122, "mean": 69.10912, "median": 72, "minimum": 0, "missing_count": 0, "population": 614, "skewness": -1.82589, "standard_deviation": 19.37631, "sum": 42433, "sum_squares": 3162653, "variance": 375.44158}}, "000003": {"column_number": 3, "datatype": "int8", "name": "triceps skin thickness", "optype": "numeric", "order": 3, "preferred": true, "summary": {"bins": [[0, 189], [7.33333, 3], [10.57143, 7], [12, 4], [13.375, 16], [15.26667, 15], [17, 13], [18.46429, 28], [20.41176, 17], [22.48387, 31], [24.4, 20], [26.5625, 32], [28.46429, 28], [30.4375, 32], [32.38636, 44], [34.68421, 19], [36.6087, 23], [38.68421, 19], [40.46429, 28], [42.38462, 13], [44, 4], [45, 6], [46, 5], [47, 4], [48, 4], [49, 2], [50, 2], [51, 1], [52, 2], [56, 1], [63, 1], [99, 1]], "exact_histogram": {"open_max": 3, "populations": [189, 0, 0, 2, 1, 7, 14, 17, 17, 28, 17, 31, 20, 32, 28, 32, 44, 19, 23, 19, 28, 13, 10, 9, 
6, 3, 2], "start": 0, "width": 2}, "kurtosis": -0.44344, "maximum": 99, "mean": 20.27687, "median": 22, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 0.14897, "standard_deviation": 16.11049, "sum": 12450, "sum_squares": 411550, "variance": 259.54801}}, "000004": {"column_number": 4, "datatype": "int16", "name": "insulin", "optype": "numeric", "order": 4, "preferred": true, "summary": {"bins": [[0, 295], [19.57143, 7], [38.36364, 11], [56.08889, 45], [76.55172, 29], [93.76471, 34], [113.11111, 27], [133.5, 38], [154.23077, 13], [173.31818, 22], [189.75, 16], [205.57143, 14], [219.28571, 7], [235.44444, 9], [268.5, 10], [292.33333, 3], [307, 2], [324.75, 8], [338.5, 2], [372.5, 2], [389.5, 2], [415, 1], [440, 1], [474.75, 4], [491.66667, 3], [510, 1], [542.66667, 3], [579, 1], [600, 1], [680, 1], [744, 1], [846, 1]], "exact_histogram": {"open_max": 3, "populations": [299, 10, 35, 34, 36, 27, 32, 24, 18, 22, 17, 10, 5, 7, 4, 3, 8, 1, 2, 2, 1, 0, 1, 2, 5, 1, 0, 3, 1, 0, 1], "start": 0, "width": 20}, "kurtosis": 7.14939, "maximum": 846, "mean": 82.72801, "median": 37, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 2.2975, "standard_deviation": 119.91638, "sum": 50795, "sum_squares": 13017071, "variance": 14379.93732}}, "000005": {"column_number": 5, "datatype": "double", "name": "bmi", "optype": "numeric", "order": 5, "preferred": true, "summary": {"bins": [[0, 9], [18.26667, 3], [19.5875, 8], [20.9375, 8], [22.13529, 17], [23.25333, 15], [24.14167, 24], [25.12059, 34], [26.32187, 32], [27.64706, 34], [28.8, 33], [30.17627, 59], [31.53077, 39], [32.85849, 53], [33.94118, 34], [34.89355, 31], [35.9129, 31], [37.34138, 29], [38.39048, 21], [39.45, 24], [40.60769, 13], [41.41429, 7], [42.90556, 18], [44.125, 8], [45.42727, 11], [46.4, 7], [48.225, 4], [49.85, 2], [52.8, 3], [55, 1], [57.3, 1], [59.4, 1]], "exact_histogram": {"populations": [9, 0, 0, 0, 0, 0, 0, 0, 0, 9, 17, 31, 59, 52, 60, 66, 81, 68, 42, 43, 21, 20, 17, 9, 3, 1, 3, 1, 
1, 1], "start": 0, "width": 2}, "kurtosis": 3.13899, "maximum": 59.4, "mean": 31.9171, "median": 32, "minimum": 0, "missing_count": 0, "population": 614, "skewness": -0.52314, "standard_deviation": 7.80358, "sum": 19597.1, "sum_squares": 662811.83, "variance": 60.89594}}, "000006": {"column_number": 6, "datatype": "double", "name": "diabetes pedigree", "optype": "numeric", "order": 6, "preferred": true, "summary": {"bins": [[0.09254, 13], [0.14651, 61], [0.19706, 52], [0.25062, 86], [0.29458, 48], [0.34338, 42], [0.40032, 34], [0.44206, 33], [0.49592, 26], [0.54232, 31], [0.59219, 26], [0.64124, 21], [0.69418, 33], [0.744, 18], [0.81253, 15], [0.8645, 14], [0.93444, 16], [1.015, 5], [1.086, 2], [1.14533, 9], [1.2064, 5], [1.2702, 5], [1.33067, 3], [1.39375, 4], [1.45933, 3], [1.6, 1], [1.70933, 3], [1.781, 1], [2.137, 1], [2.288, 1], [2.329, 1], [2.42, 1]], "exact_histogram": {"populations": [9, 95, 140, 73, 68, 58, 52, 31, 28, 16, 6, 11, 8, 6, 4, 0, 3, 2, 0, 0, 0, 1, 1, 1, 1], "start": 0, "width": 0.1}, "kurtosis": 5.46252, "maximum": 2.42, "mean": 0.47944, "median": 0.3865, "minimum": 0.078, "missing_count": 0, "population": 614, "skewness": 1.91857, "standard_deviation": 0.34277, "sum": 294.378, "sum_squares": 213.15908, "variance": 0.11749}}, "000007": {"column_number": 7, "datatype": "int8", "name": "age", "optype": "numeric", "order": 7, "preferred": true, "summary": {"bins": [[21.56757, 111], [23, 29], [24.48611, 72], [26.5, 48], [28.4375, 48], [30.54545, 33], [32.59259, 27], [34.47368, 19], [36.48276, 29], [38.48, 25], [40.68, 25], [42.44, 25], [44.63158, 19], [46.27778, 18], [48.5, 8], [50.46667, 15], [52.41667, 12], [54.375, 8], [56.71429, 7], [58.375, 8], [60.28571, 7], [62, 3], [63, 4], [64, 1], [65, 2], [66, 3], [67, 3], [68, 1], [69, 1], [70, 1], [72, 1], [81, 1]], "exact_histogram": {"populations": [48, 92, 72, 48, 48, 33, 27, 19, 29, 25, 25, 25, 19, 18, 8, 15, 12, 8, 7, 8, 7, 7, 3, 6, 2, 1, 1, 0, 0, 0, 1], "start": 20, "width": 2}, "kurtosis": 
0.42204, "maximum": 81, "mean": 33.66287, "median": 29, "minimum": 21, "missing_count": 0, "population": 614, "skewness": 1.05637, "standard_deviation": 12.0408, "sum": 20669, "sum_squares": 784651, "variance": 144.98077}}, "000008": {"column_number": 8, "datatype": "string", "name": "diabetes", "optype": "categorical", "order": 8, "preferred": true, "summary": {"categories": [["false", 394], ["true", 220]], "missing_count": 0}, "term_analysis": {"enabled": true}}}}, "ensemble_sample": {"rate": 1, "replacement": true, "seed": "d5f6867da5224b4793c0a4088697ef3d"}, "error_models": 0, "fields_meta": {"count": 9, "limit": 1000, "offset": 0, "query_total": 9, "total": 9}, "finished_models": 2, "focus_field": null, "focus_field_name": null, "importance": {"000000": 0.06955, "000001": 0.36342, "000002": 0.04377, "000003": 0.03686, "000004": 0.02531, "000005": 0.22865, "000006": 0.16557, "000007": 0.06686}, "input_fields": ["000000", "000001", "000002", "000003", "000004", "000005", "000006", "000007"], "locale": "en_US", "max_columns": 9, "max_rows": 614, "missing_splits": false, "models": ["model/62605ac123541b220100748a", "model/62605ac323541b220100748c"], "name": "diabetes", "name_options": "bootstrap decision forest, 512-node, 2-model, pruned, deterministic order", "node_threshold": 512, "number_of_batchpredictions": 0, "number_of_evaluations": 1, "number_of_models": 2, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000008", "objective_field_details": {"column_number": 8, "datatype": "string", "name": "diabetes", "optype": "categorical", "order": 8}, "objective_field_name": "diabetes", "objective_field_type": "categorical", "objective_fields": ["000008"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": null, "randomize": false, "range": null, "replacement": false, "resource": "ensemble/62605abc0c11da5783002915", "rows": 614, "sample_rate": 1.0, "selective_pruning": 
true, "shared": false, "size": 20939, "source": "source/62605aa75198db5eed003416", "source_status": true, "split_candidates": 32, "split_field": null, "split_field_name": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 1193, "message": "The ensemble has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2022-04-20T19:11:09.173000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/tests/mlflow_ensemble/model_62605ac123541b220100748a b/bigml/tests/mlflow_ensemble/model_62605ac123541b220100748a new file mode 100644 index 00000000..818f9b86 --- /dev/null +++ b/bigml/tests/mlflow_ensemble/model_62605ac123541b220100748a @@ -0,0 +1 @@ +{"code": 200, "resource": "model/62605ac123541b220100748a", "location": "https://bigml.io/andromeda/model/62605ac123541b220100748a", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 9, "configuration": null, "configuration_status": false, "created": "2022-04-20T19:10:57.279000", "creator": "mmartin", "dataset": "dataset/62605ab1049fde5d990028f1", "dataset_field_types": {"categorical": 1, "datetime": 0, "image": 0, "items": 0, "numeric": 8, "path": 0, "preferred": 9, "regions": 0, "text": 0, "total": 9}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "62605abc0c11da5783002915", "ensemble_index": 0, "excluded_fields": [], "fields_meta": {"count": 9, "limit": 1000, "offset": 0, "query_total": 9, "total": 9}, "focus_field": null, "input_fields": ["000000", "000001", "000002", "000003", "000004", "000005", "000006", "000007"], "locale": "en_US", "max_columns": 9, "max_rows": 614, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["false", 417], ["true", 197]]}, "training": {"categories": [["false", 416], ["true", 198]]}}, "fields": {"000000": 
{"column_number": 0, "datatype": "int8", "name": "pregnancies", "optype": "numeric", "order": 0, "preferred": true, "summary": {"counts": [[0, 86], [1, 110], [2, 80], [3, 56], [4, 58], [5, 45], [6, 40], [7, 37], [8, 35], [9, 22], [10, 16], [11, 9], [12, 9], [13, 8], [14, 1], [15, 1], [17, 1]], "exact_histogram": {"populations": [196, 136, 103, 77, 57, 25, 17, 2, 1], "start": 0, "width": 2}, "kurtosis": 0.14808, "maximum": 17, "mean": 3.89088, "median": 3, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 0.88555, "standard_deviation": 3.38254, "sum": 2389, "sum_squares": 16309, "variance": 11.44158}}, "000001": {"column_number": 1, "datatype": "int16", "name": "plasma glucose", "optype": "numeric", "order": 1, "preferred": true, "summary": {"bins": [[0, 5], [44, 1], [57, 2], [61.5, 2], [65, 1], [67.75, 4], [73.21429, 14], [79.52941, 17], [84.2381, 21], [88.74074, 27], [92, 19], [95.81818, 33], [100.86275, 51], [105.67647, 34], [109.38235, 34], [113.77143, 35], [118.775, 40], [124.14634, 41], [128.93548, 31], [133.46154, 13], [137.32, 25], [141.95652, 23], [146.4, 20], [152.86957, 23], [158, 14], [162.41667, 12], [166.66667, 15], [172.92857, 14], [180.77778, 18], [187.90909, 11], [193, 4], [196.7, 10]], "exact_histogram": {"populations": [5, 0, 0, 0, 1, 2, 7, 21, 48, 74, 93, 75, 78, 46, 43, 36, 28, 19, 23, 15], "start": 0, "width": 10}, "kurtosis": 0.68455, "maximum": 199, "mean": 121.11401, "median": 117, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 0.08447, "standard_deviation": 32.75167, "sum": 74364, "sum_squares": 9664070, "variance": 1072.67214}}, "000002": {"column_number": 2, "datatype": "int8", "name": "blood pressure", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[0, 28], [30, 2], [39, 2], [44.66667, 6], [49.625, 16], [52, 9], [55.11111, 18], [58, 19], [60, 29], [62, 23], [64.87097, 62], [68, 38], [70, 43], [72, 37], [74.84524, 84], [78, 34], [80, 34], [82, 24], [84.85366, 41], [88, 17], [90, 
19], [92, 4], [95, 9], [98, 3], [100, 2], [102, 1], [104, 1], [106, 3], [108, 2], [110, 2], [114, 1], [122, 1]], "exact_histogram": {"populations": [28, 0, 0, 0, 0, 0, 2, 1, 5, 5, 29, 30, 84, 68, 125, 73, 79, 37, 27, 8, 4, 5, 3, 0, 1], "start": 0, "width": 5}, "kurtosis": 5.12315, "maximum": 122, "mean": 69.10912, "median": 72, "minimum": 0, "missing_count": 0, "population": 614, "skewness": -1.82589, "standard_deviation": 19.37631, "sum": 42433, "sum_squares": 3162653, "variance": 375.44158}}, "000003": {"column_number": 3, "datatype": "int8", "name": "triceps skin thickness", "optype": "numeric", "order": 3, "preferred": true, "summary": {"bins": [[0, 189], [7.33333, 3], [10.57143, 7], [12, 4], [13.375, 16], [15.26667, 15], [17, 13], [18.46429, 28], [20.41176, 17], [22.48387, 31], [24.4, 20], [26.5625, 32], [28.46429, 28], [30.4375, 32], [32.38636, 44], [34.68421, 19], [36.6087, 23], [38.68421, 19], [40.46429, 28], [42.38462, 13], [44, 4], [45, 6], [46, 5], [47, 4], [48, 4], [49, 2], [50, 2], [51, 1], [52, 2], [56, 1], [63, 1], [99, 1]], "exact_histogram": {"open_max": 3, "populations": [189, 0, 0, 2, 1, 7, 14, 17, 17, 28, 17, 31, 20, 32, 28, 32, 44, 19, 23, 19, 28, 13, 10, 9, 6, 3, 2], "start": 0, "width": 2}, "kurtosis": -0.44344, "maximum": 99, "mean": 20.27687, "median": 22, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 0.14897, "standard_deviation": 16.11049, "sum": 12450, "sum_squares": 411550, "variance": 259.54801}}, "000004": {"column_number": 4, "datatype": "int16", "name": "insulin", "optype": "numeric", "order": 4, "preferred": true, "summary": {"bins": [[0, 295], [19.57143, 7], [38.36364, 11], [56.08889, 45], [76.55172, 29], [93.76471, 34], [113.11111, 27], [133.5, 38], [154.23077, 13], [173.31818, 22], [189.75, 16], [205.57143, 14], [219.28571, 7], [235.44444, 9], [268.5, 10], [292.33333, 3], [307, 2], [324.75, 8], [338.5, 2], [372.5, 2], [389.5, 2], [415, 1], [440, 1], [474.75, 4], [491.66667, 3], [510, 1], [542.66667, 3], [579, 
1], [600, 1], [680, 1], [744, 1], [846, 1]], "exact_histogram": {"open_max": 3, "populations": [299, 10, 35, 34, 36, 27, 32, 24, 18, 22, 17, 10, 5, 7, 4, 3, 8, 1, 2, 2, 1, 0, 1, 2, 5, 1, 0, 3, 1, 0, 1], "start": 0, "width": 20}, "kurtosis": 7.14939, "maximum": 846, "mean": 82.72801, "median": 37, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 2.2975, "standard_deviation": 119.91638, "sum": 50795, "sum_squares": 13017071, "variance": 14379.93732}}, "000005": {"column_number": 5, "datatype": "double", "name": "bmi", "optype": "numeric", "order": 5, "preferred": true, "summary": {"bins": [[0, 9], [18.26667, 3], [19.5875, 8], [20.9375, 8], [22.13529, 17], [23.25333, 15], [24.14167, 24], [25.12059, 34], [26.32187, 32], [27.64706, 34], [28.8, 33], [30.17627, 59], [31.53077, 39], [32.85849, 53], [33.94118, 34], [34.89355, 31], [35.9129, 31], [37.34138, 29], [38.39048, 21], [39.45, 24], [40.60769, 13], [41.41429, 7], [42.90556, 18], [44.125, 8], [45.42727, 11], [46.4, 7], [48.225, 4], [49.85, 2], [52.8, 3], [55, 1], [57.3, 1], [59.4, 1]], "exact_histogram": {"populations": [9, 0, 0, 0, 0, 0, 0, 0, 0, 9, 17, 31, 59, 52, 60, 66, 81, 68, 42, 43, 21, 20, 17, 9, 3, 1, 3, 1, 1, 1], "start": 0, "width": 2}, "kurtosis": 3.13899, "maximum": 59.4, "mean": 31.9171, "median": 32, "minimum": 0, "missing_count": 0, "population": 614, "skewness": -0.52314, "standard_deviation": 7.80358, "sum": 19597.1, "sum_squares": 662811.83, "variance": 60.89594}}, "000006": {"column_number": 6, "datatype": "double", "name": "diabetes pedigree", "optype": "numeric", "order": 6, "preferred": true, "summary": {"bins": [[0.09254, 13], [0.14651, 61], [0.19706, 52], [0.25062, 86], [0.29458, 48], [0.34338, 42], [0.40032, 34], [0.44206, 33], [0.49592, 26], [0.54232, 31], [0.59219, 26], [0.64124, 21], [0.69418, 33], [0.744, 18], [0.81253, 15], [0.8645, 14], [0.93444, 16], [1.015, 5], [1.086, 2], [1.14533, 9], [1.2064, 5], [1.2702, 5], [1.33067, 3], [1.39375, 4], [1.45933, 3], [1.6, 1], 
[1.70933, 3], [1.781, 1], [2.137, 1], [2.288, 1], [2.329, 1], [2.42, 1]], "exact_histogram": {"populations": [9, 95, 140, 73, 68, 58, 52, 31, 28, 16, 6, 11, 8, 6, 4, 0, 3, 2, 0, 0, 0, 1, 1, 1, 1], "start": 0, "width": 0.1}, "kurtosis": 5.46252, "maximum": 2.42, "mean": 0.47944, "median": 0.3865, "minimum": 0.078, "missing_count": 0, "population": 614, "skewness": 1.91857, "standard_deviation": 0.34277, "sum": 294.378, "sum_squares": 213.15908, "variance": 0.11749}}, "000007": {"column_number": 7, "datatype": "int8", "name": "age", "optype": "numeric", "order": 7, "preferred": true, "summary": {"bins": [[21.56757, 111], [23, 29], [24.48611, 72], [26.5, 48], [28.4375, 48], [30.54545, 33], [32.59259, 27], [34.47368, 19], [36.48276, 29], [38.48, 25], [40.68, 25], [42.44, 25], [44.63158, 19], [46.27778, 18], [48.5, 8], [50.46667, 15], [52.41667, 12], [54.375, 8], [56.71429, 7], [58.375, 8], [60.28571, 7], [62, 3], [63, 4], [64, 1], [65, 2], [66, 3], [67, 3], [68, 1], [69, 1], [70, 1], [72, 1], [81, 1]], "exact_histogram": {"populations": [48, 92, 72, 48, 48, 33, 27, 19, 29, 25, 25, 25, 19, 18, 8, 15, 12, 8, 7, 8, 7, 7, 3, 6, 2, 1, 1, 0, 0, 0, 1], "start": 20, "width": 2}, "kurtosis": 0.42204, "maximum": 81, "mean": 33.66287, "median": 29, "minimum": 21, "missing_count": 0, "population": 614, "skewness": 1.05637, "standard_deviation": 12.0408, "sum": 20669, "sum_squares": 784651, "variance": 144.98077}}, "000008": {"column_number": 8, "datatype": "string", "name": "diabetes", "optype": "categorical", "order": 8, "preferred": true, "summary": {"categories": [["false", 394], ["true", 220]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000001", 0.39328], ["000005", 0.19841], ["000006", 0.16783], ["000007", 0.08845], ["000003", 0.04986], ["000002", 0.0447], ["000000", 0.0369], ["000004", 0.02058]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", 
"NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000000": {"column_number": 0, "datatype": "int8", "name": "pregnancies", "optype": "numeric", "preferred": true}, "000001": {"column_number": 1, "datatype": "int16", "name": "plasma glucose", "optype": "numeric", "preferred": true}, "000002": {"column_number": 2, "datatype": "int8", "name": "blood pressure", "optype": "numeric", "preferred": true}, "000003": {"column_number": 3, "datatype": "int8", "name": "triceps skin thickness", "optype": "numeric", "preferred": true}, "000004": {"column_number": 4, "datatype": "int16", "name": "insulin", "optype": "numeric", "preferred": true}, "000005": {"column_number": 5, "datatype": "double", "name": "bmi", "optype": "numeric", "preferred": true}, "000006": {"column_number": 6, "datatype": "double", "name": "diabetes pedigree", "optype": "numeric", "preferred": true}, "000007": {"column_number": 7, "datatype": "int8", "name": "age", "optype": "numeric", "preferred": true}, "000008": {"column_number": 8, "datatype": "string", "name": "diabetes", "optype": "categorical", "preferred": true, "term_analysis": {"enabled": true}}}, "node_threshold": 512, "root": {"children": [{"children": [{"children": [{"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 5, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 39.8}}, {"confidence": 0.34237, "count": 2, "id": 6, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 39.8}}], "confidence": 0.20765, "count": 3, "id": 4, "objective_summary": {"categories": [["false", 2], ["true", 1]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 93}}, {"children": [{"confidence": 0.20654, "count": 1, "id": 8, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000004", "operator": ">", "value": 643}}, 
{"children": [{"children": [{"confidence": 0.83887, "count": 20, "id": 11, "objective_summary": {"categories": [["true", 20]]}, "output": "true", "predicate": {"field": "000000", "operator": ">", "value": 3}}, {"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 14, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 41.65}}, {"confidence": 0.56551, "count": 5, "id": 15, "objective_summary": {"categories": [["true", 5]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 41.65}}], "confidence": 0.43649, "count": 6, "id": 13, "objective_summary": {"categories": [["true", 5], ["false", 1]]}, "output": "true", "predicate": {"field": "000007", "operator": ">", "value": 36}}, {"confidence": 0.34237, "count": 2, "id": 16, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000007", "operator": "<=", "value": 36}}], "confidence": 0.30574, "count": 8, "id": 12, "objective_summary": {"categories": [["true", 5], ["false", 3]]}, "output": "true", "predicate": {"field": "000000", "operator": "<=", "value": 3}}], "confidence": 0.72804, "count": 28, "id": 10, "objective_summary": {"categories": [["true", 25], ["false", 3]]}, "output": "true", "predicate": {"field": "000007", "operator": ">", "value": 33}}, {"confidence": 0.8668, "count": 25, "id": 17, "objective_summary": {"categories": [["true", 25]]}, "output": "true", "predicate": {"field": "000007", "operator": "<=", "value": 33}}], "confidence": 0.8463, "count": 53, "id": 9, "objective_summary": {"categories": [["true", 50], ["false", 3]]}, "output": "true", "predicate": {"field": "000004", "operator": "<=", "value": 643}}], "confidence": 0.82446, "count": 54, "id": 7, "objective_summary": {"categories": [["true", 50], ["false", 4]]}, "output": "true", "predicate": {"field": "000002", "operator": "<=", "value": 93}}], "confidence": 0.78877, "count": 57, "id": 
3, "objective_summary": {"categories": [["true", 51], ["false", 6]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 27.25}}, {"children": [{"confidence": 0.34237, "count": 2, "id": 19, "objective_summary": {"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 194}}, {"confidence": 0.5101, "count": 4, "id": 20, "objective_summary": {"categories": [["false", 4]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 194}}], "confidence": 0.29999, "count": 6, "id": 18, "objective_summary": {"categories": [["false", 4], ["true", 2]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 27.25}}], "confidence": 0.73188, "count": 63, "id": 2, "objective_summary": {"categories": [["true", 53], ["false", 10]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 167}}, {"children": [{"children": [{"confidence": 0.34237, "count": 2, "id": 23, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 163}}, {"confidence": 0.83182, "count": 19, "id": 24, "objective_summary": {"categories": [["true", 19]]}, "output": "true", "predicate": {"field": "000001", "operator": "<=", "value": 163}}], "confidence": 0.71085, "count": 21, "id": 22, "objective_summary": {"categories": [["true", 19], ["false", 2]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 41.85}}, {"children": [{"children": [{"confidence": 0.34237, "count": 2, "id": 27, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 1.4955}}, {"confidence": 0.7575, "count": 12, "id": 28, "objective_summary": {"categories": [["true", 12]]}, "output": "true", "predicate": {"field": "000006", "operator": "<=", "value": 1.4955}}], "confidence": 0.60058, "count": 14, "id": 26, 
"objective_summary": {"categories": [["true", 12], ["false", 2]]}, "output": "true", "predicate": {"field": "000006", "operator": ">", "value": 0.92725}}, {"children": [{"children": [{"children": [{"confidence": 0.70085, "count": 9, "id": 32, "objective_summary": {"categories": [["true", 9]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 29.8}}, {"children": [{"confidence": 0.34237, "count": 2, "id": 34, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 28.05}}, {"confidence": 0.20654, "count": 1, "id": 35, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 28.05}}], "confidence": 0.20765, "count": 3, "id": 33, "objective_summary": {"categories": [["false", 2], ["true", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 29.8}}], "confidence": 0.55196, "count": 12, "id": 31, "objective_summary": {"categories": [["true", 10], ["false", 2]]}, "output": "true", "predicate": {"field": "000006", "operator": ">", "value": 0.727}}, {"children": [{"children": [{"children": [{"children": [{"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 42, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 36.15}}, {"confidence": 0.56551, "count": 5, "id": 43, "objective_summary": {"categories": [["false", 5]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 36.15}}], "confidence": 0.43649, "count": 6, "id": 41, "objective_summary": {"categories": [["false", 5], ["true", 1]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 149}}, {"children": [{"confidence": 0.64566, "count": 7, "id": 45, "objective_summary": {"categories": [["true", 7]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", 
"value": 131}}, {"confidence": 0.20654, "count": 1, "id": 46, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 131}}], "confidence": 0.52911, "count": 8, "id": 44, "objective_summary": {"categories": [["true", 7], ["false", 1]]}, "output": "true", "predicate": {"field": "000001", "operator": "<=", "value": 149}}], "confidence": 0.3259, "count": 14, "id": 40, "objective_summary": {"categories": [["true", 8], ["false", 6]]}, "output": "true", "predicate": {"field": "000007", "operator": ">", "value": 34}}, {"children": [{"confidence": 0.20654, "count": 1, "id": 48, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 157}}, {"confidence": 0.74116, "count": 11, "id": 49, "objective_summary": {"categories": [["false", 11]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 157}}], "confidence": 0.64611, "count": 12, "id": 47, "objective_summary": {"categories": [["false", 11], ["true", 1]]}, "output": "false", "predicate": {"field": "000007", "operator": "<=", "value": 34}}], "confidence": 0.4622, "count": 26, "id": 39, "objective_summary": {"categories": [["false", 17], ["true", 9]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 60}}, {"confidence": 0.43849, "count": 3, "id": 50, "objective_summary": {"categories": [["true", 3]]}, "output": "true", "predicate": {"field": "000002", "operator": "<=", "value": 60}}], "confidence": 0.40738, "count": 29, "id": 38, "objective_summary": {"categories": [["false", 17], ["true", 12]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 31.25}}, {"confidence": 0.80639, "count": 16, "id": 51, "objective_summary": {"categories": [["false", 16]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 31.25}}], "confidence": 0.58961, "count": 45, "id": 37, 
"objective_summary": {"categories": [["false", 33], ["true", 12]]}, "output": "false", "predicate": {"field": "000003", "operator": ">", "value": 20}}, {"children": [{"children": [{"confidence": 0.34237, "count": 2, "id": 54, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000000", "operator": ">", "value": 9}}, {"children": [{"children": [{"confidence": 0.60966, "count": 6, "id": 57, "objective_summary": {"categories": [["true", 6]]}, "output": "true", "predicate": {"field": "000003", "operator": ">", "value": 8}}, {"confidence": 0.34237, "count": 2, "id": 58, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000003", "operator": "<=", "value": 8}}], "confidence": 0.40927, "count": 8, "id": 56, "objective_summary": {"categories": [["true", 6], ["false", 2]]}, "output": "true", "predicate": {"field": "000006", "operator": ">", "value": 0.286}}, {"confidence": 0.7719, "count": 13, "id": 59, "objective_summary": {"categories": [["true", 13]]}, "output": "true", "predicate": {"field": "000006", "operator": "<=", "value": 0.286}}], "confidence": 0.71085, "count": 21, "id": 55, "objective_summary": {"categories": [["true", 19], ["false", 2]]}, "output": "true", "predicate": {"field": "000000", "operator": "<=", "value": 9}}], "confidence": 0.62862, "count": 23, "id": 53, "objective_summary": {"categories": [["true", 19], ["false", 4]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 142}}, {"children": [{"confidence": 0.70085, "count": 9, "id": 61, "objective_summary": {"categories": [["false", 9]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 135}}, {"children": [{"confidence": 0.56551, "count": 5, "id": 63, "objective_summary": {"categories": [["false", 5]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 34.4}}, {"children": [{"confidence": 0.34237, "count": 2, "id": 65, 
"objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000003", "operator": ">", "value": 6}}, {"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 68, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 22.45}}, {"confidence": 0.20654, "count": 1, "id": 69, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 22.45}}], "confidence": 0.09453, "count": 2, "id": 67, "objective_summary": {"categories": [["false", 1], ["true", 1]]}, "output": "false", "predicate": {"field": "000007", "operator": ">", "value": 54}}, {"confidence": 0.67558, "count": 8, "id": 70, "objective_summary": {"categories": [["true", 8]]}, "output": "true", "predicate": {"field": "000007", "operator": "<=", "value": 54}}], "confidence": 0.59584, "count": 10, "id": 66, "objective_summary": {"categories": [["true", 9], ["false", 1]]}, "output": "true", "predicate": {"field": "000003", "operator": "<=", "value": 6}}], "confidence": 0.46769, "count": 12, "id": 64, "objective_summary": {"categories": [["true", 9], ["false", 3]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 34.4}}], "confidence": 0.30963, "count": 17, "id": 62, "objective_summary": {"categories": [["true", 9], ["false", 8]]}, "output": "true", "predicate": {"field": "000001", "operator": "<=", "value": 135}}], "confidence": 0.4622, "count": 26, "id": 60, "objective_summary": {"categories": [["false", 17], ["true", 9]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 142}}], "confidence": 0.4327, "count": 49, "id": 52, "objective_summary": {"categories": [["true", 28], ["false", 21]]}, "output": "true", "predicate": {"field": "000003", "operator": "<=", "value": 20}}], "confidence": 0.47353, "count": 94, "id": 36, "objective_summary": {"categories": [["false", 
54], ["true", 40]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.727}}], "confidence": 0.43395, "count": 106, "id": 30, "objective_summary": {"categories": [["false", 56], ["true", 50]]}, "output": "false", "predicate": {"field": "000007", "operator": ">", "value": 24}}, {"children": [{"confidence": 0.79611, "count": 15, "id": 72, "objective_summary": {"categories": [["false", 15]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 135}}, {"children": [{"confidence": 0.34237, "count": 2, "id": 74, "objective_summary": {"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000003", "operator": ">", "value": 38}}, {"children": [{"confidence": 0.20654, "count": 1, "id": 76, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 132}}, {"confidence": 0.70085, "count": 9, "id": 77, "objective_summary": {"categories": [["false", 9]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 132}}], "confidence": 0.59584, "count": 10, "id": 75, "objective_summary": {"categories": [["false", 9], ["true", 1]]}, "output": "false", "predicate": {"field": "000003", "operator": "<=", "value": 38}}], "confidence": 0.46769, "count": 12, "id": 73, "objective_summary": {"categories": [["false", 9], ["true", 3]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 135}}], "confidence": 0.71942, "count": 27, "id": 71, "objective_summary": {"categories": [["false", 24], ["true", 3]]}, "output": "false", "predicate": {"field": "000007", "operator": "<=", "value": 24}}], "confidence": 0.51657, "count": 133, "id": 29, "objective_summary": {"categories": [["false", 80], ["true", 53]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.92725}}], "confidence": 0.47708, "count": 147, "id": 25, "objective_summary": {"categories": [["false", 82], ["true", 
65]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 41.85}}], "confidence": 0.42524, "count": 168, "id": 21, "objective_summary": {"categories": [["false", 84], ["true", 84]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 167}}], "confidence": 0.5287, "count": 231, "id": 1, "objective_summary": {"categories": [["true", 137], ["false", 94]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 124}}, {"children": [{"children": [{"children": [{"confidence": 0.79611, "count": 15, "id": 81, "objective_summary": {"categories": [["false", 15]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 119}}, {"children": [{"children": [{"confidence": 0.5101, "count": 4, "id": 84, "objective_summary": {"categories": [["false", 4]]}, "output": "false", "predicate": {"field": "000007", "operator": ">", "value": 38}}, {"children": [{"confidence": 0.20654, "count": 1, "id": 86, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 87}}, {"children": [{"confidence": 0.7719, "count": 13, "id": 88, "objective_summary": {"categories": [["true", 13]]}, "output": "true", "predicate": {"field": "000004", "operator": ">", "value": 94}}, {"children": [{"confidence": 0.60966, "count": 6, "id": 90, "objective_summary": {"categories": [["true", 6]]}, "output": "true", "predicate": {"field": "000002", "operator": ">", "value": 67}}, {"children": [{"confidence": 0.34237, "count": 2, "id": 92, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.55}}, {"confidence": 0.20654, "count": 1, "id": 93, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000006", "operator": "<=", "value": 0.55}}], "confidence": 0.20765, "count": 3, "id": 91, "objective_summary": {"categories": [["false", 
2], ["true", 1]]}, "output": "false", "predicate": {"field": "000002", "operator": "<=", "value": 67}}], "confidence": 0.45258, "count": 9, "id": 89, "objective_summary": {"categories": [["true", 7], ["false", 2]]}, "output": "true", "predicate": {"field": "000004", "operator": "<=", "value": 94}}], "confidence": 0.72185, "count": 22, "id": 87, "objective_summary": {"categories": [["true", 20], ["false", 2]]}, "output": "true", "predicate": {"field": "000002", "operator": "<=", "value": 87}}], "confidence": 0.67872, "count": 23, "id": 85, "objective_summary": {"categories": [["true", 20], ["false", 3]]}, "output": "true", "predicate": {"field": "000007", "operator": "<=", "value": 38}}], "confidence": 0.55321, "count": 27, "id": 83, "objective_summary": {"categories": [["true", 20], ["false", 7]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 107}}, {"children": [{"children": [{"children": [{"confidence": 0.72246, "count": 10, "id": 97, "objective_summary": {"categories": [["true", 10]]}, "output": "true", "predicate": {"field": "000004", "operator": ">", "value": 68}}, {"children": [{"confidence": 0.5101, "count": 4, "id": 99, "objective_summary": {"categories": [["true", 4]]}, "output": "true", "predicate": {"field": "000002", "operator": ">", "value": 75}}, {"confidence": 0.56551, "count": 5, "id": 100, "objective_summary": {"categories": [["false", 5]]}, "output": "false", "predicate": {"field": "000002", "operator": "<=", "value": 75}}], "confidence": 0.26665, "count": 9, "id": 98, "objective_summary": {"categories": [["false", 5], ["true", 4]]}, "output": "false", "predicate": {"field": "000004", "operator": "<=", "value": 68}}], "confidence": 0.51208, "count": 19, "id": 96, "objective_summary": {"categories": [["true", 14], ["false", 5]]}, "output": "true", "predicate": {"field": "000000", "operator": ">", "value": 0}}, {"confidence": 0.56551, "count": 5, "id": 101, "objective_summary": {"categories": [["false", 5]]}, 
"output": "false", "predicate": {"field": "000000", "operator": "<=", "value": 0}}], "confidence": 0.38834, "count": 24, "id": 95, "objective_summary": {"categories": [["true", 14], ["false", 10]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 32.7}}, {"children": [{"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 105, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000002", "operator": ">", "value": 69}}, {"confidence": 0.34237, "count": 2, "id": 106, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000002", "operator": "<=", "value": 69}}], "confidence": 0.20765, "count": 3, "id": 104, "objective_summary": {"categories": [["false", 2], ["true", 1]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 1.0865}}, {"confidence": 0.87544, "count": 27, "id": 107, "objective_summary": {"categories": [["false", 27]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 1.0865}}], "confidence": 0.83329, "count": 30, "id": 103, "objective_summary": {"categories": [["false", 29], ["true", 1]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.486}}, {"confidence": 0.34237, "count": 2, "id": 108, "objective_summary": {"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000006", "operator": "<=", "value": 0.486}}], "confidence": 0.75782, "count": 32, "id": 102, "objective_summary": {"categories": [["false", 29], ["true", 3]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 32.7}}], "confidence": 0.56664, "count": 56, "id": 94, "objective_summary": {"categories": [["false", 39], ["true", 17]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 107}}], "confidence": 0.44725, "count": 83, "id": 82, "objective_summary": {"categories": [["false", 46], ["true", 37]]}, 
"output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 119}}], "confidence": 0.52356, "count": 98, "id": 80, "objective_summary": {"categories": [["false", 61], ["true", 37]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.47355}}, {"children": [{"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 112, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000000", "operator": ">", "value": 9}}, {"confidence": 0.70085, "count": 9, "id": 113, "objective_summary": {"categories": [["true", 9]]}, "output": "true", "predicate": {"field": "000000", "operator": "<=", "value": 9}}], "confidence": 0.59584, "count": 10, "id": 111, "objective_summary": {"categories": [["true", 9], ["false", 1]]}, "output": "true", "predicate": {"field": "000007", "operator": ">", "value": 34}}, {"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 116, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 47.45}}, {"children": [{"confidence": 0.83887, "count": 20, "id": 118, "objective_summary": {"categories": [["false", 20]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 113}}, {"confidence": 0.37553, "count": 5, "id": 119, "objective_summary": {"categories": [["false", 4], ["true", 1]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 113}}], "confidence": 0.80456, "count": 25, "id": 117, "objective_summary": {"categories": [["false", 24], ["true", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 47.45}}], "confidence": 0.75858, "count": 26, "id": 115, "objective_summary": {"categories": [["false", 24], ["true", 2]]}, "output": "false", "predicate": {"field": "000003", "operator": ">", "value": 9}}, {"children": [{"confidence": 0.60966, "count": 6, "id": 121, "objective_summary": 
{"categories": [["false", 6]]}, "output": "false", "predicate": {"field": "000007", "operator": ">", "value": 28}}, {"children": [{"confidence": 0.60966, "count": 6, "id": 123, "objective_summary": {"categories": [["true", 6]]}, "output": "true", "predicate": {"field": "000006", "operator": ">", "value": 0.1335}}, {"confidence": 0.20654, "count": 1, "id": 124, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.1335}}], "confidence": 0.48687, "count": 7, "id": 122, "objective_summary": {"categories": [["true", 6], ["false", 1]]}, "output": "true", "predicate": {"field": "000007", "operator": "<=", "value": 28}}], "confidence": 0.29143, "count": 13, "id": 120, "objective_summary": {"categories": [["false", 7], ["true", 6]]}, "output": "false", "predicate": {"field": "000003", "operator": "<=", "value": 9}}], "confidence": 0.64466, "count": 39, "id": 114, "objective_summary": {"categories": [["false", 31], ["true", 8]]}, "output": "false", "predicate": {"field": "000007", "operator": "<=", "value": 34}}], "confidence": 0.51311, "count": 49, "id": 110, "objective_summary": {"categories": [["false", 32], ["true", 17]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 110}}, {"children": [{"children": [{"confidence": 0.34237, "count": 2, "id": 127, "objective_summary": {"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000000", "operator": ">", "value": 11}}, {"children": [{"children": [{"confidence": 0.85688, "count": 23, "id": 130, "objective_summary": {"categories": [["false", 23]]}, "output": "false", "predicate": {"field": "000004", "operator": ">", "value": 9}}, {"children": [{"confidence": 0.20654, "count": 1, "id": 132, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 40.1}}, {"children": [{"confidence": 0.79611, "count": 15, "id": 134, 
"objective_summary": {"categories": [["false", 15]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 77}}, {"children": [{"children": [{"confidence": 0.34237, "count": 2, "id": 137, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000003", "operator": ">", "value": 28}}, {"confidence": 0.34237, "count": 2, "id": 138, "objective_summary": {"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000003", "operator": "<=", "value": 28}}], "confidence": 0.15004, "count": 4, "id": 136, "objective_summary": {"categories": [["false", 2], ["true", 2]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.2455}}, {"confidence": 0.60966, "count": 6, "id": 139, "objective_summary": {"categories": [["false", 6]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.2455}}], "confidence": 0.49016, "count": 10, "id": 135, "objective_summary": {"categories": [["false", 8], ["true", 2]]}, "output": "false", "predicate": {"field": "000002", "operator": "<=", "value": 77}}], "confidence": 0.75033, "count": 25, "id": 133, "objective_summary": {"categories": [["false", 23], ["true", 2]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 40.1}}], "confidence": 0.71024, "count": 26, "id": 131, "objective_summary": {"categories": [["false", 23], ["true", 3]]}, "output": "false", "predicate": {"field": "000004", "operator": "<=", "value": 9}}], "confidence": 0.83479, "count": 49, "id": 129, "objective_summary": {"categories": [["false", 46], ["true", 3]]}, "output": "false", "predicate": {"field": "000000", "operator": ">", "value": 0}}, {"children": [{"confidence": 0.43849, "count": 3, "id": 141, "objective_summary": {"categories": [["false", 3]]}, "output": "false", "predicate": {"field": "000007", "operator": ">", "value": 25}}, {"confidence": 0.34237, "count": 2, "id": 142, "objective_summary": 
{"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000007", "operator": "<=", "value": 25}}], "confidence": 0.23072, "count": 5, "id": 140, "objective_summary": {"categories": [["false", 3], ["true", 2]]}, "output": "false", "predicate": {"field": "000000", "operator": "<=", "value": 0}}], "confidence": 0.8009, "count": 54, "id": 128, "objective_summary": {"categories": [["false", 49], ["true", 5]]}, "output": "false", "predicate": {"field": "000000", "operator": "<=", "value": 11}}], "confidence": 0.76374, "count": 56, "id": 126, "objective_summary": {"categories": [["false", 49], ["true", 7]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 71}}, {"confidence": 0.93686, "count": 57, "id": 143, "objective_summary": {"categories": [["false", 57]]}, "output": "false", "predicate": {"field": "000002", "operator": "<=", "value": 71}}], "confidence": 0.87763, "count": 113, "id": 125, "objective_summary": {"categories": [["false", 106], ["true", 7]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 110}}], "confidence": 0.78902, "count": 162, "id": 109, "objective_summary": {"categories": [["false", 138], ["true", 24]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.47355}}], "confidence": 0.71024, "count": 260, "id": 79, "objective_summary": {"categories": [["false", 199], ["true", 61]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 26.86384}}, {"confidence": 0.96971, "count": 123, "id": 144, "objective_summary": {"categories": [["false", 123]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 26.86384}}], "confidence": 0.80073, "count": 383, "id": 78, "objective_summary": {"categories": [["false", 322], ["true", 61]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 124}}], "confidence": 0.63955, "count": 614, "id": 0, "objective_summary": {"categories": 
[["false", 416], ["true", 198]]}, "output": "false", "predicate": true}}, "name": "diabetes - 0", "name_options": "512-node, pruned, deterministic order", "node_threshold": 512, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000008", "objective_field_name": "diabetes", "objective_field_type": "categorical", "objective_fields": ["000008"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": null, "randomize": false, "range": null, "replacement": false, "resource": "model/62605ac123541b220100748a", "rows": 614, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 20939, "source": "source/62605aa75198db5eed003416", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 0.0}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2022-04-20T19:11:30.564000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/tests/mlflow_ensemble/model_62605ac323541b220100748c b/bigml/tests/mlflow_ensemble/model_62605ac323541b220100748c new file mode 100644 index 00000000..12c57cfa --- /dev/null +++ b/bigml/tests/mlflow_ensemble/model_62605ac323541b220100748c @@ -0,0 +1 @@ +{"code": 200, "resource": "model/62605ac323541b220100748c", "location": "https://bigml.io/andromeda/model/62605ac323541b220100748c", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 9, "configuration": null, "configuration_status": false, "created": "2022-04-20T19:10:59.462000", "creator": "mmartin", "dataset": "dataset/62605ab1049fde5d990028f1", "dataset_field_types": {"categorical": 1, "datetime": 0, "image": 0, "items": 0, "numeric": 8, "path": 0, "preferred": 9, 
"regions": 0, "text": 0, "total": 9}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "62605abc0c11da5783002915", "ensemble_index": 1, "excluded_fields": [], "fields_meta": {"count": 9, "limit": 1000, "offset": 0, "query_total": 9, "total": 9}, "focus_field": null, "input_fields": ["000000", "000001", "000002", "000003", "000004", "000005", "000006", "000007"], "locale": "en_US", "max_columns": 9, "max_rows": 614, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["false", 410], ["true", 204]]}, "training": {"categories": [["false", 410], ["true", 204]]}}, "fields": {"000000": {"column_number": 0, "datatype": "int8", "name": "pregnancies", "optype": "numeric", "order": 0, "preferred": true, "summary": {"counts": [[0, 86], [1, 110], [2, 80], [3, 56], [4, 58], [5, 45], [6, 40], [7, 37], [8, 35], [9, 22], [10, 16], [11, 9], [12, 9], [13, 8], [14, 1], [15, 1], [17, 1]], "exact_histogram": {"populations": [196, 136, 103, 77, 57, 25, 17, 2, 1], "start": 0, "width": 2}, "kurtosis": 0.14808, "maximum": 17, "mean": 3.89088, "median": 3, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 0.88555, "standard_deviation": 3.38254, "sum": 2389, "sum_squares": 16309, "variance": 11.44158}}, "000001": {"column_number": 1, "datatype": "int16", "name": "plasma glucose", "optype": "numeric", "order": 1, "preferred": true, "summary": {"bins": [[0, 5], [44, 1], [57, 2], [61.5, 2], [65, 1], [67.75, 4], [73.21429, 14], [79.52941, 17], [84.2381, 21], [88.74074, 27], [92, 19], [95.81818, 33], [100.86275, 51], [105.67647, 34], [109.38235, 34], [113.77143, 35], [118.775, 40], [124.14634, 41], [128.93548, 31], [133.46154, 13], [137.32, 25], [141.95652, 23], [146.4, 20], [152.86957, 23], [158, 14], [162.41667, 12], [166.66667, 15], [172.92857, 14], [180.77778, 18], [187.90909, 11], [193, 4], [196.7, 10]], "exact_histogram": {"populations": [5, 0, 0, 0, 1, 2, 7, 21, 48, 74, 
93, 75, 78, 46, 43, 36, 28, 19, 23, 15], "start": 0, "width": 10}, "kurtosis": 0.68455, "maximum": 199, "mean": 121.11401, "median": 117, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 0.08447, "standard_deviation": 32.75167, "sum": 74364, "sum_squares": 9664070, "variance": 1072.67214}}, "000002": {"column_number": 2, "datatype": "int8", "name": "blood pressure", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[0, 28], [30, 2], [39, 2], [44.66667, 6], [49.625, 16], [52, 9], [55.11111, 18], [58, 19], [60, 29], [62, 23], [64.87097, 62], [68, 38], [70, 43], [72, 37], [74.84524, 84], [78, 34], [80, 34], [82, 24], [84.85366, 41], [88, 17], [90, 19], [92, 4], [95, 9], [98, 3], [100, 2], [102, 1], [104, 1], [106, 3], [108, 2], [110, 2], [114, 1], [122, 1]], "exact_histogram": {"populations": [28, 0, 0, 0, 0, 0, 2, 1, 5, 5, 29, 30, 84, 68, 125, 73, 79, 37, 27, 8, 4, 5, 3, 0, 1], "start": 0, "width": 5}, "kurtosis": 5.12315, "maximum": 122, "mean": 69.10912, "median": 72, "minimum": 0, "missing_count": 0, "population": 614, "skewness": -1.82589, "standard_deviation": 19.37631, "sum": 42433, "sum_squares": 3162653, "variance": 375.44158}}, "000003": {"column_number": 3, "datatype": "int8", "name": "triceps skin thickness", "optype": "numeric", "order": 3, "preferred": true, "summary": {"bins": [[0, 189], [7.33333, 3], [10.57143, 7], [12, 4], [13.375, 16], [15.26667, 15], [17, 13], [18.46429, 28], [20.41176, 17], [22.48387, 31], [24.4, 20], [26.5625, 32], [28.46429, 28], [30.4375, 32], [32.38636, 44], [34.68421, 19], [36.6087, 23], [38.68421, 19], [40.46429, 28], [42.38462, 13], [44, 4], [45, 6], [46, 5], [47, 4], [48, 4], [49, 2], [50, 2], [51, 1], [52, 2], [56, 1], [63, 1], [99, 1]], "exact_histogram": {"open_max": 3, "populations": [189, 0, 0, 2, 1, 7, 14, 17, 17, 28, 17, 31, 20, 32, 28, 32, 44, 19, 23, 19, 28, 13, 10, 9, 6, 3, 2], "start": 0, "width": 2}, "kurtosis": -0.44344, "maximum": 99, "mean": 20.27687, "median": 22, 
"minimum": 0, "missing_count": 0, "population": 614, "skewness": 0.14897, "standard_deviation": 16.11049, "sum": 12450, "sum_squares": 411550, "variance": 259.54801}}, "000004": {"column_number": 4, "datatype": "int16", "name": "insulin", "optype": "numeric", "order": 4, "preferred": true, "summary": {"bins": [[0, 295], [19.57143, 7], [38.36364, 11], [56.08889, 45], [76.55172, 29], [93.76471, 34], [113.11111, 27], [133.5, 38], [154.23077, 13], [173.31818, 22], [189.75, 16], [205.57143, 14], [219.28571, 7], [235.44444, 9], [268.5, 10], [292.33333, 3], [307, 2], [324.75, 8], [338.5, 2], [372.5, 2], [389.5, 2], [415, 1], [440, 1], [474.75, 4], [491.66667, 3], [510, 1], [542.66667, 3], [579, 1], [600, 1], [680, 1], [744, 1], [846, 1]], "exact_histogram": {"open_max": 3, "populations": [299, 10, 35, 34, 36, 27, 32, 24, 18, 22, 17, 10, 5, 7, 4, 3, 8, 1, 2, 2, 1, 0, 1, 2, 5, 1, 0, 3, 1, 0, 1], "start": 0, "width": 20}, "kurtosis": 7.14939, "maximum": 846, "mean": 82.72801, "median": 37, "minimum": 0, "missing_count": 0, "population": 614, "skewness": 2.2975, "standard_deviation": 119.91638, "sum": 50795, "sum_squares": 13017071, "variance": 14379.93732}}, "000005": {"column_number": 5, "datatype": "double", "name": "bmi", "optype": "numeric", "order": 5, "preferred": true, "summary": {"bins": [[0, 9], [18.26667, 3], [19.5875, 8], [20.9375, 8], [22.13529, 17], [23.25333, 15], [24.14167, 24], [25.12059, 34], [26.32187, 32], [27.64706, 34], [28.8, 33], [30.17627, 59], [31.53077, 39], [32.85849, 53], [33.94118, 34], [34.89355, 31], [35.9129, 31], [37.34138, 29], [38.39048, 21], [39.45, 24], [40.60769, 13], [41.41429, 7], [42.90556, 18], [44.125, 8], [45.42727, 11], [46.4, 7], [48.225, 4], [49.85, 2], [52.8, 3], [55, 1], [57.3, 1], [59.4, 1]], "exact_histogram": {"populations": [9, 0, 0, 0, 0, 0, 0, 0, 0, 9, 17, 31, 59, 52, 60, 66, 81, 68, 42, 43, 21, 20, 17, 9, 3, 1, 3, 1, 1, 1], "start": 0, "width": 2}, "kurtosis": 3.13899, "maximum": 59.4, "mean": 31.9171, "median": 32, 
"minimum": 0, "missing_count": 0, "population": 614, "skewness": -0.52314, "standard_deviation": 7.80358, "sum": 19597.1, "sum_squares": 662811.83, "variance": 60.89594}}, "000006": {"column_number": 6, "datatype": "double", "name": "diabetes pedigree", "optype": "numeric", "order": 6, "preferred": true, "summary": {"bins": [[0.09254, 13], [0.14651, 61], [0.19706, 52], [0.25062, 86], [0.29458, 48], [0.34338, 42], [0.40032, 34], [0.44206, 33], [0.49592, 26], [0.54232, 31], [0.59219, 26], [0.64124, 21], [0.69418, 33], [0.744, 18], [0.81253, 15], [0.8645, 14], [0.93444, 16], [1.015, 5], [1.086, 2], [1.14533, 9], [1.2064, 5], [1.2702, 5], [1.33067, 3], [1.39375, 4], [1.45933, 3], [1.6, 1], [1.70933, 3], [1.781, 1], [2.137, 1], [2.288, 1], [2.329, 1], [2.42, 1]], "exact_histogram": {"populations": [9, 95, 140, 73, 68, 58, 52, 31, 28, 16, 6, 11, 8, 6, 4, 0, 3, 2, 0, 0, 0, 1, 1, 1, 1], "start": 0, "width": 0.1}, "kurtosis": 5.46252, "maximum": 2.42, "mean": 0.47944, "median": 0.3865, "minimum": 0.078, "missing_count": 0, "population": 614, "skewness": 1.91857, "standard_deviation": 0.34277, "sum": 294.378, "sum_squares": 213.15908, "variance": 0.11749}}, "000007": {"column_number": 7, "datatype": "int8", "name": "age", "optype": "numeric", "order": 7, "preferred": true, "summary": {"bins": [[21.56757, 111], [23, 29], [24.48611, 72], [26.5, 48], [28.4375, 48], [30.54545, 33], [32.59259, 27], [34.47368, 19], [36.48276, 29], [38.48, 25], [40.68, 25], [42.44, 25], [44.63158, 19], [46.27778, 18], [48.5, 8], [50.46667, 15], [52.41667, 12], [54.375, 8], [56.71429, 7], [58.375, 8], [60.28571, 7], [62, 3], [63, 4], [64, 1], [65, 2], [66, 3], [67, 3], [68, 1], [69, 1], [70, 1], [72, 1], [81, 1]], "exact_histogram": {"populations": [48, 92, 72, 48, 48, 33, 27, 19, 29, 25, 25, 25, 19, 18, 8, 15, 12, 8, 7, 8, 7, 7, 3, 6, 2, 1, 1, 0, 0, 0, 1], "start": 20, "width": 2}, "kurtosis": 0.42204, "maximum": 81, "mean": 33.66287, "median": 29, "minimum": 21, "missing_count": 0, "population": 
614, "skewness": 1.05637, "standard_deviation": 12.0408, "sum": 20669, "sum_squares": 784651, "variance": 144.98077}}, "000008": {"column_number": 8, "datatype": "string", "name": "diabetes", "optype": "categorical", "order": 8, "preferred": true, "summary": {"categories": [["false", 394], ["true", 220]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000001", 0.33357], ["000005", 0.2589], ["000006", 0.16331], ["000000", 0.10221], ["000007", 0.04527], ["000002", 0.04284], ["000004", 0.03004], ["000003", 0.02386]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000000": {"column_number": 0, "datatype": "int8", "name": "pregnancies", "optype": "numeric", "preferred": true}, "000001": {"column_number": 1, "datatype": "int16", "name": "plasma glucose", "optype": "numeric", "preferred": true}, "000002": {"column_number": 2, "datatype": "int8", "name": "blood pressure", "optype": "numeric", "preferred": true}, "000003": {"column_number": 3, "datatype": "int8", "name": "triceps skin thickness", "optype": "numeric", "preferred": true}, "000004": {"column_number": 4, "datatype": "int16", "name": "insulin", "optype": "numeric", "preferred": true}, "000005": {"column_number": 5, "datatype": "double", "name": "bmi", "optype": "numeric", "preferred": true}, "000006": {"column_number": 6, "datatype": "double", "name": "diabetes pedigree", "optype": "numeric", "preferred": true}, "000007": {"column_number": 7, "datatype": "int8", "name": "age", "optype": "numeric", "preferred": true}, "000008": {"column_number": 8, "datatype": "string", "name": "diabetes", "optype": "categorical", "preferred": true, "term_analysis": {"enabled": true}}}, "node_threshold": 512, "root": {"children": [{"children": [{"children": [{"confidence": 0.64566, "count": 7, "id": 3, "objective_summary": {"categories": [["false", 
7]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 92}}, {"children": [{"children": [{"confidence": 0.93242, "count": 53, "id": 6, "objective_summary": {"categories": [["true", 53]]}, "output": "true", "predicate": {"field": "000002", "operator": ">", "value": 54}}, {"children": [{"confidence": 0.5101, "count": 4, "id": 8, "objective_summary": {"categories": [["true", 4]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 27.15}}, {"confidence": 0.20654, "count": 1, "id": 9, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 27.15}}], "confidence": 0.37553, "count": 5, "id": 7, "objective_summary": {"categories": [["true", 4], ["false", 1]]}, "output": "true", "predicate": {"field": "000002", "operator": "<=", "value": 54}}], "confidence": 0.90859, "count": 58, "id": 5, "objective_summary": {"categories": [["true", 57], ["false", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 23.1}}, {"confidence": 0.34237, "count": 2, "id": 10, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 23.1}}], "confidence": 0.86299, "count": 60, "id": 4, "objective_summary": {"categories": [["true", 57], ["false", 3]]}, "output": "true", "predicate": {"field": "000002", "operator": "<=", "value": 92}}], "confidence": 0.74659, "count": 67, "id": 2, "objective_summary": {"categories": [["true", 57], ["false", 10]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 165}}, {"children": [{"confidence": 0.70085, "count": 9, "id": 12, "objective_summary": {"categories": [["false", 9]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 163}}, {"children": [{"children": [{"children": [{"confidence": 0.60966, "count": 6, "id": 16, "objective_summary": {"categories": [["false", 6]]}, 
"output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.3645}}, {"children": [{"confidence": 0.5101, "count": 4, "id": 18, "objective_summary": {"categories": [["true", 4]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 33.35}}, {"confidence": 0.20654, "count": 1, "id": 19, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 33.35}}], "confidence": 0.37553, "count": 5, "id": 17, "objective_summary": {"categories": [["true", 4], ["false", 1]]}, "output": "true", "predicate": {"field": "000006", "operator": "<=", "value": 0.3645}}], "confidence": 0.3538, "count": 11, "id": 15, "objective_summary": {"categories": [["false", 7], ["true", 4]]}, "output": "false", "predicate": {"field": "000004", "operator": ">", "value": 269}}, {"children": [{"children": [{"children": [{"confidence": 0.74116, "count": 11, "id": 23, "objective_summary": {"categories": [["true", 11]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 42.65}}, {"children": [{"confidence": 0.43849, "count": 3, "id": 25, "objective_summary": {"categories": [["false", 3]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 39.95}}, {"children": [{"confidence": 0.64566, "count": 7, "id": 27, "objective_summary": {"categories": [["true", 7]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 36.35}}, {"confidence": 0.20654, "count": 1, "id": 28, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 36.35}}], "confidence": 0.52911, "count": 8, "id": 26, "objective_summary": {"categories": [["true", 7], ["false", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 39.95}}], "confidence": 0.3538, "count": 11, "id": 24, "objective_summary": {"categories": [["true", 7], ["false", 4]]}, 
"output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 42.65}}], "confidence": 0.61483, "count": 22, "id": 22, "objective_summary": {"categories": [["true", 18], ["false", 4]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 35.35}}, {"confidence": 0.81568, "count": 17, "id": 29, "objective_summary": {"categories": [["true", 17]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 35.35}}], "confidence": 0.76421, "count": 39, "id": 21, "objective_summary": {"categories": [["true", 35], ["false", 4]]}, "output": "true", "predicate": {"field": "000006", "operator": ">", "value": 0.2125}}, {"children": [{"confidence": 0.5101, "count": 4, "id": 31, "objective_summary": {"categories": [["false", 4]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 30.65}}, {"confidence": 0.20654, "count": 1, "id": 32, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 30.65}}], "confidence": 0.37553, "count": 5, "id": 30, "objective_summary": {"categories": [["false", 4], ["true", 1]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.2125}}], "confidence": 0.68039, "count": 44, "id": 20, "objective_summary": {"categories": [["true", 36], ["false", 8]]}, "output": "true", "predicate": {"field": "000004", "operator": "<=", "value": 269}}], "confidence": 0.59768, "count": 55, "id": 14, "objective_summary": {"categories": [["true", 40], ["false", 15]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 30.25833}}, {"children": [{"confidence": 0.79611, "count": 15, "id": 34, "objective_summary": {"categories": [["false", 15]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 25.2}}, {"children": [{"confidence": 0.20654, "count": 1, "id": 36, "objective_summary": {"categories": [["false", 1]]}, "output": "false", 
"predicate": {"field": "000003", "operator": ">", "value": 9}}, {"confidence": 0.5101, "count": 4, "id": 37, "objective_summary": {"categories": [["true", 4]]}, "output": "true", "predicate": {"field": "000003", "operator": "<=", "value": 9}}], "confidence": 0.37553, "count": 5, "id": 35, "objective_summary": {"categories": [["true", 4], ["false", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 25.2}}], "confidence": 0.58398, "count": 20, "id": 33, "objective_summary": {"categories": [["false", 16], ["true", 4]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 30.25833}}], "confidence": 0.47366, "count": 75, "id": 13, "objective_summary": {"categories": [["true", 44], ["false", 31]]}, "output": "true", "predicate": {"field": "000001", "operator": "<=", "value": 163}}], "confidence": 0.41832, "count": 84, "id": 11, "objective_summary": {"categories": [["true", 44], ["false", 40]]}, "output": "true", "predicate": {"field": "000001", "operator": "<=", "value": 165}}], "confidence": 0.59044, "count": 151, "id": 1, "objective_summary": {"categories": [["true", 101], ["false", 50]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 141}}, {"children": [{"children": [{"children": [{"children": [{"children": [{"confidence": 0.79611, "count": 15, "id": 43, "objective_summary": {"categories": [["true", 15]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 33.9}}, {"children": [{"children": [{"confidence": 0.43849, "count": 3, "id": 46, "objective_summary": {"categories": [["false", 3]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.7505}}, {"children": [{"confidence": 0.20654, "count": 1, "id": 48, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 33.15}}, {"confidence": 0.34237, "count": 2, "id": 49, "objective_summary": {"categories": 
[["true", 2]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 33.15}}], "confidence": 0.20765, "count": 3, "id": 47, "objective_summary": {"categories": [["true", 2], ["false", 1]]}, "output": "true", "predicate": {"field": "000006", "operator": "<=", "value": 0.7505}}], "confidence": 0.29999, "count": 6, "id": 45, "objective_summary": {"categories": [["false", 4], ["true", 2]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 71}}, {"confidence": 0.56551, "count": 5, "id": 50, "objective_summary": {"categories": [["true", 5]]}, "output": "true", "predicate": {"field": "000002", "operator": "<=", "value": 71}}], "confidence": 0.3538, "count": 11, "id": 44, "objective_summary": {"categories": [["true", 7], ["false", 4]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 33.9}}], "confidence": 0.66468, "count": 26, "id": 42, "objective_summary": {"categories": [["true", 22], ["false", 4]]}, "output": "true", "predicate": {"field": "000000", "operator": ">", "value": 2}}, {"children": [{"children": [{"confidence": 0.70085, "count": 9, "id": 53, "objective_summary": {"categories": [["false", 9]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 52}}, {"confidence": 0.20654, "count": 1, "id": 54, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000002", "operator": "<=", "value": 52}}], "confidence": 0.59584, "count": 10, "id": 52, "objective_summary": {"categories": [["false", 9], ["true", 1]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.896}}, {"confidence": 0.67558, "count": 8, "id": 55, "objective_summary": {"categories": [["true", 8]]}, "output": "true", "predicate": {"field": "000006", "operator": "<=", "value": 0.896}}], "confidence": 0.29031, "count": 18, "id": 51, "objective_summary": {"categories": [["false", 9], ["true", 9]]}, "output": "false", "predicate": 
{"field": "000000", "operator": "<=", "value": 2}}], "confidence": 0.5578, "count": 44, "id": 41, "objective_summary": {"categories": [["true", 31], ["false", 13]]}, "output": "true", "predicate": {"field": "000007", "operator": ">", "value": 26}}, {"children": [{"confidence": 0.20654, "count": 1, "id": 57, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000003", "operator": ">", "value": 37}}, {"children": [{"confidence": 0.20654, "count": 1, "id": 59, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000001", "operator": ">", "value": 131}}, {"confidence": 0.7719, "count": 13, "id": 60, "objective_summary": {"categories": [["false", 13]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 131}}], "confidence": 0.68531, "count": 14, "id": 58, "objective_summary": {"categories": [["false", 13], ["true", 1]]}, "output": "false", "predicate": {"field": "000003", "operator": "<=", "value": 37}}], "confidence": 0.62118, "count": 15, "id": 56, "objective_summary": {"categories": [["false", 13], ["true", 2]]}, "output": "false", "predicate": {"field": "000007", "operator": "<=", "value": 26}}], "confidence": 0.43289, "count": 59, "id": 40, "objective_summary": {"categories": [["true", 33], ["false", 26]]}, "output": "true", "predicate": {"field": "000006", "operator": ">", "value": 0.71247}}, {"children": [{"children": [{"confidence": 0.72246, "count": 10, "id": 63, "objective_summary": {"categories": [["true", 10]]}, "output": "true", "predicate": {"field": "000002", "operator": ">", "value": 65}}, {"confidence": 0.20654, "count": 1, "id": 64, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000002", "operator": "<=", "value": 65}}], "confidence": 0.62264, "count": 11, "id": 62, "objective_summary": {"categories": [["true", 10], ["false", 1]]}, "output": "true", "predicate": {"field": "000000", "operator": 
">", "value": 12}}, {"children": [{"children": [{"children": [{"children": [{"confidence": 0.34237, "count": 2, "id": 69, "objective_summary": {"categories": [["false", 2]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.6945}}, {"confidence": 0.83182, "count": 19, "id": 70, "objective_summary": {"categories": [["true", 19]]}, "output": "true", "predicate": {"field": "000006", "operator": "<=", "value": 0.6945}}], "confidence": 0.71085, "count": 21, "id": 68, "objective_summary": {"categories": [["true", 19], ["false", 2]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 27.55}}, {"confidence": 0.43849, "count": 3, "id": 71, "objective_summary": {"categories": [["false", 3]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 27.55}}], "confidence": 0.59529, "count": 24, "id": 67, "objective_summary": {"categories": [["true", 19], ["false", 5]]}, "output": "true", "predicate": {"field": "000006", "operator": ">", "value": 0.52269}}, {"children": [{"confidence": 0.43849, "count": 3, "id": 73, "objective_summary": {"categories": [["true", 3]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 45.7125}}, {"children": [{"children": [{"confidence": 0.7575, "count": 12, "id": 76, "objective_summary": {"categories": [["false", 12]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 84}}, {"children": [{"confidence": 0.64566, "count": 7, "id": 78, "objective_summary": {"categories": [["false", 7]]}, "output": "false", "predicate": {"field": "000004", "operator": ">", "value": 127}}, {"children": [{"confidence": 0.5101, "count": 4, "id": 80, "objective_summary": {"categories": [["true", 4]]}, "output": "true", "predicate": {"field": "000003", "operator": ">", "value": 28}}, {"children": [{"confidence": 0.60966, "count": 6, "id": 82, "objective_summary": {"categories": [["false", 6]]}, "output": "false", "predicate": {"field": 
"000003", "operator": ">", "value": 7}}, {"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 85, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 35.65}}, {"confidence": 0.60966, "count": 6, "id": 86, "objective_summary": {"categories": [["false", 6]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 35.65}}], "confidence": 0.48687, "count": 7, "id": 84, "objective_summary": {"categories": [["false", 6], ["true", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 34.15}}, {"children": [{"confidence": 0.64566, "count": 7, "id": 88, "objective_summary": {"categories": [["true", 7]]}, "output": "true", "predicate": {"field": "000006", "operator": ">", "value": 0.261}}, {"children": [{"confidence": 0.34237, "count": 2, "id": 90, "objective_summary": {"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 31.6}}, {"confidence": 0.5101, "count": 4, "id": 91, "objective_summary": {"categories": [["false", 4]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 31.6}}], "confidence": 0.29999, "count": 6, "id": 89, "objective_summary": {"categories": [["false", 4], ["true", 2]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.261}}], "confidence": 0.42369, "count": 13, "id": 87, "objective_summary": {"categories": [["true", 9], ["false", 4]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 34.15}}], "confidence": 0.29929, "count": 20, "id": 83, "objective_summary": {"categories": [["false", 10], ["true", 10]]}, "output": "false", "predicate": {"field": "000003", "operator": "<=", "value": 7}}], "confidence": 0.42535, "count": 26, "id": 81, "objective_summary": {"categories": [["false", 16], ["true", 10]]}, "output": "false", "predicate": {"field": "000003", "operator": 
"<=", "value": 28}}], "confidence": 0.36142, "count": 30, "id": 79, "objective_summary": {"categories": [["false", 16], ["true", 14]]}, "output": "false", "predicate": {"field": "000004", "operator": "<=", "value": 127}}], "confidence": 0.461, "count": 37, "id": 77, "objective_summary": {"categories": [["false", 23], ["true", 14]]}, "output": "false", "predicate": {"field": "000002", "operator": "<=", "value": 84}}], "confidence": 0.57591, "count": 49, "id": 75, "objective_summary": {"categories": [["false", 35], ["true", 14]]}, "output": "false", "predicate": {"field": "000000", "operator": ">", "value": 2}}, {"children": [{"confidence": 0.81568, "count": 17, "id": 93, "objective_summary": {"categories": [["false", 17]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 35}}, {"children": [{"children": [{"confidence": 0.34237, "count": 2, "id": 96, "objective_summary": {"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000002", "operator": ">", "value": 56}}, {"confidence": 0.20654, "count": 1, "id": 97, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000002", "operator": "<=", "value": 56}}], "confidence": 0.20765, "count": 3, "id": 95, "objective_summary": {"categories": [["true", 2], ["false", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 33.8}}, {"confidence": 0.7575, "count": 12, "id": 98, "objective_summary": {"categories": [["false", 12]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 33.8}}], "confidence": 0.62118, "count": 15, "id": 94, "objective_summary": {"categories": [["false", 13], ["true", 2]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 35}}], "confidence": 0.79853, "count": 32, "id": 92, "objective_summary": {"categories": [["false", 30], ["true", 2]]}, "output": "false", "predicate": {"field": "000000", "operator": "<=", "value": 2}}], 
"confidence": 0.70295, "count": 81, "id": 74, "objective_summary": {"categories": [["false", 65], ["true", 16]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 45.7125}}], "confidence": 0.67353, "count": 84, "id": 72, "objective_summary": {"categories": [["false", 65], ["true", 19]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.52269}}], "confidence": 0.55441, "count": 108, "id": 66, "objective_summary": {"categories": [["false", 70], ["true", 38]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 113}}, {"children": [{"confidence": 0.43849, "count": 3, "id": 100, "objective_summary": {"categories": [["true", 3]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 50.85}}, {"children": [{"children": [{"children": [{"children": [{"confidence": 0.7719, "count": 13, "id": 105, "objective_summary": {"categories": [["false", 13]]}, "output": "false", "predicate": {"field": "000002", "operator": ">", "value": 77}}, {"children": [{"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 109, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 35.9}}, {"confidence": 0.34237, "count": 2, "id": 110, "objective_summary": {"categories": [["true", 2]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 35.9}}], "confidence": 0.20765, "count": 3, "id": 108, "objective_summary": {"categories": [["true", 2], ["false", 1]]}, "output": "true", "predicate": {"field": "000000", "operator": ">", "value": 4}}, {"confidence": 0.78468, "count": 14, "id": 111, "objective_summary": {"categories": [["false", 14]]}, "output": "false", "predicate": {"field": "000000", "operator": "<=", "value": 4}}], "confidence": 0.65663, "count": 17, "id": 107, "objective_summary": {"categories": [["false", 15], ["true", 2]]}, "output": "false", "predicate": 
{"field": "000006", "operator": ">", "value": 0.2965}}, {"children": [{"confidence": 0.56551, "count": 5, "id": 113, "objective_summary": {"categories": [["true", 5]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 29.7}}, {"confidence": 0.20654, "count": 1, "id": 114, "objective_summary": {"categories": [["false", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 29.7}}], "confidence": 0.43649, "count": 6, "id": 112, "objective_summary": {"categories": [["true", 5], ["false", 1]]}, "output": "true", "predicate": {"field": "000006", "operator": "<=", "value": 0.2965}}], "confidence": 0.49134, "count": 23, "id": 106, "objective_summary": {"categories": [["false", 16], ["true", 7]]}, "output": "false", "predicate": {"field": "000002", "operator": "<=", "value": 77}}], "confidence": 0.64972, "count": 36, "id": 104, "objective_summary": {"categories": [["false", 29], ["true", 7]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 27.05}}, {"confidence": 0.5101, "count": 4, "id": 115, "objective_summary": {"categories": [["true", 4]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 27.05}}], "confidence": 0.57165, "count": 40, "id": 103, "objective_summary": {"categories": [["false", 29], ["true", 11]]}, "output": "false", "predicate": {"field": "000001", "operator": ">", "value": 105}}, {"children": [{"children": [{"children": [{"confidence": 0.60966, "count": 6, "id": 119, "objective_summary": {"categories": [["false", 6]]}, "output": "false", "predicate": {"field": "000004", "operator": ">", "value": 42}}, {"children": [{"confidence": 0.5101, "count": 4, "id": 121, "objective_summary": {"categories": [["false", 4]]}, "output": "false", "predicate": {"field": "000007", "operator": ">", "value": 58}}, {"confidence": 0.60966, "count": 6, "id": 122, "objective_summary": {"categories": [["true", 6]]}, "output": "true", "predicate": {"field": 
"000007", "operator": "<=", "value": 58}}], "confidence": 0.31267, "count": 10, "id": 120, "objective_summary": {"categories": [["true", 6], ["false", 4]]}, "output": "true", "predicate": {"field": "000004", "operator": "<=", "value": 42}}], "confidence": 0.38641, "count": 16, "id": 118, "objective_summary": {"categories": [["false", 10], ["true", 6]]}, "output": "false", "predicate": {"field": "000006", "operator": ">", "value": 0.2425}}, {"confidence": 0.80639, "count": 16, "id": 123, "objective_summary": {"categories": [["false", 16]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.2425}}], "confidence": 0.64691, "count": 32, "id": 117, "objective_summary": {"categories": [["false", 26], ["true", 6]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 35.525}}, {"confidence": 0.92444, "count": 47, "id": 124, "objective_summary": {"categories": [["false", 47]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 35.525}}], "confidence": 0.84404, "count": 79, "id": 116, "objective_summary": {"categories": [["false", 73], ["true", 6]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 105}}], "confidence": 0.78309, "count": 119, "id": 102, "objective_summary": {"categories": [["false", 102], ["true", 17]]}, "output": "false", "predicate": {"field": "000007", "operator": ">", "value": 22}}, {"confidence": 0.9162, "count": 42, "id": 125, "objective_summary": {"categories": [["false", 42]]}, "output": "false", "predicate": {"field": "000007", "operator": "<=", "value": 22}}], "confidence": 0.83741, "count": 161, "id": 101, "objective_summary": {"categories": [["false", 144], ["true", 17]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 50.85}}], "confidence": 0.81914, "count": 164, "id": 99, "objective_summary": {"categories": [["false", 144], ["true", 20]]}, "output": "false", "predicate": {"field": "000001", 
"operator": "<=", "value": 113}}], "confidence": 0.73427, "count": 272, "id": 65, "objective_summary": {"categories": [["false", 214], ["true", 58]]}, "output": "false", "predicate": {"field": "000000", "operator": "<=", "value": 12}}], "confidence": 0.70667, "count": 283, "id": 61, "objective_summary": {"categories": [["false", 215], ["true", 68]]}, "output": "false", "predicate": {"field": "000006", "operator": "<=", "value": 0.71247}}], "confidence": 0.65427, "count": 342, "id": 39, "objective_summary": {"categories": [["false", 241], ["true", 101]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 26.20018}}, {"children": [{"children": [{"confidence": 0.83182, "count": 19, "id": 128, "objective_summary": {"categories": [["false", 19]]}, "output": "false", "predicate": {"field": "000003", "operator": ">", "value": 6}}, {"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 131, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": ">", "value": 25.15}}, {"confidence": 0.56551, "count": 5, "id": 132, "objective_summary": {"categories": [["false", 5]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 25.15}}], "confidence": 0.43649, "count": 6, "id": 130, "objective_summary": {"categories": [["false", 5], ["true", 1]]}, "output": "false", "predicate": {"field": "000005", "operator": ">", "value": 10.55}}, {"confidence": 0.20654, "count": 1, "id": 133, "objective_summary": {"categories": [["true", 1]]}, "output": "true", "predicate": {"field": "000005", "operator": "<=", "value": 10.55}}], "confidence": 0.35893, "count": 7, "id": 129, "objective_summary": {"categories": [["false", 5], ["true", 2]]}, "output": "false", "predicate": {"field": "000003", "operator": "<=", "value": 6}}], "confidence": 0.75858, "count": 26, "id": 127, "objective_summary": {"categories": [["false", 24], ["true", 2]]}, "output": "false", "predicate": 
{"field": "000001", "operator": ">", "value": 122}}, {"confidence": 0.96113, "count": 95, "id": 134, "objective_summary": {"categories": [["false", 95]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 122}}], "confidence": 0.94173, "count": 121, "id": 126, "objective_summary": {"categories": [["false", 119], ["true", 2]]}, "output": "false", "predicate": {"field": "000005", "operator": "<=", "value": 26.20018}}], "confidence": 0.73746, "count": 463, "id": 38, "objective_summary": {"categories": [["false", 360], ["true", 103]]}, "output": "false", "predicate": {"field": "000001", "operator": "<=", "value": 141}}], "confidence": 0.62955, "count": 614, "id": 0, "objective_summary": {"categories": [["false", 410], ["true", 204]]}, "output": "false", "predicate": true}}, "name": "diabetes - 1", "name_options": "512-node, pruned, deterministic order", "node_threshold": 512, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000008", "objective_field_name": "diabetes", "objective_field_type": "categorical", "objective_fields": ["000008"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": null, "randomize": false, "range": null, "replacement": false, "resource": "model/62605ac323541b220100748c", "rows": 614, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 20939, "source": "source/62605aa75198db5eed003416", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 0.0}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2022-04-20T19:11:31.004000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/tests/my_dataset/dataset_62e2bd555687096969004659 
b/bigml/tests/my_dataset/dataset_62e2bd555687096969004659 new file mode 100644 index 00000000..0e38a423 --- /dev/null +++ b/bigml/tests/my_dataset/dataset_62e2bd555687096969004659 @@ -0,0 +1 @@ +{"code": 200, "resource": "dataset/62e2bd555687096969004659", "location": "https://bigml.io/andromeda/dataset/62e2bd555687096969004659", "object": {"all_fields": true, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 9, "configuration": null, "configuration_status": false, "correlations": {}, "created": "2022-07-28T16:46:13.389000", "creator": "mmartin", "dataset_origin_status": true, "description": "", "download": {"code": 0, "decimal_separator": ".", "excluded_input_fields": [], "header": true, "input_fields": [], "message": "", "new_line": "lf", "preview": [], "separator": ","}, "evaluation": null, "excluded_fields": [], "field_types": {"categorical": 1, "datetime": 0, "image": 0, "items": 0, "numeric": 8, "path": 0, "preferred": 9, "regions": 0, "text": 0, "total": 9}, "fields": {"000000": {"column_number": 0, "datatype": "int8", "name": "pregnancies", "optype": "numeric", "order": 0, "preferred": true, "summary": {"counts": [[0, 111], [1, 135], [2, 103], [3, 75], [4, 68], [5, 57], [6, 50], [7, 45], [8, 38], [9, 28], [10, 24], [11, 11], [12, 9], [13, 10], [14, 2], [15, 1], [17, 1]], "exact_histogram": {"populations": [246, 178, 125, 95, 66, 35, 19, 3, 1], "start": 0, "width": 2}, "kurtosis": 0.15038, "maximum": 17, "mean": 3.84505, "median": 3, "minimum": 0, "missing_count": 0, "population": 768, "skewness": 0.89991, "standard_deviation": 3.36958, "sum": 2953, "sum_squares": 20063, "variance": 11.35406}}, "000001": {"column_number": 1, "datatype": "int16", "name": "plasma glucose", "optype": "numeric", "order": 1, "preferred": true, "summary": {"bins": [[0, 5], [44, 1], [56.66667, 3], [61.5, 2], [67.2, 5], [73.3125, 16], [79.47619, 21], [84.03448, 29], [89.83333, 54], [95.45455, 44], [100.69231, 65], [105.68182, 44], [109.46667, 45], 
[113.525, 40], [118.54902, 51], [123.98182, 55], [128.90476, 42], [133.45, 20], [137.82353, 34], [142.65217, 23], [146.4, 25], [150.92857, 14], [154.5625, 16], [158.15385, 13], [162.4, 15], [166.66667, 15], [172.21429, 14], [176.16667, 6], [180.29412, 17], [184, 6], [188.41667, 12], [195.6875, 16]], "exact_histogram": {"populations": [5, 0, 0, 0, 1, 3, 7, 25, 63, 93, 117, 94, 102, 61, 54, 41, 31, 25, 28, 18], "start": 0, "width": 10}, "kurtosis": 0.62881, "maximum": 199, "mean": 120.89453, "median": 117, "minimum": 0, "missing_count": 0, "population": 768, "skewness": 0.17341, "standard_deviation": 31.97262, "sum": 92847, "sum_squares": 12008759, "variance": 1022.24831}}, "000002": {"column_number": 2, "datatype": "int8", "name": "blood pressure", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[0, 35], [24, 1], [30, 2], [39, 2], [44.66667, 6], [49.44444, 18], [52, 11], [55.04, 25], [58, 21], [60.95833, 72], [64.8375, 80], [68, 45], [70, 57], [72, 44], [74.86869, 99], [78, 45], [80, 40], [82, 30], [84.96, 50], [88, 25], [90, 22], [92, 8], [94.81818, 11], [98, 3], [100, 3], [102, 1], [104, 2], [106, 3], [108, 2], [110, 3], [114, 1], [122, 1]], "exact_histogram": {"populations": [35, 0, 0, 0, 1, 0, 2, 1, 5, 7, 35, 35, 115, 82, 153, 92, 93, 52, 36, 8, 6, 5, 4, 0, 1], "start": 0, "width": 5}, "kurtosis": 5.13869, "maximum": 122, "mean": 69.10547, "median": 72, "minimum": 0, "missing_count": 0, "population": 768, "skewness": -1.84001, "standard_deviation": 19.35581, "sum": 53073, "sum_squares": 3954989, "variance": 374.64727}}, "000003": {"column_number": 3, "datatype": "int8", "name": "triceps skin thickness", "optype": "numeric", "order": 3, "preferred": true, "summary": {"bins": [[0, 227], [7.5, 4], [10.54545, 11], [12, 7], [13.35294, 17], [15.3, 20], [17, 14], [18.47368, 38], [20.43478, 23], [22.57895, 38], [24.57143, 28], [26.58974, 39], [28.45946, 37], [30.41304, 46], [32.39216, 51], [34.65217, 23], [36.53333, 30], [38.72, 25], [40.48387, 
31], [42.35294, 17], [44.54545, 11], [46.33333, 12], [48, 4], [49, 3], [50, 3], [51, 1], [52, 2], [54, 2], [56, 1], [60, 1], [63, 1], [99, 1]], "exact_histogram": {"open_max": 3, "populations": [227, 0, 0, 2, 2, 11, 18, 20, 20, 38, 23, 38, 28, 39, 37, 46, 51, 23, 30, 25, 31, 17, 11, 12, 7, 4, 2, 2, 1], "start": 0, "width": 2}, "kurtosis": -0.52449, "maximum": 99, "mean": 20.53646, "median": 23, "minimum": 0, "missing_count": 0, "population": 768, "skewness": 0.10916, "standard_deviation": 15.95222, "sum": 15772, "sum_squares": 519082, "variance": 254.47325}}, "000004": {"column_number": 4, "datatype": "int16", "name": "insulin", "optype": "numeric", "order": 4, "preferred": true, "summary": {"bins": [[0, 374], [20.3, 10], [48.36, 50], [69.88636, 44], [90.93617, 47], [112.23256, 43], [134.95556, 45], [155.94444, 18], [173.26667, 30], [189.61111, 18], [207.23529, 17], [226.66667, 9], [239.4, 5], [257, 6], [277.36364, 11], [298.5, 6], [324.75, 8], [338.5, 2], [368.33333, 3], [393.66667, 3], [415, 1], [440, 1], [465, 1], [479.4, 5], [495, 2], [510, 1], [542.66667, 3], [579, 1], [600, 1], [680, 1], [744, 1], [846, 1]], "exact_histogram": {"open_max": 3, "populations": [379, 12, 43, 45, 39, 40, 37, 32, 25, 27, 17, 11, 7, 9, 7, 4, 8, 1, 3, 2, 2, 0, 1, 3, 5, 1, 0, 3, 1, 0, 1], "start": 0, "width": 20}, "kurtosis": 7.15957, "maximum": 846, "mean": 79.79948, "median": 30.5, "minimum": 0, "missing_count": 0, "population": 768, "skewness": 2.26781, "standard_deviation": 115.244, "sum": 61286, "sum_squares": 15077256, "variance": 13281.18008}}, "000005": {"column_number": 5, "datatype": "double", "name": "bmi", "optype": "numeric", "order": 5, "preferred": true, "summary": {"bins": [[0, 11], [18.25, 4], [19.6, 11], [20.90909, 11], [21.93333, 12], [23.00323, 31], [24.37179, 39], [25.505, 40], [26.55357, 28], [27.64667, 45], [28.77692, 39], [29.85581, 43], [30.83333, 45], [31.88125, 32], [32.8569, 58], [34.11587, 63], [35.42353, 51], [36.70286, 35], [37.95, 38], [39.17586, 29], 
[40.37143, 21], [41.90588, 17], [43.33462, 26], [45.23571, 14], [46.41111, 9], [48.225, 4], [49.65, 4], [52.675, 4], [55, 1], [57.3, 1], [59.4, 1], [67.1, 1]], "exact_histogram": {"populations": [11, 0, 0, 13, 93, 179, 224, 150, 62, 27, 5, 3, 0, 1], "start": 0, "width": 5}, "kurtosis": 3.26126, "maximum": 67.1, "mean": 31.99258, "median": 32, "minimum": 0, "missing_count": 0, "population": 768, "skewness": -0.42814, "standard_deviation": 7.88416, "sum": 24570.3, "sum_squares": 833743.95, "variance": 62.15998}}, "000006": {"column_number": 6, "datatype": "double", "name": "diabetes pedigree", "optype": "numeric", "order": 6, "preferred": true, "summary": {"bins": [[0.096, 16], [0.14349, 59], [0.19135, 78], [0.2508, 118], [0.29704, 56], [0.34648, 60], [0.40083, 47], [0.44738, 39], [0.49728, 29], [0.54156, 36], [0.5869, 29], [0.62955, 22], [0.68606, 47], [0.74575, 24], [0.8084, 15], [0.86, 20], [0.9389, 21], [1.015, 5], [1.0792, 5], [1.13722, 9], [1.20238, 8], [1.2702, 5], [1.33067, 3], [1.39375, 4], [1.45933, 3], [1.6, 1], [1.70933, 3], [1.781, 1], [1.893, 1], [2.137, 1], [2.3085, 2], [2.42, 1]], "exact_histogram": {"populations": [9, 116, 179, 104, 83, 71, 62, 39, 33, 21, 9, 13, 9, 6, 4, 0, 3, 2, 1, 0, 0, 1, 1, 1, 1], "start": 0, "width": 0.1}, "kurtosis": 5.55079, "maximum": 2.42, "mean": 0.47188, "median": 0.3725, "minimum": 0.078, "missing_count": 0, "population": 768, "skewness": 1.91616, "standard_deviation": 0.33133, "sum": 362.401, "sum_squares": 255.20866, "variance": 0.10978}}, "000007": {"column_number": 7, "datatype": "int8", "name": "age", "optype": "numeric", "order": 7, "preferred": true, "summary": {"bins": [[21.53333, 135], [23, 38], [24.51064, 94], [26.49231, 65], [28.45313, 64], [30.53333, 45], [32.51515, 33], [34.41667, 24], [36.54286, 35], [38.42857, 28], [40.62857, 35], [42.41935, 31], [44.65217, 23], [46.31579, 19], [48.5, 10], [50.5, 16], [52.38462, 13], [54.4, 10], [56.625, 8], [58.3, 10], [60.28571, 7], [62, 4], [63, 4], [64, 1], [65, 3], 
[66, 4], [67, 3], [68, 1], [69, 2], [70, 1], [72, 1], [81, 1]], "exact_histogram": {"populations": [63, 110, 94, 65, 64, 45, 33, 24, 35, 28, 35, 31, 23, 19, 10, 16, 13, 10, 8, 10, 7, 8, 4, 7, 3, 1, 1, 0, 0, 0, 1], "start": 20, "width": 2}, "kurtosis": 0.63118, "maximum": 81, "mean": 33.24089, "median": 29, "minimum": 21, "missing_count": 0, "population": 768, "skewness": 1.12739, "standard_deviation": 11.76023, "sum": 25529, "sum_squares": 954685, "variance": 138.30305}}, "000008": {"column_number": 8, "datatype": "string", "name": "diabetes", "optype": "categorical", "order": 8, "preferred": true, "summary": {"categories": [["false", 500], ["true", 268]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "fields_meta": {"count": 9, "effective_fields": 9, "limit": -1, "offset": 0, "parent_optypes": {}, "preferred": 9, "provenances": {}, "query_total": 9, "total": 9}, "input_fields": ["000000", "000001", "000002", "000003", "000004", "000005", "000006", "000007", "000008"], "juxtapose": false, "locale": "en_US", "missing_numeric_rows": 0, "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "name": "diabetes", "name_options": "768 instances, 9 fields (1 categorical, 8 numeric)", "new_fields": [], "number_of_anomalies": 0, "number_of_anomalyscores": 0, "number_of_associations": 0, "number_of_associationsets": 0, "number_of_batchanomalyscores": 0, "number_of_batchcentroids": 0, "number_of_batchpredictions": 0, "number_of_batchprojections": 0, "number_of_batchtopicdistributions": 0, "number_of_centroids": 0, "number_of_clusters": 0, "number_of_correlations": 0, "number_of_deepnets": 0, "number_of_ensembles": 0, "number_of_evaluations": 0, "number_of_forecasts": 0, "number_of_linearregressions": 0, "number_of_logisticregressions": 0, "number_of_models": 0, "number_of_optimls": 0, "number_of_pca": 0, "number_of_predictions": 0, "number_of_projections": 
0, "number_of_statisticaltests": 0, "number_of_timeseries": 0, "number_of_topicdistributions": 0, "number_of_topicmodels": 0, "objective_field": {"column_number": 8, "datatype": "string", "id": "000008", "name": "diabetes", "optype": "categorical", "order": 8, "term_analysis": {"enabled": true}}, "optiml": null, "optiml_status": false, "origin_batch_dataset": null, "origin_batch_dataset_status": false, "origin_batch_model": null, "origin_batch_model_status": false, "origin_batch_resource": null, "origin_batch_status": false, "output_fields": [], "price": 0.0, "private": true, "project": null, "refresh_field_types": false, "refresh_objective": false, "refresh_preferred": false, "resource": "dataset/62e2bd555687096969004659", "row_offset": 0, "row_step": 1, "rows": 768, "shared": false, "size": 26191, "source": "source/62e2bd535687096969004656", "source_status": true, "sql_output_fields": [], "statisticaltest": null, "status": {"bytes": 26191, "code": 5, "elapsed": 1203, "extracted_count": 0, "field_errors": {}, "message": "The dataset has been created", "progress": 1, "row_format_errors": {"total": 0}, "serialized_rows": 768}, "subscription": true, "tags": [], "tde_download": {"code": 0, "excluded_input_fields": [], "input_fields": [], "message": "", "preview": []}, "term_limit": 1000, "timeseries": null, "timeseries_status": false, "type": 0, "updated": "2022-07-28T16:46:16.530000"}, "error": null} \ No newline at end of file diff --git a/bigml/tests/my_dataset/dataset_62e2bd65d432eb563000442e b/bigml/tests/my_dataset/dataset_62e2bd65d432eb563000442e new file mode 100644 index 00000000..0c24d0dd --- /dev/null +++ b/bigml/tests/my_dataset/dataset_62e2bd65d432eb563000442e @@ -0,0 +1 @@ +{"code": 200, "resource": "dataset/62e2bd65d432eb563000442e", "location": "https://bigml.io/andromeda/dataset/62e2bd65d432eb563000442e", "object": {"all_fields": true, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 9, "configuration": null, 
"configuration_status": false, "correlations": {}, "created": "2022-07-28T16:46:29.076000", "creator": "mmartin", "dataset_origin_status": true, "description": "", "download": {"code": 0, "decimal_separator": ".", "excluded_input_fields": [], "header": true, "input_fields": [], "message": "", "new_line": "lf", "preview": [], "separator": ","}, "evaluation": null, "excluded_fields": [], "field_types": {"categorical": 1, "datetime": 0, "image": 0, "items": 0, "numeric": 8, "path": 0, "preferred": 9, "regions": 0, "text": 0, "total": 9}, "fields": {"000000": {"column_number": 0, "datatype": "int8", "name": "pregnancies", "optype": "numeric", "order": 0, "preferred": true, "summary": {"counts": [[0, 25], [1, 25], [2, 23], [3, 19], [4, 10], [5, 12], [6, 10], [7, 8], [8, 3], [9, 6], [10, 8], [11, 2], [13, 2], [14, 1]], "exact_histogram": {"populations": [50, 42, 22, 18, 9, 10, 2, 1], "start": 0, "width": 2}, "kurtosis": 0.16147, "maximum": 14, "mean": 3.66234, "median": 3, "minimum": 0, "missing_count": 0, "population": 154, "skewness": 0.95878, "standard_deviation": 3.32198, "sum": 564, "sum_squares": 3754, "variance": 11.03557}}, "000001": {"column_number": 1, "datatype": "int16", "name": "plasma glucose", "optype": "numeric", "order": 1, "preferred": true, "summary": {"bins": [[56, 1], [72, 1], [77.33333, 3], [81, 3], [83.71429, 7], [87.8, 5], [91.42857, 7], [95.14286, 7], [99.63636, 11], [102.6, 5], [107, 13], [111.22222, 9], [115.22222, 9], [120.25, 4], [123.5, 14], [128.81818, 11], [132.25, 4], [135, 3], [138.25, 4], [141, 1], [143.66667, 3], [146.42857, 7], [150.5, 2], [154.66667, 3], [161.5, 2], [164, 1], [170.5, 2], [176, 3], [179.66667, 3], [184, 2], [189, 1], [194.33333, 3]], "exact_histogram": {"populations": [1, 0, 0, 1, 3, 9, 6, 10, 9, 11, 13, 13, 6, 14, 10, 8, 7, 4, 7, 4, 1, 3, 0, 2, 4, 4, 1, 2, 1], "start": 55, "width": 5}, "kurtosis": 0.04986, "maximum": 196, "mean": 120.01948, "median": 115.5, "minimum": 56, "missing_count": 0, "population": 154, 
"skewness": 0.67008, "standard_deviation": 28.73919, "sum": 18483, "sum_squares": 2344689, "variance": 825.94079}}, "000002": {"column_number": 2, "datatype": "int8", "name": "blood pressure", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[0, 7], [24, 1], [48, 2], [52, 2], [54, 4], [56, 3], [58, 2], [60, 8], [61, 1], [62, 11], [64, 11], [65, 1], [66, 6], [68, 7], [70, 14], [72, 7], [74, 7], [75, 1], [76, 7], [78, 11], [80, 6], [82, 6], [84, 2], [85, 1], [86, 6], [88, 8], [90, 3], [92, 4], [94, 2], [100, 1], [104, 1], [110, 1]], "exact_histogram": {"populations": [7, 0, 0, 0, 1, 0, 0, 0, 0, 2, 6, 5, 31, 14, 28, 19, 14, 15, 9, 0, 2, 0, 1], "start": 0, "width": 5}, "kurtosis": 5.20109, "maximum": 110, "mean": 69.09091, "median": 70, "minimum": 0, "missing_count": 0, "population": 154, "skewness": -1.89704, "standard_deviation": 19.33684, "sum": 10640, "sum_squares": 792336, "variance": 373.91325}}, "000003": {"column_number": 3, "datatype": "int8", "name": "triceps skin thickness", "optype": "numeric", "order": 3, "preferred": true, "summary": {"bins": [[0, 38], [8, 1], [10.5, 4], [12.25, 4], [15.4, 5], [17, 1], [18.5, 10], [20.5, 6], [23, 7], [25.2, 10], [27.5, 10], [29, 4], [30, 9], [31, 5], [32, 4], [33, 3], [34, 2], [35, 2], [36, 5], [37, 2], [38, 1], [39, 5], [40, 1], [41, 2], [42, 3], [43, 1], [44, 1], [46, 3], [49, 1], [50, 1], [54, 2], [60, 1]], "exact_histogram": {"populations": [38, 0, 0, 0, 1, 4, 4, 3, 3, 10, 6, 7, 8, 7, 9, 14, 7, 4, 7, 6, 3, 4, 1, 3, 1, 1, 0, 2, 0, 0, 1], "start": 0, "width": 2}, "kurtosis": -0.89167, "maximum": 60, "mean": 21.57143, "median": 25, "minimum": 0, "missing_count": 0, "population": 154, "skewness": -0.05392, "standard_deviation": 15.31194, "sum": 3322, "sum_squares": 107532, "variance": 234.45565}}, "000004": {"column_number": 4, "datatype": "int16", "name": "insulin", "optype": "numeric", "order": 4, "preferred": true, "summary": {"bins": [[0, 79], [18, 1], [24, 2], [42, 4], [47, 3], [58, 2], [67, 
4], [76.66667, 6], [82, 1], [94, 2], [100, 2], [106.875, 8], [114.33333, 3], [120, 1], [125, 1], [132.33333, 3], [140, 2], [146, 1], [156.85714, 7], [165, 3], [170, 1], [176, 1], [181.75, 4], [194, 1], [228, 1], [249.5, 2], [277.5, 2], [284.66667, 3], [300, 1], [360, 1], [402, 1], [478, 1]], "exact_histogram": {"populations": [80, 2, 8, 11, 3, 13, 5, 8, 7, 5, 0, 1, 2, 2, 3, 1, 0, 0, 1, 0, 1, 0, 0, 1], "start": 0, "width": 20}, "kurtosis": 2.76998, "maximum": 478, "mean": 68.12338, "median": 0, "minimum": 0, "missing_count": 0, "population": 154, "skewness": 1.62114, "standard_deviation": 93.77705, "sum": 10491, "sum_squares": 2060185, "variance": 8794.13501}}, "000005": {"column_number": 5, "datatype": "double", "name": "bmi", "optype": "numeric", "order": 5, "preferred": true, "summary": {"bins": [[0, 2], [18.2, 1], [19.5, 2], [20.15, 2], [21.05, 2], [21.8, 2], [22.65, 4], [23.325, 4], [24.35, 4], [25.4, 6], [26.175, 4], [27.44667, 15], [28.71667, 6], [29.81111, 9], [31.31667, 6], [32.2, 9], [33.06667, 3], [34.12353, 17], [35.05, 4], [35.66667, 6], [36.68571, 14], [38.16667, 6], [39.24, 5], [40.3, 2], [42.14, 5], [43.1, 6], [44.1, 2], [46.1, 1], [46.8, 1], [49.45, 2], [52.3, 1], [67.1, 1]], "exact_histogram": {"populations": [2, 0, 0, 4, 17, 36, 40, 34, 15, 4, 1, 0, 0, 1], "start": 0, "width": 5}, "kurtosis": 3.57751, "maximum": 67.1, "mean": 32.29351, "median": 32.4, "minimum": 0, "missing_count": 0, "population": 154, "skewness": -0.11158, "standard_deviation": 8.21685, "sum": 4973.2, "sum_squares": 170932.12, "variance": 67.51669}}, "000006": {"column_number": 6, "datatype": "double", "name": "diabetes pedigree", "optype": "numeric", "order": 6, "preferred": true, "summary": {"bins": [[0.111, 3], [0.15418, 11], [0.19875, 16], [0.24761, 23], [0.27833, 6], [0.309, 4], [0.33375, 12], [0.3698, 15], [0.39933, 3], [0.426, 1], [0.45025, 8], [0.48875, 4], [0.52225, 4], [0.551, 1], [0.57383, 6], [0.598, 3], [0.633, 2], [0.6606, 5], [0.6895, 2], [0.733, 4], [0.76367, 3], 
[0.787, 1], [0.816, 1], [0.838, 2], [0.878, 2], [0.933, 1], [0.95825, 4], [1.0645, 2], [1.098, 2], [1.189, 1], [1.224, 1], [1.893, 1]], "exact_histogram": {"populations": [21, 39, 31, 15, 13, 10, 8, 5, 5, 3, 2, 1, 0, 0, 0, 0, 0, 1], "start": 0.1, "width": 0.1}, "kurtosis": 4.04452, "maximum": 1.893, "mean": 0.44171, "median": 0.3565, "minimum": 0.1, "missing_count": 0, "population": 154, "skewness": 1.65698, "standard_deviation": 0.28009, "sum": 68.023, "sum_squares": 42.04958, "variance": 0.07845}}, "000007": {"column_number": 7, "datatype": "int8", "name": "age", "optype": "numeric", "order": 7, "preferred": true, "summary": {"bins": [[21.375, 24], [23.5, 18], [25.40909, 22], [27.5, 16], [29.42857, 14], [31, 6], [32, 5], [33, 1], [34, 4], [35, 1], [36, 1], [37, 5], [38, 3], [40, 5], [41, 5], [42, 4], [43, 2], [44, 1], [45, 3], [47, 1], [48, 1], [49, 1], [51, 1], [52, 1], [54, 1], [55, 1], [56, 1], [58, 2], [62, 1], [65, 1], [66, 1], [69, 1]], "exact_histogram": {"populations": [15, 18, 22, 17, 16, 12, 6, 5, 6, 3, 10, 6, 4, 1, 2, 1, 1, 2, 1, 2, 0, 1, 1, 1, 1], "start": 20, "width": 2}, "kurtosis": 1.75384, "maximum": 69, "mean": 31.55844, "median": 28, "minimum": 21, "missing_count": 0, "population": 154, "skewness": 1.43278, "standard_deviation": 10.43498, "sum": 4860, "sum_squares": 170034, "variance": 108.88872}}, "000008": {"column_number": 8, "datatype": "string", "name": "diabetes", "optype": "categorical", "order": 8, "preferred": true, "summary": {"categories": [["false", 106], ["true", 48]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "fields_meta": {"count": 9, "effective_fields": 9, "limit": -1, "offset": 0, "parent_optypes": {}, "preferred": 9, "provenances": {}, "query_total": 9, "total": 9}, "input_fields": ["000000", "000001", "000002", "000003", "000004", "000005", "000006", "000007", "000008"], "juxtapose": false, "locale": "en_US", "missing_numeric_rows": 0, "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", 
"#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "name": "diabetes", "name_options": "154 instances, 9 fields (1 categorical, 8 numeric), sample rate=0.2, out of bag", "new_fields": [], "number_of_anomalies": 0, "number_of_anomalyscores": 0, "number_of_associations": 0, "number_of_associationsets": 0, "number_of_batchanomalyscores": 0, "number_of_batchcentroids": 0, "number_of_batchpredictions": 0, "number_of_batchprojections": 0, "number_of_batchtopicdistributions": 0, "number_of_centroids": 0, "number_of_clusters": 0, "number_of_correlations": 0, "number_of_deepnets": 0, "number_of_ensembles": 0, "number_of_evaluations": 1, "number_of_forecasts": 0, "number_of_linearregressions": 0, "number_of_logisticregressions": 0, "number_of_models": 0, "number_of_optimls": 0, "number_of_pca": 0, "number_of_predictions": 0, "number_of_projections": 0, "number_of_statisticaltests": 0, "number_of_timeseries": 0, "number_of_topicdistributions": 0, "number_of_topicmodels": 0, "objective_field": {"column_number": 8, "datatype": "string", "id": "000008", "name": "diabetes", "optype": "categorical", "order": 8, "term_analysis": {"enabled": true}}, "optiml": null, "optiml_status": false, "origin_batch_dataset": null, "origin_batch_dataset_status": false, "origin_batch_model": null, "origin_batch_model_status": false, "origin_batch_resource": null, "origin_batch_status": false, "origin_dataset": "dataset/62e2bd555687096969004659", "out_of_bag": true, "output_fields": [], "price": 0.0, "private": true, "project": null, "refresh_field_types": false, "refresh_objective": false, "refresh_preferred": false, "replacement": false, "resource": "dataset/62e2bd65d432eb563000442e", "row_offset": 0, "row_step": 1, "rows": 154, "sample_rate": 0.8, "seed": "bigml", "shared": false, "size": 5251, "source": "source/62e2bd535687096969004656", "source_status": true, "sql_output_fields": [], "statisticaltest": null, "status": {"bytes": 26191, "code": 5, 
"elapsed": 1017, "extracted_count": 0, "field_errors": {}, "message": "The dataset has been created", "progress": 1, "row_format_errors": {"total": 0}, "serialized_rows": 154}, "subscription": true, "tags": [], "tde_download": {"code": 0, "excluded_input_fields": [], "input_fields": [], "message": "", "preview": []}, "term_limit": 1000, "timeseries": null, "timeseries_status": false, "type": 0, "updated": "2022-07-28T16:46:40.984000"}, "error": null} \ No newline at end of file diff --git a/bigml/tests/my_dataset/dataset_62e464778be2aa335a001548 b/bigml/tests/my_dataset/dataset_62e464778be2aa335a001548 new file mode 100644 index 00000000..29dfe07c --- /dev/null +++ b/bigml/tests/my_dataset/dataset_62e464778be2aa335a001548 @@ -0,0 +1 @@ +{"code": 200, "resource": "dataset/62e464778be2aa335a001548", "location": "https://bigml.io/andromeda/dataset/62e464778be2aa335a001548", "object": {"all_but": ["000004"], "all_fields": false, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 9, "configuration": null, "configuration_status": false, "correlations": {}, "created": "2022-07-29T22:51:35.732000", "creator": "mmartin", "dataset_origin_status": true, "description": "", "download": {"code": 0, "decimal_separator": ".", "excluded_input_fields": [], "header": true, "input_fields": [], "message": "", "new_line": "lf", "preview": [], "separator": ","}, "evaluation": null, "excluded_fields": [], "field_types": {"categorical": 2, "datetime": 0, "image": 0, "items": 0, "numeric": 7, "path": 0, "preferred": 9, "regions": 0, "text": 0, "total": 9}, "fields": {"000000": {"column_number": 0, "datatype": "int8", "generated": false, "name": "pregnancies", "optype": "numeric", "order": 0, "preferred": true, "summary": {"counts": [[0, 25], [1, 25], [2, 23], [3, 19], [4, 10], [5, 12], [6, 10], [7, 8], [8, 3], [9, 6], [10, 8], [11, 2], [13, 2], [14, 1]], "exact_histogram": {"populations": [50, 42, 22, 18, 9, 10, 2, 1], "start": 0, "width": 2}, "kurtosis": 
0.16147, "maximum": 14, "mean": 3.66234, "median": 3, "minimum": 0, "missing_count": 0, "population": 154, "skewness": 0.95878, "standard_deviation": 3.32198, "sum": 564, "sum_squares": 3754, "variance": 11.03557}}, "000001": {"column_number": 1, "datatype": "int16", "generated": false, "name": "plasma glucose", "optype": "numeric", "order": 1, "preferred": true, "summary": {"bins": [[56, 1], [72, 1], [77.33333, 3], [81, 3], [83.71429, 7], [87.8, 5], [91.42857, 7], [95.14286, 7], [99.63636, 11], [102.6, 5], [107, 13], [111.22222, 9], [115.22222, 9], [120.25, 4], [123.5, 14], [128.81818, 11], [132.25, 4], [135, 3], [138.25, 4], [141, 1], [143.66667, 3], [146.42857, 7], [150.5, 2], [154.66667, 3], [161.5, 2], [164, 1], [170.5, 2], [176, 3], [179.66667, 3], [184, 2], [189, 1], [194.33333, 3]], "exact_histogram": {"populations": [1, 0, 0, 1, 3, 9, 6, 10, 9, 11, 13, 13, 6, 14, 10, 8, 7, 4, 7, 4, 1, 3, 0, 2, 4, 4, 1, 2, 1], "start": 55, "width": 5}, "kurtosis": 0.04986, "maximum": 196, "mean": 120.01948, "median": 115.5, "minimum": 56, "missing_count": 0, "population": 154, "skewness": 0.67008, "standard_deviation": 28.73919, "sum": 18483, "sum_squares": 2344689, "variance": 825.94079}}, "000002": {"column_number": 2, "datatype": "int8", "generated": false, "name": "blood pressure", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[0, 7], [24, 1], [48, 2], [52, 2], [54, 4], [56, 3], [58, 2], [60, 8], [61, 1], [62, 11], [64, 11], [65, 1], [66, 6], [68, 7], [70, 14], [72, 7], [74, 7], [75, 1], [76, 7], [78, 11], [80, 6], [82, 6], [84, 2], [85, 1], [86, 6], [88, 8], [90, 3], [92, 4], [94, 2], [100, 1], [104, 1], [110, 1]], "exact_histogram": {"populations": [7, 0, 0, 0, 1, 0, 0, 0, 0, 2, 6, 5, 31, 14, 28, 19, 14, 15, 9, 0, 2, 0, 1], "start": 0, "width": 5}, "kurtosis": 5.20109, "maximum": 110, "mean": 69.09091, "median": 70, "minimum": 0, "missing_count": 0, "population": 154, "skewness": -1.89704, "standard_deviation": 19.33684, "sum": 10640, 
"sum_squares": 792336, "variance": 373.91325}}, "000003": {"column_number": 3, "datatype": "int8", "generated": false, "name": "triceps skin thickness", "optype": "numeric", "order": 3, "preferred": true, "summary": {"bins": [[0, 38], [8, 1], [10.5, 4], [12.25, 4], [15.4, 5], [17, 1], [18.5, 10], [20.5, 6], [23, 7], [25.2, 10], [27.5, 10], [29, 4], [30, 9], [31, 5], [32, 4], [33, 3], [34, 2], [35, 2], [36, 5], [37, 2], [38, 1], [39, 5], [40, 1], [41, 2], [42, 3], [43, 1], [44, 1], [46, 3], [49, 1], [50, 1], [54, 2], [60, 1]], "exact_histogram": {"populations": [38, 0, 0, 0, 1, 4, 4, 3, 3, 10, 6, 7, 8, 7, 9, 14, 7, 4, 7, 6, 3, 4, 1, 3, 1, 1, 0, 2, 0, 0, 1], "start": 0, "width": 2}, "kurtosis": -0.89167, "maximum": 60, "mean": 21.57143, "median": 25, "minimum": 0, "missing_count": 0, "population": 154, "skewness": -0.05392, "standard_deviation": 15.31194, "sum": 3322, "sum_squares": 107532, "variance": 234.45565}}, "000005": {"column_number": 4, "datatype": "double", "generated": false, "name": "bmi", "optype": "numeric", "order": 4, "preferred": true, "summary": {"bins": [[0, 2], [18.2, 1], [19.5, 2], [20.15, 2], [21.05, 2], [21.8, 2], [22.65, 4], [23.325, 4], [24.35, 4], [25.4, 6], [26.175, 4], [27.44667, 15], [28.71667, 6], [29.81111, 9], [31.31667, 6], [32.2, 9], [33.06667, 3], [34.12353, 17], [35.05, 4], [35.66667, 6], [36.68571, 14], [38.16667, 6], [39.24, 5], [40.3, 2], [42.14, 5], [43.1, 6], [44.1, 2], [46.1, 1], [46.8, 1], [49.45, 2], [52.3, 1], [67.1, 1]], "exact_histogram": {"populations": [2, 0, 0, 4, 17, 36, 40, 34, 15, 4, 1, 0, 0, 1], "start": 0, "width": 5}, "kurtosis": 3.57751, "maximum": 67.1, "mean": 32.29351, "median": 32.4, "minimum": 0, "missing_count": 0, "population": 154, "skewness": -0.11158, "standard_deviation": 8.21685, "sum": 4973.2, "sum_squares": 170932.12, "variance": 67.51669}}, "000006": {"column_number": 5, "datatype": "double", "generated": false, "name": "diabetes pedigree", "optype": "numeric", "order": 5, "preferred": true, 
"summary": {"bins": [[0.111, 3], [0.15418, 11], [0.19875, 16], [0.24761, 23], [0.27833, 6], [0.309, 4], [0.33375, 12], [0.3698, 15], [0.39933, 3], [0.426, 1], [0.45025, 8], [0.48875, 4], [0.52225, 4], [0.551, 1], [0.57383, 6], [0.598, 3], [0.633, 2], [0.6606, 5], [0.6895, 2], [0.733, 4], [0.76367, 3], [0.787, 1], [0.816, 1], [0.838, 2], [0.878, 2], [0.933, 1], [0.95825, 4], [1.0645, 2], [1.098, 2], [1.189, 1], [1.224, 1], [1.893, 1]], "exact_histogram": {"populations": [21, 39, 31, 15, 13, 10, 8, 5, 5, 3, 2, 1, 0, 0, 0, 0, 0, 1], "start": 0.1, "width": 0.1}, "kurtosis": 4.04452, "maximum": 1.893, "mean": 0.44171, "median": 0.3565, "minimum": 0.1, "missing_count": 0, "population": 154, "skewness": 1.65698, "standard_deviation": 0.28009, "sum": 68.023, "sum_squares": 42.04958, "variance": 0.07845}}, "000007": {"column_number": 6, "datatype": "int8", "generated": false, "name": "age", "optype": "numeric", "order": 6, "preferred": true, "summary": {"bins": [[21.375, 24], [23.5, 18], [25.40909, 22], [27.5, 16], [29.42857, 14], [31, 6], [32, 5], [33, 1], [34, 4], [35, 1], [36, 1], [37, 5], [38, 3], [40, 5], [41, 5], [42, 4], [43, 2], [44, 1], [45, 3], [47, 1], [48, 1], [49, 1], [51, 1], [52, 1], [54, 1], [55, 1], [56, 1], [58, 2], [62, 1], [65, 1], [66, 1], [69, 1]], "exact_histogram": {"populations": [15, 18, 22, 17, 16, 12, 6, 5, 6, 3, 10, 6, 4, 1, 2, 1, 1, 2, 1, 2, 0, 1, 1, 1, 1], "start": 20, "width": 2}, "kurtosis": 1.75384, "maximum": 69, "mean": 31.55844, "median": 28, "minimum": 21, "missing_count": 0, "population": 154, "skewness": 1.43278, "standard_deviation": 10.43498, "sum": 4860, "sum_squares": 170034, "variance": 108.88872}}, "000008": {"column_number": 7, "datatype": "string", "generated": false, "name": "diabetes", "optype": "categorical", "order": 7, "preferred": true, "summary": {"categories": [["false", 106], ["true", 48]], "missing_count": 0}, "term_analysis": {"enabled": true}}, "100008": {"column_number": 8, "datatype": "string", "description": "", 
"generated": true, "label": "", "name": "age_range", "optype": "categorical", "order": 8, "preferred": true, "provenance": "flatline", "summary": {"categories": [["1st third", 55], ["2nd third", 50], ["3rd third", 49]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "fields_meta": {"count": 9, "effective_fields": 9, "limit": -1, "offset": 0, "parent_optypes": {}, "preferred": 9, "provenances": {"flatline": 1}, "query_total": 9, "total": 9}, "input_fields": ["000000", "000001", "000002", "000003", "000004", "000005", "000006", "000007", "000008"], "juxtapose": false, "locale": "en_US", "missing_numeric_rows": 0, "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "name": "diabetes [extended]", "name_options": "154 instances, 9 fields (2 categorical, 7 numeric)", "new_fields": [{"description": "", "generator": ["percentile-label", "age", "1st third", "2nd third", "3rd third"], "label": "", "name": "age_range"}], "number_of_anomalies": 0, "number_of_anomalyscores": 0, "number_of_associations": 0, "number_of_associationsets": 0, "number_of_batchanomalyscores": 0, "number_of_batchcentroids": 0, "number_of_batchpredictions": 0, "number_of_batchprojections": 0, "number_of_batchtopicdistributions": 0, "number_of_centroids": 0, "number_of_clusters": 0, "number_of_correlations": 0, "number_of_deepnets": 0, "number_of_ensembles": 0, "number_of_evaluations": 0, "number_of_forecasts": 0, "number_of_linearregressions": 0, "number_of_logisticregressions": 0, "number_of_models": 0, "number_of_optimls": 0, "number_of_pca": 0, "number_of_predictions": 0, "number_of_projections": 0, "number_of_statisticaltests": 0, "number_of_timeseries": 0, "number_of_topicdistributions": 0, "number_of_topicmodels": 0, "objective_field": {"column_number": 7, "datatype": "string", "generated": false, "id": "000008", "name": "diabetes", "optype": "categorical", "order": 7, 
"term_analysis": {"enabled": true}}, "optiml": null, "optiml_status": false, "origin_batch_dataset": null, "origin_batch_dataset_status": false, "origin_batch_model": null, "origin_batch_model_status": false, "origin_batch_resource": null, "origin_batch_status": false, "origin_dataset": "dataset/62e2bd65d432eb563000442e", "out_of_bag": false, "output_fields": [{"generator": "(all-but \"000004\")", "ids": ["000000", "000001", "000002", "000003", "000005", "000006", "000007", "000008"], "json_generator": ["all-but", "000004"], "names": ["pregnancies", "plasma glucose", "blood pressure", "triceps skin thickness", "bmi", "diabetes pedigree", "age", "diabetes"]}, {"description": "", "generator": "(percentile-label \"age\" \"1st third\" \"2nd third\" \"3rd third\")", "ids": ["100008"], "json_generator": ["percentile-label", "age", "1st third", "2nd third", "3rd third"], "label": "", "name": "age_range", "names": ["age_range"]}], "price": 0.0, "private": true, "project": null, "refresh_field_types": false, "refresh_objective": false, "refresh_preferred": false, "replacement": false, "resource": "dataset/62e464778be2aa335a001548", "row_offset": 0, "row_step": 1, "rows": 154, "sample_rate": 1.0, "shared": false, "size": 6006, "source": "source/62e2bd535687096969004656", "source_status": true, "sql_output_fields": [], "statisticaltest": null, "status": {"bytes": 5251, "code": 5, "elapsed": 1488, "extracted_count": 0, "field_errors": {}, "message": "The dataset has been created", "progress": 1, "row_format_errors": {"total": 0}, "serialized_rows": 154}, "subscription": true, "tags": [], "tde_download": {"code": 0, "excluded_input_fields": [], "input_fields": [], "message": "", "preview": []}, "term_limit": 1000, "timeseries": null, "timeseries_status": false, "type": 0, "updated": "2022-07-29T22:51:38.889000"}, "error": null} \ No newline at end of file diff --git a/bigml/tests/my_dataset/my_flatline_ds.json b/bigml/tests/my_dataset/my_flatline_ds.json new file mode 100644 
index 00000000..d7dcfcad --- /dev/null +++ b/bigml/tests/my_dataset/my_flatline_ds.json @@ -0,0 +1 @@ +{"code": 200, "resource": "dataset/62e954f3aba2df1257001252", "location": "https://bigml.io/andromeda/dataset/62e954f3aba2df1257001252", "object": {"all_but": ["000005"], "all_fields": false, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 9, "configuration": null, "configuration_status": false, "correlations": {}, "created": "2022-08-02T16:46:43.382000", "creator": "mmartin", "dataset_origin_status": true, "description": "", "download": {"code": 0, "decimal_separator": ".", "excluded_input_fields": [], "header": true, "input_fields": [], "message": "", "new_line": "lf", "preview": [], "separator": ","}, "evaluation": null, "excluded_fields": [], "field_types": {"categorical": 2, "datetime": 0, "image": 0, "items": 0, "numeric": 7, "path": 0, "preferred": 9, "regions": 0, "text": 0, "total": 9}, "fields": {"000000": {"column_number": 0, "datatype": "int8", "generated": false, "name": "pregnancies", "optype": "numeric", "order": 0, "preferred": true, "summary": {"counts": [[0, 25], [1, 25], [2, 23], [3, 19], [4, 10], [5, 12], [6, 10], [7, 8], [8, 3], [9, 6], [10, 8], [11, 2], [13, 2], [14, 1]], "exact_histogram": {"populations": [50, 42, 22, 18, 9, 10, 2, 1], "start": 0, "width": 2}, "kurtosis": 0.16147, "maximum": 14, "mean": 3.66234, "median": 3, "minimum": 0, "missing_count": 0, "population": 154, "skewness": 0.95878, "standard_deviation": 3.32198, "sum": 564, "sum_squares": 3754, "variance": 11.03557}}, "000001": {"column_number": 1, "datatype": "int16", "generated": false, "name": "plasma glucose", "optype": "numeric", "order": 1, "preferred": true, "summary": {"bins": [[56, 1], [72, 1], [77.33333, 3], [81, 3], [83.71429, 7], [87.8, 5], [91.42857, 7], [95.14286, 7], [99.63636, 11], [102.6, 5], [107, 13], [111.22222, 9], [115.22222, 9], [120.25, 4], [123.5, 14], [128.81818, 11], [132.25, 4], [135, 3], [138.25, 4], [141, 1], 
[143.66667, 3], [146.42857, 7], [150.5, 2], [154.66667, 3], [161.5, 2], [164, 1], [170.5, 2], [176, 3], [179.66667, 3], [184, 2], [189, 1], [194.33333, 3]], "exact_histogram": {"populations": [1, 0, 0, 1, 3, 9, 6, 10, 9, 11, 13, 13, 6, 14, 10, 8, 7, 4, 7, 4, 1, 3, 0, 2, 4, 4, 1, 2, 1], "start": 55, "width": 5}, "kurtosis": 0.04986, "maximum": 196, "mean": 120.01948, "median": 115.5, "minimum": 56, "missing_count": 0, "population": 154, "skewness": 0.67008, "standard_deviation": 28.73919, "sum": 18483, "sum_squares": 2344689, "variance": 825.94079}}, "000002": {"column_number": 2, "datatype": "int8", "generated": false, "name": "blood pressure", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[0, 7], [24, 1], [48, 2], [52, 2], [54, 4], [56, 3], [58, 2], [60, 8], [61, 1], [62, 11], [64, 11], [65, 1], [66, 6], [68, 7], [70, 14], [72, 7], [74, 7], [75, 1], [76, 7], [78, 11], [80, 6], [82, 6], [84, 2], [85, 1], [86, 6], [88, 8], [90, 3], [92, 4], [94, 2], [100, 1], [104, 1], [110, 1]], "exact_histogram": {"populations": [7, 0, 0, 0, 1, 0, 0, 0, 0, 2, 6, 5, 31, 14, 28, 19, 14, 15, 9, 0, 2, 0, 1], "start": 0, "width": 5}, "kurtosis": 5.20109, "maximum": 110, "mean": 69.09091, "median": 70, "minimum": 0, "missing_count": 0, "population": 154, "skewness": -1.89704, "standard_deviation": 19.33684, "sum": 10640, "sum_squares": 792336, "variance": 373.91325}}, "000003": {"column_number": 3, "datatype": "int8", "generated": false, "name": "triceps skin thickness", "optype": "numeric", "order": 3, "preferred": true, "summary": {"bins": [[0, 38], [8, 1], [10.5, 4], [12.25, 4], [15.4, 5], [17, 1], [18.5, 10], [20.5, 6], [23, 7], [25.2, 10], [27.5, 10], [29, 4], [30, 9], [31, 5], [32, 4], [33, 3], [34, 2], [35, 2], [36, 5], [37, 2], [38, 1], [39, 5], [40, 1], [41, 2], [42, 3], [43, 1], [44, 1], [46, 3], [49, 1], [50, 1], [54, 2], [60, 1]], "exact_histogram": {"populations": [38, 0, 0, 0, 1, 4, 4, 3, 3, 10, 6, 7, 8, 7, 9, 14, 7, 4, 7, 6, 3, 4, 1, 3, 1, 1, 0, 
2, 0, 0, 1], "start": 0, "width": 2}, "kurtosis": -0.89167, "maximum": 60, "mean": 21.57143, "median": 25, "minimum": 0, "missing_count": 0, "population": 154, "skewness": -0.05392, "standard_deviation": 15.31194, "sum": 3322, "sum_squares": 107532, "variance": 234.45565}}, "000006": {"column_number": 4, "datatype": "double", "generated": false, "name": "diabetes pedigree", "optype": "numeric", "order": 4, "preferred": true, "summary": {"bins": [[0.111, 3], [0.15418, 11], [0.19875, 16], [0.24761, 23], [0.27833, 6], [0.309, 4], [0.33375, 12], [0.3698, 15], [0.39933, 3], [0.426, 1], [0.45025, 8], [0.48875, 4], [0.52225, 4], [0.551, 1], [0.57383, 6], [0.598, 3], [0.633, 2], [0.6606, 5], [0.6895, 2], [0.733, 4], [0.76367, 3], [0.787, 1], [0.816, 1], [0.838, 2], [0.878, 2], [0.933, 1], [0.95825, 4], [1.0645, 2], [1.098, 2], [1.189, 1], [1.224, 1], [1.893, 1]], "exact_histogram": {"populations": [21, 39, 31, 15, 13, 10, 8, 5, 5, 3, 2, 1, 0, 0, 0, 0, 0, 1], "start": 0.1, "width": 0.1}, "kurtosis": 4.04452, "maximum": 1.893, "mean": 0.44171, "median": 0.3565, "minimum": 0.1, "missing_count": 0, "population": 154, "skewness": 1.65698, "standard_deviation": 0.28009, "sum": 68.023, "sum_squares": 42.04958, "variance": 0.07845}}, "000007": {"column_number": 5, "datatype": "int8", "generated": false, "name": "age", "optype": "numeric", "order": 5, "preferred": true, "summary": {"bins": [[21.375, 24], [23.5, 18], [25.40909, 22], [27.5, 16], [29.42857, 14], [31, 6], [32, 5], [33, 1], [34, 4], [35, 1], [36, 1], [37, 5], [38, 3], [40, 5], [41, 5], [42, 4], [43, 2], [44, 1], [45, 3], [47, 1], [48, 1], [49, 1], [51, 1], [52, 1], [54, 1], [55, 1], [56, 1], [58, 2], [62, 1], [65, 1], [66, 1], [69, 1]], "exact_histogram": {"populations": [15, 18, 22, 17, 16, 12, 6, 5, 6, 3, 10, 6, 4, 1, 2, 1, 1, 2, 1, 2, 0, 1, 1, 1, 1], "start": 20, "width": 2}, "kurtosis": 1.75384, "maximum": 69, "mean": 31.55844, "median": 28, "minimum": 21, "missing_count": 0, "population": 154, "skewness": 1.43278, 
"standard_deviation": 10.43498, "sum": 4860, "sum_squares": 170034, "variance": 108.88872}}, "000008": {"column_number": 6, "datatype": "string", "generated": false, "name": "diabetes", "optype": "categorical", "order": 6, "preferred": true, "summary": {"categories": [["false", 106], ["true", 48]], "missing_count": 0}, "term_analysis": {"enabled": true}}, "100008": {"column_number": 7, "datatype": "string", "description": "", "generated": true, "label": "", "name": "age_range", "optype": "categorical", "order": 7, "preferred": true, "provenance": "flatline", "summary": {"categories": [["1st third", 55], ["2nd third", 50], ["3rd third", 49]], "missing_count": 0}, "term_analysis": {"enabled": true}}, "100009": {"column_number": 8, "datatype": "double", "description": "", "generated": true, "label": "", "name": "glucose half", "optype": "numeric", "order": 8, "preferred": true, "provenance": "flatline", "summary": {"bins": [[28, 1], [36, 1], [38.66667, 3], [40.5, 3], [41.85714, 7], [43.9, 5], [45.71429, 7], [47.57143, 7], [49.81818, 11], [51.3, 5], [53.5, 13], [55.61111, 9], [57.61111, 9], [60.125, 4], [61.75, 14], [64.40909, 11], [66.125, 4], [67.5, 3], [69.125, 4], [70.5, 1], [71.83333, 3], [73.21429, 7], [75.25, 2], [77.33333, 3], [80.75, 2], [82, 1], [85.25, 2], [88, 3], [89.83333, 3], [92, 2], [94.5, 1], [97.16667, 3]], "exact_histogram": {"populations": [1, 0, 4, 15, 19, 24, 19, 24, 15, 11, 5, 3, 6, 5, 3], "start": 25, "width": 5}, "kurtosis": 0.04986, "maximum": 98, "mean": 60.00974, "median": 57.75, "minimum": 28, "missing_count": 0, "population": 154, "skewness": 0.67008, "standard_deviation": 14.36959, "sum": 9241.5, "sum_squares": 586172.25, "variance": 206.4852}}}, "fields_meta": {"count": 9, "effective_fields": 9, "limit": -1, "offset": 0, "parent_optypes": {}, "preferred": 9, "provenances": {"flatline": 2}, "query_total": 9, "total": 9}, "input_fields": ["000000", "000001", "000002", "000003", "000005", "000006", "000007", "000008", "100008"], 
"juxtapose": false, "locale": "en_US", "missing_numeric_rows": 0, "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "name": "diabetes transf test", "name_options": "154 instances, 9 fields (2 categorical, 7 numeric)", "new_fields": [{"description": "", "generator": "(/ (f \"plasma glucose\") 2)", "label": "", "name": "glucose half"}], "number_of_anomalies": 0, "number_of_anomalyscores": 0, "number_of_associations": 0, "number_of_associationsets": 0, "number_of_batchanomalyscores": 0, "number_of_batchcentroids": 0, "number_of_batchpredictions": 1, "number_of_batchprojections": 0, "number_of_batchtopicdistributions": 0, "number_of_centroids": 0, "number_of_clusters": 0, "number_of_correlations": 0, "number_of_deepnets": 0, "number_of_ensembles": 0, "number_of_evaluations": 0, "number_of_forecasts": 0, "number_of_linearregressions": 0, "number_of_logisticregressions": 0, "number_of_models": 1, "number_of_optimls": 0, "number_of_pca": 0, "number_of_predictions": 0, "number_of_projections": 0, "number_of_statisticaltests": 0, "number_of_timeseries": 0, "number_of_topicdistributions": 0, "number_of_topicmodels": 0, "objective_field": {"column_number": 6, "datatype": "string", "generated": false, "id": "000008", "name": "diabetes", "optype": "categorical", "order": 6, "term_analysis": {"enabled": true}}, "optiml": null, "optiml_status": false, "origin_batch_dataset": null, "origin_batch_dataset_status": false, "origin_batch_model": null, "origin_batch_model_status": false, "origin_batch_resource": null, "origin_batch_status": false, "origin_dataset": "dataset/62e464778be2aa335a001548", "out_of_bag": false, "output_fields": [{"generator": "(all-but \"000005\")", "ids": ["000000", "000001", "000002", "000003", "000006", "000007", "000008", "100008"], "json_generator": ["all-but", "000005"], "names": ["pregnancies", "plasma glucose", "blood pressure", "triceps 
skin thickness", "diabetes pedigree", "age", "diabetes", "age_range"]}, {"description": "", "generator": "(/ (f \"plasma glucose\") 2)", "ids": ["100009"], "json_generator": ["/", ["f", "plasma glucose"], 2], "label": "", "name": "glucose half", "names": ["glucose half"]}], "price": 0.0, "private": true, "project": null, "refresh_field_types": false, "refresh_objective": false, "refresh_preferred": false, "replacement": false, "resource": "dataset/62e954f3aba2df1257001252", "row_offset": 0, "row_step": 1, "rows": 154, "sample_rate": 1.0, "shared": false, "size": 6006, "source": "source/62e2bd535687096969004656", "source_status": true, "sql_output_fields": [], "statisticaltest": null, "status": {"bytes": 6006, "code": 5, "elapsed": 2366, "extracted_count": 0, "field_errors": {}, "message": "The dataset has been created", "progress": 1, "row_format_errors": {"total": 0}, "serialized_rows": 154}, "subscription": true, "tags": [], "tde_download": {"code": 0, "excluded_input_fields": [], "input_fields": [], "message": "", "preview": []}, "term_limit": 1000, "timeseries": null, "timeseries_status": false, "type": 0, "updated": "2022-08-03T15:19:48.959000"}, "error": null} \ No newline at end of file diff --git a/bigml/tests/my_ensemble/__init__.py b/bigml/tests/my_ensemble/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bigml/tests/my_ensemble/ensemble.json b/bigml/tests/my_ensemble/ensemble.json new file mode 100644 index 00000000..c0424c63 --- /dev/null +++ b/bigml/tests/my_ensemble/ensemble.json @@ -0,0 +1 @@ +{"code": 200, "resource": "ensemble/59db76e97e0a8d1f5f008c9e", "location": "https://bigml.io/andromeda/ensemble/59db76e97e0a8d1f5f008c9e", "object": {"size": 2152, "code": 200, "objective_field_name": "Final", "error_models": 0, "locale": "en_US", "node_threshold": 512, "objective_field": "000005", "private": true, "dataset": "dataset/59db76e77e0a8d1f5e004396", "dataset_field_types": {"effective_fields": 6, "categorical": 0, "items": 0, 
"preferred": 6, "datetime": 0, "numeric": 6, "text": 0, "total": 6}, "number_of_batchpredictions": 0, "seed": "BigML", "missing_splits": false, "white_box": false, "ensemble_sample": {"rate": 1, "seed": "bc42a7a4a1c842a0929ca1e197e1a2e2", "replacement": true}, "range": [1, 80], "distributions": [{"importance": [["000003", 0.72077], ["000002", 0.17078], ["000004", 0.07106], ["000001", 0.03241], ["000000", 0.00498]], "training": {}, "predictions": {"counts": [[-27.48979, 4], [-16.89263, 6], [-15.97477, 9], [-12.86316, 2], [-8.34478, 5], [-6.10379, 4], [-3.94987, 1], [-3.53487, 1], [-2.49316, 2], [-0.08649, 4], [1.88013, 1], [2.71513, 1], [3.55013, 1], [6.05013, 1], [9.17351, 2], [12.02522, 5], [14.56621, 4], [23.71468, 8]]}}, {"importance": [["000003", 0.79744], ["000004", 0.10113], ["000001", 0.05336], ["000002", 0.04806]], "training": {}, "predictions": {"counts": [[-18.93268, 6], [-14.0932, 4], [-13.82749, 3], [-13.76724, 5], [-7.01337, 2], [-4.37529, 5], [-3.41771, 2], [-1.74541, 1], [-0.79237, 1], [-0.71028, 2], [-0.08074, 2], [1.25414, 5], [3.43945, 2], [4.4972, 2], [8.56289, 2], [10.35668, 5], [23.88547, 12]]}}, {"importance": [["000003", 0.76807], ["000004", 0.12435], ["000002", 0.08334], ["000001", 0.02423]], "training": {}, "predictions": {"counts": [[-16.57693, 16], [-12.42279, 5], [-10.89428, 3], [-6.43719, 3], [-4.54968, 1], [-2.93235, 5], [-2.69614, 1], [-1.00567, 2], [-0.05667, 1], [0.63728, 1], [0.80228, 2], [2.41413, 1], [3.58837, 1], [4.62108, 2], [5.60188, 1], [9.023, 2], [15.26138, 10], [27.63444, 4]]}}], "balance_objective": false, "number_of_predictions": 1, "category": 0, "project": null, "objective_field_type": "numeric", "initial_offset": 68.45974, "out_of_bag": false, "source": "source/59db76e51333b35f220042db", "ordering": 0, "depth_threshold": 512, "support_threshold": 0.0, "tags": ["Final"], "name_options": "512-node, 3-iteration, sample rate=0.8 boosted trees", "fast": true, "credits_per_prediction": 0.0, "models": 
["model/59db76eb9b356c2c97004802", "model/59db76eb9b356c2c97004804", "model/59db76eb9b356c2c97004806"], "number_of_public_predictions": 0, "sample_rate": 0.8, "number_of_models": 3, "objective_fields": ["000005"], "type": 1, "ensemble": {"fields": {"000004": {"optype": "numeric", "name": "TakeHome", "datatype": "double", "preferred": true, "summary": {"skewness": -1.03266, "missing_count": 1, "sum": 6222.17, "median": 86.11, "maximum": 107.41, "sum_squares": 536397.5363, "minimum": 16.91, "standard_deviation": 24.37138, "variance": 593.96395, "population": 79, "kurtosis": 0.23762, "exact_histogram": {"start": 15, "width": 5, "populations": [3, 3, 0, 0, 0, 2, 1, 4, 3, 8, 0, 4, 2, 6, 10, 7, 12, 9, 5]}, "bins": [[16.91, 2], [18.52, 1], [21.53, 2], [24.77, 1], [42.22, 2], [47.22, 1], [51.48, 2], [52.41, 1], [53.7, 1], [56.11, 1], [57.41, 2], [60.86333, 3], [63.63, 5], [72.4075, 4], [77.96, 2], [80.93, 1], [83.33, 5], [85.7425, 4], [87.71333, 3], [88.89, 2], [89.93667, 3], [91.48, 2], [93.61, 2], [94.44, 1], [95.93, 1], [97.35429, 7], [99.552, 5], [100.93, 3], [102.59, 2], [104.07333, 3], [105.99, 3], [107.41, 2]], "mean": 78.76165}, "column_number": 4, "order": 4}, "000005": {"optype": "numeric", "name": "Final", "datatype": "double", "preferred": true, "summary": {"skewness": 0.1639, "missing_count": 3, "sum": 5271.4, "median": 65.56, "maximum": 108.89, "sum_squares": 389814.3944, "minimum": 28.06, "standard_deviation": 19.51238, "variance": 380.73315, "population": 77, "kurtosis": -0.87237, "exact_histogram": {"start": 25, "width": 5, "populations": [1, 1, 3, 1, 9, 7, 6, 8, 9, 3, 4, 5, 6, 7, 2, 3, 2]}, "bins": [[28.06, 1], [34.44, 1], [35.97, 2], [39.72, 1], [43.33, 1], [45.93, 3], [47.78, 2], [49.0975, 4], [50.415, 4], [52.5, 3], [55.8325, 4], [58.055, 2], [60.975, 2], [63.332, 5], [65.60333, 6], [67.985, 4], [72.22, 2], [73.89, 1], [75.56, 1], [77.5, 1], [78.89, 2], [80.28, 2], [82.87, 3], [85.28, 2], [87.22, 1], [89.39, 5], [90.97, 2], [92.5, 2], [94.72, 2], 
[99.17, 1], [102.03667, 3], [108.335, 2]], "mean": 68.45974}, "column_number": 5, "order": 5}, "000002": {"optype": "numeric", "name": "Tutorial", "datatype": "double", "preferred": true, "summary": {"skewness": -1.58318, "missing_count": 0, "sum": 7172.96, "median": 93.37, "maximum": 112.58, "sum_squares": 661502.4374, "minimum": 34.09, "standard_deviation": 15.24504, "variance": 232.41137, "population": 80, "kurtosis": 2.97079, "exact_histogram": {"start": 30, "width": 5, "populations": [2, 0, 0, 0, 0, 3, 1, 3, 2, 1, 8, 13, 13, 13, 15, 5, 1]}, "bins": [[34.09, 2], [57.32, 1], [58.24, 2], [63.39, 1], [65.13333, 3], [70.24, 1], [74.66, 1], [76.21, 1], [80.6, 1], [81.93, 2], [83.094, 5], [86.65, 6], [87.56, 1], [88.205, 2], [88.91, 3], [90.01333, 3], [91.17, 2], [92.82667, 3], [94.01, 5], [94.89, 1], [95.63167, 6], [96.66, 1], [97.595, 4], [98.47, 1], [100.3925, 4], [101.935, 2], [102.78833, 6], [103.8575, 4], [105.53, 2], [106.53, 1], [108.84, 2], [112.58, 1]], "mean": 89.662}, "column_number": 2, "order": 2}, "000003": {"optype": "numeric", "name": "Midterm", "datatype": "double", "preferred": true, "summary": {"skewness": -0.01974, "missing_count": 0, "sum": 5431.89, "median": 69.38, "maximum": 110, "sum_squares": 401041.2915, "minimum": 28.12, "standard_deviation": 20.19632, "variance": 407.89151, "population": 80, "kurtosis": -0.82824, "exact_histogram": {"start": 25, "width": 5, "populations": [2, 2, 3, 5, 6, 6, 3, 8, 6, 11, 5, 3, 5, 7, 4, 2, 1, 1]}, "bins": [[28.435, 2], [30, 1], [32.5, 1], [37.5, 1], [39.37667, 3], [41.565, 2], [44.38, 2], [46.25, 2], [47.70667, 3], [49.38, 1], [51.875, 4], [54.58, 3], [56.25, 2], [60.9375, 4], [63.435, 4], [67.29333, 3], [69.502, 5], [72.29333, 6], [74.69, 6], [76.25, 1], [79.38, 1], [81.875, 2], [83.75, 1], [85.83333, 3], [88.75, 2], [90.83, 3], [92.965, 4], [95.62333, 3], [99.38, 1], [101.25, 2], [106.25, 1], [110, 1]], "mean": 67.89863}, "column_number": 3, "order": 3}, "000000": {"optype": "numeric", "name": "Prefix", 
"datatype": "int8", "preferred": true, "summary": {"skewness": -1.58551, "missing_count": 0, "sum": 590, "median": 8, "maximum": 8, "sum_squares": 4406, "minimum": 4, "standard_deviation": 0.83249, "variance": 0.69304, "counts": [[4, 1], [5, 2], [6, 6], [7, 28], [8, 43]], "population": 80, "kurtosis": 2.92325, "exact_histogram": {"start": 4, "width": 1, "populations": [1, 2, 6, 28, 43]}, "mean": 7.375}, "column_number": 0, "order": 0}, "000001": {"optype": "numeric", "name": "Assignment", "datatype": "double", "preferred": true, "summary": {"skewness": -1.63382, "missing_count": 0, "sum": 6776.45, "median": 89.995, "maximum": 100.83, "sum_squares": 587584.7701, "minimum": 28.14, "standard_deviation": 13.11166, "variance": 171.91567, "population": 80, "kurtosis": 3.27876, "exact_histogram": {"start": 25, "width": 5, "populations": [1, 0, 0, 0, 0, 1, 3, 2, 4, 5, 3, 12, 9, 22, 17, 1]}, "bins": [[28.14, 1], [53.36, 1], [55.14, 1], [57.14, 2], [63.59, 2], [66.17, 1], [67.29, 1], [68.95, 2], [71.79, 3], [72.85, 2], [75.27, 3], [80.47, 3], [81.22, 1], [82.45, 1], [83.7, 2], [84.26, 3], [84.87, 3], [85.42, 1], [86.26, 3], [87.98333, 3], [90.10667, 3], [90.86, 2], [91.35, 6], [92.016, 5], [92.46, 1], [93.025, 2], [93.77, 4], [95.05375, 8], [95.9, 1], [97.076, 5], [98.58, 3], [100.83, 1]], "mean": 84.70562}, "column_number": 1, "order": 1}}}, "columns": 6, "status": {"progress": 1, "message": "The ensemble has been created", "code": 5, "elapsed": 1796}, "updated": "2017-10-09T13:17:35.384000", "description": "", "split_candidates": 32, "importance": {"000004": 0.09882, "000002": 0.10137, "000003": 0.76166, "000000": 0.00168, "000001": 0.03646}, "price": 0.0, "finished_models": 3, "credits": 0.0082122802734375, "dataset_type": 0, "stat_pruning": false, "boosting": {"step_out_of_bag": false, "learning_rate": 0.1, "early_holdout": 0, "iterations": 3, "scores": [7589.17083, 3400.51476, 5121.69502], "early_out_of_bag": true, "final_iterations": 3}, "rows": 64, "configuration": 
null, "subscription": false, "tlp": 1, "resource": "ensemble/59db76e97e0a8d1f5f008c9e", "name": "grades", "created": "2017-10-09T13:17:29.505000", "dataset_status": true, "source_status": true, "randomize": false, "configuration_status": false, "number_of_evaluations": 0, "max_columns": 6, "max_rows": 80, "input_fields": ["000000", "000001", "000002", "000003", "000004"], "weight_field": null, "shared": false, "replacement": false}, "error": null} \ No newline at end of file diff --git a/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004802.py b/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004802.py new file mode 100644 index 00000000..c63911e9 --- /dev/null +++ b/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004802.py @@ -0,0 +1,98 @@ + +# -*- coding: utf-8 -*- +def predict_final(prefix=None, + assignment=None, + tutorial=None, + midterm=None, + takehome=None, + final=None): + """ Predictor for Final from model/59db76eb9b356c2c97004802 + + Predictive model by BigML - Machine Learning Made Easy + """ + if (midterm is None): + return {"prediction":-2.31071} + if (midterm > 77.19): + if (assignment is None): + return {"prediction":17.90525} + if (assignment > 85.635): + if (takehome is None): + return {"prediction":18.7373} + if (takehome > 106.945): + return {"prediction":2.71513} + if (takehome <= 106.945): + if (tutorial is None): + return {"prediction":19.56899} + if (tutorial > 103.365): + return {"prediction":12.02522} + if (tutorial <= 103.365): + if (tutorial > 78.795): + return {"prediction":23.71468} + if (tutorial <= 78.795): + return {"prediction":9.17351} + if (assignment <= 85.635): + return {"prediction":1.88013} + if (midterm <= 77.19): + if (midterm > 55.31): + if (tutorial is None): + return {"prediction":-5.75208} + if (tutorial > 84.92): + if (tutorial > 87.115): + if (takehome is None): + return {"prediction":-5.90899} + if (takehome > 98.33): + if (tutorial > 100.58): + return {"prediction":-0.08649} + if (tutorial <= 100.58): + return 
{"prediction":-2.49316} + if (takehome <= 98.33): + if (prefix is None): + return {"prediction":-7.67532} + if (prefix > 7): + if (tutorial > 94.715): + return {"prediction":-12.86316} + if (tutorial <= 94.715): + return {"prediction":-3.53487} + if (prefix <= 7): + if (midterm > 65.31): + return {"prediction":-6.10379} + if (midterm <= 65.31): + return {"prediction":3.55013} + if (tutorial <= 87.115): + return {"prediction":14.56621} + if (tutorial <= 84.92): + if (assignment is None): + return {"prediction":-12.04119} + if (assignment > 40.75): + if (tutorial > 73.515): + return {"prediction":-8.34478} + if (tutorial <= 73.515): + return {"prediction":-16.89263} + if (assignment <= 40.75): + return {"prediction":-0.08649} + if (midterm <= 55.31): + if (takehome is None): + return {"prediction":-18.31226} + if (takehome > 101.67): + return {"prediction":6.05013} + if (takehome <= 101.67): + if (tutorial is None): + return {"prediction":-20.33976} + if (tutorial > 100.315): + return {"prediction":-27.48979} + if (tutorial <= 100.315): + if (tutorial > 97.21): + return {"prediction":-3.94987} + if (tutorial <= 97.21): + return {"prediction":-15.97477} + + +def predict(prefix=None, + assignment=None, + tutorial=None, + midterm=None, + takehome=None, + final=None): + prediction = predict_final(prefix=prefix, assignment=assignment, tutorial=tutorial, midterm=midterm, takehome=takehome, final=final) + prediction.update({"weight": 0.09984}) + return prediction \ No newline at end of file diff --git a/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004804.py b/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004804.py new file mode 100644 index 00000000..32c47f4c --- /dev/null +++ b/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004804.py @@ -0,0 +1,90 @@ + +# -*- coding: utf-8 -*- +def predict_final(prefix=None, + assignment=None, + tutorial=None, + midterm=None, + takehome=None, + final=None): + """ Predictor for Final from model/59db76eb9b356c2c97004804 + + Predictive 
model by BigML - Machine Learning Made Easy + """ + if (midterm is None): + return {"prediction":0.38343} + if (midterm > 77.08667): + if (takehome is None): + return {"prediction":20.38342} + if (takehome > 106.945): + return {"prediction":3.43945} + if (takehome <= 106.945): + if (tutorial is None): + return {"prediction":22.41332} + if (tutorial > 78.665): + return {"prediction":23.88547} + if (tutorial <= 78.665): + return {"prediction":8.56289} + if (midterm <= 77.08667): + if (midterm > 48.75): + if (takehome is None): + return {"prediction":-4.5295} + if (takehome > 53.795): + if (midterm > 73.44): + if (takehome > 73.795): + return {"prediction":-13.82749} + if (takehome <= 73.795): + return {"prediction":-3.41771} + if (midterm <= 73.44): + if (assignment is None): + return {"prediction":-0.71945} + if (assignment > 82.74): + if (tutorial is None): + return {"prediction":-3.97172} + if (tutorial > 103.945): + if (tutorial > 104.835): + return {"prediction":-0.08074} + if (tutorial <= 104.835): + return {"prediction":1.25414} + if (tutorial <= 103.945): + if (midterm > 62.5): + if (midterm > 65.31): + return {"prediction":-4.37529} + if (midterm <= 65.31): + return {"prediction":4.4972} + if (midterm <= 62.5): + if (tutorial > 95.71): + return {"prediction":-14.0932} + if (tutorial <= 95.71): + return {"prediction":-1.74541} + if (assignment <= 82.74): + if (tutorial is None): + return {"prediction":7.50115} + if (tutorial > 96.79): + return {"prediction":-0.71028} + if (tutorial <= 96.79): + return {"prediction":10.35668} + if (takehome <= 53.795): + return {"prediction":-13.76724} + if (midterm <= 48.75): + if (takehome is None): + return {"prediction":-15.51536} + if (takehome > 58.89): + return {"prediction":-18.93268} + if (takehome <= 58.89): + if (tutorial is None): + return {"prediction":-5.65621} + if (tutorial > 77.095): + return {"prediction":-7.01337} + if (tutorial <= 77.095): + return {"prediction":-0.79237} + + +def predict(prefix=None, + 
assignment=None, + tutorial=None, + midterm=None, + takehome=None, + final=None): + prediction = predict_final(prefix=prefix, assignment=assignment, tutorial=tutorial, midterm=midterm, takehome=takehome, final=final) + prediction.update({"weight": 0.09621}) + return prediction \ No newline at end of file diff --git a/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004806.py b/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004806.py new file mode 100644 index 00000000..925cfb07 --- /dev/null +++ b/bigml/tests/my_ensemble/model_59db76eb9b356c2c97004806.py @@ -0,0 +1,89 @@ + +# -*- coding: utf-8 -*- +def predict_final(prefix=None, + assignment=None, + tutorial=None, + midterm=None, + takehome=None, + final=None): + """ Predictor for Final from model/59db76eb9b356c2c97004806 + + Predictive model by BigML - Machine Learning Made Easy + """ + if (midterm is None): + return {"prediction":-1.4035} + if (midterm > 77.19): + if (midterm > 98.75): + return {"prediction":27.63444} + if (midterm <= 98.75): + if (assignment is None): + return {"prediction":12.52275} + if (assignment > 85.635): + if (takehome is None): + return {"prediction":13.32619} + if (takehome > 106.945): + return {"prediction":2.41413} + if (takehome <= 106.945): + if (assignment > 97.08): + return {"prediction":4.62108} + if (assignment <= 97.08): + return {"prediction":15.26138} + if (assignment <= 85.635): + return {"prediction":0.63728} + if (midterm <= 77.19): + if (tutorial is None): + return {"prediction":-9.38705} + if (tutorial > 86.76): + if (takehome is None): + return {"prediction":-12.08301} + if (takehome > 92.5): + if (tutorial > 104.6): + return {"prediction":-0.05667} + if (tutorial <= 104.6): + return {"prediction":-2.93235} + if (takehome <= 92.5): + if (midterm > 46.875): + if (assignment is None): + return {"prediction":-15.59471} + if (assignment > 66.73): + if (tutorial > 88.32): + return {"prediction":-16.57693} + if (tutorial <= 88.32): + return {"prediction":-4.54968} + if 
(assignment <= 66.73): + return {"prediction":-2.69614} + if (midterm <= 46.875): + if (takehome > 75.925): + return {"prediction":3.58837} + if (takehome <= 75.925): + return {"prediction":-10.89428} + if (tutorial <= 86.76): + if (takehome is None): + return {"prediction":-3.91393} + if (takehome > 88.795): + return {"prediction":-12.42279} + if (takehome <= 88.795): + if (tutorial > 84.92): + return {"prediction":9.023} + if (tutorial <= 84.92): + if (tutorial > 81.225): + return {"prediction":-6.43719} + if (tutorial <= 81.225): + if (tutorial > 72.855): + return {"prediction":5.60188} + if (tutorial <= 72.855): + if (tutorial > 61.81): + return {"prediction":-1.00567} + if (tutorial <= 61.81): + return {"prediction":0.80228} + + +def predict(prefix=None, + assignment=None, + tutorial=None, + midterm=None, + takehome=None, + final=None): + prediction = predict_final(prefix=prefix, assignment=assignment, tutorial=tutorial, midterm=midterm, takehome=takehome, final=final) + prediction.update({"weight": 0.09984}) + return prediction \ No newline at end of file diff --git a/bigml/tests/my_no_root_ensemble/ensemble.json b/bigml/tests/my_no_root_ensemble/ensemble.json new file mode 100644 index 00000000..99791a67 --- /dev/null +++ b/bigml/tests/my_no_root_ensemble/ensemble.json @@ -0,0 +1 @@ +{"code": 200, "resource": "ensemble/6182d9bfb3ef374f3c0031de", "location": "https://bigml.io/andromeda/ensemble/6182d9bfb3ef374f3c0031de", "object": {"boosting": null, "category": 0, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2021-11-03T18:49:35.307000", "creator": "merce_demo", "credits": 0.018146514892578125, "credits_per_prediction": 0.0, "dataset": "dataset/604f5f06cb4f96592d004959", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "distributions": [{"importance": [["000002", 0.72739], 
["000003", 0.27261]], "predictions": {"categories": [["Iris-setosa", 54], ["Iris-versicolor", 49], ["Iris-virginica", 47]]}, "training": {"categories": [["Iris-setosa", 54], ["Iris-versicolor", 49], ["Iris-virginica", 47]]}}, {"importance": [["000002", 0.94621], ["000000", 0.02353], ["000003", 0.01864], ["000001", 0.01161]], "predictions": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 49], ["Iris-virginica", 51]]}, "training": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 49], ["Iris-virginica", 51]]}}, {"importance": [["000002", 0.93437], ["000003", 0.05554], ["000001", 0.01009]], "predictions": {"categories": [["Iris-setosa", 52], ["Iris-versicolor", 44], ["Iris-virginica", 54]]}, "training": {"categories": [["Iris-setosa", 52], ["Iris-versicolor", 44], ["Iris-virginica", 54]]}}], "ensemble": {"fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], 
[3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "order": 3, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 
302.33, "variance": 0.58101}}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}}, "ensemble_sample": {"rate": 1.0, "replacement": true, "seed": "986709f2dd2340b1b860954a3ea806c3"}, "error_models": 0, "fields_meta": {"count": 5, "limit": 1000, "offset": 0, "query_total": 5, "total": 5}, "finished_models": 3, "focus_field": null, "focus_field_name": null, "importance": {"000000": 0.00784, "000001": 0.00723, "000002": 0.86933, "000003": 0.1156}, "input_fields": ["000000", "000001", "000002", "000003"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "models": ["model/6182d9c7f731fb7252001d37", "model/6182d9c7f731fb7252001d39", "model/6182d9c8f731fb7252001d3b"], "name": "iris", "name_options": "bootstrap decision forest, 512-node, 3-model, pruned, deterministic order", "node_threshold": 512, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_models": 3, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000004", "objective_field_details": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4}, "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["000004"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/604f5ee5c1c0000b90003cc5", "randomize": false, "range": null, "replacement": false, "resource": "ensemble/6182d9bfb3ef374f3c0031de", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4757, "source": "source/604f5ef647d775129e0011a7", "source_status": true, "split_candidates": 32, "split_field": null, "split_field_name": null, "stat_pruning": true, "status": 
{"code": 5, "elapsed": 4644, "message": "The ensemble has been created", "progress": 1}, "subscription": false, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2021-11-03T18:49:44.894000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/tests/my_no_root_ensemble/model_6182d9c7f731fb7252001d37 b/bigml/tests/my_no_root_ensemble/model_6182d9c7f731fb7252001d37 new file mode 100644 index 00000000..70d044f6 --- /dev/null +++ b/bigml/tests/my_no_root_ensemble/model_6182d9c7f731fb7252001d37 @@ -0,0 +1 @@ +{"code": 200, "resource": "model/6182d9c7f731fb7252001d37", "location": "https://bigml.io/andromeda/model/6182d9c7f731fb7252001d37", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2021-11-03T18:49:43.557000", "creator": "merce_demo", "credits": 0.0, "credits_per_prediction": 0.0, "dataset": "dataset/604f5f06cb4f96592d004959", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "6182d9bfb3ef374f3c0031de", "ensemble_index": 0, "excluded_fields": [], "fields_meta": {"count": 5, "limit": 1000, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000002", "000003"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 54], ["Iris-versicolor", 49], ["Iris-virginica", 47]]}, "training": {"categories": [["Iris-setosa", 54], ["Iris-versicolor", 49], ["Iris-virginica", 47]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 
1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 
8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "order": 3, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000002", 0.72739], ["000003", 0.27261]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "preferred": true}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "preferred": true}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "preferred": true, "term_analysis": {"enabled": true}}}, "node_threshold": 512, "root": 
{"children": [{"children": [{"confidence": 0.91799, "count": 43, "id": 2, "objective_summary": {"categories": [["Iris-virginica", 43]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 1.75}}, {"children": [{"children": [{"confidence": 0.43849, "count": 3, "id": 5, "objective_summary": {"categories": [["Iris-virginica", 3]]}, "output": "Iris-virginica", "predicate": {"field": "000002", "operator": ">", "value": 5.45}}, {"children": [{"confidence": 0.43849, "count": 3, "id": 7, "objective_summary": {"categories": [["Iris-versicolor", 3]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": ">", "value": 1.55}}, {"confidence": 0.20654, "count": 1, "id": 8, "objective_summary": {"categories": [["Iris-virginica", 1]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": "<=", "value": 1.55}}], "confidence": 0.30064, "count": 4, "id": 6, "objective_summary": {"categories": [["Iris-versicolor", 3], ["Iris-virginica", 1]]}, "output": "Iris-versicolor", "predicate": {"field": "000002", "operator": "<=", "value": 5.45}}], "confidence": 0.25045, "count": 7, "id": 4, "objective_summary": {"categories": [["Iris-virginica", 4], ["Iris-versicolor", 3]]}, "output": "Iris-virginica", "predicate": {"field": "000002", "operator": ">", "value": 4.95}}, {"confidence": 0.92292, "count": 46, "id": 9, "objective_summary": {"categories": [["Iris-versicolor", 46]]}, "output": "Iris-versicolor", "predicate": {"field": "000002", "operator": "<=", "value": 4.95}}], "confidence": 0.82141, "count": 53, "id": 3, "objective_summary": {"categories": [["Iris-versicolor", 49], ["Iris-virginica", 4]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.75}}], "confidence": 0.41196, "count": 96, "id": 1, "objective_summary": {"categories": [["Iris-versicolor", 49], ["Iris-virginica", 47]]}, "output": "Iris-versicolor", "predicate": {"field": "000002", "operator": ">", "value": 
2.45}}, {"confidence": 0.93358, "count": 54, "id": 10, "objective_summary": {"categories": [["Iris-setosa", 54]]}, "output": "Iris-setosa", "predicate": {"field": "000002", "operator": "<=", "value": 2.45}}], "confidence": 0.28756, "count": 150, "id": 0, "objective_summary": {"categories": [["Iris-setosa", 54], ["Iris-versicolor", 49], ["Iris-virginica", 47]]}, "output": "Iris-setosa", "predicate": true}}, "name": "iris - 0", "name_options": "512-node, pruned, deterministic order", "node_threshold": 512, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000004", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["000004"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/604f5ee5c1c0000b90003cc5", "randomize": false, "range": null, "replacement": false, "resource": "model/6182d9c7f731fb7252001d37", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4757, "source": "source/604f5ef647d775129e0011a7", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 0.0}, "subscription": false, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2021-11-03T18:49:43.918000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/tests/my_no_root_ensemble/model_6182d9c7f731fb7252001d37.py b/bigml/tests/my_no_root_ensemble/model_6182d9c7f731fb7252001d37.py new file mode 100644 index 00000000..e69de29b diff --git a/bigml/tests/my_no_root_ensemble/model_6182d9c7f731fb7252001d39 b/bigml/tests/my_no_root_ensemble/model_6182d9c7f731fb7252001d39 new file mode 100644 index 00000000..dc36581c --- /dev/null +++ b/bigml/tests/my_no_root_ensemble/model_6182d9c7f731fb7252001d39 @@ -0,0 +1 
@@ +{"code": 200, "resource": "model/6182d9c7f731fb7252001d39", "location": "https://bigml.io/andromeda/model/6182d9c7f731fb7252001d39", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2021-11-03T18:49:43.942000", "creator": "merce_demo", "credits": 0.0, "credits_per_prediction": 0.0, "dataset": "dataset/604f5f06cb4f96592d004959", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "6182d9bfb3ef374f3c0031de", "ensemble_index": 1, "excluded_fields": [], "fields_meta": {"count": 5, "limit": 1000, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000002", "000003"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 49], ["Iris-virginica", 51]]}, "training": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 49], ["Iris-virginica", 51]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, 
"missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "order": 3, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], 
[1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000002", 0.94621], ["000000", 0.02353], ["000003", 0.01864], ["000001", 0.01161]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "preferred": true}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "preferred": true}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "preferred": true}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "preferred": true}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "preferred": true, "term_analysis": {"enabled": true}}}, "node_threshold": 512}, "name": "iris - 1", "name_options": "512-node, pruned, deterministic order", "node_threshold": 512, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000004", "objective_field_name": 
"species", "objective_field_type": "categorical", "objective_fields": ["000004"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/604f5ee5c1c0000b90003cc5", "randomize": false, "range": null, "replacement": false, "resource": "model/6182d9c7f731fb7252001d39", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4757, "source": "source/604f5ef647d775129e0011a7", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 0.0}, "subscription": false, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2021-11-03T18:49:44.363000", "white_box": false}, "error": null} diff --git a/bigml/tests/my_no_root_ensemble/model_6182d9c8f731fb7252001d3b b/bigml/tests/my_no_root_ensemble/model_6182d9c8f731fb7252001d3b new file mode 100644 index 00000000..cd9211db --- /dev/null +++ b/bigml/tests/my_no_root_ensemble/model_6182d9c8f731fb7252001d3b @@ -0,0 +1 @@ +{"code": 200, "resource": "model/6182d9c8f731fb7252001d3b", "location": "https://bigml.io/andromeda/model/6182d9c8f731fb7252001d3b", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2021-11-03T18:49:44.383000", "creator": "merce_demo", "credits": 0.0, "credits_per_prediction": 0.0, "dataset": "dataset/604f5f06cb4f96592d004959", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": true, "ensemble_id": "6182d9bfb3ef374f3c0031de", "ensemble_index": 2, "excluded_fields": [], "fields_meta": {"count": 5, "limit": 1000, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": 
["000000", "000001", "000002", "000003"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 52], ["Iris-versicolor", 44], ["Iris-virginica", 54]]}, "training": {"categories": [["Iris-setosa", 52], ["Iris-versicolor", 44], ["Iris-virginica", 54]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000002": {"column_number": 2, "datatype": 
"double", "name": "petal length", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "order": 3, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000002", 0.93437], ["000003", 0.05554], ["000001", 0.01009]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", 
"null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "preferred": true}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "preferred": true}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "preferred": true}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "preferred": true}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "preferred": true, "term_analysis": {"enabled": true}}}, "node_threshold": 512, "root": {"children": [{"children": [{"confidence": 0.92865, "count": 50, "id": 2, "objective_summary": {"categories": [["Iris-virginica", 50]]}, "output": "Iris-virginica", "predicate": {"field": "000002", "operator": ">", "value": 4.95}}, {"children": [{"children": [{"confidence": 0.20654, "count": 1, "id": 5, "objective_summary": {"categories": [["Iris-versicolor", 1]]}, "output": "Iris-versicolor", "predicate": {"field": "000001", "operator": ">", "value": 3.1}}, {"confidence": 0.5101, "count": 4, "id": 6, "objective_summary": {"categories": [["Iris-virginica", 4]]}, "output": "Iris-virginica", "predicate": {"field": "000001", "operator": "<=", "value": 3.1}}], "confidence": 0.37553, "count": 5, "id": 4, "objective_summary": {"categories": [["Iris-virginica", 4], ["Iris-versicolor", 1]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 1.7}}, {"confidence": 0.91799, "count": 43, "id": 7, "objective_summary": {"categories": [["Iris-versicolor", 43]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.7}}], "confidence": 0.80446, "count": 48, "id": 3, "objective_summary": {"categories": [["Iris-versicolor", 44], 
["Iris-virginica", 4]]}, "output": "Iris-versicolor", "predicate": {"field": "000002", "operator": "<=", "value": 4.95}}], "confidence": 0.45247, "count": 98, "id": 1, "objective_summary": {"categories": [["Iris-virginica", 54], ["Iris-versicolor", 44]]}, "output": "Iris-virginica", "predicate": {"field": "000002", "operator": ">", "value": 2.45}}, {"confidence": 0.93121, "count": 52, "id": 8, "objective_summary": {"categories": [["Iris-setosa", 52]]}, "output": "Iris-setosa", "predicate": {"field": "000002", "operator": "<=", "value": 2.45}}], "confidence": 0.28756, "count": 150, "id": 0, "objective_summary": {"categories": [["Iris-virginica", 54], ["Iris-setosa", 52], ["Iris-versicolor", 44]]}, "output": "Iris-virginica", "predicate": true}}, "name": "iris - 2", "name_options": "512-node, pruned, deterministic order", "node_threshold": 512, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000004", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["000004"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": "project/604f5ee5c1c0000b90003cc5", "randomize": false, "range": null, "replacement": false, "resource": "model/6182d9c8f731fb7252001d3b", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4757, "source": "source/604f5ef647d775129e0011a7", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 0, "message": "The model has been created", "progress": 0.0}, "subscription": false, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2021-11-03T18:49:44.881000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/bigml/tests/pipeline3.zip b/bigml/tests/pipeline3.zip new file mode 100644 index 00000000..aae690e8 Binary files 
/dev/null and b/bigml/tests/pipeline3.zip differ diff --git a/bigml/tests/read_dataset_steps.py b/bigml/tests/read_dataset_steps.py new file mode 100644 index 00000000..026b361c --- /dev/null +++ b/bigml/tests/read_dataset_steps.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,no-member +# +# Copyright 2012-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import json + +from bigml.fields import Fields +from .world import world, eq_, ok_ + + +def i_get_the_missing_values(step): + """Step: I ask for the missing values counts in the fields""" + resource = world.dataset + fields = Fields(resource['fields']) + step.bigml["result"] = fields.missing_counts() + + +def i_get_the_errors_values(step): + """Step: I ask for the error counts in the fields """ + resource = world.dataset + step.bigml["result"] = world.api.error_counts(resource) + + +def i_get_the_properties_values(step, properties_dict): + """Step: the (missing values counts|error counts) dict + is + """ + ok_(properties_dict is not None) + eq_(step.bigml["result"], json.loads(properties_dict)) diff --git a/bigml/tests/read_resource_steps.py b/bigml/tests/read_resource_steps.py new file mode 100644 index 00000000..bf702e04 --- /dev/null +++ b/bigml/tests/read_resource_steps.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2014-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with 
the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import time + +from datetime import datetime + +from bigml.api import HTTP_OK, get_status, get_resource_type + +from .world import world, logged_wait, eq_, ok_ + + +def wait_until_status_code_is(code1, code2, secs, resource_info): + """Waits for the resource to be finished and stores the resulting full + info in the corresponding dictionary. Attention, resource_info is + modified + """ + + start = datetime.utcnow() + delta = int(secs) * world.delta + resource_info = world.get_minimal_resource( + resource_info['resource']).get("object") + status = get_status(resource_info) + count = 0 + while (status['code'] != int(code1) and + status['code'] != int(code2)): + count += 1 + resource_type = get_resource_type(resource_info["resource"]) + logged_wait(start, delta, count, resource_type, status=status) + ok_((datetime.utcnow() - start).seconds < delta) + resource_info = world.get_minimal_resource( + resource_info['resource']).get("object") + status = get_status(resource_info) + if status['code'] == int(code2): + world.errors.append(resource_info) + eq_(status['code'], int(code1)) + time.sleep(0.1) # added to avoid synch mongo issues + return i_get_the_resource(resource_info) + + +def i_get_the_resource(resource_info): + """Step: I get the resource """ + resource = world.get_maximal_resource(resource_info["resource"]) + world.status = resource['code'] + eq_(world.status, HTTP_OK) + return resource['object'] diff --git a/bigml/tests/test_01_prediction.py b/bigml/tests/test_01_prediction.py new file mode 100644 index 00000000..7a97fd6d --- /dev/null 
+++ b/bigml/tests/test_01_prediction.py @@ -0,0 +1,270 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Testing prediction creation + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_cluster_steps as cluster_create +from . import create_anomaly_steps as anomaly_create +from . import create_lda_steps as topic_create +from . 
import create_prediction_steps as prediction_create
+
+
+class TestPrediction:
+    """Test predictions"""
+
+    def setup_method(self, method):
+        """
+            Stores the running test's name and prints the start banner
+        """
+        self.bigml = {}
+        self.bigml["method"] = method.__name__
+        print("\n-------------------\nTests in: %s\n" % __name__)
+
+    def teardown_method(self):
+        """
+            Prints the end banner and clears the per-test state
+        """
+        print("\nEnd of tests in: %s\n-------------------\n" % __name__)
+        self.bigml = {}
+
+    def test_scenario1(self):
+        """
+        Scenario 1: Successfully creating a prediction:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a model
+            And I wait until the model is ready less than <model_wait> secs
+            When I create a prediction for "<input_data>"
+            Then the prediction for "<objective_id>" is "<prediction>"
+
+        """
+        show_doc(self.test_scenario1)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "input_data", "objective_id", "prediction"]
+        examples = [
+            ['data/iris.csv', '30', '30', '30',
+             '{"petal width": 0.5}', '000004', 'Iris-setosa'],
+            ['data/iris_sp_chars.csv', '30', '30', '30',
+             '{"pétal&width\\u0000": 0.5}', '000004', 'Iris-setosa']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(self, example["data"],
+                                          shared=example["data"])
+            source_create.the_source_is_finished(self, example["source_wait"],
+                                                 shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_model(self, shared=example["data"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"], shared=example["data"])
+            prediction_create.i_create_a_prediction(
+                self, example["input_data"])
+            prediction_create.the_prediction_is(
+                self, example["objective_id"], example["prediction"])
+
+    def test_scenario2(self):
+        """
+        Scenario 2: Successfully creating a prediction from a source in a remote location
+
+            Given I create a data source using the url "<url>"
+            And I wait until the source is ready less than <wait_source> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <wait_dataset> secs
+            And I create a model
+            And I wait until the model is ready less than <wait_model> secs
+            When I create a prediction for "<input_data>"
+            Then the prediction for "<objective_id>" is "<prediction>"
+
+        """
+        show_doc(self.test_scenario2)
+        headers = ["url", "wait_source", "wait_dataset", "wait_model",
+                   "input_data", "objective_id", "prediction"]
+        examples = [
+            ['s3://bigml-public/csv/iris.csv', '10', '10', '10',
+             '{"petal width": 0.5}', '000004', 'Iris-setosa']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_create_using_url(self, example["url"])
+            source_create.the_source_is_finished(self, example["wait_source"])
+            dataset_create.i_create_a_dataset(self)
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["wait_dataset"])
+            model_create.i_create_a_model(self)
+            model_create.the_model_is_finished_in_less_than(
+                self, example["wait_model"])
+            prediction_create.i_create_a_prediction(
+                self, example["input_data"])
+            prediction_create.the_prediction_is(
+                self, example["objective_id"], example["prediction"])
+
+    def test_scenario3(self):
+        """
+        Scenario 3: Successfully creating a prediction from inline data source:
+            Given I create a data source from inline data slurped from "<data>"
+            And I wait until the source is ready less than <wait_source> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <wait_dataset> secs
+            And I create a model
+            And I wait until the model is ready less than <wait_model> secs
+            When I create a prediction for "<input_data>"
+            Then the prediction for "<objective_id>" is "<prediction>"
+        """
+        show_doc(self.test_scenario3)
+        headers = ["data", "wait_source", "wait_dataset", "wait_model",
+                   "input_data", "objective_id", "prediction"]
+        examples = [
+            ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}',
+             '000004', 'Iris-setosa']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_create_using_dict_data(
+                self, example["data"])
+            source_create.the_source_is_finished(self, example["wait_source"])
+            dataset_create.i_create_a_dataset(self)
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["wait_dataset"])
+            model_create.i_create_a_model(self)
+            model_create.the_model_is_finished_in_less_than(
+                self, example["wait_model"])
+            prediction_create.i_create_a_prediction(
+                self, example["input_data"])
+            prediction_create.the_prediction_is(
+                self, example["objective_id"], example["prediction"])
+
+    def test_scenario4(self):  # NOTE(review): the docstring's centroid check and dataset-from-cluster steps appear to have no matching call below
+        """
+        Scenario 4: Successfully creating a centroid and the associated dataset:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <wait_source> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <wait_dataset> secs
+            And I create a cluster
+            And I wait until the cluster is ready less than <wait_cluster> secs
+            When I create a centroid for "<input_data>"
+            And I check the centroid is ok
+            Then the centroid is "<centroid>"
+            And I create a dataset from the cluster and the centroid
+            And I wait until the dataset is ready less than <wait_dataset> secs
+            And I check that the dataset is created for the cluster and the centroid
+        """
+        show_doc(self.test_scenario4)
+        headers = ["data", "wait_source", "wait_dataset", "wait_cluster",
+                   "input_data", "centroid"]
+        examples = [
+            ['data/diabetes.csv', '10', '20', '20',
+             '{"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84,'
+             ' "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8,'
+             ' "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"}',
+             'Cluster 3']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(self, example["wait_source"],
+                                                 shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["wait_dataset"], shared=example["data"])
+            cluster_create.i_create_a_cluster(self, shared=example["data"])
+            cluster_create.the_cluster_is_finished_in_less_than(
+                self, example["wait_cluster"], shared=example["data"])
+            prediction_create.i_create_a_centroid(self, example["input_data"])
+            prediction_create.the_centroid_is(self, example["centroid"])
+
+    def test_scenario5(self):
+        """
+        Scenario 5: Successfully creating an anomaly score:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <wait_source> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <wait_dataset> secs
+            And I create an anomaly detector from a dataset
+            And I wait until the anomaly detector is ready less than <wait_anomaly> secs
+            When I create an anomaly score for "<input_data>"
+            Then the anomaly score is "<score>"
+        """
+        show_doc(self.test_scenario5)
+        headers = ["data", "wait_source", "wait_dataset", "wait_anomaly",
+                   "input_data", "score"]
+        examples = [
+            ['data/tiny_kdd.csv', '10', '10', '100',
+             '{"src_bytes": 350}', '0.92846'],
+            ['data/iris_sp_chars.csv', '10', '10', '100',
+             '{"pétal&width\\u0000": 300}', '0.89313']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["wait_source"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["wait_dataset"], shared=example["data"])
+            anomaly_create.i_create_an_anomaly(self, shared=example["data"])
+            anomaly_create.the_anomaly_is_finished_in_less_than(
+                self, example["wait_anomaly"], shared=example["data"])
+            prediction_create.i_create_an_anomaly_score(
+                self, example["input_data"])
+            prediction_create.the_anomaly_score_is(self, example["score"])
+
+    def test_scenario6(self):
+        """
+        Scenario 6: Successfully creating a Topic Model:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <wait_source> secs
+            And I update the source with params "<source_params>"
+            And I create a dataset
+            And I wait until the dataset is ready less than <wait_dataset> secs
+            When I create a Topic Model from a dataset
+            Then I wait until the Topic Model is ready less than <wait_topic> secs
+        """
+        show_doc(self.test_scenario6)
+        headers = ["data", "wait_source", "wait_dataset", "wait_topic",
+                   "source_params"]
+        examples = [
+            ['data/movies.csv', '10', '10', '100',
+             '{"fields": {"000007": {"optype": "items", "item_analysis":'
+             ' {"separator": "$"}}, "000006": {"optype": "text"}}}']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(self, example["data"])
+            source_create.the_source_is_finished(self, example["wait_source"])
+            source_create.i_update_source_with(self, example["source_params"])
+            source_create.the_source_is_finished(self, example["wait_source"])
+            dataset_create.i_create_a_dataset(self)
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["wait_dataset"])
+            topic_create.i_create_a_topic_model(self)
+            topic_create.the_topic_model_is_finished_in_less_than(
+                self, example["wait_topic"])
diff --git a/bigml/tests/test_03_local_prediction.py b/bigml/tests/test_03_local_prediction.py
new file mode 100644
index 00000000..e746accd
--- /dev/null
+++ b/bigml/tests/test_03_local_prediction.py
@@ -0,0 +1,220 @@
+# -*- coding: utf-8 -*-
+#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init
+#pylint: disable=locally-disabled,unused-import
+#
+#
Copyright 2015-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+""" Testing local prediction
+
+"""
+from .world import world, setup_module, teardown_module, show_doc, \
+    show_method
+from . import compare_predictions_steps as prediction_compare
+from . import create_ensemble_steps as ensemble_create
+from . import create_prediction_steps as prediction_create
+
+
+class TestLocalPrediction:
+    """Testing local predictions """
+
+    def setup_method(self, method):
+        """
+            Stores the running test's name and prints the start banner
+        """
+        self.bigml = {}
+        self.bigml["method"] = method.__name__
+        print("\n-------------------\nTests in: %s\n" % __name__)
+
+    def teardown_method(self):
+        """
+            Prints the end banner and clears the per-test state
+        """
+        print("\nEnd of tests in: %s\n-------------------\n" % __name__)
+        self.bigml = {}
+
+    def test_scenario1(self):
+        """
+        Scenario 1: Successfully creating a prediction from a local model in a json file:
+            Given I create a local model from a "<file_path>" file
+            When I create a local prediction for "<input_data>" with confidence
+            Then the local prediction is "<prediction>"
+            And the local prediction's confidence is "<confidence>"
+        """
+        show_doc(self.test_scenario1)
+        headers = ["file_path", "input_data", "prediction", "confidence"]
+        examples = [
+            ['data/iris_model.json', '{"petal length": 0.5}', 'Iris-setosa',
+             '0.90594'],
+            ['data/iris_model.json', '{"petal length": "0.5"}', 'Iris-setosa',
+             '0.90594']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            prediction_compare.i_create_a_local_model_from_file(
+                self, example["file_path"])
+            prediction_compare.i_create_a_local_prediction_with_confidence(
+                self, example["input_data"])
+            prediction_compare.the_local_prediction_is(
+                self, example["prediction"])
+            prediction_compare.the_local_prediction_confidence_is(
+                self, example["confidence"])
+
+    def test_scenario2(self):
+        """
+        Scenario 2: Successfully creating a prediction from a local model in a json file:
+            Given I create a local model using SupervisedModel from a "<file_path>" file
+            When I create a local prediction for "<input_data>" with confidence
+            Then the local prediction is "<prediction>"
+            And the local prediction's confidence is "<confidence>"
+        """
+        show_doc(self.test_scenario2)
+        headers = ["file_path", "input_data", "prediction", "confidence"]
+        examples = [
+            ['data/iris_model.json', '{"petal length": 0.5}', 'Iris-setosa',
+             '0.90594'],
+            ['data/iris_model.json', '{"petal length": "0.5"}', 'Iris-setosa',
+             '0.90594']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            prediction_compare.i_create_a_local_supervised_model_from_file(
+                self, example["file_path"])
+            prediction_compare.i_create_a_local_prediction_with_confidence(
+                self, example["input_data"])
+            prediction_compare.the_local_prediction_is(
+                self, example["prediction"])
+            prediction_compare.the_local_prediction_confidence_is(
+                self, example["confidence"])
+
+
+    def test_scenario3(self):
+        """
+        Scenario 3: Successfully creating a local prediction from an Ensemble created from file storage:
+            Given I create a local Ensemble from path "<file_path>"
+            When I create a local ensemble prediction with confidence for "<input_data>"
+            Then the local prediction is "<prediction>"
+            And the local prediction's confidence is "<confidence>"
+            And the local probabilities are "<probabilities>"
+        """
+        show_doc(self.test_scenario3)
+        headers = ["file_path", "input_data", "prediction", "confidence",
+                   "probabilities"]
+        examples = [
+            ['bigml/tests/my_no_root_ensemble/ensemble.json',
+             '{"petal width": 0.5}', 'Iris-setosa', '0.3533',
+             '["0.3533", "0.31", "0.33666"]' ]]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            ensemble_create.create_local_ensemble(
+                self, path=example["file_path"])
+            prediction_create.create_local_ensemble_prediction_probabilities(
+                self, example["input_data"])
+            prediction_compare.the_local_prediction_is(
+                self, example["prediction"])
+            prediction_compare.the_local_prediction_confidence_is(
+                self, example["confidence"])
+            prediction_compare.the_local_probabilities_are(
+                self, example["probabilities"])
+
+    def test_scenario4(self):
+        """
+        Scenario 4: Successfully creating a local prediction from an Ensemble created from file storage:
+            Given I create a local SupervisedModel from path "<file_path>"
+            When I create a local ensemble prediction with confidence for "<input_data>"
+            Then the local prediction is "<prediction>"
+            And the local prediction's confidence is "<confidence>"
+            And the local probabilities are "<probabilities>"
+        """
+        show_doc(self.test_scenario4)
+        headers = ["file_path", "input_data", "prediction", "confidence",
+                   "probabilities"]
+        examples = [
+            ['bigml/tests/my_no_root_ensemble/ensemble.json',
+             '{"petal width": 0.5}', 'Iris-setosa', '0.3533',
+             '["0.3533", "0.31", "0.33666"]' ]]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            prediction_compare.i_create_a_local_supervised_model_from_file(
+                self, example["file_path"])
+            prediction_compare.i_create_a_local_prediction_with_confidence(
+                self, example["input_data"])
+            prediction_compare.i_create_local_probabilities(
+                self, example["input_data"])
+            prediction_compare.the_local_prediction_is(
+                self, example["prediction"])
+            prediction_compare.the_local_prediction_confidence_is(
+                self, example["confidence"])
+            prediction_compare.the_local_probabilities_are(
+                self, example["probabilities"])
+
+    def test_scenario5(self):
+        """
+        Scenario 5: Successfully creating a prediction from a local images deepnet in a json file:
+            Given I create a local deepnet from a "<file_path>" file
+            When I create a local prediction for "<input_data>"
+            Then the local prediction is "<prediction>"
+        """
+        show_doc(self.test_scenario5)
+        headers = ["file_path", "input_data", "operation_settings",
+                   "prediction"]
+        examples = [
+            ['data/imgs_deepnet.zip', "data/images/cats/pexels-pixabay-33358.jpg",
+             {"region_score_threshold": 0.7},
+             ('{"prediction": [{"box": [0.68164, 0.30469, 0.79688, 0.36979], '
+              '"label": "eye", "score": 0.79633}, '
+              '{"box": [0.38086, 0.27865, 0.50391, 0.36068], '
+              '"label": "eye", "score": 0.74563}]}')]]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            prediction_compare.i_create_a_local_deepnet_from_zip_file(
+                self, example["file_path"],
+                operation_settings=example["operation_settings"])
+            prediction_compare.i_create_a_local_regions_prediction(
+                self, example["input_data"])
+            prediction_compare.the_local_regions_prediction_is(
+                self, example["prediction"])
+
+    def test_scenario6(self):
+        """
+        Scenario 6: Successfully creating a prediction from a ShapWrapper of a model in a json file:
+            Given I create a local model using ShapWrapper from a "<file_path>" file
+            When I create a local prediction for "<numpy_input>"
+            Then the local prediction is "<prediction>"
+            When I create a local probabilities prediction for "<numpy_input>"
+            Then the local probabilities prediction is "<proba_prediction>"
+        """
+        import numpy as np
+        show_doc(self.test_scenario6)
+        headers = ["file_path", "numpy_input", "prediction", "proba_prediction"]
+        examples = [
+            ['data/iris_model.json', np.asarray([np.asarray([0.5,1.0,1.0])]),
+             0., [0.9818, 0.00921, 0.00899]]]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            prediction_compare.i_create_a_local_shap_wrapper_from_file(
+                self, example["file_path"])
+            prediction_compare.i_create_a_shap_local_prediction(
+                self, example["numpy_input"])
+            prediction_compare.the_local_prediction_is(
+                self, example["prediction"])
+            prediction_compare.i_create_shap_local_probabilities(
+                self, example["numpy_input"])
+            prediction_compare.the_local_proba_prediction_is(
+                self, example["proba_prediction"])
diff --git a/bigml/tests/test_04_multivote_prediction.py b/bigml/tests/test_04_multivote_prediction.py
new file mode 100644
index 00000000..b66f5abd
--- /dev/null
+++ b/bigml/tests/test_04_multivote_prediction.py
@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init
+#pylint: disable=locally-disabled,unused-import
+#
+# Copyright 2015-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+""" Testing MultiVote predictions
+
+"""
+from .world import world, setup_module, teardown_module, show_doc, \
+    show_method
+from .
import compute_multivote_prediction_steps as multivote_prediction
+
+
+class TestMultiVotePrediction:
+    """Testing MultiVote methods"""
+
+    def setup_method(self, method):
+        """
+            Stores the running test's name and prints the start banner
+        """
+        self.bigml = {}
+        self.bigml["method"] = method.__name__
+        print("\n-------------------\nTests in: %s\n" % __name__)
+
+    def teardown_method(self):
+        """
+            Prints the end banner and clears the per-test state
+        """
+        print("\nEnd of tests in: %s\n-------------------\n" % __name__)
+        self.bigml = {}
+
+    def test_scenario1(self):
+        """
+        Scenario 1: Successfully computing predictions combinations:
+            Given I create a MultiVote for the set of predictions in file <predictions_file>
+            When I compute the prediction with confidence using method "<method>"
+            And I compute the prediction without confidence using method "<method>"
+            Then the combined prediction is "<prediction>"
+            And the combined prediction without confidence is "<prediction>"
+            And the confidence for the combined prediction is <confidence>
+        """
+        show_doc(self.test_scenario1)
+        headers = ["predictions_file", "method", "prediction", "confidence"]
+        examples = [
+            ['data/predictions_c.json', '0', 'a', '0.45047'],
+            ['data/predictions_c.json', '1', 'a', '0.55202'],
+            ['data/predictions_c.json', '2', 'a', '0.40363'],
+            ['data/predictions_r.json', '0', '1.55555556667', '0.40008'],
+            ['data/predictions_r.json', '1', '1.59376845074', '0.24837'],
+            ['data/predictions_r.json', '2', '1.55555556667', '0.40008']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            multivote_prediction.i_create_a_multivote(
+                self, example["predictions_file"])
+            multivote_prediction.compute_prediction(
+                self, example["method"])
+            multivote_prediction.compute_prediction_no_confidence(
+                self, example["method"])
+            multivote_prediction.check_combined_prediction(
+                self, example["prediction"])
+            multivote_prediction.check_combined_prediction_no_confidence(
+                self, example["prediction"])
+            multivote_prediction.check_combined_confidence(
+                self, example["confidence"])
diff
--git a/bigml/tests/test_05_compare_predictions.py b/bigml/tests/test_05_compare_predictions.py new file mode 100644 index 00000000..7cebde55 --- /dev/null +++ b/bigml/tests/test_05_compare_predictions.py @@ -0,0 +1,575 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Comparing remote and local predictions + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_prediction_steps as prediction_create +from . 
import compare_predictions_steps as prediction_compare + + +class TestComparePrediction: + """Comparing remote and local predictions""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario 1: Successfully comparing predictions: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction"] + examples = [ + ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', + '000004', 'Iris-setosa'], + ['data/iris.csv', '10', '10', '10', + '{"petal length": 6, "petal width": 2}', '000004', + 'Iris-virginica'], + ['data/iris.csv', '10', '10', '10', + '{"petal length": 4, "petal width": 1.5}', '000004', + 'Iris-versicolor'], + ['data/iris_sp_chars.csv', '10', '10', '10', + '{"pétal.length": 4, "pétal&width\\u0000": 1.5}', '000004', + 'Iris-versicolor']] + show_doc(self.test_scenario1) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"], + shared=example["data"]) + source_create.the_source_is_finished(self, example["source_wait"], + shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + 
dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_model(self) + prediction_create.i_create_a_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario2(self): + """ + Scenario 2: Successfully comparing predictions with text options: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "input_data", "objective_id", "prediction"] + examples = [ + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": true, "stem_words": true, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Message": "Mobile call"}', '000000', 'spam'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": true, "stem_words": true, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Message": "A normal message"}', '000000', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + 
'{"case_sensitive": false, "stem_words": false, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Message": "Mobile calls"}', '000000', 'spam'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": false, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Message": "A normal message"}', '000000', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": true, ' + '"use_stopwords": true, "language": "en"}}}}', + '{"Message": "Mobile call"}', '000000', 'spam'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": true, ' + '"use_stopwords": true, "language": "en"}}}}', + '{"Message": "A normal message"}', '000000', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "full_terms_only", "language": "en"}}}}', + '{"Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every ' + 'week just txt NOKIA to 87077 Get txting and tell ur mates. 
zed ' + 'POBox 36504 W45WQ norm150p/tone 16+"}', '000000', 'spam'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "full_terms_only", "language": "en"}}}}', + '{"Message": "Ok"}', '000000', 'ham'], + ['data/movies.csv', '20', '20', '30', + '{"fields": {"000007": {"optype": "items", "item_analysis": ' + '{"separator": "$"}}}}', '{"genres": "Adventure$Action", ' + '"timestamp": 993906291, "occupation": "K-12 student"}', + '000009', '3.92135'], + ['data/text_missing.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "all", "language": "en"}}, "000000": {"optype": ' + '"text", "term_analysis": {"token_mode": "all", ' + '"language": "en"}}}}', '{}', "000003", 'swap']] + show_doc(self.test_scenario2) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + model_create.i_create_a_model(self) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_model(self) + prediction_create.i_create_a_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + + def test_scenario3(self): + """ + Scenario 3: Successfully comparing predictions with proportional missing strategy: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And 
I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + When I create a proportional missing strategy prediction for "" + Then the prediction for "" is "" + And the confidence for the prediction is "" + And I create a proportional missing strategy local prediction for "" + Then the local prediction is "" + And the local prediction's confidence is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "confidence"] + examples = [ + ['data/iris.csv', '50', '30', '30', '{}', '000004', 'Iris-setosa', + '0.2629'], + ['data/grades.csv', '50', '30', '30', '{}', '000005', '68.62224', + '27.5358'], + ['data/grades.csv', '50', '30', '30', '{"Midterm": 20}', '000005', + '40.46667', '54.89713'], + ['data/grades.csv', '50', '30', '30', + '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}', '000005', + '28.06', '25.65806']] + show_doc(self.test_scenario3) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_model(self) + prediction_create.i_create_a_proportional_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_create.the_confidence_is(self, example["confidence"]) + 
prediction_compare.i_create_a_proportional_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) + + def test_scenario4(self): + """ + Scenario 4: Successfully comparing predictions with proportional missing strategy for missing_splits models: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with missing splits + And I wait until the model is ready less than secs + And I create a local model + When I create a proportional missing strategy prediction for "" + Then the prediction for "" is "" + And the confidence for the prediction is "" + And I create a proportional missing strategy local prediction for "" + Then the local prediction is "" + And the local prediction's confidence is "" + And the highest local prediction's confidence is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "confidence"] + examples = [ + ['data/iris_missing2.csv', '10', '10', '10', + '{"petal width": 1}', '000004', 'Iris-setosa', '0.8064'], + ['data/iris_missing2.csv', '10', '10', '10', + '{"petal width": 1, "petal length": 4}', '000004', + 'Iris-versicolor', '0.7847'], + ['data/missings_reg.csv', '10', '10', '10', '{"x2": 4}', + '000002', '1.33333', '1.62547'] +] + show_doc(self.test_scenario4) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + 
self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model_with_missing_splits(self) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_model(self) + prediction_create.i_create_a_proportional_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_create.the_confidence_is(self, example["confidence"]) + prediction_compare.i_create_a_proportional_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) + prediction_compare.the_highest_local_prediction_confidence_is( + self, example["input_data"], example["confidence"]) + + def test_scenario5(self): + """ + Scenario 5: Successfully comparing logistic regression predictions: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression model + And I wait until the logistic regression model is ready less than secs + And I create a local logistic regression model + When I create a logistic regression prediction for "" + Then the logistic regression prediction is "" + And I create a local logistic regression prediction for "" + Then the local logistic regression prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "prediction"] + examples = [ + ['data/iris.csv', '10', '10', '50', + '{"petal width": 0.5, "petal length": 0.5, "sepal width": 0.5, ' + '"sepal length": 0.5}', 'Iris-versicolor'], + ['data/iris.csv', '10', '10', '50', + '{"petal width": 2, "petal length": 6, "sepal width": 0.5, ' + '"sepal length": 0.5}', 'Iris-versicolor'], + ['data/iris.csv', '10', '10', '50', 
+ '{"petal width": 1.5, "petal length": 4, "sepal width": 0.5, ' + '"sepal length": 0.5}', 'Iris-versicolor'], + ['data/iris.csv', '10', '10', '50', + '{"petal length": 1}', 'Iris-setosa'], + ['data/iris_sp_chars.csv', '10', '10', '50', + '{"pétal.length": 4, "pétal&width\\u0000": 1.5, "sépal&width": ' + '0.5, "sépal.length": 0.5}', 'Iris-versicolor'], + ['data/price.csv', '10', '10', '50', '{"Price": 1200}', + 'Product1']] + show_doc(self.test_scenario5) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model( + self, shared=example["data"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_logistic_model(self) + prediction_create.i_create_a_logistic_prediction( + self, example["input_data"]) + prediction_create.the_logistic_prediction_is( + self, example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + + def test_scenario6(self): + """ + Scenario 6: Successfully comparing predictions with text options: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression model + And I wait until the logistic regression model is ready less than secs + And I create a local logistic regression model 
+ When I create a logistic regression prediction for "" + Then the logistic regression prediction is "" + And I create a local logistic regression prediction for "" + Then the local logistic regression prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "input_data", "prediction"] + examples = [ + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": true, "stem_words": true, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Message": "Mobile call"}', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": true, "stem_words": true, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Message": "A normal message"}', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": false, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Message": "Mobile calls"}', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": false, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Message": "A normal message"}', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": true, ' + '"use_stopwords": true, "language": "en"}}}}', + '{"Message": "Mobile call"}', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": true, ' + '"use_stopwords": true, "language": "en"}}}}', + '{"Message": "A normal message"}', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "full_terms_only", "language": "en"}}}}', + '{"Message": "FREE 
for 1st week! No1 Nokia tone 4 ur mob every ' + 'week just txt NOKIA to 87077 Get txting and tell ur mates. zed ' + 'POBox 36504 W45WQ norm150p/tone 16+"}', 'ham'], + ['data/spam.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "full_terms_only", "language": "en"}}}}', + '{"Message": "Ok"}', 'ham']] + show_doc(self.test_scenario6) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + model_create.i_create_a_logistic_model(self) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_logistic_model(self) + prediction_create.i_create_a_logistic_prediction( + self, example["input_data"]) + prediction_create.the_logistic_prediction_is( + self, example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + + def test_scenario7(self): + """ + Scenario 7: Successfully comparing predictions with text options and proportional missing strategy: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + When I create a proportional missing strategy prediction for "" + Then the prediction for "" is "" + And I create a proportional missing strategy local prediction for "" + Then the 
local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "input_data", "objective_id", "prediction"] + examples = [ + ['data/text_missing.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "all", "language": "en"}}, "000000": {"optype": ' + '"text", "term_analysis": {"token_mode": "all", ' + '"language": "en"}}}}', '{}', "000003",'swap'], + ['data/text_missing.csv', '20', '20', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "all", "language": "en"}}, "000000": {"optype": ' + '"text", "term_analysis": {"token_mode": "all", ' + '"language": "en"}}}}', '{"category1": "a"}', "000003", + 'paperwork']] + show_doc(self.test_scenario7) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + model_create.i_create_a_model(self) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_model(self) + prediction_create.i_create_a_proportional_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_proportional_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + + def test_scenario8(self): + """ + Scenario 8: Successfully comparing predictions with text options: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" 
+ And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression model with objective "" and parms "" + And I wait until the logistic regression model is ready less than secs + And I create a local logistic regression model + When I create a logistic regression prediction for "" + Then the logistic regression prediction is "" + And the logistic regression probability for the prediction is "" + And I create a local logistic regression prediction for "" + Then the local logistic regression prediction is "" + And the local logistic regression probability for the prediction is "" + + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "input_data", "prediction", "probability", + "objective_id", "model_conf"] + examples = [ + ['data/iris.csv', '20', '20', '130', + '{"fields": {"000000": {"optype": "categorical"}}}', + '{"species": "Iris-setosa"}', '5.0', 0.0394, "000000", + '{"field_codings": [{"field": "species", "coding": "dummy", ' + '"dummy_class": "Iris-setosa"}]}'], + ['data/iris.csv', '20', '20', '130', '{"fields": {"000000": ' + '{"optype": "categorical"}}}', '{"species": "Iris-setosa"}', + '5.0', 0.051, "000000", '{"balance_fields": false, ' + '"field_codings": [{"field": "species", "coding": "contrast", ' + '"coefficients": [[1, 2, -1, -2]]}]}'], + ['data/iris.csv', '20', '20', '130', + '{"fields": {"000000": {"optype": "categorical"}}}', + '{"species": "Iris-setosa"}', '5.0', 0.051, "000000", + '{"balance_fields": false, "field_codings": [{"field": "species",' + ' "coding": "other", "coefficients": [[1, 2, -1, -2]]}]}'], + ['data/iris.csv', '20', '20', '130', + '{"fields": {"000000": {"optype": "categorical"}}}', + '{"species": "Iris-setosa"}', '5.0', 0.0417, "000000", + '{"bias": false}']] + show_doc(self.test_scenario8) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + 
source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_logistic_model(self) + prediction_create.i_create_a_logistic_prediction( + self, example["input_data"]) + prediction_create.the_logistic_prediction_is( + self, example["prediction"]) + prediction_create.the_logistic_probability_is( + self, example["probability"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_probability_is( + self, example["probability"]) diff --git a/bigml/tests/test_05_compare_predictions_b.py b/bigml/tests/test_05_compare_predictions_b.py new file mode 100644 index 00000000..65097657 --- /dev/null +++ b/bigml/tests/test_05_compare_predictions_b.py @@ -0,0 +1,640 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Comparing remote and local predictions + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_prediction_steps as prediction_create +from . import compare_predictions_steps as prediction_compare + + +class TestComparePrediction: + """Testing local and remote predictions""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario10(self): + """ + Scenario: Successfully comparing predictions with proportional missing strategy and balanced models: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a balanced model + And I wait until the model is ready less than secs + And I create a local model + When I create a proportional missing strategy prediction for "" + Then the prediction for "" is "" + And the confidence for the prediction is "" + And I create a proportional missing strategy local prediction for "" + Then the local prediction is "" + And the local prediction's confidence is "" + And I create local probabilities for "" + Then the local probabilities are "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", + "confidence", "probabilities"] + examples = [ + ['data/iris_unbalanced.csv', '10', '10', '10', '{}', '000004', + 
'Iris-setosa', '0.25284', '[0.33333, 0.33333, 0.33333]'], + ['data/iris_unbalanced.csv', '10', '10', '10', + '{"petal length":1, "sepal length":1, "petal width": 1, ' + '"sepal width": 1}', '000004', 'Iris-setosa', '0.7575', + '[1.0, 0.0, 0.0]']] + show_doc(self.test_scenario10) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"], + shared=example["data"]) + source_create.the_source_is_finished(self, example["source_wait"], + shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_balanced_model(self) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_model(self) + prediction_create.i_create_a_proportional_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_proportional_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_create.the_confidence_is( + self, example["confidence"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) + prediction_compare.i_create_local_probabilities( + self, example["input_data"]) + prediction_compare.the_local_probabilities_are( + self, example["probabilities"]) + + def test_scenario11(self): + """ + Scenario: Successfully comparing predictions for logistic regression with balance_fields: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression 
model with objective "" and flags + And I wait until the logistic regression model is ready less than secs + And I create a local logistic regression model + When I create a logistic regression prediction for "" + Then the logistic regression prediction is "" + And the logistic regression probability for the prediction is "" + And I create a local logistic regression prediction for "" + Then the local logistic regression prediction is "" + And the local logistic regression probability for the prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "input_data", "prediction", "probability", + "objective_id", "model_conf"] + examples = [ + ['data/movies.csv', '20', '20', '180', + '{"fields": {"000000": {"name": "user_id", "optype": "numeric"},' + ' "000001": {"name": "gender", "optype": "categorical"},' + ' "000002": {"name": "age_range", "optype": "categorical"},' + ' "000003": {"name": "occupation", "optype": "categorical"},' + ' "000004": {"name": "zipcode", "optype": "numeric"},' + ' "000005": {"name": "movie_id", "optype": "numeric"},' + ' "000006": {"name": "title", "optype": "text"},' + ' "000007": {"name": "genres", "optype": "items",' + '"item_analysis": {"separator": "$"}},' + '"000008": {"name": "timestamp", "optype": "numeric"},' + '"000009": {"name": "rating", "optype": "categorical"}},' + '"source_parser": {"separator": ";"}}', + '{"timestamp": "999999999"}', '4', 0.4079, "000009", + '{"balance_fields": false}'], + ['data/movies.csv', '20', '20', '180', + '{"fields": {"000000": {"name": "user_id", "optype": "numeric"},' + ' "000001": {"name": "gender", "optype": "categorical"},' + ' "000002": {"name": "age_range", "optype": "categorical"},' + ' "000003": {"name": "occupation", "optype": "categorical"},' + ' "000004": {"name": "zipcode", "optype": "numeric"},' + ' "000005": {"name": "movie_id", "optype": "numeric"},' + ' "000006": {"name": "title", "optype": "text"},' + ' "000007": {"name": "genres", 
"optype": "items",' + '"item_analysis": {"separator": "$"}},' + '"000008": {"name": "timestamp", "optype": "numeric"},' + '"000009": {"name": "rating", "optype": "categorical"}},' + '"source_parser": {"separator": ";"}}', + '{"timestamp": "999999999"}', '4', 0.2547, "000009", + '{"normalize": true}'], + ['data/movies.csv', '20', '20', '180', + '{"fields": {"000000": {"name": "user_id", "optype": "numeric"},' + ' "000001": {"name": "gender", "optype": "categorical"},' + ' "000002": {"name": "age_range", "optype": "categorical"},' + ' "000003": {"name": "occupation", "optype": "categorical"},' + ' "000004": {"name": "zipcode", "optype": "numeric"},' + ' "000005": {"name": "movie_id", "optype": "numeric"},' + ' "000006": {"name": "title", "optype": "text"},' + ' "000007": {"name": "genres", "optype": "items",' + '"item_analysis": {"separator": "$"}},' + '"000008": {"name": "timestamp", "optype": "numeric"},' + '"000009": {"name": "rating", "optype": "categorical"}},' + '"source_parser": {"separator": ";"}}', + '{"timestamp": "999999999"}', '4', 0.2547, "000009", + '{"balance_fields": true, "normalize": true}']] + show_doc(self.test_scenario11) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_logistic_model(self) + prediction_create.i_create_a_logistic_prediction( + self, example["input_data"]) + 
prediction_create.the_logistic_prediction_is( + self, example["prediction"]) + prediction_create.the_logistic_probability_is( + self, example["probability"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_probability_is( + self, example["probability"]) + + def test_scenario12(self): + """ + Scenario: Successfully comparing logistic regression predictions with constant fields: + + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I update the dataset with "" + And I wait until the dataset is ready less than secs + And I create a logistic regression model + And I wait until the logistic regression model is ready less than secs + And I create a local logistic regression model + When I create a logistic regression prediction for "" + Then the logistic regression prediction is "" + And I create a local logistic regression prediction for "" + Then the local logistic regression prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "prediction", "dataset_conf"] + examples = [ + ['data/constant_field.csv', '10', '20', '50', + '{"a": 1, "b": 1, "c": 1}', 'a', + '{"fields": {"000000": {"preferred": true}}}']] + show_doc(self.test_scenario12) + + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.i_update_dataset_with(self, example["dataset_conf"]) + 
dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + model_create.i_create_a_logistic_model(self) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_logistic_model(self) + prediction_create.i_create_a_logistic_prediction( + self, example["input_data"]) + prediction_create.the_logistic_prediction_is( + self, example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario13(self): + """ + Scenario: Successfully comparing predictions: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" + And I export the model with tags "" + And I create a local model from file "" + And I create a local prediction for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", + "model_file", "model_tags"] + examples = [ + ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', + '000004', 'Iris-setosa', "tmp/my_model.json", "my_test"], + ['data/iris.csv', '10', '10', '10', + '{"petal length": 6, "petal width": 2}', '000004', + 'Iris-virginica', "tmp/my_model.json", "my_test"], + ['data/iris.csv', '10', '10', '10', + '{"petal length": 4, "petal width": 1.5}', '000004', + 'Iris-versicolor', "tmp/my_model.json", "my_test"], + ['data/iris_sp_chars.csv', '10', '10', '10', + '{"pétal.length": 4, "pétal&width\\u0000": 1.5}', '000004', + 'Iris-versicolor', 
"tmp/my_model_2.json", "my_test"]] + show_doc(self.test_scenario13) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset( + self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + args = '{"tags": ["%s"]}' % example["model_tags"] + model_create.i_create_a_model_with(self, data=args) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_export_model( + self, False, example["model_file"]) # no pmml + prediction_compare.i_create_a_local_model_from_file( + self, example["model_file"]) + prediction_create.i_create_a_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + model_create.i_export_tags_model( + self, example["model_file"], example["model_tags"]) + prediction_compare.i_create_a_local_model_from_file( + self, example["model_file"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario14(self): + """ + Scenario: Successfully comparing predictions with supervised model: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local supervised model + When I create a prediction for 
"" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction"] + examples = [ + ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', + '000004', 'Iris-setosa'], + ['data/iris.csv', '10', '10', '10', + '{"petal length": 6, "petal width": 2}', '000004', + 'Iris-virginica'], + ['data/iris.csv', '10', '10', '10', + '{"petal length": 4, "petal width": 1.5}', '000004', + 'Iris-versicolor'], + ['data/iris_sp_chars.csv', '10', '10', '10', + '{"pétal.length": 4, "pétal&width\\u0000": 1.5}', + '000004', 'Iris-versicolor']] + show_doc(self.test_scenario14) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_supervised_model(self) + prediction_create.i_create_a_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario15(self): + """ + Scenario: Successfully comparing predictions with text options: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I 
wait until the dataset is ready less than secs + And I create a logistic regression model with objective "" and params "" + And I wait until the logistic regression model is ready less than secs + And I create a local logistic regression model + When I create a logistic regression prediction for "" + Then the logistic regression prediction is "" + And the logistic regression probability for the prediction is "" + And I create a local logistic regression prediction for "" + Then the local logistic regression prediction is "" + And the local logistic regression probability for the prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "model_conf", "input_data", "prediction", "probability", + "objective_id"] + examples = [ + ['data/iris.csv', '20', '20', '180', + '{"weight_field": "000000", "missing_numerics": false}', + '{"petal width": 1.5, "petal length": 2, "sepal width":1}', + 'Iris-versicolor', '0.9547', '000004']] + show_doc(self.test_scenario15) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished(self, example["source_wait"], + shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_logistic_model(self) + prediction_create.i_create_a_logistic_prediction( + self, example["input_data"]) + prediction_create.the_logistic_prediction_is( + self, example["prediction"]) + prediction_create.the_logistic_probability_is( + self, example["probability"]) + 
prediction_compare.i_create_a_local_prediction(
+                self, example["input_data"])
+            prediction_compare.the_local_prediction_is(
+                self, example["prediction"])
+            prediction_compare.the_local_probability_is(
+                self, example["probability"])
+
+    def test_scenario16(self):
+        """
+        Scenario: Successfully comparing remote and local predictions
+                  with raw date input:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a model
+            And I wait until the model is ready less than <model_wait> secs
+            And I create a local model
+            When I create a prediction for "<input_data>"
+            Then the prediction for "<objective_id>" is "<prediction>"
+            And I create a local prediction for "<input_data>"
+            Then the local prediction is "<prediction>"
+        """
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "input_data", "objective_id", "prediction"]
+        examples = [
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "1910-05-08T19:10:23.106", "cat-0":"cat2"}',
+             '000002', -1.01482],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "1920-06-30T20:21:20.320", "cat-0":"cat1"}',
+             '000002', 0.78406],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "1932-01-30T19:24:11.440", "cat-0":"cat2"}',
+             '000002', -0.98757],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "1950-11-06T05:34:05.252", "cat-0":"cat1"}',
+             '000002', 0.27538],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "1969-7-14 17:36", "cat-0":"cat2"}',
+             '000002', -0.06256],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "2001-01-05T23:04:04.693", "cat-0":"cat2"}',
+             '000002', 0.9832],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "2011-04-01T00:16:45.747", "cat-0":"cat2"}',
+             '000002', -0.5977],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}',
+             '000002', -0.06256],
+            ['data/dates2.csv', '20', '20', '25',
+             '{"time-1": "Mon Jul 14 17:36 +0000 1969", "cat-0":"cat1"}',
+             '000002', -0.06256]]
+        show_doc(self.test_scenario16)
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_model(self, shared=example["data"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"], shared=example["data"])
+            prediction_compare.i_create_a_local_model(self, pre_model=True)
+            prediction_create.i_create_a_prediction(
+                self, example["input_data"])
+            prediction_create.the_prediction_is(
+                self, example["objective_id"], example["prediction"])
+            prediction_compare.i_create_a_local_prediction(
+                self, example["input_data"],
+                pre_model=self.bigml["local_pipeline"])
+            prediction_compare.the_local_prediction_is(
+                self, example["prediction"])
+
+    def test_scenario17(self):
+        """
+        Scenario: Successfully comparing remote and local predictions
+                  with raw date input:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a logistic regression model
+            And I wait until the logistic regression is ready less
+            than <model_wait> secs
+            And I create a local logistic regression model
+            When I create a prediction for "<input_data>"
+            Then the prediction is "<prediction>"
+            And the logistic regression probability for the prediction
+            is "<probability>"
+            And I create a local prediction for "<input_data>"
+            Then the local prediction is "<prediction>"
+            And the local logistic regression probability for the
+            prediction is "<probability>"
+        """
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "input_data", "prediction", "probability"]
+        examples = [
+            ['data/dates2.csv', '20', '20', '45',
+             '{"time-1": "1910-05-08T19:10:23.106", "target-1":0.722}',
+             'cat0', 0.75024],
+            ['data/dates2.csv', '20', '20', '45',
+             '{"time-1": "1920-06-30T20:21:20.320", "target-1":0.12}',
+             'cat0', 0.75821],
+            ['data/dates2.csv', '20', '20', '45',
+             '{"time-1": "1932-01-30T19:24:11.440", "target-1":0.32}',
+             'cat0', 0.71498],
+            ['data/dates2.csv', '20', '20', '45',
+             '{"time-1": "1950-11-06T05:34:05.252", "target-1":0.124}',
+             'cat0', 0.775],
+            ['data/dates2.csv', '20', '20', '45',
+             '{"time-1": "1969-7-14 17:36", "target-1":0.784}',
+             'cat0', 0.73663],
+            ['data/dates2.csv', '20', '20', '45',
+             '{"time-1": "2001-01-05T23:04:04.693", "target-1":0.451}',
+             'cat0', 0.6822],
+            ['data/dates2.csv', '20', '20', '45',
+             '{"time-1": "2011-04-01T00:16:45.747", "target-1":0.42}',
+             'cat0', 0.71107],
+            ['data/dates2.csv', '20', '20', '45',
+             '{"time-1": "1969-W29-1T17:36:39Z", "target-1":0.67}',
+             'cat0', 0.73663],
+            ['data/dates2.csv', '20', '20', '45',
+             '{"time-1": "Mon Jul 14 17:36 +0000 1969", "target-1":0.005}',
+             'cat0', 0.73663]]
+        show_doc(self.test_scenario17)
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_logistic_model(
+                self, shared=example["data"])
+            model_create.the_logistic_model_is_finished_in_less_than(
+                self, example["model_wait"], shared=example["data"])
+            prediction_compare.i_create_a_local_logistic_model(self,
+                                                               pre_model=True)
+            prediction_create.i_create_a_logistic_prediction(
+                self, example["input_data"])
+            prediction_create.the_logistic_prediction_is(
+                self, example["prediction"])
+            prediction_create.the_logistic_probability_is(
+                self, example["probability"])
+            prediction_compare.i_create_a_local_prediction(
+                self, example["input_data"],
+                pre_model=self.bigml["local_pipeline"])
+            prediction_compare.the_local_prediction_is(
+                self, example["prediction"])
+            prediction_compare.the_local_probability_is(
+                self, example["probability"])
+
+    def test_scenario18(self):
+        """
+        Scenario: Successfully comparing predictions with proportional missing strategy for missing_splits models:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a weighted model with missing splits
+            And I wait until the model is ready less than <model_wait> secs
+            And I create a local model
+            When I create a proportional missing strategy prediction for "<input_data>"
+            Then the prediction for "<objective_id>" is "<prediction>"
+            And the confidence for the prediction is "<confidence>"
+            And I create a proportional missing strategy local prediction for "<input_data>"
+            Then the local prediction is "<prediction>"
+            And the local prediction's confidence is "<confidence>"
+            And the highest local prediction's confidence is "<confidence>"
+        """
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "input_data", "objective_id", "prediction", "confidence"]
+        examples = [
+            ['data/missings_cat.csv', '10', '10', '10', '{"x2": 4}',
+             '000002', 'positive', '0.25241']
+        ]
+        show_doc(self.test_scenario18)
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_weighted_model_with_missing_splits(self)
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            prediction_compare.i_create_a_local_model(self)
+            prediction_create.i_create_a_proportional_prediction(
+                self, example["input_data"])
+            prediction_create.the_prediction_is(
+                self, example["objective_id"], example["prediction"])
+            prediction_create.the_confidence_is(
+                self, example["confidence"])
+            prediction_compare.i_create_a_proportional_local_prediction(
+                self, example["input_data"])
+            prediction_compare.the_local_prediction_is(
+                self, example["prediction"])
+            prediction_compare.the_local_prediction_confidence_is(
+                self, example["confidence"])
+            prediction_compare.the_highest_local_prediction_confidence_is(
+                self, example["input_data"], example["confidence"])
diff --git a/bigml/tests/test_06_batch_predictions.py b/bigml/tests/test_06_batch_predictions.py
new file mode 100644
index 00000000..89266f8b
--- /dev/null
+++ b/bigml/tests/test_06_batch_predictions.py
@@ -0,0 +1,319 @@
+# -*- coding: utf-8 -*-
+#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init
+#pylint: disable=locally-disabled,unused-import
+#
+# Copyright 2015-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+""" Creating batch predictions
+
+"""
+from .world import world, setup_module, teardown_module, show_doc, \
+    show_method
+from . import create_source_steps as source_create
+from .
import create_dataset_steps as dataset_create
+from . import create_model_steps as model_create
+from . import create_ensemble_steps as ensemble_create
+from . import create_cluster_steps as cluster_create
+from . import create_anomaly_steps as anomaly_create
+from . import create_batch_prediction_steps as batch_pred_create
+from . import create_prediction_steps as prediction_create
+
+
+
+class TestBatchPrediction:
+    """Testing Batch Prediction"""
+
+    def setup_method(self, method):
+        """
+            Debug information: resets the per-test context, stores the test name and prints the module name
+        """
+        self.bigml = {}
+        self.bigml["method"] = method.__name__
+        print("\n-------------------\nTests in: %s\n" % __name__)
+
+    def teardown_method(self):
+        """
+            Debug information: prints the end-of-tests banner and clears the per-test context
+        """
+        print("\nEnd of tests in: %s\n-------------------\n" % __name__)
+        self.bigml = {}
+
+    def test_scenario1(self):
+        """
+        Scenario: Successfully creating a batch prediction:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a model
+            And I wait until the model is ready less than <model_wait> secs
+            When I create a batch prediction for the dataset with the model
+            And I wait until the batch prediction is ready less than <batch_wait> secs
+            And I download the created predictions file to "<local_file>"
+            Then the batch prediction file is like "<predictions_file>"
+        """
+        show_doc(self.test_scenario1)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "batch_wait", "local_file", "predictions_file"]
+        examples = [
+            ['data/iris.csv', '30', '30', '50', '50',
+             'tmp/batch_predictions.csv', 'data/batch_predictions.csv']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_model(self, shared=example["data"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"], shared=example["data"])
+            batch_pred_create.i_create_a_batch_prediction(self)
+            batch_pred_create.the_batch_prediction_is_finished_in_less_than(
+                self, example["batch_wait"])
+            batch_pred_create.i_download_predictions_file(
+                self, example["local_file"])
+            batch_pred_create.i_check_predictions(
+                self, example["predictions_file"])
+
+    def test_scenario2(self):
+        """
+        Scenario: Successfully creating a batch prediction for an ensemble:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create an ensemble of <number_of_models> models
+            And I wait until the ensemble is ready less than <model_wait> secs
+            When I create a batch prediction for the dataset with the ensemble and "<batch_conf>"
+            And I wait until the batch prediction is ready less than <batch_wait> secs
+            And I download the created predictions file to "<local_file>"
+            Then the batch prediction file is like "<predictions_file>"
+        """
+        show_doc(self.test_scenario2)
+        headers = ["data", "source_wait", "dataset_wait", "number_of_models",
+                   "model_wait", "batch_wait", "local_file",
+                   "predictions_file", "batch_conf"]
+        examples = [
+            ['data/iris.csv', '30', '30', '5', '180', '150',
+             'tmp/batch_predictions.csv', 'data/batch_predictions_e_c0.csv',
+             {"combiner":0}],
+            ['data/iris.csv', '30', '30', '5', '180', '150',
+             'tmp/batch_predictions.csv', 'data/batch_predictions_e_c1.csv',
+             {"combiner":1, "confidence": True}],
+            ['data/iris.csv', '30', '30', '5', '180', '150',
+             'tmp/batch_predictions.csv', 'data/batch_predictions_e_c2.csv',
+             {"combiner":2, "confidence": True}],
+            ['data/iris.csv', '30', '30', '5', '180', '150',
+             'tmp/batch_predictions.csv', 'data/batch_predictions_e_o_k_v.csv',
+             {"operating_kind": "votes", "confidence": True}],
+            ['data/iris.csv', '30', '30', '5', '180', '150',
+             'tmp/batch_predictions.csv', 'data/batch_predictions_e_o_k_p.csv',
+             {"operating_kind": "probability", "probability": True}],
+            ['data/iris.csv', '30', '30', '5', '180', '150',
+             'tmp/batch_predictions.csv', 'data/batch_predictions_e_o_k_c.csv',
+             {"operating_kind": "confidence", "confidence": True}]]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            ensemble_shared = "%s_%s" % (example["data"],
+                                         example["number_of_models"])
+            ensemble_create.i_create_an_ensemble(
+                self, example["number_of_models"],
+                shared=ensemble_shared)
+            ensemble_create.the_ensemble_is_finished_in_less_than(
+                self, example["model_wait"], shared=ensemble_shared)
+            batch_pred_create.i_create_a_batch_prediction_ensemble(
+                self, example["batch_conf"])
+            batch_pred_create.the_batch_prediction_is_finished_in_less_than(
+                self, example["batch_wait"])
+            batch_pred_create.i_download_predictions_file(
+                self, example["local_file"])
+            batch_pred_create.i_check_predictions(
+                self, example["predictions_file"])
+
+    def test_scenario3(self):
+        """
+        Scenario: Successfully creating a batch centroid from a cluster:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a cluster
+            And I wait until the cluster is ready less than <model_wait> secs
+            When I create a batch centroid for the dataset
+            And I check the batch centroid is ok
+            And I wait until the batch centroid is ready less than <batch_wait> secs
+            And I download the created centroid file to "<local_file>"
+            Then the batch centroid file is like "<predictions_file>"
+        """
+        show_doc(self.test_scenario3)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "batch_wait", "local_file", "predictions_file"]
+        examples = [
+            ['data/diabetes.csv', '50', '50', '50', '50',
+             'tmp/batch_predictions.csv', 'data/batch_predictions_c.csv']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            cluster_create.i_create_a_cluster(self)
+            cluster_create.the_cluster_is_finished_in_less_than(
+                self, example["model_wait"])
+            batch_pred_create.i_create_a_batch_prediction_with_cluster(self)
+            batch_pred_create.the_batch_centroid_is_finished_in_less_than(
+                self, example["batch_wait"])
+            batch_pred_create.i_download_centroid_file(
+                self, example["local_file"])
+            batch_pred_create.i_check_predictions(
+                self, example["predictions_file"])
+
+    def test_scenario4(self):
+        """
+        Scenario: Successfully creating a source from a batch prediction:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a model
+            And I wait until the model is ready less than <model_wait> secs
+            When I create a batch prediction for the dataset with the model
+            And I wait until the batch prediction is ready less than <batch_wait> secs
+            Then I create a source from the batch prediction
+            And I wait until the source is ready less than <source_wait> secs
+        """
+        show_doc(self.test_scenario4)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "batch_wait"]
+        examples = [
+            ['data/diabetes.csv', '30', '30', '50', '50']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_model(self, shared=example["data"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"], shared=example["data"])
+            batch_pred_create.i_create_a_batch_prediction(self)
+            batch_pred_create.the_batch_prediction_is_finished_in_less_than(
+                self, example["batch_wait"])
+            batch_pred_create.i_create_a_source_from_batch_prediction(self)
+            source_create.the_source_is_finished(self, example["source_wait"])
+
+    def test_scenario5(self):
+        """
+        Scenario: Successfully creating a batch anomaly score from an anomaly detector:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create an anomaly detector
+            And I wait until the anomaly detector is ready less than <model_wait> secs
+            When I create a batch anomaly score
+            And I check the batch anomaly score is ok
+            And I wait until the batch anomaly score is ready less than <batch_wait> secs
+            And I download the created anomaly score file to "<local_file>"
+            Then the batch anomaly score file is like "<predictions_file>"
+        """
+        show_doc(self.test_scenario5)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "batch_wait", "local_file", "predictions_file"]
+        examples = [
+            ['data/tiny_kdd.csv', '30', '30', '50', '50',
+             'tmp/batch_predictions.csv', 'data/batch_predictions_a.csv']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            anomaly_create.i_create_an_anomaly(self, shared=example["data"])
+            anomaly_create.the_anomaly_is_finished_in_less_than(
+                self, example["model_wait"], shared=example["data"])
+            batch_pred_create.i_create_a_batch_prediction_with_anomaly(self)
+            batch_pred_create.the_batch_anomaly_score_is_finished_in_less_than(
+                self, example["batch_wait"])
+            batch_pred_create.i_download_anomaly_score_file(
+                self, example["local_file"])
+            batch_pred_create.i_check_predictions(
+                self, example["predictions_file"])
+
+    def test_scenario6(self):
+        """
+        Scenario: Successfully creating a batch prediction for a logistic regression:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a logistic regression
+            And I wait until the logistic regression is ready less than <model_wait> secs
+            When I create a batch prediction for the dataset with the logistic regression
+            And I wait until the batch prediction is ready less than <batch_wait> secs
+            And I download the created predictions file to "<local_file>"
+            Then the batch prediction file is like "<predictions_file>"
+        """
+        show_doc(self.test_scenario6)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "batch_wait", "local_file", "predictions_file"]
+        examples = [
+            ['data/iris.csv', '30', '30', '80', '50',
+             'tmp/batch_predictions.csv', 'data/batch_predictions_lr.csv']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_logistic_model(
+                self, shared=example["data"])
+            model_create.the_logistic_model_is_finished_in_less_than(
+                self, example["model_wait"], shared=example["data"])
+            batch_pred_create.i_create_a_batch_prediction_logistic_model(self)
+            batch_pred_create.the_batch_prediction_is_finished_in_less_than(
+                self, example["batch_wait"])
+            batch_pred_create.i_download_predictions_file(
+                self, example["local_file"])
+            batch_pred_create.i_check_predictions(
+                self, example["predictions_file"])
diff --git a/bigml/tests/test_07_multimodel_batch_predictions.py b/bigml/tests/test_07_multimodel_batch_predictions.py
new file mode 100644
index 00000000..a19ea4ca
--- /dev/null
+++ b/bigml/tests/test_07_multimodel_batch_predictions.py
@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init
+#pylint: disable=locally-disabled,unused-import
+#
+# Copyright 2015-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+""" Creating Multimodel batch predictions
+
+"""
+from .world import world, setup_module, teardown_module, show_doc, \
+    show_method
+from . import create_source_steps as source_create
+from . import create_dataset_steps as dataset_create
+from . import create_model_steps as model_create
+from . import compare_predictions_steps as compare_pred
+
+class TestMultimodelBatchPrediction:
+    """Test MultiModel batch predictions"""
+
+    def setup_method(self, method):
+        """
+            Debug information: resets the per-test context, stores the test name and prints the module name
+        """
+        self.bigml = {}
+        self.bigml["method"] = method.__name__
+        print("\n-------------------\nTests in: %s\n" % __name__)
+
+    def teardown_method(self):
+        """
+            Debug information: prints the end-of-tests banner and clears the per-test context
+        """
+        print("\nEnd of tests in: %s\n-------------------\n" % __name__)
+        self.bigml = {}
+
+    def test_scenario1(self):
+        """
+        Scenario: Successfully creating a batch prediction from a multi model:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a model with "<tags>"
+            And I wait until the model is ready less than <model_wait> secs
+            And I create a model with "<tags>"
+            And I wait until the model is ready less than <model_wait> secs
+            And I create a model with "<tags>"
+            And I wait until the model is ready less than <model_wait> secs
+            And I retrieve a list of remote models tagged with "<tag>"
+            And I create a local multi model
+            When I create a batch prediction for "<input_data>" and save it in "<path>"
+            And I combine the votes in "<path>"
+            Then the plurality combined predictions are "<predictions>"
+            And the confidence weighted predictions are "<predictions>"
+        """
+        show_doc(self.test_scenario1)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "tags", "tag", "input_data", "path", "predictions"]
+        examples = [
+            ['data/iris.csv', '10', '10', '10', '{"tags":["mytag"]}', 'mytag', '[{"petal width": 0.5}, {"petal length": 6, "petal width": 2}, {"petal length": 4, "petal width": 1.5}]', './tmp', '["Iris-setosa", "Iris-virginica", "Iris-versicolor"]']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_model_with(self, example["tags"])
+            model_create.the_model_is_finished_in_less_than(self, example["model_wait"])
+            model_create.i_create_a_model_with(self, example["tags"])
+            model_create.the_model_is_finished_in_less_than(self, example["model_wait"])
+            model_create.i_create_a_model_with(self, example["tags"])
+            model_create.the_model_is_finished_in_less_than(self, example["model_wait"])
+            compare_pred.i_retrieve_a_list_of_remote_models(self, example["tag"])
+            compare_pred.i_create_a_local_multi_model(self)
+            compare_pred.i_create_a_batch_prediction(self, example["input_data"], example["path"])
+            compare_pred.i_combine_the_votes(self, example["path"])
+            compare_pred.the_plurality_combined_prediction(self, example["predictions"])
+            compare_pred.the_confidence_weighted_prediction(self, example["predictions"])
diff --git a/bigml/tests/test_08_multimodel.py b/bigml/tests/test_08_multimodel.py
new file mode 100644
index 00000000..c9ac4d1b
--- /dev/null
+++ b/bigml/tests/test_08_multimodel.py
@@ -0,0 +1,125 @@
+# -*- coding: utf-8 -*-
+#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init
+#pylint: disable=locally-disabled,unused-import
+#
+# Copyright 2015-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+""" Creating model on lists of datasets
+
+"""
+from .world import world, setup_module, teardown_module, show_doc, \
+    show_method
+from . import create_source_steps as source_create
+from . import create_dataset_steps as dataset_create
+from . import create_model_steps as model_create
+from . import create_multimodel_steps as multimodel_create
+from . import compare_predictions_steps as compare_pred
+
+class TestMultimodel:
+    """Testing the MultiModel class methods"""
+
+    def setup_method(self, method):
+        """
+            Debug information: resets the per-test context, stores the test name and prints the module name
+        """
+        self.bigml = {}
+        self.bigml["method"] = method.__name__
+        print("\n-------------------\nTests in: %s\n" % __name__)
+
+    def teardown_method(self):
+        """
+            Debug information: prints the end-of-tests banner and clears the per-test context
+        """
+        print("\nEnd of tests in: %s\n-------------------\n" % __name__)
+        self.bigml = {}
+
+    def test_scenario1(self):
+        """
+        Scenario: Successfully creating a model from a dataset list:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I store the dataset id in a list
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I store the dataset id in a list
+            Then I create a model from a dataset list
+            And I wait until the model is ready less than <model_wait> secs
+            And I check the model stems from the original dataset list
+        """
+        show_doc(self.test_scenario1)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait"]
+        examples = [
+            ['data/iris.csv', '10', '10', '10']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            multimodel_create.i_store_dataset_id(self)
+            dataset_create.i_create_a_dataset(self)
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"])
+            multimodel_create.i_store_dataset_id(self)
+            model_create.i_create_a_model_from_dataset_list(self)
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            multimodel_create.i_check_model_datasets_and_datasets_ids(self)
+
+    def test_scenario2(self):
+        """
+        Scenario: Successfully creating a model from a dataset list and predicting with it using median:
+            Given I create a data source uploading a "<data>" file
+            And I wait until the source is ready less than <source_wait> secs
+            And I create a dataset
+            And I wait until the dataset is ready less than <dataset_wait> secs
+            And I create a model
+            And I wait until the model is ready less than <model_wait> secs
+            And I create a local multi model
+            When I create a local multimodel batch prediction using median for <input_data>
+            Then the local prediction is <prediction>
+        """
+        show_doc(self.test_scenario2)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "input_data", "prediction"]
+        examples = [
+            ['data/grades.csv', '30', '30', '30',
+             '{"Tutorial": 99.47, "Midterm": 53.12, "TakeHome": 87.96}',
+             63.33]]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_model(self, shared=example["data"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"], shared=example["data"])
+            world.list_of_models = [world.model]
+            compare_pred.i_create_a_local_multi_model(self)
+            compare_pred.i_create_a_local_mm_median_batch_prediction(
+                self, example["input_data"])
+            compare_pred.the_local_prediction_is(self, example["prediction"])
diff --git a/bigml/tests/test_09_ensemble_prediction.py b/bigml/tests/test_09_ensemble_prediction.py
new file mode 100644
index 00000000..52b06872
--- /dev/null
+++ b/bigml/tests/test_09_ensemble_prediction.py
@@ -0,0 +1,96 @@
+# -*- coding: utf-8 -*-
+#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init
+#pylint: disable=locally-disabled,unused-import
+#
+# Copyright 2015-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+""" Creating ensembles predictions
+
+"""
+from .world import world, setup_module, teardown_module, show_doc, \
+    show_method
+from . import create_source_steps as source_create
+from . import create_dataset_steps as dataset_create
+from . import create_ensemble_steps as ensemble_create
+from .
import create_prediction_steps as prediction_create
+
+class TestEnsemblePrediction:
+    """Testing Ensemble Predictions"""
+
+    def setup_method(self, method):
+        """
+            Store the running test's name in self.bigml and print a module banner
+        """
+        self.bigml = {}
+        self.bigml["method"] = method.__name__
+        print("\n-------------------\nTests in: %s\n" % __name__)
+
+    def teardown_method(self):
+        """
+            Print the end-of-module banner and reset self.bigml
+        """
+        print("\nEnd of tests in: %s\n-------------------\n" % __name__)
+        self.bigml = {}
+
+    def test_scenario1(self):
+        """
+            Scenario: Successfully creating a prediction from an ensemble:
+                Given I create a data source uploading a "<data>" file
+                And I wait until the source is ready less than <source_wait> secs
+                And I create a dataset
+                And I wait until the dataset is ready less than <dataset_wait> secs
+                And I create an ensemble of <number_of_models> models
+                And I wait until the ensemble is ready less than <model_wait> secs
+                When I create an ensemble prediction for "<input_data>"
+                And I wait until the prediction is ready less than <prediction_wait> secs
+                Then the prediction for "<objective_id>" is "<prediction>"
+        """
+        show_doc(self.test_scenario1)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "prediction_wait", "number_of_models", "input_data",
+                   "objective_id", "prediction"]
+        examples = [
+            ['data/iris.csv', '30', '30', '50', '20', '5',
+             '{"petal width": 0.5}', '000004', 'Iris-versicolor'],
+            ['data/iris_sp_chars.csv', '30', '30', '50', '20', '5',
+             '{"pétal&width\\u0000": 0.5}', '000004', 'Iris-versicolor'],
+            ['data/grades.csv', '30', '30', '150', '20', '10',
+             '{"Assignment": 81.22, "Tutorial": 91.95, "Midterm": 79.38,'
+             ' "TakeHome": 105.93}', '000005', '84.556'],
+            ['data/grades.csv', '30', '30', '150', '20', '10',
+             '{"Assignment": 97.33, "Tutorial": 106.74, "Midterm": 76.88,'
+             ' "TakeHome": 108.89}', '000005', '73.13558']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            ensemble_shared = "%s_%s" % (example["data"],
+                                         example["number_of_models"])
+            ensemble_create.i_create_an_ensemble(
+                self, example["number_of_models"], shared=ensemble_shared)
+            ensemble_create.the_ensemble_is_finished_in_less_than(
+                self, example["model_wait"], shared=ensemble_shared)
+            prediction_create.i_create_an_ensemble_prediction(
+                self, example["input_data"])
+            prediction_create.the_prediction_is_finished_in_less_than(
+                self, example["prediction_wait"])
+            prediction_create.the_prediction_is(
+                self, example["objective_id"], example["prediction"])
diff --git a/bigml/tests/test_10_local_ensemble_prediction.py b/bigml/tests/test_10_local_ensemble_prediction.py
new file mode 100644
index 00000000..2e35f1b0
--- /dev/null
+++ b/bigml/tests/test_10_local_ensemble_prediction.py
@@ -0,0 +1,286 @@
+# -*- coding: utf-8 -*-
+#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init
+#pylint: disable=locally-disabled,unused-import
+#
+# Copyright 2015-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+""" Creating local ensemble predictions
+
+"""
+from .world import world, setup_module, teardown_module, show_doc, \
+    show_method
+from . import create_source_steps as source_create
+from .
import create_dataset_steps as dataset_create
+from . import create_model_steps as model_create
+from . import create_ensemble_steps as ensemble_create
+from . import create_prediction_steps as prediction_create
+from . import compare_predictions_steps as compare_pred
+
+
+class TestEnsemblePrediction:
+    """Testing local ensemble prediction"""
+
+    def setup_method(self, method):
+        """
+            Store the running test's name in self.bigml and print a module banner
+        """
+        self.bigml = {}
+        self.bigml["method"] = method.__name__
+        print("\n-------------------\nTests in: %s\n" % __name__)
+
+    def teardown_method(self):
+        """
+            Print the end-of-module banner and reset self.bigml
+        """
+        print("\nEnd of tests in: %s\n-------------------\n" % __name__)
+        self.bigml = {}
+
+    def test_scenario1(self):
+        """
+            Scenario: Successfully creating a local prediction from an Ensemble:
+                Given I create a data source uploading a "<data>" file
+                And I wait until the source is ready less than <source_wait> secs
+                And I create a dataset
+                And I wait until the dataset is ready less than <dataset_wait> secs
+                And I create an ensemble of <number_of_models> models
+                And I wait until the ensemble is ready less than <model_wait> secs
+                And I create a local Ensemble
+                When I create a local ensemble prediction with probabilities for "<input_data>"
+                Then the local prediction is "<prediction>"
+                And the local prediction's confidence is "<confidence>"
+                And the local probabilities are "<probabilities>"
+        """
+        show_doc(self.test_scenario1)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "number_of_models", "input_data",
+                   "prediction", "confidence", "probabilities"]
+        examples = [
+            ['data/iris.csv', '10', '10', '50', '5',
+             '{"petal width": 0.5}', 'Iris-versicolor', '0.415',
+             '["0.3403", "0.4150", "0.2447"]' ]]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            ensemble_shared = "%s_%s" % (example["data"],
+                                         example["number_of_models"])
+            ensemble_create.i_create_an_ensemble(
+                self, example["number_of_models"], shared=ensemble_shared)
+            ensemble_create.the_ensemble_is_finished_in_less_than(
+                self, example["model_wait"], shared=ensemble_shared)
+            ensemble_create.create_local_ensemble(self)
+            prediction_create.create_local_ensemble_prediction_probabilities(
+                self, example["input_data"])
+            compare_pred.the_local_prediction_is(self, example["prediction"])
+            compare_pred.the_local_prediction_confidence_is(
+                self, example["confidence"])
+            compare_pred.the_local_probabilities_are(
+                self, example["probabilities"])
+
+    def test_scenario2(self):
+        """
+            Scenario: Successfully obtaining field importance from an Ensemble:
+                Given I create a data source uploading a "<data>" file
+                And I wait until the source is ready less than <source_wait> secs
+                And I create a dataset
+                And I wait until the dataset is ready less than <dataset_wait> secs
+                And I create a model with "<model_conf1>"
+                And I wait until the model is ready less than <model_wait> secs
+                And I create a model with "<model_conf2>"
+                And I wait until the model is ready less than <model_wait> secs
+                And I create a model with "<model_conf3>"
+                And I wait until the model is ready less than <model_wait> secs
+                When I create a local Ensemble with the last <number_of_models> models
+                Then the field importance text is <field_importance>
+        """
+        show_doc(self.test_scenario2)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "model_conf1", "model_conf2", "model_conf3",
+                   "number_of_models", "field_importance"]
+        examples = [
+            ['data/iris.csv', '10', '10', '20',
+             '{"input_fields": ["000000", "000001","000003", "000004"]}',
+             '{"input_fields": ["000000", "000001","000002", "000004"]}',
+             '{"input_fields": ["000000", "000001","000002", "000003",'
+             ' "000004"]}', '3',
+             '[["000002", 0.5269933333333333], ["000003", 0.38936],'
+             ' ["000000", 0.04662333333333333],'
+             '["000001", 0.037026666666666666]]']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_model_with(self, example["model_conf1"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            model_create.i_create_a_model_with(self, example["model_conf2"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            model_create.i_create_a_model_with(self, example["model_conf3"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            ensemble_create.create_local_ensemble_with_list(
+                self, example["number_of_models"])
+            ensemble_create.field_importance_print(
+                self, example["field_importance"])
+
+    def test_scenario3(self):
+        """
+            Scenario: Successfully creating a local prediction from an Ensemble adding confidence:
+                Given I create a data source uploading a "<data>" file
+                And I wait until the source is ready less than <source_wait> secs
+                And I create a dataset
+                And I wait until the dataset is ready less than <dataset_wait> secs
+                And I create an ensemble of <number_of_models> models
+                And I wait until the ensemble is ready less than <model_wait> secs
+                And I create a local Ensemble
+                When I create a local ensemble prediction for "<input_data>" in JSON adding confidence
+                Then the local prediction is "<prediction>"
+                And the local prediction's confidence is "<confidence>"
+        """
+        show_doc(self.test_scenario3)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "number_of_models", "input_data", "prediction",
+                   "confidence"]
+        examples = [
+            ['data/iris.csv', '10', '10', '50', '5',
+             '{"petal width": 0.5}', 'Iris-versicolor', '0.415']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            ensemble_shared = "%s_%s" % (example["data"],
+                                         example["number_of_models"])
+            ensemble_create.i_create_an_ensemble(
+                self, example["number_of_models"], shared=ensemble_shared)
+            ensemble_create.the_ensemble_is_finished_in_less_than(
+                self, example["model_wait"], shared=ensemble_shared)
+            ensemble_create.create_local_ensemble(self)
+            prediction_create.create_local_ensemble_prediction_add_confidence(
+                self, example["input_data"])
+            compare_pred.the_local_prediction_is(self, example["prediction"])
+            compare_pred.the_local_prediction_confidence_is(
+                self, example["confidence"])
+
+    def test_scenario4(self):
+        """
+            Scenario: Successfully obtaining field importance from an Ensemble created from local models:
+                Given I create a data source uploading a "<data>" file
+                And I wait until the source is ready less than <source_wait> secs
+                And I create a dataset
+                And I wait until the dataset is ready less than <dataset_wait> secs
+                And I create a model with "<model_conf1>"
+                And I wait until the model is ready less than <model_wait> secs
+                And I create a model with "<model_conf2>"
+                And I wait until the model is ready less than <model_wait> secs
+                And I create a model with "<model_conf3>"
+                And I wait until the model is ready less than <model_wait> secs
+                When I create a local Ensemble with the last <number_of_models> local models
+                Then the field importance text is <field_importance>
+        """
+        show_doc(self.test_scenario4)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "model_conf1", "model_conf2", "model_conf3",
+                   "number_of_models", "field_importance"]
+        examples = [
+            ['data/iris.csv', '10', '10', '30',
+             '{"input_fields": ["000000", "000001","000003", "000004"]}',
+             '{"input_fields": ["000000", "000001","000002", "000004"]}',
+             '{"input_fields": ["000000", "000001","000002", "000003",'
+             ' "000004"]}', '3',
+             '[["000002", 0.5269933333333333], ["000003", 0.38936],'
+             ' ["000000", 0.04662333333333333], '
+             '["000001", 0.037026666666666666]]']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_model_with(self, example["model_conf1"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            model_create.i_create_a_model_with(self, example["model_conf2"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            model_create.i_create_a_model_with(self, example["model_conf3"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            ensemble_create.create_local_ensemble_with_list_of_local_models(
+                self, example["number_of_models"])
+            ensemble_create.field_importance_print(
+                self, example["field_importance"])
+
+    def test_scenario5(self):
+        """
+            Scenario: Successfully creating a local prediction from an Ensemble using median with confidence:
+                Given I create a data source uploading a "<data>" file
+                And I wait until the source is ready less than <source_wait> secs
+                And I create a dataset
+                And I wait until the dataset is ready less than <dataset_wait> secs
+                And I create an ensemble of <number_of_models> models
+                And I wait until the ensemble is ready less than <model_wait> secs
+                And I create a local Ensemble
+                When I create a local ensemble prediction using median with confidence for "<input_data>"
+                Then the local prediction is "<prediction>"
+        """
+        show_doc(self.test_scenario5)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "number_of_models", "input_data", "prediction"]
+        examples = [
+            ['data/grades.csv', '30', '30', '50', '2', '{}', 69.0934]]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            ensemble_shared = "%s_%s" % (example["data"],
+                                         example["number_of_models"])
+            ensemble_create.i_create_an_ensemble(
+                self, example["number_of_models"], shared=ensemble_shared)
+            ensemble_create.the_ensemble_is_finished_in_less_than(
+                self, example["model_wait"], shared=ensemble_shared)
+            ensemble_create.create_local_ensemble(self)
+            prediction_create.create_local_ensemble_prediction_using_median_with_confidence(
+                self, example["input_data"])
+            compare_pred.the_local_prediction_is(self, example["prediction"])
diff --git a/bigml/tests/test_11_multimodel_prediction.py b/bigml/tests/test_11_multimodel_prediction.py
new file mode 100644
index 00000000..23021c1d
--- /dev/null
+++ b/bigml/tests/test_11_multimodel_prediction.py
@@ -0,0 +1,149 @@
+# -*- coding: utf-8 -*-
+#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init
+#pylint: disable=locally-disabled,unused-import
+#
+# Copyright 2015-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+""" Creating multimodel predictions
+
+"""
+from .world import world, setup_module, teardown_module, show_doc, \
+    show_method
+from . import create_source_steps as source_create
+from . import create_dataset_steps as dataset_create
+from . import create_model_steps as model_create
+from . import create_ensemble_steps as ensemble_create
+from . import create_prediction_steps as prediction_create
+from . import compare_predictions_steps as compare_pred
+
+class TestMultimodelPrediction:
+    """Test MultiModel methods"""
+
+    def setup_method(self, method):
+        """
+            Store the running test's name in self.bigml and print a module banner
+        """
+        self.bigml = {}
+        self.bigml["method"] = method.__name__
+        print("\n-------------------\nTests in: %s\n" % __name__)
+
+    def teardown_method(self):
+        """
+            Print the end-of-module banner and reset self.bigml
+        """
+        print("\nEnd of tests in: %s\n-------------------\n" % __name__)
+        self.bigml = {}
+
+    def test_scenario1(self):
+        """
+            Scenario: Successfully creating a prediction from a multi model:
+                Given I create a data source uploading a "<data>" file
+                And I wait until the source is ready less than <source_wait> secs
+                And I create a dataset
+                And I wait until the dataset is ready less than <dataset_wait> secs
+                And I create a model with "<tags>"
+                And I wait until the model is ready less than <model_wait> secs
+                And I create a model with "<tags>"
+                And I wait until the model is ready less than <model_wait> secs
+                And I create a model with "<tags>"
+                And I wait until the model is ready less than <model_wait> secs
+                And I retrieve a list of remote models tagged with "<tag>"
+                And I create a local multi model
+                When I create a local prediction for "<input_data>"
+                Then the local prediction is "<prediction>"
+        """
+        show_doc(self.test_scenario1)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "tags", "tag", "input_data", "prediction"]
+        examples = [
+            ['data/iris.csv', '10', '10', '10', '{"tags":["mytag"]}',
+             'mytag', '{"petal width": 0.5}', 'Iris-setosa']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(self, example["source_wait"],
+                                                 shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_model_with(self, example["tags"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            model_create.i_create_a_model_with(self, example["tags"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            model_create.i_create_a_model_with(self, example["tags"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            compare_pred.i_retrieve_a_list_of_remote_models(
+                self, example["tag"])
+            compare_pred.i_create_a_local_multi_model(self)
+            compare_pred.i_create_a_local_prediction(
+                self, example["input_data"])
+            compare_pred.the_local_prediction_is(self, example["prediction"])
+
+    def test_scenario2(self):
+        """
+            Scenario: Successfully creating a local batch prediction from a multi model:
+                Given I create a data source uploading a "<data>" file
+                And I wait until the source is ready less than <source_wait> secs
+                And I create a dataset
+                And I wait until the dataset is ready less than <dataset_wait> secs
+                And I create a model with "<tags>"
+                And I wait until the model is ready less than <model_wait> secs
+                And I create a model with "<tags>"
+                And I wait until the model is ready less than <model_wait> secs
+                And I create a model with "<tags>"
+                And I wait until the model is ready less than <model_wait> secs
+                And I retrieve a list of remote models tagged with "<tag>"
+                And I create a local multi model
+                When I create a batch multimodel prediction for "<input_data>"
+                Then the predictions are "<predictions>"
+        """
+        show_doc(self.test_scenario2)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "tags", "tag", "input_data", "predictions"]
+        examples = [
+            ['data/iris.csv', '10', '10', '10', '{"tags":["mytag"]}',
+             'mytag', '[{"petal width": 0.5}, {"petal length": 6, '
+             '"petal width": 2}]', '["Iris-setosa", "Iris-virginica"]']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_model_with(self, example["tags"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            model_create.i_create_a_model_with(self, example["tags"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            model_create.i_create_a_model_with(self, example["tags"])
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            compare_pred.i_retrieve_a_list_of_remote_models(
+                self, example["tag"])
+            compare_pred.i_create_a_local_multi_model(self)
+            compare_pred.i_create_a_batch_prediction_from_a_multi_model(
+                self, example["input_data"])
+            compare_pred.the_batch_mm_predictions_are(
+                self, example["predictions"])
diff --git a/bigml/tests/test_12_public_model_prediction.py b/bigml/tests/test_12_public_model_prediction.py
new file mode 100644
index 00000000..cbfe2e36
--- /dev/null
+++ b/bigml/tests/test_12_public_model_prediction.py
@@ -0,0 +1,90 @@
+# -*- coding: utf-8 -*-
+#pylint:
disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating public model predictions + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_prediction_steps as prediction_create +from . 
import compare_predictions_steps as compare_pred
+
+class TestPublicModelPrediction:
+    """Testing published models"""
+
+    def setup_method(self, method):
+        """
+            Store the running test's name in self.bigml and print a module banner
+        """
+        self.bigml = {}
+        self.bigml["method"] = method.__name__
+        print("\n-------------------\nTests in: %s\n" % __name__)
+
+    def teardown_method(self):
+        """
+            Print the end-of-module banner and reset self.bigml
+        """
+        print("\nEnd of tests in: %s\n-------------------\n" % __name__)
+        self.bigml = {}
+
+    def test_scenario1(self):
+        """
+            Scenario: Successfully creating a prediction using a public model:
+                Given I create a data source uploading a "<data>" file
+                And I wait until the source is ready less than <source_wait> secs
+                And I create a dataset
+                And I wait until the dataset is ready less than <dataset_wait> secs
+                And I create a model
+                And I wait until the model is ready less than <model_wait> secs
+                And I make the model public
+                And I wait until the model is ready less than <model_wait> secs
+                And I check the model status using the model's public url
+                When I create a prediction for "<input_data>"
+                Then the prediction for "<objective_id>" is "<prediction>"
+        """
+        show_doc(self.test_scenario1)
+        headers = ["data", "source_wait", "dataset_wait", "model_wait",
+                   "input_data", "objective_id", "prediction"]
+        examples = [
+            ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}',
+             '000004', 'Iris-setosa']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file(
+                self, example["data"], shared=example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"], shared=example["data"])
+            dataset_create.i_create_a_dataset(self, shared=example["data"])
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"], shared=example["data"])
+            model_create.i_create_a_model(self)
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            model_create.make_the_model_public(self)
+            model_create.the_model_is_finished_in_less_than(
+                self, example["model_wait"])
+            model_create.model_from_public_url(self)
+            prediction_create.i_create_a_prediction(
+                self, example["input_data"])
+            prediction_create.the_prediction_is(
+                self, example["objective_id"], example["prediction"])
diff --git a/bigml/tests/test_13_public_dataset.py b/bigml/tests/test_13_public_dataset.py
new file mode 100644
index 00000000..94657661
--- /dev/null
+++ b/bigml/tests/test_13_public_dataset.py
@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init
+#pylint: disable=locally-disabled,unused-import
+#
+# Copyright 2015-2025 BigML
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+""" Creating public dataset
+
+"""
+from .world import world, setup_module, teardown_module, show_doc, \
+    show_method
+from . import create_source_steps as source_create
+from .
import create_dataset_steps as dataset_create
+
+class TestPublicDataset:
+    """Testing published datasets """
+
+    def setup_method(self, method):
+        """
+            Store the running test's name in self.bigml and print a module banner
+        """
+        self.bigml = {}
+        self.bigml["method"] = method.__name__
+        print("\n-------------------\nTests in: %s\n" % __name__)
+
+    def teardown_method(self):
+        """
+            Print the end-of-module banner and reset self.bigml
+        """
+        print("\nEnd of tests in: %s\n-------------------\n" % __name__)
+        self.bigml = {}
+
+    def test_scenario1(self):
+        """
+            Scenario: Successfully creating and reading a public dataset:
+                Given I create a data source uploading a "<data>" file
+                And I wait until the source is ready less than <source_wait> secs
+                And I create a dataset
+                And I wait until the dataset is ready less than <dataset_wait> secs
+                And I make the dataset public
+                And I wait until the dataset is ready less than <dataset_wait> secs
+                When I get the dataset status using the dataset's public url
+                Then the dataset's status is FINISHED
+        """
+        show_doc(self.test_scenario1)
+        headers = ["data", "source_wait", "dataset_wait"]
+        examples = [
+            ['data/iris.csv', '10', '10']]
+        for example in examples:
+            example = dict(zip(headers, example))
+            show_method(self, self.bigml["method"], example)
+            source_create.i_upload_a_file_from_stdin(
+                self, example["data"])
+            source_create.the_source_is_finished(
+                self, example["source_wait"])
+            dataset_create.i_create_a_dataset(self)
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"])
+            dataset_create.make_the_dataset_public(self)
+            dataset_create.the_dataset_is_finished_in_less_than(
+                self, example["dataset_wait"])
+            dataset_create.build_local_dataset_from_public_url(self)
+            dataset_create.dataset_status_finished(self)
diff --git a/bigml/tests/test_14_create_evaluations.py b/bigml/tests/test_14_create_evaluations.py
new file mode 100644
index 00000000..093dc638
--- /dev/null
+++ b/bigml/tests/test_14_create_evaluations.py
@@ -0,0 +1,242 @@
+# -*- coding: utf-8 -*-
+#pylint:
disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating evaluation + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_ensemble_steps as ensemble_create +from . 
import create_evaluation_steps as evaluation_create + +class TestEvaluation: + """Testing Evaluation methods""" + + def setup_method(self, method): + """ + Resets the test context, stores the test name and prints a start banner + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Prints the end banner and clears the test context + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario1: Successfully creating an evaluation: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create a model + And I wait until the model is ready less than <model_wait> secs + When I create an evaluation for the model with the dataset + And I wait until the evaluation is ready less than <evaluation_wait> secs + Then the measured "<metric>" is <value> + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "evaluation_wait", "metric", "value"] + examples = [ + ['data/iris.csv', '50', '50', '50', '50', 'average_phi', '1']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + evaluation_create.i_create_an_evaluation(self) + evaluation_create.the_evaluation_is_finished_in_less_than( + self, example["evaluation_wait"]) + 
evaluation_create.the_measured_measure_is_value( + self, example["metric"], example["value"]) + + def test_scenario2(self): + """ + Scenario2: Successfully creating an evaluation for an ensemble: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create an ensemble of <number_of_models> models + And I wait until the ensemble is ready less than <model_wait> secs + When I create an evaluation for the ensemble with the dataset and "<evaluation_conf>" + And I wait until the evaluation is ready less than <evaluation_wait> secs + Then the measured "<metric>" is <value> + """ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "evaluation_wait", "number_of_models", + "metric", "value", "evaluation_conf"] + examples = [ + ['data/iris.csv', '50', '50', '80', '80', '5', 'average_phi', + '0.98029', {"combiner": 0}], + ['data/iris.csv', '50', '50', '80', '80', '5', 'average_phi', + '0.95061', {"combiner": 1}], + ['data/iris.csv', '50', '50', '80', '80', '5', 'average_phi', + '0.98029', {"combiner": 2}], + ['data/iris.csv', '50', '50', '80', '80', '5', 'average_phi', + '0.98029', {"operating_kind": "votes"}], + ['data/iris.csv', '50', '50', '80', '80', '5', 'average_phi', + '0.97064', {"operating_kind": "probability"}], + ['data/iris.csv', '50', '50', '80', '80', '5', 'average_phi', + '0.95061', {"operating_kind": "confidence"}]] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_shared = "%s_%s" % (example["data"], + 
example["number_of_models"]) + ensemble_create.i_create_an_ensemble( + self, example["number_of_models"], shared=ensemble_shared) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"], shared=ensemble_shared) + evaluation_create.i_create_an_evaluation_ensemble( + self, example["evaluation_conf"]) + evaluation_create.the_evaluation_is_finished_in_less_than( + self, example["evaluation_wait"]) + evaluation_create.the_measured_measure_is_value( + self, example["metric"], example["value"]) + + def test_scenario3(self): + """ + Scenario3: Successfully creating an evaluation for a logistic regression: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create a logistic regression + And I wait until the logistic regression is ready less than <model_wait> secs + When I create an evaluation for the logistic regression with the dataset + And I wait until the evaluation is ready less than <evaluation_wait> secs + Then the measured "<metric>" is <value> + """ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "evaluation_wait", "metric", "value"] + examples = [ + ['data/iris.csv', '50', '50', '800', '80', 'average_phi', + '0.89054']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model( + self, shared=example["data"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + 
evaluation_create.i_create_an_evaluation_logistic( + self) + evaluation_create.the_evaluation_is_finished_in_less_than( + self, example["evaluation_wait"]) + evaluation_create.the_measured_measure_is_value( + self, example["metric"], example["value"]) + + def test_scenario4(self): + """ + Scenario4: Successfully creating an evaluation for a deepnet: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create a deepnet + And I wait until the deepnet is ready less than <model_wait> secs + When I create an evaluation for the deepnet with the dataset + And I wait until the evaluation is ready less than <evaluation_wait> secs + Then the measured "<metric>" is <value> + """ + show_doc(self.test_scenario4) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "evaluation_wait", "metric", "value"] + examples = [ + ['data/iris.csv', '50', '50', '800', '80', 'average_phi', + '0.98029']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_deepnet(self, shared=example["data"]) + model_create.the_deepnet_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + evaluation_create.i_create_an_evaluation_deepnet( + self) + evaluation_create.the_evaluation_is_finished_in_less_than( + self, example["evaluation_wait"]) + evaluation_create.the_measured_measure_is_value( + self, example["metric"], example["value"]) + + def test_scenario5(self): + """ + Scenario5: Successfully instantiating Evaluation: + Given a 
stored evaluation "<data>" file + When I create an Evaluation for the JSON + Then the measured "<metric>" is <value> + """ + show_doc(self.test_scenario5) + headers = ["data", "metric", "value"] + examples = [ + ['data/classification_evaluation.json', 'phi', + 0.64837], + ['data/classification_evaluation.json', 'accuracy', + 0.91791], + ['data/classification_evaluation.json', 'precision', + 0.86639], + ['data/regression_evaluation.json', 'r_squared', + 0.9288]] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + evaluation_create.i_create_a_local_evaluation( + self, example["data"]) + evaluation_create.the_local_metric_is_value( + self, example["metric"], example["value"]) diff --git a/bigml/tests/test_15_download.py b/bigml/tests/test_15_download.py new file mode 100644 index 00000000..415257e2 --- /dev/null +++ b/bigml/tests/test_15_download.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Downloading dataset + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . 
import create_model_steps as model_create + + +class TestDownload: + """Testing downloads""" + + def setup_method(self, method): + """ + Resets the test context, stores the test name and prints a start banner + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Prints the end banner and clears the test context + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully exporting a dataset: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I download the dataset file to "<exported_file>" + Then file "<exported_file>" is like file "<data>" + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "exported_file"] + examples = [ + ['data/iris.csv', '30', '30', 'tmp/exported_iris.csv']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + dataset_create.i_export_a_dataset(self, example["exported_file"]) + dataset_create.files_equal( + self, example["exported_file"], example["data"]) + + def test_scenario2(self): + """ + Scenario: Successfully creating a model and exporting it: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create a model + And I wait until the model is ready less than <model_wait> secs + And I export the <"pmml"> model to file "<exported_file>" + Then I check the model is 
stored in "<exported_file>" file in <"pmml"> + """ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "exported_file", "pmml"] + examples = [ + ['data/iris.csv', '30', '30', '30', 'tmp/model/iris.json', False], + ['data/iris_sp_chars.csv', '30', '30', '30', 'tmp/model/iris_sp_chars.pmml', True]] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + model_create.i_export_model( + self, example["pmml"], example["exported_file"]) + model_create.i_check_model_stored( + self, example["exported_file"], example["pmml"]) diff --git a/bigml/tests/test_16_sample_dataset.py b/bigml/tests/test_16_sample_dataset.py new file mode 100644 index 00000000..186b76ef --- /dev/null +++ b/bigml/tests/test_16_sample_dataset.py @@ -0,0 +1,112 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating sample dataset + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_sample_steps as sample_create + + +class TestSampleDataset: + """Test for Sample methods""" + + def setup_method(self, method): + """ + Resets the test context, stores the test name and prints a start banner + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Prints the end banner and clears the test context + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating a sample from a dataset: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create a sample from a dataset + And I wait until the sample is ready less than <sample_wait> secs + And I update the sample name to "<sample_name>" + When I wait until the sample is ready less than <sample_wait> secs + Then the sample name is "<sample_name>" + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "sample_wait", + "sample_name"] + examples = [ + ['data/iris.csv', '10', '10', '10', 'my new sample name']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + sample_create.i_create_a_sample_from_dataset(self) + 
sample_create.the_sample_is_finished_in_less_than( + self, example["sample_wait"]) + sample_create.i_update_sample_name(self, example["sample_name"]) + sample_create.the_sample_is_finished_in_less_than( + self, example["sample_wait"]) + sample_create.i_check_sample_name(self, example["sample_name"]) + + def test_scenario2(self): + """ + Scenario: Successfully cloning dataset: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I clone the last dataset + And I wait until the dataset is ready less than <dataset_wait> secs + Then the new dataset is as the origin dataset + """ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait"] + examples = [ + ['data/iris.csv', '30', '30']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + source = world.source["resource"] + source_create.clone_source(self, source) + source_create.the_source_is_finished(self, example["source_wait"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset = world.dataset["resource"] + dataset_create.clone_dataset(self, dataset) + dataset_create.the_cloned_dataset_is(self, dataset) diff --git a/bigml/tests/test_17_split_dataset.py b/bigml/tests/test_17_split_dataset.py new file mode 100644 index 00000000..c570ea12 --- /dev/null +++ b/bigml/tests/test_17_split_dataset.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# 
not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Splitting dataset + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create + +class TestSplitDataset: + """Test dataset split""" + + def setup_method(self, method): + """ + Resets the test context, stores the test name and prints a start banner + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Prints the end banner and clears the test context + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating a split dataset: + Given I create a data source with "<source_conf>" uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I create a dataset extracting a <rate> sample + And I wait until the dataset is ready less than <dataset_wait> secs + When I compare the datasets' instances + Then the proportion of instances between datasets is <rate> + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "rate", "source_conf"] + examples = [ + ['data/iris.csv', '10', '10', '10', '0.8', '{"category": 12}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file_with_args( + self, example["data"], example["source_conf"]) + 
source_create.the_source_is_finished( + self, example["source_wait"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.i_create_a_split_dataset(self, example["rate"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.i_compare_datasets_instances(self) + dataset_create.proportion_datasets_instances(self, example["rate"]) diff --git a/bigml/tests/test_18_create_anomaly.py b/bigml/tests/test_18_create_anomaly.py new file mode 100644 index 00000000..b38adfa6 --- /dev/null +++ b/bigml/tests/test_18_create_anomaly.py @@ -0,0 +1,133 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +""" Creating anomaly detector + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_anomaly_steps as anomaly_create +from . 
import create_multimodel_steps as mm_create + +class TestAnomaly: + """Test anomaly detector methods""" + + def setup_method(self, method): + """ + Resets the test context, stores the test name and prints a start banner + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Prints the end banner and clears the test context + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating an anomaly detector from a dataset and a dataset list: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + Then I create an anomaly detector from a dataset + And I wait until the anomaly detector is ready less than <model_wait> secs + And I check the anomaly detector stems from the original dataset + And I store the dataset id in a list + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + And I store the dataset id in a list + Then I create an anomaly detector from a dataset list + And I wait until the anomaly detector is ready less than <model_wait> secs + And I check the anomaly detector stems from the original dataset list + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait"] + examples = [ + ['data/tiny_kdd.csv', '40', '40', '100']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset( + self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + anomaly_create.i_create_an_anomaly_from_dataset( + self, 
shared=example["data"]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + anomaly_create.i_check_anomaly_dataset_and_datasets_ids(self) + mm_create.i_store_dataset_id(self) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + mm_create.i_store_dataset_id(self) + anomaly_create.i_create_an_anomaly_from_dataset_list(self) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"]) + anomaly_create.i_check_anomaly_datasets_and_datasets_ids(self) + + def test_scenario2(self): + """ + Scenario: Successfully creating an anomaly detector from a dataset and generating the anomalous dataset: + Given I create a data source uploading a "<data>" file + And I wait until the source is ready less than <source_wait> secs + And I create a dataset + And I wait until the dataset is ready less than <dataset_wait> secs + Then I create an anomaly detector of <rows> anomalies from a dataset + And I wait until the anomaly detector is ready less than <model_wait> secs + And I create a dataset with only the anomalies + And I wait until the dataset is ready less than <model_wait> secs + And I check that the dataset has <rows> rows + + Examples: + | data | source_wait | dataset_wait | model_wait | rows | + | data/iris_anomalous.csv | 40 | 40 | 80 | 1 | + """ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", "rows"] + examples = [ + ['data/iris_anomalous.csv', '40', '40', '80', '1']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + 
anomaly_create.i_create_an_anomaly_with_top_n_from_dataset( + self, example["rows"]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"]) + anomaly_create.create_dataset_with_anomalies(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["model_wait"]) + anomaly_create.the_dataset_has_n_rows(self, example["rows"]) diff --git a/bigml/tests/test_19_missing_and_errors.py b/bigml/tests/test_19_missing_and_errors.py new file mode 100644 index 00000000..22326c08 --- /dev/null +++ b/bigml/tests/test_19_missing_and_errors.py @@ -0,0 +1,166 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating datasets with missing values and errors counters + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import read_dataset_steps as dataset_read +from . import create_prediction_steps as prediction_create +from . import compare_predictions_steps as prediction_compare +from . 
import create_model_steps as model_create + +class TestMissingsAndErrors: + """Testing Missings and Errors retrieval""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully obtaining missing values counts: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + And I create a dataset + And I wait until the dataset is ready less than secs + When I ask for the missing values counts in the fields + Then the missing values counts dict is "" + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "source_conf", "dataset_wait", + "missing_values"] + examples = [ + ['data/iris_missing.csv', '30', + '{"fields": {"000000": {"optype": "numeric"}}}', '30', + '{"000000": 1}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) + source_create.the_source_is_finished(self, example["source_wait"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["source_wait"]) + dataset_read.i_get_the_missing_values(self) + dataset_read.i_get_the_properties_values( + self, example["missing_values"]) + + def test_scenario2(self): + """ + Scenario: Successfully obtaining parsing error counts: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + 
And I create a dataset + And I wait until the dataset is ready less than secs + When I ask for the error counts in the fields + Then the error counts dict is "" + """ + print(self.test_scenario2.__doc__) + headers = ["data", "source_wait", "source_conf", + "dataset_wait", "error_values"] + examples = [ + ['data/iris_missing.csv', '30', + '{"fields": {"000000": {"optype": "numeric"}}}', 30, + '{"000000": 1}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_read.i_get_the_errors_values(self) + dataset_read.i_get_the_properties_values( + self, example["error_values"]) + + def test_scenario3(self): + """ + Scenario: Successfully comparing predictions: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "input_data", "objective_id", "prediction"] + examples = [ + ['data/iris_missing.csv', '30', '30', '50', + '{"fields": {"000000": {"optype": "numeric"}}, ' + '"source_parser": {"missing_tokens": ["foo"]}}', + '{"sepal length": "foo", "petal length": 3}', + '000004', 'Iris-versicolor'], + ['data/iris_missing.csv', '30', '30', '50', + '{"fields": {"000000": {"optype": "numeric"}}, ' + '"source_parser": 
{"missing_tokens": ["foo"]}}', + '{"sepal length": "foo", "petal length": 5, ' + '"petal width": 1.5}', '000004', 'Iris-virginica']] + + show_doc(self.test_scenario3) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"]) + source_create.i_update_source_with( + self, example["source_conf"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + model_create.i_create_a_model(self) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_model(self) + prediction_create.i_create_a_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) diff --git a/bigml/tests/test_20_rename_duplicated_names.py b/bigml/tests/test_20_rename_duplicated_names.py new file mode 100644 index 00000000..ac2def75 --- /dev/null +++ b/bigml/tests/test_20_rename_duplicated_names.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Renaming duplicated names in fields + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import compare_predictions_steps as compare_preds + +class TestDuplicatedFields: + """Test working with different fields with identical names""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully changing duplicated field names: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset with "" + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + Then "" field's name is changed to "" + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "dataset_conf", "field_id", "new_name"] + examples = [ + ['data/iris.csv', '20', '20', '30', + '{"fields": {"000001": {"name": "species"}}}', + '000001', 'species1'], + ['data/iris.csv', '20', '20', '30', + '{"fields": {"000001": {"name": "petal width"}}}', + '000003', 'petal width3']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], 
shared=example["data"]) + dataset_create.i_create_a_dataset_with( + self, example["dataset_conf"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + model_create.i_create_a_model(self) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + compare_preds.i_create_a_local_model(self) + model_create.field_name_to_new_name( + self, example["field_id"], example["new_name"]) diff --git a/bigml/tests/test_21_projects.py b/bigml/tests/test_21_projects.py new file mode 100644 index 00000000..b58f6d0a --- /dev/null +++ b/bigml/tests/test_21_projects.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Testing projects REST api calls + +""" +from .world import world, setup_module, teardown_module +from . import create_project_steps as create +from . 
import delete_project_steps as delete + + +class TestProjects: + """Testing project methods""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """Creating and updating project""" + name = "my project" + new_name = "my new project" + create.i_create_project(self, name) + create.the_project_is_finished(self, 10) + create.i_check_project_name(self, name=name) + create.i_update_project_name_with(self, name=new_name) + create.i_check_project_name(self, name=new_name) + delete.i_delete_the_project(self) + delete.wait_until_project_deleted(self, 50) diff --git a/bigml/tests/test_22_source_args.py b/bigml/tests/test_22_source_args.py new file mode 100644 index 00000000..b66edc9e --- /dev/null +++ b/bigml/tests/test_22_source_args.py @@ -0,0 +1,165 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import,no-member +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ + +""" Uploading source with structured args + +""" +from bigml.api_handlers.resourcehandler import get_id + +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create + + +class TestUploadSource: + """Testing source uploads""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully uploading source: + Given I create a data source uploading a "" file with args "" + And I wait until the source is ready less than secs + Then the source exists and has args "" + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "source_conf"] + examples = [ + ['data/iris.csv', '30', '{"tags": ["my tag", "my second tag"]}'], + ['data/iris.csv', '30', '{"name": "Testing unicode names: áé"}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file_with_args( + self, example["data"], example["source_conf"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.source_has_args(self, example["source_conf"]) + + def test_scenario2(self): + """ + Scenario: Successfully creating composite source: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a data source uploading a "" file + And I wait until the source is ready less than secs + Then I create a composite from the last two sources + And I wait until the source is ready less than secs + Then the composite exists and has the previous two sources + """ + 
show_doc(self.test_scenario2) + headers = ["data", "source_wait"] + examples = [ + ['data/iris.csv', '30']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + sources = [] + source_create.i_upload_a_file( + self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"]) + sources.append(get_id(world.source["resource"])) + source_create.i_upload_a_file( + self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"]) + sources.append(get_id(world.source["resource"])) + source_create.i_create_composite(self, sources) + source_create.the_source_is_finished(self, example["source_wait"]) + for source in sources: + world.sources.remove("source/%s" % source) + source_create.the_composite_contains(self, sources) + + def test_scenario3(self): + """ + Scenario: Successfully cloning source: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I clone the last source + And I wait until the source is ready less than secs + Then the new source has the first one as origin + """ + show_doc(self.test_scenario3) + headers = ["data", "source_wait"] + examples = [ + ['data/iris.csv', '30']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + source = world.source["resource"] + source_create.clone_source(self, source) + source_create.the_source_is_finished( + self, example["source_wait"]) + source_create.the_cloned_source_origin_is(self, source) + + def test_scenario4(self): + """ + Scenario: Successfully adding annotations to composite source: + Given I create an annotated images data source uploading a "" file + And I wait until the source is ready less than 
secs + And I create a dataset + And I wait until the dataset is ready less than secs + Then the new dataset has annotations in the field + """ + headers = ["data", "source_wait", "dataset_wait", "annotations_num", + "annotations_field"] + examples = [ + ['data/images/metadata.json', '500', '500', '12', + '100002'], + ['data/images/metadata_compact.json', '500', '500', '3', + '100003'], + ['data/images/metadata_list.json', '500', '500', '3', + '100003']] + show_doc(self.test_scenario4) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_create_annotated_source( + self, + example["data"], + args={"image_analysis": {"enabled": False, + "extracted_features": []}}) + source_create.the_source_is_finished( + self, example["source_wait"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.check_annotations(self, + example["annotations_field"], + example["annotations_num"]) + diff --git a/bigml/tests/test_23_local_model_info.py b/bigml/tests/test_23_local_model_info.py new file mode 100644 index 00000000..8ee0ac97 --- /dev/null +++ b/bigml/tests/test_23_local_model_info.py @@ -0,0 +1,386 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Testing local model information output methods + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import compare_predictions_steps as prediction_compare +from . import inspect_model_steps as inspect_model + +class TestLocalModelOutputs: + """Testing local model code generators""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating a model and translate the tree model into a set of IF-THEN rules: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + And I translate the tree into IF_THEN rules + Then I check the output is like "" expected file + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "output_file"] + examples = [ + ['data/iris.csv', '30', '30', '30', + 'data/model/if_then_rules_iris.txt'], + ['data/iris_sp_chars.csv', '30', '30', '30', + 'data/model/if_then_rules_iris_sp_chars.txt'], + ['data/spam.csv', '30', '30', '30', + 'data/model/if_then_rules_spam.txt'], + ['data/grades.csv', '30', '30', '30', + 'data/model/if_then_rules_grades.txt'], + ['data/diabetes.csv', '30', '30', '30', + 
'data/model/if_then_rules_diabetes.txt'], + ['data/iris_missing2.csv', '30', '30', '30', + 'data/model/if_then_rules_iris_missing2.txt'], + ['data/tiny_kdd.csv', '30', '30', '30', + 'data/model/if_then_rules_tiny_kdd.txt']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished(self, example["source_wait"], + shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than(self, + example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_model(self) + inspect_model.i_translate_the_tree_into_IF_THEN_rules(self) + inspect_model.i_check_if_the_output_is_like_expected_file( + self, example["output_file"]) + + def test_scenario2(self): + """ + Scenario: Successfully creating a model with missing values and translate the tree model into a set of IF-THEN rules: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + And I translate the tree into IF_THEN rules + Then I check the output is like "" expected file + """ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "output_file"] + examples = [ + ['data/iris_missing2.csv', '10', '10', '30', 'data/model/if_then_rules_iris_missing2_MISSINGS.txt']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + 
source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset( + self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than(self, + example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model_with_missing_splits(self) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_model(self) + inspect_model.i_translate_the_tree_into_IF_THEN_rules(self) + inspect_model.i_check_if_the_output_is_like_expected_file( + self, example["output_file"]) + + def test_scenario3(self): + """ + Scenario: Successfully creating a model and translate the tree model into a set of IF-THEN rules: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with "" waiting less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + And I translate the tree into IF_THEN rules + Then I check the output is like "" expected file + """ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "output_file"] + examples = [ + ['data/spam.csv', '30', '30', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": true, "stem_words": true, "use_stopwords": ' + 'false, "language": "en"}}}}', + 'data/model/if_then_rules_spam_textanalysis_1.txt'], + ['data/spam.csv', '30', '30', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": true, "stem_words": true, ' + '"use_stopwords": false}}}}', + 'data/model/if_then_rules_spam_textanalysis_2.txt'], + ['data/spam.csv', '30', '30', '30', + '{"fields": {"000001": {"optype": 
"text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": false, ' + '"use_stopwords": false, "language": "en"}}}}', + 'data/model/if_then_rules_spam_textanalysis_3.txt'], + ['data/spam.csv', '30', '30', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": false, "stem_words": true, "use_stopwords": ' + 'true, "language": "en"}}}}', + 'data/model/if_then_rules_spam_textanalysis_4.txt'], + ['data/spam.csv', '30', '30', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "full_terms_only", "language": "en"}}}}', + 'data/model/if_then_rules_spam_textanalysis_5.txt'], + ['data/spam.csv', '30', '30', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": true, "stem_words": true, "use_stopwords": ' + 'false, "language": "en"}}}}', + 'data/model/if_then_rules_spam_textanalysis_6.txt']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than(self, + example["dataset_wait"]) + model_create.i_create_a_model(self) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_model(self) + inspect_model.i_translate_the_tree_into_IF_THEN_rules(self) + inspect_model.i_check_if_the_output_is_like_expected_file( + self, example["output_file"]) + + def test_scenario4(self): + """ + Scenario: Successfully creating a model and check its data distribution: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model 
+ And I wait until the model is ready less than secs + And I create a local model + And I translate the tree into IF_THEN rules + Then I check the data distribution with "" file + """ + show_doc(self.test_scenario4) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "output_file"] + examples = [ + ['data/iris.csv', '30', '30', '30', + 'data/model/data_distribution_iris.txt'], + ['data/iris_sp_chars.csv', '30', '30', '30', + 'data/model/data_distribution_iris_sp_chars.txt'], + ['data/spam.csv', '30', '30', '30', + 'data/model/data_distribution_spam.txt'], + ['data/grades.csv', '30', '30', '30', + 'data/model/data_distribution_grades.txt'], + ['data/diabetes.csv', '30', '30', '30', + 'data/model/data_distribution_diabetes.txt'], + ['data/iris_missing2.csv', '30', '30', '30', + 'data/model/data_distribution_iris_missing2.txt'], + ['data/tiny_kdd.csv', '30', '30', '30', + 'data/model/data_distribution_tiny_kdd.txt']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than(self, + example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_model(self) + inspect_model.i_check_the_data_distribution( + self, example["output_file"]) + + def test_scenario5(self): + """ + Scenario: Successfully creating a model and check its predictions distribution: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is 
ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + And I translate the tree into IF_THEN rules + Then I check the predictions distribution with "" file + """ + show_doc(self.test_scenario5) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "output_file"] + examples = [ + ['data/iris.csv', '30', '30', '30', + 'data/model/predictions_distribution_iris.txt'], + ['data/iris_sp_chars.csv', '30', '30', '30', + 'data/model/predictions_distribution_iris_sp_chars.txt'], + ['data/spam.csv', '30', '30', '30', + 'data/model/predictions_distribution_spam.txt'], + ['data/grades.csv', '30', '30', '30', + 'data/model/predictions_distribution_grades.txt'], + ['data/diabetes.csv', '30', '30', '30', + 'data/model/predictions_distribution_diabetes.txt'], + ['data/iris_missing2.csv', '30', '30', '30', + 'data/model/predictions_distribution_iris_missing2.txt'], + ['data/tiny_kdd.csv', '30', '30', '30', + 'data/model/predictions_distribution_tiny_kdd.txt']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than(self, + example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_model(self) + inspect_model.i_check_the_predictions_distribution( + self, example["output_file"]) + + + def test_scenario6(self): + """ + Scenario: Successfully creating a model and check its summary information: + Given I create a data source uploading a "" file + And I wait 
until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + And I translate the tree into IF_THEN rules + Then I check the model summary with "" file + """ + show_doc(self.test_scenario6) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "output_file"] + examples = [ + ['data/iris.csv', '30', '30', '30', + 'data/model/summarize_iris.txt'], + ['data/iris_sp_chars.csv', '30', '30', '30', + 'data/model/summarize_iris_sp_chars.txt'], + ['data/spam.csv', '30', '30', '30', + 'data/model/summarize_spam.txt'], + ['data/grades.csv', '30', '30', '30', + 'data/model/summarize_grades.txt'], + ['data/diabetes.csv', '30', '30', '30', + 'data/model/summarize_diabetes.txt'], + ['data/iris_missing2.csv', '30', '30', '30', + 'data/model/summarize_iris_missing2.txt'], + ['data/tiny_kdd.csv', '30', '30', '30', + 'data/model/summarize_tiny_kdd.txt']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset( + self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than(self, + example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_model(self) + inspect_model.i_check_the_model_summary_with( + self, example["output_file"]) + + def test_scenario7(self): + """ + Scenario: Unit tests for output generators: + Given I read a model from "" file + And I create a local model + And I create a distribution, list fields and a 
tree CSV + Then I check distribution with "" file + Then I check list_fields with "" file + Then I check tree CSV with "" file + """ + + show_doc(self.test_scenario7) + headers = ["data", "distribution", "list_fields", "tree_csv"] + examples = [ + ['data/model/iris.json', + 'data/model/distribution_iris.txt', + 'data/model/list_fields.txt', + 'data/model/tree_csv.txt'], + ['data/model/regression.json', + 'data/model/rdistribution_iris.txt', + 'data/model/rlist_fields.txt', + 'data/model/rtree_csv.txt'], + ['data/model/w_iris.json', + 'data/model/wdistribution_iris.txt', + 'data/model/wlist_fields.txt', + 'data/model/wtree_csv.txt'], + ['data/model/w_regression.json', + 'data/model/wrdistribution_iris.txt', + 'data/model/wrlist_fields.txt', + 'data/model/wrtree_csv.txt']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + world.debug=True + model_create.i_read_model_file(self, example["data"]) + prediction_compare.i_create_a_local_model(self) + inspect_model.i_check_print_distribution( + self, example["distribution"]) + inspect_model.i_list_fields(self, example["list_fields"]) + inspect_model.i_create_tree_csv(self, example["tree_csv"]) diff --git a/bigml/tests/test_24_cluster_derived.py b/bigml/tests/test_24_cluster_derived.py new file mode 100644 index 00000000..5e565463 --- /dev/null +++ b/bigml/tests/test_24_cluster_derived.py @@ -0,0 +1,207 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating datasets and models associated to a cluster + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_cluster_steps as cluster_create +from . import compare_predictions_steps as prediction_compare + +class TestClusterDerived: + """Testing resources derived from clusters""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating datasets for first centroid of a cluster: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster + And I wait until the cluster is ready less than secs + When I create a dataset associated to centroid "" + And I wait until the dataset is ready less than secs + Then the dataset is associated to the centroid "" of the cluster + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "centroid_id"] + examples = [ + ['data/iris.csv', '10', '10', '40', '000001']] + for 
example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + cluster_create.i_create_a_cluster(self, shared=example["data"]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset_from_cluster( + self, example["centroid_id"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.is_associated_to_centroid_id( + self, example["centroid_id"]) + + def test_scenario2(self): + """ + Scenario: Successfully creating models for first centroid of a cluster: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster with options "" + And I wait until the cluster is ready less than secs + When I create a model associated to centroid "" + And I wait until the model is ready less than secs + Then the model is associated to the centroid "" of the cluster + """ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "centroid_id", "model_conf"] + examples = [ + ['data/iris.csv', '10', '10', '40', '000001', + '{"model_clusters": true}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + 
dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + cluster_create.i_create_a_cluster_with_options( + self, example["model_conf"]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_from_cluster( + self, example["centroid_id"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.is_associated_to_centroid_id( + self, example["centroid_id"]) + + def test_scenario3(self): + """ + Scenario: Successfully getting the closest point in a cluster: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster + And I wait until the cluster is ready less than secs + And I create a local cluster + Then the data point in the cluster closest to "" is "" + """ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "reference", "closest"] + examples = [ + ['data/iris.csv', '10', '10', '40', + '{"petal length": 1.4, "petal width": 0.2,' + ' "sepal width": 3.0, "sepal length": 4.89,' + ' "species": "Iris-setosa"}', + '{"distance": 0.001894153207990619, "data":' + ' {"petal length": "1.4", "petal width": "0.2",' + ' "sepal width": "3.0", "sepal length": "4.9",' + ' "species": "Iris-setosa"}}'], + ['data/spam_4w.csv', '10', '10', '40', + '{"Message": "mobile"}', + '{"distance": 0.0, "data":' + ' {"Message": "mobile", "Type": "spam"}}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, 
shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + cluster_create.i_create_a_cluster(self, shared=example["data"]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_cluster(self) + cluster_create.closest_in_cluster( + self, example["reference"], example["closest"]) + + + def test_scenario4(self): + """ + Scenario: Successfully getting the closest centroid in a cluster: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster + And I wait until the cluster is ready less than secs + And I create a local cluster + Then the centroid in the cluster closest to "" is "" + """ + show_doc(self.test_scenario4) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "reference", "closest"] + examples = [ + ['data/spam_4w.csv', '10', '10', '40', + '{"Message": "free"}', + '000005']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + cluster_create.i_create_a_cluster(self, shared=example["data"]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_cluster(self) + cluster_create.closest_centroid_in_cluster( + self, example["reference"], example["closest"]) diff --git a/bigml/tests/test_25_correlation.py 
b/bigml/tests/test_25_correlation.py new file mode 100644 index 00000000..27f4c029 --- /dev/null +++ b/bigml/tests/test_25_correlation.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating correlation + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . 
import create_correlation_steps as correlation_create + +class TestCorrelation: + """Test Correlation methods""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating a correlation from a dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a correlation from a dataset + And I wait until the correlation is ready less than secs + And I update the correlation name to "" + When I wait until the correlation is ready less than secs + Then the correlation name is "" + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "correlation_name"] + examples = [ + ['data/iris.csv', '10', '10', '20', 'my new correlation name']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + correlation_create.i_create_a_correlation_from_dataset(self) + correlation_create.the_correlation_is_finished_in_less_than( + self, example["model_wait"]) + correlation_create.i_update_correlation_name( + self, example["correlation_name"]) + correlation_create.the_correlation_is_finished_in_less_than( + self, example["model_wait"]) + 
correlation_create.i_check_correlation_name( + self, example["correlation_name"]) diff --git a/bigml/tests/test_26_statistical_test.py b/bigml/tests/test_26_statistical_test.py new file mode 100644 index 00000000..b09ebd48 --- /dev/null +++ b/bigml/tests/test_26_statistical_test.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating test + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . 
import create_statistical_tst_steps as statistical_tst_create + +class TestStatisticalTest: + """Test Statistical Test methods""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating a statistical test from a dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a statistical test from a dataset + And I wait until the statistical test is ready less than secs + And I update the statistical test name to "" + When I wait until the statistical test is ready less than secs + Then the statistical test name is "" + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "test_name"] + examples = [ + ['data/iris.csv', '10', '10', '20', + 'my new statistical test name']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + statistical_tst_create.i_create_a_tst_from_dataset(self) + statistical_tst_create.the_tst_is_finished_in_less_than( + self, example["model_wait"]) + statistical_tst_create.i_update_tst_name( + self, example["test_name"]) + statistical_tst_create.the_tst_is_finished_in_less_than( + self,
example["model_wait"]) + statistical_tst_create.i_check_tst_name( + self, example["test_name"]) diff --git a/bigml/tests/test_27_fields.py b/bigml/tests/test_27_fields.py new file mode 100644 index 00000000..bd461f04 --- /dev/null +++ b/bigml/tests/test_27_fields.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Testing Fields object properties + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import fields_steps +from . import create_source_steps as source_create +from . 
import create_dataset_steps as dataset_create + + +class TestFields: + """Tests Fields class methods """ + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating a Fields object: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a Fields object from the source with objective column "" + Then the object id is "" + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "objective_column", "objective_id"] + examples = [ + ['data/iris.csv', '10', '0', '000000']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + fields_steps.create_fields(self, example["objective_column"]) + fields_steps.check_objective(self, example["objective_id"]) + + def test_scenario2(self): + """ + Scenario: Successfully creating a Fields object and a summary fields file: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a Fields object from the dataset with objective column "" + And I export a summary fields file "" + Then I check that the file "" is like "" + """ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "objective_column", + "summary_file", "expected_file"] + examples = [ + ['data/iris.csv', '10', '10', '0', 'fields_summary.csv', + 
'data/fields/fields_summary.csv']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + fields_steps.create_fields_from_dataset( + self, example["objective_column"]) + fields_steps.generate_summary(self, example["summary_file"]) + fields_steps.check_summary_like_expected( + self, example["summary_file"], example["expected_file"]) + + def test_scenario3(self): + """ + Scenario: Successfully creating a Fields object and a modified fields structure from a file: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a Fields object from the dataset with objective column "" + And I import a summary fields file "" as a fields structure + And I clone the source to open it + And I update the source with the file "" + And I update the dataset with the file "" + Then I check the new field structure has field "" as "" + And I check the source has field "" as "" + """ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "objective_column", + "summary_file", "field_id", "optype"] + examples = [ + ['data/iris.csv', '10', '10', '0', + 'data/fields/fields_summary_modified.csv', '000000', + 'categorical']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], 
shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + fields_steps.create_fields_from_dataset( + self, example["objective_column"]) + source_create.clone_source(self, world.source["resource"]) + source_create.the_source_is_finished(self, example["source_wait"]) + fields_steps.import_summary_file(self, example["summary_file"]) + fields_steps.update_with_summary_file( + self, world.source, example["summary_file"]) + fields_steps.update_with_summary_file( + self, world.dataset, example["summary_file"]) + fields_steps.check_field_type( + self, example["field_id"], example["optype"]) + fields_steps.check_resource_field_type( + self, world.source, example["field_id"], example["optype"]) diff --git a/bigml/tests/test_28_association.py b/bigml/tests/test_28_association.py new file mode 100644 index 00000000..7e5bec63 --- /dev/null +++ b/bigml/tests/test_28_association.py @@ -0,0 +1,166 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating association + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . 
import create_dataset_steps as dataset_create +from . import create_association_steps as association_create + +class TestAssociation: + """Test for associations""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating associations from a dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create associations from a dataset + And I wait until the association is ready less than secs + And I update the association name to "" + When I wait until the association is ready less than secs + Then the association name is "" + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "association_name"] + examples = [ + ['data/iris.csv', '10', '10', '50', 'my new association name']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + association_create.i_create_an_association_from_dataset(self) + association_create.the_association_is_finished_in_less_than( + self, example["model_wait"]) + association_create.i_update_association_name( + self, example["association_name"]) + 
association_create.the_association_is_finished_in_less_than( + self, example["model_wait"]) + association_create.i_check_association_name( + self, example["association_name"]) + + def test_scenario2(self): + """ + Scenario: Successfully creating local association object: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an association from a dataset + And I wait until the association is ready less than secs + And I create a local association + When I get the rules for <"item_list"> + Then the first rule is "" + """ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "item_list", "JSON_rule"] + examples = [ + ['data/tiny_mushrooms.csv', '10', '20', '50', ["Edible"], + {'p_value': 5.26971e-31, 'confidence': 1, + 'rhs_cover': [0.488, 122], 'leverage': 0.24986, + 'rhs': [19], 'rule_id': '000002', 'lift': 2.04918, + 'lhs': [0, 21, 16, 7], 'lhs_cover': [0.488, 122], + 'support': [0.488, 122]}]] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + association_create.i_create_an_association_from_dataset(self) + association_create.the_association_is_finished_in_less_than( + self, example["model_wait"]) + association_create.i_create_a_local_association(self) + association_create.i_get_rules_for_item_list( + self, example["item_list"]) + association_create.the_first_rule_is( + self, example["JSON_rule"]) + + def test_scenario3(self): + """ + Scenario: Successfully creating local association 
object: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an association with search strategy "" from a dataset + And I wait until the association is ready less than secs + And I create a local association + When I get the rules for <"item_list"> + Then the first rule is "" + """ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "item_list", "JSON_rule", "strategy"] + examples = [ + ['data/tiny_mushrooms.csv', '10', '20', '50', ["Edible"], + {'p_value': 2.08358e-17, 'confidence': 0.79279, + 'rhs_cover': [0.704, 176], 'leverage': 0.07885, + 'rhs': [11], 'rule_id': '000007', 'lift': 1.12613, + 'lhs': [0], 'lhs_cover': [0.888, 222], + 'support': [0.704, 176]}, 'lhs_cover']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + association_create.i_create_an_association_with_strategy_from_dataset( + self, example["strategy"]) + association_create.the_association_is_finished_in_less_than( + self, example["model_wait"]) + association_create.i_create_a_local_association(self) + association_create.i_get_rules_for_item_list( + self, example["item_list"]) + association_create.the_first_rule_is(self, example["JSON_rule"]) diff --git a/bigml/tests/test_29_script.py b/bigml/tests/test_29_script.py new file mode 100644 index 00000000..eb5bc752 --- /dev/null +++ b/bigml/tests/test_29_script.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +#pylint:
disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating and updating scripts + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_script_steps as script_create + +class TestScript: + """Testing script methods""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating a whizzml script: + Given I create a whizzml script from a excerpt of code "" + And I wait until the script is ready less than secs + And I update the script with "", "" + And I wait until the script is ready less than secs + Then the script code is "" and the value of "" is "" + """ + show_doc(self.test_scenario1) + headers = ["source_code", "script_wait", "param", "param_value"] + examples = [ + ['(+ 1 1)', '30', 'name', 'my script']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + script_create.i_create_a_script(self, example["source_code"]) +
script_create.the_script_is_finished(self, example["script_wait"]) + script_create.i_update_a_script( + self, example["param"], example["param_value"]) + script_create.the_script_is_finished(self, example["script_wait"]) + script_create.the_script_code_and_attributes( + self, example["source_code"], + example["param"], + example["param_value"]) diff --git a/bigml/tests/test_30_execution.py b/bigml/tests/test_30_execution.py new file mode 100644 index 00000000..e1864d5c --- /dev/null +++ b/bigml/tests/test_30_execution.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating and updating scripts + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_script_steps as script_create +from . 
import create_execution_steps as execution_create + +class TestExecution: + """Testing local executions""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating a whizzml script execution: + Given I create a whizzml script from a excerpt of code "" + And I wait until the script is ready less than secs + And I create a whizzml script execution from an existing script + And I wait until the execution is ready less than secs + And I update the execution with "", "" + And I wait until the execution is ready less than secs + And I create a local execution + Then the script id is correct, the value of "" is "" and the result is "" + And the local execution result is "" + """ + show_doc(self.test_scenario1) + headers = ["source_code", "script_wait", "execution_wait", "param", + "param_value", "result"] + examples = [ + ['(+ 1 1)', '30', '30', 'name', 'my execution', 2]] + + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + script_create.i_create_a_script(self, example["source_code"]) + script_create.the_script_is_finished(self, example["script_wait"]) + execution_create.i_create_an_execution(self) + execution_create.the_execution_is_finished( + self, example["execution_wait"]) + execution_create.i_update_an_execution( + self, example["param"], example["param_value"]) + execution_create.the_execution_is_finished( + self, example["execution_wait"]) + execution_create.create_local_execution(self) + execution_create.the_execution_and_attributes( + self, example["param"], example["param_value"], + example["result"]) + execution_create.the_local_execution_result_is( 
+ self, example["result"]) + + def test_scenario2(self): + """ + Scenario: Successfully creating a whizzml script execution from a list of scripts: + Given I create a whizzml script from a excerpt of code "" + And I wait until the script is ready less than secs + And I create a whizzml script from a excerpt of code "" + And I wait until the script is ready less than secs + And I create a whizzml script execution from the last two scripts + And I wait until the execution is ready less than secs + And I update the execution with "", "" + And I wait until the execution is ready less than secs + Then the script ids are correct, the value of "" is "" and the result is "" + """ + show_doc(self.test_scenario2) + headers = ["source_code", "script_wait", "execution_wait", "param", + "param_value", "result"] + examples = [ + ['(+ 1 1)', '100', '100', 'name', 'my execution', [2, 2]]] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + script_create.i_create_a_script(self, example["source_code"]) + script_create.the_script_is_finished(self, example["script_wait"]) + script_create.i_create_a_script(self, example["source_code"]) + script_create.the_script_is_finished(self, example["script_wait"]) + execution_create.i_create_an_execution_from_list( + self, number_of_scripts=2) + execution_create.the_execution_is_finished( + self, example["execution_wait"]) + execution_create.i_update_an_execution( + self, example["param"], example["param_value"]) + execution_create.the_execution_is_finished( + self, example["execution_wait"]) + execution_create.the_execution_ids_and_attributes( + self, 2, example["param"], example["param_value"], + example["result"]) + + def test_scenario3(self): + """ + Scenario: Successfully creating a whizzml script execution from a local or remote file: + Given I create a whizzml script from a excerpt of code "" + And I wait until the script is ready less than secs + And I create a whizzml 
script from a excerpt of code "" + And I wait until the script is ready less than secs + And I create a whizzml script execution from the last two scripts + And I wait until the execution is ready less than secs + And I update the execution with "", "" + And I wait until the execution is ready less than secs + Then the script ids are correct, the value of "" is "" and the result is "" + """ + show_doc(self.test_scenario3) + headers = ["source_code", "script_wait", "execution_wait", "param", + "param_value", "result"] + examples = [ + ['data/one_plus_one.whizzml', '50', '50', 'name', + 'my execution', 2], + ['https://gist.github.com/mmerce/49e0a69cab117b6a11fb490140326020', + '30', '30', 'name', 'my execution', 2]] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + script_create.i_create_a_script_from_file_or_url( + self, example["source_code"]) + script_create.the_script_is_finished( + self, example["script_wait"]) + execution_create.i_create_an_execution(self) + execution_create.the_execution_is_finished( + self, example["execution_wait"]) + execution_create.i_update_an_execution( + self, example["param"], example["param_value"]) + execution_create.the_execution_is_finished( + self, example["execution_wait"]) + execution_create.the_execution_and_attributes( + self, example["param"], example["param_value"], + example["result"]) diff --git a/bigml/tests/test_31_library.py b/bigml/tests/test_31_library.py new file mode 100644 index 00000000..9de406c8 --- /dev/null +++ b/bigml/tests/test_31_library.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating and updating scripts + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_library_steps as library_create + +class TestLibrary: + """Testing Library methods""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating a whizzml library: + Given I create a whizzml library from a excerpt of code "" + And I wait until the library is ready less than secs + And I update the library with "", "" + And I wait until the library is ready less than secs + Then the library code is "" and the value of "" is "" + """ + show_doc(self.test_scenario1) + headers = ["source_code", "library_wait", "param", "param_value"] + examples = [ + ['(define (mu x) (+ x 1))', '10', 'name', 'my library']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + library_create.i_create_a_library(self, example["source_code"]) + library_create.the_library_is_finished( + self, example["library_wait"]) + library_create.i_update_a_library( + self, example["param"], example["param_value"]) + library_create.the_library_is_finished( + self, example["library_wait"]) + library_create.the_library_code_and_attributes( + 
self, example["source_code"], example["param"], + example["param_value"]) diff --git a/bigml/tests/test_32_topic_model_prediction.py b/bigml/tests/test_32_topic_model_prediction.py new file mode 100644 index 00000000..fd26e407 --- /dev/null +++ b/bigml/tests/test_32_topic_model_prediction.py @@ -0,0 +1,168 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2016-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating a local Topic distribution from Topic Model + +""" +import sys + +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_lda_steps as topic_create +from . import compute_lda_prediction_steps as lda_predict + + +# This model is from the bigmlcom/streaming-lda; the associated test is +# for near-exact equivalence with that library (with special attention +# to random number generation). 
+DUMMY_MODEL = { + "input_fields": ["000001"], + "topic_model": { + "alpha": 0.08, + "beta": 0.1, + "hashed_seed": 0, + "language": "en", + "bigrams": True, + "case_sensitive": False, + "term_topic_assignments": [[0, 0, 1, 2], + [0, 1, 2, 0], + [1, 2, 0, 0], + [0, 0, 2, 0]], + "termset": ["cycling", "playing", "shouldn't", "uńąnimous court"], + "options": {}, + "topics": [{"name": "Topic 1", + "id": "000000", + "top_terms": ["a", "b"], + "probability": 0.1}, + {"name": "Topic 2", + "id": "000001", + "top_terms": ["c", "d"], + "probability": 0.1}, + {"name": "Topic 3", + "id": "000000", + "top_terms": ["e", "f"], + "probability": 0.1}, + {"name": "Topic 4", + "id": "000000", + "top_terms": ["g", "h"], + "probability": 0.1}], + "fields": { + "000001": { + "datatype": "string", + "name": "TEST TEXT", + "optype": "text", + "order": 0, + "preferred": True, + "summary": {}, + "term_analysis": {} + } + } + }, + "resource": "topicmodel/aaaaaabbbbbbccccccdddddd" +} + + +class TestTopicModel: + """Test Topic Model Predictions""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario 1: Successfully creating a local Topic Distribution + Given I have a block of text and an LDA model + And I use the model to predict the topic distribution + Then the value of the distribution matches the expected distribution + """ + show_doc(self.test_scenario1) + headers = ["model", "text", "expected_distribution"] + examples = [ + # This example is a replication of a test in bigmlcom/streaming-lda + [ + DUMMY_MODEL, + {"TEST TEXT": "uńąnimous court 'UŃĄNIMOUS COURT' " + "`play``the plays PLAYing SHOULDN'T CYCLE " + "cycling shouldn't uńąnimous or court's"}, + [ + {"name": 
'Topic 1', "probability": 0.1647366}, + {"name": 'Topic 2', "probability": 0.1885310}, + {"name": 'Topic 3', "probability": 0.4879441}, + {"name": 'Topic 4', "probability": 0.1587880}] + + ] + ] + + for ex in examples: + ex = dict(zip(headers, ex)) + show_method(self, self.bigml["method"], ex) + lda_predict.i_make_a_prediction( + self, ex["model"], ex["text"], ex["expected_distribution"]) + + def test_scenario2(self): + """ + Scenario 2: Successfully creating Topic Model from a dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create topic model from a dataset + And I wait until the topic model is ready less than secs + And I update the topic model name to "" + When I wait until the topic_model is ready less than secs + Then the topic model name is "" + """ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "topic_model_name", "source_conf"] + examples = [ + ['data/spam.csv', '100', '100', '100', 'my new topic model name', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + topic_create.i_create_a_topic_model(self) + topic_create.the_topic_model_is_finished_in_less_than( + self, example["model_wait"]) + topic_create.i_update_topic_model_name( + self, example["topic_model_name"]) + topic_create.the_topic_model_is_finished_in_less_than( 
+ self, example["model_wait"]) + topic_create.i_check_topic_model_name( + self, example["topic_model_name"]) diff --git a/bigml/tests/test_33_compare_predictions.py b/bigml/tests/test_33_compare_predictions.py new file mode 100644 index 00000000..cf322c36 --- /dev/null +++ b/bigml/tests/test_33_compare_predictions.py @@ -0,0 +1,906 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Comparing remote and local predictions + +""" +import json + +from .world import world, setup_module, teardown_module, show_doc, \ + show_method, res_filename +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_ensemble_steps as ensemble_create +from . import create_linear_steps as linear_create +from . import create_association_steps as association_create +from . import create_cluster_steps as cluster_create +from . import create_anomaly_steps as anomaly_create +from . import create_prediction_steps as prediction_create +from . import compare_predictions_steps as prediction_compare +from . 
import create_lda_steps as topic_create + + + +class TestComparePrediction: + """Test local and remote predictions""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully comparing centroids with or without text options: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster + And I wait until the cluster is ready less than secs + And I create a local cluster + When I create a centroid for "" + Then the centroid is "" with distance "" + And I create a local centroid for "" + Then the local centroid is "" with distance "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "input_data", "centroid", "distance"] + examples = [ + ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile call"}', 'Cluster 0', '0.25'], + ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 0', '0.5'], + ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile calls"}', 'Cluster 0', '0.5'], + ['data/spam.csv', '20', '20', 
'30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": false, "use_stopwords": false, "language": "en"}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 0', '0.5'], + ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Type": "ham", "Message": "Mobile call"}', 'Cluster 1', '0.34189'], + ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": false, "stem_words": true, "use_stopwords": true, "language": "en"}}}}', '{"Type": "ham", "Message": "A normal message"}', 'Cluster 0', '0.5'], + ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Type": "ham", "Message": "FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. 
zed POBox 36504 W45WQ norm150p/tone 16+"}', 'Cluster 0', '0.5'], + ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"token_mode": "full_terms_only", "language": "en"}}}}', '{"Type": "ham", "Message": "Ok"}', 'Cluster 0', '0.478833312167'], + ['data/spam.csv', '20', '20', '30', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}', '{"Type": "", "Message": ""}', 'Cluster 6', '0.5'], + ['data/diabetes.csv', '20', '20', '30', '{"fields": {}}', '{"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": "true"}', 'Cluster 3', '0.5033378686559257'], + ['data/diabetes.csv', '20', '20', '30', '{"fields": {}}', '{"pregnancies": 0, "plasma glucose": 118, "blood pressure": 84, "triceps skin thickness": 47, "insulin": 230, "bmi": 45.8, "diabetes pedigree": 0.551, "age": 31, "diabetes": true}', 'Cluster 3', '0.5033378686559257'], + ['data/iris_sp_chars.csv', '20', '20', '30', '{"fields": {}}', '{"pétal.length":1, "pétal&width\\u0000": 2, "sépal.length":1, "sépal&width": 2, "spécies": "Iris-setosa"}', 'Cluster 7', '0.8752380218327035'], + ['data/movies.csv', '20', '20', '30', '{"fields": {"000007": {"optype": "items", "item_analysis": {"separator": "$"}}}}', '{"gender": "Female", "age_range": "18-24", "genres": "Adventure$Action", "timestamp": 993906291, "occupation": "K-12 student", "zipcode": 59583, "rating": 3}', 'Cluster 3', '0.62852']] + show_doc(self.test_scenario1) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) + dataset_create.i_create_a_dataset(self) + 
dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + cluster_create.i_create_a_cluster(self) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_cluster(self) + prediction_create.i_create_a_centroid( + self, example["input_data"]) + prediction_create.the_centroid_is_with_distance( + self, example["centroid"], example["distance"]) + prediction_compare.i_create_a_local_centroid( + self, example["input_data"]) + prediction_compare.the_local_centroid_is( + self, example["centroid"], example["distance"]) + + def test_scenario2(self): + """ + Scenario: Successfully comparing centroids with configuration options: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster with options "" + And I wait until the cluster is ready less than secs + And I create a local cluster + When I create a centroid for "" + Then the centroid is "" with distance "" + And I create a local centroid for "" + Then the local centroid is "" with distance "" + And I create a local bigml model prediction for "" + Then the local centroid is "" with distance "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "model_conf", "input_data_l", "centroid", "distance", + "input_data"] + examples = [ + ['data/iris.csv', '30', '30', '30', + '{"summary_fields": ["sepal width"]}', + '{"petal length": 1, "petal width": 1, "sepal length": 1, ' + '"species": "Iris-setosa"}', 'Cluster 2', '1.16436', + '{"petal length": 1, "petal width": 1, "sepal length": 1, ' + '"species": "Iris-setosa"}'], + ['data/iris.csv', '20', '20', '30', + '{"default_numeric_value": "zero"}', + '{"petal length": 1}', 'Cluster 4', '1.41215', + '{"petal length": 1, "petal width": 0, "sepal length": 0, ' + '"sepal width": 0, "species": ""}']] + 
show_doc(self.test_scenario2) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + cluster_create.i_create_a_cluster_with_options( + self, example["model_conf"]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_cluster(self) + prediction_create.i_create_a_centroid( + self, example["input_data"]) + prediction_create.the_centroid_is_with_distance( + self, example["centroid"], example["distance"]) + prediction_compare.i_create_a_local_centroid( + self, example["input_data_l"]) + prediction_compare.the_local_centroid_is( + self, example["centroid"], example["distance"]) + prediction_compare.i_create_a_local_bigml_model(self, + model_type="cluster") + prediction_compare.i_create_a_local_bigml_model_prediction( + self, example["input_data_l"], prediction_type="centroid") + prediction_compare.the_local_centroid_is( + self, example["centroid"], example["distance"]) + + + def test_scenario3(self): + """ + Scenario: Successfully comparing scores from anomaly detectors: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an anomaly detector with params "" + And I wait until the anomaly detector is ready less than secs + And I create a local anomaly detector + When I create an anomaly score for "" + Then the anomaly score is "" + And I create a local anomaly score for "" + Then the local anomaly score is "" + And I create a local bigml model 
prediction for "" + Then the local anomaly score is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "score", "model_conf"] + examples = [ + ['data/tiny_kdd.csv', '30', '30', '80', + '{"000020": 255.0, "000004": 183.0, "000016": 4.0, ' + '"000024": 0.04, "000025": 0.01, "000026": 0.0, "000019": 0.25, ' + '"000017": 4.0, "000018": 0.25, "00001e": 0.0, "000005": 8654.0, ' + '"000009": "0", "000023": 0.01, "00001f": 123.0}', '0.69802', + '{}'], + ['data/repeat_iris.csv', '30', '30', '80', + '{"sepal width":3.5, "petal width": 0.2, "sepal length": 5.1, ' + '"petal length": 1.4, "species": "Iris-setosa"}', '0.50', + '{"normalize_repeats": false}'], + ['data/repeat_iris.csv', '30', '30', '80', + '{"sepal width":3.5, "petal width": 0.2, "sepal length": 5.1, ' + '"petal length": 1.4, "species": "Iris-setosa"}', '0.36692', + '{"normalize_repeats": true}'], + ['data/repeat_iris.csv', '30', '30', '80', + '{"sepal width":3.2, "petal width": 1.5, "sepal length": 6.4, ' + '"petal length": 4.5, "species": "Iris-versicolor"}', '0.76131', + '{"normalize_repeats": true}']] + show_doc(self.test_scenario3) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + anomaly_create.i_create_an_anomaly_with_params( + self, example["model_conf"]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_anomaly(self) + prediction_create.i_create_an_anomaly_score( + self, example["input_data"]) + prediction_create.the_anomaly_score_is(self, example["score"]) + 
prediction_compare.i_create_a_local_anomaly_score( + self, example["input_data"]) + prediction_compare.the_local_anomaly_score_is( + self, example["score"]) + prediction_compare.i_create_a_local_bigml_model(self, + model_type="anomaly") + prediction_compare.i_create_a_local_bigml_model_prediction( + self, example["input_data"], prediction_type="anomaly_score") + prediction_compare.the_local_bigml_prediction_is( + self, float(example["score"]), prediction_type="anomaly_score", + key="score", precision=4) + + def test_scenario4(self): + """ + Scenario: Successfully comparing topic distributions: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a topic model + And I wait until the topic model is ready less than secs + And I create a local topic model + When I create a topic distribution for "" + Then the topic distribution is "" + And I create a local topic distribution for "" + Then the local topic distribution is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "input_data", "topic_distribution"] + examples = [ + ['data/spam.csv', '30', '30', '80', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": true, "stem_words": true, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Type": "ham", "Message": "Mobile call"}', + '[0.51133, 0.00388, 0.00574, 0.00388, 0.00388, 0.00388, ' + '0.00388, 0.00388, 0.00388, 0.00388, 0.00388, 0.44801]'], + ['data/spam.csv', '30', '30', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"case_sensitive": true, "stem_words": true, ' + '"use_stopwords": false, "language": "en"}}}}', + '{"Type": "ham", "Message": "Go until jurong point, crazy.. ' + 'Available only in bugis n great world la e buffet... 
Cine ' + 'there got amore wat..."}', + '[0.39188, 0.00643, 0.00264, 0.00643, 0.08112, 0.00264, ' + '0.37352, 0.0115, 0.00707, 0.00327, 0.00264, 0.11086]']] + show_doc(self.test_scenario4) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + topic_create.i_create_a_topic_model(self) + topic_create.the_topic_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_topic_model(self) + topic_create.i_create_a_topic_distribution( + self, example["input_data"]) + prediction_compare.the_topic_distribution_is( + self, example["topic_distribution"]) + topic_create.i_create_a_local_topic_distribution( + self, example["input_data"]) + prediction_compare.the_local_topic_distribution_is( + self, example["topic_distribution"]) + prediction_compare.i_create_a_local_bigml_model(self, + model_type="topic_model") + prediction_compare.i_create_a_local_bigml_model_prediction( + self, example["input_data"], + prediction_type="topic_distribution") + ref_distribution = dict( + zip([t["name"] for t in self.bigml["local_model"].topics], + json.loads(example["topic_distribution"]))) + prediction_compare.the_local_bigml_prediction_is( + self, ref_distribution, prediction_type="topic_distribution", + precision=4) + + + def test_scenario5(self): + """ + Scenario: Successfully comparing association sets: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until 
the association is ready less than secs + And I create a local association + When I create an association set for "" + Then the association set is like the contents of "" + And I create a local association set for "" + Then the local association set is like the contents of "" + And I create a local bigml model prediction for "" + Then the local bigml model prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "association_set_file", "input_data"] + examples = [ + ['data/groceries.csv', '20', '20', '50', '{"fields": {"00000": {"optype": "text", "term_analysis": {"token_mode": "all", "language": "en"}}}}', 'data/associations/association_set.json', '{"field1": "cat food"}']] + show_doc(self.test_scenario5) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + association_create.i_create_an_association_from_dataset(self) + association_create.the_association_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_association(self) + prediction_create.i_create_an_association_set( + self, example["input_data"]) + prediction_compare.the_association_set_is_like_file( + self, example["association_set_file"]) + prediction_compare.i_create_a_local_association_set( + self, example["input_data"]) + prediction_compare.the_local_association_set_is_like_file( + self, example["association_set_file"]) + prediction_compare.i_create_a_local_bigml_model(self, + model_type="association") + prediction_compare.i_create_a_local_bigml_model_prediction( + self, example["input_data"], prediction_type="rules") + with 
open(res_filename(example["association_set_file"])) as handler: + rules = {"rules": json.load(handler)} + prediction_compare.the_local_bigml_prediction_is( + self, rules, prediction_type="rules", precision=4) + + + def test_scenario6(self): + """ + Scenario: Successfully comparing predictions for ensembles: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an ensemble with "" + And I wait until the ensemble is ready less than secs + And I create a local ensemble + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "model_conf"] + examples = [ + ['data/iris_unbalanced.csv', '30', '30', '120', + '{"petal width": 4}', '000004', 'Iris-virginica', + '{"boosting": {"iterations": 5}, "number_of_models": 5}'], + ['data/grades.csv', '30', '30', '120', '{"Midterm": 20}', + '000005', 61.61036, + '{"boosting": {"iterations": 5}, "number_of_models": 5}']] + show_doc(self.test_scenario6) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_create.i_create_an_ensemble_with_params( + self, example["model_conf"]) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"]) + ensemble_create.create_local_ensemble(self) + prediction_create.i_create_an_ensemble_prediction( + self, 
example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_ensemble_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario7(self): + """ + Scenario: Successfully comparing predictions for ensembles with proportional missing strategy: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an ensemble with "" + And I wait until the ensemble is ready less than secs + And I create a local ensemble + When I create a proportional missing strategy prediction for "" with <"operating"> + Then the prediction for "" is "" + And the confidence for the prediction is "" + And I create a proportional missing strategy local prediction for "" with <"operating"> + Then the local prediction is "" + And the local prediction's confidence is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "confidence", + "model_conf", "operating"] + examples = [ + ['data/iris.csv', '30', '30', '80', '{}', '000004', 'Iris-virginica', '0.33784', '{"boosting": {"iterations": 5}}', {}], + ['data/iris.csv', '30', '30', '80', '{}', '000004', 'Iris-versicolor', '0.27261', '{"number_of_models": 5}', {"operating_kind": "confidence"}], + ['data/grades.csv', '30', '30', '50', '{}', '000005', '70.505792', '30.7161', '{"number_of_models": 5}', {}]] + + show_doc(self.test_scenario7) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self,
shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_create.i_create_an_ensemble_with_params( + self, example["model_conf"]) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"]) + ensemble_create.create_local_ensemble(self) + prediction_create.i_create_an_ensemble_proportional_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_create.the_confidence_is(self, example["confidence"]) + prediction_create.create_local_ensemble_proportional_prediction_with_confidence( + self, example["input_data"], example["operating"]) + prediction_compare.the_local_ensemble_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) + + def test_scenario7b(self): + """ + Scenario: Successfully comparing predictions for ensembles with proportional missing strategy: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an ensemble with "" + And I wait until the ensemble is ready less than secs + And I create a local ensemble + When I create a proportional missing strategy prediction for "" with <"operating"> + Then the prediction for "" is "" + And the confidence for the prediction is "" + And I create a proportional missing strategy local prediction for "" with <"operating"> + Then the local prediction is "" + And the local prediction's confidence is "" + + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "confidence", + "model_conf", "operating"] + examples = [ + ['data/grades.csv', '30', '30', '80', + '{"Midterm": 20}', '000005', '54.82214', '25.89672', + '{"number_of_models":
5}', {"operating_kind": "confidence"}], + ['data/grades.csv', '30', '30', '80', '{"Midterm": 20}', + '000005', '45.4573', '29.58403', '{"number_of_models": 5}', {}], + ['data/grades.csv', '30', '30', '80', + '{"Midterm": 20, "Tutorial": 90, "TakeHome": 100}', '000005', + '42.814', '31.51804', '{"number_of_models": 5}', {}]] + show_doc(self.test_scenario7b) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + ensemble_create.i_create_an_ensemble_with_params( + self, example["model_conf"]) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"]) + ensemble_create.create_local_ensemble(self) + prediction_create.i_create_an_ensemble_proportional_prediction( + self, example["input_data"], example["operating"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_create.the_confidence_is(self, example["confidence"]) + prediction_create.create_local_ensemble_proportional_prediction_with_confidence( + self, example["input_data"], example["operating"]) + prediction_compare.the_local_ensemble_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) + + def test_scenario8(self): + """ + Scenario: Successfully comparing predictions for ensembles: + Given I create a local ensemble predictor from "" + And I create a local prediction for "" + Then the local prediction is "" + """ + headers = ["directory", "input_data", "prediction"] + examples = [ + ['bigml/tests/my_ensemble', '{"petal width": 4}', 68.1258030739]] + show_doc(self.test_scenario8) + for 
example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + ensemble_create.create_local_ensemble_predictor( + self, example["directory"]) + prediction_compare.i_create_a_local_ensemble_prediction( + self, example["input_data"]) + prediction_compare.the_local_ensemble_prediction_is( + self, example["prediction"]) + + def test_scenario9(self): + """ + Scenario: Successfully comparing predictions for ensembles with proportional missing strategy in a supervised model: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an esemble with "" + And I wait until the ensemble is ready less than secs + And I create a local ensemble + When I create a proportional missing strategy prediction for "" with <"operating"> + Then the prediction for "" is "" + And the confidence for the prediction is "" + And I create a proportional missing strategy local prediction for "" with <"operating"> + Then the local prediction is "" + And the local prediction's confidence is "" + And I create a local bigml model + Then the local prediction is "" + And the local prediction's confidence is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "confidence", + "model_conf", "operating"] + examples = [ + ['data/iris.csv', '10', '10', '80', '{}', '000004', 'Iris-virginica', '0.33784', '{"boosting": {"iterations": 5}}', {}], + ['data/iris.csv', '10', '10', '80', '{}', '000004', 'Iris-versicolor', '0.27261', '{"number_of_models": 5"}', {"operating_kind": "confidence"}]] + show_doc(self.test_scenario9) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( 
+ self, example["source_wait"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_create.i_create_an_ensemble_with_params( + self, example["model_conf"]) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"]) + ensemble_create.create_local_supervised_ensemble(self) + prediction_create.i_create_an_ensemble_proportional_prediction( + self, example["input_data"], example["operating"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_create.the_confidence_is(self, example["confidence"]) + prediction_create.create_local_ensemble_proportional_prediction_with_confidence( + self, example["input_data"], example["operating"]) + prediction_compare.the_local_ensemble_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) + ensemble_create.create_local_bigml_ensemble(self) + prediction_compare.the_local_ensemble_prediction_is( + self, example["prediction"]) + prediction_compare.the_local_prediction_confidence_is( + self, example["confidence"]) + + def test_scenario10(self): + """ + Scenario: Successfully comparing predictions for fusions: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I retrieve a list of remote models tagged with "" + And I create a fusion from a list of models + And I wait until the fusion is ready less than secs + And I create a local fusion + When I create a prediction for "" + Then the 
prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", "tag", + "input_data", "objective_id", "prediction"] + examples = [ + ['data/iris_unbalanced.csv', '30', '30', '120', + 'my_fusion_tag', '{"petal width": 4}', '000004', + 'Iris-virginica'], + ['data/grades.csv', '30', '30', '120', + 'my_fusion_tag_reg', '{"Midterm": 20}', '000005', 43.65286]] + show_doc(self.test_scenario10) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + tag = example["tag"] + tag_args = '{"tags":["%s"]}' % tag + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model_with(self, tag_args) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with(self, tag_args) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with(self, tag_args) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_retrieve_a_list_of_remote_models( + self, tag) + model_create.i_create_a_fusion(self) + model_create.the_fusion_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_fusion(self) + prediction_create.i_create_a_fusion_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def 
test_scenario11(self): + """ + Scenario: Successfully comparing predictions in operating points for fusions: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I retrieve a list of remote models tagged with "" + And I create a fusion from a list of models + And I wait until the fusion is ready less than secs + And I create a local fusion + When I create a prediction for "" in "" + Then the prediction for "" is "" + And I create a local fusion prediction for "" in "" + Then the local ensemble prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", "tag", + "input_data", "objective_id", "prediction", + "operating_point"] + examples = [ + ['data/iris_unbalanced.csv', '30', '30', '120', + 'my_fusion_tag_11', '{"petal width": 4}', '000004', + 'Iris-virginica', + {"kind": "probability", "threshold": 0.1, + "positive_class": "Iris-setosa"}], + ['data/iris_unbalanced.csv', '30', '30', '120', + 'my_fusion_tag_11_b', '{"petal width": 4}', + '000004', 'Iris-virginica', + {"kind": "probability", "threshold": 0.9, + "positive_class": "Iris-setosa"}]] + show_doc(self.test_scenario11) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + tag = example["tag"] + tag_args = '{"tags":["%s"]}' % tag + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, 
example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model_with(self, tag_args) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with(self, tag_args) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with(self, tag_args) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_retrieve_a_list_of_remote_models(self, tag) + model_create.i_create_a_fusion(self) + model_create.the_fusion_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_fusion(self) + prediction_create.i_create_a_fusion_prediction_op( + self, example["input_data"], example["operating_point"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction_op( + self, example["input_data"], example["operating_point"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario12(self): + """ + Scenario: Successfully comparing predictions for fusions: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I retrieve a list of remote models tagged with "" + And I create a fusion from a list of models + And I wait until the fusion is ready less than secs + And I create a local fusion + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", 
"dataset_wait", "model_wait", + "model_conf", "tag", "input_data", "objective_id", + "prediction"] + tag = "my_fusion_tag_12" + tag_reg = "my_fusion_tag_12_reg" + examples = [ + ['data/iris_unbalanced.csv', '30', '30', '120', + '{"tags":["%s"], "sample_rate": 0.8, "seed": "bigml"}' % tag, tag, + '{"petal width": 4}', '000004', 'Iris-virginica'], + ['data/grades.csv', '30', '30', '120', + '{"tags":["%s"], "sample_rate": 0.8, "seed": "bigml"}' % tag_reg, + tag_reg, '{"Midterm": 20}', '000005', 44.37625]] + show_doc(self.test_scenario12) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model_with( + self, example["model_conf"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with( + self, example["model_conf"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with( + self, example["model_conf"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_retrieve_a_list_of_remote_models( + self, example["tag"]) + model_create.i_create_a_fusion_with_weights(self) + model_create.the_fusion_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_fusion(self) + prediction_create.i_create_a_fusion_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + 
prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario13(self): + """ + Scenario: Successfully comparing predictions for fusions: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than <"dataset_wait"> secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I retrieve a list of remote models tagged with "" + And I create a fusion from a list of models + And I wait until the fusion is ready less than secs + And I create a local fusion + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "tag", "input_data", "objective_id", "prediction"] + examples = [ + ['data/grades.csv', '30', '30', '120', 'my_fusion_tag_lreg', + '{"000000": 10, "000001": 10, "000002": 10, "000003": 10, ' + '"000004": 10}', '000005', 21.01712]] + show_doc(self.test_scenario13) + + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + tag = example["tag"] + tag_args = '{"tags":["%s"]}' % tag + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + linear_create.i_create_a_linear_regression_with_params( + self, tag_args) + linear_create.the_linear_regression_is_finished_in_less_than( + self, 
example["model_wait"]) + prediction_compare.i_retrieve_a_list_of_remote_linear_regressions( + self, tag) + model_create.i_create_a_fusion(self) + model_create.the_fusion_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_fusion(self) + prediction_create.i_create_a_fusion_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario14(self): + """ + Scenario: Successfully comparing predictions for ensembles: + Given I load the full ensemble information from "" + And I create a local ensemble from the ensemble + models list + And I create a local prediction for "" + Then the local prediction is "" + """ + headers = ["directory", "input_data", "prediction"] + examples = [ + ['bigml/tests/mlflow_ensemble', '{"plasma glucose": 240}', 'true']] + show_doc(self.test_scenario14) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + model_list = ensemble_create.load_full_ensemble( + self, example["directory"]) + ensemble_create.create_local_ensemble_from_list( + self, model_list) + prediction_compare.i_create_a_local_ensemble_prediction( + self, example["input_data"]) + prediction_compare.the_local_ensemble_prediction_is( + self, example["prediction"]) diff --git a/bigml/tests/test_34_time_series.py b/bigml/tests/test_34_time_series.py new file mode 100644 index 00000000..4b5fb472 --- /dev/null +++ b/bigml/tests/test_34_time_series.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# 
not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating time series forecasts + +""" +import json + +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_time_series_steps as time_series_create +from . import create_forecast_steps as forecast_create +from . import compare_predictions_steps as prediction_compare + + +class TestTimeSeries: + """Testing Time Series methods""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating forecasts from a dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create time-series from a dataset + And I wait until the time series is ready less than secs + And I update the time series name to "" + When I wait until the time series is ready less than secs + Then the time series name is "" + And I create a forecast for "" + Then the forecasts are "" + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + 
"time_series_name", "input_data", "forecast_points"] + examples = [ + ['data/grades.csv', '30', '30', '50', 'my new time series name', + '{"000005": {"horizon": 5}}', + '{"000005": [{"point_forecast": [73.96192, 74.04106, 74.12029, ' + '74.1996, 74.27899], "model": "M,M,N"}]}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + time_series_create.i_create_a_time_series(self) + time_series_create.the_time_series_is_finished_in_less_than( + self, example["model_wait"]) + time_series_create.i_update_time_series_name( + self, example["time_series_name"]) + time_series_create.the_time_series_is_finished_in_less_than( + self, example["model_wait"]) + time_series_create.i_check_time_series_name( + self, example["time_series_name"]) + forecast_create.i_create_a_forecast( + self, example["input_data"]) + forecast_create.the_forecast_is(self, example["forecast_points"]) + prediction_compare.i_create_a_local_bigml_model(self, + model_type="time_series") + prediction_compare.i_create_a_local_bigml_model_prediction( + self, example["input_data"], prediction_type="forecast") + forecast_points = json.loads(example["forecast_points"]) + prediction_compare.the_local_bigml_prediction_is( + self, {"forecast": forecast_points}, prediction_type="forecast") diff --git a/bigml/tests/test_35_b_compare_predictions.py b/bigml/tests/test_35_b_compare_predictions.py new file mode 100644 index 00000000..7b768ff6 --- /dev/null +++ b/bigml/tests/test_35_b_compare_predictions.py @@ -0,0 +1,118 @@ +# -*- coding: utf-8 -*- +#pylint: 
disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Comparing remote and local predictions + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_time_series_steps as time_series_create +from . import create_forecast_steps as forecast_create +from . import compare_forecasts_steps as forecast_compare +from . import create_pca_steps as pca_create +from . import create_projection_steps as projection_create +from . 
import compare_predictions_steps as compare_predictions + + +class TestComparePrediction: + """Testing local model predictions""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully comparing forecasts from time series: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a time series with "" + And I wait until the time series is ready less than secs + And I create a local time series + When I create a forecast for "" + Then the forecast is "" + And I create a local forecast for "" + Then the local forecast is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "forecast", "model_conf"] + examples = [ + ['data/grades.csv', '30', '30', '120', + '{"000005": {"horizon": 5}}', + '{"000005": [{"point_forecast": [73.96192, 74.04106, 74.12029, ' + '74.1996, 74.27899], "model": "M,M,N"}]}', + '{"objective_fields": ["000001", "000005"]}'], + ['data/grades.csv', '30', '30', '120', + '{"000005": {"horizon": 5, "ets_models": {"names": ["M,N,N"], ' + '"criterion": "aic", "limit": 3}}}', + '{"000005": [{"point_forecast": [68.39832, 68.39832, 68.39832, ' + '68.39832, 68.39832], "model": "M,N,N"}]}', + '{"objective_fields": ["000001", "000005"]}'], + ['data/grades.csv', '30', '30', '120', + '{"000005": {"horizon": 5, "ets_models": {"names": ["A,A,N"], ' + '"criterion": "aic", "limit": 3}}}', + '{"000005": [{"point_forecast": [72.46247, 72.56247, 72.66247, ' + '72.76247, 72.86247], "model": "A,A,N"}]}', + '{"objective_fields": ["000001", "000005"]}'], + 
['data/grades.csv', '30', '30', '120', + '{"000005": {"horizon": 5}, "000001": {"horizon": 3, ' + '"ets_models": {"criterion": "aic", "limit": 2}}}', + '{"000005": [{"point_forecast": [73.96192, 74.04106, ' + '74.12029, 74.1996, 74.27899], "model": "M,M,N"}], ' + '"000001": [{"point_forecast": [55.51577, 89.69111, 82.04935],' + ' "model": "A,N,A"}, {"point_forecast": [56.67419, 91.89657, ' + '84.70017], "model": "A,A,A"}]}', + '{"objective_fields": ["000001", "000005"]}']] + show_doc(self.test_scenario1) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + time_series_create.i_create_a_time_series_with_params( + self, example["model_conf"]) + time_series_create.the_time_series_is_finished_in_less_than( + self, example["model_wait"]) + time_series_create.create_local_time_series(self) + forecast_create.i_create_a_forecast(self, example["input_data"]) + forecast_create.the_forecast_is(self, example["forecast"]) + forecast_compare.i_create_a_local_forecast( + self, example["input_data"]) + forecast_compare.the_local_forecast_is(self, example["forecast"]) diff --git a/bigml/tests/test_35_c_compare_predictions.py b/bigml/tests/test_35_c_compare_predictions.py new file mode 100644 index 00000000..0a39e66d --- /dev/null +++ b/bigml/tests/test_35_c_compare_predictions.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use 
this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Comparing remote and local predictions + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_time_series_steps as time_series_create +from . import create_forecast_steps as forecast_create +from . import compare_forecasts_steps as forecast_compare +from . import create_pca_steps as pca_create +from . import create_projection_steps as projection_create +from . 
import compare_predictions_steps as compare_predictions + + +class TestComparePrediction: + """Test local and remote predictions""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario3(self): + """ + Scenario: Successfully comparing forecasts from time series with "M" seasonality + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a time series with "" + And I wait until the time series is ready less than secs + And I create a local time series + When I create a forecast for "" + Then the forecast is "" + And I create a local forecast for "" + Then the local forecast is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "forecast", "model_conf"] + examples = [ + ['data/grades.csv', '30', '30', '120', '{"000005": {"horizon": 5, "ets_models": {"names": ["M,N,M"], "criterion": "aic", "limit": 3}}}', '{"000005": [{"point_forecast": [68.99775, 72.76777, 66.5556, 70.90818, 70.92998], "model": "M,N,M"}]}', '{"objective_fields": ["000001", "000005"], "period": 12}'], + ['data/grades.csv', '30', '30', '120', '{"000005": {"horizon": 5, "ets_models": {"names": ["M,A,M"], "criterion": "aic", "limit": 3}}}', '{"000005": [{"point_forecast": [70.65993, 78.20652, 69.64806, 75.43716, 78.13556], "model": "M,A,M"}]}', '{"objective_fields": ["000001", "000005"], "period": 12}']] + show_doc(self.test_scenario3) + + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"], 
shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + time_series_create.i_create_a_time_series_with_params( + self, example["model_conf"]) + time_series_create.the_time_series_is_finished_in_less_than( + self, example["model_wait"]) + time_series_create.create_local_time_series(self) + forecast_create.i_create_a_forecast(self, example["input_data"]) + forecast_create.the_forecast_is(self, example["forecast"]) + forecast_compare.i_create_a_local_forecast( + self, example["input_data"]) + forecast_compare.the_local_forecast_is( + self, example["forecast"]) + + def test_scenario3b(self): + """ + Scenario: Successfully comparing forecasts from time series with "M" seasonality + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a time series with "" + And I wait until the time series is ready less than secs + And I create a local time series + When I create a forecast for "" + Then the forecast is "" + And I create a local forecast for "" + Then the local forecast is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "forecast", "model_conf"] + examples = [ + ['data/grades.csv', '30', '30', '120', + '{"000005": {"horizon": 5, "ets_models": {"names": ["M,M,M"], ' + '"criterion": "aic", "limit": 3}}}', + '{"000005": [{"point_forecast": [71.75055, 80.67195, 70.81368, ' + '79.84999, 78.27634], "model": "M,M,M"}]}', + '{"objective_fields": ["000001", "000005"], "period": 12}']] + show_doc(self.test_scenario3) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + 
source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + time_series_create.i_create_a_time_series_with_params( + self, example["model_conf"]) + time_series_create.the_time_series_is_finished_in_less_than( + self, example["model_wait"]) + time_series_create.create_local_time_series(self) + forecast_create.i_create_a_forecast(self, example["input_data"]) + forecast_create.the_forecast_is(self, example["forecast"]) + forecast_compare.i_create_a_local_forecast( + self, example["input_data"]) + forecast_compare.the_local_forecast_is(self, example["forecast"]) diff --git a/bigml/tests/test_35_compare_predictions.py b/bigml/tests/test_35_compare_predictions.py new file mode 100644 index 00000000..248b9520 --- /dev/null +++ b/bigml/tests/test_35_compare_predictions.py @@ -0,0 +1,228 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Comparing remote and local predictions + +""" +import sys + +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . 
import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_time_series_steps as time_series_create +from . import create_forecast_steps as forecast_create +from . import compare_forecasts_steps as forecast_compare +from . import create_pca_steps as pca_create +from . import create_projection_steps as projection_create +from . import compare_predictions_steps as compare_predictions + + +class TestComparePrediction: + """Testing local and remote predictions""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario4(self): + """ + Scenario: Successfully comparing forecasts from time series with trivial models + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a time series with "" + And I wait until the time series is ready less than secs + And I create a local time series + When I create a forecast for "" + Then the forecast is "" + And I create a local forecast for "" + Then the local forecast is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "forecast", "model_conf"] + examples = [ + ['data/grades.csv', '10', '100', '100', + '{"000005": {"horizon": 5, "ets_models": {"names": ["naive"]}}}', + '{"000005": [{"point_forecast": [61.39, 61.39, 61.39, 61.39, ' + '61.39], "model": "naive"}]}', + '{"objective_fields": ["000001", "000005"], "period": 1}'], + ['data/grades.csv', '10', '100', '100', + '{"000005": {"horizon": 5, "ets_models": {"names": ["naive"]}}}', + '{"000005": 
[{"point_forecast": [78.89, 61.39, 78.89, 61.39, ' + '78.89], "model": "naive"}]}', + '{"objective_fields": ["000001", "000005"], "period": 2}'], + ['data/grades.csv', '10', '100', '100', + '{"000005": {"horizon": 5, "ets_models": {"names": ["mean"]}}}', + '{"000005": [{"point_forecast": [68.45974, 68.45974, 68.45974, ' + '68.45974, 68.45974], "model": "mean"}]}', + '{"objective_fields": ["000001", "000005"], "period": 1}'], + ['data/grades.csv', '10', '100', '100', + '{"000005": {"horizon": 5, "ets_models": {"names": ["mean"]}}}', + '{"000005": [{"point_forecast": [69.79553, 67.15821, 69.79553, ' + '67.15821, 69.79553], "model": "mean"}]}', + '{"objective_fields": ["000001", "000005"], "period": 2}'], + ['data/grades.csv', '10', '100', '100', + '{"000005": {"horizon": 5, "ets_models": {"names": ["drift"]}}}', + '{"000005": [{"point_forecast": [61.50545, 61.6209, 61.73635, ' + '61.8518, 61.96725], "model": "drift"}]}', + '{"objective_fields": ["000001", "000005"], "period": 1}'], + ['data/grades.csv', '10', '100', '100', + '{"000005": {"horizon": 5, "ets_models": {"names": ["drift"]}}}', + '{"000005": [{"point_forecast": [61.50545, 61.6209, 61.73635, ' + '61.8518, 61.96725], "model": "drift"}]}', + '{"objective_fields": ["000001", "000005"], "period": 2}']] + show_doc(self.test_scenario4) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + time_series_create.i_create_a_time_series_with_params( + self, example["model_conf"]) + time_series_create.the_time_series_is_finished_in_less_than( + self, example["model_wait"]) + 
time_series_create.create_local_time_series(self) + forecast_create.i_create_a_forecast( + self, example["input_data"]) + forecast_create.the_forecast_is( + self, example["forecast"]) + forecast_compare.i_create_a_local_forecast( + self, example["input_data"]) + forecast_compare.the_local_forecast_is( + self, example["forecast"]) + + + def test_scenario5(self): + """ + Scenario: Successfully comparing projections for PCAs: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a PCA with "" + And I wait until the PCA is ready less than secs + And I create a local PCA + When I create a projection for "" + Then the projection is "" + And I create a local projection for "" + Then the local projection is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "projection", "model_conf"] + examples = [ + ['data/iris.csv', '30', '30', '120', '{}', + '{"PC2": 0, "PC3": 0, "PC1": 0, "PC6": 0, "PC4": 5e-05, ' + '"PC5": 0}', '{}'], + ['data/iris.csv', '30', '30', '120', '{"petal length": 1}', + '{"PC2": 0.08708, "PC3": 0.20929, "PC1": 1.56084, ' + '"PC6": -1.34463, "PC4": 0.7295, "PC5": -1.00876}', '{}']] + show_doc(self.test_scenario5) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + pca_create.i_create_a_pca_with_params( + self, example["model_conf"]) + pca_create.the_pca_is_finished_in_less_than( + self, example["model_wait"]) + compare_predictions.create_local_pca(self) + 
projection_create.i_create_a_projection( + self, example["input_data"]) + projection_create.the_projection_is( + self, example["projection"]) + compare_predictions.i_create_a_local_projection( + self, example["input_data"]) + compare_predictions.the_local_projection_is( + self, example["projection"]) + + def test_scenario5_b(self): + """ + Scenario: Successfully comparing projections for PCAs: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a PCA with "" + And I wait until the PCA is ready less than secs + And I create a local PCA + When I create a projection for "" + Then the projection is "" + And I create a local projection for "" + Then the local projection is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "projection", "model_conf"] + examples = [ + ['data/iris.csv', '30', '30', '120', + '{"species": "Iris-versicolor"}', + '{"PC2": 1.8602, "PC3": -2.00864, "PC1": -0.61116, ' + '"PC6": -0.66983, "PC4": -2.44618, "PC5": 0.43414}', '{}'], + ['data/iris.csv', '30', '30', '120', + '{"petal length": 1, "sepal length": 0, "petal width": 0, ' + '"sepal width": 0, "species": "Iris-versicolor"}', + '{"PC2": 7.18009, "PC3": 6.51511, "PC1": 2.78155, ' + '"PC6": 0.21372, "PC4": -1.94865, "PC5": 0.57646}', '{}']] + show_doc(self.test_scenario5_b) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + pca_create.i_create_a_pca_with_params( + self, example["model_conf"]) +
pca_create.the_pca_is_finished_in_less_than( + self, example["model_wait"]) + compare_predictions.create_local_pca(self) + projection_create.i_create_a_projection( + self, example["input_data"]) + projection_create.the_projection_is(self, example["projection"]) + compare_predictions.i_create_a_local_projection( + self, example["input_data"]) + compare_predictions.the_local_projection_is( + self, example["projection"]) diff --git a/bigml/tests/test_35_d_compare_predictions.py b/bigml/tests/test_35_d_compare_predictions.py new file mode 100644 index 00000000..442ac2cf --- /dev/null +++ b/bigml/tests/test_35_d_compare_predictions.py @@ -0,0 +1,110 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Comparing remote and local predictions + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_time_series_steps as time_series_create +from . import create_forecast_steps as forecast_create +from . import compare_forecasts_steps as forecast_compare +from . import create_pca_steps as pca_create +from . import create_projection_steps as projection_create +from . 
import compare_predictions_steps as compare_predictions + + +class TestComparePrediction: + """Test local and remote predictions""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario2(self): + """ + Scenario: Successfully comparing forecasts from time series with "A" seasonality + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a time series with "" + And I wait until the time series is ready less than secs + And I create a local time series + When I create a forecast for "" + Then the forecast is "" + And I create a local forecast for "" + Then the local forecast is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "forecast", "model_conf"] + examples = [ + + ['data/grades.csv', '30', '30', '300', + '{"000005": {"horizon": 5}}', + '{"000005": [{"point_forecast": [73.96192, 74.04106, 74.12029, ' + '74.1996, 74.27899], "model": "M,M,N"}]}', + '{"objective_fields": ["000001", "000005"], "period": 12}'], + ['data/grades.csv', '30', '30', '300', + '{"000005": {"horizon": 5, "ets_models": {"names": ["M,N,A"], ' + '"criterion": "aic", "limit": 3}}}', + '{"000005": [{"point_forecast": [67.43222, 68.24468, ' + '64.14437, 67.5662, 67.79028], "model": "M,N,A"}]}', + '{"objective_fields": ["000001", "000005"], "period": 12}'], + ['data/grades.csv', '30', '30', '300', + '{"000005": {"horizon": 5, "ets_models": {"names": ["A,A,A"], ' + '"criterion": "aic", "limit": 3}}}', + '{"000005": [{"point_forecast": [74.73553, 71.6163, 71.90264, ' + '76.4249, 75.06982], "model": "A,A,A"}]}', + 
'{"objective_fields": ["000001", "000005"], "period": 12}']] + show_doc(self.test_scenario2) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + time_series_create.i_create_a_time_series_with_params( + self, example["model_conf"]) + time_series_create.the_time_series_is_finished_in_less_than( + self, example["model_wait"]) + time_series_create.create_local_time_series(self) + forecast_create.i_create_a_forecast(self, example["input_data"]) + forecast_create.the_forecast_is(self, example["forecast"]) + forecast_compare.i_create_a_local_forecast( + self, example["input_data"]) + forecast_compare.the_local_forecast_is(self, example["forecast"]) diff --git a/bigml/tests/test_35_e_compare_predictions.py b/bigml/tests/test_35_e_compare_predictions.py new file mode 100644 index 00000000..b998b1a4 --- /dev/null +++ b/bigml/tests/test_35_e_compare_predictions.py @@ -0,0 +1,224 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Comparing remote and local predictions + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_time_series_steps as time_series_create +from . import create_forecast_steps as forecast_create +from . import compare_forecasts_steps as forecast_compare +from . import create_pca_steps as pca_create +from . import create_projection_steps as projection_create +from . import compare_predictions_steps as compare_predictions + + +class TestComparePrediction: + """Test predictions""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario6(self): + """ + Scenario: Successfully comparing projections for PCAs: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a PCA with "" + And I wait until the PCA is ready less than secs + And I create a local PCA + When I create a projection for "" + Then the projection is "" + And I create a local projection for "" + Then the local projection is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf", "input_data", "model_conf", "projection"] + examples = [ + ['data/spam_tiny.csv', '30', '30', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "all"}}}}', '{"Message": "early"}', '{}', + '{"PC40": 0.00416, 
"PC38": 0.08267, "PC39": 0.00033, "PC18": 0.28094, ' + '"PC19": -0.15056, "PC14": 0.20643, "PC15": 0.23931, "PC16": 0.03251, ' + '"PC17": 0.02776, "PC10": 0.1424, "PC11": 0.4059, "PC12": -0.1238, ' + '"PC13": 0.15131, "PC43": 0.29617, "PC42": 1.0091, "PC41": 0, ' + '"PC25": 0.07164, "PC24": -0.29904, "PC27": -0.1331, "PC26": -0.18572, ' + '"PC21": 0.25616, "PC20": 0.30424, "PC23": -0.45775, "PC22": -0.3362, ' + '"PC47": -0.13757, "PC49": 0.01864, "PC48": 0.04742, "PC29": -0.16286, ' + '"PC28": 0.42207, "PC32": -0.05917, "PC46": -0.05018, "PC31": -0.13973, ' + '"PC45": -0.05015, "PC36": 0.03017, "PC44": 0, "PC37": -0.06093, ' + '"PC34": 0.25821, "PC35": -0.22194, "PC33": -0.23398, "PC8": 0.01159, ' + '"PC9": -0.16042, "PC2": -0.09202, "PC3": 0.14371, "PC1": 0.65114, ' + '"PC6": -0.43034, "PC7": -0.02563, "PC4": -0.04947, "PC5": -0.07796, ' + '"PC50": -0.00769, "PC30": 0.07813}'], + ['data/spam_tiny.csv', '30', '30', '30', + '{"fields": {"000001": {"optype": "text", "term_analysis": ' + '{"token_mode": "all"}}}}', '{"Message": "mobile call"}','{}', + '{"PC40": 0.31818, "PC38": 0.06912, "PC39": -0.14342, "PC18": 0.22382, ' + '"PC19": 0.18518, "PC14": 0.89231, "PC15": 0.05046, "PC16": -0.00241, ' + '"PC17": 0.54501, "PC10": -0.26463, "PC11": 0.30251, "PC12": 1.16327, ' + '"PC13": 0.16973, "PC43": 0.11952, "PC42": 1.05499, "PC41": 0.51263, ' + '"PC25": 0.02467, "PC24": -0.65128, "PC27": 0.48916, "PC26": -0.45228, ' + '"PC21": -0.44167, "PC20": 0.76896, "PC23": 0.29398, "PC22": 0.06425, ' + '"PC47": 0.70416, "PC49": -0.30313, "PC48": 0.12976, "PC29": -0.34, ' + '"PC28": 0.17406, "PC32": -0.06411, "PC46": 0.69257, "PC31": 0.07523, ' + '"PC45": -0.03461, "PC36": 0.29732, "PC44": 0.14516, "PC37": -0.19109, ' + '"PC34": 0.58399, "PC35": 0.37608, "PC33": -0.00378, "PC8": -0.88156, ' + '"PC9": 0.38233, "PC2": -0.56685, "PC3": 0.56321, "PC1": 0.49171, ' + '"PC6": -0.09854, "PC7": -1.24639, "PC4": 1.50134, "PC5": -0.03161, ' + '"PC50": 0.17349, "PC30": -1.29612}']] + 
show_doc(self.test_scenario6) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + pca_create.i_create_a_pca_with_params(self, example["model_conf"]) + pca_create.the_pca_is_finished_in_less_than( + self, example["model_wait"]) + projection_create.i_create_a_projection( + self, example["input_data"]) + projection_create.the_projection_is(self, example["projection"]) + compare_predictions.create_local_pca(self) + compare_predictions.i_create_a_local_projection( + self, example["input_data"]) + compare_predictions.the_local_projection_is( + self, example["projection"]) + + def test_scenario7(self): + """ + Scenario: Successfully comparing remote and local predictions + with raw date input for PCAs: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a PCA + And I wait until the PCA is ready less than secs + And I create a local PCA + When I create a projection for "" + Then the projection is "" + And I create a local projection for "" + Then the local projection is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "projection"] + examples = [ + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1910-05-08T19:10:23.106","cat-0":"cat2",' + '"target-2":0.4}', + '{"PC8": -1.54293, "PC9": -0.94836, "PC2": 0.78176, ' + '"PC3": -0.62082, "PC1": 0.89614, "PC10": 1.06575, ' + '"PC11": 1.3211, "PC4": 1.90088, "PC5": 0.24197, ' + '"PC7": -0.37701, "PC6": 2.25007}'], + ['data/dates2.csv', '20', '30', '60', + 
'{"time-1":"1920-06-30T20:21:20.320","cat-0":"cat1",' + '"target-2":0.2}', + '{"PC8": 0.3148, "PC9": -0.61742, "PC2": 0.93411, ' + '"PC3": 1.80286, "PC1": 0.36425, "PC10": 0.7364, ' + '"PC11": 2.25863, "PC4": -1.50319, "PC5": 0.17088, ' + '"PC7": 0.51738, "PC6": 0.42403}'], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1932-01-30T19:24:11.440","cat-0":"cat2",' + '"target-2":0.1}', + '{"PC8": -0.86728, "PC9": -1.85164, "PC2": 2.13206, ' + '"PC3": 0.58449, "PC1": 0.28379, "PC10": 2.05465, ' + '"PC11": 0.44372, "PC4": 1.27236, "PC5": 0.99468, ' + '"PC7": -0.32496, "PC6": 0.52217}'], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1950-11-06T05:34:05.602","cat-0":"cat1" ,' + '"target-2":0.9}', + '{"PC8": 2.49563, "PC9": -0.57774, "PC2": -0.76354, ' + '"PC3": 0.19215, "PC1": 0.99197, "PC10": -1.21017, ' + '"PC11": 1.55778, "PC4": -0.24013, "PC5": -0.38492, ' + '"PC7": 1.82607, "PC6": 0.3736}'], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1969-7-14 17:36","cat-0":"cat2","target-2":0.9}', + '{"PC8": -0.41111, "PC9": -5.32959, "PC2": -1.25322, ' + '"PC3": 2.93113, "PC1": 2.07444, "PC10": 4.8808, ' + '"PC11": 0.4185, "PC4": 3.13876, "PC5": 3.70259, ' + '"PC7": 0.55665, "PC6": 5.16873}'], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"2001-01-05T23:04:04.693","cat-0":"cat2",' + '"target-2":0.01}', + '{"PC8": -1.10654, "PC9": -0.34137, "PC2": 1.73362, ' + '"PC3": -0.34799, "PC1": 2.32583, "PC10": 0.94566, ' + '"PC11": 0.53787, "PC4": 2.77385, "PC5": -0.1017, ' + '"PC7": 0.20156, "PC6": -0.44476}'], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"2011-04-01T00:16:45.747","cat-0":"cat2",' + '"target-2":0.32}', + '{"PC8": -0.514, "PC9": 0.38349, "PC2": -0.27037, ' + '"PC3": -1.82588, "PC1": 1.05737, "PC10": 0.08607, ' + '"PC11": -0.97078, "PC4": 2.10426, "PC5": 1.86843, ' + '"PC7": 1.55632, "PC6": 0.42395}'], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1969-W29-1T17:36:39Z","cat-0":"cat1",' + '"target-2":0.87}', + '{"PC8": 2.05525, 
"PC9": 1.50754, "PC2": 6.27524, ' + '"PC3": 7.74224, "PC1": 5.30354, "PC10": -6.40442, ' + '"PC11": 6.90365, "PC4": -1.44431, "PC5": 2.16179, ' + '"PC7": 1.35718, "PC6": 5.02426}']] + show_doc(self.test_scenario7) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + pca_create.i_create_a_pca(self, shared=example["data"]) + pca_create.the_pca_is_finished_in_less_than( + self, example["model_wait"]) + projection_create.i_create_a_projection( + self, example["input_data"]) + projection_create.the_projection_is( + self, example["projection"]) + compare_predictions.create_local_pca(self, pre_model=True) + compare_predictions.i_create_a_local_projection( + self, example["input_data"], + pre_model=self.bigml["local_pipeline"]) + compare_predictions.the_local_projection_is( + self, example["projection"]) diff --git a/bigml/tests/test_36_compare_predictions.py b/bigml/tests/test_36_compare_predictions.py new file mode 100644 index 00000000..c8a76e3d --- /dev/null +++ b/bigml/tests/test_36_compare_predictions.py @@ -0,0 +1,635 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Comparing remote and local predictions + +""" +import json + +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_anomaly_steps as anomaly_create +from . import create_model_steps as model_create +from . import create_ensemble_steps as ensemble_create +from . import create_linear_steps as linear_create +from . import create_prediction_steps as prediction_create +from . import compare_predictions_steps as prediction_compare + + +class TestComparePrediction: + """Test local and remote predictions""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully comparing predictions for deepnets: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a deepnet with objective "" and "" + And I wait until the deepnet is ready less than secs + And I create a local deepnet + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" + """ + 
headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "model_conf"] + examples = [ + ['data/iris.csv', '30', '50', '60', '{"petal width": 4}', '000004', + 'Iris-virginica', '{}'], + ['data/iris.csv', '30', '50', '60', + '{"sepal length": 4.1, "sepal width": 2.4}', '000004', + 'Iris-versicolor', '{}'], + ['data/iris_missing2.csv', '30', '50', '60', '{}', '000004', + 'Iris-versicolor', '{}'], + ['data/grades.csv', '30', '50', '60', '{}', '000005', 47.04852, + '{}'], + ['data/spam.csv', '30', '50', '60', '{}', '000000', 'ham', '{}']] + show_doc(self.test_scenario1) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_deepnet_with_objective_and_params( + self, example["objective_id"], example["model_conf"]) + model_create.the_deepnet_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_deepnet(self) + prediction_create.i_create_a_deepnet_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"], + precision=3) + prediction_compare.i_create_a_local_deepnet_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"], precision=3) + + def test_scenario2(self): + """ + Scenario: Successfully comparing predictions in operating points for models: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset 
is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + When I create a prediction for "" in "" + Then the prediction for "" is "" + And I create a local prediction for "" in "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "prediction", "operating_point", + "objective_id"] + examples = [ + ['data/iris.csv', '10', '50', '50', '{"petal width": 4}', + 'Iris-setosa', + {"kind": "probability", "threshold": 0.1, + "positive_class": "Iris-setosa"}, "000004"], + ['data/iris.csv', '10', '50', '50', '{"petal width": 4}', + 'Iris-versicolor', + {"kind": "probability", "threshold": 0.9, + "positive_class": "Iris-setosa"}, "000004"], + ['data/iris.csv', '10', '50', '50', + '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-setosa', + {"kind": "confidence", "threshold": 0.1, + "positive_class": "Iris-setosa"}, "000004"], + ['data/iris.csv', '10', '50', '50', + '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-versicolor', + {"kind": "confidence", "threshold": 0.9, + "positive_class": "Iris-setosa"}, "000004"]] + show_doc(self.test_scenario2) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_model(self) + prediction_create.i_create_a_prediction_op( + self, example["input_data"], example["operating_point"]) + 
prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction_op( + self, example["input_data"], example["operating_point"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario3(self): + """ + Scenario: Successfully comparing predictions for deepnets with operating point: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a deepnet with objective "" and "" + And I wait until the deepnet is ready less than secs + And I create a local deepnet + When I create a prediction with operating point "" for "" + Then the prediction for "" is "" + And I create a local prediction with operating point "" for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "model_conf", + "operating_point"] + examples = [ + ['data/iris.csv', '10', '50', '60', '{"petal width": 4}', '000004', + 'Iris-setosa', '{}', {"kind": "probability", "threshold": 1, + "positive_class": "Iris-virginica"}]] + show_doc(self.test_scenario3) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_deepnet_with_objective_and_params( + self, example["objective_id"], example["model_conf"]) + model_create.the_deepnet_is_finished_in_less_than( + self, example["model_wait"]) + 
prediction_compare.i_create_a_local_deepnet(self) + prediction_create.i_create_a_deepnet_prediction_with_op( + self, example["input_data"], example["operating_point"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_deepnet_prediction_with_op( + self, example["input_data"], example["operating_point"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario4(self): + """ + Scenario: Successfully comparing predictions in operating points for ensembles: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an ensemble + And I wait until the ensemble is ready less than secs + And I create a local ensemble + When I create a prediction for "" in "" + Then the prediction for "" is "" + And I create a local ensemble prediction for "" in "" + Then the local ensemble prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "prediction", "operating_point", + "objective_id"] + examples = [ + ['data/iris.csv', '10', '50', '50', '{"petal width": 4}', + 'Iris-setosa', + {"kind": "probability", "threshold": 0.1, + "positive_class": "Iris-setosa"}, "000004"], + ['data/iris.csv', '10', '50', '50', '{"petal width": 4}', + 'Iris-virginica', + {"kind": "probability", "threshold": 0.9, + "positive_class": "Iris-setosa"}, "000004"], + ['data/iris.csv', '10', '50', '50', + '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-setosa', + {"kind": "confidence", "threshold": 0.1, + "positive_class": "Iris-setosa"}, "000004"], + ['data/iris.csv', '10', '50', '50', + '{"sepal length": 4.1, "sepal width": 2.4}', 'Iris-versicolor', + {"kind": "confidence", "threshold": 0.9, + "positive_class": "Iris-setosa"}, "000004"]] + show_doc(self.test_scenario4) + for example in examples: + 
example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_create.i_create_an_ensemble(self, shared=example["data"]) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + ensemble_create.create_local_ensemble(self) + prediction_create.i_create_an_ensemble_prediction_op( + self, example["input_data"], example["operating_point"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_ensemble_prediction_op( + self, example["input_data"], example["operating_point"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario5(self): + """ + Scenario: Successfully comparing predictions in operating kind for models: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I create a local model + When I create a prediction for "" in "" + Then the prediction for "" is "" + And I create a local prediction for "" in "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "prediction", "operating_kind", + "objective_id"] + examples = [ + ['data/iris.csv', '10', '50', '50', + '{"petal length": 2.46, "sepal length": 5}', 'Iris-versicolor', + "probability", "000004"], + ['data/iris.csv', '10', '50', '50', + '{"petal length": 2.46, "sepal 
length": 5}', 'Iris-versicolor', + "confidence", "000004"], + ['data/iris.csv', '10', '50', '50', '{"petal length": 2}', + 'Iris-setosa', "probability", "000004"], + ['data/iris.csv', '10', '50', '50', '{"petal length": 2}', + 'Iris-setosa', "confidence", "000004"]] + show_doc(self.test_scenario5) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_model(self) + prediction_create.i_create_a_prediction_op_kind( + self, example["input_data"], example["operating_kind"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction_op_kind( + self, example["input_data"], example["operating_kind"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario6(self): + """ + Scenario: Successfully comparing predictions for deepnets with operating kind: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a deepnet with objective "" and "" + And I wait until the deepnet is ready less than secs + And I create a local deepnet + When I create a prediction with operating kind "" for "" + Then the prediction for "" is "" + And I create a local prediction with operating kind "" for "" + Then the
local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "model_conf", + "operating_kind"] + examples = [ + ['data/iris.csv', '10', '50', '60', '{"petal length": 2.46}', + '000004', 'Iris-setosa', '{}', "probability"], + ['data/iris.csv', '10', '50', '60', '{"petal length": 6}', + '000004', 'Iris-versicolor', '{}', "probability"]] + show_doc(self.test_scenario6) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_deepnet_with_objective_and_params( + self, example["objective_id"], example["model_conf"]) + model_create.the_deepnet_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_deepnet(self) + prediction_create.i_create_a_deepnet_prediction_op_kind( + self, example["input_data"], example["operating_kind"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_deepnet_prediction_op_kind( + self, example["input_data"], example["operating_kind"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario7(self): + """ + Scenario: Successfully comparing predictions in operating kind for ensembles: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an ensemble + And I wait until the ensemble is ready less than secs + And I create a local
ensemble + When I create a prediction for "" in "" + Then the prediction for "" is "" + And I create a local ensemble prediction for "" in "" + Then the local ensemble prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "prediction", "operating_kind", + "objective_id"] + examples = [ + ['data/iris.csv', '10', '50', '50', '{"petal length": 2.46}', + 'Iris-versicolor', "probability", "000004"], + ['data/iris.csv', '10', '50', '50', '{"petal length": 2}', + 'Iris-setosa', "probability", "000004"], + ['data/iris.csv', '10', '50', '50', '{"petal length": 2.46}', + 'Iris-versicolor', "confidence", "000004"], + ['data/iris.csv', '10', '50', '50', '{"petal length": 2}', + 'Iris-setosa', "confidence", "000004"], + ['data/iris.csv', '10', '50', '50', '{"petal length": 2.46}', + 'Iris-versicolor', "votes", "000004"], + ['data/iris.csv', '10', '50', '50', '{"petal length": 1}', + 'Iris-setosa', "votes", "000004"]] + show_doc(self.test_scenario7) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_create.i_create_an_ensemble(self, shared=example["data"]) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + ensemble_create.create_local_ensemble(self) + prediction_create.i_create_an_ensemble_prediction_op_kind( + self, example["input_data"], example["operating_kind"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_ensemble_prediction_op_kind( + self, 
example["input_data"], example["operating_kind"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario8(self): + """ + Scenario: Successfully comparing predictions for logistic regressions with operating kind: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression with objective "" + And I wait until the logistic regression is ready less than secs + And I create a local logistic regression + When I create a prediction with operating kind "" for "" + Then the prediction for "" is "" + And I create a local prediction with operating kind "" for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", + "operating_kind"] + examples = [ + ['data/iris.csv', '10', '50', '60', '{"petal length": 5}', + '000004', 'Iris-versicolor', "probability"], + ['data/iris.csv', '10', '50', '60', '{"petal length": 2}', + '000004', 'Iris-setosa', "probability"]] + show_doc(self.test_scenario8) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model( + self, shared=example["data"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_logistic_model(self) + prediction_create.i_create_a_logistic_prediction_with_op_kind( + self, example["input_data"],
example["operating_kind"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_logistic_prediction_op_kind( + self, example["input_data"], example["operating_kind"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario9(self): + """ + Scenario: Successfully comparing predictions for logistic regressions with operating kind and supervised model: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression with objective "" + And I wait until the logistic regression is ready less than secs + And I create a local supervised model + When I create a prediction with operating kind "" for "" + Then the prediction for "" is "" + And I create a local prediction with operating kind "" for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", + "operating_kind"] + examples = [ + ['data/iris.csv', '10', '50', '60', '{"petal length": 5}', + '000004', 'Iris-versicolor', "probability"], + ['data/iris.csv', '10', '50', '60', '{"petal length": 2}', + '000004', 'Iris-setosa', "probability"]] + show_doc(self.test_scenario9) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model( + self, shared=example["data"]) +
model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_supervised_model( + self, model_type="logistic_regression") + prediction_create.i_create_a_logistic_prediction_with_op_kind( + self, example["input_data"], example["operating_kind"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_logistic_prediction_op_kind( + self, example["input_data"], example["operating_kind"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + prediction_compare.i_create_a_local_bigml_model( + self, model_type="logistic_regression") + prediction_compare.i_create_a_local_bigml_model_prediction( + self, example["input_data"], prediction_type="prediction", + operating_kind=example["operating_kind"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario10(self): + """ + Scenario: Successfully comparing predictions for linear regression: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a linear regression with objective "" and "" + And I wait until the linear regression is ready less than secs + And I create a local linear regression + When I create a prediction for "" + Then the prediction for "" is "" + And I create a local prediction for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "model_conf", + "operating_kind"] + examples = [ + ['data/grades.csv', '10', '50', '60', + '{"000000": 1, "000001": 1, "000002": 1}', '000005', 29.63024, + '{"input_fields": ["000000", "000001", "000002"]}'], + ['data/iris.csv', '10', '50', '60', + '{"000000": 1, "000001": 1, "000004": "Iris-virginica"}', + 
'000003', 1.21187, + '{"input_fields": ["000000", "000001", "000004"]}'], + ['data/movies.csv', '10', '50', '60', '{"000007": "Action"}', + '000009', 4.33333, '{"input_fields": ["000007"]}'], + ['data/movies.csv', '10', '50', '60', '{"000006": "1999"}', + '000009', 3.28427, '{"input_fields": ["000006"], "bias": false}']] + show_doc(self.test_scenario10) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + linear_create.i_create_a_linear_regression_with_objective_and_params( + self, example["objective_id"], example["model_conf"]) + linear_create.the_linear_regression_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_linear(self) + prediction_create.i_create_a_linear_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_linear_prediction( + self, example["input_data"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) + + def test_scenario11(self): + """ + Scenario: Successfully comparing predictions for logistic regressions with operating point: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression with objective "" + And I wait until the logistic regression is ready less than secs + And I create a local logistic regression + When I create a prediction with operating point "" for "" + Then the 
prediction for "" is "" + And I create a local prediction with operating point "" for "" + Then the local prediction is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "model_conf", + "operating_point"] + examples = [ + ['data/iris.csv', '10', '50', '60', '{"petal width": 4}', '000004', + 'Iris-versicolor', '{"default_numeric_value": "mean"}', + {"kind": "probability", "threshold": 1, + "positive_class": "Iris-virginica"}]] + show_doc(self.test_scenario11) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_logistic_model(self) + prediction_create.i_create_a_logistic_prediction_with_op( + self, example["input_data"], example["operating_point"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_compare.i_create_a_local_prediction_op( + self, example["input_data"], example["operating_point"]) + prediction_compare.the_local_prediction_is( + self, example["prediction"]) diff --git a/bigml/tests/test_37_configuration.py b/bigml/tests/test_37_configuration.py new file mode 100644 index 00000000..1c4ba9ac --- /dev/null +++ b/bigml/tests/test_37_configuration.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init 
+#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating configuration + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_configuration_steps as config_create + +class TestConfiguration: + """Test for Configuration methods""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating configuration: + Given I create a configuration from "" info + And I update the configuration name to "" + When I wait until the configuration is ready less than secs + Then the configuration name is "" + And the configuration contents are "" + """ + show_doc(self.test_scenario1) + headers = ["configurations", "configuration_wait", + "configuration_name"] + examples = [ + [{ + "dataset": { + "name": "Customer FAQ dataset" + } + }, '10', {"name": 'my new configuration name'}]] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + config_create.i_create_configuration( + self, example["configurations"]) + 
config_create.i_update_configuration( + self, example["configuration_name"]) + config_create.the_configuration_is_finished_in_less_than( + self, example["configuration_wait"]) + config_create.i_check_configuration_name( + self, example["configuration_name"]) + config_create.i_check_configuration_conf( + self, example["configurations"]) diff --git a/bigml/tests/test_38_organization.py b/bigml/tests/test_38_organization.py new file mode 100644 index 00000000..4187a474 --- /dev/null +++ b/bigml/tests/test_38_organization.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2018-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Working with organizations + +""" +import os +import shutil + + +from bigml.api import BigML + +from .world import world, show_doc, show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . 
import create_prediction_steps as prediction_create + + +try: + BIGML_ORGANIZATION = os.environ['BIGML_ORGANIZATION'] +except KeyError: + raise ValueError("You need to set BIGML_ORGANIZATION" + " to an organization ID in your " + "environment variables to run this test.") + + +def setup_module(): + """Operations to be performed before each module + + """ + # Project or Organization IDs + + world.bck_api = world.api + world.api = BigML(world.username, world.api_key, debug=world.debug, + organization=BIGML_ORGANIZATION) + print(world.api.connection_info()) + world.bck_project_id = world.project_id + world.project_id = world.api.create_project( \ + {"name": world.test_project_name})['resource'] + world.api = BigML(world.username, world.api_key, debug=world.debug, + project=world.project_id) + print("New connection: ", world.api.connection_info()) + world.clear() + + +#pylint: disable=locally-disabled,broad-except +def teardown_module(): + """Operations to be performed after each module + + """ + + if os.path.exists('./tmp'): + shutil.rmtree('./tmp') + + if not world.debug: + try: + world.delete_resources() + except Exception as exc: + print(exc) + world.api = BigML(world.username, world.api_key, debug=world.debug, + organization=BIGML_ORGANIZATION) + project_stats = world.api.get_project( \ + world.project_id)['object']['stats'] + for resource_type, value in list(project_stats.items()): + if value['count'] != 0: + # assert False, ("Increment in %s: %s" % (resource_type, value)) + print("WARNING: Increment in %s: %s" % (resource_type, value)) + + world.api.delete_project(world.project_id) + world.project_id = world.bck_project_id + world.api = world.bck_api + print("New connection: ", world.api.connection_info()) + + +class TestOrgPrediction: + """Testing predictions for organization resources""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" 
% __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating a prediction in an organization: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + When I create a prediction for "" + Then the prediction for "" is "" + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction"] + examples = [ + ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', + '000004', 'Iris-setosa']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + model_create.i_create_a_model(self) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_create.i_create_a_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) diff --git a/bigml/tests/test_38_project_connection.py b/bigml/tests/test_38_project_connection.py new file mode 100644 index 00000000..7175d8a6 --- /dev/null +++ b/bigml/tests/test_38_project_connection.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import,broad-except +# +# Copyright 2018-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the 
"License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Working with organizations + +""" +import os +import shutil + + +from bigml.api import BigML + +from .world import world, eq_, show_method +from .world import setup_module as general_setup_module +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_prediction_steps as prediction_create + + + +def setup_module(): + """Operations to be performed before each module + + """ + # Project or Organization IDs + + general_setup_module() + world.bck_api = world.api + world.api = BigML(world.username, world.api_key, debug=world.debug, + project=world.project_id) + print(world.api.connection_info()) + world.clear() + +def teardown_module(): + """Operations to be performed after each module + + """ + + if os.path.exists('./tmp'): + shutil.rmtree('./tmp') + + if not world.debug: + try: + world.delete_resources() + except Exception as exc: + print(exc) + project_stats = world.api.get_project( \ + world.project_id)['object']['stats'] + for resource_type, value in list(project_stats.items()): + if value['count'] != 0: + # assert False, ("Increment in %s: %s" % (resource_type, value)) + print("WARNING: Increment in %s: %s" % (resource_type, value)) + world.api.delete_project(world.project_id) + world.project_id = None + world.api = world.bck_api + print(world.api.connection_info()) + + +class TestProjPrediction: + """Testing predictions in organization's project 
""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating a prediction with a user's project connection: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And the source is in the project + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + When I create a prediction for "" + Then the prediction for "" is "" + """ + print(self.test_scenario1.__doc__) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective", "prediction"] + examples = [ + ['data/iris.csv', '10', '10', '10', '{"petal width": 0.5}', '000004', 'Iris-setosa']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file_with_project_conn( + self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + eq_(world.source['project'], world.project_id) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + eq_(world.dataset['project'], world.project_id) + model_create.i_create_a_model(self) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + eq_(world.model['project'], world.project_id) + prediction_create.i_create_a_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective"], example["prediction"]) + eq_(world.prediction['project'], world.project_id) diff --git 
a/bigml/tests/test_39_optiml_fusion.py b/bigml/tests/test_39_optiml_fusion.py new file mode 100644 index 00000000..0ff5992f --- /dev/null +++ b/bigml/tests/test_39_optiml_fusion.py @@ -0,0 +1,451 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2018-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating optimls and fusions + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_model_steps as model_create +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import compare_predictions_steps as compare_pred +from . import create_prediction_steps as prediction_create +from . import create_evaluation_steps as evaluation_create +from . 
import create_batch_prediction_steps as batch_pred_create + + +class TestOptimlFusion: + """Testing OptiML and Fusion methods""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario 1: Successfully creating an optiml from a dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an optiml from a dataset + And I wait until the optiml is ready less than secs + And I update the optiml name to "" + When I wait until the optiml is ready less than secs + Then the optiml name is "" + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "optiml_name"] + examples = [ + ['data/iris.csv', '10', '10', '300', 'my new optiml name']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_an_optiml_with_objective_and_params( \ + self, parms='{"max_training_time": %s, "model_types": ' + '["model", "logisticregression"]}' % \ + (int(float(example["model_wait"])/10) - 1)) + model_create.the_optiml_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_update_optiml_name(self, example["optiml_name"]) + 
model_create.the_optiml_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_check_optiml_name(self, example["optiml_name"]) + + def test_scenario2(self): + """ + Scenario 2: Successfully creating a fusion: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I retrieve a list of remote models tagged with "" + And I create a fusion from a list of models + And I wait until the fusion is ready less than secs + And I update the fusion name to "" + When I wait until the fusion is ready less than secs + And I create a prediction for "" + Then the fusion name is "" + And the prediction for "" is "" + And I create an evaluation for the fusion with the dataset + And I wait until the evaluation is ready less than secs + Then the measured "" is + """ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "fusion_wait", "evaluation_wait", "fusion_name", + "model_conf", "tag", "input_data", "objective_id", + "prediction", "metric", "value"] + examples = [ + ['data/iris.csv', '10', '10', '50', '50', '50', + 'my new fusion name', + '{"tags":["my_fusion_2_tag"]}', 'my_fusion_2_tag', + '{"petal width": 1.75, "petal length": 2.45}', "000004", + "Iris-setosa", 'average_phi', '1.0']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, 
shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + model_create.i_create_a_model_with( + self, example["model_conf"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with( + self, example["model_conf"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with( + self, example["model_conf"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + compare_pred.i_retrieve_a_list_of_remote_models( + self, example["tag"]) + model_create.i_create_a_fusion(self) + model_create.the_fusion_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_update_fusion_name(self, example["fusion_name"]) + model_create.the_fusion_is_finished_in_less_than( + self, example["fusion_wait"]) + model_create.i_check_fusion_name(self, example["fusion_name"]) + prediction_create.i_create_a_fusion_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + evaluation_create.i_create_an_evaluation_fusion(self) + evaluation_create.the_evaluation_is_finished_in_less_than( + self, example["evaluation_wait"]) + evaluation_create.the_measured_measure_is_value( + self, example["metric"], example["value"]) + + + def test_scenario3(self): + """ + Scenario 3: Successfully creating a fusion: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I retrieve a list of remote models tagged with "" + And I create a 
fusion from a list of models + And I wait until the fusion is ready less than secs + When I create a batch prediction for the dataset with the fusion + And I wait until the batch prediction is ready less than secs + And I download the created predictions file to "" + Then the batch prediction file is like "" + """ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "fusion_wait", "batch_wait", "model_conf", "tag", + "local_file", "predictions_file"] + examples = [ + ['data/iris.csv', '10', '10', '30', '30', '30', + '{"tags":["my_fusion_3_tag"]}', 'my_fusion_3_tag', + 'tmp/batch_predictions.csv', 'data/batch_predictions_fs.csv']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model_with( + self, example["model_conf"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with( + self, example["model_conf"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with( + self, example["model_conf"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + compare_pred.i_retrieve_a_list_of_remote_models( + self, example["tag"]) + model_create.i_create_a_fusion(self) + model_create.the_fusion_is_finished_in_less_than( + self, example["fusion_wait"]) + batch_pred_create.i_create_a_batch_prediction_fusion(self) + batch_pred_create.the_batch_prediction_is_finished_in_less_than( + self, example["batch_wait"]) + 
batch_pred_create.i_download_predictions_file( + self, example["local_file"]) + batch_pred_create.i_check_predictions( + self, example["predictions_file"]) + + def test_scenario4(self): + """ + Scenario 4: Successfully creating a fusion: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a logistic regression with "" + And I wait until the logistic regression is ready less than secs + And I create a logistic regression with "" + And I wait until the logistic regression is ready less than secs + And I retrieve a list of remote logistic regression tagged with "" + And I create a fusion from a list of models + And I wait until the fusion is ready less than secs + When I create a prediction for "" + Then the prediction for "" is "" + And the local logistic regression probability for the prediction is "" + And I create a local fusion prediction for "" + Then the local fusion prediction is "" + And the local fusion probability for the prediction is "" + And the local fusion confidence for the prediction is "" + """ + show_doc(self.test_scenario4) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "fusion_wait", "model_conf", "tag", "input_data", + "objective_id", "prediction", "probability", "confidence"] + examples = [ + ['data/iris.csv', '10', '10', '30', '30', + '{"tags":["my_fusion_4_tag"], "missing_numerics": true}', + 'my_fusion_4_tag', + '{"petal width": 1.75, "petal length": 2.45}', "000004", + "Iris-setosa", '0.4726', '0.4726']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], 
shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + compare_pred.i_retrieve_a_list_of_remote_logistic_regressions( + self, example["tag"]) + model_create.i_create_a_fusion(self) + model_create.the_fusion_is_finished_in_less_than( + self, example["fusion_wait"]) + compare_pred.i_create_a_local_fusion(self) + prediction_create.i_create_a_fusion_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_create.the_fusion_probability_is( + self, example["probability"]) + compare_pred.i_create_a_local_prediction( + self, example["input_data"]) + compare_pred.the_local_prediction_is( + self, example["prediction"]) + compare_pred.the_local_probability_is( + self, example["probability"]) + compare_pred.the_local_confidence_is( + self, example["confidence"]) + + def test_scenario5(self): + """ + Scenario 5: Successfully creating a fusion: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a logistic regression with "" + And I wait until the logistic regression is ready less than secs + And I create a logistic regression with "" + And I wait until the logistic regression is ready less 
than secs + And I retrieve a list of remote logistic regression tagged with "" + And I create a fusion from a list of models + And I wait until the fusion is ready less than secs + When I create a prediction for "" + Then the prediction for "" is "" + And the fusion probability for the prediction is "" + And I create a local fusion prediction for "" + Then the local fusion prediction is "" + And the local fusion probability for the prediction is "" + """ + show_doc(self.test_scenario5) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "fusion_wait", "model_conf1", "model_conf2", "tag", + "input_data", "objective_id", "prediction", "probability"] + examples = [ + ['data/iris.csv', '10', '10', '30', '30', + '{"tags":["my_fusion_5_tag"], "missing_numerics": true}', + '{"tags":["my_fusion_5_tag"], "missing_numerics": false, ' + '"balance_fields": false }', + 'my_fusion_5_tag', + '{"petal width": 1.75, "petal length": 2.45}', + "000004", + "Iris-setosa", + '0.4726']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf1"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf2"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + compare_pred.i_retrieve_a_list_of_remote_logistic_regressions( + self, example["tag"]) + model_create.i_create_a_fusion(self) + 
model_create.the_fusion_is_finished_in_less_than( + self, example["fusion_wait"]) + compare_pred.i_create_a_local_fusion(self) + prediction_create.i_create_a_fusion_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_create.the_fusion_probability_is( + self, example["probability"]) + compare_pred.i_create_a_local_prediction( + self, example["input_data"]) + compare_pred.the_local_prediction_is( + self, example["prediction"]) + compare_pred.the_local_probability_is( + self, example["probability"]) + + def test_scenario6(self): + """ + Scenario 6: Successfully creating a fusion: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a logistic regression with "" + And I wait until the logistic regression is ready less than secs + And I create a logistic regression with "" + And I wait until the logistic regression is ready less than secs + And I retrieve a list of remote logistic regression tagged with "" + And I create a fusion from a list of models and weights "" + And I wait until the fusion is ready less than secs + When I create a prediction for "" + Then the prediction for "" is "" + And the fusion probability for the prediction is "" + And I create a local fusion prediction for "" + Then the local fusion prediction is "" + And the local fusion probability for the prediction is "" + """ + show_doc(self.test_scenario6) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "fusion_wait", "model_conf1", "model_conf2", "tag", + "input_data", "objective_id", + "prediction", "probability", "fusion_weights"] + examples = [ + ['data/iris.csv', '10', '10', '30', '30', + '{"tags":["my_fusion_6_tag"], "missing_numerics": true}', + 
'{"tags":["my_fusion_6_tag"], "missing_numerics": false, ' + '"balance_fields": false }', + 'my_fusion_6_tag', + '{"petal width": 1.75, "petal length": 2.45}', + "000004", + "Iris-setosa", + '0.4726', '[1, 2]']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf1"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_logistic_model_with_objective_and_parms( + self, example["objective_id"], example["model_conf2"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + compare_pred.i_retrieve_a_list_of_remote_logistic_regressions( + self, example["tag"]) + model_create.i_create_a_fusion_with_weights( + self, example["fusion_weights"]) + model_create.the_fusion_is_finished_in_less_than( + self, example["fusion_wait"]) + compare_pred.i_create_a_local_fusion(self) + prediction_create.i_create_a_fusion_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + prediction_create.the_fusion_probability_is( + self, example["probability"]) + compare_pred.i_create_a_local_prediction( + self, example["input_data"]) + compare_pred.the_local_prediction_is(self, example["prediction"]) + compare_pred.the_local_probability_is(self, example["probability"]) diff --git a/bigml/tests/test_40_local_from_file.py b/bigml/tests/test_40_local_from_file.py new file mode 100644 
index 00000000..c8311285 --- /dev/null +++ b/bigml/tests/test_40_local_from_file.py @@ -0,0 +1,555 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2018-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating tests for building local models from files + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_model_steps as model_create +from . import create_linear_steps as linear_create +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_ensemble_steps as ensemble_create +from . import create_anomaly_steps as anomaly_create +from . import create_time_series_steps as timeseries_create +from . import create_association_steps as association_create +from . import create_cluster_steps as cluster_create +from . import create_lda_steps as topic_create +from . 
import compare_predictions_steps as prediction_compare + + +class TestLocalFromFile: + """Testing locally generated code""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario 1: Successfully creating a local model from an exported file: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with params "" + And I wait until the model is ready less than secs + And I export the "" model to "" + When I create a local model from the file "" + Then the model ID and the local model ID match + And the prediction for "" is "" + And the number of leaves is "" + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "pmml", "exported_file", "input_data", "prediction", + "model_conf", 'leaves#'] + examples = [ + ['data/iris.csv', '10', '10', '10', False, + './tmp/model.json', {}, "Iris-setosa", '{}', 9], + ['data/iris.csv', '10', '10', '10', False, + './tmp/model_dft.json', {}, "Iris-versicolor", + '{"default_numeric_value": "mean"}', 9]] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model_with(self, 
example["model_conf"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_export_model( + self, example["pmml"], example["exported_file"]) + model_create.i_create_local_model_from_file( + self, example["exported_file"]) + model_create.check_model_id_local_id(self) + model_create.local_model_prediction_is( + self, example["input_data"], example["prediction"]) + model_create.check_leaves_number(self, example["leaves#"]) + + def test_scenario2(self): + """ + Scenario 2: Successfully creating a local ensemble from an exported file: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an ensemble with "" + And I wait until the ensemble is ready less than secs + And I export the ensemble to "" + When I create a local ensemble from the file "" + Then the ensemble ID and the local ensemble ID match + And the prediction for "" is "" + """ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "exported_file", "input_data", "prediction", + "model_conf"] + examples = [ + ['data/iris.csv', '10', '10', '50', './tmp/ensemble.json', + {}, {'probability': 0.35714, 'prediction': 'Iris-versicolor'}, + '{}'], + ['data/iris.csv', '10', '10', '50', './tmp/ensemble_dft.json', + {}, {'probability': 0.98209, 'prediction': 'Iris-versicolor'}, + '{"default_numeric_value": "mean"}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + 
ensemble_create.i_create_an_ensemble_with_params( + self, example["model_conf"]) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"]) + ensemble_create.i_export_ensemble(self, example["exported_file"]) + ensemble_create.i_create_local_ensemble_from_file( + self, example["exported_file"]) + ensemble_create.check_ensemble_id_local_id(self) + model_create.local_ensemble_prediction_is( + self, example["input_data"], example["prediction"]) + + def test_scenario3(self): + """ + Scenario 3: Successfully creating a local logistic regression from an exported file: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression with "" + And I wait until the logistic regression is ready less than secs + And I export the logistic regression to "" + When I create a local logistic regression from the file "" + Then the logistic regression ID and the local logistic regression ID match + And the prediction for "" is "" + """ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "exported_file", "input_data", "prediction", + "model_conf"] + examples = [ + ['data/iris.csv', '10', '10', '50', './tmp/logistic.json', {}, + 'Iris-versicolor', '{}'], + ['data/iris.csv', '10', '10', '50', './tmp/logistic_dft.json', {}, + 'Iris-virginica', '{"default_numeric_value": "maximum"}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + 
model_create.i_create_a_logistic_model_with_objective_and_parms( + self, parms=example["model_conf"]) + model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_export_logistic_regression( + self, example["exported_file"]) + model_create.i_create_local_logistic_regression_from_file( + self, example["exported_file"]) + model_create.check_logistic_regression_id_local_id(self) + model_create.local_logistic_prediction_is( + self, example["input_data"], example["prediction"]) + + def test_scenario4(self): + """ + Scenario 4: Successfully creating a local deepnet from an exported file: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a deepnet with "" + And I wait until the deepnet is ready less than secs + And I export the deepnet to "" + When I create a local deepnet from the file "" + Then the deepnet ID and the local deepnet ID match + And the prediction for "" is "" + """ + show_doc(self.test_scenario4) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "exported_file", "input_data", "prediction", + "model_conf"] + examples = [ + ['data/iris.csv', '10', '10', '500', './tmp/deepnet.json', {}, + 'Iris-versicolor', '{}'], + ['data/iris.csv', '10', '10', '500', './tmp/deepnet_dft.json', {}, + 'Iris-versicolor', '{"default_numeric_value": "maximum"}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + 
model_create.i_create_a_deepnet_with_objective_and_params( + self, parms=example["model_conf"]) + model_create.the_deepnet_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_export_deepnet(self, example["exported_file"]) + model_create.i_create_local_deepnet_from_file( + self, example["exported_file"]) + model_create.check_deepnet_id_local_id(self) + model_create.local_deepnet_prediction_is( + self, example["input_data"], example["prediction"]) + + def test_scenario5(self): + """ + Scenario 5: Successfully creating a local cluster from an exported file: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster with "" + And I wait until the cluster is ready less than secs + And I export the cluster to "" + When I create a local cluster from the file "" + Then the cluster ID and the local cluster ID match + And the prediction for "" is "" + """ + show_doc(self.test_scenario5) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "exported_file", "input_data", "prediction", + "model_conf"] + examples = [ + ['data/iris.csv', '10', '10', '500', './tmp/cluster.json', + {"petal length": 2, "petal width": 2, "sepal length": 2, + "sepal width": 2, "species": "Iris-setosa"}, + {'centroid_id': '000007', 'centroid_name': 'Cluster 7', + 'distance': 0.7340597799442431}, '{}'], + ['data/iris.csv', '10', '10', '500', './tmp/cluster_dft.json', {}, + {'centroid_id': '000005', 'centroid_name': 'Cluster 5', + 'distance': 0.502695797586787}, + '{"default_numeric_value": "maximum"}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + 
dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + cluster_create.i_create_a_cluster_with_options( + self, example["model_conf"]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"]) + cluster_create.i_export_cluster(self, example["exported_file"]) + cluster_create.i_create_local_cluster_from_file( + self, example["exported_file"]) + cluster_create.check_cluster_id_local_id(self) + model_create.local_cluster_prediction_is( + self, example["input_data"], example["prediction"]) + + def test_scenario6(self): + """ + Scenario 6: Successfully creating a local anomaly from an exported file: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an anomaly with "" + And I wait until the anomaly is ready less than secs + And I export the anomaly to "" + When I create a local anomaly from the file "" + Then the anomaly ID and the local anomaly ID match + And the prediction for "" is "" + """ + show_doc(self.test_scenario6) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "exported_file", "input_data", "prediction", + "model_conf"] + examples = [ + ['data/iris.csv', '10', '10', '500', './tmp/anomaly.json', + {"petal length": 2, "petal width": 2, "sepal length": 2, + "sepal width": 2, "species": "Iris-setosa"}, + 0.64387, '{}'], + ['data/iris.csv', '10', '10', '500', + './tmp/anomaly_dft.json', {}, 0.77699, + '{"default_numeric_value": "maximum"}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + 
dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + anomaly_create.i_create_an_anomaly_with_params( + self, example["model_conf"]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"]) + anomaly_create.i_export_anomaly(self, example["exported_file"]) + anomaly_create.i_create_local_anomaly_from_file( + self, example["exported_file"]) + anomaly_create.check_anomaly_id_local_id(self) + model_create.local_anomaly_prediction_is( + self, example["input_data"], example["prediction"]) + + def test_scenario7(self): + """ + Scenario 7: Successfully creating a local association from an exported file: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an association with "" + And I wait until the association is ready less than secs + And I export the association to "" + When I create a local association from the file "" + Then the association ID and the local association ID match + And the prediction for "" is "" + """ + show_doc(self.test_scenario7) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "exported_file", "input_data", "prediction", "model_conf"] + examples = [ + ['data/iris.csv', '10', '10', '500', './tmp/association.json', {}, + [], '{}'], + ['data/iris.csv', '10', '10', '500', './tmp/association_dft.json', + {}, [{'score': 0.12, 'rules': ['00000d'], 'item': { + 'complement': False, 'count': 50, 'field_id': '000004', + 'name': 'Iris-versicolor'}}], + '{"default_numeric_value": "mean"}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, 
example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + association_create.i_create_an_association_from_dataset_with_params( + self, example["model_conf"]) + association_create.the_association_is_finished_in_less_than( + self, example["model_wait"]) + association_create.i_export_association( + self, example["exported_file"]) + association_create.i_create_local_association_from_file( + self, example["exported_file"]) + association_create.check_association_id_local_id(self) + model_create.local_association_prediction_is( + self, example["input_data"], example["prediction"]) + + def test_scenario8(self): + """ + Scenario 8: Successfully creating a local topic model from an exported file: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a topic model + And I wait until the topic model is ready less than secs + And I export the topic model to "" + When I create a local topic model from the file "" + Then the topic model ID and the local topic model ID match + """ + show_doc(self.test_scenario8) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "exported_file", "source_conf"] + examples = [ + ['data/spam.csv', '10', '10', '500', './tmp/topic_model.json', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"]) + source_create.i_update_source_with(self, example["source_conf"]) + 
source_create.the_source_is_finished(self, example["source_wait"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + topic_create.i_create_a_topic_model(self) + topic_create.the_topic_model_is_finished_in_less_than( + self, example["model_wait"]) + topic_create.i_export_topic_model( + self, example["exported_file"]) + topic_create.i_create_local_topic_model_from_file( + self, example["exported_file"]) + topic_create.check_topic_model_id_local_id(self) + + def test_scenario9(self): + """ + Scenario 9: Successfully creating a local time series from an exported file: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a time series with "" + And I wait until the time series is ready less than secs + And I export the time series to "" + When I create a local time series from the file "" + Then the time series ID and the local time series ID match + """ + show_doc(self.test_scenario9) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "exported_file"] + examples = [ + ['data/iris.csv', '10', '10', '500', './tmp/time_series.json']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished(self, example["source_wait"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + timeseries_create.i_create_a_time_series(self) + timeseries_create.the_time_series_is_finished_in_less_than( + self, example["model_wait"]) + timeseries_create.i_export_time_series( + self, example["exported_file"]) + timeseries_create.i_create_local_time_series_from_file( + self, example["exported_file"]) + 
timeseries_create.check_time_series_id_local_id(self) + + def test_scenario10(self): + """ + Scenario 10: Successfully creating a local fusion from an exported file: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I create a model with "" + And I wait until the model is ready less than secs + And I retrieve a list of remote models tagged with "" + And I create a fusion from a list of models + And I wait until the fusion is ready less than secs + And I export the fusion to "" + When I create a local fusion from the file "" + Then the fusion ID and the local fusion ID match + """ + show_doc(self.test_scenario10) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "exported_file", "tag"] + examples = [ + ['data/iris.csv', '10', '10', '50', './tmp/fusion.json', + 'my_fusion_tag']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + tag = example["tag"] + tag_args = '{"tags":["%s"]}' % tag + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model_with(self, tag_args) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with(self, tag_args) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_create_a_model_with(self, tag_args) + 
model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_retrieve_a_list_of_remote_models(self, tag) + model_create.i_create_a_fusion(self) + model_create.the_fusion_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_export_fusion(self, example["exported_file"]) + model_create.i_create_local_fusion_from_file( + self, example["exported_file"]) + model_create.check_fusion_id_local_id(self) + + def test_scenario11(self): + """ + Scenario 11: Successfully creating a local linear regression from an exported file: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a linear regression with "" + And I wait until the linear regression is ready less than secs + And I export the linear regression to "" + When I create a local linear regression from the file "" + Then the linear regression ID and the local linear regression ID match + And the prediction for "" is "" + """ + show_doc(self.test_scenario11) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "exported_file", "input_data", "prediction", "model_conf"] + examples = [ + ['data/grades.csv', '20', '20', '50', './tmp/linear.json', + {"Prefix": 5, "Assignment": 57.14, "Tutorial": 34.09, + "Midterm": 64, "TakeHome": 40, "Final": 50}, 54.69551, + '{}'], + ['data/grades.csv', '20', '20', '50', './tmp/linear_dft.json', {}, + 100.33246, '{"default_numeric_value": "maximum"}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + 
linear_create.i_create_a_linear_regression_with_objective_and_params( + self, params=example["model_conf"]) + linear_create.the_linear_regression_is_finished_in_less_than( + self, example["model_wait"]) + model_create.i_export_linear_regression( + self, example["exported_file"]) + model_create.i_create_local_linear_regression_from_file( + self, example["exported_file"]) + model_create.check_linear_regression_id_local_id(self) + model_create.local_linear_prediction_is( + self, example["input_data"], example["prediction"]) diff --git a/bigml/tests/test_41_multidataset.py b/bigml/tests/test_41_multidataset.py new file mode 100644 index 00000000..e0c8f1b3 --- /dev/null +++ b/bigml/tests/test_41_multidataset.py @@ -0,0 +1,155 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2018-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating a sampled multidataset + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . 
import create_dataset_steps as dataset_create + +class TestMultiDataset: + """Test datasets and multidatasets""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating a sampled multi-dataset: + Given I create a data source with "" uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a multi-dataset with sample rates + And I wait until the multi-dataset is ready less than secs + When I compare the datasets' instances + Then the proportion of instances between datasets is + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "rate", + "rates"] + examples = [ + ['data/iris.csv', '50', '50', '0.5', '[0.2, 0.3]']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file_with_args( + self, example["data"], '{}') + source_create.the_source_is_finished( + self, example["source_wait"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.i_create_a_multidataset( + self, example["rates"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.i_compare_datasets_instances(self) + dataset_create.proportion_datasets_instances( + self, 
example["rate"]) + + + def test_scenario2(self): + """ + Scenario: Successfully creating a single dataset multi-dataset: + Given I create a data source with "" uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a multi-dataset with sample rates + And I wait until the multi-dataset is ready less than secs + When I compare the datasets' instances + Then the proportion of instances between datasets is + """ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "rate", + "rates"] + examples = [ + ['data/iris.csv', '50', '50', '0.2', '[0.2]']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file_with_args( + self, example["data"], '{}') + source_create.the_source_is_finished( + self, example["source_wait"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.i_create_a_multidataset( + self, example["rates"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.i_compare_datasets_instances(self) + dataset_create.proportion_datasets_instances( + self, example["rate"]) + + def test_scenario3(self): + """ + Scenario: Successfully creating a sampled multi-dataset with sample: + Given I create a data source with "" uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a multi-dataset with same dataset and the first sample rate + And I wait until the multi-dataset is ready less than secs + When I compare the datasets' instances + Then the proportion of instances between datasets is + """ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "rate", + 
"rates"] + examples = [ + ['data/iris.csv', '50', '50', '1.3', '[1, 0.3]']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file_with_args( + self, example["data"], '{}') + source_create.the_source_is_finished( + self, example["source_wait"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.i_create_a_multidataset_mixed_format( + self, example["rates"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + dataset_create.i_compare_datasets_instances(self) + dataset_create.proportion_datasets_instances( + self, example["rate"]) diff --git a/bigml/tests/test_42_pca.py b/bigml/tests/test_42_pca.py new file mode 100644 index 00000000..706305bf --- /dev/null +++ b/bigml/tests/test_42_pca.py @@ -0,0 +1,163 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2018-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating PCA + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_pca_steps as pca_create +from . 
import create_projection_steps as projection_create +from . import create_batch_projection_steps as batch_proj_create + +class TestPCA: + """Testing PCA methods""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating a PCA from a dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a PCA from a dataset + And I wait until the PCA is ready less than secs + And I update the PCA name to "" + When I wait until the PCA is ready less than secs + Then the PCA name is "" + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "pca_name"] + examples = [ + ['data/iris.csv', '10', '10', '40', 'my new pca name']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + pca_create.i_create_a_pca_from_dataset(self) + pca_create.the_pca_is_finished_in_less_than( + self, example["model_wait"]) + pca_create.i_update_pca_name(self, example["pca_name"]) + pca_create.the_pca_is_finished_in_less_than( + self, example["model_wait"]) + pca_create.i_check_pca_name(self, example["pca_name"]) + + print("\nEnd of tests in: %s\n-------------------\n" % 
__name__) + + def test_scenario2(self): + """ + Scenario: Successfully creating a projection: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a pca + And I wait until the pca is ready less than secs + When I create a projection for "" + Then the projection is "" + """ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "projection"] + examples = [ + ['data/iris.csv', '30', '30', '50', '{"petal width": 0.5}', + '{"PC2": 0.1593, "PC3": -0.01286, "PC1": 0.91648, ' + '"PC6": 0.27284, "PC4": 1.29255, "PC5": 0.75196}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + pca_create.i_create_a_pca(self) + pca_create.the_pca_is_finished_in_less_than( + self, example["model_wait"]) + projection_create.i_create_a_projection( + self, example["input_data"]) + projection_create.the_projection_is( + self, example["projection"]) + + def test_scenario3(self): + """ + Scenario: Successfully creating a batch projection: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a pca + And I wait until the pca is ready less than secs + When I create a batch projection for the dataset with the pca + And I wait until the batch projection is ready less than secs + And I download the created projections file to "" + Then the batch projection file 
is like "" + """ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "batch_wait", "local_file", "projections_file"] + examples = [ + ['data/iris.csv', '30', '30', '50', '50', + 'tmp/batch_projections.csv', 'data/batch_projections.csv']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + pca_create.i_create_a_pca(self) + pca_create.the_pca_is_finished_in_less_than( + self, example["model_wait"]) + batch_proj_create.i_create_a_batch_projection(self) + batch_proj_create.the_batch_projection_is_finished_in_less_than( + self, example["batch_wait"]) + batch_proj_create.i_download_projections_file( + self, example["local_file"]) + batch_proj_create.i_check_projections( + self, example["projections_file"]) diff --git a/bigml/tests/test_43_linear.py b/bigml/tests/test_43_linear.py new file mode 100644 index 00000000..a9a20ecb --- /dev/null +++ b/bigml/tests/test_43_linear.py @@ -0,0 +1,173 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2019-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating Linear Regression + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_linear_steps as linear_create +from . import create_prediction_steps as prediction_create +from . import create_batch_prediction_steps as batch_pred_create + +class TestLinearRegression: + """Testing Linear Regression methods""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating a linear regression from a dataset: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a linear regression from a dataset + And I wait until the linear regression is ready less than secs + And I update the linear regression name to "" + When I wait until the linear regression is ready less than secs + Then the linear regression name is "" + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "linear_name"] + examples = [ + ['data/grades.csv', '100', '100', '200', 'my new linear regression name']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + 
dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + linear_create.i_create_a_linear_regression_from_dataset(self) + linear_create.the_linear_regression_is_finished_in_less_than( + self, example["model_wait"]) + linear_create.i_update_linear_regression_name( + self, example["linear_name"]) + linear_create.the_linear_regression_is_finished_in_less_than( + self, example["model_wait"]) + linear_create.i_check_linear_name(self, example["linear_name"]) + + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + + def test_scenario2(self): + """ + Scenario: Successfully creating a prediction from linear regression: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a linear regression + And I wait until the linear regression is ready less than secs + When I create a prediction for "" + Then the prediction is "" + """ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "model_conf"] + examples = [ + ['data/grades.csv', '30', '30', '50', + '{"000000": 0.5, "000001": 1, "000002": 1, "000003": 1}', + "000005", '2.27312', '{}'], + ['data/grades.csv', '30', '30', '50', + '{"000000": 0.5, "000001": 1, "000002": 1, "000003": 1}', + "000005", '8.19619', '{"bias": false}'], + ['data/dates.csv', '30', '30', '30', + '{"test-num1": 23, "test-num2" : 54, "test-date.day-of-month":2, ' + '"test-date.month":12, "test-date.day-of-week": 2, ' + '"test-date.year": 2012}', "000003", '48.27679', + '{"bias": false}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self,
example["source_wait"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + linear_create.i_create_a_linear_regression_with_objective_and_params( + self, example["objective_id"], example["model_conf"]) + linear_create.the_linear_regression_is_finished_in_less_than( + self, example["model_wait"]) + prediction_create.i_create_a_linear_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + + def test_scenario3(self): + """ + Scenario: Successfully creating a batch prediction from a linear regression: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a linear regression + And I wait until the linear regression is ready less than secs + When I create a batch prediction for the dataset with the linear regression + And I wait until the batch prediction is ready less than secs + And I download the created predictions file to "" + Then the batch prediction file is like "" + """ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "batch_wait", "local_file", "predictions_file"] + examples = [ + ['data/grades.csv', '30', '30', '50', '50', + 'tmp/batch_predictions.csv', 'data/batch_predictions_linear.csv']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) +
linear_create.i_create_a_linear_regression_from_dataset(self) + linear_create.the_linear_regression_is_finished_in_less_than( + self, example["model_wait"]) + batch_pred_create.i_create_a_linear_batch_prediction(self) + batch_pred_create.the_batch_prediction_is_finished_in_less_than( + self, example["batch_wait"]) + batch_pred_create.i_download_predictions_file( + self, example["local_file"]) + batch_pred_create.i_check_predictions( + self, example["predictions_file"]) diff --git a/bigml/tests/test_44_compare_predictions.py b/bigml/tests/test_44_compare_predictions.py new file mode 100644 index 00000000..c50a6350 --- /dev/null +++ b/bigml/tests/test_44_compare_predictions.py @@ -0,0 +1,442 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +""" Comparing remote and local predictions + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_association_steps as association_create +from . import create_cluster_steps as cluster_create +from . import create_anomaly_steps as anomaly_create +from . import create_prediction_steps as prediction_create +from . 
import compare_predictions_steps as prediction_compare + + +class TestComparePrediction: + """Test local and remote predictions""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully comparing remote and local predictions + with raw date input for anomaly detectors + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an anomaly detector + And I wait until the anomaly detector is ready less + than secs + And I create a local anomaly detector + And I enable the pre-modeling pipeline + When I create an anomaly score for "" + Then the anomaly score is "" + And I create a local anomaly score for "" + Then the local anomaly score is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "score"] + examples = [ + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1910-05-08T19:10:23.106","cat-0":"cat2","target-2":0.4}', + 0.52477], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1920-06-30T20:21:20.320","cat-0":"cat1","target-2":0.2}', + 0.50654]] + show_doc(self.test_scenario1, examples) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], 
shared=example["data"]) + anomaly_create.i_create_an_anomaly(self) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_anomaly(self, pre_model=True) + prediction_create.i_create_an_anomaly_score( + self, example["input_data"]) + prediction_create.the_anomaly_score_is( + self, example["score"]) + prediction_compare.i_create_a_local_anomaly_score( + self, example["input_data"], pre_model=self.bigml["local_pipeline"]) + prediction_compare.the_local_anomaly_score_is( + self, example["score"]) + + def test_scenario1b(self): + """ + Scenario: Successfully comparing remote and local predictions + with raw date input for anomaly detectors + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an anomaly detector + And I wait until the anomaly detector is ready less + than secs + And I create a local anomaly detector + When I create an anomaly score for "" + Then the anomaly score is "" + And I create a local anomaly score for "" + Then the local anomaly score is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "score"] + examples = [ + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1932-01-30T19:24:11.440","cat-0":"cat2","target-2":0.1}', + 0.54343], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1950-11-06T05:34:05.602","cat-0":"cat1" ,"target-2":0.9}', + 0.5202]] + show_doc(self.test_scenario1b) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( 
+ self, example["dataset_wait"], shared=example["data"]) + anomaly_create.i_create_an_anomaly(self, shared=example["data"]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_anomaly(self, pre_model=True) + prediction_create.i_create_an_anomaly_score( + self, example["input_data"]) + prediction_create.the_anomaly_score_is( + self, example["score"]) + prediction_compare.i_create_a_local_anomaly_score( + self, example["input_data"], pre_model=self.bigml["local_pipeline"]) + prediction_compare.the_local_anomaly_score_is( + self, example["score"]) + + + def test_scenario1b_a(self): + """ + Scenario: Successfully comparing remote and local predictions + with raw date input for anomaly detectors + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an anomaly detector + And I wait until the anomaly detector is ready less + than secs + And I create a local anomaly detector + When I create an anomaly score for "" + Then the anomaly score is "" + And I create a local anomaly score for "" + Then the local anomaly score is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "score"] + examples = [ + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1969-7-14 17:36","cat-0":"cat2","target-2":0.9}', + 0.93639]] + show_doc(self.test_scenario1b_a) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], 
shared=example["data"]) + anomaly_create.i_create_an_anomaly(self, shared=example["data"]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_anomaly(self, pre_model=True) + prediction_create.i_create_an_anomaly_score( + self, example["input_data"]) + prediction_create.the_anomaly_score_is( + self, example["score"]) + prediction_compare.i_create_a_local_anomaly_score( + self, example["input_data"], pre_model=self.bigml["local_pipeline"]) + prediction_compare.the_local_anomaly_score_is( + self, example["score"]) + + def test_scenario1c(self): + """ + Scenario: Successfully comparing remote and local predictions + with raw date input for anomaly detectors + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an anomaly detector + And I wait until the anomaly detector is ready less + than secs + And I create a local anomaly detector + When I create an anomaly score for "" + Then the anomaly score is "" + And I create a local anomaly score for "" + Then the local anomaly score is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "score"] + examples = [ + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"2001-01-05T23:04:04.693","cat-0":"cat2","target-2":0.01}', + 0.54911], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"2011-04-01T00:16:45.747","cat-0":"cat2","target-2":0.32}', + 0.52477]] + show_doc(self.test_scenario1c) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + 
dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + anomaly_create.i_create_an_anomaly(self, shared=example["data"]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_anomaly(self, pre_model=True) + prediction_create.i_create_an_anomaly_score( + self, example["input_data"]) + prediction_create.the_anomaly_score_is(self, example["score"]) + prediction_compare.i_create_a_local_anomaly_score( + self, example["input_data"], pre_model=self.bigml["local_pipeline"]) + prediction_compare.the_local_anomaly_score_is( + self, example["score"]) + + def test_scenario1c_a(self): + """ + Scenario: Successfully comparing remote and local predictions + with raw date input for anomaly detectors + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an anomaly detector + And I wait until the anomaly detector is ready less + than secs + And I create a local anomaly detector + When I create an anomaly score for "" + Then the anomaly score is "" + And I create a local anomaly score for "" + Then the local anomaly score is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "score"] + examples = [ + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1969-W29-1T17:36:39Z","cat-0":"cat1","target-2":0.87}', + 0.93678], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"Mon Jul 14 17:36 +0000 1969","cat-0":"cat1","target-2":0}', + 0.93717]] + show_doc(self.test_scenario1c_a) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], 
shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + anomaly_create.i_create_an_anomaly(self, shared=example["data"]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_anomaly(self, pre_model=True) + prediction_create.i_create_an_anomaly_score( + self, example["input_data"]) + prediction_create.the_anomaly_score_is( + self, example["score"]) + prediction_compare.i_create_a_local_anomaly_score( + self, example["input_data"], pre_model=self.bigml["local_pipeline"]) + prediction_compare.the_local_anomaly_score_is( + self, example["score"]) + + def test_scenario2(self): + """ + Scenario: Successfully comparing remote and local predictions + with raw date input for cluster + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster + And I wait until the cluster is ready less than secs + And I create a local cluster + When I create a centroid for "" + Then the centroid is "" with distance "" + And I create a local centroid for "" + Then the local centroid is "" with + distance "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "centroid", "distance"] + examples = [ + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1910-05-08T19:10:23.106","cat-0":"cat2","target-2":0.4}', + "Cluster 2", 0.92112], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1920-06-30T20:21:20.320","cat-0":"cat1","target-2":0.2}', + "Cluster 3", 0.77389]] + show_doc(self.test_scenario2) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + 
source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + cluster_create.i_create_a_cluster(self, shared=example["data"]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_cluster(self, pre_model=True) + prediction_create.i_create_a_centroid( + self, example["input_data"]) + prediction_create.the_centroid_is_with_distance( + self, example["centroid"], example["distance"]) + prediction_compare.i_create_a_local_centroid( + self, example["input_data"], pre_model=self.bigml["local_pipeline"]) + prediction_compare.the_local_centroid_is( + self, example["centroid"], example["distance"]) + + def test_scenario2_a(self): + """ + Scenario: Successfully comparing remote and local predictions + with raw date input for cluster + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster + And I wait until the cluster is ready less than secs + And I create a local cluster + When I create a centroid for "" + Then the centroid is "" with distance "" + And I create a local centroid for "" + Then the local centroid is "" with + distance "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "centroid", "distance"] + examples = [ + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1932-01-30T19:24:11.440","cat-0":"cat2","target-2":0.1}', + "Cluster 0", 0.87855], + ['data/dates2.csv', '20', '30', '60', + '{"time-1":"1950-11-06T05:34:05.602","cat-0":"cat1" ,"target-2":0.9}', + "Cluster 6", 0.83506]] + show_doc(self.test_scenario2_a) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], 
example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + cluster_create.i_create_a_cluster(self, shared=example["data"]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"]) + prediction_compare.i_create_a_local_cluster(self, pre_model=True) + prediction_create.i_create_a_centroid( + self, example["input_data"]) + prediction_create.the_centroid_is_with_distance( + self, example["centroid"], example["distance"]) + prediction_compare.i_create_a_local_centroid( + self, example["input_data"], pre_model=self.bigml["local_pipeline"]) + prediction_compare.the_local_centroid_is( + self, example["centroid"], example["distance"]) + + def test_scenario3(self): + """ + Scenario: Successfully comparing association sets: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I update the source with params "" + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the association is ready less than secs + And I create a local association + When I create an association set for "" + Then the association set is like the contents of + "" + And I create a local association set for "" + Then the local association set is like the contents of + "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "association_set_file"] + examples = [['data/dates2.csv', '20', '30', '80', '{"target-2": -1}', + 'data/associations/association_set2.json']] + show_doc(self.test_scenario3) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + 
source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + association_create.i_create_an_association_from_dataset( + self, shared=example["data"]) + association_create.the_association_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + prediction_compare.i_create_a_local_association( + self, pre_model=True) + prediction_create.i_create_an_association_set( + self, example["input_data"]) + prediction_compare.the_association_set_is_like_file( + self, example["association_set_file"]) + prediction_compare.i_create_a_local_association_set( + self, example["input_data"], pre_model=self.bigml["local_pipeline"]) + prediction_compare.the_local_association_set_is_like_file( + self, example["association_set_file"]) diff --git a/bigml/tests/test_45_external_connector.py b/bigml/tests/test_45_external_connector.py new file mode 100644 index 00000000..deac2c94 --- /dev/null +++ b/bigml/tests/test_45_external_connector.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2018-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating external connectors + +""" +import json + +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_external_steps as connector_create + +class TestExternalConnector: + """Testing external connector creation""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating an external connector: + Given I create an external connector from environment vars + And I wait until the external connector is ready less than secs + And I update the external connector with args + And the external connector has arguments + """ + show_doc(self.test_scenario1) + headers = ["conn_wait", "args"] + examples = [ + ['20', '{"name": "my connector name"}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + connector_create.i_create_external_connector(self) + connector_create.the_external_connector_is_finished( + self, example["conn_wait"]) + connector_create.i_update_external_connector_with( + self, example["args"]) + connector_create.the_external_connector_is_finished( + self, example["conn_wait"]) + connector_create.external_connector_has_args( + example["args"]) diff --git a/bigml/tests/test_46_model_cloning.py b/bigml/tests/test_46_model_cloning.py new file mode 100644 index 00000000..70c32743 --- /dev/null +++ b/bigml/tests/test_46_model_cloning.py @@ -0,0 +1,426 @@ +# -*- coding: utf-8 -*- +#pylint: 
disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2020 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating clones for models + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_model_steps as model_create +from . import create_ensemble_steps as ensemble_create +from . import create_linear_steps as linear_create +from . import create_cluster_steps as cluster_create +from . import create_lda_steps as topic_create +from . import create_anomaly_steps as anomaly_create +from . import create_association_steps as association_create +from . import create_time_series_steps as time_create +from . 
import create_pca_steps as pca_create + + +class TestCloning: + """Testing cloned resources creation""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Successfully creating a clone from a model: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a model + And I wait until the model is ready less than secs + And I clone the model + Then the origin model is the previous model + And I share and clone the shared model + Then the origin model is the previous model + + """ + show_doc(self.test_scenario1) + headers = ["data", "source_wait", "dataset_wait", "model_wait"] + examples = [ + ['data/iris.csv', '10', '10', '10']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_model(self, shared=example["data"]) + model_create.the_model_is_finished_in_less_than( + self, example["model_wait"]) + model = world.model["resource"] + model_create.make_the_model_shared(self, cloneable=True) + model_hash = "shared/model/%s" % world.model["shared_hash"] + model_create.clone_model(self, model) + model_create.the_cloned_model_is(self, model) + 
model_create.clone_model(self, model_hash) + model_create.the_cloned_model_is(self, model) + + def test_scenario2(self): + """ + Scenario: Successfully creating a clone from an ensemble: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an ensemble + And I wait until the ensemble is ready less than secs + Then the origin ensemble is the previous ensemble + """ + show_doc(self.test_scenario2) + headers = ["data", "source_wait", "dataset_wait", "model_wait"] + examples = [ + ['data/iris.csv', '10', '10', '30']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + ensemble_create.i_create_an_ensemble(self, shared=example["data"]) + ensemble_create.the_ensemble_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + ensemble = world.ensemble["resource"] + ensemble_create.clone_ensemble(self, ensemble) + ensemble_create.the_cloned_ensemble_is(self, ensemble) + + def test_scenario3(self): + """ + Scenario: Successfully creating a clone from a deepnet: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a quick deepnet + And I wait until the deepnet is ready less than secs + Then the origin deepnet is the previous deepnet + """ + show_doc(self.test_scenario3) + headers = ["data", "source_wait", "dataset_wait", "model_wait"] + examples = [
+ ['data/iris.csv', '10', '10', '100']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_quick_deepnet(self) + model_create.the_deepnet_is_finished_in_less_than( + self, example["model_wait"]) + deepnet = world.deepnet["resource"] + model_create.clone_deepnet(self, deepnet) + model_create.the_cloned_deepnet_is(self, deepnet) + + def test_scenario4(self): + """ + Scenario: Successfully creating a clone from a logistic regression: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a logistic regression + And I wait until the logistic regression is ready less than secs + Then the origin logistic regression is the previous logistic regression + """ + show_doc(self.test_scenario4) + headers = ["data", "source_wait", "dataset_wait", "model_wait"] + examples = [ + ['data/iris.csv', '10', '10', '30']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + model_create.i_create_a_logistic_model(self, shared=example["data"]) + 
model_create.the_logistic_model_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + logistic_regression = world.logistic_regression["resource"] + model_create.clone_logistic_regression(self, logistic_regression) + model_create.the_cloned_logistic_regression_is( + self, logistic_regression) + + def test_scenario5(self): + """ + Scenario: Successfully creating a clone from a linear regression: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a linear regression + And I wait until the linear regression is ready less than secs + Then the origin linear regression is the previous linear regression + """ + show_doc(self.test_scenario5) + headers = ["data", "source_wait", "dataset_wait", "model_wait"] + examples = [ + ['data/iris.csv', '10', '10', '30']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file(self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + linear_create.i_create_a_linear_regression_from_dataset( + self, shared=example["data"]) + linear_create.the_linear_regression_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + linear_regression = world.linear_regression["resource"] + linear_create.clone_linear_regression(self, linear_regression) + linear_create.the_cloned_linear_regression_is( + self, linear_regression) + + def test_scenario6(self): + """ + Scenario: Successfully creating a clone from a cluster: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And 
I create a dataset + And I wait until the dataset is ready less than secs + And I create a cluster + And I wait until the cluster is ready less than secs + Then the origin cluster is the previous cluster + """ + show_doc(self.test_scenario6) + headers = ["data", "source_wait", "dataset_wait", "model_wait"] + examples = [ + ['data/iris.csv', '10', '10', '30']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + cluster_create.i_create_a_cluster(self, shared=example["data"]) + cluster_create.the_cluster_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + cluster = world.cluster["resource"] + cluster_create.clone_cluster(self, cluster) + cluster_create.the_cloned_cluster_is( + self, cluster) + + def test_scenario7(self): + """ + Scenario: Successfully creating a clone from a topic model: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a topic model + And I wait until the topic model is ready less than secs + Then the origin topic model is the previous topic model + """ + show_doc(self.test_scenario7) + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "source_conf"] + examples = [ + ['data/spam.csv', '10', '10', '100', '{"fields": {"000001": {"optype": "text", "term_analysis": {"case_sensitive": true, "stem_words": true, "use_stopwords": false, "language": "en"}}}}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, 
self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"]) + source_create.i_update_source_with( + self, example["source_conf"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + topic_create.i_create_a_topic_model(self) + topic_create.the_topic_model_is_finished_in_less_than( + self, example["model_wait"]) + topic_model = world.topic_model["resource"] + topic_create.clone_topic_model(self, topic_model) + topic_create.the_cloned_topic_model_is( + self, topic_model) + + + def test_scenario8(self): + """ + Scenario: Successfully creating a clone from an anomaly detector: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an anomaly detector + And I wait until the anomaly detector is ready less than secs + Then the origin anomaly detector is the previous anomaly detector + """ + show_doc(self.test_scenario8) + headers = ["data", "source_wait", "dataset_wait", "model_wait"] + examples = [ + ['data/iris.csv', '10', '10', '100']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + anomaly_create.i_create_an_anomaly(self, shared=example["data"]) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + anomaly = world.anomaly["resource"] + anomaly_create.clone_anomaly(self, 
anomaly) + anomaly_create.the_cloned_anomaly_is( + self, anomaly) + + def test_scenario9(self): + """ + Scenario: Successfully creating a clone from an association: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an association + And I wait until the association is ready less than secs + Then the origin association is the previous association + """ + show_doc(self.test_scenario9) + headers = ["data", "source_wait", "dataset_wait", "model_wait"] + examples = [ + ['data/iris.csv', '10', '10', '100']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + association_create.i_create_an_association_from_dataset( + self, shared=example["data"]) + association_create.the_association_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + association = world.association["resource"] + association_create.clone_association(self, association) + association_create.the_cloned_association_is( + self, association) + + def test_scenario10(self): + """ + Scenario: Successfully creating a clone from a time series: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a time series + And I wait until the time series is ready less than secs + Then the origin time series is the previous time series + """ + show_doc(self.test_scenario10) + headers = 
["data", "source_wait", "dataset_wait", "model_wait"] + examples = [ + ['data/iris.csv', '10', '10', '100']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + time_create.i_create_a_time_series(self) + time_create.the_time_series_is_finished_in_less_than( + self, example["model_wait"]) + time_series = world.time_series["resource"] + time_create.clone_time_series(self, time_series) + time_create.the_cloned_time_series_is( + self, time_series) + + def test_scenario11(self): + """ + Scenario: Successfully creating a clone from a pca: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a pca + And I wait until the pca is ready less than secs + Then the origin pca is the previous pca + """ + show_doc(self.test_scenario11) + headers = ["data", "source_wait", "dataset_wait", "model_wait"] + examples = [ + ['data/iris.csv', '10', '10', '100']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + pca_create.i_create_a_pca(self, shared=example["data"]) + 
pca_create.the_pca_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + pca = world.pca["resource"] + pca_create.clone_pca(self, pca) + pca_create.the_cloned_pca_is(self, pca) diff --git a/bigml/tests/test_47_webhooks.py b/bigml/tests/test_47_webhooks.py new file mode 100644 index 00000000..3206f0ef --- /dev/null +++ b/bigml/tests/test_47_webhooks.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import,invalid-name +# +# Copyright 2022-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ + +""" Checking webhooks secrets + +""" +import json + +from collections import OrderedDict +from bigml.webhooks import check_signature + +from .world import world, setup_module, teardown_module, show_doc, \ + show_method, ok_ + + +BIGML_SECRET = 'mysecret' + +BIGML_REQUEST_MOCKUP = { + "body": { + 'event': 'finished', + 'message': 'The source has been created', + 'resource': 'source/627eceb1d432eb7338001d4b', + 'timestamp': '2022-05-13 21:33:39 GMT' + }, + "META": { + 'HTTP_X_BIGML_SIGNATURE': "sha1=af38d979e8582d678653a8059ca0821daeedebbd" + } +} + + +class RequestMockup: + """Test for webhooks with secrets""" + + def __init__(self, request_dict): + self.body = json.dumps(request_dict["body"], sort_keys=True) + self.meta = request_dict["META"] + + +class TestWebhook: + """Testing webhooks""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario: Testing webhook secret signature + """ + show_doc(self.test_scenario1) + ok_(check_signature(RequestMockup(BIGML_REQUEST_MOCKUP), + BIGML_SECRET)) diff --git a/bigml/tests/test_48_local_dataset.py b/bigml/tests/test_48_local_dataset.py new file mode 100644 index 00000000..eabd52f1 --- /dev/null +++ b/bigml/tests/test_48_local_dataset.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2022-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Testing local dataset transformations + +""" +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import compare_dataset_steps as dataset_compare + + +class TestLocalDataset: + """Testing Local class for datasets""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario 1: Successfully creating a transformation from a local dataset in a json file: + Given I create a local dataset from a "" file + Then the transformed data for "" is "" + """ + show_doc(self.test_scenario1) + headers = ["dataset_file", "input_data", "output_data"] + examples = [ + ['bigml/tests/my_dataset/my_flatline_ds.json', + '{"plasma glucose": 120, "age": 30, "bmi": 46}', + '{"plasma glucose": 120, "age": 30, "glucose half": 60}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + dataset_compare.i_create_a_local_dataset_from_file( + self, example["dataset_file"]) + dataset_compare.the_transformed_data_is( + self, example["input_data"], example["output_data"]) diff --git a/bigml/tests/test_49_local_pipeline.py b/bigml/tests/test_49_local_pipeline.py new file mode 100644 index 00000000..651a87a3 --- /dev/null +++ b/bigml/tests/test_49_local_pipeline.py 
@@ -0,0 +1,382 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import +# +# Copyright 2022-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Testing local dataset transformations + +""" +import os +import json + +from .world import world, setup_module, teardown_module, show_doc, \ + show_method +from . import compare_pipeline_steps as pipeline_compare +from . import create_source_steps as source_create +from . import create_dataset_steps as dataset_create +from . import create_anomaly_steps as anomaly_create +from . import create_model_steps as model_create +from . import create_ensemble_steps as ensemble_create +from . import create_linear_steps as linear_create +from . import create_prediction_steps as prediction_create +from . 
import compare_predictions_steps as prediction_compare + + +class TestLocalPipeline: + """Testing local Pipeline methods""" + + def setup_method(self, method): + """ + Debug information + """ + self.bigml = {} + self.bigml["method"] = method.__name__ + print("\n-------------------\nTests in: %s\n" % __name__) + + def teardown_method(self): + """ + Debug information + """ + print("\nEnd of tests in: %s\n-------------------\n" % __name__) + self.bigml = {} + + def test_scenario1(self): + """ + Scenario 1: Successfully creating a local pipeline from a model and anomaly detector: + Given I expand the zip file "" that contain "" + And I create a local pipeline for "" named "" + Then the transformed data for "" is "" + """ + show_doc(self.test_scenario1) + headers = ["pipeline_file", "models_list", "name", "input_data", + "output_data"] + examples = [ + ['bigml/tests/pipeline3.zip', + '["anomaly/631a6a968f679a2d2d000319",' + ' "model/631a6a6f8f679a2d31000445"]', + "pipeline3", + '{"plasma glucose": 120, "age": 30, "bmi": 46}', + '{"plasma glucose": 120, "age": 30, "glucose half": 60,' + ' "age_range": "2nd third", "bmi": 46,' + ' "score": 0.85456,' + ' "prediction": "false", "probability": 0.6586746586746587}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + pipeline_compare.i_expand_file_with_models_list( + self, example["pipeline_file"], example["models_list"]) + pipeline_compare.i_create_a_local_pipeline_from_models_list( + self, example["models_list"], example["name"], + storage=os.path.splitext(example["pipeline_file"])[0]) + pipeline_compare.the_pipeline_transformed_data_is( + self, example["input_data"], example["output_data"]) + + def test_scenario2(self): + """ + Scenario 2: Successfully creating a local pipeline from two BMLPipelines + Given I expand the zip file "" that contain "" + And I create a local pipeline for "" named "" + And I create a local pipeline for "" named "" + And I 
create a local pipeline "" for both pipelines + Then the transformed data for "" is "" + """ + show_doc(self.test_scenario2) + headers = ["pipeline_file", "models_list", "model1", "name1", + "model2", "name2", "name", "input_data", "output_data"] + examples = [ + ['bigml/tests/pipeline3.zip', + '["anomaly/631a6a968f679a2d2d000319",' + ' "model/631a6a6f8f679a2d31000445"]', + '["model/631a6a6f8f679a2d31000445"]', + "pipeline1", + '["anomaly/631a6a968f679a2d2d000319"]', + "pipeline2", + "pipeline3", + '{"plasma glucose": 120, "age": 30, "bmi": 46}', + '{"plasma glucose": 120, "age": 30, "glucose half": 60,' + ' "age_range": "2nd third", "bmi": 46,' + ' "score": 0.85456,' + ' "prediction": "false", "probability": 0.6586746586746587}']] + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + pipeline_compare.i_expand_file_with_models_list( + self, example["pipeline_file"], example["models_list"]) + pipe1 = pipeline_compare.i_create_a_local_pipeline_from_models_list( + self, example["model1"], example["name1"], + storage=os.path.splitext(example["pipeline_file"])[0]) + pipe2 = pipeline_compare.i_create_a_local_pipeline_from_models_list( + self, example["model2"], example["name2"], + storage=os.path.splitext(example["pipeline_file"])[0]) + pipeline_compare.i_create_composed_pipeline(self, [pipe1, pipe2], + example["name"]) + pipeline_compare.the_pipeline_transformed_data_is( + self, example["input_data"], example["output_data"]) + + def test_scenario3(self): + """ + Scenario: Successfully comparing remote and local predictions + with raw date input for linear regression: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a linear regression + And I wait until the linear regression is ready + less than secs + And I create a local pipeline for the linear regression 
named "" + When I create a prediction for "" + Then the prediction for "" is "" + And the prediction in the transformed data for "" is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "name"] + examples = [ + ['data/dates2.csv', '20', '20', '25', + '{"time-1": "1910-05-08T19:10:23.106", "cat-0":"cat2"}', + '000002', -0.01284, "pipeline1"], + ['data/dates2.csv', '20', '20', '25', + '{"time-1": "1920-06-30T20:21:20.320", "cat-0":"cat1"}', + '000002', -0.09459, "pipeline2"], + ['data/dates2.csv', '20', '20', '25', + '{"time-1": "1932-01-30T19:24:11.440", "cat-0":"cat2"}', + '000002', -0.02259, "pipeline3"], + ['data/dates2.csv', '20', '20', '25', + '{"time-1": "1950-11-06T05:34:05.252", "cat-0":"cat1"}', + '000002', -0.06754, "pipeline4"], + ['data/dates2.csv', '20', '20', '25', + '{"time-1": "2001-01-05T23:04:04.693", "cat-0":"cat2"}', + '000002', 0.05204, "pipeline5"], + ['data/dates2.csv', '20', '20', '25', + '{"time-1": "2011-04-01T00:16:45.747", "cat-0":"cat2"}', + '000002', 0.05878, "pipeline6"]] + show_doc(self.test_scenario3) + + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"], shared=example["data"]) + linear_create.i_create_a_linear_regression( + self, shared=example["data"]) + linear_create.the_linear_regression_is_finished_in_less_than( + self, example["model_wait"], shared=example["data"]) + pipeline_compare.i_create_a_local_pipeline_from_models_list( + self, [world.linear_regression["resource"]], example["name"]) + prediction_create.i_create_a_linear_prediction( + self, example["input_data"]) 
+ prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"]) + pipeline_compare.the_pipeline_result_key_is( + self, example["input_data"], "prediction", + example["prediction"]) + + def test_scenario4(self): + """ + Scenario: Successfully comparing remote and local predictions + with raw date input for deepnet: + Given I create a data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a deepnet + And I wait until the deepnet is ready less than secs + And I create a local pipeline for the deepnet named "" + When I create a prediction for "" + Then the prediction for "" is "" + And the prediction in the transformed data for "" is "" + """ + headers = ["data", "source_wait", "dataset_wait", "model_wait", + "input_data", "objective_id", "prediction", "name"] + examples = [ + ['data/dates2.csv', '20', '45', '160', + '{"time-1": "1910-05-08T19:10:23.106", "cat-0":"cat2"}', + '000002', -0.4264, "pipeline1"], + ['data/dates2.csv', '20', '45', '160', + '{"time-1": "2011-04-01T00:16:45.747", "cat-0":"cat2"}', + '000002', 0.11985, "pipeline2"], + ['data/dates2.csv', '20', '45', '160', + '{"time-1": "1969-W29-1T17:36:39Z", "cat-0":"cat1"}', + '000002', -0.08211, "pipeline3"], + ['data/dates2.csv', '20', '45', '160', + '{"time-1": "1920-06-45T20:21:20.320", "cat-0":"cat1"}', + '000002', -0.08211, "pipeline4"], + ['data/dates2.csv', '20', '45', '160', + '{"time-1": "2001-01-05T23:04:04.693", "cat-0":"cat2"}', + '000002', 0.00388, "pipeline5"], + ['data/dates2.csv', '20', '45', '160', + '{"time-1": "1950-11-06T05:34:05.602", "cat-0":"cat1"}', + '000002', -0.04976, "pipeline6"], + ['data/dates2.csv', '20', '45', '160', + '{"time-1": "1932-01-30T19:24:11.440", "cat-0":"cat2"}', + '000002', -0.36264, "pipeline7"], + ['data/dates2.csv', '20', '45', '160', + '{"time-1": "Mon Jul 14 17:36 +0000 1969", "cat-0":"cat1"}', + 
'000002', -0.08211, "pipeline8"]] + show_doc(self.test_scenario4) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file( + self, example["data"], shared=example["data"]) + source_create.the_source_is_finished( + self, example["source_wait"], shared=example["data"]) + dataset_create.i_create_a_dataset(self, shared=example["data"]) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + deepnet_shared = "%s_no_sug" % example["data"] + model_create.i_create_a_no_suggest_deepnet( + self, shared=deepnet_shared) + model_create.the_deepnet_is_finished_in_less_than( + self, example["model_wait"], shared=deepnet_shared) + pipeline_compare.i_create_a_local_pipeline_from_models_list( + self, [world.deepnet["resource"]], example["name"]) + prediction_create.i_create_a_deepnet_prediction( + self, example["input_data"]) + prediction_create.the_prediction_is( + self, example["objective_id"], example["prediction"], + precision=4) + pipeline_compare.the_pipeline_result_key_is( + self, example["input_data"], "prediction", + example["prediction"], precision=4) + + def test_scenario5(self): + """ + Scenario: Successfully comparing remote and local predictions + with raw input for deepnets with images: + Given I create an annotated images data source uploading a "" file + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create a deepnet with parms + And I wait until the deepnet is ready + less than secs + And I create a local pipeline for the deepnet named "" + When I create a prediction for "" + Then the prediction for "" is "" + When I create a prediction for "" + Then the prediction for "" is "" + And the prediction in the transformed data for "" is "" + And the probability in the transformed data for "" is "" + """ + headers = ["data", "source_wait", 
"dataset_wait", "model_wait", + "input_data", "objective_id", "model_conf", "image_fields", + "name"] + examples = [ + ['data/images/metadata.json', '500', '500', '600', + '{"image_id": "data/fruits1e.jpg", "label":"f1"}', + '100003', {"objective_field": "100003", + "number_of_hidden_layers": 1, + "suggest_structure": False, + "missing_numerics": True, + "max_training_time": 100, + "hidden_layers": [{ + "activation_function": "tanh", + "number_of_nodes": 10}]}, + ['image_id'], "pipeline1"]] + show_doc(self.test_scenario5) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_create_annotated_source( + self, + example["data"], + args={"image_analysis": {"enabled": False, + "extracted_features": []}}) + source_create.the_source_is_finished( + self, example["source_wait"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + model_create.i_create_a_deepnet_with_objective_and_params( + self, example["objective_id"], + json.dumps(example["model_conf"])) + model_create.the_deepnet_is_finished_in_less_than( + self, example["model_wait"]) + pipeline_compare.i_create_a_local_pipeline_from_models_list( + self, [world.deepnet["resource"]], example["name"]) + prediction_create.i_create_a_deepnet_prediction( + self, example["input_data"], example["image_fields"]) + prediction = world.prediction["output"] + probability = world.prediction["probability"] + pipeline_compare.i_create_a_local_pipeline_from_models_list( + self, [world.deepnet["resource"]], example["name"]) + pipeline_compare.the_pipeline_result_key_is( + self, example["input_data"], "prediction", prediction, + precision=4) + pipeline_compare.the_pipeline_result_key_is( + self, example["input_data"], "probability", probability, + precision=2) + + def test_scenario6(self): + """ + Scenario: Successfully comparing remote and local anomaly scores + with raw input 
for dataset with images: + Given I create an annotated images data source uploading a "" file and + + And I wait until the source is ready less than secs + And I create a dataset + And I wait until the dataset is ready less than secs + And I create an anomaly detector + And I wait until the anomaly is ready + less than secs + And I create a local pipeline for the anomaly detector named "" + When I create an anomaly score for "" + Then the anomaly score is "" + And the anomaly score in the transformed data for "" is "" + """ + headers = ["data", "extracted_features", "source_wait", "dataset_wait", + "anomaly_wait", "input_data", "score", "name"] + examples = [ + ['data/images/fruits_hist.zip', + ["dimensions", "average_pixels", "level_histogram", + "histogram_of_gradients", ["wavelet_subbands", 5], + ["pretrained_cnn", "mobilenetv2"]], + '500', '500', '600', + '{"image_id": "data/fruits1e.jpg"}', 0.39902, "pipeline1"]] + show_doc(self.test_scenario6) + for example in examples: + example = dict(zip(headers, example)) + show_method(self, self.bigml["method"], example) + source_create.i_upload_a_file_with_args( + self, + example["data"], + args=json.dumps({"image_analysis": { + "enabled": True, + "extracted_features": example["extracted_features"]}})) + source_create.the_source_is_finished( + self, example["source_wait"]) + dataset_create.i_create_a_dataset(self) + dataset_create.the_dataset_is_finished_in_less_than( + self, example["dataset_wait"]) + anomaly_create.i_create_an_anomaly(self) + anomaly_create.the_anomaly_is_finished_in_less_than( + self, example["anomaly_wait"]) + pipeline_compare.i_create_a_local_pipeline_from_models_list( + self, [world.anomaly["resource"]], example["name"]) + prediction_create.i_create_an_anomaly_score( + self, example["input_data"]) + score = world.anomaly_score["score"] + prediction_create.the_anomaly_score_is( + self, world.anomaly_score["score"]) + pipeline_compare.the_pipeline_result_key_is( + self, example["input_data"], 
"score", score) diff --git a/bigml/tests/test_99_cleaning.py b/bigml/tests/test_99_cleaning.py new file mode 100644 index 00000000..1f80e98e --- /dev/null +++ b/bigml/tests/test_99_cleaning.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +#pylint: disable=locally-disabled,line-too-long,attribute-defined-outside-init +#pylint: disable=locally-disabled,unused-import,no-self-use +# +# Copyright 2018-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Creating external connectors + +""" + +from .world import world, teardown_fn, setup_module, ok_ + + +class TestCleaningProject: + """Artifact to clean all the created resources after each test execution""" + + def setup_method(self): + """ + Debug information + """ + print("\nFinal cleaning\n") + + def test_final(self): + """Final empty test """ + ok_(True) + + def teardown_method(self): + """ + Debug information + """ + teardown_fn(force=True) + print("\nEnd of tests.") diff --git a/bigml/tests/world.py b/bigml/tests/world.py new file mode 100644 index 00000000..f3c86ba2 --- /dev/null +++ b/bigml/tests/world.py @@ -0,0 +1,387 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2015-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +""" Mimic World lettuce object + +""" +import os +import sys +import re +import shutil +import time +import datetime +import pprint +import json +import math +import pytest + +from bigml.api import BigML +from bigml.api import HTTP_NO_CONTENT, HTTP_NOT_FOUND +from bigml.constants import IRREGULAR_PLURALS, RENAMED_RESOURCES, \ + TINY_RESOURCE, ALL_FIELDS +from bigml.api_handlers.externalconnectorhandler import get_env_connection_info +from bigml.util import get_exponential_wait + + +MAX_RETRIES = 10 +RESOURCE_TYPES = [ + 'cluster', + 'fusion', + 'optiml', + 'composite', + 'source', + 'dataset', + 'prediction', + 'evaluation', + 'ensemble', + 'batchprediction', + 'centroid', + 'batchcentroid', + 'anomaly', + 'anomalyscore', + 'batchanomalyscore', + 'project', + 'sample', + 'correlation', + 'statisticaltest', + 'logisticregression', + 'model', + 'deepnet', + 'association', + 'associationset', + 'configuration', + 'topicmodel', + 'topicdistribution', + 'timeseries', + 'forecast', + 'pca', + 'projection', + 'batchprojection', + 'linearregression', + 'script', + 'execution', + 'library', + 'externalconnector' +] + +irregular_plurals = {} +irregular_plurals.update(IRREGULAR_PLURALS) +irregular_plurals.update({"timeseries": "time_series_set"}) + + +def plural(resource_type): + """Creates the plural form of a resource type + + """ + return irregular_plurals.get(resource_type, "%ss" % resource_type) + + +def show_doc(self, examples=None): + """ Shows the name and documentation of the method passed as argument + + """ + print("%s:\n%s" % (self.__name__, 
self.__doc__)) + if examples: + print(" |%s" % \ + "\n |".join(["|".join([str(item) + for item in example]) for + example in examples])) + +def show_method(self, method, example): + """Prints the test class and method of the current test""" + class_name = re.sub(".*'(.*)'.*", "\\1", str(self.__class__)) + print("\nTesting %s %s with:\n" % (class_name, method), example) + + +def float_round(value, precision=5): + """Rounding if float""" + if isinstance(value, float): + return round(value, precision) + return value + + +def flatten_shared(): + """Returns the list of IDs stored in the world.shared structure """ + ids_list = [] + for _, value in world.shared.items(): + for _, resource in value.items(): + ids_list.append(resource["resource"]) + return ids_list + + +def sort_dict(item): + """ + Sort nested dict + """ + if isinstance(item, list): + return [sort_dict(v) for v in item] + if not isinstance(item, dict): + return item + return {k: sort_dict(v) for k, v in sorted(item.items())} + + +def eq_(*args, msg=None, precision=None): + """Wrapper to assert. 
If precision is set, previous rounding""" + new_args = list(args) + if isinstance(args[0], dict): + assert isinstance(args[1], dict) + for index, arg in enumerate(new_args): + new_args[index] = list(dict(sorted(arg.items())).values()) + if precision is not None: + if isinstance(new_args[0], list): + if msg is None: + msg = "Comparing: %s" % new_args + assert all(len(new_args[0]) == len(b) for b in new_args[1:]), msg + pairs = zip(new_args[0], new_args[1]) + if msg is None: + msg = "Comparing: %s" % new_args + assert all(float_round(a, precision) == float_round(b, precision) + for a, b in pairs), msg + else: + for index, arg in enumerate(new_args): + new_args[index] = float_round(arg, precision) + if msg is None: + msg = "Comparing: %s" % new_args + assert all(new_args[0] == b for b in new_args[1:]), msg + else: + if isinstance(new_args[0], (dict, list)): + for index, arg in enumerate(new_args): + new_args[index] = sort_dict(new_args[index]) + if msg is None: + msg = "expected: %s, got: %s" % (new_args[0], new_args[1]) + assert new_args[0] == new_args[1], msg + + +def ok_(value, msg=None): + """Wrapper to assert.""" + if msg is None: + assert value + else: + assert value, msg + + +def approx_(number_a, number_b, msg=None, precision=5): + """Wrapper for pytest approx function""" + epsilon = math.pow(0.1, precision) + if msg is None: + msg = "%s != %s" % (repr(number_a), repr(number_b)) + assert number_a == pytest.approx(number_b, abs=epsilon), msg + + +class World: + """Object to store common test resources""" + + def __init__(self): + self.username = None + self.api_key = None + self.api = None + self.debug = False + try: + self.debug = bool(os.environ.get('BIGML_DEBUG', 0)) + except ValueError: + pass + self.short_debug = False + try: + self.short_debug = bool(os.environ.get('BIGML_SHORT_DEBUG', 0)) + except ValueError: + pass + self.clear() + self.dataset_ids = [] + self.fields_properties_dict = {} + self.counters = {} + self.test_project_name = "Test: python 
bindings %s" % \ + datetime.datetime.now() + self.project_id = None + self.print_connection_info() + self.delta = int(os.environ.get('BIGML_DELTA', '1')) + self.errors = [] + self.shared = {} + + def print_connection_info(self): + """Prints the variables used for the connection authentication""" + self.username = os.environ.get('BIGML_USERNAME') + self.api_key = os.environ.get('BIGML_API_KEY') + self.external_conn = get_env_connection_info() + + if self.username is None or self.api_key is None: + assert False, ("Tests use the BIGML_USERNAME and BIGML_API_KEY" + " environment variables to authenticate the" + " connection, but they seem to be unset. Please," + "set them before testing.") + self.api = BigML(self.username, self.api_key, debug=self.debug, + short_debug=self.short_debug, + storage=(None if not (self.debug or self.short_debug) + else "./debug_storage")) + print("----------------------------------------------------------") + print(self.api.connection_info()) + print(self.external_connection_info()) + print("----------------------------------------------------------") + + def external_connection_info(self): + """Printable string: The information used to connect to a external + data source + + """ + info = "External data connection config:\n%s" % \ + pprint.pformat(self.external_conn, indent=4) + return info + + def clear(self): + """Clears the stored resources' ids + + """ + for resource_type in RESOURCE_TYPES: + setattr(self, plural(resource_type), []) + setattr(self, RENAMED_RESOURCES.get(resource_type, + resource_type), None) + + def _delete_resources(self, object_list, resource_type): + """Deletes resources grouped by type""" + if object_list: + print("Deleting %s %s" % (len(object_list), + plural(resource_type))) + kwargs = {} + if resource_type == "composite": + resource_type = "source" + kwargs = {"query_string": "delete_all=true"} + delete_method = self.api.deleters[resource_type] + for obj_id in object_list: + counter = 0 + print("Deleting %s" % 
obj_id) + result = delete_method(obj_id, **kwargs) + while (result['code'] not in [HTTP_NO_CONTENT, + HTTP_NOT_FOUND] and + counter < MAX_RETRIES): + print("Delete failed for %s. Retrying" % obj_id) + time.sleep(3 * self.delta) + counter += 1 + result = delete_method(obj_id, **kwargs) + if counter == MAX_RETRIES: + print ("Retries to delete the created resources are" + " exhausted. Failed to delete.") + + def delete_resources(self): + """Deletes the created objects""" + keepers = flatten_shared() + for resource_type in RESOURCE_TYPES: + object_list = getattr(self, plural(resource_type)) + object_list.reverse() + object_list = [obj for obj in object_list if obj not in keepers] + self._delete_resources(object_list, resource_type) + if world.errors: + print("Failed resources: \n\n") + for resource in world.errors: + print(json.dumps(resource["status"], indent=4)) + + def store_resources(self): + """Stores the created objects """ + + for resource_type in RESOURCE_TYPES: + object_list = set(getattr(self, plural(resource_type))) + if object_list: + print("Storing %s %s" % (len(object_list), + plural(resource_type))) + if resource_type == "composite": + resource_type = "source" + store_method = self.api.getters[resource_type] + for obj_id in object_list: + result = store_method(obj_id) + self.api.ok(result) + + def get_minimal_resource(self, resource_id): + """Retrieving resource info in a minimal way to get status info""" + return self.api.get_resource( + resource_id, query_string=TINY_RESOURCE) + + def get_maximal_resource(self, resource_id): + """Retrieving all resource info for local handling""" + return self.api.get_resource( + resource_id, query_string=ALL_FIELDS) + + +world = World() + +def res_filename(filename): + """Returns path to a data filename""" + directory = os.path.dirname(sys.modules['bigml'].__file__) + return os.path.join(os.path.dirname(directory), filename) + + +def setup_module(): + """Operations to be performed before each module + + """ + if 
world.project_id is None: + if "project" not in world.shared: + world.shared["project"] = {} + world.shared["project"]["common"] = world.api.create_project( \ + {"name": world.test_project_name}) + world.project_id = world.shared["project"]["common"]['resource'] + print("Creating common project: ", world.project_id) + world.clear() + + +def teardown_module(): + """Operations to be performed after each module + + """ + print("Teardown module ---------------------------") + teardown_fn(force=False) + + +def teardown_fn(force=False): + """Operations to be performed after a certain point """ + if not world.debug and not world.short_debug: + if os.path.exists('./tmp'): + shutil.rmtree('./tmp') + + world.delete_resources() + if force: + world.api.delete_project(world.project_id) + del world.shared["project"] + world.project_id = None + else: + world.store_resources() + + +def logged_wait(start, delta, count, res_description, progress=0, status=None): + """Comparing the elapsed time to the expected delta and waiting for + the next sleep period. + + """ + if status is not None: + progress = status.get("progress", 0) + status_code = status.get("code") + progress_dumping = progress if progress > 0.8 \ + else 0 # dumping when almost finished + wait_time = min(get_exponential_wait( + ((1.0 - progress_dumping) * delta / 100.0) + 0.5, count), delta) + message = "" + if status is not None: + message =" (status: %s, progress: %s)" % ( + status_code, + progress) + print("Waiting for %s%s %s secs." 
% ( + res_description, + message, + wait_time)) + time.sleep(wait_time) + elapsed = (datetime.datetime.utcnow() - start).seconds + if elapsed > delta / 2.0: + print("%s seconds waiting for %s" % \ + (elapsed, res_description)) + ok_(elapsed < delta) diff --git a/bigml/timeseries.py b/bigml/timeseries.py new file mode 100644 index 00000000..62c6b2f5 --- /dev/null +++ b/bigml/timeseries.py @@ -0,0 +1,367 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A local Predictive Time Series model object + +This module defines a Time Series model to make predictions locally or +embedded into your application without needing to send requests to +BigML.io. + +This module can help you enormously to +reduce the latency for each prediction and let you use your time +series offline.
+ +Example usage (assuming that you have previously set up the BIGML_USERNAME +and BIGML_API_KEY environment variables and that you own the +timeseries/id below): + +from bigml.api import BigML +from bigml.timeseries import TimeSeries + +api = BigML() + +time_series = TimeSeries( + 'timeseries/5026965515526876630001b2') +time_series.forecast({"price": {"horizon": 10}}) + +""" +import logging +import re +import sys +import io +import pprint + +from bigml.api import FINISHED +from bigml.api import get_status, get_api_connection, get_time_series_id +from bigml.util import utf8, use_cache, load +from bigml.basemodel import get_resource_dict, extract_objective +from bigml.modelfields import ModelFields +from bigml.constants import DECIMALS +from bigml.tssubmodels import SUBMODELS +from bigml.tsoutconstants import SUBMODELS_CODE, TRIVIAL_MODEL, \ + SEASONAL_CODE, FORECAST_FUNCTION, USAGE_DOC + +LOGGER = logging.getLogger('BigML') + +REQUIRED_INPUT = "horizon" +SUBMODEL_KEYS = ["indices", "names", "criterion", "limit"] +DEFAULT_SUBMODEL = {"criterion": "aic", "limit": 1} +INDENT = " " * 4 + + +def compute_forecasts(submodels, horizon): + """Computes the forecasts for each of the models in the submodels + array. The number of forecasts is set by horizon. + """ + forecasts = [] + for submodel in submodels: + name = submodel["name"] + trend = name + seasonality = None + if "," in name: + _, trend, seasonality = name.split(",") + args = [submodel, horizon, seasonality] + else: + args = [submodel, horizon] + + forecasts.append( \ + {"model": name, + "point_forecast": [round(value, DECIMALS) for value in + SUBMODELS[trend](*args)]}) + return forecasts + + +def filter_submodels(submodels, filter_info): + """Filters the submodels available for the field in the time-series + model according to the criteria provided in the prediction input data + for the field.
+ + """ + field_submodels = [] + submodel_names = [] + # filtering by indices and/or names + indices = filter_info.get(SUBMODEL_KEYS[0], []) + names = filter_info.get(SUBMODEL_KEYS[1], []) + + if indices: + # adding all submodels by index if they are not also in the names + # list + field_submodels = [submodel for index, submodel in \ + enumerate(submodels) if index in indices] + + # union with filtered by names + if names: + pattern = r'|'.join(names) + # only adding the submodels if they have not been included by using + # indices + submodel_names = [submodel["name"] for submodel in field_submodels] + named_submodels = [submodel for submodel in submodels \ + if re.search(pattern, submodel["name"]) is not None and \ + submodel["name"] not in submodel_names] + field_submodels.extend(named_submodels) + + if not indices and not names: + field_submodels.extend(submodels) + + # filtering the resulting set by criterion and limit + criterion = filter_info.get(SUBMODEL_KEYS[2]) + if criterion is not None: + field_submodels = sorted(field_submodels, + key=lambda x: x.get(criterion, float('inf'))) + limit = filter_info.get(SUBMODEL_KEYS[3]) + if limit is not None: + field_submodels = field_submodels[0: limit] + return field_submodels + + +class TimeSeries(ModelFields): + """ A lightweight wrapper around a time series model. + + Uses a BigML remote time series model to build a local version + that can be used to generate predictions locally. 
+ + """ + + def __init__(self, time_series, api=None, cache_get=None): + + if use_cache(cache_get): + # using a cache to store the model attributes + self.__dict__ = load(get_time_series_id(time_series), cache_get) + return + + self.resource_id = None + self.name = None + self.description = None + self.dataset_id = None + self.input_fields = [] + self.default_numeric_value = None + self.objective_fields = [] + self.all_numeric_objectives = False + self.period = 1 + self.ets_models = {} + self.error = None + self.damped_trend = None + self.seasonality = None + self.trend = None + self.time_range = {} + self.field_parameters = {} + self._forecast = {} + api = get_api_connection(api) + + self.resource_id, time_series = get_resource_dict( \ + time_series, "timeseries", api=api) + + if 'object' in time_series and \ + isinstance(time_series['object'], dict): + time_series = time_series['object'] + try: + self.dataset_id = time_series.get('dataset') + self.name = time_series.get("name") + self.description = time_series.get("description") + self.input_fields = time_series.get("input_fields", []) + self.default_numeric_value = time_series.get( \ + "default_numeric_value") + self._forecast = time_series.get("forecast") + self.objective_fields = time_series.get( + "objective_fields", []) + objective_field = time_series['objective_field'] if \ + time_series.get('objective_field') else \ + time_series['objective_fields'] + except (AttributeError, KeyError): + raise ValueError("Failed to find the time series expected " + "JSON structure. 
Check your arguments.") + if 'time_series' in time_series and \ + isinstance(time_series['time_series'], dict): + status = get_status(time_series) + if 'code' in status and status['code'] == FINISHED: + time_series_info = time_series['time_series'] + fields = time_series_info.get('fields', {}) + self.fields = fields + if not self.input_fields: + self.input_fields = [ \ + field_id for field_id, _ in + sorted(list(self.fields.items()), + key=lambda x: x[1].get("column_number"))] + self.all_numeric_objectives = time_series_info.get( \ + 'all_numeric_objectives') + self.period = time_series_info.get('period', 1) + self.ets_models = time_series_info.get('ets_models', {}) + self.error = time_series_info.get('error') + self.damped_trend = time_series_info.get('damped_trend') + self.seasonality = time_series_info.get('seasonality') + self.trend = time_series_info.get('trend') + self.time_range = time_series_info.get('time_range') + self.field_parameters = time_series_info.get( \ + 'field_parameters', {}) + + objective_id = extract_objective(objective_field) + ModelFields.__init__( + self, fields, + objective_id=objective_id) + else: + raise Exception("The time series isn't finished yet") + else: + raise Exception("Cannot create the TimeSeries instance." 
+ " Could not find the 'time_series' key" + " in the resource:\n\n%s" % + time_series) + + def forecast(self, input_data=None): + """Returns the class prediction and the confidence + + input_data: Input data to be predicted + + """ + if not input_data: + forecasts = {} + for field_id, value in list(self._forecast.items()): + forecasts[field_id] = [] + for forecast in value: + local_forecast = {} + local_forecast.update( \ + {"point_forecast": forecast["point_forecast"]}) + local_forecast.update( \ + {"model": forecast["model"]}) + forecasts[field_id].append(local_forecast) + return forecasts + + # Checks and cleans input_data leaving only the fields used as + # objective fields in the model + norm_input_data = self.filter_objectives( \ + input_data) + + # filter submodels: filtering the submodels in the time-series + # model to be used in the prediction + filtered_submodels = {} + for field_id, field_input in list(norm_input_data.items()): + filter_info = field_input.get("ets_models", {}) + if not filter_info: + filter_info = DEFAULT_SUBMODEL + filtered_submodels[field_id] = filter_submodels( \ + self.ets_models[field_id], filter_info) + + forecasts = {} + for field_id, submodels in list(filtered_submodels.items()): + forecasts[field_id] = compute_forecasts(submodels, \ + norm_input_data[field_id]["horizon"]) + + return forecasts + + def predict(self, input_data, full=False): + """Method to homogeneize the local models interface for all BigML + models. It returns the forecast method result. + """ + forecast = self.forecast(input_data) + if full: + return {"forecast": forecast} + return forecast + + def filter_objectives(self, input_data, + full=False): + """Filters the keys given in input_data checking against the + objective fields in the time-series model fields. + If `full` is set to True, it also + provides information about the fields that are not used. 
+ + """ + + unused_fields = [] + new_input = {} + if isinstance(input_data, dict): + for key, value in list(input_data.items()): + if key not in self.fields: + key = self.inverted_fields.get(key, key) + if key in self.input_fields: + new_input[key] = value + else: + unused_fields.append(key) + + # raise error if no horizon is provided + for key, value in list(input_data.items()): + value = self.normalize(value) + if not isinstance(value, dict): + raise ValueError( \ + "Each field input data needs to be specified " + "as a dictionary. Found %s for field %s." % ( \ + type(value).__name__, key)) + if REQUIRED_INPUT not in value: + raise ValueError( \ + "Each field in input data must contain at" + " least a \"horizon\" attribute.") + if any(key not in SUBMODEL_KEYS for key in \ + list(value.get("ets_models", {}).keys())): + raise ValueError( \ + "Only %s allowed as keys in each fields submodel" + " filter." % ", ".join(SUBMODEL_KEYS)) + + result = (new_input, unused_fields) if full else \ + new_input + return result + LOGGER.error("Failed to read input data in the expected" + " {field:value} format.") + return ({}, []) if full else {} + + def python(self, out=sys.stdout): + """Generates the code in python that creates the forecasts + + """ + attributes = ["l", "b", "s", "phi", "value", "slope"] + components = {} + model_names = [] + out.write(utf8(USAGE_DOC % (self.resource_id, + self.fields[self.objective_id]["name"]))) + output = ["COMPONENTS = \\"] + for field_id, models in list(self.ets_models.items()): + model_components = {} + for model in models: + final_state = model.get("final_state", {}) + attrs = {} + for attribute in attributes: + if attribute in model: + attrs.update({attribute: model[attribute]}) + elif attribute in final_state: + attrs.update( \ + {attribute: final_state[attribute]}) + model_names.append(model["name"]) + model_components[model["name"]] = attrs + field_name = self.fields[field_id]["name"] + if field_name not in components: +
components[field_name] = model_components + partial_output = io.StringIO() + pprint.pprint(components, stream=partial_output) + for line in partial_output.getvalue().split("\n"): + output.append("%s%s" % (INDENT, line)) + + out.write(utf8("\n".join(output))) + + model_names = list(set(model_names)) + if any(name in model_names for name in ["naive", "mean"]): + out.write(utf8(TRIVIAL_MODEL)) + if any("," in name and name.split(",")[2] in ["A", "M"] for \ + name in model_names): + out.write(utf8(SEASONAL_CODE)) + trends = [name.split(",")[1] for name in model_names if "," in name] + trends.extend([name for name in model_names if "," not in name]) + trends = set(trends) + models_function = [] + for trend in trends: + models_function.append("\"%s\": _%s_forecast" % (trend, trend)) + out.write(utf8(SUBMODELS_CODE[trend])) + out.write(utf8("\n\nMODELS = \\\n")) + out.write(utf8("%s%s%s" % \ + (" {", ",\n ".join(models_function), "}"))) + + out.write(utf8(FORECAST_FUNCTION)) diff --git a/bigml/topicmodel.py b/bigml/topicmodel.py new file mode 100644 index 00000000..abc87b5f --- /dev/null +++ b/bigml/topicmodel.py @@ -0,0 +1,475 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2016-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""A local Predictive Topic Model. + +This module allows you to download and use Topic models for local +predicitons. 
Specifically, the function topic_model.distribution allows you +to pass in input text and infers a generative distribution over the +topics in the learned topic model. + +Example usage (assuming that you have previously set up the BIGML_USERNAME +and BIGML_API_KEY environment variables and that you own the topicmodel/id +below): + +from bigml.api import BigML +from bigml.topicmodel import TopicModel + +api = BigML() + +topic_model = TopicModel('topicmodel/5026965515526876630001b2') +topic_distribution = topic_model.distribution({"text": "A sample string"}) + +""" + +import random +import logging +import array +try: + import Stemmer +except ImportError: + raise ImportError("Failed to import the Stemmer module. You need to" + " install pystemmer to use the Topic Model class.") + + +from bigml.api import FINISHED +from bigml.api import get_status, get_api_connection, get_topic_model_id +from bigml.basemodel import get_resource_dict +from bigml.modelfields import ModelFields +from bigml.util import use_cache, load, dump, dumps, get_data_format, \ + get_formatted_data, format_data, get_data_transformations +from bigml.constants import OUT_NEW_FIELDS, OUT_NEW_HEADERS, INTERNAL + + +LOGGER = logging.getLogger('BigML') + +MAXIMUM_TERM_LENGTH = 30 +MIN_UPDATES = 16 +MAX_UPDATES = 512 +SAMPLES_PER_TOPIC = 128 + +CODE_TO_NAME = { + "da": 'danish', + "nl": 'dutch', + "en": 'english', + "fi": 'finnish', + "fr": 'french', + "de": 'german', + "hu": 'hungarian', + "it": 'italian', + "nn": 'norwegian', + "pt": 'portuguese', + "ro": 'romanian', + "ru": 'russian', + "es": 'spanish', + "sv": 'swedish', + "tr": 'turkish' +} + + +def distribution_to_dict(distribution): + """Returns a dictionary as topic_name: probability for the + topic distribution.
+ """ + prediction_dict = {} + for topic_info in distribution: + prediction_dict.update({topic_info["name"]: + topic_info["probability"]}) + return prediction_dict + + +class TopicModel(ModelFields): + """ A lightweight wrapper around a Topic Model. + + Uses a BigML remote Topic Model to build a local version that can be used + to generate topic distributions for input documents locally. + + """ + #pylint: disable=locally-disabled,c-extension-no-member,invalid-name + def __init__(self, topic_model, api=None, cache_get=None): + + self.lang = None + self.stemmer = None + if use_cache(cache_get): + # using a cache to store the model attributes + self.__dict__ = load(get_topic_model_id(topic_model), cache_get) + if self.lang in CODE_TO_NAME: + self.stemmer = Stemmer.Stemmer(CODE_TO_NAME[self.lang]) + return + + self.resource_id = None + self.name = None + self.description = None + self.parent_id = None + self.seed = None + self.case_sensitive = False + self.bigrams = False + self.ntopics = None + self.temp = None + self.phi = None + self.term_to_index = None + self.topics = [] + api = get_api_connection(api) + + self.resource_id, topic_model = get_resource_dict( \ + topic_model, "topicmodel", api=api) + + if 'object' in topic_model and isinstance(topic_model['object'], dict): + topic_model = topic_model['object'] + try: + self.parent_id = topic_model.get('dataset') + self.name = topic_model.get("name") + self.description = topic_model.get("description") + except AttributeError: + raise ValueError("Failed to find the expected " + "JSON structure. 
Check your arguments.") + + if 'topic_model' in topic_model \ + and isinstance(topic_model['topic_model'], dict): + status = get_status(topic_model) + if 'code' in status and status['code'] == FINISHED: + self.input_fields = topic_model['input_fields'] + model = topic_model['topic_model'] + self.topics = model['topics'] + + if 'language' in model and model['language'] is not None: + self.lang = model['language'] + if self.lang in CODE_TO_NAME: + self.stemmer = Stemmer.Stemmer(CODE_TO_NAME[self.lang]) + + self.term_to_index = {self.stem(term): index for index, term + in enumerate(model['termset'])} + + self.seed = abs(model['hashed_seed']) + self.case_sensitive = model['case_sensitive'] + self.bigrams = model['bigrams'] + + self.ntopics = len(model['term_topic_assignments'][0]) + + self.alpha = model['alpha'] + self.ktimesalpha = self.ntopics * self.alpha + + self.temp = [0] * self.ntopics + + assignments = model['term_topic_assignments'] + beta = model['beta'] + nterms = len(self.term_to_index) + + sums = [sum(n[index] for n in assignments) for index + in range(self.ntopics)] + + self.phi = [[0 for _ in range(nterms)] + for _ in range(self.ntopics)] + + for k in range(self.ntopics): + norm = sums[k] + nterms * beta + for w in range(nterms): + self.phi[k][w] = (assignments[w][k] + beta) / norm + + missing_tokens = model.get("missing_tokens") + ModelFields.__init__(self, model['fields'], + missing_tokens=missing_tokens) + else: + raise Exception("The topic model isn't finished yet") + else: + raise Exception("Cannot create the topic model instance. Could not" + " find the 'topic_model' key in the" + " resource:\n\n%s" % topic_model) + + def distribution(self, input_data): + """Returns the distribution of topics given the input text. 
+ + """ + # Checks and cleans input_data leaving the fields used in the model + input_data = self.filter_input_data(input_data) + + return self.distribution_for_text("\n\n".join(list(input_data.values()))) + + def distribution_for_text(self, text): + """Returns the topic distribution of the given `text`, which can + either be a string or a list of strings + + """ + if isinstance(text, str): + astr = text + else: + # List of strings + astr = "\n\n".join(text) + + doc = self.tokenize(astr) + topics_probability = self.infer(doc) + return [{"name": self.topics[index]["name"], \ + "probability": probability} \ + for index, probability in enumerate(topics_probability)] + + def stem(self, term): + """Returns the stem of the given term, if the stemmer is defined + + """ + if not self.stemmer: + return term + return self.stemmer.stemWord(term) + + def append_bigram(self, out_terms, first, second): + """Takes two terms and appends the index of their concatenation to the + provided list of output terms + + """ + if self.bigrams and first is not None and second is not None: + bigram = self.stem(first + " " + second) + if bigram in self.term_to_index: + out_terms.append(self.term_to_index[bigram]) + + def tokenize(self, astr): + """Tokenizes the input string `astr` into a list of integers, one for + each term term present in the `self.term_to_index` + dictionary. Uses word stemming if applicable. 
+ + """ + out_terms = [] + + last_term = None + term_before = None + + space_was_sep = False + saw_char = False + + text = str(astr) + index = 0 + length = len(text) + + def next_char(text, index): + """Auxiliary function to get next char and index with end check + + """ + index += 1 + if index < length: + char = text[index] + else: + char = '' + return char, index + + while index < length: + self.append_bigram(out_terms, term_before, last_term) + + char = text[index] + buf = array.array('u') + saw_char = False + + if not char.isalnum(): + saw_char = True + + while not char.isalnum() and index < length: + char, index = next_char(text, index) + + while (index < length and + (char.isalnum() or char == "'") and + len(buf) < MAXIMUM_TERM_LENGTH): + + buf.append(char) + char, index = next_char(text, index) + + if len(buf) > 0: + term_out = buf.tounicode() + + if not self.case_sensitive: + term_out = term_out.lower() + + if space_was_sep and not saw_char: + term_before = last_term + else: + term_before = None + + last_term = term_out + + if char in [" ", "\n"]: + space_was_sep = True + + tstem = self.stem(term_out) + if tstem in self.term_to_index: + out_terms.append(self.term_to_index[tstem]) + + index += 1 + + self.append_bigram(out_terms, term_before, last_term) + + return out_terms + + + def sample_topics(self, document, assignments, normalizer, updates, rng): + """Samples topics for the terms in the given `document` for `updates` + iterations, using the given set of topic `assigments` for + the current document and a `normalizer` term derived from + the dirichlet hyperparameters + + """ + counts = [0] * self.ntopics + + for _ in range(updates): + for term in document: + for k in range(self.ntopics): + topic_term = self.phi[k][term] + topic_document = (assignments[k] + self.alpha) / normalizer + self.temp[k] = topic_term * topic_document + + for k in range(1, self.ntopics): + self.temp[k] += self.temp[k - 1] + + random_value = rng.random() * self.temp[-1] + topic = 0 
+ + while self.temp[topic] < random_value and topic < self.ntopics: + topic += 1 + + counts[topic] += 1 + + return counts + + def sample_uniform(self, document, updates, rng): + """Samples topics for the terms in the given `document` assuming + uniform topic assignments for `updates` iterations. Used + to initialize the gibbs sampler. + + """ + counts = [0] * self.ntopics + + for _ in range(updates): + for term in document: + for k in range(self.ntopics): + self.temp[k] = self.phi[k][term] + + for k in range(1, self.ntopics): + self.temp[k] += self.temp[k - 1] + + random_value = rng.random() * self.temp[-1] + topic = 0 + + while self.temp[topic] < random_value and topic < self.ntopics: + topic += 1 + + counts[topic] += 1 + + return counts + + + def infer(self, list_of_indices): + """Infer a topic distribution for a document, presented as a list of + term indices. + + """ + + doc = sorted(list_of_indices) + updates = 0 + + if len(doc) > 0: + updates = SAMPLES_PER_TOPIC * self.ntopics / len(doc) + updates = int(min(MAX_UPDATES, max(MIN_UPDATES, updates))) + + rng = random.Random(self.seed) + normalizer = (len(doc) * updates) + self.ktimesalpha + + # Initialization + uniform_counts = self.sample_uniform(doc, updates, rng) + + # Burn-in + burn_counts = self.sample_topics(doc, + uniform_counts, + normalizer, + updates, + rng) + # Sampling + sample_counts = self.sample_topics(doc, + burn_counts, + normalizer, + updates, + rng) + + return [(sample_counts[k] + self.alpha) / normalizer + for k in range(self.ntopics)] + + def predict(self, input_data, full=False): + """Method to homogeneize the local models interface for all BigML + models. It returns the distribution method result. + """ + distribution = self.distribution(input_data) + if full: + return distribution_to_dict(distribution) + return distribution + + def batch_predict(self, input_data_list, outputs=None, **kwargs): + """Creates a batch prediction for a list of inputs using the local + supervised model. 
Allows to define some output settings to + decide the fields to be added to the input_data (prediction, + probability, etc.) and the name that we want to assign to these new + fields. The outputs argument accepts a dictionary with keys + "output_fields", to contain a list of the prediction properties to add + (the topic names by default) and "output_headers", to + contain a list of the headers to be used when adding them (identical + to "output_fields" list, by default). + + :param input_data_list: List of input data to be predicted + :type input_data_list: list or Panda's dataframe + :param dict outputs: properties that define the headers and fields to + be added to the input data + :return: the list of input data plus the predicted values + :rtype: list or Panda's dataframe depending on the input type in + input_data_list + + """ + if outputs is None: + outputs = {} + new_fields = outputs.get(OUT_NEW_FIELDS, [topic["name"] for topic + in self.topics]) + new_headers = outputs.get(OUT_NEW_HEADERS, new_fields) + if len(new_fields) > len(new_headers): + new_headers = [*new_headers, *new_fields[len(new_headers):]] + else: + new_headers = new_headers[0: len(new_fields)] + data_format = get_data_format(input_data_list) + inner_data_list = get_formatted_data(input_data_list, INTERNAL) + for index, input_data in enumerate(inner_data_list): + prediction = self.distribution(input_data, **kwargs) + prediction_dict = distribution_to_dict(prediction) + for ikey, key in enumerate(new_fields): + inner_data_list[index][new_headers[ikey]] = prediction_dict[ + key] + if data_format != INTERNAL: + return format_data(inner_data_list, out_format=data_format) + return inner_data_list + + def data_transformations(self): + """Returns the pipeline transformations previous to the modeling + step as a pipeline, so that they can be used in local predictions. + Avoiding to set it in a Mixin to maintain the current dump function.
+ """ + return get_data_transformations(self.resource_id, self.parent_id) + + def dump(self, output=None, cache_set=None): + """Uses msgpack to serialize the resource object + If cache_set is filled with a cache set method, the method is called + + """ + self_vars = vars(self).copy() + del self_vars["stemmer"] + dump(self_vars, output=output, cache_set=cache_set) + + def dumps(self): + """Uses msgpack to serialize the resource object to a string + + """ + self_vars = vars(self).copy() + del self_vars["stemmer"] + return dumps(self_vars) diff --git a/bigml/tree_utils.py b/bigml/tree_utils.py new file mode 100644 index 00000000..ed033dbf --- /dev/null +++ b/bigml/tree_utils.py @@ -0,0 +1,465 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2017-2025 BigML +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License.
# Reserved keywords per target language. The code generators compare
# candidate variable names against these tables (see `slugify` and the
# `to_camel_*` helpers), so every entry must be the exact keyword: a typo or
# a stray blank silently disables the escaping for that word.

CS_KEYWORDS = [
    "abstract", "as", "base", "bool", "break", "byte", "case",
    "catch", "char", "checked", "class", "const", "continue", "decimal",
    "default", "delegate", "do", "double", "else", "enum", "event", "explicit",
    "extern", "false", "finally", "fixed", "float", "for", "foreach", "goto",
    "if", "implicit", "in", "int", "interface", "internal", "is", "lock",
    "long", "namespace", "new", "null", "object", "operator", "out",
    "override", "params", "private", "protected", "public", "readonly", "ref",
    "return", "sbyte", "sealed", "short", "sizeof", "stackalloc", "static",
    "string", "struct", "switch", "this", "throw", "true", "try", "typeof",
    "uint", "ulong", "unchecked", "unsafe", "ushort", "using", "virtual",
    "void", "volatile", "while", "group", "set", "value"]

VB_KEYWORDS = [
    'addhandler', 'addressof', 'alias', 'and', 'andalso', 'as',
    'boolean', 'byref', 'byte', 'byval', 'call', 'case', 'catch', 'cbool',
    'cbyte', 'cchar', 'cdate', 'cdec', 'cdbl', 'char', 'cint', 'class', 'clng',
    'cobj', 'const', 'continue', 'csbyte', 'cshort', 'csng', 'cstr',
    'ctype', 'cuint', 'culng', 'cushort', 'date', 'decimal', 'declare',
    'default', 'delegate', 'dim', 'directcast', 'do', 'double', 'each',
    'else', 'elseif', 'end', 'endif', 'enum', 'erase', 'error', 'event',
    'exit', 'false', 'finally', 'for', 'friend', 'function', 'get',
    'gettype', 'getxmlnamespace', 'global', 'gosub', 'goto', 'handles',
    'if', 'implements', 'imports', 'in', 'inherits', 'integer', 'interface',
    'is', 'isnot', 'let', 'lib', 'like', 'long', 'loop', 'me', 'mod', 'module',
    'mustinherit', 'mustoverride', 'mybase', 'myclass', 'namespace',
    'narrowing', 'new', 'next', 'not', 'nothing', 'notinheritable',
    'notoverridable', 'object', 'of', 'on', 'operator', 'option',
    'optional', 'or', 'orelse', 'overloads', 'overridable', 'overrides',
    'paramarray', 'partial', 'private', 'property', 'protected',
    'public', 'raiseevent', 'readonly', 'redim', 'rem', 'removehandler',
    'resume', 'return', 'sbyte', 'select', 'set', 'shadows', 'shared',
    'short', 'single', 'static', 'step', 'stop', 'string', 'structure',
    'sub', 'synclock', 'then', 'throw', 'to', 'true', 'try',
    'trycast', 'typeof', 'variant', 'wend', 'uinteger', 'ulong',
    'ushort', 'using', 'when', 'while', 'widening', 'with', 'withevents',
    'writeonly', 'xor', '#const', '#else', '#elseif', '#end', '#if'
]

JAVA_KEYWORDS = [
    "abstract", "continue", "for", "new", "switch", "assert", "default",
    "goto", "package", "synchronized", "boolean", "do", "if", "private",
    "this", "break", "double", "implements", "protected", "throw",
    "byte", "else", "import", "public", "throws", "case", "enum",
    "instanceof", "return", "transient", "catch", "extends", "int",
    "short", "try", "char", "final", "interface", "static", "void",
    "class", "finally", "long", "strictfp", "volatile", "const",
    "float", "native", "super", "while"
]

# `to_camel_objc` lower-cases these entries before comparing.
OBJC_KEYWORDS = [
    "auto", "BOOL", "break", "Class", "case", "bycopy", "char", "byref",
    "const", "id", "continue", "IMP", "default", "in", "do", "inout",
    "double", "nil", "else", "NO", "enum", "NULL", "extern", "oneway",
    "float", "out", "for", "Protocol", "goto", "SEL", "if", "self",
    "inline", "super", "int", "YES", "long", "@interface", "register",
    "@end", "restrict", "@implementation", "return", "@protocol",
    "short", "@class", "signed", "@public", "sizeof", "@protected",
    "static", "@private", "struct", "@property", "switch", "@try",
    "typedef", "@throw", "union", "@catch()", "unsigned", "@finally",
    "void", "@synthesize", "volatile", "@dynamic", "while", "@selector",
    "_Bool", "atomic", "_Complex", "nonatomic", "_Imaginary", "retain"
]

JS_KEYWORDS = [
    "break", "case", "catch", "continue", "debugger", "default", "delete",
    "do", "else", "finally", "for", "function", "if", "in", "instanceof",
    "new", "return", "switch", "this", "throw", "try", "typeof", "var",
    "void", "while", "with", "class", "enum", "export", "extends",
    "import", "super", "implements", "interface", "let", "package",
    "private", "protected", "public", "static", "yield", "null",
    "true", "const", "false"
]


PYTHON_KEYWORDS = [
    "and", "assert", "break", "class", "continue", "def", "del", "elif",
    "else", "except", "exec", "finally", "for", "from", "global", "if",
    "import", "in", "is", "lambda", "not", "or", "pass", "print", "raise",
    "return", "try", "while", "Data", "Float", "Int", "Numeric", "Oxphys",
    "array", "close", "float", "int", "input", "open", "range", "type",
    "write", "zeros", "acos", "asin", "atan", "cos", "e", "exp", "fabs",
    "floor", "log", "log10", "pi", "sin", "sqrt", "tan"
]


def add_distribution(model):
    """Adding the distribution attribute

    Reads the summary of the objective field and stores in
    `model.distribution` the first of its `bins`, `counts` or `categories`
    entries that is present (in that order), or an empty list when none is.
    """
    summary = model.fields[model.objective_id]['summary']
    for key in ('bins', 'counts', 'categories'):
        if key in summary:
            model.distribution = summary[key]
            return
    model.distribution = []
def split(children):
    """Returns the field that is used by the node to make a decision.

    Collects `predicate.field` from every child. When all children test the
    same field, that field is returned; otherwise None.
    """
    fields = {child.predicate.field for child in children}
    if len(fields) == 1:
        return fields.pop()
    return None


def java_string(text):
    """Transforms string output for java, cs, and objective-c code

    The value is turned into a string, HTML quote entities are decoded into
    plain double quotes and every double quote is then backslash-escaped.
    """
    text = f"{text}"
    # The entity is spelled in two pieces so that HTML-unescaping tools cannot
    # collapse it into a bare quote character again.
    html_quote = "&" + "quot;"
    return text.replace(html_quote, "\"").replace("\"", "\\\"")


def python_string(text):
    """Transforms string output for python code

    Returns `repr(text)`. The previous `text.replace("'", "\\'")` call
    replaced a quote by itself (a no-op) and only made the function fail for
    non-string values, which `ruby_string` explicitly expects to handle.
    """
    return repr(text)


def ruby_string(text):
    """Transforms string output for ruby code

    """
    out = python_string(text)
    if isinstance(text, str):
        # NOTE(review): this drops the first character of the repr, which
        # for a Python 3 str is the opening quote - looks like a leftover
        # from stripping a Python 2 `u` prefix. Behaviour kept; confirm
        # against the callers before changing it.
        return out[1:]
    return out


def sort_fields(fields):
    """Sort fields by column_number but put together parents and children.

    Fields without an `auto_generated` key are the parents. Every
    auto-generated field is placed right after the parent named in
    `parent_ids[0]`, or appended at the end when that parent is unknown.
    Returns a list of (field_id, field_info) tuples.
    """
    ordered = sorted(fields.items(), key=lambda item: item[1]['column_number'])
    fathers = [(key, val) for key, val in ordered
               if 'auto_generated' not in val]
    children = [(key, val) for key, val in ordered
                if 'auto_generated' in val]
    # Children are visited in reverse so that siblings inserted at the same
    # position end up in increasing column order.
    children.reverse()
    # `slots` mirrors `fathers` position by position. Inserted children are
    # recorded with a private sentinel so that parent positions stay valid
    # after earlier insertions (the old code searched a stale key list and
    # could drop a child after the wrong field).
    placeholder = object()
    slots = [key for key, _ in fathers]
    for child in children:
        try:
            index = slots.index(child[1]['parent_ids'][0])
        except ValueError:
            index = -1

        if index >= 0:
            fathers.insert(index + 1, child)
            slots.insert(index + 1, placeholder)
        else:
            fathers.append(child)
            slots.append(placeholder)
    return fathers


def slugify(name, reserved_keywords=None, prefix=''):
    """Translates a field name into a variable name.

    The name is passed through `asciify`; names starting with a digit get a
    `field_` prefix, empty names become `unnamed_field`, and names found in
    `reserved_keywords` are prepended with `prefix`.
    """
    name = asciify(name)
    try:
        if name[0].isdigit():
            name = "field_" + name
    except IndexError:
        # asciify returned an empty string
        name = "unnamed_field"
    if reserved_keywords:
        if name in reserved_keywords:
            name = prefix + name
    return name


def plural(text, num):
    """Pluralizer: adds "s" at the end of a string unless the given number
    is exactly 1 (so 0 also takes the plural form)
    """
    # "s"[True:] is "" and "s"[False:] is "s"
    suffix = "s"[num == 1:]
    return f"{text}{suffix}"


def prefix_as_comment(comment_prefix, text):
    """Adds comment prefixes to new lines in comments

    """
    return text.replace('\n', '\n' + comment_prefix)


def to_camelcase(text, first_lower=True,
                 reserved_keywords=None, prefix='', suffix=''):
    """Returns the text in camelCase or CamelCase format

    Runs of non-word characters become blanks; a text matching a reserved
    keyword (case-insensitively) is wrapped with `prefix` / `suffix`;
    multi-word texts are transliterated, capitalized word by word and joined;
    a single all-caps word is lower-cased; a leading digit gets a `Field`
    prefix. `first_lower` selects the case of the first character.
    """
    if len(text) == 0:
        # case of empty name?
        return text

    text = re.sub(r'\W+', ' ', text)
    if reserved_keywords:
        if text.lower() in reserved_keywords:
            text = prefix + text + suffix
    if ' ' in text:
        text = unidecode(text).lower()
        text = re.sub(r'\w+', lambda m: m.group(0).capitalize(), text)
        text = re.sub(r'\s+', '', text)
    elif text == text.upper():
        # if the text is a single word in caps, we turn it all into lowers
        text = text.lower()
    if not text:
        # names made only of punctuation collapse to nothing; treat them
        # like the empty name above instead of failing on text[0]
        return text
    if text[0].isdigit():
        text = "Field" + text
    if first_lower:
        return text[0].lower() + text[1:]
    return text[0].upper() + text[1:]


def to_camel_cs(text, first_lower=True):
    """Returns the text in camelCase or CamelCase format for C#

    """
    return to_camelcase(text, first_lower=first_lower,
                        reserved_keywords=CS_KEYWORDS, prefix="@")


def to_camel_vb(text, first_lower=True):
    """Returns the text in camelCase or CamelCase format for Visual Basic

    """
    text = "v_" + text
    return to_camelcase(text, first_lower=first_lower,
                        reserved_keywords=VB_KEYWORDS, prefix="v ")


def to_camel_java(text, first_lower=True):
    """Returns the text in camelCase or CamelCase format for Java

    """
    return to_camelcase(text, first_lower=first_lower,
                        reserved_keywords=JAVA_KEYWORDS, suffix="_")


def to_camel_objc(text, first_lower=True):
    """Returns the text in camelCase or CamelCase format for objective-c

    """
    # to_camelcase compares the lower-cased text, so the table is lowered too
    keywords = [keyword.lower() for keyword in OBJC_KEYWORDS]
    return to_camelcase(text, first_lower=first_lower,
                        reserved_keywords=keywords, suffix="_")


def to_camel_js(text, first_lower=True):
    """Returns the text in camelCase or CamelCase format for node.js

    """
    return to_camelcase(text, first_lower=first_lower,
                        reserved_keywords=JS_KEYWORDS, suffix="_")


def docstring_comment(model):
    """Returns the docstring describing the model.

    Side effect: `model.description` is replaced by its stripped text, or by
    a default sentence when it is missing or blank. A None description used
    to be stored as the literal text "None".
    """
    name = model.fields[model.objective_id]['name']
    resource_id = model.resource_id
    docstring = f"Predictor for {name} from {resource_id}"
    description = "" if model.description is None else str(model.description)
    model.description = (
        description.strip()
        or 'Predictive model by BigML - Machine Learning Made Easy')
    return docstring


def java_class_definition(model):
    """Implements java class definition and doc comments

    Caches the CamelCase name of the objective field in its `CamelCase` key
    and returns the comment block plus the opening of the class.
    """
    docstring = model.java_comment()
    field_obj = model.fields[model.objective_id]
    if 'CamelCase' not in field_obj:
        field_obj['CamelCase'] = to_camel_java(field_obj['name'], False)
    description = model.description.replace('\n', '\n * ')
    field_camelcase = field_obj['CamelCase']
    output = \
f"""
/**
* {docstring}
* {description}
*/
public class {field_camelcase}
"""
    output += "{"
    return output


# This method is reused in core/excel/producer.py class
def signature_name_vb(text, model):
    """Returns the name of the function in Visual Basic for Applications

    Returns a `(function_name, header)` tuple. The header comment is only
    built when a model is given; without one it stays empty. The model
    attributes are now read inside that guard, so a missing model no longer
    raises before the guard is reached.
    """
    default_description = "Predictive model by BigML - Machine Learning Made Easy"
    obj_field_for_name = to_camel_vb(text, False).replace("V_", "")
    obj_field_for_name = obj_field_for_name.title()
    header = ""
    if model:
        name = model.fields[model.objective_id]['name']
        resource_id = model.resource_id
        description = model.description if model.description else \
            default_description
        header = f"""
'
' Predictor for {name} from {resource_id}
' {description}
'
"""
    return (f"Predict{obj_field_for_name}", header)


def localize(number):
    """Localizes `number` to show commas appropriately.

    """
    return locale.format_string("%d", number, grouping=True)


def is_url(value):
    """Returns True if value is a valid URL.

    A value qualifies when it is a string whose parsed form has a scheme, a
    network location and a path. Always returns a real bool.
    """
    if not isinstance(value, str):
        return False
    url = urlparse(value)
    return bool(url.scheme and url.netloc and url.path)


def print_distribution(distribution, out=sys.stdout):
    """ Prints distribution data

    `distribution` is a sequence of (value, count) groups. Each group is
    written as its percentage of the total count followed by the count. An
    empty distribution writes nothing instead of raising.
    """
    total = sum(group[1] for group in distribution)
    output = ""
    for group in distribution:
        # a zero total cannot be split into shares
        share = round(group[1] * 1.0 / total, 4) * 100 if total else 0.0
        suffix = "" if group[1] == 1 else "s"
        # NOTE(review): the leading blank is reproduced as it appears in the
        # whitespace-collapsed source; confirm the original indent width.
        output += (f" {group[0]}: {share:.2f}% ({group[1]} "
                   f"instance{suffix})\n")
    out.write(output)
    out.flush()


def old_filter_nodes(nodes_list, ids=None, subtree=True):
    """Filters the contents of a nodes_list. If any of the nodes is in the
    ids list, the rest of nodes are removed. If none is in the ids list
    we include or exclude the nodes depending on the subtree flag.

    Returns None for an empty or missing list.
    """
    if not nodes_list:
        return None
    nodes = nodes_list[:]
    if ids is not None:
        for node in nodes:
            if node.id in ids:
                # first match wins
                return [node]
    if not subtree:
        nodes = []
    return nodes


def missing_branch(children):
    """Checks if the missing values are assigned to a special branch

    """
    return any(child.predicate.missing for child in children)


def none_value(children):
    """Checks if the predicate has a None value

    """
    return any(child.predicate.value is None for child in children)


def one_branch(children, input_data):
    """Check if there's only one branch to be followed

    True when the split field is present in `input_data`, or when any child
    predicate handles missing values or compares against None.
    """
    # despite its name, this flag is True when the field IS in the input
    missing = split(children) in input_data
    return (missing or missing_branch(children)
            or none_value(children))


def tableau_string(text):
    """Transforms to a string representation in Tableau

    """
    value = repr(text)
    if isinstance(text, str):
        # NOTE(review): drops the opening quote of the repr, same pattern as
        # in ruby_string; behaviour kept, confirm before changing.
        return value[1:]
    return value
""" Constants for Time series

Source-code templates that are concatenated to build a stand-alone Python
forecasting script.
"""

# One template per submodel; each defines a `_<name>_forecast` function.
# The trend templates rely on the OPERATORS table and `season_contribution`
# from SEASONAL_CODE, the naive/mean ones on `_trivial_forecast` from
# TRIVIAL_MODEL.
SUBMODELS_CODE = {
    "naive": """

def _naive_forecast(components, horizon):
    \"\"\"Computing the forecast for the naive model

    \"\"\"
    return _trivial_forecast(components, horizon)

""",
    # the template used to pass the undefined name `submodel` here, which
    # made the generated `_mean_forecast` fail with NameError
    "mean": """
def _mean_forecast(components, horizon):
    \"\"\"Computing the forecast for the mean model

    \"\"\"
    return _trivial_forecast(components, horizon)
""",
    "drift": """

def _drift_forecast(components, horizon):
    \"\"\"Computing the forecast for the drift model

    \"\"\"
    points = []
    for h in range(horizon):
        points.append(components["value"] + components["slope"] * (h + 1))
    return points
""",
    "N": """

def _N_forecast(components, horizon, seasonality):
    \"\"\"Computing the forecast for the trend=N models
    ŷ_t+h|t = l_t
    ŷ_t+h|t = l_t + s_f(s, h) (if seasonality = "A")
    ŷ_t+h|t = l_t * s_f(s, h) (if seasonality = "M")
    \"\"\"
    points = []
    l = components.get(\"l\", 0)
    s = components.get(\"s\", 0)
    for h in range(horizon):
        # each season has a different contribution
        s_i = season_contribution(s, h)
        points.append(OPERATORS[seasonality](l, s_i))
    return points
""",
    "A": """

def _A_forecast(components, horizon, seasonality):
    \"\"\"Computing the forecast for the trend=A models
    ŷ_t+h|t = l_t + h * b_t
    ŷ_t+h|t = l_t + h * b_t + s_f(s, h) (if seasonality = "A")
    ŷ_t+h|t = (l_t + h * b_t) * s_f(s,h) (if seasonality = "M")
    \"\"\"
    points = []
    l = components.get(\"l\", 0)
    b = components.get(\"b\", 0)
    s = components.get(\"s\", 0)
    for h in range(horizon):
        # each season has a different contribution
        s_i = season_contribution(s, h)
        points.append(OPERATORS[seasonality](l + b * (h + 1), s_i))
    return points
""",
    "Ad": """

def _Ad_forecast(components, horizon, seasonality):
    \"\"\"Computing the forecast for the trend=Ad model
    ŷ_t+h|t = l_t + phi_h * b_t
    ŷ_t+h|t = l_t + phi_h * b_t + s_f(m, h) (if seasonality = "A")
    ŷ_t+h|t = (l_t + phi_h * b_t) * s_f(m, h) (if seasonality = "M")
    with phi_0 = phi
         phi_1 = phi + phi^2
         phi_h = phi + phi^2 + ... + phi^(h + 1) (for h > 0)
    \"\"\"
    points = []
    l = components.get(\"l\", 0)
    b = components.get(\"b\", 0)
    phi = components.get(\"phi\", 0)
    s = components.get(\"s\", 0)
    phi_h = phi
    for h in range(horizon):
        # each season has a different contribution
        s_i = season_contribution(s, h)
        points.append(OPERATORS[seasonality](l + phi_h * b, s_i))
        phi_h = phi_h + pow(phi, h + 2)
    return points
""",
    "M": """

def _M_forecast(components, horizon, seasonality):
    \"\"\"Computing the forecast for the trend=M model
    ŷ_t+h|t = l_t * b_t^h
    ŷ_t+h|t = l_t * b_t^h + s_f(m, h) (if seasonality = "A")
    ŷ_t+h|t = (l_t * b_t^h) * s_f(m, h) (if seasonality = "M")
    \"\"\"
    points = []
    l = components.get(\"l\", 0)
    b = components.get(\"b\", 0)
    s = components.get(\"s\", 0)
    for h in range(horizon):
        # each season has a different contribution
        s_i = season_contribution(s, h)
        points.append(OPERATORS[seasonality](l * pow(b, h + 1), s_i))
    return points
""",
    # docstring corrected: the code multiplies the level by b^(phi_h)
    "Md": """

def _Md_forecast(components, horizon, seasonality):
    \"\"\"Computing the forecast for the trend=Md model
    ŷ_t+h|t = l_t * b_t^(phi_h)
    ŷ_t+h|t = l_t * b_t^(phi_h) + s_f(m, h) (if seasonality = "A")
    ŷ_t+h|t = (l_t * b_t^(phi_h)) * s_f(m, h) (if seasonality = "M")
    with phi_0 = phi
         phi_1 = phi + phi ^ 2
         phi_h = phi + phi^2 + ... + phi^(h + 1) (for h > 0)
    \"\"\"
    points = []
    l = components.get(\"l\", 0)
    b = components.get(\"b\", 0)
    s = components.get(\"s\", 0)
    phi = components.get(\"phi\", 0)
    phi_h = phi
    for h in range(horizon):
        # each season has a different contribution
        s_i = season_contribution(s, h)
        points.append(OPERATORS[seasonality](l * pow(b, phi_h), s_i))
        phi_h = phi_h + pow(phi, h + 2)
    return points

"""}

# Helper shared by the naive and mean templates.
TRIVIAL_MODEL = """
def _trivial_forecast(components, horizon):
    \"\"\"Computing the forecast for the trivial models

    \"\"\"
    points = []
    submodel_points = components[\"value\"]
    period = len(submodel_points)
    if period > 1:
        # when a period is used, the points in the model are repeated
        for h in range(horizon):
            points.append(submodel_points[h % period])
    else:
        for _ in range(horizon):
            points.append(submodel_points[0])
    return points


"""

# Seasonality helpers used by every trend template.
SEASONAL_CODE = """
OPERATORS = {\"A\": lambda x, s: x + s,
             \"M\": lambda x, s: x * s,
             \"N\": lambda x, s: x}


def season_contribution(s_list, step):
    \"\"\"Chooses the seasonal contribution from the list in the period

    s_list: The list of contributions per season
    step: The actual prediction step

    \"\"\"
    if isinstance(s_list, list):
        period = len(s_list)
        index = abs(- period + 1 + step % period)
        return s_list[index]
    else:
        return 0


"""

# Entry point of the generated script; it expects COMPONENTS and MODELS to
# be defined by the code that assembles the script.
FORECAST_FUNCTION = """

def forecast(field, model_name, horizon=50):
    \"\"\"Forecast using the user-given model type and horizon

    \"\"\"
    components = COMPONENTS.get(field, {}).get(model_name)
    if model_name:
        if \",\" in model_name:
            _, trend, seasonality = model_name.split(",")
            return MODELS[trend](components, horizon, seasonality)
        else:
            return MODELS[model_name](components, horizon)
    else:
        return {}
"""

# Module docstring of the generated script; takes two `%s` substitutions.
USAGE_DOC = """\"\"\"Local forecast for BigML's Time Series %s.

Time Series Forecast by BigML - Machine Learning Made Easy

Add this code to your project and use the `forecast` function to make
your forecasts:

    forecast(\"%s\", "naive", horizon=10)

where the first parameter is the field to forecast, the second is the name
of the model to use and the third the number of points to generate.
\"\"\"

"""
"""Auxiliary module to store the functions to compute time-series forecasts
following the formulae in
https://www.otexts.org/sites/default/files/fpp/images/Table7-8.png
as explained in
https://www.otexts.org/fpp/7/6
"""

import inspect
import sys


# How the seasonal contribution is combined with the trend value:
# "A" adds it, "M" multiplies by it, "N" ignores it.
OPERATORS = {"A": lambda x, s: x + s,
             "M": lambda x, s: x * s,
             "N": lambda x, s: x}


def season_contribution(s_list, step):
    """Chooses the seasonal contribution from the list in the period

    s_list: The list of contributions per season
    step: The actual prediction step

    Returns 0 when `s_list` is not a list.
    """
    if isinstance(s_list, list):
        period = len(s_list)
        # the list is walked backwards: step 0 reads the last element
        index = abs(- period + 1 + step % period)
        return s_list[index]
    return 0


def trivial_forecast(submodel, horizon):
    """Computing the forecast for the trivial models

    Repeats the points stored in `submodel["value"]` cyclically until
    `horizon` points have been produced.
    """
    submodel_points = submodel["value"]
    period = len(submodel_points)
    if period > 1:
        # when a period is used, the points in the model are repeated
        return [submodel_points[h % period] for h in range(horizon)]
    return [submodel_points[0] for _ in range(horizon)]


def naive_forecast(submodel, horizon):
    """Computing the forecast for the naive model

    """
    return trivial_forecast(submodel, horizon)


def mean_forecast(submodel, horizon):
    """Computing the forecast for the mean model

    """
    return trivial_forecast(submodel, horizon)


def drift_forecast(submodel, horizon):
    """Computing the forecast for the drift model

    Point h (zero-based) is `value + slope * (h + 1)`.
    """
    return [submodel["value"] + submodel["slope"] * (h + 1)
            for h in range(horizon)]


def N_forecast(submodel, horizon, seasonality):
    """Computing the forecast for the trend=N models
    ŷ_t+h|t = l_t
    ŷ_t+h|t = l_t + s_f(s, h) (if seasonality = "A")
    ŷ_t+h|t = l_t * s_f(s, h) (if seasonality = "M")
    """
    final_state = submodel.get("final_state", {})
    l = final_state.get("l", 0)
    s = final_state.get("s", 0)
    # each season has a different contribution
    return [OPERATORS[seasonality](l, season_contribution(s, h))
            for h in range(horizon)]


def A_forecast(submodel, horizon, seasonality):
    """Computing the forecast for the trend=A models
    ŷ_t+h|t = l_t + h * b_t
    ŷ_t+h|t = l_t + h * b_t + s_f(s, h) (if seasonality = "A")
    ŷ_t+h|t = (l_t + h * b_t) * s_f(s,h) (if seasonality = "M")
    """
    final_state = submodel.get("final_state", {})
    l = final_state.get("l", 0)
    b = final_state.get("b", 0)
    s = final_state.get("s", 0)
    # each season has a different contribution
    return [OPERATORS[seasonality](l + b * (h + 1),
                                   season_contribution(s, h))
            for h in range(horizon)]


def Ad_forecast(submodel, horizon, seasonality):
    """Computing the forecast for the trend=Ad model
    ŷ_t+h|t = l_t + phi_h * b_t
    ŷ_t+h|t = l_t + phi_h * b_t + s_f(m, h) (if seasonality = "A")
    ŷ_t+h|t = (l_t + phi_h * b_t) * s_f(m, h) (if seasonality = "M")
    with phi_0 = phi
         phi_1 = phi + phi^2
         phi_h = phi + phi^2 + ... + phi^(h + 1) (for h > 0)
    """
    points = []
    final_state = submodel.get("final_state", {})
    l = final_state.get("l", 0)
    b = final_state.get("b", 0)
    # phi is read from the submodel itself, not from its final state
    phi = submodel.get("phi", 0)
    s = final_state.get("s", 0)
    phi_h = phi
    for h in range(horizon):
        # each season has a different contribution
        s_i = season_contribution(s, h)
        points.append(OPERATORS[seasonality](l + phi_h * b, s_i))
        # running sum of the powers of phi
        phi_h = phi_h + pow(phi, h + 2)
    return points


def M_forecast(submodel, horizon, seasonality):
    """Computing the forecast for the trend=M model
    ŷ_t+h|t = l_t * b_t^h
    ŷ_t+h|t = l_t * b_t^h + s_f(m, h) (if seasonality = "A")
    ŷ_t+h|t = (l_t * b_t^h) * s_f(m, h) (if seasonality = "M")
    """
    final_state = submodel.get("final_state", {})
    l = final_state.get("l", 0)
    b = final_state.get("b", 0)
    s = final_state.get("s", 0)
    # each season has a different contribution
    return [OPERATORS[seasonality](l * pow(b, h + 1),
                                   season_contribution(s, h))
            for h in range(horizon)]


def Md_forecast(submodel, horizon, seasonality):
    """Computing the forecast for the trend=Md model
    ŷ_t+h|t = l_t * b_t^(phi_h)
    ŷ_t+h|t = l_t * b_t^(phi_h) + s_f(m, h) (if seasonality = "A")
    ŷ_t+h|t = (l_t * b_t^(phi_h)) * s_f(m, h) (if seasonality = "M")
    with phi_0 = phi
         phi_1 = phi + phi ^ 2
         phi_h = phi + phi^2 + ... + phi^(h + 1) (for h > 0)
    """
    points = []
    final_state = submodel.get("final_state", {})
    l = final_state.get("l", 0)
    b = final_state.get("b", 0)
    s = final_state.get("s", 0)
    # phi is read from the submodel itself, not from its final state
    phi = submodel.get("phi", 0)
    phi_h = phi
    for h in range(horizon):
        # each season has a different contribution
        s_i = season_contribution(s, h)
        points.append(OPERATORS[seasonality](l * pow(b, phi_h), s_i))
        # running sum of the powers of phi
        phi_h = phi_h + pow(phi, h + 2)
    return points


# Maps every `<name>_forecast` function of this module to `<name>`
# (underscores in the name become commas).
SUBMODELS = {
    name[0: -9].replace("_", ","): obj for name, obj in
    inspect.getmembers(sys.modules[__name__])
    if inspect.isfunction(obj) and name.endswith('_forecast')}
DEFAULT_LOCALE = 'en_US.UTF-8'
WINDOWS_DEFAULT_LOCALE = 'English'
# Locale aliases per two-letter language code. A language maps either to a
# flat list of aliases or to a list of regional groups (one list each); the
# first alias of a list is the code returned by `bigml_locale`.
LOCALE_SYNONYMS = {
    'en': [['en_US', 'en-US', 'en_US.UTF8', 'en_US.UTF-8',
            'English_United States.1252', 'en-us', 'en_us',
            'en_US.utf8'],
           ['en_GB', 'en-GB', 'en_GB.UTF8', 'en_GB.UTF-8',
            'English_United Kingdom.1252', 'en-gb', 'en_gb',
            'en_GB.utf8']],
    'es': ['es_ES', 'es-ES', 'es_ES.UTF8', 'es_ES.UTF-8',
           'Spanish_Spain.1252', 'es-es', 'es_es',
           'es_ES.utf8'],
    'sp': ['es_ES', 'es-ES', 'es_ES.UTF8', 'es_ES.UTF-8',
           'Spanish_Spain.1252', 'es-es', 'es_es',
           'es_ES.utf8'],
    'fr': [['fr_FR', 'fr-FR', 'fr_BE', 'fr_CH', 'fr-BE',
            'fr-CH', 'fr_FR.UTF8', 'fr_CH.UTF8',
            'fr_BE.UTF8', 'fr_FR.UTF-8', 'fr_CH.UTF-8',
            'fr_BE.UTF-8', 'French_France.1252', 'fr-fr',
            'fr_fr', 'fr-be', 'fr_be', 'fr-ch', 'fr_ch',
            'fr_FR.utf8', 'fr_BE.utf8', 'fr_CH.utf8'],
           ['fr_CA', 'fr-CA', 'fr_CA.UTF8', 'fr_CA.UTF-8',
            'French_Canada.1252', 'fr-ca', 'fr_ca',
            'fr_CA.utf8']],
    'de': ['de_DE', 'de-DE', 'de_DE.UTF8', 'de_DE.UTF-8',
           'German_Germany.1252', 'de-de', 'de_de',
           'de_DE.utf8'],
    'ge': ['de_DE', 'de-DE', 'de_DE.UTF8', 'de_DE.UTF-8',
           'German_Germany.1252', 'de-de', 'de_de',
           'de_DE.utf8'],
    'it': ['it_IT', 'it-IT', 'it_IT.UTF8', 'it_IT.UTF-8',
           'Italian_Italy.1252', 'it-it', 'it_it',
           'it_IT.utf8'],
    'ca': ['ca_ES', 'ca-ES', 'ca_ES.UTF8', 'ca_ES.UTF-8',
           'Catalan_Spain.1252', 'ca-es', 'ca_es',
           'ca_ES.utf8']}

BOLD_REGEX = re.compile(r'''(\*\*)(?=\S)([^\r]*?\S[*_]*)\1''')
ITALIC_REGEX = re.compile(r'''(_)(?=\S)([^\r]*?\S)\1''')
# Inline markdown links. Group 2 is the link text and group 4 the URL,
# which may be wrapped in optional angle brackets. The brackets are written
# as \x3c and \x3e so that tag-stripping tools cannot remove the URL group
# again (without it `\6` and `link[3]` below point at the wrong groups).
LINKS_REGEX = re.compile((r'''(\[((?:\[[^\]]*\]|[^\[\]])*)\]\([ \t]*()'''
                          r'''\x3c?(.*?)\x3e?[ \t]*'''
                          r'''((['"])(.*?)\6[ \t]*)?\))'''),
                         re.MULTILINE)
TYPE_MAP = {
    "categorical": str,
    "numeric": locale.atof,
    "text": str,
    "items": str
}

PYTHON_TYPE_MAP = {
    "categorical": [str, str],
    "numeric": [int, float],
    "text": [str, str],
    "items": [str, str]
}

PREDICTIONS_FILE_SUFFIX = '_predictions.csv'

PROGRESS_BAR_WIDTH = 50

HTTP_INTERNAL_SERVER_ERROR = 500

PRECISION = 5

NUMERIC = "numeric"

DFT_STORAGE = "./storage"
DFT_STORAGE_FILE = os.path.join(DFT_STORAGE, "BigML_%s.json")

DECIMAL_DIGITS = 5


def python_map_type(value):
    """Maps a BigML type to equivalent Python types.

    Unknown types map to `[str, str]`.
    """
    if value in PYTHON_TYPE_MAP:
        return PYTHON_TYPE_MAP[value]
    return [str, str]


def invert_dictionary(dictionary, field='name'):
    """Inverts a dictionary.

    Useful to make predictions using fields' names instead of Ids.
    It does not check whether new keys are duplicated though.

    """
    return {value[field]: key for key, value in dictionary.items()}


def localize(number):
    """Localizes `number` to show commas appropriately.

    """
    return locale.format_string("%d", number, grouping=True)


def is_url(value):
    """Returns True if value is a valid URL.

    A value qualifies when it is a string whose parsed form has a scheme, a
    network location and a path. Always returns a real bool.
    """
    if not isinstance(value, str):
        return False
    url = urlparse(value)
    return bool(url.scheme and url.netloc and url.path)


def is_in_progress(resource):
    """Returns True if the resource has no error and has not finished yet

    """
    return resource.get("error") is None \
        and get_status(resource).get("code") != c.FINISHED


def markdown_cleanup(text):
    """Returns the text without markdown codes

    Runs of blank lines are collapsed, bold and italic markers are removed
    and every inline link is replaced by its text followed by `[*]`, with
    the link targets listed after the text.
    """
    def cleanup_bold_and_italic(text):
        """Removes from text bold and italic markdowns

        """
        text = BOLD_REGEX.sub(r'''\2''', text)
        text = ITALIC_REGEX.sub(r'''\2''', text)
        return text

    def links_to_footer(text):
        """Removes from text links and adds them as footer

        """
        links_found = re.findall(LINKS_REGEX, text)
        text = LINKS_REGEX.sub(r'''\2[*]''', text)
        # link[1] is the link text and link[3] its URL
        text = '%s\n%s' % (text, '\n'.join(['[*]%s: %s' % (link[1], link[3])
                                            for link in links_found]))
        return text

    new_line_regex = re.compile('(\n{2,})', re.DOTALL)
    text = new_line_regex.sub('\n', text)
    text = cleanup_bold_and_italic(text)
    text = links_to_footer(text)
    return text


def prefix_as_comment(comment_prefix, text):
    """Adds comment prefixes to new lines in comments

    """
    return text.replace('\n', '\n' + comment_prefix)


def utf8(bytes_str):
    """Returns utf-8 string for bytes or string objects

    """
    try:
        return str(bytes_str, 'utf-8')
    except TypeError:
        # already a string: nothing to decode
        return bytes_str


def map_type(value):
    """Maps a BigML type to a Python type.

    Unknown types map to `str`.
    """
    if value in TYPE_MAP:
        return TYPE_MAP[value]
    return str


def locale_synonyms(main_locale, locale_alias):
    """Returns True if both strings correspond to equivalent locale conventions

    For languages with several regional groups the alias is looked up in
    the group that contains `main_locale`; when `main_locale` is in none of
    them the first group is used, as before. The old loop broke out after
    the first group unconditionally, so the remaining groups were never
    consulted.
    """
    language_code = main_locale[0:2]
    if language_code not in LOCALE_SYNONYMS:
        return False
    alternatives = LOCALE_SYNONYMS[language_code]
    if isinstance(alternatives[0], str):
        return locale_alias in alternatives
    for subgroup in alternatives:
        if main_locale in subgroup:
            return locale_alias in subgroup
    return locale_alias in alternatives[0]


def bigml_locale(locale_alias):
    """Returns the locale used in bigml.com for the given locale_alias

    The result is the locale code used in bigml.com provided that
    the locale user code has been correctly mapped. None otherwise.
    """
    language_code = locale_alias.lower()[0:2]
    if language_code not in LOCALE_SYNONYMS:
        return None
    alternatives = LOCALE_SYNONYMS[language_code]
    if isinstance(alternatives[0], str):
        return (alternatives[0] if locale_alias in alternatives
                else None)
    for subgroup in alternatives:
        if locale_alias in subgroup:
            return subgroup[0]
    return None
locale.setlocale(locale.LC_NUMERIC, DEFAULT_LOCALE) + except locale.Error: + pass + if new_locale is None: + try: + new_locale = locale.setlocale(locale.LC_NUMERIC, + WINDOWS_DEFAULT_LOCALE) + except locale.Error: + pass + if new_locale is None: + new_locale = locale.setlocale(locale.LC_NUMERIC, '') + + if verbose and not locale_synonyms(data_locale, new_locale): + print(("WARNING: Unable to find %s locale, using %s instead. This " + "might alter numeric fields values.\n") % (data_locale, + new_locale)) + + +def asciify(name): + """Translating to ascii and underscores """ + + if len(name) == 0: + # case of empty name? + return name + + name = unidecode(name).lower() + name = re.sub(r'\W+', '_', name) + return name + + +def res_filename(storage_dir, resource_id, extension=None): + """Returns a filename from a resource id""" + basename = asciify(resource_id) + if extension is None: + extension = "" + basename = f"{basename}{extension}" + filename = os.path.join(storage_dir, basename) + return filename + + +def fs_cache_get(storage_dir, minimized=True): + """Returns a function that retrieves a minimized resource from the file + system + """ + extension = ".min" if minimized else "" + def cache_get(resource_id): + filename = res_filename(storage_dir, asciify(resource_id), extension) + if not os.path.exists(filename): + raise ValueError(f"Failed to find the dump file {filename}.") + with open(filename, "rb") as handler: + return handler.read() + + return cache_get + + +def fs_cache_set(storage_dir, minimized=True): + """Returns a function that stores a minimized resource in the file system """ + extension = ".min" if minimized else "" + check_dir(storage_dir) + + def cache_set(resource_id, msg): + filename = res_filename(storage_dir, asciify(resource_id), extension) + with open(filename, "wb") as handler: + handler.write(msg) + return filename + + return cache_set + + +def get_predictions_file_name(model, path): + """Returns the file name for a multimodel predictions 
def clear_console_line(out=sys.stdout, length=PROGRESS_BAR_WIDTH):
    """Fills console line with blanks.

    :param out: output handler
    :param length: number of blanks to be written
    """
    out.write("%s" % (" " * length))
    out.flush()


def reset_console_line(out=sys.stdout, length=PROGRESS_BAR_WIDTH):
    """Returns cursor to first column.

    Writes `length` + 1 backspace characters to the output handler.
    """
    out.write("\b" * (length + 1))
    out.flush()


def console_log(message, out=sys.stdout, length=PROGRESS_BAR_WIDTH,
                reset=False):
    """Prints the message to the given output

    :param message: text to be written
    :param out: output handler
    :param length: maximum length
    :param reset: whether the line has to be reused and cursor reset to
                  the beginning of it
    """
    if reset:
        clear_console_line(out=out, length=length)
        reset_console_line(out=out, length=length)
    out.write(message)
    if reset:
        reset_console_line(out=out, length=length)


def get_csv_delimiter():
    """Returns the csv delimiter character

    A semicolon is used when the current locale's decimal point is a comma,
    so that both characters cannot be confused. A comma is used otherwise.
    """
    point_char = locale.localeconv()['decimal_point']
    return ',' if point_char != ',' else ';'


def strip_affixes(value, field):
    """Strips prefixes and suffixes if present

    :param value: string (or utf-8 encoded bytes) to be cleaned
    :param field: field structure. Its optional 'prefix' and 'suffix'
                  attributes are removed from the value when they match
    :returns: the value as a string without the affixes
    """
    if not isinstance(value, str):
        value = str(value, "utf-8")
    prefix = field.get('prefix')
    if prefix and value.startswith(prefix):
        value = value[len(prefix):]
    suffix = field.get('suffix')
    # An empty suffix must be skipped: value[0:-0] is value[0:0], which
    # would turn any value into an empty string.
    if suffix and value.endswith(suffix):
        value = value[:-len(suffix)]
    return value


def cast(input_data, fields):
    """Checks expected type in input data values, strips affixes and casts

    The input data dictionary is modified in place.

    :param input_data: dictionary of values keyed by field ID
    :param fields: dictionary of field structures keyed by field ID
    :raises ValueError: when a value cannot be converted to the type
                        expected by its field
    """
    for (key, value) in list(input_data.items()):
        # inputs not in fields or empty
        if key not in fields or value is None:
            continue
        field = fields[key]
        optype = field['optype']
        # strings given as booleans
        if isinstance(value, bool) and optype == 'categorical' and \
                len(field['summary']['categories']) == 2:
            try:
                booleans = {}
                categories = [category for category, _ in
                              field['summary']['categories']]
                # checking which string represents the boolean
                for category in categories:
                    bool_key = 'True' if ast.literal_eval(
                        category.capitalize()) else 'False'
                    booleans[bool_key] = category
                # converting boolean to the corresponding string
                input_data.update({key: booleans[str(value)]})
            except (ValueError, SyntaxError, KeyError) as exc:
                # literal_eval raises SyntaxError for categories that are
                # not Python expressions, and the lookup raises KeyError
                # when no category stands for the given boolean. Both are
                # type mismatches too.
                raise ValueError("Mismatch input data type in field "
                                 "\"%s\" for value %s. String expected" %
                                 (field['name'], value)) from exc
        # numerics given as strings
        elif ((optype == NUMERIC and isinstance(value, str)) or
              (optype != NUMERIC and not isinstance(value, str))):
            try:
                if optype == NUMERIC:
                    value = strip_affixes(value, field)
                input_data.update({key: map_type(optype)(value)})
            except ValueError:
                raise ValueError("Mismatch input data type in field "
                                 "\"%s\" for value %s." %
                                 (field['name'], value))
        elif optype == NUMERIC and isinstance(value, bool):
            raise ValueError("Mismatch input data type in field "
                             "\"%s\" for value %s. Numeric expected." %
                             (field['name'], value))
        # only values that were given as floats are rounded
        if optype == NUMERIC and isinstance(value, float):
            input_data.update({key: round(value, DECIMAL_DIGITS)})


def check_dir(path):
    """Creates a directory if it doesn't exist

    :param path: directory path. An empty path is left untouched
    :returns: the given path
    :raises ValueError: if the path exists and is not a directory
    """
    if os.path.exists(path):
        if not os.path.isdir(path):
            raise ValueError("The given path is not a directory")
    elif len(path) > 0:
        # exist_ok avoids failing if the directory is created by someone
        # else between the check and this call
        os.makedirs(path, exist_ok=True)
    return path


def resource_structure(code, resource_id, location, resource, error):
    """Returns the corresponding JSON structure for a resource

    """
    return {
        'code': code,
        'resource': resource_id,
        'location': location,
        'object': resource,
        'error': error}


def empty_resource():
    """Creates an empty resource JSON structure

    The structure has no resource, location or object, and both its code
    and its error status code are HTTP_INTERNAL_SERVER_ERROR.
    """
    return resource_structure(
        HTTP_INTERNAL_SERVER_ERROR,
        None,
        None,
        None,
        {
            "status": {
                "code": HTTP_INTERNAL_SERVER_ERROR,
                "message": "The resource couldn't be created"}})


def get_status(resource):
    """Extracts status info if present or sets the default if public

    :param resource: dictionary that either wraps the resource in its
                     'object' key or is the resource itself
    :returns: the resource's 'status' attribute, or a FINISHED status when
              the resource is not private or has no status
    :raises ValueError: if the resource is not a dictionary or its 'object'
                        attribute is None
    """
    if not isinstance(resource, dict):
        raise ValueError("We need a complete resource to extract its status")
    if 'object' in resource:
        if resource['object'] is None:
            raise ValueError("The resource has no status info\n%s" % resource)
        resource = resource['object']
    if not resource.get('private', True) or resource.get('status') is None:
        return {'code': c.FINISHED}
    return resource['status']
def is_status_final(resource):
    """Try whether a resource is in a final state

    Returns True if the status code is FINISHED or FAULTY. Resources whose
    status cannot be extracted are considered not final.
    """
    try:
        status = get_status(resource)
    except ValueError:
        # best effort: no status info means the resource is not final
        status = {}
    return status.get('code') in [c.FINISHED, c.FAULTY]


def save_json(resource, path):
    """Stores the resource in the user-given path in a JSON format

    :returns: the path of the file, or None if the resource could not be
              serialized or written (a message is printed in that case)
    """
    try:
        resource_json = json.dumps(resource)
        return save(resource_json, path)
    except (TypeError, ValueError):
        # json.dumps raises TypeError for objects that are not serializable
        print("The resource has an invalid JSON format")
    except IOError:
        print("Failed writing resource to %s" % path)
    return None


def save(content, path):
    """Stores content in an utf-8 file

    :param content: string to be stored
    :param path: file path. If None, a name is built from DFT_STORAGE_FILE
                 and the current date and time
    :returns: the path of the file
    """
    if path is None:
        datestamp = datetime.datetime.now().strftime("%a%b%d%y_%H%M%S")
        path = DFT_STORAGE_FILE % datestamp
    check_dir(os.path.dirname(path))
    with open(path, "wb", 0) as file_handler:
        file_handler.write(content.encode('UTF-8'))
    return path


def plural(text, num):
    """Pluralizer: adds "s" at the end of a string unless the given number
    is 1 (zero is also pluralized)

    """
    # "s"[True:] is "" and "s"[False:] is "s"
    return "%s%s" % (text, "s"[num == 1:])


def get_exponential_wait(wait_time, retry_count):
    """Computes the exponential wait time used in next request using the
    base values provided by the user:
        - wait_time: starting wait time (seconds)
        - retry_count: number of retries

    For more than one retry, a random number of seconds below
    (retry_count ** 2) * wait_time / 2 is added to the starting wait time.
    """
    delta = (retry_count ** 2) * wait_time / 2
    exp_factor = delta if retry_count > 1 else 0
    return wait_time + math.floor(random.random() * exp_factor)


def check_no_missing_numerics(input_data, fields, weight_field=None):
    """Checks whether some numeric fields are missing in the input data

    :param input_data: dictionary of values keyed by field ID
    :param fields: dictionary of field structures keyed by field ID
    :param weight_field: ID of a field that is allowed to be missing
    :raises ValueError: if a numeric field has no value in the input data
    """
    for field_id, field in list(fields.items()):
        if (field['optype'] == NUMERIC and
                (weight_field is None or field_id != weight_field) and
                field_id not in input_data):
            raise ValueError("Failed to predict. Input"
                             " data must contain values for all numeric"
                             " fields to get a prediction.")


#pylint: disable=locally-disabled,too-many-boolean-expressions
def check_no_training_missings(input_data, fields, weight_field=None,
                               objective_id=None):
    """Checks whether some input fields are missing in the input data
    while the training data has no missings in that field

    Datetime fields, the weight field and the objective field are not
    checked.

    :raises ValueError: if a checked field has no value in the input data
                        and its summary's missing_count is zero
    """
    for field_id, field in fields.items():
        if (field["optype"] != "datetime" and
                field_id not in input_data and
                field['summary']['missing_count'] == 0 and
                (weight_field is None or field_id != weight_field) and
                (objective_id is None or field_id != objective_id)):
            raise ValueError("Failed to predict. Input"
                             " data must contain values for field '%s' "
                             "to get a prediction." % field['name'])


def flatten(inner_array):
    """ Flattens an array with inner arrays

    Only one level is flattened: lists found inside the inner lists are
    kept as they are.
    """
    new_array = []

    for element in inner_array:
        if isinstance(element, list):
            new_array.extend(element)
        else:
            new_array.append(element)

    return new_array
def dump(local_attrs, output=None, cache_set=None):
    """Uses msgpack to serialize the local resource object
    If cache_set is filled with a cache set method, the method is called
    with the resource ID and the serialized string. Otherwise, the
    serialization is written to the `output` handler.

    """
    if use_cache(cache_set):
        dump_string = msgpack.dumps(local_attrs)
        cache_set(local_attrs["resource_id"], dump_string)
    else:
        msgpack.pack(local_attrs, output)


def dumps(local_attrs):
    """Uses msgpack to serialize the local resource object to a string

    """
    return msgpack.dumps(local_attrs)


def load(resource_id, cache_get):
    """Uses msgpack to load the resource stored by ID

    :param resource_id: ID handed to `cache_get`
    :param cache_get: function that returns the serialized resource
    """
    return msgpack.loads(cache_get(resource_id))


def filter_by_extension(file_list, extension_list):
    """Returns the files that match the given extensions

    Extensions are compared in lowercase and without the leading dot.
    """
    return [filename for filename in file_list if
            os.path.splitext(filename)[1].replace(".", "").lower()
            in extension_list]


def infer_field_type(field, value):
    """Returns a dictionary containing the name and optype of the objective
    field as inferred from the corresponding value: strings are
    categorical, lists are regions and anything else is numeric
    """
    if isinstance(value, str):
        optype = "categorical"
    elif isinstance(value, list):
        optype = "regions"
    else:
        optype = "numeric"
    return {"name": field, "optype": optype}


def is_image(filename):
    """Checking whether the file is an image based on its extension """
    extension = os.path.splitext(filename)[1].replace(".", "").lower()
    return extension in c.IMAGE_EXTENSIONS


def get_data_format(input_data_list):
    """Returns the format used in input_data_list: DataFrame or
    list of dicts.

    :raises ValueError: if the data is neither a DataFrame nor a list that
                        is empty or starts with a dictionary
    """
    if PANDAS_READY and isinstance(input_data_list, DataFrame):
        return c.DATAFRAME
    if isinstance(input_data_list, list) and (len(input_data_list) == 0 or
            isinstance(input_data_list[0], dict)):
        return c.INTERNAL
    raise ValueError("Data is expected to be provided as a list of "
                     "dictionaries or Pandas' DataFrame.")


#pylint: disable=locally-disabled,comparison-with-itself
def format_data(input_data_list, out_format=None):
    """Transforms the input data format to the one expected

    :param input_data_list: data in the format that differs from out_format
    :param out_format: c.DATAFRAME or c.INTERNAL. The data is returned
                       unchanged for any other value
    """
    if out_format == c.DATAFRAME:
        input_data_list = DataFrame.from_dict(input_data_list)
    elif out_format == c.INTERNAL:
        input_data_list = input_data_list.to_dict('records')
        # pandas nan, NaN, etc. outputs need to be changed to None
        for row in input_data_list:
            for key, value in row.items():
                # NaN is the only value that differs from itself
                if value != value:
                    row[key] = None
    return input_data_list


def get_formatted_data(input_data_list, out_format=None):
    """Checks the type of data and transforms if needed

    A copy of the data is returned when it already has the expected format.
    """
    current_format = get_data_format(input_data_list)
    if current_format != out_format:
        return format_data(input_data_list, out_format)
    return input_data_list.copy()


#pylint: disable=locally-disabled,import-outside-toplevel
def get_data_transformations(resource_id, parent_id):
    """Returns the pipeline that contains the transformations and derived
    features created from the raw data to the actual resource.

    :param resource_id: ID used to name the pipeline ("dt-<resource_id>")
    :param parent_id: ID of the resource the pipeline is built from
    :raises ValueError: if parent_id is None
    """
    if parent_id is None:
        raise ValueError("Failed to find the dataset information "
                         "needed to build the data transformations "
                         "pipeline.")
    from bigml.pipeline.pipeline import BMLPipeline
    return BMLPipeline("dt-%s" % resource_id, [parent_id])


def sensenet_logging():
    """Removes warnings unnecessary logging when using sensenet"""
    logging.disable(logging.WARNING)
    # NOTE(review): the environment is set ahead of the import below,
    # presumably because tensorflow reads it when loaded. Keep this order.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ["TF_USE_LEGACY_KERAS"] = "1"
    import tensorflow as tf
    tf.autograph.set_verbosity(0)
    logging.getLogger("tensorflow").setLevel(logging.ERROR)
"""Utilities for webhooks

"""
import json
import hmac

try:
    from hashlib import sha1
except ImportError:
    import sha
    sha1 = sha.sha


# Order in which the keys are serialized by `dict_to_msg`
SORTING_SEQUENCE = ["timestamp", "message", "resource", "event"]


def dict_to_msg(obj):
    """Builds a representation of the dict object in a specific key sequence

    Only the keys in SORTING_SEQUENCE are used. Missing keys are serialized
    as 'None'.
    """
    pair_list = ["'%s': '%s'" % (key, obj.get(key))
                 for key in SORTING_SEQUENCE]
    return "{%s}" % ", ".join(pair_list)


def _to_bytes(value, encoding):
    """Encodes strings with the given encoding; bytes are returned as is"""
    if isinstance(value, bytes):
        return value
    return value.encode(encoding)


def compute_signature(msg, secret, encoding="utf-8"):
    """Computes the signature used by BigML when issuing the webhook call

    :param msg: message to be signed, as a string or bytes
    :param secret: shared secret, as a string or bytes
    :param encoding: encoding applied to the arguments given as strings
    :returns: hexadecimal HMAC-SHA1 digest of the message
    """
    return hmac.new(
        _to_bytes(secret, encoding),
        msg=_to_bytes(msg, encoding),
        digestmod=sha1
    ).hexdigest()


def _same_signature(received, computed):
    """Compares two signatures in constant time"""
    # compare_digest only accepts non-ASCII contents as bytes
    return hmac.compare_digest(_to_bytes(received, "utf-8"),
                               _to_bytes(computed, "utf-8"))


def check_signature(request, secret):
    """Checks the signature when the webhook has been given one

    :param request: object that provides the headers in its `meta`
                    attribute and the payload in its `body` attribute
    :param secret: shared secret used to sign the payload
    :returns: True if the 'HTTP_X_BIGML_SIGNATURE' header matches the
              signature of the body, or the signature of its legacy
              representation. False otherwise.
    """
    headers = getattr(request, "meta", None)
    if headers is None:
        # NOTE(review): the header key follows the WSGI/Django naming, so
        # `request.META` is accepted too. Confirm the request type used by
        # callers.
        headers = request.META
    sig_header = headers['HTTP_X_BIGML_SIGNATURE'].replace('sha1=', '')
    payload = request.body
    # the body is received from the network, so the comparison is done in
    # constant time to avoid leaking how much of the signature matches
    if _same_signature(sig_header, compute_signature(payload, secret)):
        return True
    # code for old version of the msg hash
    try:
        legacy_obj = json.loads(payload)
    except ValueError:
        # a body that is not JSON cannot match the legacy signature
        return False
    if not isinstance(legacy_obj, dict):
        return False
    legacy_sig = compute_signature(dict_to_msg(legacy_obj), secret)
    return _same_signature(sig_header, legacy_sig)
408, "name": "Bin 4", "complement": false, "field_id": "000001-6", "bin_end": 47, "bin_start": 34},"score": 0.0151},{"rules": ["000004"],"item": {"count": 340, "name": "Bin 4", "complement": false,"field_id": "000001-4", "bin_end": 19, "bin_start": 15},"score": 0.00628}] diff --git a/data/batch_predictions.csv b/data/batch_predictions.csv new file mode 100644 index 00000000..f8cecc8f --- /dev/null +++ b/data/batch_predictions.csv @@ -0,0 +1,151 @@ +species +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-virginica +Iris-virginica 
+Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica diff --git a/data/batch_predictions_a.csv b/data/batch_predictions_a.csv new file mode 100644 index 00000000..2c164820 --- /dev/null +++ b/data/batch_predictions_a.csv @@ -0,0 +1,201 @@ +score +0.49537 +0.46618 +0.44796 +0.42834 +0.42808 +0.43523 +0.62275 +0.4323 +0.3999 +0.56314 +0.47397 +0.40137 +0.38877 +0.40983 +0.3871 +0.37888 +0.35897 +0.38805 +0.4286 +0.33802 +0.41261 +0.34261 +0.36766 +0.37197 +0.35655 +0.41924 +0.37357 +0.48455 +0.34346 +0.36496 +0.28713 +0.33885 +0.47982 +0.39843 +0.30417 +0.31869 +0.32342 +0.30454 +0.32401 +0.36631 +0.37083 +0.33885 +0.42887 +0.35985 +0.36947 +0.37749 +0.36184 +0.32581 +0.45965 +0.37564 +0.53749 +0.38829 +0.40434 +0.48634 +0.33595 +0.29138 +0.28502 +0.50087 +0.35875 +0.42416 +0.32045 +0.25853 +0.38074 +0.33989 +0.37129 +0.27489 +0.36541 +0.37934 +0.42677 +0.39867 +0.41084 +0.4195 +0.31422 +0.45127 +0.48724 +0.40409 +0.41261 +0.4371 +0.48188 +0.3738 +0.42494 +0.38734 +0.48574 +0.46647 +0.45768 +0.39212 +0.41159 +0.39575 +0.41059 +0.35875 +0.33329 +0.47455 +0.44006 +0.35612 +0.48396 +0.40833 +0.53518 +0.44522 +0.33637 +0.34493 +0.43844 +0.37495 +0.34325 +0.30548 +0.43656 +0.3999 +0.32661 +0.34472 +0.37679 +0.40883 +0.28572 +0.34855 +0.35853 +0.31077 +0.31135 +0.38356 
+0.49629 +0.43844 +0.35612 +0.40285 +0.31771 +0.39941 +0.27556 +0.31288 +0.29031 +0.37334 +0.32862 +0.41821 +0.69802 +0.49446 +0.49629 +0.34304 +0.3522 +0.31947 +0.36206 +0.34919 +0.35459 +0.41438 +0.41059 +0.48277 +0.47894 +0.30811 +0.41591 +0.35985 +0.44222 +0.28293 +0.2838 +0.2776 +0.55525 +0.51143 +0.45993 +0.48307 +0.48426 +0.44878 +0.27607 +0.30324 +0.29174 +0.36721 +0.31635 +0.3038 +0.34578 +0.44878 +0.38687 +0.33493 +0.32983 +0.4315 +0.41847 +0.34684 +0.37038 +0.36095 +0.33802 +0.37426 +0.37656 +0.38051 +0.44114 +0.31732 +0.4323 +0.32441 +0.42286 +0.41642 +0.38121 +0.43018 +0.3902 +0.43844 +0.35568 +0.41438 +0.37865 +0.34791 +0.45377 +0.43737 +0.315 +0.36856 +0.36318 +0.43979 +0.32401 +0.38379 +0.45544 +0.33864 +0.33761 +0.38356 diff --git a/data/batch_predictions_c.csv b/data/batch_predictions_c.csv new file mode 100644 index 00000000..2395069a --- /dev/null +++ b/data/batch_predictions_c.csv @@ -0,0 +1,201 @@ +cluster +Cluster 1 +Cluster 0 +Cluster 5 +Cluster 0 +Cluster 7 +Cluster 5 +Cluster 0 +Cluster 4 +Cluster 2 +Cluster 6 +Cluster 5 +Cluster 5 +Cluster 6 +Cluster 2 +Cluster 3 +Cluster 4 +Cluster 3 +Cluster 5 +Cluster 0 +Cluster 0 +Cluster 3 +Cluster 5 +Cluster 5 +Cluster 1 +Cluster 1 +Cluster 1 +Cluster 5 +Cluster 0 +Cluster 6 +Cluster 5 +Cluster 6 +Cluster 3 +Cluster 0 +Cluster 5 +Cluster 1 +Cluster 0 +Cluster 5 +Cluster 1 +Cluster 0 +Cluster 7 +Cluster 3 +Cluster 5 +Cluster 6 +Cluster 3 +Cluster 5 +Cluster 7 +Cluster 0 +Cluster 0 +Cluster 1 +Cluster 4 +Cluster 0 +Cluster 0 +Cluster 0 +Cluster 3 +Cluster 3 +Cluster 0 +Cluster 3 +Cluster 7 +Cluster 7 +Cluster 0 +Cluster 4 +Cluster 5 +Cluster 0 +Cluster 0 +Cluster 5 +Cluster 0 +Cluster 7 +Cluster 6 +Cluster 0 +Cluster 0 +Cluster 0 +Cluster 0 +Cluster 5 +Cluster 0 +Cluster 0 +Cluster 0 +Cluster 5 +Cluster 0 +Cluster 4 +Cluster 0 +Cluster 5 +Cluster 7 +Cluster 6 +Cluster 7 +Cluster 3 +Cluster 1 +Cluster 1 +Cluster 0 +Cluster 6 +Cluster 1 +Cluster 1 +Cluster 1 +Cluster 3 +Cluster 0 +Cluster 1 +Cluster 7 
+Cluster 6 +Cluster 5 +Cluster 6 +Cluster 0 +Cluster 0 +Cluster 0 +Cluster 0 +Cluster 3 +Cluster 0 +Cluster 5 +Cluster 6 +Cluster 0 +Cluster 0 +Cluster 0 +Cluster 0 +Cluster 3 +Cluster 5 +Cluster 5 +Cluster 7 +Cluster 1 +Cluster 0 +Cluster 2 +Cluster 3 +Cluster 4 +Cluster 0 +Cluster 5 +Cluster 0 +Cluster 1 +Cluster 3 +Cluster 4 +Cluster 0 +Cluster 0 +Cluster 4 +Cluster 0 +Cluster 5 +Cluster 0 +Cluster 2 +Cluster 1 +Cluster 1 +Cluster 0 +Cluster 0 +Cluster 3 +Cluster 3 +Cluster 6 +Cluster 0 +Cluster 1 +Cluster 0 +Cluster 0 +Cluster 3 +Cluster 1 +Cluster 6 +Cluster 1 +Cluster 0 +Cluster 0 +Cluster 5 +Cluster 0 +Cluster 0 +Cluster 5 +Cluster 3 +Cluster 0 +Cluster 6 +Cluster 0 +Cluster 0 +Cluster 1 +Cluster 0 +Cluster 5 +Cluster 1 +Cluster 0 +Cluster 0 +Cluster 5 +Cluster 1 +Cluster 1 +Cluster 3 +Cluster 7 +Cluster 3 +Cluster 6 +Cluster 5 +Cluster 0 +Cluster 0 +Cluster 2 +Cluster 1 +Cluster 7 +Cluster 1 +Cluster 6 +Cluster 0 +Cluster 6 +Cluster 0 +Cluster 1 +Cluster 5 +Cluster 1 +Cluster 0 +Cluster 0 +Cluster 6 +Cluster 2 +Cluster 3 +Cluster 3 +Cluster 1 +Cluster 0 +Cluster 0 +Cluster 0 +Cluster 0 +Cluster 0 +Cluster 0 +Cluster 2 diff --git a/data/batch_predictions_e_c0.csv b/data/batch_predictions_e_c0.csv new file mode 100644 index 00000000..0fdae12e --- /dev/null +++ b/data/batch_predictions_e_c0.csv @@ -0,0 +1,151 @@ +species +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa 
+Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-versicolor +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-versicolor +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica diff --git a/data/batch_predictions_e_c1.csv b/data/batch_predictions_e_c1.csv new file mode 100644 index 00000000..0f2f15e0 --- /dev/null +++ b/data/batch_predictions_e_c1.csv @@ -0,0 +1,151 @@ +species,confidence +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 
+Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-setosa,0.89479 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.86305 +Iris-versicolor,0.90992 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-virginica,0.89711 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.84781 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.91112 +Iris-versicolor,0.90817 +Iris-versicolor,0.86305 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 
+Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-versicolor,0.90817 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-versicolor,0.91605 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.84812 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-versicolor,0.90662 +Iris-virginica,0.88465 +Iris-virginica,0.79185 +Iris-virginica,0.88465 +Iris-virginica,0.79185 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.77084 +Iris-virginica,0.79185 +Iris-virginica,0.88465 +Iris-virginica,0.8122 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-versicolor,0.90662 +Iris-versicolor,0.90678 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.77084 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.84812 +Iris-virginica,0.88465 +Iris-virginica,0.88465 +Iris-virginica,0.88465 diff --git a/data/batch_predictions_e_c2.csv b/data/batch_predictions_e_c2.csv new file mode 100644 index 00000000..7e467b9f --- /dev/null +++ b/data/batch_predictions_e_c2.csv @@ -0,0 +1,151 @@ +species,confidence +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 
+Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-setosa,0.97711 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.97684 +Iris-versicolor,0.73084 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.48456 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.72886 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.52644 +Iris-versicolor,0.98038 +Iris-versicolor,0.97684 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-versicolor,0.98038 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97487 
+Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.49945 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97064 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-versicolor,0.73557 +Iris-virginica,0.97487 +Iris-virginica,0.96336 +Iris-virginica,0.97487 +Iris-virginica,0.96336 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.69812 +Iris-virginica,0.96336 +Iris-virginica,0.97487 +Iris-virginica,0.51538 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-versicolor,0.73557 +Iris-virginica,0.50482 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.69812 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97064 +Iris-virginica,0.97487 +Iris-virginica,0.97487 +Iris-virginica,0.97487 diff --git a/data/batch_predictions_e_o_k_c.csv b/data/batch_predictions_e_o_k_c.csv new file mode 100644 index 00000000..37a15c5c --- /dev/null +++ b/data/batch_predictions_e_o_k_c.csv @@ -0,0 +1,151 @@ +species,confidence +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 
+Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.79798 +Iris-versicolor,0.72785 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-virginica,0.35884 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.61511 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.54664 +Iris-versicolor,0.90807 +Iris-versicolor,0.79798 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-versicolor,0.36642 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.83698 
+Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-versicolor,0.7252 +Iris-virginica,0.88446 +Iris-virginica,0.74808 +Iris-virginica,0.88446 +Iris-virginica,0.74808 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.57567 +Iris-virginica,0.74808 +Iris-virginica,0.88446 +Iris-virginica,0.39452 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-versicolor,0.7252 +Iris-versicolor,0.3627 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.57567 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.83698 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 diff --git a/data/batch_predictions_e_o_k_p.csv b/data/batch_predictions_e_o_k_p.csv new file mode 100644 index 00000000..a7350d3e --- /dev/null +++ b/data/batch_predictions_e_o_k_p.csv @@ -0,0 +1,151 @@ +species,probability +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 
+Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-setosa,0.97952 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.94412 +Iris-versicolor,0.82098 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.46382 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.74966 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.59575 +Iris-versicolor,0.98394 +Iris-versicolor,0.94412 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-versicolor,0.98394 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.48407 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.96524 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-versicolor,0.78948 +Iris-virginica,0.97711 +Iris-virginica,0.93511 +Iris-virginica,0.97711 +Iris-virginica,0.93511 +Iris-virginica,0.97711 
+Iris-virginica,0.97711 +Iris-virginica,0.76501 +Iris-virginica,0.93511 +Iris-virginica,0.97711 +Iris-virginica,0.51824 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-versicolor,0.78948 +Iris-versicolor,0.47315 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.76501 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.96524 +Iris-virginica,0.97711 +Iris-virginica,0.97711 +Iris-virginica,0.97711 diff --git a/data/batch_predictions_e_o_k_v.csv b/data/batch_predictions_e_o_k_v.csv new file mode 100644 index 00000000..7ea07fb6 --- /dev/null +++ b/data/batch_predictions_e_o_k_v.csv @@ -0,0 +1,151 @@ +species,confidence +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-setosa,0.89474 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 
+Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.79798 +Iris-versicolor,0.90981 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.29709 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.76889 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.91106 +Iris-versicolor,0.90807 +Iris-versicolor,0.79798 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-versicolor,0.90807 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.43023 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.83698 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-versicolor,0.9065 +Iris-virginica,0.88446 +Iris-virginica,0.74808 +Iris-virginica,0.88446 +Iris-virginica,0.74808 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.71959 +Iris-virginica,0.74808 +Iris-virginica,0.88446 +Iris-virginica,0.65753 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-versicolor,0.9065 +Iris-virginica,0.43204 
+Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.71959 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.83698 +Iris-virginica,0.88446 +Iris-virginica,0.88446 +Iris-virginica,0.88446 diff --git a/data/batch_predictions_fs.csv b/data/batch_predictions_fs.csv new file mode 100644 index 00000000..f8cecc8f --- /dev/null +++ b/data/batch_predictions_fs.csv @@ -0,0 +1,151 @@ +species +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor 
+Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica diff --git a/data/batch_predictions_linear.csv b/data/batch_predictions_linear.csv new file mode 100644 index 00000000..35711c21 --- /dev/null +++ b/data/batch_predictions_linear.csv @@ -0,0 +1,81 @@ +Final +56.77203 +72.6303 +43.26354 +59.14052 +91.52395 +87.40438 +65.72696 +56.11003 +46.53398 +58.6856 +73.20647 +71.6498 +75.28521 +80.36787 +44.65418 +81.21957 +46.6145 +44.42817 +66.64971 +78.79405 +62.67892 +61.66299 +43.5824 +55.14265 +71.48793 +78.06192 +86.11151 +66.31278 +79.66063 +74.16794 +71.10653 +70.33398 +53.06595 +94.13455 +65.62536 +72.04855 +54.13917 +48.89832 +63.79246 +85.44121 +58.67188 +97.4077 +79.49599 +76.9121 +60.30535 +45.56268 +50.58525 +84.14384 +91.81501 +66.87383 +76.49076 +62.5023 +91.61738 +87.10287 +49.00698 +62.04755 +52.4874 +45.31577 +47.83327 +46.21957 +41.6781 +51.08073 +68.62204 +86.22031 +88.12945 +90.3396 +64.23275 +58.66493 +93.50621 +57.09357 +82.75746 +87.89552 +75.01704 +64.87885 +59.81544 +94.32704 +68.42164 +70.44569 +82.73946 +63.17056 diff --git a/data/batch_predictions_lr.csv b/data/batch_predictions_lr.csv new file mode 100644 index 00000000..906698a6 --- /dev/null 
+++ b/data/batch_predictions_lr.csv @@ -0,0 +1,151 @@ +species +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-setosa +Iris-versicolor +Iris-virginica +Iris-virginica +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-virginica +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-virginica +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-virginica +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-virginica +Iris-virginica +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-versicolor +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-versicolor +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-versicolor +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica 
+Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-versicolor +Iris-versicolor +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica +Iris-virginica diff --git a/data/batch_projections.csv b/data/batch_projections.csv new file mode 100644 index 00000000..cef2114c --- /dev/null +++ b/data/batch_projections.csv @@ -0,0 +1,151 @@ +PC1,PC2,PC3,PC4,PC5,PC6 +2.6374,-0.60267,-0.06371,0.12306,0.02495,-0.03675 +2.46125,0.15601,0.75994,0.37898,-0.12151,0.01077 +2.70872,-0.06725,0.571,0.03432,-0.04149,0.02122 +2.64822,0.09775,0.75853,0.02673,-0.03058,0.13881 +2.74687,-0.71519,-0.15983,-0.05082,0.073,-0.00854 +2.4893,-1.28012,-0.79956,-0.08992,-0.0186,0.00261 +2.78151,-0.33506,0.32725,-0.28971,-0.0378,0.08527 +2.60545,-0.4359,0.12712,0.11856,0.01977,0.03487 +2.66994,0.43661,1.15011,0.02703,-0.08916,0.14418 +2.55373,0.02669,0.61071,0.35046,0.02431,0.06939 +2.56267,-0.96955,-0.50429,0.2104,0.07013,-0.06906 +2.68295,-0.38165,0.22184,-0.05981,0.06264,0.13471 +2.57886,0.19701,0.80815,0.35216,-0.01302,0.0491 +2.92961,0.34239,1.0631,-0.07673,0.00575,0.0459 +2.60551,-1.49788,-1.1258,0.31152,0.07226,-0.31088 +2.66951,-2.06328,-1.66561,-0.25201,0.0823,-0.16376 +2.60347,-1.27304,-0.78635,-0.07753,-0.0829,-0.18122 +2.57282,-0.61566,-0.06292,0.06224,-0.07014,-0.04814 +2.33836,-1.21064,-0.80226,0.32005,0.00164,-0.06808 +2.7136,-1.03903,-0.5016,-0.19958,0.04988,0.00163 +2.33626,-0.55149,-0.07552,0.46292,-0.00167,0.01904 +2.59258,-0.91148,-0.35568,-0.17416,-0.07986,-0.01103 +3.07314,-0.59606,0.04942,-0.38897,0.06229,-0.08463 +2.24514,-0.36588,0.21901,0.10378,-0.28139,0.06441 +2.59733,-0.38696,0.21193,-0.0691,0.11087,0.27259 +2.35114,0.12446,0.70432,0.46042,-0.10275,0.07575 +2.44774,-0.46364,0.12541,-0.00618,-0.15433,0.05805 +2.55584,-0.63246,-0.11602,0.2076,0.02762,-0.01773 
+2.52794,-0.49015,0.0324,0.29694,-0.0231,-0.06496 +2.6231,-0.07257,0.5611,0.02503,0.00674,0.1591 +2.51363,0.03995,0.65721,0.19891,-0.04131,0.13089 +2.26418,-0.57392,-0.06733,0.34747,-0.224,-0.09566 +2.95907,-1.46267,-0.98757,-0.24901,0.33062,0.00129 +2.82039,-1.69845,-1.27563,-0.13006,0.2139,-0.1356 +2.48915,0.01371,0.61151,0.28964,-0.07078,0.058 +2.57819,-0.14952,0.42728,0.30034,-0.09776,-0.10555 +2.45384,-0.71295,-0.25645,0.47671,-0.04472,-0.19046 +2.86448,-0.6742,-0.11161,-0.07763,0.18148,0.02978 +2.75492,0.29785,1.00828,-0.05611,-0.07059,0.09949 +2.55242,-0.46391,0.07811,0.2062,0.00637,0.00794 +2.65439,-0.58587,-0.01061,-0.0223,-0.07282,-0.06717 +2.24222,1.24058,1.97595,0.57438,-0.42163,0.05227 +2.8678,0.01678,0.71803,-0.22859,-0.00129,0.10204 +2.37501,-0.63014,-0.01813,-0.21406,-0.30986,0.03654 +2.53485,-1.0591,-0.51401,-0.27279,0.0191,0.17407 +2.44969,0.17104,0.80974,0.23052,-0.2032,0.02631 +2.74964,-1.02782,-0.50569,-0.14185,0.16105,0.05898 +2.73321,-0.04101,0.61671,-0.05641,-0.01201,0.09412 +2.61569,-0.94153,-0.45528,0.12276,0.08353,-0.04213 +2.57755,-0.2936,0.27555,0.2079,-0.03095,-0.01236 +-1.08435,0.47485,-1.67062,0.46019,0.01093,-0.09021 +-0.7737,0.63349,-1.36917,-0.12026,-0.03592,-0.0319 +-1.20943,0.62687,-1.4823,0.39177,-0.07326,0.01598 +-0.53256,2.18522,0.39297,0.00428,-0.11739,-0.00791 +-1.08103,1.16583,-0.84098,0.30923,-0.17185,-0.01796 +-0.49911,1.41767,-0.44719,-0.26713,0.10945,0.17436 +-0.7859,0.50444,-1.4711,-0.36116,-0.05081,0.07684 +0.23558,2.26411,0.56263,-0.40363,0.17039,-0.13256 +-0.94845,1.02326,-1.0367,0.43227,0.03958,-0.02084 +-0.18376,1.69591,-0.03641,-0.66131,-0.04976,0.02063 +-0.10029,2.79469,1.08752,0.02277,0.05054,-0.07266 +-0.53582,1.05993,-0.82397,-0.37668,-0.08646,-0.03763 +-0.66038,2.22464,0.29067,0.71118,0.06624,-0.1097 +-0.77644,1.14856,-0.79416,-0.06983,0.02756,0.14841 +-0.13277,1.3211,-0.51359,-0.41312,0.01281,-0.21106 +-0.89609,0.70474,-1.36856,0.29281,-0.03175,-0.14855 +-0.46237,1.13865,-0.68685,-0.64889,0.00197,0.18106 
+-0.30066,1.57623,-0.34025,0.10161,0.28236,-0.00351 +-1.23206,2.09484,0.18011,0.56685,-0.35563,0.00927 +-0.31499,1.90388,0.05542,0.04418,0.11262,-0.05549 +-0.78794,0.72928,-1.13165,-0.75021,-0.20597,0.20649 +-0.56851,1.31448,-0.62672,0.09891,-0.02453,-0.16318 +-1.22993,1.63814,-0.31749,0.38338,-0.20077,0.16998 +-0.70372,1.31506,-0.65062,0.13805,0.18309,0.16992 +-0.75677,1.0846,-0.92878,0.26629,0.01815,-0.10485 +-0.8995,0.87328,-1.17443,0.29141,-0.05301,-0.12288 +-1.23261,1.09124,-0.9954,0.62677,-0.0848,0.00454 +-1.31753,0.79569,-1.24087,0.17799,-0.25522,0.09176 +-0.73092,1.16713,-0.73776,-0.2121,-0.08628,0.07204 +-0.13283,1.75541,-0.1263,0.1188,0.16465,-0.2536 +-0.28987,2.0742,0.25286,0.04588,0.07529,-0.07578 +-0.19674,2.08895,0.25537,0.1098,0.1543,-0.11035 +-0.37275,1.55381,-0.33206,-0.01384,0.06003,-0.11821 +-1.07963,1.42458,-0.46652,-0.11903,-0.15421,0.33386 +-0.35632,1.19467,-0.58883,-0.82416,0.02876,0.23493 +-0.51329,0.45148,-1.46259,-0.70411,-0.00812,0.067 +-1.0463,0.68644,-1.37768,0.22269,-0.07862,-0.02206 +-1.07093,1.95403,-0.01231,0.69299,-0.16027,-0.03958 +-0.21904,1.1717,-0.67523,-0.51485,0.12784,0.02001 +-0.41968,1.90415,0.10272,-0.16819,-0.04809,-0.00537 +-0.41282,1.76952,-0.05641,-0.206,0.14596,0.19113 +-0.69146,1.0098,-0.93599,-0.15298,0.04614,0.10372 +-0.45773,1.69257,-0.19023,0.0693,0.04146,-0.07352 +0.12611,2.37663,0.65875,-0.22975,0.12234,-0.16077 +-0.4169,1.59153,-0.24315,-0.25923,0.03997,0.06215 +-0.23602,1.1549,-0.72833,-0.36949,0.22561,0.05042 +-0.35705,1.28245,-0.58241,-0.34407,0.09587,0.03776 +-0.65072,1.14062,-0.83076,0.09102,0.04495,-0.05097 +0.20701,2.05989,0.33018,-0.36612,0.03493,-0.33443 +-0.38495,1.42476,-0.43398,-0.25474,0.04514,-0.00947 +-2.10701,-1.35933,0.083,-0.86578,-0.21011,0.22557 +-1.53615,-0.28222,1.22376,-0.39372,0.07484,0.00734 +-2.41367,-1.10812,0.12644,0.34042,-0.05697,0.00586 +-1.76653,-0.69922,0.67116,-0.08268,0.25261,0.11639 +-2.13155,-0.95126,0.42459,-0.24313,-0.08774,0.11014 
+-2.87859,-1.26059,-0.14172,0.75692,-0.01143,0.19289 +-0.87139,0.28755,1.97332,-0.86975,0.21985,-0.00574 +-2.49658,-0.99175,0.15795,0.77201,0.23116,0.16873 +-2.26148,-0.25268,1.04902,0.60663,0.09257,0.09547 +-2.44346,-2.03481,-0.79676,-0.33886,-0.21067,0.0329 +-1.68971,-1.19395,0.15586,-0.27227,0.05921,-0.18625 +-1.91139,-0.45384,0.9231,0.12591,0.0266,-0.06236 +-2.14042,-1.017,0.28667,0.0899,-0.08107,-0.09716 +-1.63205,0.01564,1.56712,-0.3666,-0.09223,-0.02561 +-1.80263,-0.48767,1.0826,-0.78407,-0.36596,-0.04834 +-1.88752,-1.20844,0.20065,-0.54857,-0.18051,-0.10157 +-1.7876,-0.89401,0.43132,0.00946,0.24439,0.01782 +-2.57321,-2.42762,-1.35425,0.09073,0.17336,0.21069 +-3.37217,-0.75775,0.38145,1.05858,-0.30538,0.27596 +-1.63754,0.41813,1.8515,0.45914,0.23907,-0.05328 +-2.26682,-1.35559,-0.05761,-0.12278,-0.1832,-0.05242 +-1.38116,-0.37617,1.18405,-0.70986,0.00904,-0.04082 +-3.00846,-0.99632,0.09542,1.07476,0.01704,0.22076 +-1.67962,-0.40575,0.98453,0.11149,0.07078,-0.20787 +-1.97516,-1.41413,-0.1063,-0.26265,0.06843,0.02551 +-2.1886,-1.38002,-0.21851,0.43495,0.30028,0.0616 +-1.54161,-0.5165,0.89171,-0.05929,0.10275,-0.22562 +-1.40424,-0.77133,0.64717,-0.32251,0.20153,-0.15018 +-2.06974,-0.62566,0.76966,-0.09127,-0.0807,0.054 +-2.11523,-1.06944,0.07676,0.73527,0.38901,-0.01008 +-2.61354,-0.88867,0.26147,0.89126,0.05587,0.03721 +-2.46447,-2.45236,-1.44395,0.39694,0.28852,0.04172 +-2.13433,-0.63864,0.77045,-0.15209,-0.17579,0.04261 +-1.48651,-0.51088,0.83041,0.20152,0.42285,-0.08051 +-1.57147,-0.16966,1.20138,0.24405,0.55582,0.21201 +-2.91807,-1.30571,-0.17264,0.73841,-0.29538,-0.08663 +-1.87182,-1.47979,-0.04971,-0.87881,-0.14467,0.05439 +-1.67813,-1.00653,0.3352,-0.16442,0.29244,0.04603 +-1.32267,-0.74154,0.69948,-0.40705,0.19885,-0.16921 +-2.10847,-1.18377,0.09584,0.0944,-0.0759,-0.16879 +-2.25325,-1.17024,0.18963,-0.26954,-0.30222,-0.05717 +-2.15201,-1.20442,0.10733,-0.01795,-0.3143,-0.32945 +-1.53615,-0.28222,1.22376,-0.39372,0.07484,0.00734 
+-2.27087,-1.33112,-0.0152,-0.21661,-0.13765,0.06643 +-2.23349,-1.46606,-0.10313,-0.50594,-0.31193,-0.02006 +-2.13094,-1.00964,0.34717,-0.11009,-0.30608,-0.23089 +-1.88562,-0.13944,1.27227,0.22004,-0.07753,-0.17585 +-1.83114,-0.91466,0.44281,-0.10289,0.00598,-0.14284 +-1.69713,-1.43525,0.00511,-0.89942,-0.06834,8.0E-4 +-1.35527,-0.71885,0.73858,-0.50398,0.26048,-0.00439 diff --git a/data/classification_evaluation.json b/data/classification_evaluation.json new file mode 100644 index 00000000..13a12431 --- /dev/null +++ b/data/classification_evaluation.json @@ -0,0 +1 @@ +{"code": 200, "resource": "evaluation/64b5b07f79c6023e9583c16f", "location": "https://bigml.io/andromeda/evaluation/64b5b07f79c6023e9583c16f", "object": {"boosted_ensemble": false, "category": 0, "code": 200, "combiner": null, "configuration": null, "configuration_status": false, "created": "2023-07-17T21:19:59.247000", "creator": "mmartin", "dataset": "dataset/64b5b07a79c602298f37d884", "dataset_status": true, "datasets": [], "deepnet": "", "description": "", "ensemble": "", "evaluations": null, "excluded_fields": [], "fields_map": {"000001": "000001", "000003": "000003", "000004": "000004", "000005": "000005", "000006": "000006", "000007": "000007", "000009": "000009", "00000a": "00000a", "00000c": "00000c", "00000d": "00000d", "000010": "000010", "000011": "000011", "000012": "000012", "000013": "000013"}, "fusion": "", "input_fields": [], "linearregression": "", "locale": "en-US", "logisticregression": "", "max_rows": 134, "missing_strategy": 0, "model": "model/64b5b05079c602298f37d881", "model_status": true, "model_type": 0, "name": "Stdin input vs. 
Stdin input", "name_options": "512-node, pruned, deterministic order, operating kind=probability", "number_of_models": 1, "objective_field_descriptors": {"000013": {"column_number": 19, "datatype": "string", "name": "Churn", "optype": "categorical", "order": 19, "preferred": true, "term_analysis": {"enabled": true}}}, "objective_fields": ["000013"], "objective_fields_names": ["Churn"], "operating_kind": "probability", "optiml": null, "optiml_status": false, "out_of_bag": false, "performance": 0.81925, "private": true, "project": null, "range": null, "replacement": false, "resource": "evaluation/64b5b07f79c6023e9583c16f", "result": {"class_names": ["False", "True"], "mode": {"accuracy": 0.85075, "average_area_under_pr_curve": 0, "average_area_under_roc_curve": 0, "average_balanced_accuracy": 0.5, "average_f_measure": 0.45968, "average_kendalls_tau_b": 0, "average_ks_statistic": 0, "average_max_phi": 0, "average_phi": 0, "average_precision": 0.42537, "average_recall": 0.5, "average_spearmans_rho": 0, "confusion_matrix": [[114, 0], [20, 0]], "per_class_statistics": [{"accuracy": 0.85075, "balanced_accuracy": 0.5, "class_name": "False", "f_measure": 0.91935, "phi_coefficient": 0, "precision": 0.85075, "present_in_test_data": true, "recall": 1}, {"accuracy": 0.85075, "balanced_accuracy": 0.5, "class_name": "True", "f_measure": 0, "phi_coefficient": 0, "precision": 0, "present_in_test_data": true, "recall": 0}]}, "model": {"accuracy": 0.91791, "average_area_under_pr_curve": 0.90567, "average_area_under_roc_curve": 0.92588, "average_balanced_accuracy": 0.78684, "average_f_measure": 0.81925, "average_kendalls_tau_b": 0.46897, "average_ks_statistic": 0.76491, "average_max_phi": 0.76491, "average_phi": 0.64837, "average_precision": 0.86639, "average_recall": 0.78684, "average_spearmans_rho": 0.5368, "confusion_matrix": [[111, 3], [8, 12]], "per_class_statistics": [{"accuracy": 0.91791, "area_under_pr_curve": 0.9843, "area_under_roc_curve": 0.92588, "balanced_accuracy": 
0.78684, "class_name": "False", "f_measure": 0.95279, "gain_curve": [[0, 0, 0.99933], [0.3209, 0.37719, 0.99838], [0.5, 0.57895, 0.99531], [0.52985, 0.60526, 0.99497], [0.6194, 0.71053, 0.99437], [0.67164, 0.76316, 0.99218], [0.69403, 0.78947, 0.98995], [0.79851, 0.90351, 0.98721], [0.81343, 0.92105, 0.98593], [0.82836, 0.9386, 0.98437], [0.85075, 0.96491, 0.97655], [0.85821, 0.96491, 0.9531], [0.87313, 0.96491, 0.92964], [0.88806, 0.97368, 0.42964], [0.89552, 0.98246, 0.28643], [0.91045, 1, 0.17186], [0.91791, 1, 0.14321], [0.92537, 1, 0.09548], [0.93284, 1, 0.06138], [0.96269, 1, 0.04296], [1, 1, null]], "kendalls_tau_b": 0.46897, "ks_statistic": [0.76491, 0.97655], "lift_curve": [[0, 0, 0.99933], [0.3209, 1.17544, 0.99838], [0.5, 1.15789, 0.99531], [0.52985, 1.14233, 0.99497], [0.6194, 1.14711, 0.99437], [0.67164, 1.13626, 0.99218], [0.69403, 1.13752, 0.98995], [0.79851, 1.1315, 0.98721], [0.81343, 1.1323, 0.98593], [0.82836, 1.13308, 0.98437], [0.85075, 1.1342, 0.97655], [0.85821, 1.12433, 0.9531], [0.87313, 1.10511, 0.92964], [0.88806, 1.09642, 0.42964], [0.89552, 1.09708, 0.28643], [0.91045, 1.09836, 0.17186], [0.91791, 1.08943, 0.14321], [0.92537, 1.08065, 0.09548], [0.93284, 1.072, 0.06138], [0.96269, 1.03876, 0.04296], [1, 1, null]], "max_phi": [0.76491, 0.97655], "negative_cdf": [[0, 0, 0.99933], [0.3209, 0, 0.99838], [0.5, 0.05, 0.99531], [0.52985, 0.1, 0.99497], [0.6194, 0.1, 0.99437], [0.67164, 0.15, 0.99218], [0.69403, 0.15, 0.98995], [0.79851, 0.2, 0.98721], [0.81343, 0.2, 0.98593], [0.82836, 0.2, 0.98437], [0.85075, 0.2, 0.97655], [0.85821, 0.25, 0.9531], [0.87313, 0.35, 0.92964], [0.88806, 0.4, 0.42964], [0.89552, 0.4, 0.28643], [0.91045, 0.4, 0.17186], [0.91791, 0.45, 0.14321], [0.92537, 0.5, 0.09548], [0.93284, 0.55, 0.06138], [0.96269, 0.75, 0.04296], [1, 1, null]], "per_threshold_confusion_matrices": [[[114, 20, 0, 0], null], [[114, 15, 5, 0], 0.04296], [[114, 11, 9, 0], 0.06138], [[114, 10, 10, 0], 0.09548], [[114, 9, 11, 0], 0.14321], [[114, 
8, 12, 0], 0.17186], [[112, 8, 12, 2], 0.28643], [[111, 8, 12, 3], 0.42964], [[110, 7, 13, 4], 0.92964], [[110, 5, 15, 4], 0.9531], [[110, 4, 16, 4], 0.97655], [[107, 4, 16, 7], 0.98437], [[105, 4, 16, 9], 0.98593], [[103, 4, 16, 11], 0.98721], [[90, 3, 17, 24], 0.98995], [[87, 3, 17, 27], 0.99218], [[81, 2, 18, 33], 0.99437], [[69, 2, 18, 45], 0.99497], [[66, 1, 19, 48], 0.99531], [[43, 0, 20, 71], 0.99838], [[0, 0, 20, 114], 0.99933]], "phi_coefficient": 0.64837, "pr_curve": [[0, 1, 0.99933], [0.37719, 1, 0.99838], [0.57895, 0.98507, 0.99531], [0.60526, 0.97183, 0.99497], [0.71053, 0.9759, 0.99437], [0.76316, 0.96667, 0.99218], [0.78947, 0.96774, 0.98995], [0.90351, 0.96262, 0.98721], [0.92105, 0.9633, 0.98593], [0.9386, 0.96396, 0.98437], [0.96491, 0.96491, 0.97655], [0.96491, 0.95652, 0.9531], [0.96491, 0.94017, 0.92964], [0.97368, 0.93277, 0.42964], [0.98246, 0.93333, 0.28643], [1, 0.93443, 0.17186], [1, 0.92683, 0.14321], [1, 0.91935, 0.09548], [1, 0.912, 0.06138], [1, 0.88372, 0.04296], [1, 0.85075, null]], "precision": 0.93277, "present_in_test_data": true, "recall": 0.97368, "roc_curve": [[0, 0, 0.99933], [0, 0.37719, 0.99838], [0.05, 0.57895, 0.99531], [0.1, 0.60526, 0.99497], [0.1, 0.71053, 0.99437], [0.15, 0.76316, 0.99218], [0.15, 0.78947, 0.98995], [0.2, 0.90351, 0.98721], [0.2, 0.92105, 0.98593], [0.2, 0.9386, 0.98437], [0.2, 0.96491, 0.97655], [0.25, 0.96491, 0.9531], [0.35, 0.96491, 0.92964], [0.4, 0.97368, 0.42964], [0.4, 0.98246, 0.28643], [0.4, 1, 0.17186], [0.45, 1, 0.14321], [0.5, 1, 0.09548], [0.55, 1, 0.06138], [0.75, 1, 0.04296], [1, 1, null]], "spearmans_rho": 0.5368}, {"accuracy": 0.91791, "area_under_pr_curve": 0.82704, "area_under_roc_curve": 0.92588, "balanced_accuracy": 0.78684, "class_name": "True", "f_measure": 0.68571, "gain_curve": [[0, 0, 0.95704], [0.03731, 0.25, 0.93862], [0.06716, 0.45, 0.90452], [0.07463, 0.5, 0.85679], [0.08209, 0.55, 0.82814], [0.08955, 0.6, 0.71357], [0.10448, 0.6, 0.57036], [0.11194, 0.6, 0.07036], 
[0.12687, 0.65, 0.0469], [0.14179, 0.75, 0.02345], [0.14925, 0.8, 0.01563], [0.17164, 0.8, 0.01407], [0.18657, 0.8, 0.01279], [0.20149, 0.8, 0.01005], [0.30597, 0.85, 0.00782], [0.32836, 0.85, 0.00563], [0.3806, 0.9, 0.00503], [0.47015, 0.9, 0.00469], [0.5, 0.95, 0.00162], [0.6791, 1, 0.00067], [1, 1, null]], "kendalls_tau_b": 0.46897, "ks_statistic": [0.76491, 0.01563], "lift_curve": [[0, 0, 0.95704], [0.03731, 6.7, 0.93862], [0.06716, 6.7, 0.90452], [0.07463, 6.7, 0.85679], [0.08209, 6.7, 0.82814], [0.08955, 6.7, 0.71357], [0.10448, 5.74286, 0.57036], [0.11194, 5.36, 0.07036], [0.12687, 5.12353, 0.0469], [0.14179, 5.28947, 0.02345], [0.14925, 5.36, 0.01563], [0.17164, 4.66087, 0.01407], [0.18657, 4.288, 0.01279], [0.20149, 3.97037, 0.01005], [0.30597, 2.77805, 0.00782], [0.32836, 2.58864, 0.00563], [0.3806, 2.36471, 0.00503], [0.47015, 1.91429, 0.00469], [0.5, 1.9, 0.00162], [0.6791, 1.47253, 0.00067], [1, 1, null]], "max_phi": [0.76491, 0.01563], "negative_cdf": [[0, 0, 0.95704], [0.03731, 0, 0.93862], [0.06716, 0, 0.90452], [0.07463, 0, 0.85679], [0.08209, 0, 0.82814], [0.08955, 0, 0.71357], [0.10448, 0.01754, 0.57036], [0.11194, 0.02632, 0.07036], [0.12687, 0.03509, 0.0469], [0.14179, 0.03509, 0.02345], [0.14925, 0.03509, 0.01563], [0.17164, 0.0614, 0.01407], [0.18657, 0.07895, 0.01279], [0.20149, 0.09649, 0.01005], [0.30597, 0.21053, 0.00782], [0.32836, 0.23684, 0.00563], [0.3806, 0.28947, 0.00503], [0.47015, 0.39474, 0.00469], [0.5, 0.42105, 0.00162], [0.6791, 0.62281, 0.00067], [1, 1, null]], "per_threshold_confusion_matrices": [[[20, 114, 0, 0], null], [[20, 71, 43, 0], 0.00067], [[19, 48, 66, 1], 0.00162], [[18, 45, 69, 2], 0.00469], [[18, 33, 81, 2], 0.00503], [[17, 27, 87, 3], 0.00563], [[17, 24, 90, 3], 0.00782], [[16, 11, 103, 4], 0.01005], [[16, 9, 105, 4], 0.01279], [[16, 7, 107, 4], 0.01407], [[16, 4, 110, 4], 0.01563], [[15, 4, 110, 5], 0.02345], [[13, 4, 110, 7], 0.0469], [[12, 3, 111, 8], 0.07036], [[12, 2, 112, 8], 0.57036], [[12, 0, 114, 8], 
0.71357], [[11, 0, 114, 9], 0.82814], [[10, 0, 114, 10], 0.85679], [[9, 0, 114, 11], 0.90452], [[5, 0, 114, 15], 0.93862], [[0, 0, 114, 20], 0.95704]], "phi_coefficient": 0.64837, "pr_curve": [[0, 1, 0.95704], [0.25, 1, 0.93862], [0.45, 1, 0.90452], [0.5, 1, 0.85679], [0.55, 1, 0.82814], [0.6, 1, 0.71357], [0.6, 0.85714, 0.57036], [0.6, 0.8, 0.07036], [0.65, 0.76471, 0.0469], [0.75, 0.78947, 0.02345], [0.8, 0.8, 0.01563], [0.8, 0.69565, 0.01407], [0.8, 0.64, 0.01279], [0.8, 0.59259, 0.01005], [0.85, 0.41463, 0.00782], [0.85, 0.38636, 0.00563], [0.9, 0.35294, 0.00503], [0.9, 0.28571, 0.00469], [0.95, 0.28358, 0.00162], [1, 0.21978, 0.00067], [1, 0.14925, null]], "precision": 0.8, "present_in_test_data": true, "recall": 0.6, "roc_curve": [[0, 0, 0.95704], [0, 0.25, 0.93862], [0, 0.45, 0.90452], [0, 0.5, 0.85679], [0, 0.55, 0.82814], [0, 0.6, 0.71357], [0.01754, 0.6, 0.57036], [0.02632, 0.6, 0.07036], [0.03509, 0.65, 0.0469], [0.03509, 0.75, 0.02345], [0.03509, 0.8, 0.01563], [0.0614, 0.8, 0.01407], [0.07895, 0.8, 0.01279], [0.09649, 0.8, 0.01005], [0.21053, 0.85, 0.00782], [0.23684, 0.85, 0.00563], [0.28947, 0.9, 0.00503], [0.39474, 0.9, 0.00469], [0.42105, 0.95, 0.00162], [0.62281, 1, 0.00067], [1, 1, null]], "spearmans_rho": 0.5368}]}, "random": {"accuracy": 0.47761, "average_area_under_pr_curve": 0, "average_area_under_roc_curve": 0, "average_balanced_accuracy": 0.40439, "average_f_measure": 0.385, "average_kendalls_tau_b": 0, "average_ks_statistic": 0, "average_max_phi": 0, "average_phi": -0.13666, "average_precision": 0.45116, "average_recall": 0.40439, "average_spearmans_rho": 0, "confusion_matrix": [[58, 56], [14, 6]], "per_class_statistics": [{"accuracy": 0.47761, "balanced_accuracy": 0.40439, "class_name": "False", "f_measure": 0.62366, "phi_coefficient": -0.13666, "precision": 0.80556, "present_in_test_data": true, "recall": 0.50877}, {"accuracy": 0.47761, "balanced_accuracy": 0.40439, "class_name": "True", "f_measure": 0.14634, "phi_coefficient": -0.13666, 
"precision": 0.09677, "present_in_test_data": true, "recall": 0.3}]}}, "rows": 134, "sample_rate": 1.0, "sampled_rows": 134, "shared": false, "size": 11582, "status": {"code": 5, "elapsed": 3847, "message": "The evaluation has been created", "progress": 1}, "subscription": true, "tags": [], "timeseries": "", "type": 0, "updated": "2023-07-17T21:20:05.589000"}, "error": null} \ No newline at end of file diff --git a/data/constant_field.csv b/data/constant_field.csv new file mode 100644 index 00000000..26183424 --- /dev/null +++ b/data/constant_field.csv @@ -0,0 +1,5 @@ +a,b,c,d +1,3,0,a +1,2,1,a +1,1,2,b +1,0,3,b diff --git a/data/dates.csv b/data/dates.csv new file mode 100644 index 00000000..a912288a --- /dev/null +++ b/data/dates.csv @@ -0,0 +1,13 @@ +"test-date","test-num1","test-num2", "test-num3" +"01/12/2012",23,34,54 +"13/12/1998",22,54,65 +"15/04/2002",25,56,70 +"17/12/2012",23,30,54 +"24/03/1998",22,54,65 +"30/04/2002",25,23,70 +"23/12/2012",23,76,54 +"21/04/1998",22,87,65 +"30/06/2002",25,56,90 +"23/11/2012",23,98,9 +"07/12/1998",22,54,34 +"30/04/2002",25,23,12 diff --git a/data/dates2.csv b/data/dates2.csv new file mode 100644 index 00000000..dfc66d6a --- /dev/null +++ b/data/dates2.csv @@ -0,0 +1,2001 @@ +cat-0,time-1,target-2 +cat0,2026-09-29T16:31:24.662,0.5821190584549210 +cat0,1943-03-08T5:16:21.366,0.28911100973707000 +cat0,1931-03-13T16:49:42.655,-1 +cat0,1931-06-28T17:55:25.790,-1 +cat0,1931-08-21T18:38:39.048,-1 +cat0,1902-04-09T5:20:15.100,0.8219057169987630 +cat0,1958-09-30T3:46:00.747,-0.06097665232903070 +cat2,1963-04-01T4:29:01.524,1.6305910497902800 +cat0,1994-08-18T14:15:51.989,-0.26007160391160400 +cat2,1946-01-17T15:49:53.256,-0.7401210045791640 +cat0,1915-07-21T11:37:11.978,-0.8592093953278260 +cat2,2011-07-22T16:45:51.871,-1.7568120469139500 +cat0,2016-10-10T5:16:23.593,-0.5310333702436090 +cat1,2048-09-30T3:27:01.697,0.9053365450711390 +cat0,1957-05-17T11:09:13.839,-0.30656425886528900 +cat1,1943-03-16T21:58:56.649,0.8347639933692480 
+cat0,1905-02-16T12:07:23.988,0.5283341437716730 +cat0,1947-01-19T9:26:54.399,-0.41432260513264500 +cat0,2035-07-24T15:42:57.227,0.733492187504245 +cat0,1969-05-10T12:41:49.366,-0.2670765988407150 +cat0,1953-09-07T15:00:59.122,-0.22903006922953900 +cat0,1926-12-04T16:45:27.271,0.4930334962821330 +cat0,1922-06-11T0:56:35.490,1.4139069032069500 +cat0,1987-11-13T23:33:28.351,0.10347867906329900 +cat0,1989-06-15T1:35:57.521,2.035475998314620 +cat0,1914-03-06T17:25:56.896,-1.5638121292421000 +cat0,1958-02-09T23:41:51.520,1.0254026140276200 +cat0,2033-03-27T10:19:47.163,0.16018943010818100 +cat0,1961-03-15T6:51:20.295,-0.9601418247082480 +cat2,2000-12-20T19:01:50.707,0.7763878911328020 +cat2,1906-12-10T16:56:52.591,0.22045687659107500 +cat2,1934-03-15T14:43:33.029,-1.1645015775659600 +cat0,1988-12-09T18:09:57.287,0.7730073356127860 +cat0,2014-07-26T14:22:05.975,-0.13655895440895100 +cat2,1995-02-25T3:50:36.725,1.2057283784679800 +cat0,2037-06-17T8:14:05.896,0.677474230016623 +cat0,1974-02-24T8:19:47.256,1.0655503187403000 +cat0,2018-11-16T11:20:24.234,0.10591084011184700 +cat0,2049-08-01T6:01:07.320,-0.27315294379495700 +cat2,2034-10-10T6:49:52.690,0.41333184640770600 +cat0,2020-08-06T20:33:26.529,0.09121420947664510 +cat1,1937-03-04T16:18:21.103,0.9451454593608690 +cat0,1900-12-14T16:54:31.894,0.6663151718254280 +cat0,1906-01-01T13:19:01.459,0.7130736001763760 +cat0,1990-12-09T22:44:29.509,-0.5330757532726580 +cat0,1915-11-19T22:05:36.169,1.0077734881398700 +cat1,1955-02-10T8:35:37.962,-0.25500505883259500 +cat2,1905-06-02T19:01:33.473,-0.2245574757546850 +cat0,1934-09-01T8:36:42.380,-0.8586929211596630 +cat2,2038-05-19T12:02:28.495,-0.6764379139292900 +cat0,1946-04-08T19:49:25.464,-0.7906449473892130 +cat0,1950-01-26T0:53:40.934,0.44242706129835600 +cat2,1978-03-17T1:36:05.646,1.1192739104291600 +cat0,1913-11-11T8:11:55.283,0.8830437502037900 +cat0,1964-10-25T18:03:14.136,-0.515647729966376 +cat0,2039-05-29T21:16:00.974,-1.2720781270594700 
+cat0,1933-06-30T14:41:51.993,0.24274586107814700 +cat0,1971-09-11T5:37:36.963,0.23189845024882100 +cat1,1997-06-30T21:49:29.876,-0.12461666185719900 +cat1,2017-07-12T8:51:16.998,-0.5571074508009260 +cat2,2007-09-21T15:09:48.434,-0.3404447290883770 +cat2,1920-09-10T15:31:03.891,0.5595450671986010 +cat1,1912-03-06T2:26:36.620,-0.6431488920943390 +cat0,1952-10-10T15:12:56.216,1.3301043499142600 +cat1,2042-04-06T22:51:12.573,0.5338647900730440 +cat0,1927-11-01T6:42:12.123,0.9376235332551990 +cat2,1966-01-29T1:41:48.103,-1.5583867815668900 +cat0,1994-02-28T16:04:20.880,-0.8548996184634290 +cat0,2000-08-22T13:22:33.168,0.7243121379329790 +cat0,1925-06-15T19:05:42.763,1.472567719814430 +cat2,1993-06-04T21:27:15.828,0.5743605788239800 +cat1,1999-10-30T5:10:41.221,1.0093985408198600 +cat0,1979-04-28T11:45:39.671,-1.2829071960692100 +cat0,1914-03-03T12:03:05.987,-0.006090036975676140 +cat0,1969-07-08T13:34:26.637,0.24033123749079200 +cat0,1981-02-16T17:39:57.260,-0.14480707025870100 +cat0,2028-07-25T7:44:39.914,-0.368477329529651 +cat0,2013-07-31T0:38:37.289,-1.5389579241901400 +cat0,2026-07-28T9:58:22.232,0.6990834907310900 +cat0,1972-02-02T15:39:02.249,0.032174271299641800 +cat1,2020-03-24T7:45:53.709,0.7188032118220840 +cat0,1929-01-07T21:12:29.175,1.0707555021937300 +cat2,1988-07-18T17:52:22.655,-0.3702268136112520 +cat0,1912-12-27T1:15:03.508,-0.8356060199882280 +cat0,1957-03-21T2:24:55.895,-0.2748331290839940 +cat0,1986-05-16T12:41:43.809,0.58993195183012 +cat0,2004-04-26T9:44:31.891,-0.06216461118726400 +cat0,1937-04-04T23:57:35.694,0.4990496466115030 +cat0,2005-02-26T2:53:14.063,1.1889700875468200 +cat1,2046-02-11T11:57:34.373,0.142326530947397 +cat2,1979-02-15T9:30:32.200,0.8549093808727780 +cat1,2000-07-01T21:09:08.277,0.30830920195537900 +cat0,2037-06-12T3:11:02.213,2.2121661882949100 +cat1,2031-12-13T3:18:12.063,0.18528098554418400 +cat0,1911-02-03T15:20:34.943,-0.4560341989291740 +cat0,1943-12-22T20:05:19.409,1.4863099407576300 
+cat0,1997-04-07T4:02:44.603,2.09854603172495 +cat0,1930-06-23T11:43:59.876,-1.0536218945100000 +cat0,1929-04-21T15:11:50.066,1.0565456966844700 +cat0,1925-07-26T14:53:25.142,-0.5397494485294180 +cat0,1944-09-29T15:21:33.294,1.1597851429728700 +cat0,2009-12-18T18:04:00.400,-1.1740184363978500 +cat0,2039-08-03T17:22:27.227,-0.03732293980342540 +cat0,2036-02-19T12:36:17.345,-0.3442238514024860 +cat0,1969-01-26T10:16:25.750,1.0363958381942800 +cat0,1983-10-08T21:37:13.993,-1.7749962525421200 +cat0,1986-02-06T6:40:55.095,-0.09265229773296750 +cat0,2032-08-23T13:24:53.260,0.19346024114437200 +cat0,1972-04-25T23:39:01.835,0.8049292862157450 +cat0,2026-02-07T15:03:13.438,0.9647207293848130 +cat2,1953-02-22T14:44:04.693,0.20018856482556200 +cat0,1970-07-09T7:05:07.707,-2.004828960068260 +cat0,1997-06-02T17:03:38.475,0.06514003733753670 +cat0,2004-01-10T0:42:16.843,0.565612001411909 +cat0,2037-05-08T1:38:26.949,-0.12947313356597800 +cat2,1904-12-30T16:19:03.190,-0.3802963029897950 +cat0,2044-08-14T1:09:41.834,-1.3376236333025500 +cat0,1995-02-06T13:12:29.490,-1.7586577948873400 +cat0,1994-04-08T12:14:05.738,-0.7164265052505330 +cat0,1984-12-26T18:19:51.763,0.5828101433809760 +cat0,2011-03-04T22:52:37.045,1.6422687826352700 +cat0,1932-07-22T19:49:45.682,-0.6206032442136570 +cat0,1956-09-24T22:15:56.527,0.5561418703594510 +cat0,2016-03-23T6:51:58.930,1.1141900754393600 +cat0,2022-01-30T11:15:06.457,0.34402310509642300 +cat0,1971-01-24T20:50:11.617,-0.4251765814017670 +cat0,2026-06-01T22:31:35.381,-0.5574214393781720 +cat0,2045-11-21T2:43:50.298,-1.114043815649420 +cat0,1925-08-28T20:16:41.681,1.7288624914097400 +cat1,2020-07-10T3:47:41.474,1.1182762124351800 +cat0,1983-08-12T7:06:49.959,-1.1501177076554000 +cat1,1943-02-21T12:22:19.881,-0.8185904216101460 +cat0,2033-11-30T22:15:54.134,1.1498369718552100 +cat0,1924-12-02T23:36:24.537,-2.4121216212237400 +cat2,1993-02-04T1:24:32.645,-0.13636895022538400 +cat2,1986-03-26T8:26:41.025,0.2641061183839990 
+cat0,1900-03-31T22:45:10.567,2.201780641590700 +cat1,1951-09-17T20:12:41.741,1.021967918823420 +cat1,2002-06-23T1:48:42.729,1.7194262027600700 +cat0,1996-05-19T20:10:39.516,0.5524179526448160 +cat0,1986-06-21T12:02:51.846,-1.4928671875266400 +cat0,1906-03-24T10:37:52.499,-0.42172902252412800 +cat1,2022-05-05T2:10:24.340,1.4541544526517800 +cat0,2028-07-19T9:58:02.194,0.18953799761736400 +cat0,1978-09-28T5:37:16.864,1.0310285871689400 +cat0,1957-06-28T19:44:27.900,-0.5796417748998800 +cat0,1988-05-06T21:21:10.786,0.8828116785473860 +cat0,1901-12-22T2:46:21.892,-1.646676071362500 +cat0,2037-12-31T23:54:53.715,-0.3161929346202230 +cat0,1993-08-10T11:44:08.383,-1.3272509684332300 +cat0,2005-05-08T17:49:45.658,0.16930449912637500 +cat0,1900-07-21T22:01:14.345,1.9435946533952800 +cat0,1938-01-10T22:26:47.351,-1.4671075749086300 +cat0,1936-08-06T14:33:34.532,0.212511042699229 +cat0,1919-03-02T1:03:54.893,0.020399572287449300 +cat0,1921-01-01T3:03:47.887,0.35734200591277800 +cat1,1952-04-20T18:27:20.512,1.9676844425934200 +cat0,1959-04-02T18:13:41.907,-0.5119134389772020 +cat0,1953-01-22T6:21:07.770,0.04749690540832350 +cat0,1907-09-02T22:00:19.345,0.8993181857661180 +cat0,2006-09-25T16:04:13.729,-1.5195694894801900 +cat0,2015-12-05T20:48:19.164,0.31440991113559600 +cat2,2031-03-12T10:39:35.284,0.19517678952274300 +cat0,1914-11-02T18:26:49.959,1.3924684385998000 +cat0,2028-11-07T15:45:07.165,2.245589411937280 +cat0,2007-06-03T22:44:09.559,-0.04421000280828650 +cat0,1985-08-24T21:06:33.715,0.00965540140266617 +cat1,2007-06-22T2:46:46.681,1.3016461106304100 +cat0,1939-09-25T15:35:18.960,-0.5714184600337600 +cat0,2049-03-07T10:24:03.908,1.5051956737416700 +cat0,1906-09-14T20:07:13.602,-0.12118586051497600 +cat0,1949-10-01T7:42:26.774,-0.07283079353557090 +cat0,1944-12-26T5:49:34.903,0.4413914486288290 +cat2,2016-01-25T18:11:19.595,1.4314375764699900 +cat0,1967-02-09T19:09:59.742,-0.16160834785845400 +cat0,2017-02-08T18:18:33.093,1.0361577631102900 
+cat0,1977-01-19T9:29:07.718,-1.4324022090269600 +cat0,1919-01-08T13:28:18.191,0.10470273455867000 +cat0,1959-11-07T11:01:15.776,-0.29730150599006000 +cat0,2018-11-03T8:49:47.969,0.015326854839532700 +cat1,1915-10-30T22:05:49.914,0.7494921472275590 +cat0,1963-10-25T3:29:54.445,-1.1126315770128600 +cat0,2017-01-29T17:05:48.118,1.9994713505082300 +cat0,2037-09-30T3:10:28.484,-0.5315165567309280 +cat0,1907-12-07T9:54:33.171,0.9160199464386500 +cat0,1951-10-06T3:22:21.109,0.5195273063453780 +cat0,2040-08-01T2:24:16.850,0.33771003328854000 +cat0,1916-05-15T22:59:49.072,0.2593548937962110 +cat0,2002-08-04T5:40:13.932,-1.697428819611660 +cat0,1931-02-14T19:37:26.516,-1 +cat2,1958-03-03T7:31:35.000,-0.6704726021824540 +cat1,2036-09-16T0:11:48.021,0.43048330053900100 +cat0,1975-03-14T6:50:10.967,0.11022290467859000 +cat0,1913-04-05T18:53:00.432,0.14584183035653000 +cat0,1931-09-19T16:05:13.953,-1 +cat0,2023-01-13T11:09:09.747,0.16571654398934800 +cat0,1931-08-22T21:35:43.969,-1 +cat0,1931-01-19T20:23:33.972,-1 +cat1,1957-03-26T9:21:48.781,-1.7268130401605500 +cat0,1955-04-17T14:01:47.479,-0.3089360138244860 +cat0,2044-08-07T3:40:41.513,-1.1723835438417100 +cat0,2036-08-03T17:01:39.561,-0.8058804952591580 +cat0,1980-04-24T23:46:33.530,-0.5792728170018570 +cat0,1937-12-01T3:26:52.960,-0.2737531041793590 +cat0,1939-07-18T2:18:27.259,-0.682317950550447 +cat0,1941-03-03T3:05:15.876,1.2763762150341000 +cat2,1950-04-24T2:13:25.968,-1.5097650978383700 +cat1,1924-02-05T16:21:37.875,-0.49429140441180600 +cat0,2002-01-04T23:41:57.420,0.7051057266307690 +cat2,1976-10-10T1:43:39.433,-0.7997068683282050 +cat2,1993-03-05T11:20:23.655,-0.5235638819260030 +cat0,1901-02-12T18:28:27.719,0.214494264661566 +cat1,1931-03-29T7:24:33.865,-1 +cat2,1931-08-01T7:59:14.385,-1 +cat0,1917-12-10T18:19:14.408,-0.8330653149157880 +cat0,1927-08-06T4:57:51.283,-1.759243641873800 +cat0,2014-08-23T22:58:11.287,0.1495098271952180 +cat0,2017-03-09T2:12:07.338,0.46452476013384100 
+cat0,2012-02-18T5:53:22.966,0.6578364853912080 +cat0,1992-12-16T10:07:54.444,0.22872914287056600 +cat0,1917-03-25T17:53:50.290,-0.22257909522915300 +cat0,2002-08-09T20:52:50.200,-0.6827475148095230 +cat0,2047-07-12T20:02:00.461,-0.7408112331759320 +cat0,2043-02-06T15:25:54.802,-0.10254478669677300 +cat0,1934-01-21T13:30:55.065,0.1119091893971190 +cat0,1921-05-12T2:51:56.981,-2.1585479491704100 +cat1,2040-01-06T15:00:51.097,0.46963869067110800 +cat0,1929-02-26T12:17:05.858,0.33099004886213200 +cat0,2003-10-20T17:52:18.651,0.3111628491987360 +cat2,1977-07-13T11:40:20.387,-0.5654407985224700 +cat1,1948-04-16T2:00:15.361,0.27135301540011400 +cat0,1901-03-22T8:00:00.860,0.04635295220462040 +cat1,2036-05-28T3:32:13.350,1.9257466626852100 +cat0,2006-10-09T4:11:50.229,1.085227427523830 +cat0,1994-12-01T11:38:51.785,0.3656194155702400 +cat0,1907-10-01T12:41:49.042,-3.2407394534919500 +cat1,2014-08-26T8:45:31.304,-0.4640914665450860 +cat0,1945-06-18T23:18:10.298,0.3357893349123490 +cat0,2016-07-12T7:33:40.002,-0.6772104187573440 +cat2,1966-07-10T23:28:04.002,0.8753693974481490 +cat0,1934-07-08T2:23:48.065,0.6070541317561760 +cat1,1982-06-23T6:21:28.689,-0.20732234183355400 +cat0,1962-12-27T23:09:27.166,-0.17722034147295900 +cat0,1928-02-28T10:52:59.623,1.1318203682021600 +cat2,1968-06-10T13:18:43.631,0.39785428580944600 +cat0,1931-12-14T16:39:35.027,-1 +cat0,1931-08-10T6:26:20.557,-1 +cat0,1931-01-28T12:32:46.181,-1 +cat0,1931-01-18T21:45:09.874,-1 +cat0,1931-12-26T17:51:57.956,-1 +cat2,1931-07-01T15:54:32.555,-1 +cat0,2025-10-24T11:39:47.960,-1.1924143220322200 +cat0,2041-03-13T6:13:51.976,0.024184585503312300 +cat0,1931-02-20T19:27:01.947,-1 +cat0,1954-07-15T4:37:23.391,-2.346425057105350 +cat0,1979-06-26T6:35:41.361,-0.07068661764904830 +cat0,1990-04-09T22:50:15.770,-0.4410019308408970 +cat0,1928-03-10T9:08:37.503,-1.7466423414863000 +cat0,1928-02-24T7:56:32.345,-0.5572564534711060 +cat0,2047-12-23T4:21:43.392,1.0585301265333400 
+cat2,2022-06-13T11:29:14.013,-1.0600131165620800 +cat0,1976-02-25T22:05:20.352,0.4928462597150440 +cat0,1931-12-14T16:39:35.027,-1 +cat0,1931-08-10T6:26:20.557,-1 +cat0,1931-01-28T12:32:46.181,-1 +cat2,1931-01-18T21:45:09.874,-1 +cat0,1931-12-26T17:51:57.956,-1 +cat0,1931-07-01T15:54:32.555,-1 +cat0,1908-09-28T18:28:05.645,0.19464144688515900 +cat0,2029-11-26T14:20:56.750,0.19004278833364400 +cat0,1906-11-19T10:28:25.365,-0.6472871396831650 +cat0,1917-09-20T15:00:04.164,1.013633976112500 +cat1,2035-02-01T7:24:37.612,0.29024719457425700 +cat0,1958-04-01T14:33:24.112,-0.029207980352454800 +cat2,1925-06-09T18:59:38.575,1.0246312819387900 +cat0,1904-02-21T0:41:42.182,-0.17599336952736200 +cat2,2044-08-21T13:13:28.244,0.43405787951716800 +cat0,2024-07-19T10:07:43.884,1.4236254093449600 +cat2,2028-09-28T14:48:03.091,-0.3640395785806630 +cat0,1911-12-15T20:04:27.661,-0.33822358673185800 +cat0,2049-03-21T22:23:36.855,0.5282539820603190 +cat0,1944-07-04T17:18:28.528,2.0277291550034600 +cat2,1919-08-04T0:30:18.831,-0.5715261607263670 +cat0,2037-09-22T22:29:38.410,1.2999437598804400 +cat0,1952-03-14T8:02:53.678,-0.8591634332772860 +cat0,1910-01-13T16:43:19.825,0.5893451951627660 +cat1,1961-09-02T3:10:06.322,0.29624748880293500 +cat0,2045-08-29T5:44:42.833,-0.028153501861295100 +cat0,2030-03-26T0:35:44.429,-0.006049341459727650 +cat0,1977-02-03T5:44:15.723,-0.8093839397549500 +cat1,2000-04-13T3:07:57.802,-1.0602828193541500 +cat0,1931-12-14T16:39:35.027,-1 +cat0,1931-08-10T6:26:20.557,-1 +cat1,1931-01-28T12:32:46.181,-1 +cat0,1931-01-18T21:45:09.874,-1 +cat0,1931-12-26T17:51:57.956,-1 +cat0,1931-07-01T15:54:32.555,-1 +cat0,1937-06-26T9:06:25.779,-0.5575919869390680 +cat0,1902-11-20T14:21:02.771,-0.5110650975515740 +cat2,1902-11-06T7:59:48.748,-0.06648019635630250 +cat1,2002-11-05T5:57:05.701,0.48242660357284200 +cat0,1902-05-30T5:16:52.591,-0.32639702472835700 +cat2,1963-11-19T19:36:54.285,-0.2835302847603890 +cat2,1995-09-30T20:01:44.474,0.13500405739169200 
+cat0,1970-07-26T15:29:32.382,0.5709338833137890 +cat0,1920-10-14T7:40:25.332,1.2432477130963200 +cat0,2016-04-13T11:10:28.321,0.47544920654485900 +cat0,2017-04-12T14:48:27.933,1.8551109040944600 +cat0,1913-11-25T19:49:38.022,0.4608726606465240 +cat0,1925-09-26T9:17:25.753,0.5441105985316490 +cat1,1906-01-01T7:56:14.708,-0.26145351286988300 +cat0,1989-02-22T9:23:08.133,-0.044434143744026000 +cat0,1931-12-14T16:39:35.027,-1 +cat0,1931-08-10T6:26:20.557,-1 +cat2,1931-01-28T12:32:46.181,-1 +cat0,1931-01-18T21:45:09.874,-1 +cat2,1931-12-26T17:51:57.956,-1 +cat0,1931-07-01T15:54:32.555,-1 +cat1,2009-01-14T16:29:27.997,-0.8266732134296790 +cat0,1965-02-24T6:07:33.566,0.5027906043477980 +cat0,1995-11-23T6:04:00.687,-0.17903346519548000 +cat0,2040-11-28T21:21:20.476,0.2692912241785290 +cat1,1922-04-08T10:01:07.413,-0.06811114134574730 +cat0,2043-05-30T6:02:12.000,0.8123027583176850 +cat2,2009-05-08T11:29:21.496,-1.0364510557379800 +cat0,2014-01-21T20:25:13.967,0.1834221203490190 +cat2,1961-12-15T4:50:14.765,0.1038775396168490 +cat0,1970-01-24T22:41:58.701,0.7736381924483240 +cat1,1901-01-31T19:59:37.702,2.3766118084894800 +cat0,1994-06-20T6:28:14.132,-2.342212199150040 +cat0,2000-10-14T19:33:39.102,0.08909887383584800 +cat0,1948-03-10T13:35:30.856,-0.31235218127938300 +cat2,2049-09-29T7:41:50.721,-0.44886717425149500 +cat0,2018-11-05T6:32:51.473,-0.5342040011218960 +cat0,2043-12-18T15:15:19.016,-0.4271957416985150 +cat2,1931-12-14T16:39:35.027,-1 +cat0,1931-08-10T6:26:20.557,-1 +cat2,1931-01-28T12:32:46.181,-1 +cat0,1931-01-18T21:45:09.874,-1 +cat0,1931-12-26T17:51:57.956,-1 +cat2,1931-07-01T15:54:32.555,-1 +cat0,2029-08-20T15:04:23.237,0.3313624181426000 +cat2,2035-05-01T11:50:44.414,-1.4316309763191400 +cat1,1923-09-06T22:17:39.265,-2.042169181695800 +cat2,1985-07-25T19:08:24.166,-1.187268035458480 +cat0,1923-01-23T15:31:13.429,-0.4917668789249820 +cat0,2036-01-22T19:13:32.204,0.4142466579811230 +cat0,1955-11-07T8:41:07.800,0.9315680752325030 
+cat0,1960-02-14T7:17:15.030,1.8347348865329800 +cat0,2016-04-26T1:19:38.112,1.313956072447840 +cat0,2009-11-08T12:01:25.450,0.6870445805995850 +cat1,1960-08-29T20:01:24.947,0.5454055546224880 +cat2,1983-04-12T20:11:02.588,-0.03497302315536950 +cat0,1939-10-06T12:00:17.112,-0.49585152456109100 +cat1,1958-01-31T19:25:52.784,-0.5524625452088550 +cat1,1986-01-01T10:20:35.787,2.866141512310220 +cat2,2032-03-14T18:36:04.969,0.1419869445651420 +cat0,2049-10-21T16:08:33.120,-0.007072637319371280 +cat2,1911-10-16T20:57:27.574,-0.8652528237307920 +cat2,2006-05-18T11:03:52.106,0.9139580765867260 +cat2,1901-11-16T4:04:56.665,0.6709029693558610 +cat0,1939-08-11T5:01:00.088,1.0453477443635500 +cat0,1924-11-29T14:01:02.006,2.6041199544161500 +cat0,1908-12-19T6:21:19.812,0.11536738429440300 +cat0,2018-07-12T21:34:24.364,2.179771395031100 +cat0,1976-08-20T15:28:18.858,-0.8156479896503770 +cat0,1976-01-03T20:46:28.405,-1.787043880552940 +cat1,1991-05-23T2:04:39.732,-0.3523566404925980 +cat0,1931-12-14T16:39:35.027,-1 +cat2,1931-08-10T6:26:20.557,-1 +cat0,1931-01-28T12:32:46.181,-1 +cat2,1931-01-18T21:45:09.874,-1 +cat0,1931-12-26T17:51:57.956,-1 +cat0,1931-07-01T15:54:32.555,-1 +cat0,2014-12-22T7:21:48.927,0.18148409995238200 +cat0,2036-07-29T7:07:31.685,-0.4601100241043700 +cat1,2030-07-19T2:15:36.027,0.49701577307102 +cat0,1930-12-15T2:49:45.352,0.17931598672446100 +cat1,1951-08-19T21:42:30.649,-0.9367617305376250 +cat0,2042-08-04T19:02:17.063,-0.8992844181485360 +cat0,1935-03-08T15:14:46.774,-0.43883224522294700 +cat2,1955-07-05T14:35:21.821,0.1485070891438940 +cat0,1912-06-08T15:21:34.010,0.28091462078790300 +cat0,1965-11-16T11:41:36.453,-0.09831429252196150 +cat0,1961-07-08T0:04:13.710,0.4965296511115250 +cat1,1976-07-18T10:09:54.705,0.6265267733394610 +cat0,2026-10-10T18:34:09.132,-0.5325470214301240 +cat0,1983-10-22T19:34:39.793,0.641031937995835 +cat0,1938-01-27T10:49:21.089,-2.1114184306322500 +cat0,2047-08-23T23:23:41.918,0.3421497279496780 
+cat0,2045-07-29T15:44:22.567,-0.18436879651282700 +cat0,1931-12-14T16:39:35.027,-1 +cat0,1931-08-10T6:26:20.557,-1 +cat0,1931-01-28T12:32:46.181,-1 +cat0,1931-01-18T21:45:09.874,-1 +cat0,1931-12-26T17:51:57.956,-1 +cat1,1931-07-01T15:54:32.555,-1 +cat0,1975-11-27T22:24:15.083,0.21009296977518400 +cat0,2022-09-01T2:26:48.336,-0.5326922995925790 +cat0,1930-12-23T17:53:35.768,0.37228448263211300 +cat1,2027-12-24T15:39:20.876,1.263061317510000 +cat0,1914-04-01T11:37:02.401,-2.2477137036475200 +cat0,2023-07-24T3:56:03.623,-1.2516501967136 +cat0,1938-06-05T16:36:16.830,0.48708834464603600 +cat0,1973-11-24T22:40:25.608,-0.18983254772095800 +cat0,2043-01-31T13:12:51.442,0.45003666539015900 +cat0,1924-11-02T5:05:14.264,-0.02764173191849530 +cat0,1915-01-01T22:23:44.099,-1.0364440893669300 +cat0,2034-03-02T4:18:18.183,-0.04861108189196200 +cat1,1969-07-18T15:14:00.249,-1.3503574075735600 +cat0,2016-10-25T1:28:16.405,1.412068792907980 +cat0,1994-10-28T2:37:51.504,-1.7090460834174200 +cat0,1995-12-28T8:09:14.335,-0.6463865813923930 +cat0,1909-11-08T11:37:23.288,-0.09715109449241340 +cat0,1979-08-17T10:33:40.603,-0.0669171688927211 +cat1,1974-06-14T1:39:23.204,-0.28423953371718400 +cat0,2030-05-05T13:18:56.482,0.10177538940764000 +cat0,1974-07-11T18:10:49.621,-1.3971378434890200 +cat0,2015-06-22T21:32:27.561,-0.4459115480670370 +cat0,2034-01-07T7:06:23.947,-0.6856637943869150 +cat0,2009-07-28T16:59:38.528,-0.41168146482221300 +cat0,1931-12-14T16:39:35.027,-1 +cat0,1931-08-10T6:26:20.557,-1 +cat1,1931-01-28T12:32:46.181,-1 +cat0,1931-01-18T21:45:09.874,-1 +cat0,1931-12-26T17:51:57.956,-1 +cat0,1931-07-01T15:54:32.555,-1 +cat0,1942-09-01T3:26:58.027,-0.8760584916175200 +cat0,1952-09-26T19:17:15.197,1.2154868615884600 +cat0,1984-09-13T15:50:05.422,1.3464448483726500 +cat1,2011-08-04T14:27:30.789,2.835249788865160 +cat1,1923-09-13T14:36:31.465,1.3737562606153700 +cat0,2023-03-05T9:13:01.168,-0.9911115450270400 +cat2,2000-09-22T0:12:54.746,0.489021216344174 
+cat0,1963-02-01T22:46:27.142,-1.2053430919389800 +cat1,2023-12-12T1:10:32.892,2.522573357341860 +cat0,1914-08-11T5:15:25.645,-0.5151878106834460 +cat0,2044-02-24T3:39:08.301,-0.6869489150250250 +cat0,1985-10-30T20:52:45.421,-0.9783096580999940 +cat0,1901-07-18T20:55:59.727,-0.6571544410987800 +cat0,2006-08-21T15:58:06.229,0.6139033115930930 +cat0,1942-03-15T6:36:09.445,-0.2873882385002010 +cat0,1990-03-09T2:58:18.542,0.2006720593958160 +cat0,1949-05-10T8:50:03.221,1.4917724693792900 +cat0,1978-04-02T19:04:40.637,-0.7064174925228160 +cat2,1971-02-11T6:54:58.452,1.1080183426419200 +cat0,1931-05-23T2:40:08.012,-1 +cat0,1962-02-15T4:48:51.801,-0.7107258878750440 +cat0,2014-07-09T10:34:01.521,0.3302326596511160 +cat0,1940-04-21T1:31:12.998,-1.0351256436221800 +cat2,1964-02-24T2:30:36.542,-0.5953091934356840 +cat2,1934-08-22T7:23:49.528,0.18218244192063500 +cat1,2031-12-25T0:30:07.754,1.0011602407004200 +cat0,1937-08-18T11:48:16.148,1.222469980266480 +cat0,1980-01-17T0:10:50.412,0.2849679748985470 +cat0,1981-01-05T21:24:36.191,2.266068420452440 +cat0,1932-10-05T12:06:53.749,-2.381387433286620 +cat0,1959-02-06T22:14:12.479,-0.6853727309719770 +cat0,2046-02-20T1:05:44.738,0.7030477701884570 +cat0,1935-07-22T3:24:44.768,0.27833192573570400 +cat0,1966-08-19T18:11:44.053,-0.4912868236763010 +cat0,2048-09-10T12:23:58.250,1.0882428877023400 +cat2,1917-05-07T8:06:22.313,-0.33483686715859400 +cat0,1969-06-05T7:25:03.894,-0.9056308944628280 +cat0,1903-01-24T3:10:50.193,0.13102336719343000 +cat0,1964-06-08T15:26:48.199,-0.21038916991860800 +cat0,2035-10-27T4:02:11.193,-2.447306751339220 +cat0,1987-08-20T18:22:47.191,0.037029831734732600 +cat0,1988-03-11T8:53:26.277,1.2076879347829500 +cat2,1957-01-16T8:34:25.781,1.333964430463400 +cat0,1962-10-20T9:40:10.209,1.1329742405983600 +cat0,1970-01-29T8:56:11.552,-1.4634817256225900 +cat0,1956-09-29T19:47:36.559,-1.078127847571230 +cat0,1937-11-29T3:43:24.719,1.67448900156446 +cat0,1993-10-27T10:46:26.060,-1.0820150528027700 
+cat1,1969-06-05T1:32:23.482,-0.9686366685465970 +cat0,2045-09-24T6:40:43.471,0.8350921719578230 +cat0,2018-11-19T14:04:53.542,-0.3385182716072320 +cat0,2015-09-27T2:27:51.360,0.7834554340503770 +cat0,1974-08-01T21:52:49.706,-1.5941678796606900 +cat0,2022-09-13T7:59:52.825,-1.4860801072828300 +cat0,2027-01-21T20:09:22.532,-0.48987285764481900 +cat0,2010-03-25T6:45:53.432,0.9106615263013740 +cat0,1951-11-18T3:32:09.437,-0.42274194217808700 +cat1,2042-12-29T22:00:46.807,-0.07646631549027620 +cat0,2015-01-31T16:42:28.909,-0.22861795732336300 +cat0,2031-01-04T12:28:51.861,0.10776852656888300 +cat0,1908-05-29T8:16:46.389,-0.5657174276232470 +cat0,1969-06-10T20:53:04.357,0.8722436078508920 +cat1,2022-12-17T5:32:43.868,0.8363684700245160 +cat0,2009-10-15T19:33:58.095,0.21978440137337200 +cat0,1993-01-15T2:18:04.190,-0.1566771320284880 +cat0,1981-04-11T14:49:58.587,-0.2013068945594450 +cat0,1930-12-22T7:45:00.257,1.306474175242040 +cat0,1927-06-03T1:26:05.682,-0.5897704171830320 +cat0,1938-08-14T11:13:43.474,-0.4889956708825670 +cat1,2036-01-15T5:25:06.033,-1.7932920138600000 +cat0,1904-05-19T18:01:32.876,-0.5041965485165630 +cat2,1969-10-27T10:59:00.893,0.4536468511169150 +cat0,1930-05-20T4:59:04.389,-0.10858811636069000 +cat0,1983-12-16T4:05:29.438,-0.5658913010163880 +cat0,1935-12-22T22:10:27.459,0.48986439062699800 +cat1,2034-08-20T9:45:15.649,0.4274923990832550 +cat0,1918-03-18T3:26:31.941,-1.5158109846358300 +cat0,2046-10-10T4:24:59.971,0.4821596133295320 +cat0,2040-03-21T19:09:45.046,-1.1644826766457300 +cat0,1953-03-24T15:28:34.031,0.3049689364873290 +cat0,2003-04-29T5:25:48.314,1.9006422647287700 +cat1,2011-06-28T20:31:12.027,1.1364181299386100 +cat0,1995-12-11T17:43:00.229,0.9028622153997620 +cat0,1981-10-08T6:59:59.446,-0.730456700095653 +cat0,1931-12-14T16:39:35.027,-1 +cat0,1931-08-10T6:26:20.557,-1 +cat0,1931-01-28T12:32:46.181,-1 +cat0,1931-01-18T21:45:09.874,-1 +cat0,1931-12-26T17:51:57.956,-1 +cat2,1931-07-01T15:54:32.555,-1 
+cat0,1947-10-20T11:05:40.769,-0.02899457095630540 +cat1,1976-01-21T18:41:58.026,0.9098013284998390 +cat0,2015-10-27T8:45:30.223,-0.4334059691183090 +cat0,1934-06-29T16:51:28.078,1.0885127327020200 +cat0,2004-04-11T1:14:30.290,0.015613503113357700 +cat0,1938-12-08T21:07:18.621,-0.0940136622053698 +cat0,1956-02-08T21:44:30.882,0.019779690429787900 +cat0,1957-03-14T7:31:47.487,-1.8928935315334600 +cat2,2048-12-22T20:24:11.166,0.34981786279307900 +cat0,1996-08-28T7:06:38.589,-1.3490450972823800 +cat0,2023-12-06T17:29:35.241,0.46540494126204600 +cat0,1924-11-14T4:45:42.565,-0.738824360186072 +cat0,1993-11-24T21:27:43.688,-0.21696462695175700 +cat0,2014-10-18T8:25:56.042,0.9941523583149230 +cat0,1981-09-10T19:40:57.063,2.2974428168719100 +cat1,1985-11-19T23:16:23.148,0.3758373374161650 +cat0,1978-10-10T19:26:37.957,-0.3205741095095700 +cat0,1905-08-15T12:04:40.113,0.41260744304636900 +cat0,1933-06-29T21:17:28.989,0.2898097552652430 +cat2,2028-04-03T12:00:13.424,0.21483922944216600 +cat1,1966-06-09T4:08:54.383,-0.15027681461887800 +cat0,1976-05-28T23:54:23.211,-2.2830243534306000 +cat0,1982-01-12T1:10:20.884,-0.12114519117500100 +cat0,1902-01-07T22:15:37.702,-1.1474927043452700 +cat0,2039-01-01T6:26:10.909,0.6899975148384700 +cat1,1980-07-17T1:33:36.549,-1.2962390880743800 +cat0,1965-03-24T8:07:34.350,0.8202846295335180 +cat0,1941-04-15T16:53:51.629,0.6111286324301360 +cat1,1969-03-31T20:19:01.238,-0.6017573804647560 +cat0,2008-05-19T11:19:27.408,-0.14636833351968400 +cat0,2011-02-18T19:22:19.325,-0.39051954878729900 +cat0,1999-04-09T14:19:35.254,1.1318712155592900 +cat0,2012-06-25T20:17:03.173,0.6238824556485380 +cat0,2025-06-01T1:22:46.403,-0.21918605632425200 +cat0,1920-07-01T10:50:35.857,0.06266060497710180 +cat0,1936-08-03T15:31:28.716,0.8368973643316210 +cat0,2045-08-19T23:56:25.545,-0.7215040217255680 +cat1,2008-11-18T5:14:48.598,-0.20814741590895800 +cat0,1985-05-22T14:43:00.100,-0.0897913387325754 +cat0,1983-01-23T17:44:42.878,0.07124822113815360 
+cat1,1905-05-07T8:55:07.221,-2.172279749874680 +cat1,1944-10-08T3:33:17.113,1.9947176428577400 +cat0,1918-05-11T3:16:27.847,0.03626915451138230 +cat2,2043-04-10T3:35:28.779,0.6407201803911170 +cat0,2036-08-04T8:48:07.924,-0.879334804399953 +cat0,2031-06-02T19:49:12.352,0.42773687851837300 +cat0,2036-04-13T18:01:33.874,-0.023621202837997500 +cat2,1969-07-16T20:21:28.538,0.9141110361713460 +cat0,1952-10-17T10:02:41.257,-1.2363517257962000 +cat0,1945-04-08T17:38:10.074,0.18140310037963400 +cat2,1993-02-12T8:23:08.935,1.2261360769020300 +cat0,1949-10-01T2:30:52.619,-0.7628738973018220 +cat0,1924-04-22T6:53:47.056,-1.4758009732482800 +cat1,1964-11-24T23:18:47.253,-1.0480133447619600 +cat2,1988-04-07T20:48:54.086,-2.1134742899849900 +cat0,1909-11-19T14:28:44.881,0.5818020935613070 +cat0,2023-09-10T14:56:44.747,1.1100143727390100 +cat0,1902-01-28T9:59:15.495,-2.8487533681720900 +cat0,2015-05-07T23:09:47.732,-1.3576950439413000 +cat1,1975-06-02T7:53:58.806,0.4634621796442320 +cat0,1962-03-07T8:36:13.020,0.10971443994730500 +cat0,1950-11-16T17:23:26.248,0.06176009833417940 +cat1,1952-04-11T1:15:25.493,-1.179379055721530 +cat0,1916-03-03T22:18:06.430,0.4997053030466970 +cat1,1972-01-28T22:03:44.253,0.2882625239442540 +cat0,1911-01-15T20:22:22.225,-0.7145249740029960 +cat0,1923-05-04T17:26:01.182,-0.22830394350871800 +cat0,2049-04-19T21:42:44.213,-1.5614933610415300 +cat2,1973-01-31T15:17:07.505,-0.3838093319755410 +cat0,1940-12-25T23:23:03.372,2.299644952443310 +cat0,1970-10-24T10:26:59.530,-0.6494012337942090 +cat0,1996-03-23T12:23:09.835,0.3857879377757310 +cat0,1918-07-17T8:56:15.679,1.3333310419086500 +cat0,1917-03-08T1:15:50.332,-1.3341899717847800 +cat2,2015-11-06T15:19:15.621,-1.4237312293121200 +cat0,1934-11-01T0:35:51.407,-1.1752877622131600 +cat0,1927-03-23T7:20:52.442,1.2826103369102600 +cat0,2045-08-06T5:15:21.242,-0.04437963776951030 +cat0,1900-06-28T9:10:37.421,1.950428985157970 +cat0,2023-04-17T20:15:39.221,-0.8961119417664330 
+cat1,2009-05-31T1:58:32.104,1.0550895460608400 +cat0,1930-08-05T17:59:09.018,0.6559239678489160 +cat0,2008-12-27T8:27:24.003,-0.5969086947411910 +cat0,1941-07-27T12:17:41.399,-0.15039900771725300 +cat0,2020-11-09T6:32:05.740,-1.6010376653543300 +cat0,2009-02-13T19:29:10.606,2.158572112734390 +cat2,1913-02-18T10:19:27.578,1.145767375314080 +cat0,2032-02-06T17:39:36.262,1.1032351409335200 +cat0,2018-03-14T6:35:26.556,-2.1872131250615500 +cat0,1907-11-29T11:42:14.155,0.23423156664057000 +cat2,2013-01-21T15:54:29.390,-0.9238289224705730 +cat1,1993-05-09T8:09:53.353,-1.7738601310845700 +cat0,1993-02-24T16:54:13.560,-0.789874762667593 +cat2,1967-10-26T18:13:32.062,0.5859920009219780 +cat0,1905-08-29T5:25:02.713,-0.4015719305938790 +cat0,1972-04-10T1:58:38.889,-0.6691545225643600 +cat0,1908-07-19T13:46:59.861,0.1980119099629690 +cat2,1959-05-08T17:25:53.522,0.9037780830081770 +cat0,1920-05-28T13:37:26.565,1.7732658951200000 +cat0,1936-11-07T14:55:36.372,0.3049209476491230 +cat0,1905-12-24T2:39:57.100,0.5722413169446880 +cat0,1928-04-16T6:26:40.753,-1.4097050747868200 +cat0,1942-06-11T15:57:07.700,-0.4899723691353840 +cat1,2046-07-02T8:01:26.132,0.7181668010150350 +cat0,1959-09-13T17:32:58.425,-0.26854961167334100 +cat0,1974-02-28T21:46:30.985,-0.7845356188291380 +cat0,1920-09-17T8:14:00.939,0.34573176841769100 +cat0,2016-12-29T18:46:50.441,0.6434205110214180 +cat2,1999-02-03T22:04:08.576,0.6477397042771420 +cat0,1961-10-03T23:47:55.378,-0.2217392116303300 +cat1,1926-06-16T12:28:17.255,1.7871906710199200 +cat0,2045-02-10T21:05:20.788,-0.8740465211185190 +cat0,2038-03-29T3:37:31.181,-2.3105176236901500 +cat1,1927-05-11T9:46:02.867,2.3176504575353700 +cat0,2046-12-16T7:44:26.112,-1.1949239006384100 +cat0,2031-04-11T18:05:02.359,0.7465958022291450 +cat0,1978-04-07T19:42:22.339,0.4606777475692300 +cat1,2028-04-23T13:00:12.897,1.0638770095080700 +cat2,1970-09-25T4:43:51.179,-0.7818184768447340 +cat0,1903-01-08T20:44:36.178,-0.2988267811147010 
+cat2,2001-06-02T7:39:19.423,-0.181295458377256 +cat2,1918-02-20T22:53:09.256,-1.0487821764741800 +cat0,1994-04-14T22:49:43.510,0.7000817523278290 +cat0,1903-04-20T21:20:51.638,-0.9519122113311980 +cat1,2025-10-06T10:00:50.854,0.7090759590006410 +cat0,1974-06-14T12:43:55.168,0.08994299263768610 +cat1,2025-07-19T20:21:49.772,-0.449635512675391 +cat0,1948-10-06T12:41:24.090,-0.5498633601504550 +cat0,1945-04-02T19:28:25.767,-1.5651006808736600 +cat1,2049-01-11T17:35:27.082,-1.0306469381329000 +cat0,1933-07-16T8:02:43.161,-0.6362901416607380 +cat0,1952-10-17T22:05:02.446,-0.2875904860754400 +cat0,1957-08-12T15:14:26.997,-0.46061535984624200 +cat2,1928-04-01T18:05:22.684,-1.7704307410319900 +cat0,1958-04-12T13:39:34.333,-0.389376694713177 +cat0,1950-03-11T14:27:54.040,-1.7604243212861700 +cat0,2039-12-29T18:43:34.215,1.133612813170250 +cat2,1944-10-30T0:20:32.517,1.5448507563754800 +cat2,1937-08-21T10:05:20.895,1.5587380285958600 +cat0,2015-09-13T11:27:19.162,-0.37599450467387600 +cat0,2006-10-15T4:32:45.082,0.9583821869501040 +cat0,1944-05-03T5:03:35.181,-0.40646171169738300 +cat1,2017-02-07T14:55:31.677,0.8160797782505240 +cat2,2036-06-13T8:51:54.069,0.24993412581457200 +cat0,1990-12-30T6:29:06.979,-0.7224955298421480 +cat1,2012-09-28T10:21:22.687,0.612778615861383 +cat0,1911-11-15T0:26:33.885,1.2275280161011400 +cat0,2002-09-26T21:09:43.200,-2.77652113258219 +cat0,2014-06-20T9:22:42.522,1.0688533822830200 +cat2,1963-01-01T4:27:06.855,0.4554159061278570 +cat0,2028-11-27T0:04:52.621,-0.3894449027510990 +cat0,1900-07-30T0:23:36.187,-0.28549118158167000 +cat2,2042-12-18T1:35:24.203,-0.22349024805460700 +cat0,1982-07-31T13:17:35.785,1.086204558477700 +cat0,1969-09-22T22:33:59.395,-0.2746025574885130 +cat0,1945-12-11T14:12:13.448,0.987769429317932 +cat0,1984-04-15T4:37:43.958,-1.945408199557090 +cat2,2028-11-01T13:29:19.253,0.005489956383327210 +cat0,1926-08-19T10:36:18.704,-1.1562395169994000 +cat0,1982-12-24T9:05:57.808,-1.118451878665620 
+cat0,1945-01-22T7:22:48.746,-0.7060914786252770 +cat0,1900-01-07T20:50:35.431,0.9313174884791080 +cat0,1901-05-28T11:50:12.975,-0.4594487718649070 +cat2,1942-10-28T5:57:49.259,1.0827969884137600 +cat0,2048-08-04T12:25:30.624,0.6508675488435210 +cat0,1963-12-10T2:16:02.705,-1.1410441231428500 +cat0,1918-11-12T7:01:48.434,0.3098711788067110 +cat0,1997-10-28T13:09:25.203,0.7179032748848460 +cat0,2014-11-13T4:58:28.507,-1.1430184840872200 +cat0,2001-12-05T11:11:23.658,-1.594294643920770 +cat0,1927-04-09T8:57:31.613,-0.3281497670961830 +cat1,2049-06-13T18:01:08.320,-1.367114249639670 +cat0,1971-02-19T18:41:14.080,0.37492431917246200 +cat0,2026-05-23T14:02:40.957,0.5993720310795110 +cat0,1972-02-07T4:42:14.896,0.8311384823234490 +cat2,2008-03-01T17:44:12.640,0.6617618982741430 +cat0,1961-02-10T22:33:56.253,0.451737278242199 +cat0,1965-05-13T7:27:54.669,-0.3413086682458410 +cat0,1902-09-09T19:01:59.593,0.15557039397341400 +cat0,2015-03-05T8:56:27.988,-0.09386276460613740 +cat0,1934-02-07T9:43:35.841,-1.1073123327434100 +cat0,1912-10-24T21:53:44.360,0.885661738986894 +cat0,1939-12-09T2:30:09.290,0.14322591250299400 +cat0,2026-03-22T19:04:25.573,0.0395414800295894 +cat0,2010-09-03T16:45:28.761,1.4440335493558200 +cat0,2035-11-06T8:09:36.627,0.6308508000265910 +cat1,2039-05-14T11:02:11.291,0.5023376099315170 +cat0,1951-03-02T19:35:28.676,-0.6022353482315990 +cat0,2009-11-11T6:17:04.426,-1.0441260553161500 +cat0,2035-01-29T5:02:27.252,-2.0470536082736800 +cat1,1991-10-12T17:19:38.544,-0.21279162744344900 +cat1,2040-05-13T14:21:39.248,-0.13626838823639600 +cat0,1902-06-21T7:28:35.196,-1.751217351779290 +cat0,1947-04-03T0:37:15.230,0.43079399257798000 +cat0,2027-07-11T0:49:00.097,0.37427599749309000 +cat2,2019-12-26T20:15:13.224,0.7133509939532470 +cat0,1997-08-15T22:13:24.665,-0.49117160380845700 +cat0,1922-09-05T11:46:35.382,0.7945871736823150 +cat1,1970-08-11T13:07:18.108,-1.7853007209231700 +cat1,1929-01-12T14:52:45.583,-0.649368251600279 
+cat1,1952-03-08T6:00:12.118,1.7074466490591600 +cat0,1907-03-30T4:22:23.352,1.6712428116871600 +cat0,1984-06-14T22:46:23.256,0.19066753144948100 +cat0,1928-12-20T19:50:54.620,-1.3117523865944100 +cat0,2042-08-20T17:51:49.322,-0.005736455296393320 +cat0,2006-11-14T18:09:43.679,-1.2365000506454700 +cat0,1964-02-25T7:59:25.533,0.7587246070723170 +cat0,1954-08-08T5:47:13.469,0.7506718662793370 +cat1,1949-02-12T17:09:04.657,0.648570905550447 +cat0,1990-05-06T4:31:24.917,0.38569356086592300 +cat0,2002-05-31T4:05:36.593,-0.2158118064452580 +cat0,1903-04-29T19:54:59.760,0.6061639402172400 +cat0,2028-09-10T3:47:26.992,1.1786469190617000 +cat0,1951-07-27T8:09:17.014,-0.07303311454387920 +cat1,2041-04-29T17:43:57.625,-0.25149821573092500 +cat2,2029-04-15T18:55:47.359,0.9557532619488770 +cat1,1934-01-10T18:39:16.325,-0.9986047976298710 +cat0,1941-05-04T16:06:45.942,-0.2026780819872430 +cat1,1916-10-09T17:13:35.362,-0.8798140424156860 +cat0,1916-04-08T6:08:24.506,-1.471961986852120 +cat1,1964-05-25T22:52:11.863,-0.7452994418749190 +cat0,1974-10-31T21:19:10.145,0.0777077112212923 +cat0,1998-07-17T4:54:44.806,-0.4779301543214300 +cat1,2035-12-09T17:26:25.217,-1.1832310526723500 +cat0,1932-07-22T18:32:32.353,-1.7760405654622700 +cat0,2023-02-20T19:21:33.105,0.6079931885432480 +cat0,1990-08-07T18:34:56.024,0.17864732442345000 +cat0,2045-11-28T4:28:41.637,-1.044532726837590 +cat0,2021-02-02T0:25:56.159,-0.3735670528650330 +cat2,2018-07-12T14:11:39.413,0.26498835394534600 +cat0,1922-07-15T3:57:40.240,1.141574891593780 +cat1,2007-01-04T20:33:24.517,-3.069436382964400 +cat1,1961-06-20T2:31:58.566,-0.14115233616775000 +cat0,1905-11-02T17:12:00.469,0.7012666145840100 +cat0,1972-05-15T21:37:07.090,1.1175236887279400 +cat2,1981-06-17T11:15:46.358,-0.3517998862277250 +cat1,1960-03-28T19:01:26.472,1.2723334890443100 +cat0,1934-08-14T17:53:21.430,-0.5907285056352200 +cat0,1901-01-22T1:29:31.067,-0.7723901962571060 +cat0,1915-03-08T6:59:09.028,0.1283359367881870 
+cat1,1960-03-20T9:16:10.367,-0.8191549053627480 +cat0,1976-07-03T0:05:49.599,1.0244589309662200 +cat1,1949-06-22T8:07:30.719,0.32801301344731600 +cat0,1931-10-11T14:15:10.924,-1 +cat0,1943-03-06T14:09:27.714,-0.33462300616756900 +cat1,1999-04-30T2:27:57.393,0.23083767078065100 +cat0,2048-08-28T11:19:13.493,0.4294304852019200 +cat0,1970-03-02T9:06:20.518,1.1163033503917300 +cat0,2017-11-14T5:29:53.867,-0.4859822357637570 +cat0,2030-09-30T12:00:28.772,2.1218299966473600 +cat0,2041-06-20T21:46:10.708,-1.0632858102261700 +cat0,1907-07-24T7:07:18.141,-0.2361879388179660 +cat2,2017-11-05T11:05:05.346,0.04659855545359300 +cat0,2037-05-22T2:42:25.401,0.6048819566113330 +cat0,1971-07-24T20:06:07.593,-0.04974491641041570 +cat0,1943-06-09T12:01:03.503,0.09679448899407700 +cat0,2048-06-30T21:03:22.738,0.17947475201686900 +cat0,2016-11-27T21:19:56.917,1.1112096704034800 +cat0,1924-02-28T14:23:41.619,0.8393463580114850 +cat0,1964-04-18T4:02:11.252,0.6699911035107320 +cat0,1938-02-23T13:34:47.949,-0.5843321665720190 +cat2,1973-11-22T18:12:53.712,-0.4283657456640630 +cat0,1969-01-20T20:54:56.866,0.8398012512255320 +cat0,1919-05-13T17:53:43.865,0.2624961142578480 +cat2,1996-04-04T10:36:21.955,-0.30992289765345400 +cat0,1925-03-15T5:59:59.170,-0.22837427230888700 +cat0,1980-03-03T23:17:33.560,0.6845582086959290 +cat0,1977-06-05T14:11:08.028,-1.181632314153630 +cat0,2006-05-02T0:17:38.533,-0.9914390931383020 +cat0,2007-09-11T9:28:55.609,2.163999410146940 +cat0,1997-05-07T12:51:18.452,0.8463865678546340 +cat0,1924-10-19T1:19:42.746,0.8709869813278630 +cat1,1946-06-14T2:34:50.552,-0.18329239033138200 +cat1,1989-03-09T23:18:38.996,-0.746968965566506 +cat0,1962-05-22T17:49:42.970,-1.4557283569220500 +cat0,2006-07-24T19:32:05.884,-0.30801183392820400 +cat0,1979-03-20T18:20:11.751,-1.8193358645392700 +cat1,1921-07-12T1:24:33.381,-1.731987154989510 +cat0,2014-12-08T8:45:31.641,0.3201789231711150 +cat0,1958-03-06T22:15:15.008,1.2644954387001000 
+cat0,1940-02-12T5:38:38.813,0.10165298015815600 +cat2,1922-05-17T14:33:04.906,-0.33797985516523800 +cat0,2024-03-16T14:19:45.439,-0.4798682916145260 +cat0,2028-03-13T15:08:05.150,0.5089238432142260 +cat2,2049-07-04T16:10:44.236,-2.707275731263920 +cat1,2036-08-06T18:17:15.220,-0.8454179867541570 +cat0,1969-08-08T5:13:14.219,0.9349014051324240 +cat0,2009-04-03T14:14:22.210,-1.382959604433330 +cat2,2039-05-02T17:32:34.882,-1.5423367282472400 +cat0,2022-03-18T13:57:46.923,0.32469790852291800 +cat2,2037-01-25T7:25:56.936,0.26499261861678500 +cat0,1955-05-24T10:02:54.980,-0.30110765085017800 +cat0,1984-03-10T13:26:46.316,-0.26481440820700200 +cat0,1931-02-22T19:44:19.355,-1 +cat0,1920-03-18T22:44:45.308,1.2012579319011800 +cat2,1914-03-18T13:53:38.665,0.48157079789478600 +cat0,1936-03-25T18:10:26.627,0.2247212790222480 +cat0,1944-08-12T0:04:59.317,0.5811768956616960 +cat0,1900-12-08T8:29:54.719,-1.0324561072079700 +cat0,1943-01-30T0:11:39.002,0.6507442087482690 +cat1,2005-03-26T7:29:18.075,-1.8208292894947700 +cat0,2045-06-29T8:25:49.355,-0.9722865641200780 +cat0,2025-06-04T1:10:24.096,1.7429640517614800 +cat0,1914-07-05T3:52:05.065,-0.45180666414200300 +cat0,1925-12-25T3:49:41.099,0.35979294126518200 +cat0,1995-04-24T14:57:28.155,0.04914800397991230 +cat0,1951-11-04T14:25:59.716,0.18156205629414100 +cat0,1919-09-13T0:52:31.706,-0.7340812258443940 +cat0,1923-02-03T16:15:31.005,1.9519432241964800 +cat1,1962-02-20T1:32:54.568,1.8752597078612300 +cat0,1979-09-14T10:54:56.330,-0.17126258988644300 +cat1,1940-05-10T21:03:56.143,-0.8312731159174410 +cat0,1988-03-22T14:20:39.551,0.2046920246272960 +cat0,1951-10-02T19:01:24.135,1.2107662273448900 +cat0,1987-03-12T3:59:47.545,0.2133361203495360 +cat0,2045-12-29T4:38:58.348,-0.19082756343768300 +cat0,1974-04-19T16:03:54.093,0.45951833222117700 +cat0,1911-03-22T19:52:41.609,0.8493236048311070 +cat0,2007-07-04T17:46:07.689,-2.113529939229580 +cat0,2048-05-25T2:59:30.022,-0.3799007449108910 
+cat0,1901-07-31T3:14:49.988,0.6474117909652040 +cat0,1966-04-08T9:45:19.316,0.053283710414094300 +cat0,2034-09-15T5:41:08.467,-1.8172074495832500 +cat0,1927-09-29T5:12:50.400,0.032134557535558400 +cat0,1931-06-30T13:14:26.353,-1 +cat2,2020-04-17T14:02:57.981,-3.2925692438117400 +cat0,1944-03-27T19:24:44.163,-1.1759335507933600 +cat0,1905-06-11T0:50:57.134,0.09105490359705000 +cat0,2008-07-06T13:13:30.421,-1.1979202104661000 +cat0,2036-07-14T13:35:17.976,-0.5546948620435240 +cat0,1902-08-09T6:33:19.467,0.5952256558051590 +cat0,1940-07-31T12:07:42.062,-0.7700440408819680 +cat0,1972-01-31T22:32:37.487,0.4671868409961980 +cat0,1938-07-08T21:18:59.224,0.5858686761135050 +cat0,1974-12-22T20:54:40.476,0.049764189022905000 +cat0,1995-03-25T18:00:27.348,0.9274735947539970 +cat1,2044-02-15T15:10:08.997,0.5838225057111510 +cat2,1900-06-08T0:50:18.104,-0.2867188312105560 +cat0,1971-02-10T11:21:06.381,-0.12679059273154600 +cat1,2017-07-15T22:44:15.920,1.0410930707052200 +cat0,2048-10-04T13:48:34.291,0.5698541938724780 +cat0,1953-06-22T1:54:15.911,1.1278549128327400 +cat0,2047-10-09T6:21:55.346,-0.09661129256473350 +cat1,1930-06-08T1:12:56.872,1.0286531551422400 +cat0,1976-08-27T20:49:32.734,-1.1698181370767700 +cat0,1936-07-05T18:17:58.690,0.4640044706209720 +cat0,1997-06-08T23:22:48.760,1.4507066018060200 +cat1,1957-03-06T10:38:18.234,-0.3765707811022850 +cat0,1963-03-29T22:41:11.516,0.7990943012323970 +cat2,1928-06-14T1:54:40.847,1.5025101240413500 +cat0,1921-12-17T23:04:44.554,-1.015085403966730 +cat1,1908-10-10T15:32:29.584,0.9705060035599490 +cat0,1961-04-02T22:10:08.437,1.5743725905125300 +cat0,2019-12-11T4:52:53.811,1.0900575560307300 +cat0,1938-08-06T9:41:30.521,-0.3904009744904330 +cat2,2047-07-02T18:05:59.606,-1.1766912174920500 +cat0,1998-07-01T18:02:51.873,-0.1516443862311450 +cat0,1903-03-25T20:28:21.707,2.168532526460890 +cat0,1933-11-19T7:07:21.230,1.3937077000853100 +cat0,1983-08-10T15:36:12.948,-0.2395967891339570 
+cat2,1909-03-19T16:55:25.821,-0.90063252428488 +cat0,1903-05-19T6:56:48.213,-0.025445035207046000 +cat0,2006-06-06T11:19:26.048,-0.282578703379129 +cat1,1900-11-05T22:49:17.269,0.6812375971166880 +cat1,1923-01-25T23:02:42.850,1.6051985677551300 +cat2,1925-07-27T4:41:34.268,0.18020338403888900 +cat0,1952-12-26T0:34:26.499,0.8820217906737040 +cat0,1920-05-14T1:44:35.973,1.489308309779160 +cat0,1991-12-30T20:20:53.667,-1.6070488950831300 +cat0,1908-05-23T19:42:51.592,0.24348459234265900 +cat0,2011-11-24T12:57:24.249,-1.6618487498233300 +cat0,1998-11-24T21:47:22.499,0.015114817120839600 +cat2,1960-01-29T22:50:37.580,1.8163300286812200 +cat2,1965-08-14T16:58:51.253,-0.5078904704739630 +cat2,2034-03-04T20:02:50.513,0.2747728404579520 +cat1,2035-06-17T11:47:20.502,-0.967661410797177 +cat0,1957-07-19T17:51:01.089,0.6915833121067870 +cat2,1961-08-10T1:10:23.810,-0.11539509693312600 +cat0,1918-08-24T10:45:44.887,1.631301764776920 +cat0,1984-11-15T4:31:54.820,-2.437987788593290 +cat0,1978-12-17T11:23:52.940,-0.48915403773134200 +cat0,2001-02-22T0:24:35.971,-0.5281477853547530 +cat0,2048-05-09T18:59:10.243,0.022497591930115900 +cat2,1935-10-16T8:46:08.802,0.7835580397724930 +cat0,2045-03-17T7:20:03.497,1.4411537743035400 +cat2,1944-03-26T19:22:24.921,-1.7141400767526300 +cat0,2018-03-26T20:37:14.989,-1.34998308049576 +cat1,1993-02-24T2:01:02.664,-2.1321372813696100 +cat2,1974-08-19T23:47:18.801,0.6047657846255020 +cat0,1951-10-19T0:09:58.270,0.032778486569277400 +cat0,1928-10-17T1:12:57.322,-0.49667019525645700 +cat0,2018-11-14T8:18:44.530,-1.1183674960779000 +cat0,1965-08-30T15:20:49.364,-0.04478131159032100 +cat0,2047-11-20T3:45:46.999,0.40544862687836700 +cat1,2012-03-24T18:58:21.103,1.161527643003050 +cat0,1905-03-31T2:22:29.360,-0.19483431723914400 +cat0,2040-10-17T20:53:12.903,-0.287137356729007 +cat0,1914-03-31T9:45:09.275,-0.5370419441154250 +cat0,2033-03-08T11:00:29.645,-0.8843145418456000 +cat0,2015-12-19T6:38:20.089,-0.018645831398249500 
+cat0,1931-02-02T23:52:31.092,-1 +cat1,1929-03-29T3:34:57.625,-1.0154341427018000 +cat0,1927-10-04T15:51:24.752,-0.7402869393559670 +cat2,1936-05-05T9:09:54.073,0.7737140906104590 +cat2,1989-05-21T4:23:17.636,-0.33879104118730300 +cat0,1978-05-04T5:29:15.110,1.0059505422029500 +cat0,1969-05-02T10:59:56.628,-1.4180933544605400 +cat0,1960-07-18T12:36:37.214,-0.11327315118601800 +cat1,1937-11-21T6:07:38.928,0.754908503930691 +cat0,1901-12-30T7:15:41.911,1.2880311146324200 +cat1,1974-07-26T15:40:32.527,0.46813084783113700 +cat0,1971-09-03T21:50:17.000,1.0826209555399800 +cat0,1915-12-09T15:01:29.422,0.04948908189074070 +cat2,2006-06-05T14:03:23.900,0.5496702665714210 +cat2,1998-05-17T3:34:05.761,1.8379914430825900 +cat1,2046-02-06T0:13:25.872,-0.6751479947301830 +cat0,1921-08-05T13:45:14.602,1.138933165049070 +cat0,2040-03-28T19:54:29.683,0.12273509601321000 +cat1,1923-10-25T8:48:11.347,-0.986673089039078 +cat2,2037-08-06T4:00:33.279,1.4085545223698000 +cat2,2011-06-04T22:36:16.243,0.43277962386226400 +cat0,2043-01-06T15:18:00.410,-1.0294281362522700 +cat0,1912-03-17T11:44:32.628,0.06863492128702840 +cat2,2016-05-01T22:25:17.574,0.9952232698394740 +cat0,1945-02-07T22:57:52.714,-0.3628149963345660 +cat0,2030-02-08T12:17:00.097,-0.03125949865149190 +cat0,1926-12-25T19:08:12.080,-0.8963506264806110 +cat0,1930-07-10T18:34:02.480,-1.8351450849796300 +cat0,2007-02-14T23:02:01.995,-0.5914266893853590 +cat0,1922-09-09T1:40:18.302,0.27295023204551000 +cat1,1995-02-17T15:40:35.980,1.2177136544323300 +cat0,1993-05-23T9:52:48.164,-1.2432478562774200 +cat0,2004-04-27T6:04:10.506,0.4254949254592870 +cat0,1901-03-28T1:01:23.038,1.59229308529128 +cat0,1959-04-01T16:12:35.329,1.5515267980834600 +cat1,1900-09-19T0:35:08.176,0.8045737043341870 +cat2,1973-01-20T19:10:34.457,0.06971016783924270 +cat0,1994-07-28T8:40:15.306,-0.888121045815875 +cat0,1951-09-07T2:13:12.462,-0.2849321419360790 +cat0,1941-12-31T0:39:14.082,-0.2877810238471370 +cat0,1912-06-17T6:08:24.916,-2.6112712866234300 
+cat0,2007-08-15T19:29:44.848,-0.23028391645094100 +cat2,1927-09-11T19:11:36.658,-1.4177453296365100 +cat0,2007-10-01T21:56:27.853,-0.535677822746136 +cat0,1998-05-30T19:48:29.653,0.7883193455881370 +cat0,1985-11-26T11:42:58.238,-2.07941477924249 +cat2,1980-08-09T1:15:30.575,0.3249405068270380 +cat0,1930-03-07T2:56:01.173,-1.3767011603156600 +cat0,2049-11-14T23:41:06.154,0.24884367188074800 +cat0,2037-06-08T3:55:41.538,-0.5852061335103320 +cat0,1918-08-29T18:38:54.317,0.6194972429839120 +cat0,2048-10-25T6:07:06.119,-1.2136607152175700 +cat0,1957-06-02T5:17:07.017,-0.5507947118266130 +cat0,1961-12-30T21:00:01.760,0.6008689493084510 +cat0,1914-08-20T17:25:00.736,-0.7885895871598490 +cat0,2038-08-21T6:08:07.548,-0.44122537945586400 +cat2,1954-10-18T8:34:03.795,-0.589556881923267 +cat2,2019-11-15T8:10:00.962,1.0698147724573700 +cat0,1923-12-07T21:59:55.880,-0.45263669524098600 +cat2,2011-06-20T7:21:56.145,0.915244707379926 +cat0,1910-06-27T11:15:25.361,0.207160301910564 +cat0,1929-08-17T8:25:07.142,-0.41503686456486900 +cat1,2024-11-18T20:27:04.120,2.6625554235847100 +cat2,2034-07-25T11:09:29.796,0.7375953856472110 +cat0,1901-09-12T5:45:09.650,-0.2613352548759820 +cat0,2013-06-07T14:45:15.788,1.3706301435227500 +cat1,2025-05-23T19:24:05.221,-0.38738757205845700 +cat0,2013-03-29T5:34:31.254,1.0133876675592100 +cat2,2036-04-23T3:51:40.753,0.8775076345191280 +cat0,2013-09-11T23:53:19.445,-0.8355353062402220 +cat0,1962-01-12T22:34:48.667,2.241084637561150 +cat0,2003-03-17T22:50:16.646,0.5421163478012600 +cat0,1905-03-18T3:07:36.727,-0.708712461451847 +cat0,2033-04-22T20:09:10.579,-0.013094227231940300 +cat0,1990-01-29T14:29:33.785,0.3010982837511700 +cat0,2049-02-07T23:33:06.009,-0.4178835947722080 +cat0,1997-03-30T15:59:30.294,-1.0282681501669100 +cat0,1975-02-18T1:20:27.781,1.5455241809589600 +cat1,2007-09-04T23:59:54.441,0.04409216774358350 +cat0,1994-08-23T11:34:43.393,0.08324012327789630 +cat1,1912-07-18T3:19:55.614,-0.2974201203581150 
+cat0,1949-01-25T9:47:51.140,0.968912555763342 +cat1,1974-05-05T14:35:26.377,1.1446271095704700 +cat0,1965-09-11T21:14:28.977,3.206573827493120 +cat0,2031-03-09T1:21:11.748,-1.016025809880730 +cat0,1957-09-21T2:39:02.408,0.4996830939935130 +cat0,1909-01-03T20:31:17.514,-0.11940590335135300 +cat0,1959-12-07T19:38:00.165,0.7745739133658770 +cat0,1977-10-07T0:39:58.088,1.9020905113838400 +cat0,2011-06-14T0:11:34.787,-2.0963632723817200 +cat0,1991-06-24T20:48:16.913,-0.867605173888732 +cat0,1981-10-06T11:08:25.494,-0.16644474596085500 +cat0,1942-06-01T15:05:04.797,-0.7922771609914410 +cat0,1961-08-25T13:51:28.458,-0.5618399940004010 +cat0,1978-04-10T9:00:55.441,-0.7687982359942490 +cat2,1984-04-28T5:28:24.958,0.09483174942275740 +cat1,1971-04-07T10:27:20.022,1.30877175594058 +cat2,1981-07-09T9:53:48.817,-0.5544633297531180 +cat1,2017-06-06T2:38:25.976,0.5121798327224070 +cat1,1912-06-23T14:17:15.616,-1.6544588854212900 +cat2,1968-05-12T14:28:36.659,-2.275616259126410 +cat0,1933-02-21T23:25:45.020,-0.4461152388411780 +cat0,1984-05-30T17:57:17.421,-0.07325545592826090 +cat0,1952-04-16T19:54:10.276,-1.7675733538038600 +cat0,1970-06-27T9:41:22.004,-0.4206912037726340 +cat0,2044-04-24T4:59:31.716,0.044839535586000900 +cat0,2028-03-21T13:27:24.545,2.409143917834590 +cat0,1926-01-22T5:13:12.508,-0.26358125121213100 +cat2,2015-05-10T18:02:39.549,-1.6822219323963500 +cat1,2046-12-09T19:17:26.017,-0.9054567141986230 +cat0,2015-03-09T19:55:13.399,2.2293328836140400 +cat0,2026-05-20T18:57:13.218,1.3892354685480300 +cat1,2026-08-28T21:51:56.211,-0.5543945931528270 +cat2,1907-12-23T8:42:27.764,0.5705927624838720 +cat0,1922-09-10T15:43:52.169,-0.7668477676679390 +cat0,2026-02-11T7:42:34.413,0.39103352219332300 +cat0,1904-05-20T6:02:52.557,0.7150465229350910 +cat0,1979-12-24T22:04:01.853,0.5107311005755990 +cat0,2000-06-19T19:50:35.174,0.04053436254815960 +cat0,1957-03-02T11:48:16.986,-0.7003627313926430 +cat0,1970-02-03T6:50:48.786,-1.2489639284207900 
+cat0,2027-06-27T21:51:42.873,1.4757065876730300 +cat1,1934-03-03T15:58:18.354,-0.1280818228366680 +cat0,1958-11-18T16:07:47.193,-1.7218482627128900 +cat1,2009-10-19T23:41:28.559,0.686878430488117 +cat2,1920-08-25T8:57:58.470,1.2120959728344100 +cat0,1950-10-28T3:35:36.190,0.2731905662717190 +cat0,1929-02-24T11:51:06.762,-0.86698014468016 +cat2,1923-06-06T5:15:06.647,-0.16279280932527800 +cat0,2045-08-20T9:37:07.896,-1.276794678000080 +cat0,2011-12-06T11:38:33.221,1.2009785221988500 +cat0,1949-12-09T10:26:51.679,0.8928934145184990 +cat0,2012-12-29T4:15:24.586,-1.0534775084465400 +cat1,1981-05-05T20:44:08.269,-0.7124164860888990 +cat0,2014-01-10T17:16:11.852,0.5360133335054130 +cat1,1919-04-13T15:53:02.515,1.4569774913524000 +cat1,1939-02-22T11:49:05.038,0.8892326509650910 +cat0,1910-10-06T9:01:03.959,-1.5749086709494600 +cat0,2019-08-19T0:00:18.974,-0.704214817838915 +cat2,1975-04-04T22:52:56.040,-0.43164680507909300 +cat1,1995-05-06T1:09:47.793,0.7026110669507250 +cat2,2006-12-27T15:18:19.280,-0.6617512877100450 +cat0,1938-10-17T12:59:47.461,-0.8656465516521840 +cat0,2036-10-07T12:21:07.824,-1.5892794506103600 +cat0,2023-07-17T22:10:37.980,1.0984676550918100 +cat2,2020-10-14T3:11:41.131,0.8115589535804370 +cat2,2003-03-13T13:00:40.675,-1.2146120334874400 +cat0,1924-09-29T2:30:08.217,0.6746269294810770 +cat0,1954-12-01T20:40:40.839,-1.054478882046220 +cat0,1991-11-29T6:32:58.442,-2.213259837180490 +cat1,2045-07-15T21:49:52.252,-0.7313450778109490 +cat0,2028-11-18T19:14:30.027,1.9687500386138700 +cat0,2039-11-28T8:44:02.184,-0.08641923216386230 +cat0,2006-06-14T19:13:43.418,-0.7963740999705330 +cat0,2015-07-29T11:57:36.877,0.3604697084163690 +cat2,2026-08-14T12:21:13.967,0.5879945925311000 +cat1,2045-01-23T7:46:24.879,-1.7171410338780900 +cat1,1913-11-30T7:16:08.491,-1.0971212168811900 +cat0,2039-07-23T7:35:28.630,-0.9653918356192340 +cat0,1954-11-05T18:04:53.173,0.4214869679680400 +cat0,2034-06-14T23:26:13.031,-0.16541339083996400 
+cat0,2048-11-03T0:05:55.541,0.10901265120973900 +cat0,1908-06-07T1:37:25.429,1.3533774787310100 +cat1,1965-04-15T20:33:02.041,-0.46682633003592400 +cat0,1935-06-29T14:29:58.541,0.3841423470676320 +cat0,1977-06-12T9:34:17.685,-1.690449917171930 +cat0,1920-07-21T21:56:25.861,-1.5440739197178800 +cat1,1936-12-08T7:11:08.159,0.7840084347283910 +cat0,1961-06-16T5:04:52.371,0.9545546707125700 +cat0,2013-01-11T3:09:31.691,0.04376942950913910 +cat0,1928-11-05T0:44:05.350,0.07712955330128460 +cat0,1967-12-08T3:07:19.718,-0.5861785036982250 +cat0,1956-03-19T8:44:05.818,1.4577764184663000 +cat0,1987-02-18T10:15:40.514,1.6431764709078900 +cat2,1988-05-16T22:31:56.892,-0.6303609622738500 +cat0,1974-03-19T16:35:57.643,-0.8390438163004950 +cat1,2021-06-14T18:19:28.123,1.1020955752889400 +cat0,2049-11-19T21:34:27.437,0.5183303271736070 +cat0,2002-03-04T1:53:58.275,1.0929230811867900 +cat2,1954-06-08T13:35:15.863,0.3585941748736270 +cat0,1929-06-19T19:40:22.355,0.9251943146663950 +cat0,1984-06-04T12:53:24.433,0.04458466336633600 +cat0,2036-01-10T9:52:44.315,0.12936878434866600 +cat0,1972-04-17T10:44:44.750,0.08024195473063410 +cat2,1918-04-07T4:06:49.013,0.07657286882239470 +cat1,2041-06-21T22:21:53.865,0.5667147630052540 +cat0,2021-11-16T7:25:16.139,0.911931961885018 +cat0,1916-05-21T16:25:13.936,-0.1855844813438310 +cat0,2004-11-13T6:39:22.704,0.06993197253420640 +cat0,1978-10-09T12:12:20.319,-0.41712086945863500 +cat0,1907-12-23T21:07:07.300,-0.09367001474049180 +cat0,1978-09-07T18:09:15.410,-0.564783667975394 +cat0,1940-03-06T9:12:46.905,-0.9357533516764630 +cat0,1992-12-06T20:25:15.749,-0.022344065640285000 +cat0,2032-08-03T21:55:38.397,-0.8973151677592110 +cat0,1932-08-01T0:56:38.221,-1.2368651867644100 +cat0,2031-06-17T7:46:37.074,-0.5595687698328510 +cat0,1903-12-01T0:16:49.868,-0.08489992201109550 +cat0,2038-10-29T23:15:28.899,-0.1495472491521440 +cat0,2018-04-23T22:39:22.066,1.5210435553849200 +cat1,2025-10-14T1:18:32.269,0.2209946107665310 
+cat0,1968-11-27T23:47:39.461,-0.5775954258214420 +cat2,2027-10-17T7:08:56.590,1.2973170931399000 +cat1,1968-10-29T23:37:43.570,-0.6800905943224520 +cat0,1947-11-26T7:05:20.148,0.5258115217576290 +cat0,2046-02-11T1:06:30.650,0.37459325317566100 +cat2,2023-01-01T23:01:24.461,1.12423460709609 +cat0,1973-02-25T7:03:46.921,0.014876939746535000 +cat1,2033-01-24T23:33:47.785,-1.6676098577393900 +cat0,1936-03-26T19:28:29.509,0.12511545659822000 +cat0,1955-03-05T19:56:30.162,0.2567370980954330 +cat2,2002-01-13T5:07:42.799,1.0447187743687300 +cat0,1968-04-25T14:56:52.347,-1.5625536917032900 +cat0,1908-06-12T0:30:00.380,-1.3000263187187300 +cat0,1974-09-13T8:34:08.363,0.7997709180820880 +cat0,2028-06-17T11:34:01.750,1.4353824628742500 +cat0,1951-10-23T18:46:38.130,-1.038854060083850 +cat0,2035-08-05T19:16:29.924,0.8650720877513040 +cat0,2045-11-28T11:27:11.387,-0.7752391642122940 +cat0,2038-10-01T3:02:29.349,-0.31486309835696500 +cat0,1952-09-04T8:04:11.568,0.5001932324490830 +cat0,1958-01-01T11:09:09.029,1.0523826947008000 +cat0,2025-06-09T19:45:03.939,-0.5640426194765440 +cat0,1955-08-17T3:17:23.860,-1.0223446380694200 +cat2,2014-03-26T13:32:37.404,1.0992408462467100 +cat1,2000-12-22T2:39:56.778,0.2113628020183170 +cat2,1954-10-19T11:55:51.616,-0.1240093133255500 +cat1,1975-04-23T23:45:58.007,2.83726448990538 +cat0,1994-08-18T2:36:43.083,-0.9451845307028760 +cat0,2000-05-05T3:29:57.512,-0.95741744687274 +cat0,1921-01-31T12:47:47.974,0.7975907567798170 +cat0,1989-11-24T15:56:09.209,-0.021449031766719900 +cat1,2006-08-14T19:47:01.156,-0.959658926649476 +cat1,2027-11-30T20:10:46.104,-2.259248819759260 +cat0,1938-05-03T20:49:34.817,-0.1294191472635080 +cat0,1918-02-01T5:12:11.651,0.6587103392322770 +cat1,1900-10-22T15:49:55.877,0.4115412787633880 +cat2,2019-09-19T3:50:30.957,-0.9648918917550820 +cat0,2006-04-17T4:06:56.768,0.4566564134102160 +cat0,2045-09-16T21:50:12.734,0.25625298118639200 +cat1,1972-11-19T8:29:24.341,0.8352955034910560 
+cat2,2022-04-06T13:03:50.013,2.6243242982304400 +cat0,2000-04-21T23:40:19.818,-1.0292403414443000 +cat1,1974-07-23T22:32:54.013,-1.1695927600651400 +cat0,1952-12-12T9:19:00.818,-0.0690088759191328 +cat2,1911-12-31T23:31:48.652,0.9562678371634470 +cat0,1934-03-21T3:58:17.936,1.2936875968373200 +cat0,2040-08-02T15:38:17.207,2.295316320950600 +cat1,1977-08-15T5:35:51.350,-0.4635826665486880 +cat0,2041-09-17T22:14:45.831,-0.5366081705718050 +cat0,1918-02-26T14:00:29.999,-0.0022052695144606400 +cat0,1948-01-02T22:22:20.753,0.14135616331897700 +cat0,1952-12-20T3:09:06.369,-0.6853040121682420 +cat0,2010-07-12T11:09:38.478,0.24869298803571700 +cat1,2039-05-20T8:26:07.568,0.40277966630468100 +cat0,1926-03-13T20:11:50.336,-2.4650402943342400 +cat0,1956-07-23T0:23:45.572,-0.08394689155243490 +cat0,1952-08-13T18:16:04.600,-0.978485027672803 +cat0,1906-11-03T0:02:59.960,-0.10573782453143800 +cat1,1939-10-23T23:55:24.996,-0.3663177722700630 +cat1,1992-06-12T6:56:14.293,-0.28707076343333100 +cat0,1953-01-04T17:18:31.072,-0.8271493530597200 +cat0,1921-09-25T17:07:58.681,-1.7226336729977900 +cat1,2047-06-04T12:38:39.498,-0.025998638361089900 +cat0,1935-10-21T8:30:35.978,-1.189233702009400 +cat1,2028-08-02T5:51:34.292,-0.2977605440254270 +cat1,1928-11-11T16:42:12.628,-0.8819921580526740 +cat0,1961-06-27T22:27:56.887,0.8345819093603070 +cat0,1996-03-23T7:22:26.613,-0.3020360733458470 +cat0,1954-01-16T5:54:07.339,0.6004516562092890 +cat0,1965-02-07T9:55:26.715,-1.3243651528162800 +cat0,1959-07-20T10:56:34.963,0.6198675632827760 +cat0,1941-10-02T0:47:23.217,0.2557317036991500 +cat0,1961-03-05T20:57:06.886,0.46416963695987200 +cat0,2041-12-08T22:50:23.571,-0.037310566156126500 +cat0,1969-04-26T16:53:49.767,-1.4610546828079100 +cat0,1913-11-13T11:08:16.917,-0.34185045409842700 +cat0,1960-11-02T18:35:44.797,-0.2504904883368990 +cat0,1978-02-11T11:53:47.705,0.7489467714148690 +cat0,1991-07-15T1:08:25.791,1.170994938396860 +cat0,1949-10-22T20:13:49.826,0.1441018349508980 
+cat2,1972-12-15T19:07:13.198,0.031918840351729100 +cat0,1905-01-19T14:51:28.621,0.8270453387658350 +cat0,1909-07-05T18:41:19.956,-0.513829222128048 +cat0,2001-01-11T11:30:21.700,-1.5374204882543400 +cat0,2000-10-17T4:32:31.708,-0.9586807972586190 +cat0,1972-07-03T6:03:49.044,0.6342125984574240 +cat0,1986-12-22T3:57:00.021,1.7392124092777100 +cat2,1954-10-19T9:17:05.781,1.8851341565799100 +cat0,2012-11-15T20:17:10.653,-0.8290660746680130 +cat0,1910-07-28T17:54:55.419,-0.2788820334633240 +cat0,1957-12-16T16:39:51.984,-0.7181685525422010 +cat2,1936-10-31T22:55:47.691,-0.12410987573253200 +cat0,1980-08-10T5:02:28.354,-0.05526446835684070 +cat0,2049-01-03T19:09:17.148,0.48025388932215200 +cat1,1917-06-14T0:35:44.166,0.6929355683552770 +cat0,1998-04-23T18:06:53.790,0.09935198960230240 +cat2,1975-09-05T0:22:49.227,0.9928421776096210 +cat0,1986-01-24T6:30:27.361,0.21981202105225000 +cat0,1954-01-13T8:23:23.221,-0.5101375494331900 +cat1,2001-02-24T15:50:17.762,-1.086168390420020 +cat0,1945-04-19T7:31:36.761,-0.04576890958071910 +cat0,1996-03-19T7:59:59.860,0.7712072643049840 +cat0,2031-06-04T15:17:18.476,-0.6121905243821880 +cat0,1947-09-02T17:54:27.540,-0.14283036937010300 +cat0,2015-08-28T15:01:56.335,0.11209929543049800 +cat1,2034-09-15T10:38:50.089,1.4353891322986800 +cat0,1932-09-10T9:46:32.689,0.28560129374261000 +cat0,1998-10-03T3:06:29.663,-0.5388973216679560 +cat2,1944-03-07T2:06:29.225,1.7011607201071900 +cat0,1943-06-25T14:26:49.974,0.7118850610482110 +cat0,1986-10-01T11:08:29.490,1.411079536407670 +cat0,2035-08-14T14:34:41.088,-2.2879615379560500 +cat0,1965-08-26T5:26:47.704,1.97958064606072 +cat0,1972-10-21T16:26:44.319,0.16140936640680500 +cat0,2043-11-18T22:05:20.232,-0.7638383353928110 +cat0,2022-01-29T0:23:09.010,-0.011903835077518900 +cat0,2008-09-27T5:25:56.477,0.9062995119648790 +cat1,2033-06-18T7:52:11.234,0.9810912787051300 +cat0,1988-10-09T22:57:26.773,-0.23862442388396200 +cat0,1956-03-28T12:37:10.084,-0.7787458406859110 
+cat0,1941-01-15T10:41:52.057,0.12744254264196900 +cat0,2034-11-02T20:14:20.747,-0.31911076475758200 +cat1,1994-07-13T22:31:16.304,0.7704834397976360 +cat0,1983-03-12T10:18:18.739,0.5081717422773400 +cat0,2023-04-19T11:08:29.567,0.025690959742156000 +cat0,1936-10-12T1:35:24.821,-0.2676182867387930 +cat0,2020-02-20T16:57:57.058,0.5862960164874380 +cat0,2030-12-16T8:33:34.433,0.8313419405199590 +cat0,1905-08-27T5:19:09.471,0.7146375894102780 +cat1,1952-09-07T1:35:31.002,-0.43469762377728800 +cat0,2007-12-14T16:45:30.265,-0.22601029416416700 +cat0,2026-06-01T21:01:23.632,-0.6508248494075880 +cat0,1968-04-09T12:58:29.913,-0.3656531118134380 +cat2,1960-02-09T21:24:41.485,-1.2638204557913000 +cat0,1947-04-04T23:34:58.942,0.771752804959629 +cat0,2008-01-10T16:11:30.748,0.5187812059244730 +cat2,1949-01-18T21:03:22.166,0.8727838339187570 +cat1,2034-07-08T6:47:35.386,-1.5346559511588900 +cat0,1978-04-01T14:55:04.160,0.22749138628808200 +cat0,2027-07-07T21:24:20.357,-0.760934146266728 +cat0,1938-08-17T14:51:01.614,1.3337339683653000 +cat0,2010-12-25T5:25:03.478,-0.4726181688198900 +cat0,2046-08-15T17:58:04.260,-0.2129489649979170 +cat0,1992-05-14T0:15:08.789,0.016308760796659000 +cat0,2020-01-01T18:23:09.867,-0.6869500216505470 +cat2,1949-11-14T17:30:10.256,0.7223897736836550 +cat0,1946-08-30T12:35:45.565,0.5503415187010630 +cat0,1992-08-08T19:52:23.925,0.08419124917142420 +cat0,2040-06-14T20:08:45.672,0.015178854334176500 +cat0,2038-03-07T17:39:40.476,-0.056690055545062900 +cat0,1955-03-17T2:48:25.082,0.4093736585052940 +cat0,2048-10-23T16:22:19.195,-0.06130896906952350 +cat1,2005-10-12T7:25:13.315,1.3250960612699000 +cat0,1999-12-20T3:52:44.765,-0.6308326793892330 +cat0,2026-02-12T22:27:39.395,-0.6001607606548510 +cat1,1905-06-14T22:35:39.681,-0.18298647633312800 +cat0,1963-06-14T2:39:14.632,1.4514534862942900 +cat0,2028-11-04T21:54:39.559,2.8623306927461200 +cat1,1931-12-14T16:39:35.027,-1 +cat0,1931-08-10T6:26:20.557,-1 +cat1,1931-01-28T12:32:46.181,-1 
+cat2,1931-01-18T21:45:09.874,-1 +cat0,1931-12-26T17:51:57.956,-1 +cat0,1931-07-01T15:54:32.555,-1 +cat0,1951-12-29T7:53:45.302,-0.5047864602900040 +cat0,1913-04-30T16:22:30.912,0.5741804002787000 +cat0,1956-09-13T10:29:22.278,0.18817426519889500 +cat0,2017-11-13T0:35:21.752,0.8917285508827400 +cat0,2032-08-25T8:03:15.425,-1.6955948625206300 +cat0,2014-03-17T20:36:50.382,-0.41167737694736900 +cat0,1979-09-15T14:52:44.735,-0.7577592898319910 +cat0,2013-12-06T18:16:29.173,0.7165774367719540 +cat0,1929-10-01T3:33:48.988,-1.0640275797215300 +cat2,1905-05-24T20:36:54.399,1.199240100440910 +cat0,1928-05-12T23:11:30.464,-0.29753004675848800 +cat1,2039-04-16T14:21:51.740,0.46061190719174900 +cat0,2032-04-20T7:45:06.194,-0.5992293954153180 +cat1,1979-10-31T11:56:19.476,-0.24852055466919400 +cat0,1953-02-07T17:47:48.906,1.5102443917920700 +cat0,2036-01-18T20:45:36.719,-0.930675230055969 +cat0,1981-03-16T18:47:44.531,0.18925804770986300 +cat0,1972-08-27T5:08:54.435,0.37538729524672800 +cat0,1927-02-10T21:24:56.210,-0.2411437886886610 +cat2,2035-04-12T7:44:02.025,-0.32753591323721100 +cat0,1952-04-23T23:04:57.131,-1.028793438094340 +cat0,2044-10-05T13:31:59.039,0.650943028347011 +cat0,2032-09-25T23:15:44.374,-0.9698917496025190 +cat0,1906-11-03T13:06:18.660,1.0212417645280100 +cat0,1997-02-10T16:13:26.686,-0.4411257822182580 +cat0,1979-02-27T0:52:46.596,0.07275372417618770 +cat0,1900-01-29T20:42:36.034,-2.307374369335410 +cat0,1994-03-04T10:29:01.277,-1.6040688037922800 +cat2,1902-03-26T15:03:42.335,2.087292338221720 +cat2,2034-06-11T12:40:01.037,0.20463006384261900 +cat0,2030-01-24T22:56:32.123,-0.5053694212081840 +cat2,1908-08-01T15:51:13.269,-0.986142199759005 +cat0,2009-12-13T14:33:42.854,1.109272859910860 +cat0,2037-12-21T12:29:58.124,-1.0069869365581600 +cat0,2018-04-14T21:21:12.345,-0.5408659139656540 +cat0,1991-08-06T19:03:19.851,-0.04700928444891030 +cat0,1965-10-26T7:49:09.115,0.2936571810102940 +cat2,1951-04-08T11:55:16.559,0.025094245134882900 
+cat0,1999-08-27T22:39:07.469,-0.2885222529434090 +cat0,1900-10-12T0:42:56.953,-0.5896022988108240 +cat0,1942-05-14T22:06:39.095,1.4608358056037500 +cat1,1958-05-05T20:54:24.869,-0.9108240898859110 +cat2,1972-09-01T17:48:18.036,-1.5471493408909400 +cat0,1976-11-12T4:18:57.029,-2.6395031291170400 +cat0,2001-04-16T13:01:35.024,0.018409319144399600 +cat0,1978-08-18T13:38:07.245,-1.076556106078100 +cat2,1955-11-09T20:48:23.360,0.6218272696180510 +cat0,1957-11-25T9:56:46.595,0.03389840076883510 +cat0,1947-08-13T9:18:22.283,-0.3540830241449590 +cat0,1991-10-20T7:32:12.531,-0.22719209564612000 +cat0,1987-12-11T23:06:12.386,-2.178141265712260 +cat0,2027-05-30T1:34:10.691,-0.24612561538328800 +cat1,1908-07-29T18:54:54.576,-1.5424250362685600 +cat0,2005-08-24T2:02:51.899,-1.0601196878775000 +cat2,1952-03-22T17:49:39.807,1.753613011957200 +cat0,1951-02-09T17:40:11.527,0.3733301114524960 +cat0,2042-12-29T11:10:03.068,-0.250794769242975 +cat0,2039-07-01T0:19:27.512,0.48586953082144700 +cat0,1945-03-29T16:41:51.644,-0.142837608775986 +cat0,1970-02-25T7:24:19.309,-1.397915809619290 +cat0,2028-01-10T18:22:27.612,0.8039440425801280 +cat0,2044-03-19T14:24:56.441,-0.49920882184963700 +cat0,1916-03-04T4:43:55.051,-0.5970934952571940 +cat0,1986-03-22T10:50:33.501,-0.6621997043072310 +cat0,2018-02-11T6:08:56.520,0.7650684371324760 +cat0,2045-03-22T3:09:37.926,1.1788255151089200 +cat0,1965-11-10T4:38:38.327,1.250486905311300 +cat0,1995-09-11T17:36:57.868,-0.5655515924780720 +cat0,2044-11-05T18:00:00.630,-1.0290028480911300 +cat0,1997-08-13T1:47:58.246,-1.3958234232386600 +cat0,1920-05-11T6:16:04.730,0.7830249913601850 +cat0,1945-01-06T13:58:22.879,-0.9635965885385970 +cat1,2033-07-16T7:04:01.682,-1.0101220494869600 +cat2,1954-08-06T22:16:57.186,-0.45104426726531300 +cat0,2010-04-06T7:14:38.311,-1.1803687570847300 +cat0,2026-11-30T12:48:14.126,-1.5564385507287900 +cat0,2002-04-01T4:42:31.931,1.0742253731124500 +cat0,2015-08-11T21:42:00.496,-0.6924393196180240 
+cat0,2035-10-12T23:02:25.485,-1.0558907424345900 +cat0,1926-09-01T2:16:39.229,-0.33722302773932200 +cat1,1910-07-14T7:36:34.385,0.17226136384618600 +cat0,1947-11-27T3:45:42.561,-0.24949033709990700 +cat0,2018-03-15T14:25:05.452,-0.9320104789829210 +cat0,1990-08-23T7:06:37.536,1.5713391819841400 +cat1,1930-01-31T15:50:46.028,-1.1558018504673700 +cat0,2036-01-16T22:30:16.821,1.1232262169247400 +cat0,1923-09-26T5:55:52.484,0.039311591552799500 +cat0,1940-10-22T14:09:05.095,0.21457808439198300 +cat0,1994-08-05T5:48:51.984,0.05562060388906540 +cat1,1997-03-09T8:03:31.364,-0.1775817802486620 +cat2,1911-12-08T8:09:53.348,0.218755556817088 +cat0,1989-08-15T18:22:05.778,-0.24069618863371100 +cat2,2002-12-07T6:03:02.610,-0.062165226289993100 +cat1,1925-04-08T17:14:06.795,0.0804021632840977 +cat0,1917-06-05T14:56:14.411,-0.3901257074542220 +cat0,2030-07-10T23:13:13.868,0.6962472160885020 +cat1,1990-11-25T22:45:20.014,0.21761627206233000 +cat0,1996-11-19T8:00:19.284,0.16213650303195500 +cat0,2042-12-30T4:46:42.311,0.4343649312341360 +cat0,1969-12-16T5:47:33.422,-0.1418826747183950 +cat0,1981-06-30T23:21:45.488,-1.3335487023678900 +cat0,1948-03-31T20:13:20.345,0.5810923068696490 +cat0,1908-07-06T0:59:36.184,0.36650881280877100 +cat0,2039-11-07T13:25:47.061,0.262566612771352 +cat1,1980-07-04T18:18:14.004,-0.2371794106204730 +cat0,2007-10-21T16:30:52.705,-1.2538546234834300 +cat0,2018-09-12T21:29:20.303,-0.9246517232123870 +cat0,1942-09-18T7:12:38.471,-0.19569525198371200 +cat2,1929-08-31T22:31:22.708,-0.35255296042388600 +cat2,2005-12-19T4:40:46.139,0.42650091107247100 +cat0,1997-11-04T13:08:22.055,1.5757474816504800 +cat0,1945-01-06T21:11:04.518,-0.3624135710784290 +cat0,1994-07-07T17:11:55.179,1.1617205824310800 +cat2,1951-07-21T11:54:24.898,0.06882409425120860 +cat0,1944-01-27T5:42:36.793,1.1126307417417900 +cat1,1907-09-21T22:20:51.146,1.465175618330800 +cat0,2002-07-26T6:58:23.216,0.41245113629084000 +cat0,2008-09-26T1:48:25.912,1.3930442326892200 
+cat0,2018-07-08T9:53:52.887,-0.9139776698199470 +cat1,1907-05-04T7:22:01.041,-1.6108755531442300 +cat0,2005-01-14T14:34:51.442,0.8959227833226850 +cat0,1920-10-03T5:46:10.325,0.35883401095146900 +cat1,1901-06-17T14:40:12.296,-1.184619263444560 +cat0,2049-09-30T6:43:48.371,1.0329427460442500 +cat2,1908-10-17T8:49:48.968,-0.047330385557566400 +cat0,2015-07-15T16:21:32.862,-0.7488065081495460 +cat0,2022-11-26T20:23:04.056,1.8965757637358900 +cat0,2028-08-05T20:06:35.397,0.8211315663631220 +cat0,2016-11-02T8:50:09.373,-0.6421571283591090 +cat0,1937-02-17T2:30:39.456,1.1777322340729100 +cat0,2034-01-21T20:52:22.423,2.11816240008462 +cat1,1990-08-19T21:10:09.826,-1.3023577947551400 +cat0,1903-11-23T5:54:51.876,-1.054935795457350 +cat2,2038-04-09T0:02:30.789,-1.4924346028044600 +cat2,1918-11-07T19:56:32.364,-0.2346552189266010 +cat0,1929-04-01T2:49:22.540,-1.141380558606840 +cat0,2019-09-08T0:54:19.056,0.9775936088480460 +cat1,1993-09-16T16:39:55.156,-1.1116880858069600 +cat0,1934-02-01T6:01:51.884,-0.6525604403982100 +cat0,1941-11-01T16:19:43.252,-0.791674761794253 +cat0,2017-02-22T19:42:11.557,-0.9660744851034970 +cat0,1985-01-21T1:19:08.282,0.22149467572100000 +cat0,2039-02-17T13:33:59.113,-1.141781625044170 +cat0,2034-04-10T23:34:32.026,-0.4655869499896840 +cat0,1900-02-22T8:38:07.781,1.455775730235180 +cat0,1921-04-29T15:46:23.345,0.6261400148642100 +cat0,2004-03-31T8:20:27.617,-0.22326257841338100 +cat0,1967-11-15T8:18:48.322,0.6348122673694650 +cat0,1925-03-07T3:22:50.357,-2.07253794039695 +cat0,1976-11-10T16:20:17.179,0.6037820953998590 +cat2,2007-09-13T4:15:20.856,1.5020651505342200 +cat0,1937-05-12T19:27:29.211,0.6769916200775860 +cat0,1932-06-27T2:32:59.560,1.2838853429246900 +cat0,2009-09-01T11:39:42.304,-1.1219842352319200 +cat0,1909-01-03T13:22:34.721,-1.366032025726750 +cat2,2044-08-18T20:42:55.407,1.1694789106391100 +cat0,1997-01-25T2:37:44.642,-1.7874641029746500 +cat2,1966-11-19T7:43:35.447,0.7266265717190750 
+cat0,1978-10-17T20:59:30.197,0.7030518077827430 +cat0,1995-07-03T20:15:31.047,-0.06669281879346200 +cat0,2020-09-23T5:13:17.968,0.5675077017861150 +cat2,1929-05-16T19:51:29.720,1.4710858582695300 +cat1,1996-04-21T10:41:16.390,0.18718486218755500 +cat0,1985-10-20T9:45:00.312,0.9821943690378780 +cat0,2037-09-15T18:24:31.105,0.3364680732336300 +cat1,1985-04-09T7:55:25.431,-1.0176146519012600 +cat0,1962-08-13T12:49:27.712,-0.5102364786852270 +cat0,1955-08-08T7:12:37.732,0.07959837746620190 +cat0,1989-06-05T13:13:28.306,-1.199698968765450 +cat1,2035-02-09T12:16:45.898,0.713815714989406 +cat0,1968-09-10T19:01:11.437,1.2121897652958500 +cat0,2012-06-10T14:34:22.263,0.044008437999648200 +cat0,1900-04-08T21:08:07.452,1.3899516986881600 +cat1,1945-09-05T13:32:16.012,0.7045677324381500 +cat2,2018-09-08T20:39:06.450,-0.2560988311606340 +cat0,1915-07-21T8:47:25.312,-1.647987712196820 +cat0,1958-08-12T7:11:43.606,-1.1338918844908100 +cat1,2020-01-19T15:09:55.117,0.18451654167832800 +cat1,2047-08-14T2:19:43.355,-0.42430376899304900 +cat0,1927-09-25T12:02:07.134,-0.224730673527898 +cat0,1971-09-29T20:09:34.292,-1.2944031382233300 +cat0,1961-02-13T23:09:13.274,2.0458912973173100 +cat0,1907-10-23T21:53:51.564,-0.7644840724135980 +cat0,1994-02-16T10:01:15.827,-0.6585998884664400 +cat1,1915-01-02T1:22:14.712,0.23464562683975000 +cat1,2039-01-27T20:11:12.992,-0.5642648982976280 +cat0,1992-05-22T22:04:53.886,-0.12011315792157100 +cat0,1936-08-07T14:28:29.949,0.12856868170070000 +cat0,2015-12-07T10:18:42.416,0.24015321630451400 +cat0,1965-09-21T7:48:25.651,-0.1463892323852550 +cat1,1960-10-12T8:26:11.018,-0.3048275444872270 +cat0,1926-06-02T3:34:52.989,0.9299471693742860 +cat0,1922-02-10T5:57:23.191,-1.2334135281680300 +cat2,2035-07-18T10:42:07.986,-0.8584352721086900 +cat0,1986-08-18T3:06:54.506,0.2645972267607150 +cat0,1904-11-17T13:52:47.337,-0.4695806045765680 +cat0,1900-03-31T3:30:05.895,1.1803475944146100 +cat0,2002-07-30T16:26:25.183,-1.7202734140025900 
+cat0,1934-06-17T21:29:42.458,0.1413046608742540 +cat1,1957-06-07T8:28:21.497,0.4404982370333910 +cat2,1945-06-22T4:53:13.689,0.7323420166820080 +cat0,2042-05-01T13:44:58.825,1.2498104233486000 +cat0,2012-10-03T14:30:53.095,-2.2121826343714400 +cat0,1918-12-09T4:31:01.763,1.8011448579479900 +cat0,1988-09-06T17:31:29.433,-0.6042578669093030 +cat0,2045-06-08T13:40:28.408,0.6238324893323670 +cat1,2011-06-23T2:08:29.473,-0.479209814023544 +cat0,2038-03-13T19:48:37.754,1.1187314760607700 +cat0,1996-06-28T3:42:02.134,0.1988207268803590 +cat1,2022-09-06T21:29:23.830,1.1153652315117100 +cat2,1961-08-06T9:43:55.722,1.5002483872001100 +cat0,2006-03-13T15:59:00.245,-0.6787145041091850 +cat0,1911-03-30T19:54:04.145,1.2285635638641600 +cat0,2002-04-21T4:25:48.447,1.3816216301665400 +cat0,2015-09-23T8:30:27.304,-0.1633331400685000 +cat0,1948-04-10T19:18:57.183,-0.017615684140100700 +cat0,1939-04-25T4:55:23.140,0.5628209426712870 +cat0,2041-10-28T12:03:11.849,-0.9787500106822830 +cat1,1968-12-02T0:13:16.714,-0.2855612342520220 +cat0,1952-03-21T8:28:38.682,0.8677416904062740 +cat0,1975-10-25T9:24:49.791,-0.28267357055294400 +cat0,2029-08-07T12:27:51.879,0.09999115230444700 +cat1,1983-04-24T5:05:30.381,-1.7580599026756100 +cat0,1981-05-31T3:03:08.745,-0.9606378896715250 +cat0,2040-08-12T21:02:04.693,0.9173996345212990 +cat1,1942-10-03T12:33:43.238,-1.3820739039546400 +cat0,2049-07-20T9:28:06.659,0.3061474309503800 +cat0,1955-11-01T7:09:35.769,0.4610954912251820 +cat0,2006-08-18T6:15:51.913,-0.6949308687353740 +cat0,1928-07-17T16:24:28.062,-0.08037635810486000 +cat0,2003-04-11T18:01:28.855,-0.09212871157528930 +cat0,2039-09-04T7:44:32.966,0.3847532848579150 +cat0,1971-05-07T1:55:01.114,-0.5778743508463300 +cat0,1921-09-20T20:51:51.326,-0.4656444699217630 +cat0,1932-08-23T17:39:08.710,-0.9673020795621100 +cat0,1973-10-09T7:24:39.371,-0.12168234889113300 +cat0,2048-08-07T15:58:55.126,0.19488343988477200 +cat2,2032-06-22T5:45:12.461,-0.3758614192329700 
+cat0,2015-04-12T14:21:13.770,0.2750520412627480 +cat0,1943-10-24T12:03:14.414,0.15474639234343500 +cat0,2007-06-07T12:42:01.919,-0.9411802305233700 +cat1,1974-06-06T7:25:52.448,-0.11815062129196700 +cat0,1920-01-14T4:18:45.256,0.9136176552901030 +cat0,1949-03-16T17:30:31.315,1.1835197306896500 +cat0,1918-02-11T4:56:33.733,1.590028341536870 +cat0,2041-11-17T5:27:33.014,-0.336531965886856 +cat0,1967-09-27T8:59:31.466,1.261675342048890 +cat0,1956-07-15T22:21:16.965,0.578310726916326 +cat0,1935-05-21T0:02:42.225,-1.4164816071371400 +cat0,1942-04-05T5:09:11.719,0.10415461713555800 +cat0,1946-06-11T16:07:57.394,-0.02667551308686790 +cat0,1948-06-27T20:16:29.256,-1.3192815924874500 +cat1,1928-02-28T13:58:59.192,0.32423382972582200 +cat2,2040-11-26T7:02:57.077,-2.553883627605630 +cat0,1907-12-06T11:04:10.349,0.429387785378813 +cat2,1958-08-02T21:32:52.664,-0.47738305810704000 +cat0,2010-02-18T21:37:44.403,1.0143262770881800 +cat1,1987-05-21T22:54:43.084,-0.40724467467521000 +cat0,1934-03-11T19:03:04.081,-0.3067044662740190 +cat0,2025-12-19T18:49:07.295,0.7272776254171860 +cat0,1946-02-03T5:54:10.885,-0.05548842644064840 +cat0,1942-10-24T7:45:48.463,-1.4931341608122500 +cat2,1981-09-10T2:48:51.237,-2.0534904973482500 +cat0,1988-11-25T2:41:45.255,-0.016040558585624600 +cat0,1916-01-28T23:09:22.825,0.44838851750971200 +cat0,2012-11-16T19:54:47.636,0.3509978699145600 +cat0,2041-09-20T12:09:40.695,0.754106693736359 +cat0,2019-04-27T6:14:28.126,-0.840024115093775 +cat0,1981-05-23T14:46:07.457,-1.3973969359948100 +cat0,1986-04-17T16:27:16.795,0.3248167779101130 +cat0,1954-11-03T13:30:08.263,1.3833558957163700 +cat0,1994-12-20T13:48:15.383,-1.0515778656389200 +cat0,1997-09-01T0:14:17.430,-1.8220245782600400 +cat1,1950-08-21T23:07:14.830,0.18762404880924200 +cat0,2048-08-20T22:00:02.141,-1.3157870940627400 +cat1,1972-07-03T13:19:51.134,-1.6353999793389300 +cat0,1931-12-14T16:39:35.027,-1 +cat0,1931-08-10T6:26:20.557,-1 +cat1,1931-01-28T12:32:46.181,-1 
+cat0,1931-01-18T21:45:09.874,-1 +cat0,1931-12-26T17:51:57.956,-1 +cat0,1931-07-01T15:54:32.555,-1 +cat2,1900-07-25T6:31:07.833,-1.2182597299362500 +cat0,1959-06-08T10:26:57.769,-0.31924059385874400 +cat0,1902-09-20T8:52:59.725,-0.44109584640905700 +cat2,1935-11-23T16:52:59.889,-1.0189669091277500 +cat0,2018-04-01T0:53:59.874,0.290793242623674 +cat0,1976-10-24T8:38:25.348,0.017519279516016300 +cat0,1965-04-28T13:43:28.183,1.0030858736416700 +cat0,1954-04-09T13:56:48.487,-0.6052481483841730 +cat0,1916-09-15T15:51:57.640,1.2276203859880000 +cat0,1933-04-03T5:48:20.743,1.2653902572654700 +cat1,2026-11-27T7:32:51.596,-0.9136289659612960 +cat1,2004-03-25T19:09:13.787,0.6581705630471620 +cat1,1924-01-11T17:26:16.422,0.15405818419631800 +cat0,1938-04-28T3:51:09.202,-2.3251873025398100 +cat0,1926-02-28T12:45:20.278,-0.3113565362748890 +cat0,1915-05-19T12:10:13.554,-0.7458414743482300 +cat0,1904-05-21T8:10:13.716,0.5507191040512870 +cat0,1960-06-03T18:17:47.427,-1.9376593074950400 +cat0,1953-10-08T5:07:57.704,0.05833918662554260 +cat0,1961-09-03T8:46:42.192,-0.23498303136752600 +cat0,1967-01-19T8:06:19.576,-0.17205758860394100 +cat0,1949-08-06T4:39:58.411,0.35329533983999100 +cat1,1945-03-09T15:33:14.025,0.13110964125291100 +cat0,1980-10-22T2:23:51.655,0.7576990894952510 +cat0,1916-11-19T5:12:11.341,-0.20886703164712700 +cat0,2039-03-24T12:16:23.472,0.7049018841270400 +cat0,1979-03-04T21:05:57.169,0.15570885419387000 +cat0,1984-03-23T1:12:31.133,-1.612403320296440 +cat0,2047-06-06T6:13:53.200,1.1967403353670200 +cat0,2003-07-13T11:23:56.251,-0.42304823161163700 +cat0,1954-05-13T14:48:39.436,-0.8975807173009780 +cat0,2021-08-20T9:26:52.508,0.15309614499616600 +cat0,1971-07-03T15:35:26.262,0.2635985159883400 +cat0,1931-12-14T16:39:35.027,-1 +cat0,1931-08-10T6:26:20.557,-1 +cat1,1931-01-28T12:32:46.181,-1 +cat1,1931-01-18T21:45:09.874,-1 +cat0,1931-12-26T17:51:57.956,-1 +cat0,1931-07-01T15:54:32.555,-1 +cat0,1928-11-04T12:58:38.555,-0.10420096430633200 
+cat0,2032-05-01T20:40:11.703,0.08387559733796910 +cat0,1934-09-25T10:51:29.932,0.03580568002265290 +cat0,1980-05-01T17:03:19.116,0.2922897645607180 +cat0,2042-09-19T19:10:09.952,-1.212831052534220 +cat0,1915-11-04T11:30:46.185,1.4340164829039700 +cat0,2037-01-04T20:56:08.979,1.121018804108950 +cat0,2025-02-18T23:43:11.441,-0.2557709494685770 +cat0,1904-06-18T13:19:23.689,0.07943079180555790 +cat2,2029-11-28T6:22:13.870,-0.3198303795870420 +cat0,2047-04-25T1:26:27.818,0.7867886160010400 +cat0,1950-01-27T9:22:31.135,0.1280631999920600 +cat1,1910-02-13T7:07:07.900,-0.7881167078471380 +cat0,1984-07-01T19:44:28.591,1.3517097326998600 +cat0,1950-08-20T21:39:17.960,1.6018599629755800 +cat0,1987-02-19T6:13:25.249,0.24400351052238700 +cat0,1980-10-16T4:16:49.525,1.0259036459871100 +cat0,2006-12-19T9:23:01.127,-0.023203151483272700 +cat2,1911-01-27T9:40:27.457,-0.7426669696903150 +cat0,1925-05-07T5:15:59.027,-0.39531887838708600 +cat0,2046-11-16T3:57:25.659,2.8711682982490800 +cat1,1984-07-17T19:54:09.576,0.10167011079817700 +cat0,2038-06-30T17:46:41.922,0.11267502931184300 +cat0,2020-03-19T23:33:52.147,1.6475552178698900 +cat2,1939-03-22T4:08:08.064,-1.0967194359528100 +cat0,1939-09-12T2:51:27.656,0.4084254166132410 +cat0,1956-01-01T11:22:05.732,-1.071772891393940 +cat0,1935-10-26T11:35:26.131,-0.28799179856512400 +cat0,2036-09-19T13:46:42.274,1.6541301791410700 +cat2,1989-01-06T14:57:20.575,0.574356059517855 +cat0,1949-01-14T0:47:36.457,1.0499044983991300 +cat0,2040-07-02T23:56:03.538,0.06865113859567250 +cat0,1948-03-03T11:31:25.658,-1.1838175161359000 +cat2,1973-05-13T1:08:01.480,-0.22965402270946400 +cat0,2015-11-13T15:08:51.240,0.3639062592027540 +cat0,2033-09-08T3:31:44.508,-1.3563951839120800 +cat0,2010-09-27T22:47:08.723,-0.30325202694493500 +cat0,1905-12-24T13:23:31.130,0.1325202200683560 +cat0,1999-12-27T21:30:26.658,1.295456899887800 +cat1,1912-11-28T21:45:05.095,0.26964998263703500 +cat2,1932-09-05T13:11:41.130,0.9518436550086160 
+cat0,1974-09-19T14:44:41.083,1.0589858226338300 +cat1,1905-07-30T3:37:15.729,0.21080407609345900 +cat0,1988-09-09T15:18:40.864,2.7386182574561000 +cat0,2032-01-06T5:32:08.686,-1.3558400572988600 +cat2,1990-05-28T19:22:45.740,0.49477188922378600 +cat0,1982-05-13T18:47:47.143,0.9685369243896560 +cat0,1944-12-22T7:04:59.729,1.850756507230740 +cat0,2003-02-06T7:41:22.848,-0.7821110736718790 +cat0,1955-11-30T4:16:05.302,0.9880009455757780 +cat0,1942-06-17T20:16:46.473,0.10221878690850200 +cat0,1912-05-14T18:52:33.514,0.14428012521953800 +cat0,2038-11-27T15:21:00.085,-0.07140492165572760 +cat0,2026-05-11T20:31:00.208,0.017946502741538600 +cat0,2025-11-09T20:43:52.573,-1.3564902295742400 +cat0,1954-10-16T12:56:39.070,-0.2664401765566100 +cat2,1972-04-24T11:25:28.039,-1.0836249844909700 +cat0,1975-12-28T18:56:12.103,0.43437425034266300 +cat0,1994-01-09T11:47:09.860,-0.4548855574812190 +cat0,1937-12-04T6:09:11.132,-0.8055240929239220 +cat0,1924-02-06T12:24:08.164,1.0086030383477000 +cat1,1942-03-26T7:36:07.491,-0.45200395082880400 +cat1,1998-06-07T15:50:06.995,-0.17508130113914000 +cat0,1954-04-04T23:15:19.277,0.9713570152485750 +cat2,1961-05-10T4:30:56.606,0.34566046567242300 +cat2,2023-07-20T20:16:17.805,2.56853551180175 +cat2,2030-08-04T13:02:12.346,0.10118290600513900 +cat0,1940-03-04T17:43:24.925,-0.5106179272885070 +cat0,2036-12-22T2:52:20.844,-0.929662396086042 +cat0,1923-08-17T8:41:49.103,-0.026484141271550300 +cat0,1981-08-05T7:30:18.664,-0.7471666160737910 +cat0,1933-04-13T10:12:35.017,1.2734994932685800 +cat0,2049-07-13T4:03:05.705,0.09631142223880300 +cat0,2030-04-03T13:18:58.034,0.842384343605036 +cat0,1940-06-02T13:46:50.085,1.7761932287703400 +cat0,1968-01-17T5:01:28.630,-1.0802646262817600 +cat0,1967-01-14T16:07:44.885,-0.9154992761048660 +cat2,1979-04-06T9:11:44.964,0.6720668626014990 +cat0,2016-11-11T10:27:43.163,-0.2642263762401750 +cat0,1931-12-14T16:39:35.027,-1 +cat0,1931-08-10T6:26:20.557,-1 +cat2,1931-01-28T12:32:46.181,-1 
+cat1,1931-01-18T21:45:09.874,-1 +cat1,1931-12-26T17:51:57.956,-1 +cat0,1931-07-01T15:54:32.555,-1 +cat1,2040-09-18T21:15:32.769,1.2830985864878700 +cat0,2011-10-04T12:19:26.531,-1.0957627117168000 +cat0,2013-04-27T3:23:24.207,-0.3342979509908160 +cat0,1903-11-03T16:53:57.740,-1.1705876596037400 +cat0,2041-09-11T9:42:09.000,0.37428060347334400 +cat0,2020-05-29T6:43:27.053,-2.9467818334987400 +cat0,2004-09-06T20:08:20.263,0.26522888546196300 +cat0,2023-03-14T11:56:57.361,-0.05208187094929490 +cat0,1907-05-25T2:21:59.707,0.28352780087595800 +cat0,1939-05-28T3:06:22.599,-1.122169237619740 +cat0,1977-07-29T11:10:13.423,1.421370548703120 +cat1,1982-08-27T15:49:57.787,1.0065827562011500 +cat0,1926-09-19T3:19:39.623,-1.4453753263173800 +cat0,2035-01-14T23:31:05.995,-0.0679938828635094 +cat1,1956-01-20T15:01:46.664,-1.2871865151157300 +cat0,2021-01-13T5:03:26.084,1.0577306795268100 +cat1,2042-06-09T12:25:11.722,-0.5688226211901160 +cat0,1940-02-24T20:24:58.274,-2.5427125656980900 +cat0,1956-05-18T2:10:26.999,-0.3444622489813740 +cat1,1970-04-01T11:23:37.185,-0.22313626036669000 +cat0,1944-11-28T23:54:53.377,-1.459789603942420 +cat0,1920-09-14T5:19:24.472,0.11531094099996300 +cat0,1902-02-02T9:14:26.012,1.568355107045420 +cat2,1969-05-20T16:39:15.447,1.1453009740604700 +cat1,2016-05-29T11:06:53.057,-1.9051449304463800 +cat0,2042-03-29T20:17:37.428,1.4645114978094000 +cat0,1972-10-09T1:29:40.708,0.028939058603611700 +cat0,2025-04-25T14:45:31.987,-0.19241932030706400 +cat0,1912-11-09T0:21:15.774,-0.13644086938408200 +cat0,2025-06-09T11:38:10.744,-0.592813411736774 +cat0,2013-04-24T14:05:00.906,-0.7957869390454290 +cat0,1955-04-23T7:02:24.805,-0.4159286577549600 +cat0,2037-08-27T12:26:41.863,-0.36619981989962400 +cat1,2039-10-06T10:09:51.598,1.7381684037256900 +cat0,1939-05-08T15:25:32.794,0.4495362062850690 +cat0,1915-02-21T17:42:23.211,-0.19420717311133500 +cat1,1983-06-30T22:59:29.038,-0.21267233160857000 +cat0,1956-05-28T2:11:20.372,-0.7909384831627370 
+cat0,1950-10-12T19:21:28.302,-1.534585111585110 +cat0,2025-06-15T3:49:46.207,-1.1529327271832900 +cat0,2027-03-16T15:53:54.062,1.229758637188510 +cat0,2043-01-27T22:53:16.244,0.3489280834517870 +cat0,1974-09-14T2:39:00.024,0.12265708536735100 +cat0,1940-01-12T3:35:56.867,-0.2964576154187260 +cat0,1935-04-18T14:18:08.988,0.5510491340510900 +cat0,1960-03-16T10:37:35.788,0.42653493623795200 +cat2,1966-01-13T16:28:35.728,0.6650302598878750 +cat0,1922-05-19T23:36:48.810,0.1926421041574990 +cat0,1946-05-13T0:36:00.245,1.5734701681194500 +cat0,1953-07-16T14:00:25.873,0.2607177357834100 +cat2,1918-08-17T22:44:06.270,-1.75565812546354 +cat1,2016-02-01T12:39:23.628,-0.5670495371058450 +cat0,2019-10-21T22:59:31.398,0.7820535567155300 +cat0,2035-01-12T12:45:23.276,1.6076188407092700 +cat1,1985-02-09T12:11:14.616,0.3477335573722570 +cat0,1938-07-08T23:11:34.094,0.5710297486283210 +cat0,1973-03-06T16:04:49.615,0.7040190491753440 +cat2,1939-07-30T18:42:58.126,0.4534094598174740 +cat2,1943-05-08T18:49:21.116,-0.5606615700591240 +cat0,1946-07-22T20:09:39.690,0.24548315700329800 +cat0,2045-06-08T3:34:06.972,-0.30212849631168500 +cat0,1931-04-16T7:20:21.649,-1 +cat0,1933-06-29T8:06:44.191,-2.4405420522649600 +cat0,1972-04-30T15:24:04.724,-0.5281381384955710 +cat2,1997-07-21T5:28:49.896,-2.0280082051609300 +cat0,1945-06-15T4:46:46.895,-0.6908307527462120 +cat1,2046-06-22T21:26:11.504,-1.0369482399167800 +cat0,2005-08-02T4:56:12.362,-2.0691763186139400 +cat0,2026-11-24T5:17:56.994,-2.6648477051849700 +cat0,1924-02-10T9:04:45.858,0.6122670178611580 +cat2,2004-09-15T19:56:05.086,0.8994882979145600 +cat0,1982-02-05T19:48:31.370,-2.085135779653840 +cat0,1936-02-20T12:01:25.557,0.26472626732200400 +cat0,1974-03-09T18:24:10.802,0.35645504399103900 +cat0,1935-02-23T18:36:18.978,-0.402243061836187 +cat2,1963-06-30T14:25:53.967,0.6612346097834120 +cat0,1998-05-30T5:19:19.236,-1.2754581409738000 +cat0,1927-06-14T8:22:50.707,-1.1177830985072900 +cat0,2036-01-26T13:35:56.651,-1.1228125288554300 
+cat0,1933-08-13T20:57:25.464,1.685718673183620 +cat2,2016-09-15T10:29:05.687,1.7204687008244000 +cat0,1971-01-06T8:25:21.775,-0.25902204603036900 +cat0,1963-01-31T10:23:23.296,-0.7962166796305350 +cat0,1948-04-18T12:29:36.682,2.3897567445224300 +cat0,1906-03-10T19:01:49.456,-0.7258916223884610 +cat2,2012-06-07T12:16:18.315,-0.5902848302144790 +cat1,1963-08-25T15:00:17.359,-0.36355052528111000 +cat0,2003-02-03T0:31:59.778,-0.560026554898398 +cat0,1929-04-17T1:38:03.033,-0.3230099130111670 +cat0,1915-03-15T18:42:31.201,-0.06728438149423410 +cat0,1975-11-15T0:37:24.117,-0.22806672966554000 +cat0,1914-07-01T7:14:54.185,0.19228724328758300 +cat0,1989-07-11T15:43:35.840,-0.4211561745192410 +cat0,1925-05-23T7:08:07.360,0.18848657799732 +cat0,1969-05-31T9:50:52.143,0.9715924313187210 +cat0,1986-08-17T1:34:57.712,0.2647312949455410 +cat0,1926-09-01T6:47:47.374,1.5113666177294000 +cat2,1993-01-26T9:11:04.856,-1.5895301814278000 +cat0,1977-06-08T1:33:24.741,0.13343164243893100 +cat0,1935-10-22T17:06:04.961,-0.9722509356161470 +cat0,1929-07-31T6:04:51.043,-0.2547527454047220 +cat1,1924-09-08T10:43:39.936,-1.0171307078736800 +cat0,2046-10-07T3:00:47.049,0.902310179744461 +cat0,2041-01-22T5:15:08.329,-1.4033088711136000 +cat0,1984-01-02T4:52:18.562,-1.2203467313595800 +cat0,1993-09-18T10:38:30.839,0.24775360492974400 +cat0,1953-05-03T4:18:48.997,1.2776833563602500 +cat2,1982-12-04T9:24:57.235,2.1415214791606900 +cat0,2039-10-15T7:17:06.836,-0.8434384690615780 +cat0,2021-10-18T2:14:08.716,0.2801022970979600 +cat0,1906-04-08T14:51:36.916,0.4365128281845230 +cat0,1977-06-15T2:14:42.742,-0.13029399556089700 +cat0,1981-08-20T11:10:26.445,0.5344782566867750 +cat0,2032-01-21T21:12:27.501,-0.8212603309596230 +cat0,1904-06-22T1:20:33.781,0.47747186497058900 +cat1,2014-06-14T17:26:32.322,-0.4158055394387770 +cat0,2015-07-07T19:33:13.805,-0.5743967445667770 +cat2,2005-07-09T16:14:46.508,0.3689344819625760 +cat0,1965-07-11T10:44:54.398,-0.5063344732798410 
+cat0,1926-09-12T9:46:29.927,-0.04822218683032430 +cat0,2000-08-04T19:19:28.214,0.21714462998429200 +cat1,2024-01-28T7:08:45.420,-0.24235102067413000 +cat2,2030-06-10T18:36:48.595,0.7612323767683700 +cat1,2023-06-18T21:37:31.476,0.42630274400765500 +cat0,2025-03-17T11:57:26.001,-0.5530993264508780 +cat0,1970-11-06T18:49:41.260,-0.3175672219915780 +cat0,2040-04-08T4:51:16.697,-1.5892763765697600 +cat1,1998-12-07T5:35:14.430,-0.37870270195660800 +cat2,2047-02-06T9:06:40.128,-1.1660988512656900 +cat0,1953-07-20T23:01:18.213,0.505493810073354 +cat1,1948-12-19T3:05:31.929,-0.27003007898857300 +cat0,1914-07-30T6:34:33.887,-0.6337414536637140 +cat1,1973-04-21T10:32:31.805,0.7392964514069730 +cat0,2004-10-06T22:45:11.319,0.05540583884003480 +cat1,2026-08-18T17:37:16.247,0.8335321000527230 +cat0,1989-10-13T14:01:14.567,0.042266216165708900 +cat0,1999-01-23T9:38:24.473,-0.07857864924803120 +cat2,2006-06-26T8:33:49.710,-0.022021099833058100 +cat0,1991-06-01T21:00:42.072,0.18324014196860300 +cat2,2022-04-02T21:46:02.305,-0.7701525786974060 +cat1,1915-05-29T0:47:36.664,1.1079209703988500 +cat0,1963-10-29T18:32:22.955,-0.80726727463506 +cat0,1917-01-12T2:34:23.758,0.2121614041831680 +cat1,1984-04-17T3:53:12.409,1.1839115891630100 +cat0,2045-04-02T1:10:53.355,2.2318932677782 +cat0,1959-05-09T11:05:10.528,-2.3341022394089900 +cat0,1977-10-29T4:13:45.806,-2.3779352723758200 +cat2,1980-09-08T3:01:24.912,-0.01961353421530140 +cat0,1982-08-09T1:58:42.351,-0.10655329935255900 +cat2,1967-04-03T10:39:36.329,1.1707101374007100 +cat2,1998-07-07T15:35:05.532,-1.2900863167074100 +cat2,2031-06-29T4:16:45.511,0.5208164111773570 +cat1,2026-03-17T10:19:45.688,-1.6202359714727900 +cat0,1954-10-12T17:41:29.155,-0.9514517026764590 +cat0,1939-03-11T11:47:50.155,-0.5321187365448350 +cat1,1986-12-06T15:54:09.934,1.2028295727586600 +cat0,1920-07-11T0:44:58.724,-0.09248310457164130 +cat0,1942-12-05T12:03:54.060,0.14169988438375900 +cat1,1935-08-31T2:30:09.481,0.047560729851771900 
+cat2,2039-05-30T23:01:49.483,-0.37031220226847000 +cat0,2005-12-28T16:43:40.615,0.41472367694593700 +cat1,2004-04-28T7:49:14.705,-0.06423952162800340 +cat1,1958-11-07T9:19:14.513,0.8870462614665560 +cat0,1984-11-23T8:43:12.767,-0.00166789349871702 +cat0,2046-07-22T13:21:01.820,-0.6415224575358570 +cat0,1949-02-03T19:33:55.246,-1.2675271217617800 +cat1,2019-06-30T10:48:18.918,-1.3099528449974900 +cat0,1931-03-10T22:32:04.468,-1 +cat0,2000-07-25T1:46:04.212,-0.3663275609107800 +cat0,1968-09-11T20:42:10.548,-0.05282802015138150 +cat0,1919-12-12T9:35:05.988,0.4309058968146010 +cat2,2009-12-16T7:26:55.412,2.4533827968032900 +cat1,1994-03-02T13:07:18.881,-0.10315014270037300 +cat0,1912-07-31T8:44:39.779,-0.6257450520965030 +cat1,1982-04-19T11:28:13.886,0.03998579192466950 +cat0,1921-01-23T0:49:00.328,-0.4627296562368210 +cat1,1974-08-19T23:34:10.502,-0.2504398593804940 +cat0,1973-02-13T14:04:18.048,-0.05581671912278440 +cat0,1936-01-28T21:46:35.449,-0.04516849107026210 +cat0,1990-09-24T6:20:33.107,2.3952359376212500 +cat0,2022-06-29T19:11:26.163,1.0799700557296600 +cat0,1974-04-05T8:22:14.471,0.7409102020540740 +cat0,2021-05-06T12:03:37.932,-0.03670901692107490 +cat0,1914-12-23T4:12:16.814,-0.44455745753485500 +cat0,1935-08-24T19:03:08.318,-0.45982506588021700 +cat0,1989-07-18T15:36:19.769,-0.43219455058589100 +cat0,1939-02-27T3:20:38.145,0.2681281123236250 +cat2,1920-11-08T11:29:32.842,0.11207997413944000 +cat0,2039-02-16T13:01:40.152,0.018691036446070800 +cat0,1916-07-31T10:47:48.199,1.1325189552929100 +cat0,1987-09-27T12:31:23.970,0.4585648274567310 +cat0,1955-07-07T2:25:41.953,-0.8977138347308100 +cat0,1948-02-23T14:23:31.987,0.8089740362075610 +cat0,2044-10-06T8:26:21.821,-0.7379100013224970 +cat0,2035-10-12T3:53:27.169,0.9497095572729140 +cat2,2024-06-14T16:58:25.464,-0.24685785926393300 +cat2,1943-09-18T18:32:32.671,-0.2788307925804300 +cat0,1907-11-16T0:54:03.800,-1.8561477091486800 +cat0,1927-08-30T15:42:23.266,-0.1620867688151570 
+cat2,1902-11-25T5:24:30.871,0.5690532347729310 +cat1,2013-06-28T14:58:08.341,-0.5219678590500580 +cat0,1997-01-24T20:35:22.809,0.3764668121095580 +cat0,1970-04-14T3:59:04.588,-0.14594701926370000 +cat0,1986-10-23T17:54:26.257,-1.1241176113925400 +cat0,2048-11-26T14:38:37.283,0.6602390725712170 +cat2,1975-09-16T6:14:41.770,0.6700272315116490 +cat0,1928-05-31T9:55:09.280,0.5381879356073510 +cat1,1911-11-22T21:58:54.389,-1.4926442705118500 +cat0,1960-04-25T6:35:52.581,0.777349003233133 +cat0,1965-04-05T4:19:25.967,0.23082441251512600 +cat1,1933-08-12T3:12:31.100,0.5724715242945930 +cat0,1993-08-28T7:21:18.591,0.3422598104424350 +cat0,1967-02-10T5:16:31.294,-0.7438708553187880 +cat1,1976-05-04T7:18:09.841,-1.2140102486654100 +cat2,1901-10-07T14:15:01.155,-1.3900305260471100 +cat0,1987-11-05T3:49:12.829,-0.3866555498925360 +cat0,1994-07-21T4:21:10.004,0.040133692387235600 +cat0,2019-06-14T16:03:22.840,1.0319259004120500 +cat0,1972-04-28T23:16:03.938,-1.8232534831051000 +cat2,1974-08-25T7:03:28.060,-2.3377588306547800 +cat0,2049-09-04T23:04:56.258,-0.860032230824609 +cat2,2045-01-07T12:49:32.056,-1.2606489518175800 +cat0,1922-08-09T3:04:48.984,0.057592272484552100 +cat0,1965-09-22T21:00:45.606,1.0064736342527900 +cat0,1921-11-04T5:22:45.866,-0.2844280458668430 +cat0,1957-01-06T0:36:41.053,-1.185022381446060 +cat0,2004-08-04T8:12:01.436,0.9576596829985820 +cat0,1913-01-25T9:01:28.130,-1.3332786288823000 +cat0,1940-12-27T20:26:10.866,-1.1577289985147000 +cat0,1952-07-03T17:12:05.558,-0.6641626915509720 +cat1,1985-09-10T13:49:09.569,1.4631417332911600 +cat1,1989-12-07T2:03:15.067,-0.20178157060040900 +cat0,1954-07-01T17:14:47.325,-0.09078824622111580 +cat0,2028-02-18T6:24:17.491,-0.04962532553404380 +cat0,1975-01-13T22:24:58.331,1.848986379690380 +cat0,2038-01-08T23:39:33.524,0.5518374721865490 +cat1,1966-07-08T0:58:20.460,0.49456215233819300 +cat0,2026-11-17T7:08:01.701,-0.22100986276737000 +cat2,1905-01-15T18:33:34.003,-0.42110955640230000 
+cat0,2031-06-14T10:46:37.309,1.2568723287406500 +cat0,1947-04-17T17:55:40.459,-1.3490199823988600 +cat2,2038-06-21T3:37:51.187,0.605741416961361 +cat0,2030-07-14T7:49:28.018,-0.6979063030743960 +cat0,2017-04-15T0:08:39.834,0.24077481617125400 +cat2,2001-12-23T8:54:05.292,0.4356819495199730 +cat0,1932-01-22T5:57:33.618,1.4252859534333100 +cat0,2039-10-02T13:01:09.283,-2.625386919837360 +cat0,1946-08-25T0:31:24.862,1.2765371531578300 +cat0,1930-02-10T11:51:23.198,-0.5096227885702360 +cat2,2014-05-21T16:22:52.977,0.16398209969796600 +cat1,1949-05-06T5:49:58.073,-1.3870225593410200 +cat1,1989-02-02T14:09:08.698,-0.7584895343435220 +cat0,1948-10-10T15:42:47.043,-1.7341883473221200 +cat0,1987-02-28T16:45:32.907,0.5067189969416720 +cat0,2039-11-09T18:45:29.835,1.4862506779033700 +cat0,1981-06-23T13:41:22.796,-0.39929319546842700 +cat0,1935-10-10T2:12:38.324,-0.4565002101662800 +cat0,2029-11-27T21:37:28.183,-1.0432484429917800 +cat0,1983-04-09T18:42:20.251,0.19057677373248500 +cat0,2014-10-18T1:18:00.015,-0.8445524244098320 +cat0,1972-10-03T7:19:31.268,-1.8106594888764100 +cat0,1992-07-04T4:10:28.142,1.0240901118833500 +cat0,2029-12-11T5:30:30.688,0.7316074313736680 +cat0,1928-08-01T4:01:27.045,-0.8310658840944360 +cat0,1929-01-16T11:35:25.175,1.7200248415633800 +cat0,1966-06-06T13:19:54.160,0.5422167270241630 +cat0,1916-06-10T2:02:39.123,-0.8259888109693500 +cat1,1928-12-14T11:10:25.288,0.7167077284596350 +cat1,2022-02-26T14:36:13.670,-1.7537399585786200 +cat0,2036-05-11T14:05:11.964,-2.0805007715393900 +cat0,1925-01-18T15:04:37.711,-0.9964813377959590 +cat1,2041-08-12T22:17:53.822,0.8477934942364750 +cat0,1995-07-31T23:29:46.532,0.43780981810361700 +cat0,1999-11-26T11:06:09.131,-0.16126405255833400 +cat2,2040-06-18T8:25:12.607,0.909008058085873 +cat1,2033-04-09T13:15:57.178,0.16855742101712600 +cat2,2005-09-08T23:53:29.154,-0.8829881365503520 +cat2,1962-08-09T17:44:19.907,-0.054831642521249500 +cat0,1985-08-07T13:07:54.086,0.18381608259635400 
+cat1,2032-01-09T5:21:19.109,0.03857537293851960 +cat0,2028-03-14T11:00:29.717,0.24285153227647000 +cat0,1988-06-22T1:25:09.981,1.7340948712748200 +cat1,1922-01-03T5:48:33.999,-0.7180676222641370 +cat2,1903-10-20T8:04:26.666,0.40159441159620500 +cat0,1922-10-30T12:58:25.565,0.5152809441357310 +cat0,1921-01-18T1:48:40.611,1.282054003225870 +cat0,2013-11-19T5:25:38.754,0.9426270852177870 +cat0,1922-06-10T20:50:07.960,-1.8802240640377400 +cat1,2034-07-01T12:44:58.508,1.6475881356599700 +cat1,2012-11-29T0:02:56.257,-0.8196296102541070 +cat2,1935-08-16T22:07:55.977,1.1473696156514800 +cat0,2043-06-15T20:47:47.889,0.9907315825269950 +cat0,1938-09-16T13:23:40.366,-0.40441597497231700 +cat0,1964-03-02T4:16:40.930,-0.654215169859463 +cat1,2001-04-11T7:10:01.067,-0.12297513047005500 +cat0,1936-11-24T7:05:23.119,1.0761471244657600 +cat0,2031-12-02T9:26:29.751,1.525097519514010 +cat0,1937-01-26T4:39:44.234,-0.6276299809138960 +cat0,2047-12-23T9:21:44.391,-1.2232268478246800 +cat0,2036-09-02T14:15:50.945,0.043459800733335100 +cat0,1993-05-02T0:00:26.206,-1.419410001909770 +cat0,1911-12-09T7:33:01.255,0.13249535963552200 +cat0,1912-10-16T10:42:54.560,-0.45627082476414200 +cat2,1948-09-11T22:59:49.411,1.354829727307460 +cat0,1965-12-24T2:26:59.478,-0.9855406984926360 +cat0,1955-09-04T4:02:42.329,-0.025032998727192600 +cat0,1963-07-13T13:39:57.107,0.6195430691952880 +cat0,2010-12-04T14:12:37.142,-0.7778425759683170 +cat0,1957-03-13T19:45:09.544,-0.40190567238634200 +cat0,1969-02-16T23:33:00.186,-0.020727608526111000 +cat1,2049-05-29T12:42:03.207,-0.2475484947979210 +cat0,1968-12-08T17:01:00.937,1.454022116277440 \ No newline at end of file diff --git a/data/diabetes.csv b/data/diabetes.csv new file mode 100644 index 00000000..5e8a7f7b --- /dev/null +++ b/data/diabetes.csv @@ -0,0 +1,201 @@ +pregnancies,plasma glucose,blood pressure,triceps skin thickness,insulin,bmi,diabetes pedigree,age,diabetes +6,148,72,35,0,33.6,0.627,50,true +1,85,66,29,0,26.6,0.351,31,false 
+8,183,64,0,0,23.3,0.672,32,true +1,89,66,23,94,28.1,0.167,21,false +0,137,40,35,168,43.1,2.288,33,true +5,116,74,0,0,25.6,0.201,30,false +3,78,50,32,88,31.0,0.248,26,true +10,115,0,0,0,35.3,0.134,29,false +2,197,70,45,543,30.5,0.158,53,true +8,125,96,0,0,0.0,0.232,54,true +4,110,92,0,0,37.6,0.191,30,false +10,168,74,0,0,38.0,0.537,34,true +10,139,80,0,0,27.1,1.441,57,false +1,189,60,23,846,30.1,0.398,59,true +5,166,72,19,175,25.8,0.587,51,true +7,100,0,0,0,30.0,0.484,32,true +0,118,84,47,230,45.8,0.551,31,true +7,107,74,0,0,29.6,0.254,31,true +1,103,30,38,83,43.3,0.183,33,false +1,115,70,30,96,34.6,0.529,32,true +3,126,88,41,235,39.3,0.704,27,false +8,99,84,0,0,35.4,0.388,50,false +7,196,90,0,0,39.8,0.451,41,true +9,119,80,35,0,29.0,0.263,29,true +11,143,94,33,146,36.6,0.254,51,true +10,125,70,26,115,31.1,0.205,41,true +7,147,76,0,0,39.4,0.257,43,true +1,97,66,15,140,23.2,0.487,22,false +13,145,82,19,110,22.2,0.245,57,false +5,117,92,0,0,34.1,0.337,38,false +5,109,75,26,0,36.0,0.546,60,false +3,158,76,36,245,31.6,0.851,28,true +3,88,58,11,54,24.8,0.267,22,false +6,92,92,0,0,19.9,0.188,28,false +10,122,78,31,0,27.6,0.512,45,false +4,103,60,33,192,24.0,0.966,33,false +11,138,76,0,0,33.2,0.420,35,false +9,102,76,37,0,32.9,0.665,46,true +2,90,68,42,0,38.2,0.503,27,true +4,111,72,47,207,37.1,1.390,56,true +3,180,64,25,70,34.0,0.271,26,false +7,133,84,0,0,40.2,0.696,37,false +7,106,92,18,0,22.7,0.235,48,false +9,171,110,24,240,45.4,0.721,54,true +7,159,64,0,0,27.4,0.294,40,false +0,180,66,39,0,42.0,1.893,25,true +1,146,56,0,0,29.7,0.564,29,false +2,71,70,27,0,28.0,0.586,22,false +7,103,66,32,0,39.1,0.344,31,true +7,105,0,0,0,0.0,0.305,24,false +1,103,80,11,82,19.4,0.491,22,false +1,101,50,15,36,24.2,0.526,26,false +5,88,66,21,23,24.4,0.342,30,false +8,176,90,34,300,33.7,0.467,58,true +7,150,66,42,342,34.7,0.718,42,false +1,73,50,10,0,23.0,0.248,21,false +7,187,68,39,304,37.7,0.254,41,true +0,100,88,60,110,46.8,0.962,31,false +0,146,82,0,0,40.5,1.781,44,false 
+0,105,64,41,142,41.5,0.173,22,false +2,84,0,0,0,0.0,0.304,21,false +8,133,72,0,0,32.9,0.270,39,true +5,44,62,0,0,25.0,0.587,36,false +2,141,58,34,128,25.4,0.699,24,false +7,114,66,0,0,32.8,0.258,42,true +5,99,74,27,0,29.0,0.203,32,false +0,109,88,30,0,32.5,0.855,38,true +2,109,92,0,0,42.7,0.845,54,false +1,95,66,13,38,19.6,0.334,25,false +4,146,85,27,100,28.9,0.189,27,false +2,100,66,20,90,32.9,0.867,28,true +5,139,64,35,140,28.6,0.411,26,false +13,126,90,0,0,43.4,0.583,42,true +4,129,86,20,270,35.1,0.231,23,false +1,79,75,30,0,32.0,0.396,22,false +1,0,48,20,0,24.7,0.140,22,false +7,62,78,0,0,32.6,0.391,41,false +5,95,72,33,0,37.7,0.370,27,false +0,131,0,0,0,43.2,0.270,26,true +2,112,66,22,0,25.0,0.307,24,false +11,127,106,0,0,39.0,0.190,51,false +3,80,82,31,70,34.2,1.292,27,true +10,162,84,0,0,27.7,0.182,54,false +1,199,76,43,0,42.9,1.394,22,true +8,167,106,46,231,37.6,0.165,43,true +9,145,80,46,130,37.9,0.637,40,true +6,115,60,39,0,33.7,0.245,40,true +1,112,80,45,132,34.8,0.217,24,false +4,145,82,18,0,32.5,0.235,70,true +10,111,70,27,0,27.5,0.141,40,true +6,98,58,33,190,34.0,0.430,43,false +9,154,78,30,100,30.9,0.164,45,false +6,165,68,26,168,33.6,0.631,49,false +1,99,58,10,0,25.4,0.551,21,false +10,68,106,23,49,35.5,0.285,47,false +3,123,100,35,240,57.3,0.880,22,false +8,91,82,0,0,35.6,0.587,68,false +6,195,70,0,0,30.9,0.328,31,true +9,156,86,0,0,24.8,0.230,53,true +0,93,60,0,0,35.3,0.263,25,false +3,121,52,0,0,36.0,0.127,25,true +2,101,58,17,265,24.2,0.614,23,false +2,56,56,28,45,24.2,0.332,22,false +0,162,76,36,0,49.6,0.364,26,true +0,95,64,39,105,44.6,0.366,22,false +4,125,80,0,0,32.3,0.536,27,true +5,136,82,0,0,0.0,0.640,69,false +2,129,74,26,205,33.2,0.591,25,false +3,130,64,0,0,23.1,0.314,22,false +1,107,50,19,0,28.3,0.181,29,false +1,140,74,26,180,24.1,0.828,23,false +1,144,82,46,180,46.1,0.335,46,true +8,107,80,0,0,24.6,0.856,34,false +13,158,114,0,0,42.3,0.257,44,true +2,121,70,32,95,39.1,0.886,23,false +7,129,68,49,125,38.5,0.439,43,true 
+2,90,60,0,0,23.5,0.191,25,false +7,142,90,24,480,30.4,0.128,43,true +3,169,74,19,125,29.9,0.268,31,true +0,99,0,0,0,25.0,0.253,22,false +4,127,88,11,155,34.5,0.598,28,false +4,118,70,0,0,44.5,0.904,26,false +2,122,76,27,200,35.9,0.483,26,false +6,125,78,31,0,27.6,0.565,49,true +1,168,88,29,0,35.0,0.905,52,true +2,129,0,0,0,38.5,0.304,41,false +4,110,76,20,100,28.4,0.118,27,false +6,80,80,36,0,39.8,0.177,28,false +10,115,0,0,0,0.0,0.261,30,true +2,127,46,21,335,34.4,0.176,22,false +9,164,78,0,0,32.8,0.148,45,true +2,93,64,32,160,38.0,0.674,23,true +3,158,64,13,387,31.2,0.295,24,false +5,126,78,27,22,29.6,0.439,40,false +10,129,62,36,0,41.2,0.441,38,true +0,134,58,20,291,26.4,0.352,21,false +3,102,74,0,0,29.5,0.121,32,false +7,187,50,33,392,33.9,0.826,34,true +3,173,78,39,185,33.8,0.970,31,true +10,94,72,18,0,23.1,0.595,56,false +1,108,60,46,178,35.5,0.415,24,false +5,97,76,27,0,35.6,0.378,52,true +4,83,86,19,0,29.3,0.317,34,false +1,114,66,36,200,38.1,0.289,21,false +1,149,68,29,127,29.3,0.349,42,true +5,117,86,30,105,39.1,0.251,42,false +1,111,94,0,0,32.8,0.265,45,false +4,112,78,40,0,39.4,0.236,38,false +1,116,78,29,180,36.1,0.496,25,false +0,141,84,26,0,32.4,0.433,22,false +2,175,88,0,0,22.9,0.326,22,false +2,92,52,0,0,30.1,0.141,22,false +3,130,78,23,79,28.4,0.323,34,true +8,120,86,0,0,28.4,0.259,22,true +2,174,88,37,120,44.5,0.646,24,true +2,106,56,27,165,29.0,0.426,22,false +2,105,75,0,0,23.3,0.560,53,false +4,95,60,32,0,35.4,0.284,28,false +0,126,86,27,120,27.4,0.515,21,false +8,65,72,23,0,32.0,0.600,42,false +2,99,60,17,160,36.6,0.453,21,false +1,102,74,0,0,39.5,0.293,42,true +11,120,80,37,150,42.3,0.785,48,true +3,102,44,20,94,30.8,0.400,26,false +1,109,58,18,116,28.5,0.219,22,false +9,140,94,0,0,32.7,0.734,45,true +13,153,88,37,140,40.6,1.174,39,false +12,100,84,33,105,30.0,0.488,46,false +1,147,94,41,0,49.3,0.358,27,true +1,81,74,41,57,46.3,1.096,32,false +3,187,70,22,200,36.4,0.408,36,true +6,162,62,0,0,24.3,0.178,50,true 
+4,136,70,0,0,31.2,1.182,22,true +1,121,78,39,74,39.0,0.261,28,false +3,108,62,24,0,26.0,0.223,25,false +0,181,88,44,510,43.3,0.222,26,true +8,154,78,32,0,32.4,0.443,45,true +1,128,88,39,110,36.5,1.057,37,true +7,137,90,41,0,32.0,0.391,39,false +0,123,72,0,0,36.3,0.258,52,true +1,106,76,0,0,37.5,0.197,26,false +6,190,92,0,0,35.5,0.278,66,true +2,88,58,26,16,28.4,0.766,22,false +9,170,74,31,0,44.0,0.403,43,true +9,89,62,0,0,22.5,0.142,33,false +10,101,76,48,180,32.9,0.171,63,false +2,122,70,27,0,36.8,0.340,27,false +5,121,72,23,112,26.2,0.245,30,false +1,126,60,0,0,30.1,0.349,47,true +2,157,74,35,440,39.4,0.134,30,false +1,167,74,17,144,23.4,0.447,33,true +0,179,50,36,159,37.8,0.455,22,true +11,136,84,35,130,28.3,0.260,42,true +0,107,60,25,0,26.4,0.133,23,false +1,91,54,25,100,25.2,0.234,23,false +1,117,60,23,106,33.8,0.466,27,false +5,123,74,40,77,34.1,0.269,28,false +2,120,54,0,0,26.8,0.455,27,false +1,106,70,28,135,34.2,0.142,22,false +2,155,52,27,540,38.7,0.240,25,true diff --git a/data/fields/fields_summary.csv b/data/fields/fields_summary.csv new file mode 100644 index 00000000..366ceff0 --- /dev/null +++ b/data/fields/fields_summary.csv @@ -0,0 +1,6 @@ +field column,field ID,field name,field label,field description,field type,preferred,missing count,errors,contents summary,errors summary +0,000000,sepal length,,,numeric,true,0,0,"[4.3, 7.9], mean: 5.84333", +1,000001,sepal width,,,numeric,true,0,0,"[2, 4.4], mean: 3.05733", +2,000002,petal length,,,numeric,true,0,0,"[1, 6.9], mean: 3.758", +3,000003,petal width,,,numeric,true,0,0,"[0.1, 2.5], mean: 1.19933", +4,000004,species,,,categorical,true,0,0,"3 categorìes: Iris-setosa (50), Iris-versicolor (50), Iris-virginica (50)", diff --git a/data/fields/fields_summary_modified.csv b/data/fields/fields_summary_modified.csv new file mode 100644 index 00000000..43d208d6 --- /dev/null +++ b/data/fields/fields_summary_modified.csv @@ -0,0 +1,6 @@ +field column,field ID,field name,field label,field description,field 
type,preferred,missing count,errors,contents summary,errors summary +0,000000,sepal length,,,categorical,true,0,0,"[4.3, 7.9], mean: 5.84333", +1,000001,sepal width,,,numeric,true,0,0,"[2, 4.4], mean: 3.05733", +2,000002,petal length,,,numeric,true,0,0,"[1, 6.9], mean: 3.758", +3,000003,petal width,,,numeric,true,0,0,"[0.1, 2.5], mean: 1.19933", +4,000004,species,,,categorical,true,0,0,"3 categorìes: Iris-setosa (50), Iris-versicolor (50), Iris-virginica (50)", diff --git a/data/fruits1e.jpg b/data/fruits1e.jpg new file mode 100644 index 00000000..99e7ffa4 Binary files /dev/null and b/data/fruits1e.jpg differ diff --git a/data/grades.csv b/data/grades.csv new file mode 100644 index 00000000..66d43bab --- /dev/null +++ b/data/grades.csv @@ -0,0 +1,81 @@ +Prefix,Assignment,Tutorial,Midterm,TakeHome,Final +05,57.14,34.09,64.38,51.48,52.50 +08,95.05,105.49,67.50,99.07,68.33 +08,83.70,83.17,30.0,63.15,48.89 +07,81.22,96.06,49.38,105.93,80.56 +08,91.32,93.64,95.0,107.41,73.89 +07,95.0,92.58,93.12,97.78,68.06 +08,95.05,102.99,56.25,99.07,50.0 +07,72.85,86.85,60.0,,56.11 +08,84.26,93.10,47.50,18.52,50.83 +07,90.10,97.55,51.25,88.89,63.61 +07,80.44,90.20,75.0,91.48,39.72 +06,86.26,80.60,74.38,87.59,77.50 +08,97.16,103.71,72.50,93.52,63.33 +07,91.28,83.53,81.25,99.81,92.22 +08,84.80,89.08,44.38,16.91,35.83 +07,93.83,95.43,88.12,80.93,90.0 +08,84.80,89.08,47.50,16.91,53.33 +04,92.01,102.52,38.75,86.11,49.17 +08,55.14,81.85,75.0,56.11,62.50 +08,93.04,82.93,79.38,83.33,91.11 +08,63.40,86.21,63.12,72.78,, +08,75.27,97.52,63.12,61.11,66.11 +08,63.78,76.21,39.38,42.22,34.44 +07,80.44,90.20,46.25,91.48,72.22 +07,53.36,82.01,74.38,102.59,56.39 +06,91.28,95.24,82.50,97.59,92.78 +08,82.45,86.65,93.12,85.56,89.17 +08,75.27,86.67,69.38,61.11,88.89 +08,91.32,94.89,76.25,107.41,85.56 +07,91.62,65.18,71.88,90.0,45.56 +07,98.58,102.46,67.50,97.59,63.33 +07,86.26,88.57,70.0,87.59,55.0 +08,67.29,95.64,48.12,72.22,43.33 +07,98.58,91.03,101.25,104.26,107.78 +08,85.42,95.67,56.25,103.52,64.72 
+05,88.09,63.39,74.38,93.70,50.83 +06,95.05,70.24,52.50,52.41,47.78 +07,89.89,57.97,32.50,85.19,51.67 +06,90.74,89.64,61.25,90.0,, +07,95.0,94.36,89.38,100.93,85.0 +06,28.14,58.51,72.50,53.70,68.33 +07,95.14,82.67,110.0,89.81,90.83 +07,92.01,112.58,86.25,86.11,83.33 +07,86.26,74.66,85.0,64.07,82.22 +06,57.14,34.09,66.88,51.48,55.83 +07,93.83,57.32,28.12,77.96,45.56 +08,68.95,65.11,44.38,57.41,65.28 +08,85.01,98.47,91.25,83.33,72.22 +08,95.90,99.99,95.62,105.56,102.22 +08,92.46,95.75,61.88,83.33,48.89 +08,96.73,88.11,71.88,97.41,65.56 +08,83.70,83.17,60.62,63.15,57.78 +07,95.14,94.01,99.38,100.0,95.0 +07,98.58,88.30,90.62,100.93,99.17 +08,71.79,102.87,54.37,21.53,36.11 +08,71.79,101.68,75.0,21.53,49.44 +08,87.93,106.53,37.50,97.41,28.06 +08,87.93,108.97,28.75,87.96,47.78 +08,68.95,65.11,40.0,57.41,78.89 +07,72.85,86.85,41.25,60.37,46.67 +08,71.79,102.87,41.88,24.77,, +08,92.02,97.76,46.25,47.22,60.56 +07,90.33,87.56,68.75,77.96,58.33 +07,95.0,94.36,90.62,100.93,101.11 +07,91.28,108.71,96.25,99.81,88.89 +08,97.0,103.02,93.12,106.48,94.44 +08,93.01,104.18,55.0,96.85,67.22 +08,92.02,100.58,54.37,63.89,63.89 +07,100.83,105.57,101.25,104.44,108.89 +08,80.53,92.80,51.25,72.78,66.67 +08,90.98,97.55,86.25,88.89,90.0 +08,93.59,103.83,92.50,96.85,87.22 +08,97.33,100.42,69.38,102.59,83.06 +07,84.26,91.31,63.12,83.33,75.56 +08,84.26,96.66,52.50,83.33,50.0 +07,93.83,102.19,106.25,94.44,102.78 +08,75.27,86.67,70.0,71.85,80.0 +08,92.02,100.58,73.12,63.89,65.28 +08,97.16,103.71,83.75,95.93,78.89 +08,66.17,93.68,71.88,42.22,61.39 diff --git a/data/groceries.csv b/data/groceries.csv new file mode 100644 index 00000000..33506dd1 --- /dev/null +++ b/data/groceries.csv @@ -0,0 +1,344 @@ +chocolate +sausage,rolls/buns,soda,chocolate +hamburger meat,other vegetables,rolls/buns,spices,bottled water,hygiene articles,napkins +sausage,beef,whole milk +red/blush wine +beef,citrus fruit,berries,root vegetables,brown bread,detergent +hamburger meat,other vegetables,whole milk,frozen 
vegetables,domestic eggs,soda,dishes +frankfurter,sausage,long life bakery product,waffles +curd,dessert,soda,salty snack,waffles,cake bar,chocolate,shopping bags +pork,root vegetables,other vegetables,whole milk,dishes +frankfurter,meat,tropical fruit,onions,oil,baking powder,tea,cling film/bags +citrus fruit,other vegetables,rolls/buns,salty snack,shopping bags +other vegetables,yogurt,brown bread,instant coffee +berries,root vegetables,other vegetables,curd,butter milk,cream cheese ,roll products ,margarine,misc. beverages,specialty chocolate,detergent +sausage,tropical fruit,condensed milk,rolls/buns +hamburger meat +domestic eggs,rolls/buns,dish cleaner,hygiene articles +specialty chocolate +sausage,whole milk,yogurt,brown bread,cereals,bottled water,soda +citrus fruit,tropical fruit,grapes,other vegetables,frozen vegetables,pickled vegetables,fruit/vegetable juice,liquor,rum,liquor (appetizer),candy,cling film/bags +canned beer +chicken,pork,hamburger meat,pip fruit,nuts/prunes,root vegetables,other vegetables,whole milk,spread cheese,frozen potato products,brown bread,Instant food products,soda,fruit/vegetable juice,shopping bags +pork,berries,whipped/sour cream,beverages,flour,oil,soda,chocolate,newspapers +beverages,rolls/buns,soda,misc. 
beverages,liqueur,cake bar +berries,root vegetables,whole milk,beverages,sugar,soups,coffee,shopping bags +tropical fruit,root vegetables,other vegetables,domestic eggs,white bread,brown bread,canned vegetables,soda,fruit/vegetable juice,rum +chocolate +pork,whole milk,curd,yogurt,whipped/sour cream,cream cheese ,domestic eggs,white bread,sugar,baking powder,coffee,cocoa drinks,salty snack,waffles,candy,napkins +turkey,tropical fruit,pip fruit,root vegetables,other vegetables,curd,butter milk,yogurt,curd cheese,rice,vinegar,margarine +onions +bottled beer,liquor +other vegetables,yogurt,oil,dental care +citrus fruit,cake bar,baby cosmetics,shopping bags +frankfurter,beef,tropical fruit,pip fruit,butter,yogurt,dog food,long life bakery product,cookware,newspapers +soda,softener +frankfurter,bottled beer +canned beer +sausage,whole milk,brown bread,soda +citrus fruit,tropical fruit,whole milk,rolls/buns,bottled water,long life bakery product,decalcifier,newspapers +pastry,soda +sliced cheese,rolls/buns,margarine,soda,shopping bags +other vegetables,rolls/buns +canned beer +sausage,meat,pip fruit,other vegetables,yogurt,rolls/buns,brown bread,bottled water,soda,fruit/vegetable juice,newspapers +tropical fruit,pip fruit,root vegetables,whole milk,yogurt,rolls/buns,sweet spreads,cat food,pet care,hygiene articles,shopping bags +whole milk +tropical fruit,yogurt,processed cheese,rolls/buns,bottled water,soda,misc. 
beverages,female sanitary products +whole milk,margarine,pot plants +domestic eggs +bottled beer,shopping bags +pork,berries,whole milk,dessert,whipped/sour cream,fruit/vegetable juice,candy +other vegetables,whole milk,butter,yogurt,cream cheese ,vinegar,pet care,fruit/vegetable juice,cling film/bags +chicken,canned beer,shopping bags +sausage,chicken,other vegetables,whole milk,yogurt,cream cheese ,brown bread,soda +rolls/buns,sugar,sweet spreads,chewing gum,newspapers +other vegetables,whole milk,soda,chocolate +soda,bottled beer +tropical fruit,grapes,other vegetables,bottled beer,popcorn +pip fruit,nuts/prunes,curd,frozen dessert,sweet spreads,light bulbs +pip fruit,pastry +onions,yogurt,frozen vegetables +yogurt,pastry +hard cheese,soda +other vegetables,yogurt,whipped/sour cream,newspapers +whole milk,cat food,bottled water,napkins +UHT-milk,margarine,bottled water,soda +processed cheese +whole milk,yogurt,frozen potato products,brown bread,napkins +tropical fruit,cat food,bottled water,soda,bottled beer,white wine,hygiene articles +frankfurter,photo/film +herbs +sausage,beef,hamburger meat,yogurt,soft cheese,cream cheese ,domestic eggs,rolls/buns,pastry,instant coffee +UHT-milk,domestic eggs,sugar,fruit/vegetable juice,newspapers +whole milk,curd,dessert,shopping bags +chicken,citrus fruit,tropical fruit,other vegetables,whole milk,whipped/sour cream,rolls/buns,pastry,flour,specialty chocolate +citrus fruit,root vegetables,other vegetables,whipped/sour cream,processed cheese,rolls/buns,newspapers +whole milk,frozen dessert,margarine,snack products +other vegetables,specialty cheese +frozen meals,soda,shopping bags +rolls/buns,brown bread +waffles +whole milk,beverages +canned beer +other vegetables,dessert,candy,abrasive cleaner,dishes,cling film/bags,candles +beef,root vegetables,herbs,packaged fruit/vegetables,whipped/sour cream,cream cheese ,vinegar,oil,margarine,soups,pickled vegetables,meat spreads,canned fish,fruit/vegetable juice,candles 
+rolls/buns,soda,canned beer +ham,root vegetables,other vegetables,dessert,brown bread,pasta,margarine,bottled water,fruit/vegetable juice,red/blush wine +frankfurter,sausage,pork,sliced cheese,pastry,margarine,newspapers +bottled water +specialty chocolate +packaged fruit/vegetables +photo/film +coffee +candles +condensed milk,soda +dessert,UHT-milk,cream cheese ,domestic eggs,brown bread,coffee,bottled water,soda,shopping bags +root vegetables,shopping bags +white wine +rolls/buns +tropical fruit,pip fruit,UHT-milk,sugar,cat food,coffee,long life bakery product,chocolate,hygiene articles +frankfurter,other vegetables,frozen vegetables,waffles,cake bar +frozen meals +canned beer +frozen meals,rolls/buns,brown bread +pip fruit,rolls/buns,pet care,soda,waffles,chocolate marshmallow,napkins +misc. beverages,fruit/vegetable juice +berries +frankfurter,other vegetables,red/blush wine,shopping bags +other vegetables +meat,chicken,hamburger meat,citrus fruit,whole milk,butter,butter milk,yogurt,soft cheese,frozen vegetables,domestic eggs,rolls/buns,fruit/vegetable juice,seasonal products +turkey,tropical fruit,root vegetables,other vegetables,curd,whipped/sour cream,liquor (appetizer) +citrus fruit,root vegetables,onions,other vegetables,domestic eggs,white bread,cake bar +other vegetables,whole milk,rolls/buns,brown bread,soda,hygiene articles,newspapers +butter milk,domestic eggs,rolls/buns,soda,specialty chocolate +beef +sausage,meat,other vegetables,yogurt,whipped/sour cream,domestic eggs,semi-finished bread,instant coffee,bottled water,soda,bottled beer,newspapers,shopping bags +whole milk,beverages,frozen vegetables,margarine,liquor (appetizer),newspapers +curd,whipped/sour cream,pudding powder,coffee,long life bakery product +finished products,bottled water,specialty bar,newspapers +grapes,butter,soft cheese,canned beer +salty snack +tropical fruit,dessert,yogurt,syrup,hygiene articles +pork,rolls/buns,liquor,salty snack,popcorn +whole milk,whipped/sour cream 
+canned beer +pip fruit,root vegetables,other vegetables,whole milk,rolls/buns,brown bread,canned beer,newspapers,pot plants +other vegetables,curd +sausage,beef,tropical fruit,other vegetables,butter,yogurt,UHT-milk,condensed milk,sliced cheese,salt,soups,sweet spreads,newspapers +meat,curd,yogurt,ice cream,oil,canned fish,soda,fruit/vegetable juice,seasonal products +canned beer +tropical fruit,berries,root vegetables,other vegetables,yogurt,cream cheese ,rolls/buns,soda,shopping bags +yogurt,bottled water +UHT-milk,condensed milk,coffee,cling film/bags +frankfurter,sausage,tropical fruit,root vegetables,whole milk,sliced cheese,frozen vegetables,mustard,pickled vegetables,salty snack +skin care,napkins +pip fruit,whole milk,ready soups,canned fish,soda +soda,long life bakery product +dessert +pork,tropical fruit +pastry +whole milk,dessert,coffee,bottled water +other vegetables,rolls/buns,hair spray +domestic eggs +citrus fruit,pip fruit,root vegetables,UHT-milk,coffee,candles +frankfurter,whole milk,rolls/buns,newspapers +frozen fruits,soda,specialty bar +frankfurter,chicken,hamburger meat,other vegetables,dessert,UHT-milk,oil,canned vegetables,cake bar +beef +other vegetables,curd,pastry,pasta,hygiene articles,flower (seeds) +tropical fruit,dessert,yogurt,sliced cheese,cream cheese ,rolls/buns,brown bread,canned vegetables,fruit/vegetable juice +zwieback +beef,whole milk,curd,white bread,napkins +sausage,chicken,hamburger meat,other vegetables,whole milk,butter milk,whipped/sour cream,white bread +pip fruit,other vegetables,whole milk,butter,whipped/sour cream,sliced cheese,pastry,hygiene articles,napkins +sausage,bottled water +beef,rolls/buns +pip fruit,root vegetables,frozen vegetables,rolls/buns,semi-finished bread,soda,white wine,newspapers +turkey,tropical fruit,root vegetables,other vegetables,butter,whipped/sour cream,hard cheese,ketchup,fruit/vegetable juice,candy,napkins +citrus fruit,pip fruit,other vegetables,whole milk,frozen vegetables,bottled 
water +dessert +other vegetables +turkey,root vegetables,other vegetables,whole milk,butter milk,UHT-milk,brown bread,bottled water +pork +pastry,canned beer,male cosmetics,newspapers +pastry,soda,canned beer,white wine,newspapers +sausage,yogurt,rolls/buns +beverages,specialty bar +onions +ham +soda +tropical fruit,dessert,soft cheese,brown bread,shopping bags +sausage,whole milk,canned beer +bottled water +soda,snack products,detergent +beef,citrus fruit,pip fruit,yogurt,condensed milk,cream cheese ,white bread,dish cleaner,hygiene articles,dishes,shopping bags +soda +chicken,citrus fruit,tropical fruit,pip fruit,whole milk,white bread +rolls/buns,soda +bottled beer,canned beer +yogurt,pastry,soups,detergent,flower (seeds) +frozen fish,chocolate marshmallow,candy,male cosmetics +citrus fruit,tropical fruit,rolls/buns,oil,mustard,soups,sweet spreads,soda,syrup,canned beer,waffles,chocolate,specialty bar,dish cleaner,baby cosmetics,cookware +other vegetables,whole milk,pet care,soda,canned beer,salty snack +dessert,rolls/buns,waffles +other vegetables,brown bread +tropical fruit,whole milk,long life bakery product,chocolate +tropical fruit,root vegetables,whole milk,condensed milk,rolls/buns,bottled beer,waffles +frankfurter,hygiene articles +frankfurter,rolls/buns +whole milk,oil,candles +canned beer +citrus fruit,brown bread,pastry,white wine,chocolate,shopping bags +coffee,fruit/vegetable juice +frozen meals,frozen potato products,coffee +hamburger meat,cream cheese ,sweet spreads,coffee,canned beer,cling film/bags,shopping bags +sausage,other vegetables,whipped/sour cream,ice cream,domestic eggs,rolls/buns,coffee,bottled water,newspapers +bottled beer +bottled water,soda +root vegetables,other vegetables,whole milk,butter milk,frozen meals +citrus fruit,pastry +frankfurter,sausage,chicken,citrus fruit,root vegetables,herbs,packaged fruit/vegetables,whole milk,salt,sugar,bottled beer,napkins +whole milk +other vegetables,whole milk,dessert,cream cheese 
,sugar,dishes +sausage,soda +rolls/buns,soda +berries,whole milk,curd,yogurt,rolls/buns,pastry,honey,chocolate +bottled water +whole milk,curd,rolls/buns,bottled water,fruit/vegetable juice,soap +rolls/buns +hamburger meat,soda,rum,pot plants +dessert,canned beer +pip fruit,UHT-milk,semi-finished bread,margarine,bottled water,cake bar,shopping bags +canned beer +meat,root vegetables,onions,herbs,other vegetables,whole milk,flour,soda,seasonal products +butter,UHT-milk,domestic eggs,waffles +meat,beef,other vegetables,butter,frozen fish,bottled water,napkins +ice cream,bottled water,soda +chicken,pork,beef,other vegetables,whole milk,butter,whipped/sour cream,soft cheese,cream cheese ,processed cheese,frozen vegetables,frozen fruits,pastry,sugar,soda,long life bakery product,waffles,chocolate marshmallow,candy +beef,yogurt,white bread,flour,soda,fruit/vegetable juice,bottled beer,brandy,cooking chocolate +pork,tropical fruit,pip fruit,nuts/prunes,other vegetables,frozen meals,margarine,newspapers +frankfurter,whole milk,yogurt,napkins +ham,pork,shopping bags +sausage,pip fruit,UHT-milk,soups,sauces,cat food,newspapers +citrus fruit,whipped/sour cream,pastry +sugar,bottled water,soda,fruit/vegetable juice,salty snack,popcorn +UHT-milk,pastry,margarine +citrus fruit,whole milk,pastry +bottled beer,brandy,specialty chocolate +hamburger meat,brown bread,semi-finished bread +white wine +rolls/buns,margarine +bottled water,soda,napkins +bottled water +pork +whole milk,yogurt +finished products,other vegetables,butter,cream cheese ,processed cheese,domestic eggs,rolls/buns,pastry,long life bakery product,candy +sausage,butter,pastry,vinegar,margarine +frankfurter,tropical fruit,whole milk,brown bread,mustard,baking powder,meat spreads,fruit/vegetable juice,shopping bags +other vegetables +beef,whole milk,yogurt,pastry +whole milk,butter,curd,processed cheese,rolls/buns,brown bread,misc. 
beverages,long life bakery product +tropical fruit,dessert,tidbits,pastry,waffles,cleaner +tropical fruit,canned vegetables,bottled beer,specialty chocolate +hamburger meat,other vegetables,yogurt,brown bread,pastry,cat food,abrasive cleaner,dishes +soda,canned beer +sausage,rolls/buns,soda,chocolate +hamburger meat,other vegetables,rolls/buns,spices,bottled water,hygiene articles,napkins +sausage,beef,whole milk +red/blush wine +beef,citrus fruit,berries,root vegetables,brown bread,detergent +hamburger meat,other vegetables,whole milk,frozen vegetables,domestic eggs,soda,dishes +frankfurter,sausage,long life bakery product,waffles +curd,dessert,soda,salty snack,waffles,cake bar,chocolate,shopping bags +pork,root vegetables,other vegetables,whole milk,dishes +frankfurter,meat,tropical fruit,onions,oil,baking powder,tea,cling film/bags +citrus fruit,other vegetables,rolls/buns,salty snack,shopping bags +other vegetables,yogurt,brown bread,instant coffee +berries,root vegetables,other vegetables,curd,butter milk,cream cheese ,roll products ,margarine,misc. beverages,specialty chocolate,detergent +sausage,tropical fruit,condensed milk,rolls/buns +hamburger meat +domestic eggs,rolls/buns,dish cleaner,hygiene articles +specialty chocolate +sausage,whole milk,yogurt,brown bread,cereals,bottled water,soda +citrus fruit,tropical fruit,grapes,other vegetables,frozen vegetables,pickled vegetables,fruit/vegetable juice,liquor,rum,liquor (appetizer),candy,cling film/bags +canned beer +chicken,pork,hamburger meat,pip fruit,nuts/prunes,root vegetables,other vegetables,whole milk,spread cheese,frozen potato products,brown bread,Instant food products,soda,fruit/vegetable juice,shopping bags +pork,berries,whipped/sour cream,beverages,flour,oil,soda,chocolate,newspapers +beverages,rolls/buns,soda,misc. 
beverages,liqueur,cake bar +berries,root vegetables,whole milk,beverages,sugar,soups,coffee,shopping bags +tropical fruit,root vegetables,other vegetables,domestic eggs,white bread,brown bread,canned vegetables,soda,fruit/vegetable juice,rum +chocolate +pork,whole milk,curd,yogurt,whipped/sour cream,cream cheese ,domestic eggs,white bread,sugar,baking powder,coffee,cocoa drinks,salty snack,waffles,candy,napkins +turkey,tropical fruit,pip fruit,root vegetables,other vegetables,curd,butter milk,yogurt,curd cheese,rice,vinegar,margarine +onions +bottled beer,liquor +other vegetables,yogurt,oil,dental care +citrus fruit,cake bar,baby cosmetics,shopping bags +frankfurter,beef,tropical fruit,pip fruit,butter,yogurt,dog food,long life bakery product,cookware,newspapers +soda,softener +frankfurter,bottled beer +canned beer +sausage,whole milk,brown bread,soda +citrus fruit,tropical fruit,whole milk,rolls/buns,bottled water,long life bakery product,decalcifier,newspapers +pastry,soda +sliced cheese,rolls/buns,margarine,soda,shopping bags +other vegetables,rolls/buns +canned beer +sausage,meat,pip fruit,other vegetables,yogurt,rolls/buns,brown bread,bottled water,soda,fruit/vegetable juice,newspapers +tropical fruit,pip fruit,root vegetables,whole milk,yogurt,rolls/buns,sweet spreads,cat food,pet care,hygiene articles,shopping bags +whole milk +tropical fruit,yogurt,processed cheese,rolls/buns,bottled water,soda,misc. 
beverages,female sanitary products +whole milk,margarine,pot plants +domestic eggs +bottled beer,shopping bags +pork,berries,whole milk,dessert,whipped/sour cream,fruit/vegetable juice,candy +other vegetables,whole milk,butter,yogurt,cream cheese ,vinegar,pet care,fruit/vegetable juice,cling film/bags +chicken,canned beer,shopping bags +sausage,chicken,other vegetables,whole milk,yogurt,cream cheese ,brown bread,soda +rolls/buns,sugar,sweet spreads,chewing gum,newspapers +other vegetables,whole milk,soda,chocolate +soda,bottled beer +tropical fruit,grapes,other vegetables,bottled beer,popcorn +pip fruit,nuts/prunes,curd,frozen dessert,sweet spreads,light bulbs +pip fruit,pastry +onions,yogurt,frozen vegetables +yogurt,pastry +hard cheese,soda +other vegetables,yogurt,whipped/sour cream,newspapers +whole milk,cat food,bottled water,napkins +UHT-milk,margarine,bottled water,soda +processed cheese +whole milk,yogurt,frozen potato products,brown bread,napkins +tropical fruit,cat food,bottled water,soda,bottled beer,white wine,hygiene articles +frankfurter,photo/film +herbs +sausage,beef,hamburger meat,yogurt,soft cheese,cream cheese ,domestic eggs,rolls/buns,pastry,instant coffee +UHT-milk,domestic eggs,sugar,fruit/vegetable juice,newspapers +whole milk,curd,dessert,shopping bags +chicken,citrus fruit,tropical fruit,other vegetables,whole milk,whipped/sour cream,rolls/buns,pastry,flour,specialty chocolate +citrus fruit,root vegetables,other vegetables,whipped/sour cream,processed cheese,rolls/buns,newspapers +whole milk,frozen dessert,margarine,snack products +other vegetables,specialty cheese +frozen meals,soda,shopping bags +rolls/buns,brown bread +waffles +whole milk,beverages +canned beer +other vegetables,dessert,candy,abrasive cleaner,dishes,cling film/bags,candles +beef,root vegetables,herbs,packaged fruit/vegetables,whipped/sour cream,cream cheese ,vinegar,oil,margarine,soups,pickled vegetables,meat spreads,canned fish,fruit/vegetable juice,candles 
+rolls/buns,soda,canned beer +ham,root vegetables,other vegetables,dessert,brown bread,pasta,margarine,bottled water,fruit/vegetable juice,red/blush wine +frankfurter,sausage,pork,sliced cheese,pastry,margarine,newspapers +bottled water +specialty chocolate +packaged fruit/vegetables +photo/film +coffee +candles +condensed milk,soda +dessert,UHT-milk,cream cheese ,domestic eggs,brown bread,coffee,bottled water,soda,shopping bags +root vegetables,shopping bags +white wine +rolls/buns +tropical fruit,pip fruit,UHT-milk,sugar,cat food,coffee,long life bakery product,chocolate,hygiene articles diff --git a/data/images/annotations.json b/data/images/annotations.json new file mode 100644 index 00000000..25ea91bd --- /dev/null +++ b/data/images/annotations.json @@ -0,0 +1 @@ +[{"file": "f1/fruits1f.png", "new_label": "True"}, {"file": "f1/fruits1.png", "new_label": "False"}, {"file": "f1/fruits1b.png", "new_label": "True"}, {"file": "f1/fruits1c.png", "new_label": "True"}, {"file": "f1/fruits1d.png", "new_label": "True"}, {"file": "f1/fruits1e.png", "new_label": "True"}, {"file": "f2/fruits2.png", "new_label": "False"}, {"file": "f2/fruits2f.png", "new_label": "False"}, {"file": "f2/fruits2d.png", "new_label": "False"}, {"file": "f2/fruits2e.png", "new_label": "False"}, {"file": "f2/fruits2b.png", "new_label": "False"}, {"file": "f2/fruits2c.png", "new_label": "False"}] diff --git a/data/images/annotations_compact.json b/data/images/annotations_compact.json new file mode 100644 index 00000000..294de440 --- /dev/null +++ b/data/images/annotations_compact.json @@ -0,0 +1,2 @@ +[{"file": "f1/fruits1f.png", "my_regions": "[[\"region1\" 0.2 0.2 0.4 0.4]]"}, + {"file": "f1/fruits1.png", "my_regions": "[[\"region2\" 0.3 0.3 0.5 0.5] [\"region1\" 0.6 0.6 0.8 0.8]]"}] diff --git a/data/images/annotations_list.json b/data/images/annotations_list.json new file mode 100644 index 00000000..ecfee3db --- /dev/null +++ b/data/images/annotations_list.json @@ -0,0 +1,2 @@ +[{"file": 
"f1/fruits1f.png", "my_regions": [{"label": "region1", "xmin": 0.2, "ymin": 0.2, "xmax": 0.4, "ymax": 0.4}]}, + {"file": "f1/fruits1.png", "my_regions": [{"label": "region2", "xmin": 0.2, "ymin": 0.2, "xmax": 0.4, "ymax": 0.4}, {"label": "region1", "xmin": 0.5, "ymin": 0.5, "xmax": 0.7, "ymax": 0.7}]}] diff --git a/data/images/cats/pexels-pixabay-33358.jpg b/data/images/cats/pexels-pixabay-33358.jpg new file mode 100644 index 00000000..0d0d8c73 Binary files /dev/null and b/data/images/cats/pexels-pixabay-33358.jpg differ diff --git a/data/images/fruits_hist.zip b/data/images/fruits_hist.zip new file mode 100644 index 00000000..14f21ac8 Binary files /dev/null and b/data/images/fruits_hist.zip differ diff --git a/data/images/fruits_name.zip b/data/images/fruits_name.zip new file mode 100644 index 00000000..e7d75cec Binary files /dev/null and b/data/images/fruits_name.zip differ diff --git a/data/images/metadata.json b/data/images/metadata.json new file mode 100644 index 00000000..ea5d1ba6 --- /dev/null +++ b/data/images/metadata.json @@ -0,0 +1,5 @@ +{"description": "Fruit images to test colour distributions", + "images_file": "./fruits_hist.zip", + "new_fields": [{"name": "new_label", "optype": "categorical"}], + "source_id": null, + "annotations": "./annotations.json"} diff --git a/data/images/metadata_compact.json b/data/images/metadata_compact.json new file mode 100644 index 00000000..45db412f --- /dev/null +++ b/data/images/metadata_compact.json @@ -0,0 +1,5 @@ +{"description": "Fruit images to test colour distributions with regions", + "images_file": "./fruits_hist.zip", + "new_fields": [{"name": "my_regions", "optype": "regions"}], + "source_id": null, + "annotations": "./annotations_compact.json"} diff --git a/data/images/metadata_list.json b/data/images/metadata_list.json new file mode 100644 index 00000000..1bf61c67 --- /dev/null +++ b/data/images/metadata_list.json @@ -0,0 +1,5 @@ +{"description": "Fruit images to test colour distributions with regions", + 
"images_file": "./fruits_hist.zip", + "new_fields": [{"name": "my_regions", "optype": "regions"}], + "source_id": null, + "annotations": "./annotations_list.json"} diff --git a/data/imgs_deepnet.zip b/data/imgs_deepnet.zip new file mode 100644 index 00000000..44f752b9 Binary files /dev/null and b/data/imgs_deepnet.zip differ diff --git a/data/iris_anomalous.csv b/data/iris_anomalous.csv new file mode 100644 index 00000000..59a1f3ac --- /dev/null +++ b/data/iris_anomalous.csv @@ -0,0 +1,152 @@ +sepal length,sepal width,petal length,petal width,species +5.1,3.5,1.4,0.2,Iris-setosa +4.9,3.0,1.4,0.2,Iris-setosa +4.7,3.2,1.3,0.2,Iris-setosa +4.6,3.1,1.5,0.2,Iris-setosa +5.0,3.6,1.4,0.2,Iris-setosa +5.4,3.9,1.7,0.4,Iris-setosa +4.6,3.4,1.4,0.3,Iris-setosa +5.0,3.4,1.5,0.2,Iris-setosa +4.4,2.9,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.4,3.7,1.5,0.2,Iris-setosa +4.8,3.4,1.6,0.2,Iris-setosa +4.8,3.0,1.4,0.1,Iris-setosa +4.3,3.0,1.1,0.1,Iris-setosa +5.8,4.0,1.2,0.2,Iris-setosa +5.7,4.4,1.5,0.4,Iris-setosa +5.4,3.9,1.3,0.4,Iris-setosa +5.1,3.5,1.4,0.3,Iris-setosa +5.7,3.8,1.7,0.3,Iris-setosa +5.1,3.8,1.5,0.3,Iris-setosa +5.4,3.4,1.7,0.2,Iris-setosa +5.1,3.7,1.5,0.4,Iris-setosa +4.6,3.6,1.0,0.2,Iris-setosa +5.1,3.3,1.7,0.5,Iris-setosa +4.8,3.4,1.9,0.2,Iris-setosa +5.0,3.0,1.6,0.2,Iris-setosa +5.0,3.4,1.6,0.4,Iris-setosa +5.2,3.5,1.5,0.2,Iris-setosa +5.2,3.4,1.4,0.2,Iris-setosa +4.7,3.2,1.6,0.2,Iris-setosa +4.8,3.1,1.6,0.2,Iris-setosa +5.4,3.4,1.5,0.4,Iris-setosa +5.2,4.1,1.5,0.1,Iris-setosa +5.5,4.2,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.2,Iris-setosa +5.0,3.2,1.2,0.2,Iris-setosa +5.5,3.5,1.3,0.2,Iris-setosa +4.9,3.6,1.4,0.1,Iris-setosa +4.4,3.0,1.3,0.2,Iris-setosa +5.1,3.4,1.5,0.2,Iris-setosa +5.0,3.5,1.3,0.3,Iris-setosa +4.5,2.3,1.3,0.3,Iris-setosa +4.4,3.2,1.3,0.2,Iris-setosa +5.0,3.5,1.6,0.6,Iris-setosa +5.1,3.8,1.9,0.4,Iris-setosa +4.8,3.0,1.4,0.3,Iris-setosa +5.1,3.8,1.6,0.2,Iris-setosa +4.6,3.2,1.4,0.2,Iris-setosa +5.3,3.7,1.5,0.2,Iris-setosa 
+5.0,3.3,1.4,0.2,Iris-setosa +7.0,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +5.5,2.3,4.0,1.3,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +6.3,3.3,4.7,1.6,Iris-versicolor +4.9,2.4,3.3,1.0,Iris-versicolor +6.6,2.9,4.6,1.3,Iris-versicolor +5.2,2.7,3.9,1.4,Iris-versicolor +5.0,2.0,3.5,1.0,Iris-versicolor +5.9,3.0,4.2,1.5,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor +6.1,2.9,4.7,1.4,Iris-versicolor +5.6,2.9,3.6,1.3,Iris-versicolor +6.7,3.1,4.4,1.4,Iris-versicolor +5.6,3.0,4.5,1.5,Iris-versicolor +5.8,2.7,4.1,1.0,Iris-versicolor +6.2,2.2,4.5,1.5,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +5.9,3.2,4.8,1.8,Iris-versicolor +6.1,2.8,4.0,1.3,Iris-versicolor +6.3,2.5,4.9,1.5,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor +6.6,3.0,4.4,1.4,Iris-versicolor +6.8,2.8,4.8,1.4,Iris-versicolor +6.7,3.0,5.0,1.7,Iris-versicolor +6.0,2.9,4.5,1.5,Iris-versicolor +5.7,2.6,3.5,1.0,Iris-versicolor +5.5,2.4,3.8,1.1,Iris-versicolor +5.5,2.4,3.7,1.0,Iris-versicolor +5.8,2.7,3.9,1.2,Iris-versicolor +6.0,2.7,5.1,1.6,Iris-versicolor +5.4,3.0,4.5,1.5,Iris-versicolor +6.0,3.4,4.5,1.6,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +6.3,2.3,4.4,1.3,Iris-versicolor +5.6,3.0,4.1,1.3,Iris-versicolor +5.5,2.5,4.0,1.3,Iris-versicolor +5.5,2.6,4.4,1.2,Iris-versicolor +6.1,3.0,4.6,1.4,Iris-versicolor +5.8,2.6,4.0,1.2,Iris-versicolor +5.0,2.3,3.3,1.0,Iris-versicolor +5.6,2.7,4.2,1.3,Iris-versicolor +5.7,3.0,4.2,1.2,Iris-versicolor +5.7,2.9,4.2,1.3,Iris-versicolor +6.2,2.9,4.3,1.3,Iris-versicolor +5.1,2.5,3.0,1.1,Iris-versicolor +5.7,2.8,4.1,1.3,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +6.5,3.0,5.8,2.2,Iris-virginica +7.6,3.0,6.6,2.1,Iris-virginica +4.9,2.5,4.5,1.7,Iris-virginica +7.3,2.9,6.3,1.8,Iris-virginica +6.7,2.5,5.8,1.8,Iris-virginica +7.2,3.6,6.1,2.5,Iris-virginica 
+6.5,3.2,5.1,2.0,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +6.8,3.0,5.5,2.1,Iris-virginica +5.7,2.5,5.0,2.0,Iris-virginica +5.8,2.8,5.1,2.4,Iris-virginica +6.4,3.2,5.3,2.3,Iris-virginica +6.5,3.0,5.5,1.8,Iris-virginica +7.7,3.8,6.7,2.2,Iris-virginica +7.7,2.6,6.9,2.3,Iris-virginica +6.0,2.2,5.0,1.5,Iris-virginica +6.9,3.2,5.7,2.3,Iris-virginica +5.6,2.8,4.9,2.0,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +6.7,3.3,5.7,2.1,Iris-virginica +7.2,3.2,6.0,1.8,Iris-virginica +6.2,2.8,4.8,1.8,Iris-virginica +6.1,3.0,4.9,1.8,Iris-virginica +6.4,2.8,5.6,2.1,Iris-virginica +7.2,3.0,5.8,1.6,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.9,3.8,6.4,2.0,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +6.1,2.6,5.6,1.4,Iris-virginica +7.7,3.0,6.1,2.3,Iris-virginica +6.3,3.4,5.6,2.4,Iris-virginica +6.4,3.1,5.5,1.8,Iris-virginica +6.0,3.0,4.8,1.8,Iris-virginica +6.9,3.1,5.4,2.1,Iris-virginica +6.7,3.1,5.6,2.4,Iris-virginica +6.9,3.1,5.1,2.3,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica +6.7,3.3,5.7,2.5,Iris-virginica +6.7,3.0,5.2,2.3,Iris-virginica +6.3,2.5,5.0,1.9,Iris-virginica +6.5,3.0,5.2,2.0,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica +5.9,3.0,5.1,1.8,Iris-virginica +-10,-10,-10,-10,{"a"} diff --git a/data/iris_missing.csv b/data/iris_missing.csv new file mode 100644 index 00000000..00e0f675 --- /dev/null +++ b/data/iris_missing.csv @@ -0,0 +1,7 @@ +sepal length,sepal width,petal length,petal width,species +foo,3.5,1.4,0.2,Iris-setosa +4.9,3.0,1.4,0.2,Iris-setosa +6.3,3.3,6.0,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.0,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor diff --git a/data/iris_missing2.csv b/data/iris_missing2.csv new file mode 100644 index 00000000..75ac323f --- /dev/null +++ b/data/iris_missing2.csv @@ -0,0 +1,48 @@ +sepal length,sepal width,petal length,petal width,species +5.1,3.5,,0.2,Iris-setosa +4.9,3.0,,0.2,Iris-setosa 
+4.7,3.2,,0.2,Iris-setosa +4.6,3.1,,0.2,Iris-setosa +5.0,3.6,,0.2,Iris-setosa +5.4,3.9,,0.4,Iris-setosa +4.6,3.4,,0.3,Iris-setosa +5.0,3.4,,0.2,Iris-setosa +4.4,2.9,,0.2,Iris-setosa +4.9,3.1,,0.1,Iris-setosa +5.4,3.7,,0.2,Iris-setosa +4.8,3.4,,0.2,Iris-setosa +4.8,3.0,,0.1,Iris-setosa +4.3,3.0,,0.1,Iris-setosa +5.8,4.0,,0.2,Iris-setosa +5.7,4.4,,0.4,Iris-setosa +7.0,3.2,1,1.4,Iris-versicolor +6.4,3.2,1,1.5,Iris-versicolor +6.9,3.1,1,1.5,Iris-versicolor +5.5,2.3,1,1.3,Iris-versicolor +6.5,2.8,1,1.5,Iris-versicolor +5.7,2.8,1,1.3,Iris-versicolor +6.3,3.3,1,1.6,Iris-versicolor +4.9,2.4,3.3,1.0,Iris-versicolor +6.6,2.9,4.6,1.3,Iris-versicolor +5.2,2.7,3.9,1.4,Iris-versicolor +5.0,2.0,3.5,1.0,Iris-versicolor +5.9,3.0,4.2,1.5,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor +6.1,2.9,4.7,1.4,Iris-versicolor +6.4,2.7,5.3,1.9,Iris-virginica +6.8,3.0,5.5,2.1,Iris-virginica +5.7,2.5,5.0,2.0,Iris-virginica +5.8,2.8,5.1,2.4,Iris-virginica +6.4,3.2,5.3,2.3,Iris-virginica +6.5,3.0,5.5,1.8,Iris-virginica +7.7,3.8,6.7,2.2,Iris-virginica +7.7,2.6,6.9,2.3,Iris-virginica +6.0,2.2,5.0,1.5,Iris-virginica +6.9,3.2,5.7,2.3,Iris-virginica +5.6,2.8,4.9,2.0,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +6.7,3.3,5.7,2.1,Iris-virginica +7.2,3.2,6.0,1.8,Iris-virginica +6.2,2.8,4.8,1.8,Iris-virginica +6.1,3.0,4.9,1.8,Iris-virginica diff --git a/data/iris_model.json b/data/iris_model.json new file mode 100644 index 00000000..4c2d8b2a --- /dev/null +++ b/data/iris_model.json @@ -0,0 +1 @@ +{"code": 200, "resource": "model/53c872fb37203f7085000ddd", "location": "https://localhost:1026/andromeda/model/53c872fb37203f7085000ddd", "object": {"size": 3686, "code": 200, "locale": "en_US", "node_threshold": 512, "private": true, "dataset": "dataset/53c872f837203f7085000dd8", "dataset_field_types": {"categorical": 1, "text": 0, "preferred": 5, "datetime": 0, "numeric": 4, "total": 5}, "fields_meta": {"count": 4, "query_total": 4, "total": 5, "limit": -1, "offset": 0}, 
"seed": "BigML, Machine Learning made easy", "ensemble_id": "", "shared_hash": "eNXNyQYkmnUh7gjrvC8C9ITGYIP", "white_box": false, "randomize": false, "balance_objective": false, "number_of_predictions": 0, "category": 12, "rows": 120, "out_of_bag": false, "source": "source/53c872f637203f7085000dd4", "ordering": 0, "ensemble_index": 0, "range": [1, 150], "credits_per_prediction": 0.0, "number_of_batchpredictions": 0, "number_of_public_predictions": 0, "sample_rate": 0.8, "objective_fields": ["000004"], "ensemble": false, "columns": 5, "selective_pruning": true, "status": {"progress": 1.0, "message": "The model has been created", "code": 5, "elapsed": 23}, "updated": "2014-07-18T01:06:15.997000", "description": "Created using BigMLer", "tags": ["BigMLer", "BigMLer_FriJul1814_030558"], "price": 0.0, "sharing_key": "556c1626e6477976acd76cfc0d3eb63a22ff5be9", "excluded_fields": [], "credits": 0.01406097412109375, "dataset_type": 0, "stat_pruning": true, "objective_field": "000004", "subscription": false, "resource": "model/53c872fb37203f7085000ddd", "name": "BigMLer_FriJul1814_030558", "created": "2014-07-18T01:06:03.779000", "dataset_status": false, "source_status": false, "number_of_evaluations": 0, "max_columns": 5, "max_rows": 150, "input_fields": ["000000", "000001", "000002", "000003"], "shared": true, "model": {"kind": "mtree", "importance": [["000002", 0.69212], ["000003", 0.29917], ["000001", 0.00871]], "fields": {"000004": {"optype": "categorical", "name": "species", "datatype": "string", "preferred": true, "summary": {"missing_count": 0, "categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]]}, "term_analysis": {"enabled": true}, "column_number": 4, "order": 3}, "000002": {"optype": "numeric", "name": "petal length", "datatype": "double", "preferred": true, "summary": {"sum_squares": 2582.71, "splits": [1.25138, 1.32426, 1.37171, 1.40962, 1.44567, 1.48173, 1.51859, 1.56301, 1.6255, 1.74645, 3.23033, 3.675, 3.94203, 4.0469, 
4.18243, 4.34142, 4.45309, 4.51823, 4.61771, 4.72566, 4.83445, 4.93363, 5.03807, 5.1064, 5.20938, 5.43979, 5.5744, 5.6646, 5.81496, 6.02913, 6.38125], "missing_count": 0, "sum": 563.7, "median": 4.34142, "maximum": 6.9, "minimum": 1, "standard_deviation": 1.7653, "variance": 3.11628, "population": 150, "bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.46, 5], [5.6, 6], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "mean": 3.758}, "column_number": 2, "order": 2}, "000003": {"optype": "numeric", "name": "petal width", "datatype": "double", "preferred": true, "summary": {"sum_squares": 302.33, "missing_count": 0, "sum": 179.9, "median": 1.32848, "maximum": 2.5, "minimum": 0.1, "standard_deviation": 0.76224, "variance": 0.58101, "counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "population": 150, "mean": 1.19933}, "column_number": 3, "order": 1}, "000001": {"optype": "numeric", "name": "sepal width", "datatype": "double", "preferred": true, "summary": {"sum_squares": 1430.4, "missing_count": 0, "sum": 458.6, "median": 3.02044, "maximum": 4.4, "minimum": 2, "standard_deviation": 0.43587, "variance": 0.18998, "counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "population": 150, "mean": 3.05733}, "column_number": 1, "order": 0}}, "node_threshold": 512, "model_fields": {"000004": {"optype": "categorical", "name": "species", "datatype": "string", "term_analysis": 
{"enabled": true}, "preferred": true, "column_number": 4}, "000002": {"datatype": "double", "optype": "numeric", "name": "petal length", "preferred": true, "column_number": 2}, "000003": {"datatype": "double", "optype": "numeric", "name": "petal width", "preferred": true, "column_number": 3}, "000001": {"datatype": "double", "optype": "numeric", "name": "sepal width", "preferred": true, "column_number": 1}}, "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "root": {"count": 120, "confidence": 0.27052, "predicate": true, "id": 0, "objective_summary": {"categories": [["Iris-versicolor", 42], ["Iris-virginica", 41], ["Iris-setosa", 37]]}, "output": "Iris-versicolor", "children": [{"count": 83, "confidence": 0.4006, "predicate": {"operator": ">", "field": "000002", "value": 2.35}, "id": 1, "objective_summary": {"categories": [["Iris-versicolor", 42], ["Iris-virginica", 41]]}, "output": "Iris-versicolor", "children": [{"count": 38, "confidence": 0.86505, "predicate": {"operator": ">", "field": "000003", "value": 1.75}, "id": 2, "objective_summary": {"categories": [["Iris-virginica", 37], ["Iris-versicolor", 1]]}, "output": "Iris-virginica", "children": [{"count": 35, "confidence": 0.9011, "predicate": {"operator": ">", "field": "000002", "value": 4.85}, "objective_summary": {"categories": [["Iris-virginica", 35]]}, "output": "Iris-virginica", "id": 3}, {"count": 3, "confidence": 0.20765, "predicate": {"operator": "<=", "field": "000002", "value": 4.85}, "id": 4, "objective_summary": {"categories": [["Iris-virginica", 2], ["Iris-versicolor", 1]]}, "output": "Iris-virginica", "children": [{"count": 1, "confidence": 0.20654, "predicate": {"operator": ">", "field": "000001", "value": 3.1}, "objective_summary": {"categories": [["Iris-versicolor", 1]]}, "output": "Iris-versicolor", "id": 5}, {"count": 2, "confidence": 0.34237, "predicate": {"operator": "<=", 
"field": "000001", "value": 3.1}, "objective_summary": {"categories": [["Iris-virginica", 2]]}, "output": "Iris-virginica", "id": 6}]}]}, {"count": 45, "confidence": 0.79266, "predicate": {"operator": "<=", "field": "000003", "value": 1.75}, "id": 7, "objective_summary": {"categories": [["Iris-versicolor", 41], ["Iris-virginica", 4]]}, "output": "Iris-versicolor", "children": [{"count": 5, "confidence": 0.23072, "predicate": {"operator": ">", "field": "000002", "value": 4.95}, "id": 8, "objective_summary": {"categories": [["Iris-virginica", 3], ["Iris-versicolor", 2]]}, "output": "Iris-virginica", "children": [{"count": 3, "confidence": 0.20765, "predicate": {"operator": ">", "field": "000003", "value": 1.55}, "id": 9, "objective_summary": {"categories": [["Iris-versicolor", 2], ["Iris-virginica", 1]]}, "output": "Iris-versicolor", "children": [{"count": 1, "confidence": 0.20654, "predicate": {"operator": ">", "field": "000002", "value": 5.45}, "objective_summary": {"categories": [["Iris-virginica", 1]]}, "output": "Iris-virginica", "id": 10}, {"count": 2, "confidence": 0.34237, "predicate": {"operator": "<=", "field": "000002", "value": 5.45}, "objective_summary": {"categories": [["Iris-versicolor", 2]]}, "output": "Iris-versicolor", "id": 11}]}, {"count": 2, "confidence": 0.34237, "predicate": {"operator": "<=", "field": "000003", "value": 1.55}, "objective_summary": {"categories": [["Iris-virginica", 2]]}, "output": "Iris-virginica", "id": 12}]}, {"count": 40, "confidence": 0.87118, "predicate": {"operator": "<=", "field": "000002", "value": 4.95}, "id": 13, "objective_summary": {"categories": [["Iris-versicolor", 39], ["Iris-virginica", 1]]}, "output": "Iris-versicolor", "children": [{"count": 1, "confidence": 0.20654, "predicate": {"operator": ">", "field": "000003", "value": 1.65}, "objective_summary": {"categories": [["Iris-virginica", 1]]}, "output": "Iris-virginica", "id": 14}, {"count": 39, "confidence": 0.91033, "predicate": {"operator": "<=", "field": 
"000003", "value": 1.65}, "objective_summary": {"categories": [["Iris-versicolor", 39]]}, "output": "Iris-versicolor", "id": 15}]}]}]}, {"count": 37, "confidence": 0.90594, "predicate": {"operator": "<=", "field": "000002", "value": 2.35}, "objective_summary": {"categories": [["Iris-setosa", 37]]}, "output": "Iris-setosa", "id": 16}]}, "distribution": {"training": {"categories": [["Iris-setosa", 37], ["Iris-versicolor", 42], ["Iris-virginica", 41]]}, "predictions": {"categories": [["Iris-setosa", 37], ["Iris-versicolor", 42], ["Iris-virginica", 41]]}}, "depth_threshold": 512}, "replacement": false}, "error": null} \ No newline at end of file diff --git a/data/iris_sp_chars.csv b/data/iris_sp_chars.csv new file mode 100644 index 00000000..bd5c4890 Binary files /dev/null and b/data/iris_sp_chars.csv differ diff --git a/data/iris_unbalanced.csv b/data/iris_unbalanced.csv new file mode 100644 index 00000000..951e3ead --- /dev/null +++ b/data/iris_unbalanced.csv @@ -0,0 +1,113 @@ +sepal length,sepal width,petal length,petal width,species +5.1,3.5,1.4,0.2,Iris-setosa +4.9,3.0,1.4,0.2,Iris-setosa +4.7,3.2,1.3,0.2,Iris-setosa +4.6,3.1,1.5,0.2,Iris-setosa +5.0,3.6,1.4,0.2,Iris-setosa +5.4,3.9,1.7,0.4,Iris-setosa +4.6,3.4,1.4,0.3,Iris-setosa +5.0,3.4,1.5,0.2,Iris-setosa +4.4,2.9,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.4,3.7,1.5,0.2,Iris-setosa +4.8,3.4,1.6,0.2,Iris-setosa +7.0,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +5.5,2.3,4.0,1.3,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +6.3,3.3,4.7,1.6,Iris-versicolor +4.9,2.4,3.3,1.0,Iris-versicolor +6.6,2.9,4.6,1.3,Iris-versicolor +5.2,2.7,3.9,1.4,Iris-versicolor +5.0,2.0,3.5,1.0,Iris-versicolor +5.9,3.0,4.2,1.5,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor +6.1,2.9,4.7,1.4,Iris-versicolor +5.6,2.9,3.6,1.3,Iris-versicolor +6.7,3.1,4.4,1.4,Iris-versicolor +5.6,3.0,4.5,1.5,Iris-versicolor +5.8,2.7,4.1,1.0,Iris-versicolor 
+6.2,2.2,4.5,1.5,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +5.9,3.2,4.8,1.8,Iris-versicolor +6.1,2.8,4.0,1.3,Iris-versicolor +6.3,2.5,4.9,1.5,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor +6.6,3.0,4.4,1.4,Iris-versicolor +6.8,2.8,4.8,1.4,Iris-versicolor +6.7,3.0,5.0,1.7,Iris-versicolor +6.0,2.9,4.5,1.5,Iris-versicolor +5.7,2.6,3.5,1.0,Iris-versicolor +5.5,2.4,3.8,1.1,Iris-versicolor +5.5,2.4,3.7,1.0,Iris-versicolor +5.8,2.7,3.9,1.2,Iris-versicolor +6.0,2.7,5.1,1.6,Iris-versicolor +5.4,3.0,4.5,1.5,Iris-versicolor +6.0,3.4,4.5,1.6,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +6.3,2.3,4.4,1.3,Iris-versicolor +5.6,3.0,4.1,1.3,Iris-versicolor +5.5,2.5,4.0,1.3,Iris-versicolor +5.5,2.6,4.4,1.2,Iris-versicolor +6.1,3.0,4.6,1.4,Iris-versicolor +5.8,2.6,4.0,1.2,Iris-versicolor +5.0,2.3,3.3,1.0,Iris-versicolor +5.6,2.7,4.2,1.3,Iris-versicolor +5.7,3.0,4.2,1.2,Iris-versicolor +5.7,2.9,4.2,1.3,Iris-versicolor +6.2,2.9,4.3,1.3,Iris-versicolor +5.1,2.5,3.0,1.1,Iris-versicolor +5.7,2.8,4.1,1.3,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +6.5,3.0,5.8,2.2,Iris-virginica +7.6,3.0,6.6,2.1,Iris-virginica +4.9,2.5,4.5,1.7,Iris-virginica +7.3,2.9,6.3,1.8,Iris-virginica +6.7,2.5,5.8,1.8,Iris-virginica +7.2,3.6,6.1,2.5,Iris-virginica +6.5,3.2,5.1,2.0,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +6.8,3.0,5.5,2.1,Iris-virginica +5.7,2.5,5.0,2.0,Iris-virginica +5.8,2.8,5.1,2.4,Iris-virginica +6.4,3.2,5.3,2.3,Iris-virginica +6.5,3.0,5.5,1.8,Iris-virginica +7.7,3.8,6.7,2.2,Iris-virginica +7.7,2.6,6.9,2.3,Iris-virginica +6.0,2.2,5.0,1.5,Iris-virginica +6.9,3.2,5.7,2.3,Iris-virginica +5.6,2.8,4.9,2.0,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +6.7,3.3,5.7,2.1,Iris-virginica +7.2,3.2,6.0,1.8,Iris-virginica +6.2,2.8,4.8,1.8,Iris-virginica +6.1,3.0,4.9,1.8,Iris-virginica +6.4,2.8,5.6,2.1,Iris-virginica 
+7.2,3.0,5.8,1.6,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.9,3.8,6.4,2.0,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +6.1,2.6,5.6,1.4,Iris-virginica +7.7,3.0,6.1,2.3,Iris-virginica +6.3,3.4,5.6,2.4,Iris-virginica +6.4,3.1,5.5,1.8,Iris-virginica +6.0,3.0,4.8,1.8,Iris-virginica +6.9,3.1,5.4,2.1,Iris-virginica +6.7,3.1,5.6,2.4,Iris-virginica +6.9,3.1,5.1,2.3,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica +6.7,3.3,5.7,2.5,Iris-virginica +6.7,3.0,5.2,2.3,Iris-virginica +6.3,2.5,5.0,1.9,Iris-virginica +6.5,3.0,5.2,2.0,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica +5.9,3.0,5.1,1.8,Iris-virginica diff --git a/data/missings_cat.csv b/data/missings_cat.csv new file mode 100644 index 00000000..d4ba0909 --- /dev/null +++ b/data/missings_cat.csv @@ -0,0 +1,8 @@ +x1,x2,y +1,3,positive +2,4,positive +7,0,positive +0,1,positive +3,5,negative +8,2,negative +0,1,negative diff --git a/data/missings_reg.csv b/data/missings_reg.csv new file mode 100644 index 00000000..def9c9a2 --- /dev/null +++ b/data/missings_reg.csv @@ -0,0 +1,8 @@ +x1,x2,y +1,3,1 +2,4,1 +7,0,1 +0,1,1 +3,5,2 +8,2,2 +0,1,2 diff --git a/data/model/data_distribution_diabetes.txt b/data/model/data_distribution_diabetes.txt new file mode 100644 index 00000000..e90b70a5 --- /dev/null +++ b/data/model/data_distribution_diabetes.txt @@ -0,0 +1,2 @@ +[false,119] +[true,81] \ No newline at end of file diff --git a/data/model/data_distribution_grades.txt b/data/model/data_distribution_grades.txt new file mode 100644 index 00000000..96c12535 --- /dev/null +++ b/data/model/data_distribution_grades.txt @@ -0,0 +1,32 @@ +[28.06,1] +[34.44,1] +[35.97,2] +[39.72,1] +[43.33,1] +[45.56,2] +[47.41,3] +[49.39833,6] +[50.83,2] +[52.5,3] +[55.8325,4] +[58.055,2] +[60.975,2] +[63.332,5] +[65.21,4] +[66.66667,3] +[68.24,3] +[72.22,2] +[73.89,1] +[75.56,1] +[77.5,1] +[79.585,4] +[82.87,3] +[85.28,2] +[87.22,1] +[89.39,5] +[90.97,2] +[92.5,2] +[94.72,2] +[99.17,1] 
+[102.03667,3] +[108.335,2] \ No newline at end of file diff --git a/data/model/data_distribution_iris.txt b/data/model/data_distribution_iris.txt new file mode 100644 index 00000000..ee958067 --- /dev/null +++ b/data/model/data_distribution_iris.txt @@ -0,0 +1,3 @@ +[Iris-setosa,50] +[Iris-versicolor,50] +[Iris-virginica,50] \ No newline at end of file diff --git a/data/model/data_distribution_iris_missing2.txt b/data/model/data_distribution_iris_missing2.txt new file mode 100644 index 00000000..bf37e3a3 --- /dev/null +++ b/data/model/data_distribution_iris_missing2.txt @@ -0,0 +1,3 @@ +[Iris-setosa,16] +[Iris-versicolor,14] +[Iris-virginica,17] \ No newline at end of file diff --git a/data/model/data_distribution_iris_sp_chars.txt b/data/model/data_distribution_iris_sp_chars.txt new file mode 100644 index 00000000..ee958067 --- /dev/null +++ b/data/model/data_distribution_iris_sp_chars.txt @@ -0,0 +1,3 @@ +[Iris-setosa,50] +[Iris-versicolor,50] +[Iris-virginica,50] \ No newline at end of file diff --git a/data/model/data_distribution_spam.txt b/data/model/data_distribution_spam.txt new file mode 100644 index 00000000..61384b9c --- /dev/null +++ b/data/model/data_distribution_spam.txt @@ -0,0 +1,2 @@ +[ham,577] +[spam,79] \ No newline at end of file diff --git a/data/model/data_distribution_tiny_kdd.txt b/data/model/data_distribution_tiny_kdd.txt new file mode 100644 index 00000000..c5848e49 --- /dev/null +++ b/data/model/data_distribution_tiny_kdd.txt @@ -0,0 +1,2 @@ +[0,194] +[0.01,6] \ No newline at end of file diff --git a/data/model/distribution_iris.txt b/data/model/distribution_iris.txt new file mode 100644 index 00000000..464efae8 --- /dev/null +++ b/data/model/distribution_iris.txt @@ -0,0 +1,3 @@ + Iris-setosa: 33.33% (50 instances) + Iris-versicolor: 33.33% (50 instances) + Iris-virginica: 33.33% (50 instances) diff --git a/data/model/if_then_rules_diabetes.txt b/data/model/if_then_rules_diabetes.txt new file mode 100644 index 00000000..c34e2c9a --- 
/dev/null +++ b/data/model/if_then_rules_diabetes.txt @@ -0,0 +1,130 @@ +IF plasma_glucose > 123 AND + IF plasma_glucose > 166 AND + IF bmi > 23.1 AND + IF blood_pressure > 65 THEN + diabetes = true + IF blood_pressure <= 65 AND + IF diabetes_pedigree > 0.3345 THEN + diabetes = true + IF diabetes_pedigree <= 0.3345 THEN + diabetes = false + IF bmi <= 23.1 THEN + diabetes = false + IF plasma_glucose <= 166 AND + IF bmi > 40.81667 THEN + diabetes = true + IF bmi <= 40.81667 AND + IF age > 40 AND + IF diabetes_pedigree > 0.629 AND + IF blood_pressure > 88 THEN + diabetes = true + IF blood_pressure <= 88 THEN + diabetes = false + IF diabetes_pedigree <= 0.629 AND + IF diabetes_pedigree > 0.3265 THEN + diabetes = true + IF diabetes_pedigree <= 0.3265 AND + IF insulin > 112 THEN + diabetes = true + IF insulin <= 112 AND + IF pregnancies > 9 THEN + diabetes = false + IF pregnancies <= 9 AND + IF age > 47 THEN + diabetes = true + IF age <= 47 AND + IF bmi > 31.85 AND + IF blood_pressure > 38 THEN + diabetes = true + IF blood_pressure <= 38 THEN + diabetes = false + IF bmi <= 31.85 THEN + diabetes = false + IF age <= 40 AND + IF diabetes_pedigree > 0.8395 AND + IF bmi > 38.55 THEN + diabetes = false + IF bmi <= 38.55 THEN + diabetes = true + IF diabetes_pedigree <= 0.8395 AND + IF age > 24 AND + IF blood_pressure > 82 THEN + diabetes = false + IF blood_pressure <= 82 AND + IF blood_pressure > 79 THEN + diabetes = true + IF blood_pressure <= 79 AND + IF diabetes_pedigree > 0.367 THEN + diabetes = false + IF diabetes_pedigree <= 0.367 AND + IF plasma_glucose > 156 THEN + diabetes = false + IF plasma_glucose <= 156 THEN + diabetes = true + IF age <= 24 THEN + diabetes = false + IF plasma_glucose <= 123 AND + IF bmi > 27.075 AND + IF age > 22 AND + IF blood_pressure > 90 THEN + diabetes = false + IF blood_pressure <= 90 AND + IF diabetes_pedigree > 1.194 THEN + diabetes = true + IF diabetes_pedigree <= 1.194 AND + IF diabetes_pedigree > 0.8765 THEN + diabetes = false + IF 
diabetes_pedigree <= 0.8765 AND + IF diabetes_pedigree > 0.6325 THEN + diabetes = true + IF diabetes_pedigree <= 0.6325 AND + IF blood_pressure > 74 AND + IF bmi > 42.8 THEN + diabetes = true + IF bmi <= 42.8 AND + IF bmi > 35.75 THEN + diabetes = false + IF bmi <= 35.75 AND + IF triceps_skin_thickness > 23 AND + IF insulin > 52 THEN + diabetes = false + IF insulin <= 52 AND + IF bmi > 28.3 THEN + diabetes = true + IF bmi <= 28.3 THEN + diabetes = false + IF triceps_skin_thickness <= 23 THEN + diabetes = false + IF blood_pressure <= 74 AND + IF age > 30 AND + IF plasma_glucose > 99 AND + IF diabetes_pedigree > 0.214 THEN + diabetes = true + IF diabetes_pedigree <= 0.214 AND + IF bmi > 28.5 THEN + diabetes = false + IF bmi <= 28.5 THEN + diabetes = true + IF plasma_glucose <= 99 THEN + diabetes = false + IF age <= 30 AND + IF plasma_glucose > 91 AND + IF diabetes_pedigree > 0.1305 THEN + diabetes = false + IF diabetes_pedigree <= 0.1305 THEN + diabetes = true + IF plasma_glucose <= 91 THEN + diabetes = true + IF age <= 22 AND + IF pregnancies > 5 THEN + diabetes = true + IF pregnancies <= 5 THEN + diabetes = false + IF bmi <= 27.075 AND + IF pregnancies > 9 AND + IF bmi > 11.55 THEN + diabetes = false + IF bmi <= 11.55 THEN + diabetes = true + IF pregnancies <= 9 THEN + diabetes = false \ No newline at end of file diff --git a/data/model/if_then_rules_grades.txt b/data/model/if_then_rules_grades.txt new file mode 100644 index 00000000..d33ba28e --- /dev/null +++ b/data/model/if_then_rules_grades.txt @@ -0,0 +1,220 @@ +IF midterm > 75.47 AND + IF midterm > 95.31 AND + IF takehome > 102.13 AND + IF midterm > 98.435 AND + IF tutorial > 98.3 THEN + final = 108.89 + IF tutorial <= 98.3 THEN + final = 107.78 + IF midterm <= 98.435 THEN + final = 102.22 + IF takehome <= 102.13 AND + IF tutorial > 105.45 THEN + final = 88.89 + IF tutorial <= 105.45 AND + IF tutorial > 98.1 THEN + final = 102.78 + IF tutorial <= 98.1 AND + IF tutorial > 88.34 THEN + final = 95 + IF tutorial 
<= 88.34 THEN + final = 90.83 + IF midterm <= 95.31 AND + IF midterm > 90.935 AND + IF tutorial > 100.745 AND + IF tutorial > 103.425 THEN + final = 87.22 + IF tutorial <= 103.425 THEN + final = 94.44 + IF tutorial <= 100.745 AND + IF tutorial > 89.615 AND + IF assignment > 93.16 THEN + final = 68.06 + IF assignment <= 93.16 AND + IF tutorial > 96.055 THEN + final = 72.22 + IF tutorial <= 96.055 THEN + final = 73.89 + IF tutorial <= 89.615 THEN + final = 89.17 + IF midterm <= 90.935 AND + IF midterm > 90 AND + IF tutorial > 91.33 THEN + final = 101.11 + IF tutorial <= 91.33 THEN + final = 99.17 + IF midterm <= 90 AND + IF tutorial > 100.63 AND + IF tutorial > 108.145 THEN + final = 83.33 + IF tutorial <= 108.145 THEN + final = 78.89 + IF tutorial <= 100.63 AND + IF assignment > 88.62 AND + IF takehome > 100.37 AND + IF tutorial > 94.625 THEN + final = 85.56 + IF tutorial <= 94.625 THEN + final = 85 + IF takehome <= 100.37 AND + IF takehome > 93.24 AND + IF tutorial > 89.385 THEN + final = 92.78 + IF tutorial <= 89.385 THEN + final = 92.22 + IF takehome <= 93.24 AND + IF tutorial > 89.18 THEN + final = 90 + IF tutorial <= 89.18 THEN + final = 91.11 + IF assignment <= 88.62 THEN + final = 82.22 + IF midterm <= 75.47 AND + IF midterm > 39.69 AND + IF takehome > 31.875 AND + IF takehome > 100.83 AND + IF tutorial > 95.865 AND + IF tutorial > 98.24 THEN + final = 83.06 + IF tutorial <= 98.24 THEN + final = 80.56 + IF tutorial <= 95.865 AND + IF tutorial > 88.84 THEN + final = 64.72 + IF tutorial <= 88.84 THEN + final = 56.39 + IF takehome <= 100.83 AND + IF midterm > 40.625 AND + IF tutorial > 75.42 AND + IF tutorial > 86.76 AND + IF midterm > 74.06 THEN + final = 39.72 + IF midterm <= 74.06 AND + IF assignment > 74.06 AND + IF assignment > 82.395 AND + IF midterm > 62.5 AND + IF midterm > 65.31 AND + IF assignment > 91.175 AND + IF takehome > 98.33 THEN + final = 68.33 + IF takehome <= 98.33 AND + IF tutorial > 101.52 THEN + final = 63.33 + IF tutorial <= 101.52 AND + 
IF tutorial > 94.345 THEN + final = 65.28 + IF tutorial <= 94.345 THEN + final = 65.56 + IF assignment <= 91.175 AND + IF tutorial > 88.065 THEN + final = 55 + IF tutorial <= 88.065 THEN + final = 58.33 + IF midterm <= 65.31 THEN + final = 75.56 + IF midterm <= 62.5 AND + IF tutorial > 97.105 AND + IF midterm > 55.625 THEN + final = 50 + IF midterm <= 55.625 AND + IF tutorial > 102.38 THEN + final = 67.22 + IF tutorial <= 102.38 AND + IF midterm > 48.75 AND + IF tutorial > 99.065 THEN + final = 63.89 + IF tutorial <= 99.065 THEN + final = 63.61 + IF midterm <= 48.75 THEN + final = 60.56 + IF tutorial <= 97.105 AND + IF tutorial > 96.205 THEN + final = 50 + IF tutorial <= 96.205 THEN + final = 48.89 + IF assignment <= 82.395 AND + IF takehome > 82.13 THEN + final = 72.22 + IF takehome <= 82.13 AND + IF tutorial > 95.16 THEN + final = 66.11 + IF tutorial <= 95.16 THEN + final = 66.67 + IF assignment <= 74.06 AND + IF midterm > 60 THEN + final = 61.39 + IF midterm <= 60 AND + IF tutorial > 91.245 THEN + final = 43.33 + IF tutorial <= 91.245 THEN + final = 46.67 + IF tutorial <= 86.76 AND + IF tutorial > 84.92 AND + IF midterm > 69.69 THEN + final = 80 + IF midterm <= 69.69 THEN + final = 88.89 + IF tutorial <= 84.92 AND + IF tutorial > 81.225 AND + IF tutorial > 82.51 THEN + final = 57.78 + IF tutorial <= 82.51 THEN + final = 62.5 + IF tutorial <= 81.225 THEN + final = 77.5 + IF tutorial <= 75.42 AND + IF assignment > 78.52 AND + IF tutorial > 64.285 AND + IF tutorial > 67.71 THEN + final = 47.78 + IF tutorial <= 67.71 THEN + final = 45.56 + IF tutorial <= 64.285 THEN + final = 50.83 + IF assignment <= 78.52 AND + IF tutorial > 46.3 AND + IF tutorial > 61.81 THEN + final = 65.28 + IF tutorial <= 61.81 THEN + final = 68.33 + IF tutorial <= 46.3 AND + IF midterm > 65.63 THEN + final = 55.83 + IF midterm <= 65.63 THEN + final = 52.5 + IF midterm <= 40.625 THEN + final = 78.89 + IF takehome <= 31.875 AND + IF midterm > 45.94 AND + IF tutorial > 102.275 THEN + final = 
36.11 + IF tutorial <= 102.275 AND + IF tutorial > 91.09 AND + IF tutorial > 97.39 THEN + final = 49.44 + IF tutorial <= 97.39 THEN + final = 50.83 + IF tutorial <= 91.09 THEN + final = 53.33 + IF midterm <= 45.94 THEN + final = 35.83 + IF midterm <= 39.69 AND + IF takehome > 92.685 THEN + final = 28.06 + IF takehome <= 92.685 AND + IF midterm > 39.065 THEN + final = 34.44 + IF midterm <= 39.065 AND + IF midterm > 29.375 AND + IF tutorial > 70.57 AND + IF tutorial > 92.845 THEN + final = 49.17 + IF tutorial <= 92.845 THEN + final = 48.89 + IF tutorial <= 70.57 THEN + final = 51.67 + IF midterm <= 29.375 AND + IF tutorial > 83.145 THEN + final = 47.78 + IF tutorial <= 83.145 THEN + final = 45.56 \ No newline at end of file diff --git a/data/model/if_then_rules_iris.txt b/data/model/if_then_rules_iris.txt new file mode 100644 index 00000000..c41947ed --- /dev/null +++ b/data/model/if_then_rules_iris.txt @@ -0,0 +1,25 @@ +IF petal_length > 2.45 AND + IF petal_width > 1.75 AND + IF petal_length > 4.85 THEN + species = Iris-virginica + IF petal_length <= 4.85 AND + IF sepal_width > 3.1 THEN + species = Iris-versicolor + IF sepal_width <= 3.1 THEN + species = Iris-virginica + IF petal_width <= 1.75 AND + IF petal_length > 4.95 AND + IF petal_width > 1.55 AND + IF petal_length > 5.45 THEN + species = Iris-virginica + IF petal_length <= 5.45 THEN + species = Iris-versicolor + IF petal_width <= 1.55 THEN + species = Iris-virginica + IF petal_length <= 4.95 AND + IF petal_width > 1.65 THEN + species = Iris-virginica + IF petal_width <= 1.65 THEN + species = Iris-versicolor + IF petal_length <= 2.45 THEN + species = Iris-setosa \ No newline at end of file diff --git a/data/model/if_then_rules_iris_missing.txt b/data/model/if_then_rules_iris_missing.txt new file mode 100644 index 00000000..049c35e7 --- /dev/null +++ b/data/model/if_then_rules_iris_missing.txt @@ -0,0 +1,7 @@ + IF petal_width > 0.8 AND + IF petal_width > 1.7 THEN + species = Iris-virginica + IF petal_width <= 
1.7 THEN + species = Iris-versicolor + IF petal_width <= 0.8 THEN + species = Iris-setosa diff --git a/data/model/if_then_rules_iris_missing2.txt b/data/model/if_then_rules_iris_missing2.txt new file mode 100644 index 00000000..4d06fb32 --- /dev/null +++ b/data/model/if_then_rules_iris_missing2.txt @@ -0,0 +1,7 @@ +IF petal_width > 0.7 AND + IF petal_length > 4.75 THEN + species = Iris-virginica + IF petal_length <= 4.75 THEN + species = Iris-versicolor + IF petal_width <= 0.7 THEN + species = Iris-setosa \ No newline at end of file diff --git a/data/model/if_then_rules_iris_missing2_MISSINGS.txt b/data/model/if_then_rules_iris_missing2_MISSINGS.txt new file mode 100644 index 00000000..b4ccc79a --- /dev/null +++ b/data/model/if_then_rules_iris_missing2_MISSINGS.txt @@ -0,0 +1,7 @@ +IF petal_length > 4.75 THEN + species = Iris-virginica + IF petal_length <= 4.75 or missing AND + IF petal_length is missing THEN + species = Iris-setosa + IF petal_length is not missing THEN + species = Iris-versicolor \ No newline at end of file diff --git a/data/model/if_then_rules_iris_sp_chars.txt b/data/model/if_then_rules_iris_sp_chars.txt new file mode 100644 index 00000000..9113c85d --- /dev/null +++ b/data/model/if_then_rules_iris_sp_chars.txt @@ -0,0 +1,25 @@ +IF petal_length > 2.45 AND + IF petal_width_ > 1.75 AND + IF petal_length > 4.85 THEN + species = Iris-virginica + IF petal_length <= 4.85 AND + IF sepal_width > 3.1 THEN + species = Iris-versicolor + IF sepal_width <= 3.1 THEN + species = Iris-virginica + IF petal_width_ <= 1.75 AND + IF petal_length > 4.95 AND + IF petal_width_ > 1.55 AND + IF petal_length > 5.45 THEN + species = Iris-virginica + IF petal_length <= 5.45 THEN + species = Iris-versicolor + IF petal_width_ <= 1.55 THEN + species = Iris-virginica + IF petal_length <= 4.95 AND + IF petal_width_ > 1.65 THEN + species = Iris-virginica + IF petal_width_ <= 1.65 THEN + species = Iris-versicolor + IF petal_length <= 2.45 THEN + species = Iris-setosa \ No newline 
at end of file diff --git a/data/model/if_then_rules_spam.txt b/data/model/if_then_rules_spam.txt new file mode 100644 index 00000000..f168c4fa --- /dev/null +++ b/data/model/if_then_rules_spam.txt @@ -0,0 +1,133 @@ +IF message contains call AND + IF message contains mobile THEN + type = spam + IF message does not contain mobile AND + IF message contains claim THEN + type = spam + IF message does not contain claim AND + IF message contains landline THEN + type = spam + IF message does not contain landline AND + IF message contains text THEN + type = spam + IF message does not contain text AND + IF message contains free THEN + type = spam + IF message does not contain free AND + IF message contains private THEN + type = spam + IF message does not contain private AND + IF message contains message AND + IF message contains please THEN + type = spam + IF message does not contain please THEN + type = ham + IF message does not contain message AND + IF message contains 50 THEN + type = spam + IF message does not contain 50 AND + IF message contains booked THEN + type = spam + IF message does not contain booked AND + IF message contains contact THEN + type = spam + IF message does not contain contact AND + IF message contains luv THEN + type = spam + IF message does not contain luv AND + IF message contains visit THEN + type = spam + IF message does not contain visit AND + IF message contains miss THEN + type = spam + IF message does not contain miss THEN + type = ham + IF message does not contain call AND + IF message contains free AND + IF message contains ü THEN + type = ham + IF message does not contain ü AND + IF message contains ah THEN + type = ham + IF message does not contain ah AND + IF message contains oso THEN + type = ham + IF message does not contain oso AND + IF message contains hav THEN + type = ham + IF message does not contain hav AND + IF message contains txt THEN + type = spam + IF message does not contain txt AND + IF message contains text THEN + type 
= spam + IF message does not contain text AND + IF message contains send THEN + type = spam + IF message does not contain send THEN + type = spam + IF message does not contain free AND + IF message contains txt AND + IF message contains tomorrow THEN + type = ham + IF message does not contain tomorrow THEN + type = spam + IF message does not contain txt AND + IF message contains text AND + IF message contains time THEN + type = ham + IF message does not contain time AND + IF message contains phone THEN + type = ham + IF message does not contain phone AND + IF message contains yes THEN + type = ham + IF message does not contain yes AND + IF message contains luv THEN + type = ham + IF message does not contain luv AND + IF message contains shit THEN + type = ham + IF message does not contain shit THEN + type = spam + IF message does not contain text AND + IF message contains currently THEN + type = spam + IF message does not contain currently AND + IF message contains cost AND + IF message contains apply THEN + type = ham + IF message does not contain apply AND + IF message contains month THEN + type = ham + IF message does not contain month THEN + type = spam + IF message does not contain cost AND + IF message contains 50 THEN + type = spam + IF message does not contain 50 AND + IF message contains rate THEN + type = spam + IF message does not contain rate AND + IF message contains http THEN + type = spam + IF message does not contain http AND + IF message contains lost AND + IF message contains thanks THEN + type = ham + IF message does not contain thanks THEN + type = spam + IF message does not contain lost AND + IF message contains station AND + IF message contains news THEN + type = spam + IF message does not contain news THEN + type = ham + IF message does not contain station AND + IF message contains girls AND + IF message contains story THEN + type = spam + IF message does not contain story THEN + type = ham + IF message does not contain girls THEN + type = 
ham \ No newline at end of file diff --git a/data/model/if_then_rules_spam_textanalysis_1.txt b/data/model/if_then_rules_spam_textanalysis_1.txt new file mode 100644 index 00000000..2ca05075 --- /dev/null +++ b/data/model/if_then_rules_spam_textanalysis_1.txt @@ -0,0 +1,130 @@ +IF message contains call AND + IF message contains mobile THEN + type = spam + IF message does not contain mobile AND + IF message contains claim THEN + type = spam + IF message does not contain claim AND + IF message contains text THEN + type = spam + IF message does not contain text AND + IF message contains landline THEN + type = spam + IF message does not contain landline AND + IF message contains FREE THEN + type = spam + IF message does not contain FREE AND + IF message contains PRIVATE THEN + type = spam + IF message does not contain PRIVATE AND + IF message contains message AND + IF message contains please THEN + type = spam + IF message does not contain please THEN + type = ham + IF message does not contain message AND + IF message contains land THEN + type = spam + IF message does not contain land AND + IF message contains SK38XH THEN + type = spam + IF message does not contain SK38XH AND + IF message contains time AND + IF message contains DARLIN THEN + type = ham + IF message does not contain DARLIN THEN + type = spam + IF message does not contain time AND + IF message contains visit THEN + type = spam + IF message does not contain visit AND + IF message contains 2NITE THEN + type = spam + IF message does not contain 2NITE THEN + type = ham + IF message does not contain call AND + IF message contains FREE AND + IF message contains ü THEN + type = ham + IF message does not contain ü AND + IF message contains il THEN + type = ham + IF message does not contain il AND + IF message contains oso THEN + type = ham + IF message does not contain oso AND + IF message contains Hey THEN + type = ham + IF message does not contain Hey AND + IF message contains txt THEN + type = spam + IF 
message does not contain txt AND + IF message contains text THEN + type = spam + IF message does not contain text AND + IF message contains Co THEN + type = spam + IF message does not contain Co THEN + type = spam + IF message does not contain FREE AND + IF message contains txt AND + IF message contains time THEN + type = ham + IF message does not contain time THEN + type = spam + IF message does not contain txt AND + IF message contains text AND + IF message contains time THEN + type = ham + IF message does not contain time AND + IF message contains phone THEN + type = ham + IF message does not contain phone AND + IF message contains DARLIN THEN + type = ham + IF message does not contain DARLIN AND + IF message contains Yes THEN + type = ham + IF message does not contain Yes AND + IF message contains shit THEN + type = ham + IF message does not contain shit THEN + type = spam + IF message does not contain text AND + IF message contains currently THEN + type = spam + IF message does not contain currently AND + IF message contains Cost AND + IF message contains 1000s THEN + type = spam + IF message does not contain 1000s AND + IF message contains POBOX THEN + type = spam + IF message does not contain POBOX THEN + type = ham + IF message does not contain Cost AND + IF message contains 50 THEN + type = spam + IF message does not contain 50 AND + IF message contains rate THEN + type = spam + IF message does not contain rate AND + IF message contains http THEN + type = spam + IF message does not contain http AND + IF message contains lost AND + IF message contains help THEN + type = spam + IF message does not contain help THEN + type = ham + IF message does not contain lost AND + IF message contains STATION AND + IF message contains gas THEN + type = ham + IF message does not contain gas THEN + type = spam + IF message does not contain STATION AND + IF message contains story THEN + type = ham + IF message does not contain story THEN + type = ham \ No newline at end of 
file diff --git a/data/model/if_then_rules_spam_textanalysis_2.txt b/data/model/if_then_rules_spam_textanalysis_2.txt new file mode 100644 index 00000000..2ca05075 --- /dev/null +++ b/data/model/if_then_rules_spam_textanalysis_2.txt @@ -0,0 +1,130 @@ +IF message contains call AND + IF message contains mobile THEN + type = spam + IF message does not contain mobile AND + IF message contains claim THEN + type = spam + IF message does not contain claim AND + IF message contains text THEN + type = spam + IF message does not contain text AND + IF message contains landline THEN + type = spam + IF message does not contain landline AND + IF message contains FREE THEN + type = spam + IF message does not contain FREE AND + IF message contains PRIVATE THEN + type = spam + IF message does not contain PRIVATE AND + IF message contains message AND + IF message contains please THEN + type = spam + IF message does not contain please THEN + type = ham + IF message does not contain message AND + IF message contains land THEN + type = spam + IF message does not contain land AND + IF message contains SK38XH THEN + type = spam + IF message does not contain SK38XH AND + IF message contains time AND + IF message contains DARLIN THEN + type = ham + IF message does not contain DARLIN THEN + type = spam + IF message does not contain time AND + IF message contains visit THEN + type = spam + IF message does not contain visit AND + IF message contains 2NITE THEN + type = spam + IF message does not contain 2NITE THEN + type = ham + IF message does not contain call AND + IF message contains FREE AND + IF message contains ü THEN + type = ham + IF message does not contain ü AND + IF message contains il THEN + type = ham + IF message does not contain il AND + IF message contains oso THEN + type = ham + IF message does not contain oso AND + IF message contains Hey THEN + type = ham + IF message does not contain Hey AND + IF message contains txt THEN + type = spam + IF message does not contain txt AND 
+ IF message contains text THEN + type = spam + IF message does not contain text AND + IF message contains Co THEN + type = spam + IF message does not contain Co THEN + type = spam + IF message does not contain FREE AND + IF message contains txt AND + IF message contains time THEN + type = ham + IF message does not contain time THEN + type = spam + IF message does not contain txt AND + IF message contains text AND + IF message contains time THEN + type = ham + IF message does not contain time AND + IF message contains phone THEN + type = ham + IF message does not contain phone AND + IF message contains DARLIN THEN + type = ham + IF message does not contain DARLIN AND + IF message contains Yes THEN + type = ham + IF message does not contain Yes AND + IF message contains shit THEN + type = ham + IF message does not contain shit THEN + type = spam + IF message does not contain text AND + IF message contains currently THEN + type = spam + IF message does not contain currently AND + IF message contains Cost AND + IF message contains 1000s THEN + type = spam + IF message does not contain 1000s AND + IF message contains POBOX THEN + type = spam + IF message does not contain POBOX THEN + type = ham + IF message does not contain Cost AND + IF message contains 50 THEN + type = spam + IF message does not contain 50 AND + IF message contains rate THEN + type = spam + IF message does not contain rate AND + IF message contains http THEN + type = spam + IF message does not contain http AND + IF message contains lost AND + IF message contains help THEN + type = spam + IF message does not contain help THEN + type = ham + IF message does not contain lost AND + IF message contains STATION AND + IF message contains gas THEN + type = ham + IF message does not contain gas THEN + type = spam + IF message does not contain STATION AND + IF message contains story THEN + type = ham + IF message does not contain story THEN + type = ham \ No newline at end of file diff --git 
a/data/model/if_then_rules_spam_textanalysis_3.txt b/data/model/if_then_rules_spam_textanalysis_3.txt new file mode 100644 index 00000000..29775bd7 --- /dev/null +++ b/data/model/if_then_rules_spam_textanalysis_3.txt @@ -0,0 +1,136 @@ +IF message contains call AND + IF message contains mobile THEN + type = spam + IF message does not contain mobile AND + IF message contains claim THEN + type = spam + IF message does not contain claim AND + IF message contains landline THEN + type = spam + IF message does not contain landline AND + IF message contains 04 THEN + type = spam + IF message does not contain 04 AND + IF message contains free THEN + type = spam + IF message does not contain free AND + IF message contains line THEN + type = spam + IF message does not contain line AND + IF message contains texts THEN + type = spam + IF message does not contain texts AND + IF message contains message THEN + type = spam + IF message does not contain message AND + IF message contains messages THEN + type = spam + IF message does not contain messages AND + IF message contains missed THEN + type = spam + IF message does not contain missed AND + IF message contains little THEN + type = spam + IF message does not contain little AND + IF message contains collection THEN + type = spam + IF message does not contain collection AND + IF message contains visit THEN + type = spam + IF message does not contain visit THEN + type = ham + IF message does not contain call AND + IF message contains free AND + IF message contains ü THEN + type = ham + IF message does not contain ü AND + IF message contains wan THEN + type = ham + IF message does not contain wan AND + IF message contains booked THEN + type = ham + IF message does not contain booked AND + IF message contains home THEN + type = ham + IF message does not contain home AND + IF message contains txt THEN + type = spam + IF message does not contain txt AND + IF message contains text THEN + type = spam + IF message does not contain text 
AND + IF message contains co THEN + type = spam + IF message does not contain co THEN + type = spam + IF message does not contain free AND + IF message contains txt AND + IF message contains tomorrow THEN + type = ham + IF message does not contain tomorrow THEN + type = spam + IF message does not contain txt AND + IF message contains text AND + IF message contains flirt THEN + type = spam + IF message does not contain flirt AND + IF message contains dating THEN + type = spam + IF message does not contain dating AND + IF message contains money THEN + type = spam + IF message does not contain money AND + IF message contains unsubscribe THEN + type = spam + IF message does not contain unsubscribe AND + IF message contains times THEN + type = ham + IF message does not contain times THEN + type = ham + IF message does not contain text AND + IF message contains send AND + IF message contains box THEN + type = spam + IF message does not contain box AND + IF message contains xxx THEN + type = spam + IF message does not contain xxx AND + IF message contains care THEN + type = spam + IF message does not contain care THEN + type = ham + IF message does not contain send AND + IF message contains sex THEN + type = spam + IF message does not contain sex AND + IF message contains 1000s THEN + type = spam + IF message does not contain 1000s AND + IF message contains http THEN + type = spam + IF message does not contain http AND + IF message contains calls THEN + type = spam + IF message does not contain calls AND + IF message contains rate THEN + type = spam + IF message does not contain rate AND + IF message contains lost AND + IF message contains help THEN + type = spam + IF message does not contain help THEN + type = ham + IF message does not contain lost AND + IF message contains station AND + IF message contains news THEN + type = spam + IF message does not contain news THEN + type = ham + IF message does not contain station AND + IF message contains girls AND + IF message 
contains waiting THEN + type = spam + IF message does not contain waiting THEN + type = ham + IF message does not contain girls THEN + type = ham \ No newline at end of file diff --git a/data/model/if_then_rules_spam_textanalysis_4.txt b/data/model/if_then_rules_spam_textanalysis_4.txt new file mode 100644 index 00000000..f168c4fa --- /dev/null +++ b/data/model/if_then_rules_spam_textanalysis_4.txt @@ -0,0 +1,133 @@ +IF message contains call AND + IF message contains mobile THEN + type = spam + IF message does not contain mobile AND + IF message contains claim THEN + type = spam + IF message does not contain claim AND + IF message contains landline THEN + type = spam + IF message does not contain landline AND + IF message contains text THEN + type = spam + IF message does not contain text AND + IF message contains free THEN + type = spam + IF message does not contain free AND + IF message contains private THEN + type = spam + IF message does not contain private AND + IF message contains message AND + IF message contains please THEN + type = spam + IF message does not contain please THEN + type = ham + IF message does not contain message AND + IF message contains 50 THEN + type = spam + IF message does not contain 50 AND + IF message contains booked THEN + type = spam + IF message does not contain booked AND + IF message contains contact THEN + type = spam + IF message does not contain contact AND + IF message contains luv THEN + type = spam + IF message does not contain luv AND + IF message contains visit THEN + type = spam + IF message does not contain visit AND + IF message contains miss THEN + type = spam + IF message does not contain miss THEN + type = ham + IF message does not contain call AND + IF message contains free AND + IF message contains ü THEN + type = ham + IF message does not contain ü AND + IF message contains ah THEN + type = ham + IF message does not contain ah AND + IF message contains oso THEN + type = ham + IF message does not contain oso AND 
+ IF message contains hav THEN + type = ham + IF message does not contain hav AND + IF message contains txt THEN + type = spam + IF message does not contain txt AND + IF message contains text THEN + type = spam + IF message does not contain text AND + IF message contains send THEN + type = spam + IF message does not contain send THEN + type = spam + IF message does not contain free AND + IF message contains txt AND + IF message contains tomorrow THEN + type = ham + IF message does not contain tomorrow THEN + type = spam + IF message does not contain txt AND + IF message contains text AND + IF message contains time THEN + type = ham + IF message does not contain time AND + IF message contains phone THEN + type = ham + IF message does not contain phone AND + IF message contains yes THEN + type = ham + IF message does not contain yes AND + IF message contains luv THEN + type = ham + IF message does not contain luv AND + IF message contains shit THEN + type = ham + IF message does not contain shit THEN + type = spam + IF message does not contain text AND + IF message contains currently THEN + type = spam + IF message does not contain currently AND + IF message contains cost AND + IF message contains apply THEN + type = ham + IF message does not contain apply AND + IF message contains month THEN + type = ham + IF message does not contain month THEN + type = spam + IF message does not contain cost AND + IF message contains 50 THEN + type = spam + IF message does not contain 50 AND + IF message contains rate THEN + type = spam + IF message does not contain rate AND + IF message contains http THEN + type = spam + IF message does not contain http AND + IF message contains lost AND + IF message contains thanks THEN + type = ham + IF message does not contain thanks THEN + type = spam + IF message does not contain lost AND + IF message contains station AND + IF message contains news THEN + type = spam + IF message does not contain news THEN + type = ham + IF message does not 
contain station AND + IF message contains girls AND + IF message contains story THEN + type = spam + IF message does not contain story THEN + type = ham + IF message does not contain girls THEN + type = ham \ No newline at end of file diff --git a/data/model/if_then_rules_spam_textanalysis_5.txt b/data/model/if_then_rules_spam_textanalysis_5.txt new file mode 100644 index 00000000..ecbaad11 --- /dev/null +++ b/data/model/if_then_rules_spam_textanalysis_5.txt @@ -0,0 +1,238 @@ +IF message is equal to congratulations! thanks to a good friend u have won the £2,000 xmas prize. 2 claim is easy, just call 08718726971 now! only 10p per minute. bt-national-rate. THEN + type = spam + IF message is not equal to congratulations! thanks to a good friend u have won the £2,000 xmas prize. 2 claim is easy, just call 08718726971 now! only 10p per minute. bt-national-rate. AND + IF message is equal to 88066 from 88066 lost 3pound help THEN + type = spam + IF message is not equal to 88066 from 88066 lost 3pound help AND + IF message is equal to urgent!! your 4* costa del sol holiday or £5000 await collection. call 09050090044 now toclaim. sae, tc s, pobox334, stockport, sk38xh, cost£1.50/pm, max10mins THEN + type = spam + IF message is not equal to urgent!! your 4* costa del sol holiday or £5000 await collection. call 09050090044 now toclaim. sae, tc s, pobox334, stockport, sk38xh, cost£1.50/pm, max10mins AND + IF message is equal to double your mins & txts on orange or 1/2 price linerental - motorola and sonyericsson with b/tooth free-nokia free call mobileupd8 on 08000839402 or2optout/hv9d THEN + type = spam + IF message is not equal to double your mins & txts on orange or 1/2 price linerental - motorola and sonyericsson with b/tooth free-nokia free call mobileupd8 on 08000839402 or2optout/hv9d AND + IF message is equal to urgent! your mobile no 07xxxxxxxxx won a £2,000 bonus caller prize on 02/06/03! this is the 2nd attempt to reach you! call 09066362231 asap! 
box97n7qp, 150ppm THEN + type = spam + IF message is not equal to urgent! your mobile no 07xxxxxxxxx won a £2,000 bonus caller prize on 02/06/03! this is the 2nd attempt to reach you! call 09066362231 asap! box97n7qp, 150ppm AND + IF message is equal to hi - this is your mailbox messaging sms alert. you have 4 messages. you have 21 matches. please call back on 09056242159 to retrieve your messages and matches THEN + type = spam + IF message is not equal to hi - this is your mailbox messaging sms alert. you have 4 messages. you have 21 matches. please call back on 09056242159 to retrieve your messages and matches AND + IF message is equal to marvel mobile play the official ultimate spider-man game (£4.50) on ur mobile right now. text spider to 83338 for the game & we ll send u a free 8ball wallpaper THEN + type = spam + IF message is not equal to marvel mobile play the official ultimate spider-man game (£4.50) on ur mobile right now. text spider to 83338 for the game & we ll send u a free 8ball wallpaper AND + IF message is equal to tddnewsletter@emc1.co.uk (more games from thedailydraw) dear helen, dozens of free games - with great prizeswith.. THEN + type = spam + IF message is not equal to tddnewsletter@emc1.co.uk (more games from thedailydraw) dear helen, dozens of free games - with great prizeswith.. AND + IF message is equal to http//tms. widelive.com/index. wml?id=820554ad0a1705572711&first=true¡c c ringtone¡ THEN + type = spam + IF message is not equal to http//tms. widelive.com/index. wml?id=820554ad0a1705572711&first=true¡c c ringtone¡ AND + IF message is equal to free entry in 2 a wkly comp to win fa cup final tkts 21st may 2005. text fa to 87121 to receive entry question(std txt rate)t&c's apply 08452810075over18's THEN + type = spam + IF message is not equal to free entry in 2 a wkly comp to win fa cup final tkts 21st may 2005. text fa to 87121 to receive entry question(std txt rate)t&c's apply 08452810075over18's AND + IF message is equal to winner!! 
as a valued network customer you have been selected to receivea £900 prize reward! to claim call 09061701461. claim code kl341. valid 12 hours only. THEN + type = spam + IF message is not equal to winner!! as a valued network customer you have been selected to receivea £900 prize reward! to claim call 09061701461. claim code kl341. valid 12 hours only. AND + IF message is equal to free for 1st week! no1 nokia tone 4 ur mob every week just txt nokia to 87077 get txting and tell ur mates. zed pobox 36504 w45wq norm150p/tone 16+ THEN + type = spam + IF message is not equal to free for 1st week! no1 nokia tone 4 ur mob every week just txt nokia to 87077 get txting and tell ur mates. zed pobox 36504 w45wq norm150p/tone 16+ AND + IF message is equal to congrats! 1 year special cinema pass for 2 is yours. call 09061209465 now! c suprman v, matrix3, starwars3, etc all 4 free! bx420-ip4-5we. 150pm. dont miss out! THEN + type = spam + IF message is not equal to congrats! 1 year special cinema pass for 2 is yours. call 09061209465 now! c suprman v, matrix3, starwars3, etc all 4 free! bx420-ip4-5we. 150pm. dont miss out! AND + IF message is equal to important message. this is a final contact attempt. you have important messages waiting out our customer claims dept. expires 13/4/04. call 08717507382 now! THEN + type = spam + IF message is not equal to important message. this is a final contact attempt. you have important messages waiting out our customer claims dept. expires 13/4/04. call 08717507382 now! AND + IF message is equal to latest news! police station toilet stolen, cops have nothing to go on! THEN + type = spam + IF message is not equal to latest news! police station toilet stolen, cops have nothing to go on! AND + IF message is equal to free camera phones with linerental from 4.49/month with 750 cross ntwk mins. 1/2 price txt bundle deals also avble. 
call 08001950382 or call2optout/j mf THEN + type = spam + IF message is not equal to free camera phones with linerental from 4.49/month with 750 cross ntwk mins. 1/2 price txt bundle deals also avble. call 08001950382 or call2optout/j mf AND + IF message is equal to splashmobile: choose from 1000s of gr8 tones each wk! this is a subscrition service with weekly tones costing 300p. u have one credit - kick back and enjoy THEN + type = spam + IF message is not equal to splashmobile: choose from 1000s of gr8 tones each wk! this is a subscrition service with weekly tones costing 300p. u have one credit - kick back and enjoy AND + IF message is equal to urgent! you have won a 1 week free membership in our £100,000 prize jackpot! txt the word: claim to no: 81010 t&c www.dbuk.net lccltd pobox 4403ldnw1a7rw18 THEN + type = spam + IF message is not equal to urgent! you have won a 1 week free membership in our £100,000 prize jackpot! txt the word: claim to no: 81010 t&c www.dbuk.net lccltd pobox 4403ldnw1a7rw18 AND + IF message is equal to your free ringtone is waiting to be collected. simply text the password "mix" to 85069 to verify. get usher and britney. fml, po box 5249, mk17 92h. 450ppw 16 THEN + type = spam + IF message is not equal to your free ringtone is waiting to be collected. simply text the password "mix" to 85069 to verify. get usher and britney. fml, po box 5249, mk17 92h. 450ppw 16 AND + IF message is equal to txt: call to no: 86888 & claim your reward of 3 hours talk time to use from your phone now! subscribe6gbp/mnth inc 3hrs 16 stop?txtstop www.gamb.tv THEN + type = spam + IF message is not equal to txt: call to no: 86888 & claim your reward of 3 hours talk time to use from your phone now! subscribe6gbp/mnth inc 3hrs 16 stop?txtstop www.gamb.tv AND + IF message is equal to you have won a guaranteed £1000 cash or a £2000 prize. 
to claim yr prize call our customer service representative on 08714712379 between 10am-7pm cost 10p THEN + type = spam + IF message is not equal to you have won a guaranteed £1000 cash or a £2000 prize. to claim yr prize call our customer service representative on 08714712379 between 10am-7pm cost 10p AND + IF message is equal to moby pub quiz.win a £100 high street prize if u know who the new duchess of cornwall will be? txt her first name to 82277.unsub stop £1.50 008704050406 sp arrow THEN + type = spam + IF message is not equal to moby pub quiz.win a £100 high street prize if u know who the new duchess of cornwall will be? txt her first name to 82277.unsub stop £1.50 008704050406 sp arrow AND + IF message is equal to free entry into our £250 weekly comp just send the word enter to 88877 now. 18 t&c www.textcomp.com THEN + type = spam + IF message is not equal to free entry into our £250 weekly comp just send the word enter to 88877 now. 18 t&c www.textcomp.com AND + IF message is equal to congrats! 2 mobile 3g videophones r yours. call 09063458130 now! videochat wid your mates, play java games, dload polyph music, noline rentl. THEN + type = spam + IF message is not equal to congrats! 2 mobile 3g videophones r yours. call 09063458130 now! videochat wid your mates, play java games, dload polyph music, noline rentl. AND + IF message is equal to january male sale! hot gay chat now cheaper, call 08709222922. national rate from 1.5p/min cheap to 7.8p/min peak! to stop texts call 08712460324 (10p/min) THEN + type = spam + IF message is not equal to january male sale! hot gay chat now cheaper, call 08709222922. national rate from 1.5p/min cheap to 7.8p/min peak! 
to stop texts call 08712460324 (10p/min) AND + IF message is equal to free message activate your 500 free text messages by replying to this message with the word free for terms & conditions, visit www.07781482378.com THEN + type = spam + IF message is not equal to free message activate your 500 free text messages by replying to this message with the word free for terms & conditions, visit www.07781482378.com AND + IF message is equal to ur cash-balance is currently 500 pounds - to maximize ur cash-in now send go to 86688 only 150p/msg. cc: 08718720201 po box 114/14 tcr/w1 THEN + type = spam + IF message is not equal to ur cash-balance is currently 500 pounds - to maximize ur cash-in now send go to 86688 only 150p/msg. cc: 08718720201 po box 114/14 tcr/w1 AND + IF message is equal to you have won a nokia 7250i. this is what you get when you win our free auction. to take part send nokia to 86021 now. hg/suite342/2lands row/w1jhl 16+ THEN + type = spam + IF message is not equal to you have won a nokia 7250i. this is what you get when you win our free auction. to take part send nokia to 86021 now. hg/suite342/2lands row/w1jhl 16+ AND + IF message is equal to santa calling! would your little ones like a call from santa xmas eve? call 09058094583 to book your time. THEN + type = spam + IF message is not equal to santa calling! would your little ones like a call from santa xmas eve? call 09058094583 to book your time. AND + IF message is equal to call 09095350301 and send our girls into erotic ecstacy. just 60p/min. to stop texts call 08712460324 (nat rate) THEN + type = spam + IF message is not equal to call 09095350301 and send our girls into erotic ecstacy. just 60p/min. to stop texts call 08712460324 (nat rate) AND + IF message is equal to urgent! your mobile number *************** won a £2000 bonus caller prize on 10/06/03! this is the 2nd attempt to reach you! call 09066368753 asap! box 97n7qp, 150ppm THEN + type = spam + IF message is not equal to urgent! 
your mobile number *************** won a £2000 bonus caller prize on 10/06/03! this is the 2nd attempt to reach you! call 09066368753 asap! box 97n7qp, 150ppm AND + IF message is equal to had your mobile 11 months or more? u r entitled to update to the latest colour mobiles with camera for free! call the mobile update co free on 08002986030 THEN + type = spam + IF message is not equal to had your mobile 11 months or more? u r entitled to update to the latest colour mobiles with camera for free! call the mobile update co free on 08002986030 AND + IF message is equal to you have 1 new message. please call 08718738034. THEN + type = spam + IF message is not equal to you have 1 new message. please call 08718738034. AND + IF message is equal to free entry to the gr8prizes wkly comp 4 a chance to win the latest nokia 8800, psp or £250 cash every wk.txt great to 80878 http//www.gr8prizes.com 08715705022 THEN + type = spam + IF message is not equal to free entry to the gr8prizes wkly comp 4 a chance to win the latest nokia 8800, psp or £250 cash every wk.txt great to 80878 http//www.gr8prizes.com 08715705022 AND + IF message is equal to rct' thnq adrian for u text. rgds vatian THEN + type = spam + IF message is not equal to rct' thnq adrian for u text. rgds vatian AND + IF message is equal to will u meet ur dream partner soon? is ur career off 2 a flyng start? 2 find out free, txt horo followed by ur star sign, e. g. horo aries THEN + type = spam + IF message is not equal to will u meet ur dream partner soon? is ur career off 2 a flyng start? 2 find out free, txt horo followed by ur star sign, e. g. horo aries AND + IF message is equal to how about getting in touch with folks waiting for company? just txt back your name and age to opt in! enjoy the community (150p/sms) THEN + type = spam + IF message is not equal to how about getting in touch with folks waiting for company? just txt back your name and age to opt in! 
enjoy the community (150p/sms) AND + IF message is equal to thanks for your ringtone order, ref number r836. your mobile will be charged £4.50. should your tone not arrive please call customer services on 09065069154 THEN + type = spam + IF message is not equal to thanks for your ringtone order, ref number r836. your mobile will be charged £4.50. should your tone not arrive please call customer services on 09065069154 AND + IF message is equal to you have won a guaranteed 32000 award or maybe even £1000 cash to claim ur award call free on 0800 ..... (18+). its a legitimat efreefone number wat do u think??? THEN + type = spam + IF message is not equal to you have won a guaranteed 32000 award or maybe even £1000 cash to claim ur award call free on 0800 ..... (18+). its a legitimat efreefone number wat do u think??? AND + IF message is equal to urgent we are trying to contact you last weekends draw shows u have won a £1000 prize guaranteed call 09064017295 claim code k52 valid 12hrs 150p pm THEN + type = spam + IF message is not equal to urgent we are trying to contact you last weekends draw shows u have won a £1000 prize guaranteed call 09064017295 claim code k52 valid 12hrs 150p pm AND + IF message is equal to 2p per min to call germany 08448350055 from your bt line. just 2p per min. check planettalkinstant.com for info & t's & c's. text stop to opt out THEN + type = spam + IF message is not equal to 2p per min to call germany 08448350055 from your bt line. just 2p per min. check planettalkinstant.com for info & t's & c's. text stop to opt out AND + IF message is equal to december only! had your mobile 11mths+? you are entitled to update to the latest colour camera mobile for free! call the mobile update co free on 08002986906 THEN + type = spam + IF message is not equal to december only! had your mobile 11mths+? you are entitled to update to the latest colour camera mobile for free! call the mobile update co free on 08002986906 AND + IF message is equal to urgent! 
call 09061749602 from landline. your complimentary 4* tenerife holiday or £10,000 cash await collection sae t&cs box 528 hp20 1yf 150ppm 18+ THEN + type = spam + IF message is not equal to urgent! call 09061749602 from landline. your complimentary 4* tenerife holiday or £10,000 cash await collection sae t&cs box 528 hp20 1yf 150ppm 18+ AND + IF message is equal to not heard from u4 a while. call 4 rude chat private line 01223585334 to cum. wan 2c pics of me gettin shagged then text pix to 8552. 2end send stop 8552 sam xxx THEN + type = spam + IF message is not equal to not heard from u4 a while. call 4 rude chat private line 01223585334 to cum. wan 2c pics of me gettin shagged then text pix to 8552. 2end send stop 8552 sam xxx AND + IF message is equal to had your contract mobile 11 mnths? latest motorola, nokia etc. all free! double mins & text on orange tariffs. text yes for callback, no to remove from records. THEN + type = spam + IF message is not equal to had your contract mobile 11 mnths? latest motorola, nokia etc. all free! double mins & text on orange tariffs. text yes for callback, no to remove from records. AND + IF message is equal to for the most sparkling shopping breaks from 45 per person; call 0121 2025050 or visit www.shortbreaks.org.uk THEN + type = spam + IF message is not equal to for the most sparkling shopping breaks from 45 per person; call 0121 2025050 or visit www.shortbreaks.org.uk AND + IF message is equal to urgent! we are trying to contact u. todays draw shows that you have won a £800 prize guaranteed. call 09050001808 from land line. claim m95. valid12hrs only THEN + type = spam + IF message is not equal to urgent! we are trying to contact u. todays draw shows that you have won a £800 prize guaranteed. call 09050001808 from land line. claim m95. valid12hrs only AND + IF message is equal to private! your 2003 account statement for 07808247860 shows 800 un-redeemed s. i. m. points. 
call 08719899229 identifier code: 40411 expires 06/11/04 THEN + type = spam + IF message is not equal to private! your 2003 account statement for 07808247860 shows 800 un-redeemed s. i. m. points. call 08719899229 identifier code: 40411 expires 06/11/04 AND + IF message is equal to want explicit sex in 30 secs? ring 02073162414 now! costs 20p/min gsex pobox 2667 wc1n 3xx THEN + type = spam + IF message is not equal to want explicit sex in 30 secs? ring 02073162414 now! costs 20p/min gsex pobox 2667 wc1n 3xx AND + IF message is equal to sorry i missed your call let's talk when you have the time. i'm on 07090201529 THEN + type = spam + IF message is not equal to sorry i missed your call let's talk when you have the time. i'm on 07090201529 AND + IF message is equal to hi ya babe x u 4goten bout me?' scammers getting smart..though this is a regular vodafone no, if you respond you get further prem rate msg/subscription. other nos used also. beware! THEN + type = spam + IF message is not equal to hi ya babe x u 4goten bout me?' scammers getting smart..though this is a regular vodafone no, if you respond you get further prem rate msg/subscription. other nos used also. beware! AND + IF message is equal to you are awarded a sipix digital camera! call 09061221061 from landline. delivery within 28days. t cs box177. m221bp. 2yr warranty. 150ppm. 16 . p p£3.99 THEN + type = spam + IF message is not equal to you are awarded a sipix digital camera! call 09061221061 from landline. delivery within 28days. t cs box177. m221bp. 2yr warranty. 150ppm. 16 . p p£3.99 AND + IF message is equal to a £400 xmas reward is waiting for you! our computer has randomly picked you from our loyal mobile customers to receive a £400 reward. just call 09066380611 THEN + type = spam + IF message is not equal to a £400 xmas reward is waiting for you! our computer has randomly picked you from our loyal mobile customers to receive a £400 reward. just call 09066380611 AND + IF message is equal to money!!! 
you r a lucky winner ! 2 claim your prize text money 2 88600 over £1million to give away ! ppt150x3+normal text rate box403 w1t1jy THEN + type = spam + IF message is not equal to money!!! you r a lucky winner ! 2 claim your prize text money 2 88600 over £1million to give away ! ppt150x3+normal text rate box403 w1t1jy AND + IF message is equal to dear matthew please call 09063440451 from a landline, your complimentary 4*lux tenerife holiday or £1000 cash await collection. ppm150 sae t&cs box334 sk38xh. THEN + type = spam + IF message is not equal to dear matthew please call 09063440451 from a landline, your complimentary 4*lux tenerife holiday or £1000 cash await collection. ppm150 sae t&cs box334 sk38xh. AND + IF message is equal to you are being contacted by our dating service by someone you know! to find out who it is, call from a land line 09050000928. pobox45w2tg150p THEN + type = spam + IF message is not equal to you are being contacted by our dating service by someone you know! to find out who it is, call from a land line 09050000928. pobox45w2tg150p AND + IF message is equal to sms services. for your inclusive text credits, pls goto www.comuk.net login= 3qxj9 unsubscribe with stop, no extra charge. help 08702840625.comuk. 220-cm2 9ae THEN + type = spam + IF message is not equal to sms services. for your inclusive text credits, pls goto www.comuk.net login= 3qxj9 unsubscribe with stop, no extra charge. help 08702840625.comuk. 220-cm2 9ae AND + IF message is equal to get your garden ready for summer with a free selection of summer bulbs and seeds worth £33:50 only with the scotsman this saturday. to stop go2 notxt.co.uk THEN + type = spam + IF message is not equal to get your garden ready for summer with a free selection of summer bulbs and seeds worth £33:50 only with the scotsman this saturday. to stop go2 notxt.co.uk AND + IF message is equal to all the lastest from stereophonics, marley, dizzee racal, libertines and the strokes! 
win nookii games with flirt!! click themob wap bookmark or text wap to 82468 THEN + type = spam + IF message is not equal to all the lastest from stereophonics, marley, dizzee racal, libertines and the strokes! win nookii games with flirt!! click themob wap bookmark or text wap to 82468 AND + IF message is equal to private! your 2003 account statement for shows 800 un-redeemed s.i.m. points. call 08718738001 identifier code: 49557 expires 26/11/04 THEN + type = spam + IF message is not equal to private! your 2003 account statement for shows 800 un-redeemed s.i.m. points. call 08718738001 identifier code: 49557 expires 26/11/04 AND + IF message is equal to urgent! last weekend's draw shows that you have won £1000 cash or a spanish holiday! call now 09050000332 to claim. t&c: rstm, sw7 3ss. 150ppm THEN + type = spam + IF message is not equal to urgent! last weekend's draw shows that you have won £1000 cash or a spanish holiday! call now 09050000332 to claim. t&c: rstm, sw7 3ss. 150ppm AND + IF message is equal to this message is free. welcome to the new & improved sex & dogging club! to unsubscribe from this service reply stop. msgs@150p 18 only THEN + type = spam + IF message is not equal to this message is free. welcome to the new & improved sex & dogging club! to unsubscribe from this service reply stop. msgs@150p 18 only AND + IF message is equal to back 2 work 2morro half term over! can u c me 2nite 4 some sexy passion b4 i have 2 go back? chat now 09099726481 luv dena calls £1/minmobsmorelkpobox177hp51fl THEN + type = spam + IF message is not equal to back 2 work 2morro half term over! can u c me 2nite 4 some sexy passion b4 i have 2 go back? chat now 09099726481 luv dena calls £1/minmobsmorelkpobox177hp51fl AND + IF message is equal to the current leading bid is 151. to pause this auction send out. customer care: 08718726270 THEN + type = spam + IF message is not equal to the current leading bid is 151. to pause this auction send out. 
customer care: 08718726270 AND + IF message is equal to filthy stories and girls waiting for your THEN + type = spam + IF message is not equal to filthy stories and girls waiting for your AND + IF message is equal to download as many ringtones as u like no restrictions, 1000s 2 choose. u can even send 2 yr buddys. txt sir to 80082 £3 THEN + type = spam + IF message is not equal to download as many ringtones as u like no restrictions, 1000s 2 choose. u can even send 2 yr buddys. txt sir to 80082 £3 AND + IF message is equal to asked 3mobile if 0870 chatlines inclu in free mins. india cust servs sed yes. l8er got mega bill. 3 dont giv a shit. bailiff due in days. i o £250 3 want £800 THEN + type = spam + IF message is not equal to asked 3mobile if 0870 chatlines inclu in free mins. india cust servs sed yes. l8er got mega bill. 3 dont giv a shit. bailiff due in days. i o £250 3 want £800 AND + IF message is equal to dating:i have had two of these. only started after i sent a text to talk sport radio last week. any connection do you think or coincidence? THEN + type = spam + IF message is not equal to dating:i have had two of these. only started after i sent a text to talk sport radio last week. any connection do you think or coincidence? AND + IF message is equal to text & meet someone sexy today. u can find a date or even flirt its up to u. join 4 just 10p. reply with name & age eg sam 25. 18 -msg recd@thirtyeight pence THEN + type = spam + IF message is not equal to text & meet someone sexy today. u can find a date or even flirt its up to u. join 4 just 10p. reply with name & age eg sam 25. 18 -msg recd@thirtyeight pence AND + IF message is equal to camera - you are awarded a sipix digital camera! call 09061221066 fromm landline. delivery within 28 days. THEN + type = spam + IF message is not equal to camera - you are awarded a sipix digital camera! call 09061221066 fromm landline. delivery within 28 days. 
AND + IF message is equal to thanks for your ringtone order, ref number k718. your mobile will be charged £4.50. should your tone not arrive please call customer services on 09065069120 THEN + type = spam + IF message is not equal to thanks for your ringtone order, ref number k718. your mobile will be charged £4.50. should your tone not arrive please call customer services on 09065069120 AND + IF message is equal to six chances to win cash! from 100 to 20,000 pounds txt> csh11 and send to 87575. cost 150p/day, 6days, 16+ tsandcs apply reply hl 4 info THEN + type = spam + IF message is not equal to six chances to win cash! from 100 to 20,000 pounds txt> csh11 and send to 87575. cost 150p/day, 6days, 16+ tsandcs apply reply hl 4 info AND + IF message is equal to win a year supply of cds 4 a store of ur choice worth £500 & enter our £100 weekly draw txt music to 87066 ts&cs www.ldew.com.subs16+1win150ppmx3 THEN + type = spam + IF message is not equal to win a year supply of cds 4 a store of ur choice worth £500 & enter our £100 weekly draw txt music to 87066 ts&cs www.ldew.com.subs16+1win150ppmx3 AND + IF message is equal to 500 new mobiles from 2004, must go! txt: nokia to no: 89545 & collect yours today!from only £1 www.4-tc.biz 2optout 087187262701.50gbp/mtmsg18 THEN + type = spam + IF message is not equal to 500 new mobiles from 2004, must go! txt: nokia to no: 89545 & collect yours today!from only £1 www.4-tc.biz 2optout 087187262701.50gbp/mtmsg18 AND + IF message is equal to you have 1 new message. call 0207-083-6089 THEN + type = spam + IF message is not equal to you have 1 new message. call 0207-083-6089 AND + IF message is equal to had your mobile 11mths ? update for free to oranges latest colour camera mobiles & unlimited weekend calls. call mobile upd8 on freefone 08000839402 or 2stoptx THEN + type = spam + IF message is not equal to had your mobile 11mths ? update for free to oranges latest colour camera mobiles & unlimited weekend calls. 
call mobile upd8 on freefone 08000839402 or 2stoptx AND + IF message is equal to freemsg hey there darling it's been 3 week's now and no word back! i'd like some fun you up for it still? tb ok! xxx std chgs to send, £1.50 to rcv THEN + type = spam + IF message is not equal to freemsg hey there darling it's been 3 week's now and no word back! i'd like some fun you up for it still? tb ok! xxx std chgs to send, £1.50 to rcv AND + IF message is equal to u r subscribed 2 textcomp 250 wkly comp. 1st wk?s free question follows, subsequent wks charged@150p/msg.2 unsubscribe txt stop 2 84128,custcare 08712405020 THEN + type = spam + IF message is not equal to u r subscribed 2 textcomp 250 wkly comp. 1st wk?s free question follows, subsequent wks charged@150p/msg.2 unsubscribe txt stop 2 84128,custcare 08712405020 AND + IF message is equal to shop till u drop, is it you, either 10k, 5k, £500 cash or £100 travel voucher, call now, 09064011000. ntt po box cr01327bt fixedline cost 150ppm mobile vary THEN + type = spam + IF message is not equal to shop till u drop, is it you, either 10k, 5k, £500 cash or £100 travel voucher, call now, 09064011000. 
ntt po box cr01327bt fixedline cost 150ppm mobile vary THEN + type = ham \ No newline at end of file diff --git a/data/model/if_then_rules_spam_textanalysis_6.txt b/data/model/if_then_rules_spam_textanalysis_6.txt new file mode 100644 index 00000000..2ca05075 --- /dev/null +++ b/data/model/if_then_rules_spam_textanalysis_6.txt @@ -0,0 +1,130 @@ +IF message contains call AND + IF message contains mobile THEN + type = spam + IF message does not contain mobile AND + IF message contains claim THEN + type = spam + IF message does not contain claim AND + IF message contains text THEN + type = spam + IF message does not contain text AND + IF message contains landline THEN + type = spam + IF message does not contain landline AND + IF message contains FREE THEN + type = spam + IF message does not contain FREE AND + IF message contains PRIVATE THEN + type = spam + IF message does not contain PRIVATE AND + IF message contains message AND + IF message contains please THEN + type = spam + IF message does not contain please THEN + type = ham + IF message does not contain message AND + IF message contains land THEN + type = spam + IF message does not contain land AND + IF message contains SK38XH THEN + type = spam + IF message does not contain SK38XH AND + IF message contains time AND + IF message contains DARLIN THEN + type = ham + IF message does not contain DARLIN THEN + type = spam + IF message does not contain time AND + IF message contains visit THEN + type = spam + IF message does not contain visit AND + IF message contains 2NITE THEN + type = spam + IF message does not contain 2NITE THEN + type = ham + IF message does not contain call AND + IF message contains FREE AND + IF message contains ü THEN + type = ham + IF message does not contain ü AND + IF message contains il THEN + type = ham + IF message does not contain il AND + IF message contains oso THEN + type = ham + IF message does not contain oso AND + IF message contains Hey THEN + type = ham + IF message does not 
contain Hey AND + IF message contains txt THEN + type = spam + IF message does not contain txt AND + IF message contains text THEN + type = spam + IF message does not contain text AND + IF message contains Co THEN + type = spam + IF message does not contain Co THEN + type = spam + IF message does not contain FREE AND + IF message contains txt AND + IF message contains time THEN + type = ham + IF message does not contain time THEN + type = spam + IF message does not contain txt AND + IF message contains text AND + IF message contains time THEN + type = ham + IF message does not contain time AND + IF message contains phone THEN + type = ham + IF message does not contain phone AND + IF message contains DARLIN THEN + type = ham + IF message does not contain DARLIN AND + IF message contains Yes THEN + type = ham + IF message does not contain Yes AND + IF message contains shit THEN + type = ham + IF message does not contain shit THEN + type = spam + IF message does not contain text AND + IF message contains currently THEN + type = spam + IF message does not contain currently AND + IF message contains Cost AND + IF message contains 1000s THEN + type = spam + IF message does not contain 1000s AND + IF message contains POBOX THEN + type = spam + IF message does not contain POBOX THEN + type = ham + IF message does not contain Cost AND + IF message contains 50 THEN + type = spam + IF message does not contain 50 AND + IF message contains rate THEN + type = spam + IF message does not contain rate AND + IF message contains http THEN + type = spam + IF message does not contain http AND + IF message contains lost AND + IF message contains help THEN + type = spam + IF message does not contain help THEN + type = ham + IF message does not contain lost AND + IF message contains STATION AND + IF message contains gas THEN + type = ham + IF message does not contain gas THEN + type = spam + IF message does not contain STATION AND + IF message contains story THEN + type = ham + IF message 
does not contain story THEN + type = ham \ No newline at end of file diff --git a/data/model/if_then_rules_tiny_kdd.txt b/data/model/if_then_rules_tiny_kdd.txt new file mode 100644 index 00000000..6dee16a7 --- /dev/null +++ b/data/model/if_then_rules_tiny_kdd.txt @@ -0,0 +1,31 @@ +IF src_bytes > 315 AND + IF dst_host_same_src_port_rate > 0.75 THEN + dst_host_srv_serror_rate = 0.01 + IF dst_host_same_src_port_rate <= 0.75 AND + IF src_bytes > 325 THEN + dst_host_srv_serror_rate = 0 + IF src_bytes <= 325 AND + IF count > 10 THEN + dst_host_srv_serror_rate = 0.01 + IF count <= 10 AND + IF src_bytes > 323 THEN + dst_host_srv_serror_rate = 0.01 + IF src_bytes <= 323 THEN + dst_host_srv_serror_rate = 0 + IF src_bytes <= 315 AND + IF dst_bytes > 25045 AND + IF dst_host_count > 8 THEN + dst_host_srv_serror_rate = 0 + IF dst_host_count <= 8 THEN + dst_host_srv_serror_rate = 0.01 + IF dst_bytes <= 25045 AND + IF src_bytes > 167 THEN + dst_host_srv_serror_rate = 6e-05 + IF src_bytes <= 167 AND + IF src_bytes > 161 AND + IF dst_host_count > 97 THEN + dst_host_srv_serror_rate = 0 + IF dst_host_count <= 97 THEN + dst_host_srv_serror_rate = 0.01 + IF src_bytes <= 161 THEN + dst_host_srv_serror_rate = 0 \ No newline at end of file diff --git a/data/model/iris.json b/data/model/iris.json new file mode 100644 index 00000000..a7f4b9ff --- /dev/null +++ b/data/model/iris.json @@ -0,0 +1 @@ +{"code": 200, "resource": "model/5f44e5b70d052e69e6000ab0", "location": "https://bigml.io/andromeda/model/5f44e5b70d052e69e6000ab0", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-08-25T10:19:35.829000", "creator": "mmartin", "credits": 0, "credits_per_prediction": 0.0, "dataset": "dataset/5f29a563529963736c0116e9", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, 
"dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": false, "ensemble_id": "", "ensemble_index": 0, "excluded_fields": [], "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000002", "000003"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 54], ["Iris-virginica", 46]]}, "training": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, 
"kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "order": 3, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "summary": {"categories": 
[["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000002", 0.70392], ["000003", 0.29608]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "preferred": true}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "preferred": true}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "preferred": true, "term_analysis": {"enabled": true}}}, "node_threshold": 5, "root": {"children": [{"children": [{"confidence": 0.88664, "count": 46, "id": 2, "objective_summary": {"categories": [["Iris-virginica", 45], ["Iris-versicolor", 1]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 1.75}}, {"confidence": 0.8009, "count": 54, "id": 3, "objective_summary": {"categories": [["Iris-versicolor", 49], ["Iris-virginica", 5]]}, "output": "Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.75}}], "confidence": 0.40383, "count": 100, "id": 1, "objective_summary": {"categories": [["Iris-versicolor", 50], ["Iris-virginica", 50]]}, "output": "Iris-versicolor", "predicate": {"field": "000002", "operator": ">", "value": 2.45}}, {"confidence": 0.92865, "count": 50, "id": 4, "objective_summary": {"categories": [["Iris-setosa", 50]]}, "output": "Iris-setosa", "predicate": {"field": "000002", "operator": "<=", "value": 2.45}}], "confidence": 0.26289, "count": 150, "id": 0, "objective_summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]]}, "output": "Iris-setosa", "predicate": true}}, "name": "classification", "name_options": "5-node, pruned, deterministic 
order", "node_threshold": 5, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000004", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["000004"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": null, "randomize": false, "range": [1, 150], "replacement": false, "resource": "model/5f44e5b70d052e69e6000ab0", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4608, "source": "source/5f29a560529963736c0116e6", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 773, "message": "The model has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-08-25T10:19:46.420000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/data/model/list_fields.txt b/data/model/list_fields.txt new file mode 100644 index 00000000..e58c097a --- /dev/null +++ b/data/model/list_fields.txt @@ -0,0 +1,3 @@ + +[petal length : numeric] +[petal width : numeric] diff --git a/data/model/predictions_distribution_diabetes.txt b/data/model/predictions_distribution_diabetes.txt new file mode 100644 index 00000000..e90b70a5 --- /dev/null +++ b/data/model/predictions_distribution_diabetes.txt @@ -0,0 +1,2 @@ +[false,119] +[true,81] \ No newline at end of file diff --git a/data/model/predictions_distribution_grades.txt b/data/model/predictions_distribution_grades.txt new file mode 100644 index 00000000..f718b293 --- /dev/null +++ b/data/model/predictions_distribution_grades.txt @@ -0,0 +1,65 @@ +[28.06,1] +[34.44,1] +[35.83,1] +[36.11,1] +[39.72,1] +[43.33,1] +[45.56,2] +[46.67,1] +[47.78,2] +[48.89,2] +[49.17,1] +[49.44,1] +[50,2] +[50.83,2] +[51.67,1] +[52.5,1] +[53.33,1] +[55,1] +[55.83,1] +[56.39,1] 
+[57.78,1] +[58.33,1] +[60.29,1] +[60.56,1] +[61.39,1] +[62.5,1] +[63.33,2] +[63.61,1] +[63.89,1] +[64.72,1] +[65.28,2] +[65.56,1] +[66.11,1] +[66.67,1] +[67.22,1] +[68.06,1] +[68.33,2] +[72.22,2] +[73.89,1] +[75.56,1] +[77.5,1] +[78.89,2] +[80,1] +[80.56,1] +[82.22,1] +[83.06,1] +[83.33,1] +[85,1] +[85.56,1] +[87.22,1] +[88.89,2] +[89.17,1] +[90,2] +[90.83,1] +[91.11,1] +[92.22,1] +[92.78,1] +[94.44,1] +[95,1] +[99.17,1] +[101.11,1] +[102.22,1] +[102.78,1] +[107.78,1] +[108.89,1] \ No newline at end of file diff --git a/data/model/predictions_distribution_iris.txt b/data/model/predictions_distribution_iris.txt new file mode 100644 index 00000000..ee958067 --- /dev/null +++ b/data/model/predictions_distribution_iris.txt @@ -0,0 +1,3 @@ +[Iris-setosa,50] +[Iris-versicolor,50] +[Iris-virginica,50] \ No newline at end of file diff --git a/data/model/predictions_distribution_iris_missing2.txt b/data/model/predictions_distribution_iris_missing2.txt new file mode 100644 index 00000000..bf37e3a3 --- /dev/null +++ b/data/model/predictions_distribution_iris_missing2.txt @@ -0,0 +1,3 @@ +[Iris-setosa,16] +[Iris-versicolor,14] +[Iris-virginica,17] \ No newline at end of file diff --git a/data/model/predictions_distribution_iris_sp_chars.txt b/data/model/predictions_distribution_iris_sp_chars.txt new file mode 100644 index 00000000..ee958067 --- /dev/null +++ b/data/model/predictions_distribution_iris_sp_chars.txt @@ -0,0 +1,3 @@ +[Iris-setosa,50] +[Iris-versicolor,50] +[Iris-virginica,50] \ No newline at end of file diff --git a/data/model/predictions_distribution_spam.txt b/data/model/predictions_distribution_spam.txt new file mode 100644 index 00000000..61384b9c --- /dev/null +++ b/data/model/predictions_distribution_spam.txt @@ -0,0 +1,2 @@ +[ham,577] +[spam,79] \ No newline at end of file diff --git a/data/model/predictions_distribution_tiny_kdd.txt b/data/model/predictions_distribution_tiny_kdd.txt new file mode 100644 index 00000000..b006bdf5 --- /dev/null +++ 
b/data/model/predictions_distribution_tiny_kdd.txt @@ -0,0 +1,3 @@ +[0,38] +[6e-05,157] +[0.01,5] \ No newline at end of file diff --git a/data/model/rdistribution_iris.txt b/data/model/rdistribution_iris.txt new file mode 100644 index 00000000..41258bcf --- /dev/null +++ b/data/model/rdistribution_iris.txt @@ -0,0 +1,22 @@ + 0.1: 3.33% (5 instances) + 0.2: 19.33% (29 instances) + 0.3: 4.67% (7 instances) + 0.4: 4.67% (7 instances) + 0.5: 0.67% (1 instance) + 0.6: 0.67% (1 instance) + 1: 4.67% (7 instances) + 1.1: 2.00% (3 instances) + 1.2: 3.33% (5 instances) + 1.3: 8.67% (13 instances) + 1.4: 5.33% (8 instances) + 1.5: 8.00% (12 instances) + 1.6: 2.67% (4 instances) + 1.7: 1.33% (2 instances) + 1.8: 8.00% (12 instances) + 1.9: 3.33% (5 instances) + 2: 4.00% (6 instances) + 2.1: 4.00% (6 instances) + 2.2: 2.00% (3 instances) + 2.3: 5.33% (8 instances) + 2.4: 2.00% (3 instances) + 2.5: 2.00% (3 instances) diff --git a/data/model/regression.json b/data/model/regression.json new file mode 100644 index 00000000..95dcc634 --- /dev/null +++ b/data/model/regression.json @@ -0,0 +1 @@ +{"code": 200, "resource": "model/5f44e603cb4f9665eb000562", "location": "https://bigml.io/andromeda/model/5f44e603cb4f9665eb000562", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-08-25T10:20:51.985000", "creator": "mmartin", "credits": 0, "credits_per_prediction": 0.0, "dataset": "dataset/5f29a563529963736c0116e9", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": false, "ensemble_id": "", "ensemble_index": 0, "excluded_fields": [], "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", 
"000002", "000004"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"counts": [[0.246, 50], [1.326, 50], [2.026, 50]]}, "training": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "maximum": 2.5, "median": 1.3, "minimum": 0.1}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, 
"maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "order": 3, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "summary": {"categories": [["Iris-setosa", 
50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000004", 1]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "preferred": true}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "preferred": true, "term_analysis": {"enabled": true}}}, "node_threshold": 5, "root": {"children": [{"confidence": 0.1598, "count": 50, "id": 1, "objective_summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1]], "exact_histogram": {"populations": [5, 29, 7, 7, 1, 1], "start": 0.1, "width": 0.1}, "maximum": 0.6, "median": 0.2, "minimum": 0.1}, "output": 0.246, "predicate": {"field": "000004", "operator": "=", "value": "Iris-setosa"}}, {"children": [{"confidence": 0.41645, "count": 50, "id": 3, "objective_summary": {"counts": [[1.4, 1], [1.5, 2], [1.6, 1], [1.7, 1], [1.8, 11], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [3, 2, 16, 12, 11, 6], "start": 1.4, "width": 0.2}, "maximum": 2.5, "median": 2, "minimum": 1.4}, "output": 2.026, "predicate": {"field": "000004", "operator": "=", "value": "Iris-virginica"}}, {"confidence": 0.29985, "count": 50, "id": 4, "objective_summary": {"counts": [[1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 7], [1.5, 10], [1.6, 3], [1.7, 1], [1.8, 1]], "exact_histogram": {"populations": [7, 3, 5, 13, 7, 10, 3, 1, 1], "start": 1, "width": 0.1}, "maximum": 1.8, "median": 1.3, "minimum": 1}, "output": 1.326, "predicate": {"field": "000004", "operator": "!=", "value": "Iris-virginica"}}], "confidence": 0.5726, "count": 100, "id": 2, "objective_summary": {"counts": [[1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], 
[1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [7, 3, 5, 13, 8, 12, 4, 2, 12, 5, 6, 6, 3, 8, 3, 3], "start": 1, "width": 0.1}, "maximum": 2.5, "median": 1.6, "minimum": 1}, "output": 1.676, "predicate": {"field": "000004", "operator": "!=", "value": "Iris-setosa"}}], "confidence": 0.97442, "count": 150, "id": 0, "objective_summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "maximum": 2.5, "median": 1.3, "minimum": 0.1}, "output": 1.19933, "predicate": true}}, "name": "regression", "name_options": "5-node, pruned, deterministic order", "node_threshold": 5, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000003", "objective_field_name": "petal width", "objective_field_type": "numeric", "objective_fields": ["000003"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": null, "randomize": false, "range": [1, 150], "replacement": false, "resource": "model/5f44e603cb4f9665eb000562", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4608, "source": "source/5f29a560529963736c0116e6", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 809, "message": "The model has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-08-25T10:20:53.143000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/data/model/rlist_fields.txt 
b/data/model/rlist_fields.txt new file mode 100644 index 00000000..93d0b61f --- /dev/null +++ b/data/model/rlist_fields.txt @@ -0,0 +1,2 @@ + +[species : categorical] diff --git a/data/model/rtree_csv.txt b/data/model/rtree_csv.txt new file mode 100644 index 00000000..50693454 --- /dev/null +++ b/data/model/rtree_csv.txt @@ -0,0 +1 @@ +[["petal width", "error", "bin0_value", "bin0_instances", "bin1_value", "bin1_instances", "bin2_value", "bin2_instances", "bin3_value", "bin3_instances", "bin4_value", "bin4_instances", "bin5_value", "bin5_instances", "bin6_value", "bin6_instances", "bin7_value", "bin7_instances", "bin8_value", "bin8_instances", "bin9_value", "bin9_instances", "bin10_value", "bin10_instances", "bin11_value", "bin11_instances", "bin12_value", "bin12_instances", "bin13_value", "bin13_instances", "bin14_value", "bin14_instances", "bin15_value", "bin15_instances", "bin16_value", "bin16_instances", "bin17_value", "bin17_instances", "bin18_value", "bin18_instances", "bin19_value", "bin19_instances", "bin20_value", "bin20_instances", "bin21_value", "bin21_instances"], [1.19933, 0.97442, 0.1, 5, 0.2, 29, 0.3, 7, 0.4, 7, 0.5, 1, 0.6, 1, 1, 7, 1.1, 3, 1.2, 5, 1.3, 13, 1.4, 8, 1.5, 12, 1.6, 4, 1.7, 2, 1.8, 12, 1.9, 5, 2, 6, 2.1, 6, 2.2, 3, 2.3, 8, 2.4, 3, 2.5, 3], [0.246, 0.1598, 0.1, 5, 0.2, 29, 0.3, 7, 0.4, 7, 0.5, 1, 0.6, 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], [1.676, 0.5726, 1, 7, 1.1, 3, 1.2, 5, 1.3, 13, 1.4, 8, 1.5, 12, 1.6, 4, 1.7, 2, 1.8, 12, 1.9, 5, 2, 6, 2.1, 6, 2.2, 3, 2.3, 8, 2.4, 3, 2.5, 3, null, null, null, null, null, null, null, null, null, null, null, null], [2.026, 0.41645, 1.4, 1, 1.5, 2, 1.6, 1, 1.7, 1, 1.8, 11, 1.9, 5, 2, 6, 2.1, 6, 2.2, 3, 2.3, 8, 2.4, 3, 2.5, 3, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], 
[1.326, 0.29985, 1, 7, 1.1, 3, 1.2, 5, 1.3, 13, 1.4, 7, 1.5, 10, 1.6, 3, 1.7, 1, 1.8, 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null]] \ No newline at end of file diff --git a/data/model/summarize_diabetes.txt b/data/model/summarize_diabetes.txt new file mode 100644 index 00000000..0e765e35 --- /dev/null +++ b/data/model/summarize_diabetes.txt @@ -0,0 +1,71 @@ +Data distribution: + false: 59.50% (119 instances) + true: 40.50% (81 instances) + + +Predicted distribution: + false: 59.50% (119 instances) + true: 40.50% (81 instances) + + +Field importance: + 1. plasma glucose: 27.58% + 2. diabetes pedigree: 22.39% + 3. bmi: 17.37% + 4. age: 15.75% + 5. blood pressure: 12.52% + 6. pregnancies: 2.01% + 7. insulin: 1.72% + 8. triceps skin thickness: 0.67% + + +Rules summary: + +false : (data 59.50% / prediction 59.50%) + · 25.21%: plasma glucose <= 123 and bmi <= 27.075 and pregnancies <= 9 [Confidence: 88.65%] + · 10.92%: plasma glucose <= 123 and bmi > 27.075 and age <= 22 and pregnancies <= 5 [Confidence: 77.19%] + · 8.40%: 91 < plasma glucose <= 123 and bmi > 27.075 and 22 < age <= 30 and blood pressure <= 74 and 0.1305 < diabetes pedigree <= 0.6325 [Confidence: 72.25%] + · 7.56%: 123 < plasma glucose <= 166 and bmi <= 40.81667 and age <= 24 and diabetes pedigree <= 0.8395 [Confidence: 70.08%] + · 6.72%: plasma glucose <= 123 and 35.75 < bmi <= 42.8 and age > 22 and 74 < blood pressure <= 90 and diabetes pedigree <= 0.6325 [Confidence: 67.56%] + · 4.20%: plasma glucose <= 123 and 27.075 < bmi <= 35.75 and age > 22 and 74 < blood pressure <= 90 and diabetes pedigree <= 0.6325 and triceps skin thickness <= 23 [Confidence: 56.55%] + · 4.20%: plasma glucose <= 123 and bmi > 27.075 and age > 22 and blood pressure > 90 [Confidence: 56.55%] + · 4.20%: 123 < plasma glucose <= 166 and bmi <= 40.81667 and 24 < age <= 40 and 0.367 < diabetes pedigree <= 0.8395 and blood 
pressure <= 79 [Confidence: 56.55%] + · 4.20%: 123 < plasma glucose <= 166 and bmi <= 40.81667 and 24 < age <= 40 and diabetes pedigree <= 0.8395 and blood pressure > 82 [Confidence: 56.55%] + · 4.20%: 123 < plasma glucose <= 166 and bmi <= 40.81667 and age > 40 and diabetes pedigree > 0.629 and blood pressure <= 88 [Confidence: 56.55%] + · 3.36%: plasma glucose <= 123 and bmi > 27.075 and age > 22 and blood pressure <= 90 and 0.8765 < diabetes pedigree <= 1.194 [Confidence: 51.01%] + · 2.52%: plasma glucose <= 99 and bmi > 27.075 and age > 30 and blood pressure <= 74 and diabetes pedigree <= 0.6325 [Confidence: 43.85%] + · 2.52%: plasma glucose <= 123 and 27.075 < bmi <= 35.75 and age > 22 and 74 < blood pressure <= 90 and diabetes pedigree <= 0.6325 and triceps skin thickness > 23 and insulin > 52 [Confidence: 43.85%] + · 2.52%: 123 < plasma glucose <= 166 and bmi <= 40.81667 and age > 40 and diabetes pedigree <= 0.3265 and insulin <= 112 and pregnancies > 9 [Confidence: 43.85%] + · 1.68%: 99 < plasma glucose <= 123 and bmi > 28.5 and age > 30 and blood pressure <= 74 and diabetes pedigree <= 0.214 [Confidence: 34.24%] + · 1.68%: 156 < plasma glucose <= 166 and bmi <= 40.81667 and 24 < age <= 40 and diabetes pedigree <= 0.367 and blood pressure <= 79 [Confidence: 34.24%] + · 0.84%: plasma glucose <= 123 and 11.55 < bmi <= 27.075 and pregnancies > 9 [Confidence: 20.65%] + · 0.84%: plasma glucose <= 123 and 27.075 < bmi <= 28.3 and age > 22 and 74 < blood pressure <= 90 and diabetes pedigree <= 0.6325 and triceps skin thickness > 23 and insulin <= 52 [Confidence: 20.65%] + · 0.84%: 123 < plasma glucose <= 166 and 38.55 < bmi <= 40.81667 and age <= 40 and diabetes pedigree > 0.8395 [Confidence: 20.65%] + · 0.84%: 123 < plasma glucose <= 166 and bmi <= 31.85 and 40 < age <= 47 and diabetes pedigree <= 0.3265 and insulin <= 112 and pregnancies <= 9 [Confidence: 20.65%] + · 0.84%: 123 < plasma glucose <= 166 and 31.85 < bmi <= 40.81667 and 40 < age <= 47 and diabetes 
pedigree <= 0.3265 and insulin <= 112 and pregnancies <= 9 and blood pressure <= 38 [Confidence: 20.65%] + · 0.84%: plasma glucose > 166 and bmi <= 23.1 [Confidence: 20.65%] + · 0.84%: plasma glucose > 166 and bmi > 23.1 and blood pressure <= 65 and diabetes pedigree <= 0.3345 [Confidence: 20.65%] + + +true : (data 40.50% / prediction 40.50%) + · 23.46%: plasma glucose > 166 and bmi > 23.1 and blood pressure > 65 [Confidence: 83.18%] + · 9.88%: 99 < plasma glucose <= 123 and bmi > 27.075 and age > 30 and blood pressure <= 74 and 0.214 < diabetes pedigree <= 0.6325 [Confidence: 67.56%] + · 9.88%: 123 < plasma glucose <= 166 and bmi > 40.81667 [Confidence: 67.56%] + · 8.64%: 123 < plasma glucose <= 166 and bmi <= 40.81667 and age > 40 and 0.3265 < diabetes pedigree <= 0.629 [Confidence: 64.57%] + · 6.17%: plasma glucose <= 123 and bmi > 27.075 and age > 22 and blood pressure <= 90 and 0.6325 < diabetes pedigree <= 0.8765 [Confidence: 56.55%] + · 4.94%: 123 < plasma glucose <= 166 and bmi <= 40.81667 and age > 47 and diabetes pedigree <= 0.3265 and insulin <= 112 and pregnancies <= 9 [Confidence: 51.01%] + · 4.94%: 123 < plasma glucose <= 166 and bmi <= 40.81667 and age > 40 and diabetes pedigree <= 0.3265 and insulin > 112 [Confidence: 51.01%] + · 4.94%: plasma glucose > 166 and bmi > 23.1 and blood pressure <= 65 and diabetes pedigree > 0.3345 [Confidence: 51.01%] + · 3.70%: 123 < plasma glucose <= 156 and bmi <= 40.81667 and 24 < age <= 40 and diabetes pedigree <= 0.367 and blood pressure <= 79 [Confidence: 43.85%] + · 3.70%: 123 < plasma glucose <= 166 and bmi <= 38.55 and age <= 40 and diabetes pedigree > 0.8395 [Confidence: 43.85%] + · 2.47%: plasma glucose <= 91 and bmi > 27.075 and 22 < age <= 30 and blood pressure <= 74 and diabetes pedigree <= 0.6325 [Confidence: 34.24%] + · 2.47%: plasma glucose <= 123 and 28.3 < bmi <= 35.75 and age > 22 and 74 < blood pressure <= 90 and diabetes pedigree <= 0.6325 and triceps skin thickness > 23 and insulin <= 52 
[Confidence: 34.24%] + · 2.47%: plasma glucose <= 123 and bmi > 27.075 and age > 22 and blood pressure <= 90 and diabetes pedigree > 1.194 [Confidence: 34.24%] + · 2.47%: 123 < plasma glucose <= 166 and bmi <= 40.81667 and 24 < age <= 40 and diabetes pedigree <= 0.8395 and 79 < blood pressure <= 82 [Confidence: 34.24%] + · 2.47%: 123 < plasma glucose <= 166 and 31.85 < bmi <= 40.81667 and 40 < age <= 47 and diabetes pedigree <= 0.3265 and insulin <= 112 and pregnancies <= 9 and blood pressure > 38 [Confidence: 34.24%] + · 1.23%: plasma glucose <= 123 and bmi <= 11.55 and pregnancies > 9 [Confidence: 20.65%] + · 1.23%: plasma glucose <= 123 and bmi > 27.075 and age <= 22 and pregnancies > 5 [Confidence: 20.65%] + · 1.23%: 91 < plasma glucose <= 123 and bmi > 27.075 and 22 < age <= 30 and blood pressure <= 74 and diabetes pedigree <= 0.1305 [Confidence: 20.65%] + · 1.23%: 99 < plasma glucose <= 123 and 27.075 < bmi <= 28.5 and age > 30 and blood pressure <= 74 and diabetes pedigree <= 0.214 [Confidence: 20.65%] + · 1.23%: plasma glucose <= 123 and bmi > 42.8 and age > 22 and 74 < blood pressure <= 90 and diabetes pedigree <= 0.6325 [Confidence: 20.65%] + · 1.23%: 123 < plasma glucose <= 166 and bmi <= 40.81667 and age > 40 and diabetes pedigree > 0.629 and blood pressure > 88 [Confidence: 20.65%] \ No newline at end of file diff --git a/data/model/summarize_grades.txt b/data/model/summarize_grades.txt new file mode 100644 index 00000000..31405440 --- /dev/null +++ b/data/model/summarize_grades.txt @@ -0,0 +1,325 @@ +Data distribution: + 28.06: 1.30% (1 instance) + 34.44: 1.30% (1 instance) + 35.97: 2.60% (2 instances) + 39.72: 1.30% (1 instance) + 43.33: 1.30% (1 instance) + 45.56: 2.60% (2 instances) + 47.41: 3.90% (3 instances) + 49.39833: 7.79% (6 instances) + 50.83: 2.60% (2 instances) + 52.5: 3.90% (3 instances) + 55.8325: 5.19% (4 instances) + 58.055: 2.60% (2 instances) + 60.975: 2.60% (2 instances) + 63.332: 6.49% (5 instances) + 65.21: 5.19% (4 instances) + 
66.66667: 3.90% (3 instances) + 68.24: 3.90% (3 instances) + 72.22: 2.60% (2 instances) + 73.89: 1.30% (1 instance) + 75.56: 1.30% (1 instance) + 77.5: 1.30% (1 instance) + 79.585: 5.19% (4 instances) + 82.87: 3.90% (3 instances) + 85.28: 2.60% (2 instances) + 87.22: 1.30% (1 instance) + 89.39: 6.49% (5 instances) + 90.97: 2.60% (2 instances) + 92.5: 2.60% (2 instances) + 94.72: 2.60% (2 instances) + 99.17: 1.30% (1 instance) + 102.03667: 3.90% (3 instances) + 108.335: 2.60% (2 instances) + + +Predicted distribution: + 28.06: 1.30% (1 instance) + 34.44: 1.30% (1 instance) + 35.83: 1.30% (1 instance) + 36.11: 1.30% (1 instance) + 39.72: 1.30% (1 instance) + 43.33: 1.30% (1 instance) + 45.56: 2.60% (2 instances) + 46.67: 1.30% (1 instance) + 47.78: 2.60% (2 instances) + 48.89: 2.60% (2 instances) + 49.17: 1.30% (1 instance) + 49.44: 1.30% (1 instance) + 50: 2.60% (2 instances) + 50.83: 2.60% (2 instances) + 51.67: 1.30% (1 instance) + 52.5: 1.30% (1 instance) + 53.33: 1.30% (1 instance) + 55: 1.30% (1 instance) + 55.83: 1.30% (1 instance) + 56.39: 1.30% (1 instance) + 57.78: 1.30% (1 instance) + 58.33: 1.30% (1 instance) + 60.29: 1.30% (1 instance) + 60.56: 1.30% (1 instance) + 61.39: 1.30% (1 instance) + 62.5: 1.30% (1 instance) + 63.33: 2.60% (2 instances) + 63.61: 1.30% (1 instance) + 63.89: 1.30% (1 instance) + 64.72: 1.30% (1 instance) + 65.28: 2.60% (2 instances) + 65.56: 1.30% (1 instance) + 66.11: 1.30% (1 instance) + 66.67: 1.30% (1 instance) + 67.22: 1.30% (1 instance) + 68.06: 1.30% (1 instance) + 68.33: 2.60% (2 instances) + 72.22: 2.60% (2 instances) + 73.89: 1.30% (1 instance) + 75.56: 1.30% (1 instance) + 77.5: 1.30% (1 instance) + 78.89: 2.60% (2 instances) + 80: 1.30% (1 instance) + 80.56: 1.30% (1 instance) + 82.22: 1.30% (1 instance) + 83.06: 1.30% (1 instance) + 83.33: 1.30% (1 instance) + 85: 1.30% (1 instance) + 85.56: 1.30% (1 instance) + 87.22: 1.30% (1 instance) + 88.89: 2.60% (2 instances) + 89.17: 1.30% (1 instance) + 90: 2.60% (2 
instances) + 90.83: 1.30% (1 instance) + 91.11: 1.30% (1 instance) + 92.22: 1.30% (1 instance) + 92.78: 1.30% (1 instance) + 94.44: 1.30% (1 instance) + 95: 1.30% (1 instance) + 99.17: 1.30% (1 instance) + 101.11: 1.30% (1 instance) + 102.22: 1.30% (1 instance) + 102.78: 1.30% (1 instance) + 107.78: 1.30% (1 instance) + 108.89: 1.30% (1 instance) + + +Field importance: + 1. Midterm: 76.70% + 2. Tutorial: 12.15% + 3. TakeHome: 7.95% + 4. Assignment: 3.20% + + +Rules summary: + +28.06 : (data 1.30% / prediction 1.30%) Midterm <= 39.69 and TakeHome > 92.685 [Error: 25.65806] + + +34.44 : (data 1.30% / prediction 1.30%) 39.065 < Midterm <= 39.69 and TakeHome <= 92.685 [Error: 19.25854] + + +35.83 : (data 0.00% / prediction 1.30%) 39.69 < Midterm <= 45.94 and TakeHome <= 31.875 [Error: 29.66014] + + +36.11 : (data 0.00% / prediction 1.30%) 45.94 < Midterm <= 75.47 and TakeHome <= 31.875 and Tutorial > 102.275 [Error: 31.384] + + +39.72 : (data 1.30% / prediction 1.30%) 74.06 < Midterm <= 75.47 and 31.875 < TakeHome <= 100.83 and Tutorial > 86.76 [Error: 17.70187] + + +43.33 : (data 1.30% / prediction 1.30%) 40.625 < Midterm <= 60 and 31.875 < TakeHome <= 100.83 and Tutorial > 91.245 and Assignment <= 74.06 [Error: 17.59392] + + +45.56 : (data 2.60% / prediction 2.60%) Midterm <= 75.47 + · 50.00%: Midterm <= 29.375 and TakeHome <= 92.685 and Tutorial <= 83.145 [Error: 11.69416] + · 50.00%: 40.625 < Midterm <= 75.47 and 31.875 < TakeHome <= 100.83 and 64.285 < Tutorial <= 67.71 and Assignment > 78.52 [Error: 11.69416] + + +46.67 : (data 0.00% / prediction 1.30%) 40.625 < Midterm <= 60 and 31.875 < TakeHome <= 100.83 and 86.76 < Tutorial <= 91.245 and Assignment <= 74.06 [Error: 17.59392] + + +47.78 : (data 0.00% / prediction 2.60%) Midterm <= 75.47 + · 50.00%: Midterm <= 29.375 and TakeHome <= 92.685 and Tutorial > 83.145 [Error: 11.69416] + · 50.00%: 40.625 < Midterm <= 75.47 and 31.875 < TakeHome <= 100.83 and 67.71 < Tutorial <= 75.42 and Assignment > 78.52 [Error: 
11.69416] + + +48.89 : (data 0.00% / prediction 2.60%) Midterm <= 75.47 + · 50.00%: 29.375 < Midterm <= 39.065 and TakeHome <= 92.685 and 70.57 < Tutorial <= 92.845 [Error: 1.47494] + · 50.00%: 40.625 < Midterm <= 62.5 and 31.875 < TakeHome <= 100.83 and 86.76 < Tutorial <= 96.205 and Assignment > 82.395 [Error: 5.84708] + + +49.17 : (data 0.00% / prediction 1.30%) 29.375 < Midterm <= 39.065 and TakeHome <= 92.685 and Tutorial > 92.845 [Error: 1.47494] + + +49.44 : (data 0.00% / prediction 1.30%) 45.94 < Midterm <= 75.47 and TakeHome <= 31.875 and 97.39 < Tutorial <= 102.275 [Error: 7.32202] + + +50 : (data 0.00% / prediction 2.60%) 40.625 < Midterm <= 62.5 and 31.875 < TakeHome <= 100.83 and Tutorial > 86.76 and Assignment > 82.395 + · 50.00%: 40.625 < Midterm <= 62.5 and 31.875 < TakeHome <= 100.83 and 96.205 < Tutorial <= 97.105 and Assignment > 82.395 [Error: 5.84708] + · 50.00%: 55.625 < Midterm <= 62.5 and 31.875 < TakeHome <= 100.83 and Tutorial > 97.105 and Assignment > 82.395 [Error: 23.19691] + + +50.83 : (data 2.60% / prediction 2.60%) 39.69 < Midterm <= 75.47 + · 50.00%: 45.94 < Midterm <= 75.47 and TakeHome <= 31.875 and 91.09 < Tutorial <= 97.39 [Error: 7.32202] + · 50.00%: 40.625 < Midterm <= 75.47 and 31.875 < TakeHome <= 100.83 and Tutorial <= 64.285 and Assignment > 78.52 [Error: 13.44706] + + +51.67 : (data 0.00% / prediction 1.30%) 29.375 < Midterm <= 39.065 and TakeHome <= 92.685 and Tutorial <= 70.57 [Error: 7.77904] + + +52.5 : (data 3.90% / prediction 1.30%) 40.625 < Midterm <= 65.63 and 31.875 < TakeHome <= 100.83 and Tutorial <= 46.3 and Assignment <= 78.52 [Error: 17.54124] + + +53.33 : (data 0.00% / prediction 1.30%) 45.94 < Midterm <= 75.47 and TakeHome <= 31.875 and Tutorial <= 91.09 [Error: 10.01828] + + +55 : (data 0.00% / prediction 1.30%) 65.31 < Midterm <= 74.06 and 31.875 < TakeHome <= 100.83 and Tutorial > 88.065 and 82.395 < Assignment <= 91.175 [Error: 17.54124] + + +55.83 : (data 0.00% / prediction 1.30%) 65.63 < Midterm <= 
75.47 and 31.875 < TakeHome <= 100.83 and Tutorial <= 46.3 and Assignment <= 78.52 [Error: 17.54124] + + +56.39 : (data 0.00% / prediction 1.30%) 39.69 < Midterm <= 75.47 and TakeHome > 100.83 and Tutorial <= 88.84 [Error: 43.87944] + + +57.78 : (data 0.00% / prediction 1.30%) 40.625 < Midterm <= 75.47 and 31.875 < TakeHome <= 100.83 and 82.51 < Tutorial <= 84.92 [Error: 24.86326] + + +58.33 : (data 0.00% / prediction 1.30%) 65.31 < Midterm <= 74.06 and 31.875 < TakeHome <= 100.83 and 86.76 < Tutorial <= 88.065 and 82.395 < Assignment <= 91.175 [Error: 17.54124] + + +60.29 : (data 0.00% / prediction 1.30%) 39.69 < Midterm <= 75.47 [Error: 19.31738] + + +60.56 : (data 0.00% / prediction 1.30%) 40.625 < Midterm <= 48.75 and 31.875 < TakeHome <= 100.83 and 97.105 < Tutorial <= 102.38 and Assignment > 82.395 [Error: 9.38728] + + +61.39 : (data 0.00% / prediction 1.30%) 60 < Midterm <= 74.06 and 31.875 < TakeHome <= 100.83 and Tutorial > 86.76 and Assignment <= 74.06 [Error: 48.83565] + + +62.5 : (data 0.00% / prediction 1.30%) 40.625 < Midterm <= 75.47 and 31.875 < TakeHome <= 100.83 and 81.225 < Tutorial <= 82.51 [Error: 24.86326] + + +63.33 : (data 0.00% / prediction 2.60%) 65.31 < Midterm <= 74.06 and 31.875 < TakeHome <= 98.33 and Tutorial > 101.52 and Assignment > 91.175 [Error: 3.48656] + + +63.61 : (data 0.00% / prediction 1.30%) 48.75 < Midterm <= 55.625 and 31.875 < TakeHome <= 100.83 and 97.105 < Tutorial <= 99.065 and Assignment > 82.395 [Error: 1.47494] + + +63.89 : (data 0.00% / prediction 1.30%) 48.75 < Midterm <= 55.625 and 31.875 < TakeHome <= 100.83 and 99.065 < Tutorial <= 102.38 and Assignment > 82.395 [Error: 1.47494] + + +64.72 : (data 0.00% / prediction 1.30%) 39.69 < Midterm <= 75.47 and TakeHome > 100.83 and 88.84 < Tutorial <= 95.865 [Error: 43.87944] + + +65.28 : (data 0.00% / prediction 2.60%) 40.625 < Midterm <= 75.47 and 31.875 < TakeHome <= 100.83 + · 50.00%: 40.625 < Midterm <= 75.47 and 31.875 < TakeHome <= 100.83 and 61.81 < Tutorial <= 
75.42 and Assignment <= 78.52 [Error: 16.0663] + · 50.00%: 65.31 < Midterm <= 74.06 and 31.875 < TakeHome <= 98.33 and 94.345 < Tutorial <= 101.52 and Assignment > 91.175 [Error: 1.47494] + + +65.56 : (data 0.00% / prediction 1.30%) 65.31 < Midterm <= 74.06 and 31.875 < TakeHome <= 98.33 and 86.76 < Tutorial <= 94.345 and Assignment > 91.175 [Error: 1.47494] + + +66.11 : (data 0.00% / prediction 1.30%) 40.625 < Midterm <= 74.06 and 31.875 < TakeHome <= 82.13 and Tutorial > 95.16 and 74.06 < Assignment <= 82.395 [Error: 2.94988] + + +66.67 : (data 0.00% / prediction 1.30%) 40.625 < Midterm <= 74.06 and 31.875 < TakeHome <= 82.13 and 86.76 < Tutorial <= 95.16 and 74.06 < Assignment <= 82.395 [Error: 2.94988] + + +67.22 : (data 0.00% / prediction 1.30%) 40.625 < Midterm <= 55.625 and 31.875 < TakeHome <= 100.83 and Tutorial > 102.38 and Assignment > 82.395 [Error: 11.0754] + + +68.06 : (data 0.00% / prediction 1.30%) 90.935 < Midterm <= 95.31 and 89.615 < Tutorial <= 100.745 and Assignment > 93.16 [Error: 15.25861] + + +68.33 : (data 0.00% / prediction 2.60%) 40.625 < Midterm <= 75.47 and 31.875 < TakeHome <= 100.83 + · 50.00%: 40.625 < Midterm <= 75.47 and 31.875 < TakeHome <= 100.83 and 46.3 < Tutorial <= 61.81 and Assignment <= 78.52 [Error: 16.0663] + · 50.00%: 65.31 < Midterm <= 74.06 and 98.33 < TakeHome <= 100.83 and Tutorial > 86.76 and Assignment > 91.175 [Error: 7.21249] + + +72.22 : (data 2.60% / prediction 2.60%) + · 50.00%: 40.625 < Midterm <= 74.06 and 82.13 < TakeHome <= 100.83 and Tutorial > 86.76 and 74.06 < Assignment <= 82.395 [Error: 17.1658] + · 50.00%: 90.935 < Midterm <= 95.31 and 96.055 < Tutorial <= 100.745 and Assignment <= 93.16 [Error: 8.79696] + + +73.89 : (data 1.30% / prediction 1.30%) 90.935 < Midterm <= 95.31 and 89.615 < Tutorial <= 96.055 and Assignment <= 93.16 [Error: 8.79696] + + +75.56 : (data 1.30% / prediction 1.30%) 62.5 < Midterm <= 65.31 and 31.875 < TakeHome <= 100.83 and Tutorial > 86.76 and Assignment > 82.395 [Error: 
16.81528] + + +77.5 : (data 1.30% / prediction 1.30%) 40.625 < Midterm <= 75.47 and 31.875 < TakeHome <= 100.83 and 75.42 < Tutorial <= 81.225 [Error: 52.33173] + + +78.89 : (data 0.00% / prediction 2.60%) + · 50.00%: 39.69 < Midterm <= 40.625 and 31.875 < TakeHome <= 100.83 [Error: 18.59719] + · 50.00%: 75.47 < Midterm <= 90 and 100.63 < Tutorial <= 108.145 [Error: 23.38832] + + +80 : (data 0.00% / prediction 1.30%) 69.69 < Midterm <= 75.47 and 31.875 < TakeHome <= 100.83 and 84.92 < Tutorial <= 86.76 [Error: 46.82931] + + +80.56 : (data 0.00% / prediction 1.30%) 39.69 < Midterm <= 75.47 and TakeHome > 100.83 and 95.865 < Tutorial <= 98.24 [Error: 13.1691] + + +82.22 : (data 0.00% / prediction 1.30%) 75.47 < Midterm <= 90 and Tutorial <= 100.63 and Assignment <= 88.62 [Error: 10.42933] + + +83.06 : (data 0.00% / prediction 1.30%) 39.69 < Midterm <= 75.47 and TakeHome > 100.83 and Tutorial > 98.24 [Error: 13.1691] + + +83.33 : (data 0.00% / prediction 1.30%) 75.47 < Midterm <= 90 and Tutorial > 108.145 [Error: 23.38832] + + +85 : (data 0.00% / prediction 1.30%) 75.47 < Midterm <= 90 and Tutorial <= 94.625 and Assignment > 88.62 and TakeHome > 100.37 [Error: 2.94988] + + +85.56 : (data 0.00% / prediction 1.30%) 75.47 < Midterm <= 90 and 94.625 < Tutorial <= 100.63 and Assignment > 88.62 and TakeHome > 100.37 [Error: 2.94988] + + +87.22 : (data 1.30% / prediction 1.30%) 90.935 < Midterm <= 95.31 and Tutorial > 103.425 [Error: 38.03236] + + +88.89 : (data 0.00% / prediction 2.60%) + · 50.00%: 40.625 < Midterm <= 69.69 and 31.875 < TakeHome <= 100.83 and 84.92 < Tutorial <= 86.76 [Error: 46.82931] + · 50.00%: Midterm > 95.31 and TakeHome <= 102.13 and Tutorial > 105.45 [Error: 25.04266] + + +89.17 : (data 0.00% / prediction 1.30%) 90.935 < Midterm <= 95.31 and Tutorial <= 89.615 [Error: 37.51466] + + +90 : (data 0.00% / prediction 2.60%) 75.47 < Midterm <= 90 and 89.18 < Tutorial <= 100.63 and Assignment > 88.62 and TakeHome <= 93.24 [Error: 2.30306] + + +90.83 : (data 
0.00% / prediction 1.30%) Midterm > 95.31 and TakeHome <= 102.13 and Tutorial <= 88.34 [Error: 21.96606] + + +91.11 : (data 0.00% / prediction 1.30%) 75.47 < Midterm <= 90 and Tutorial <= 89.18 and Assignment > 88.62 and TakeHome <= 93.24 [Error: 3.25702] + + +92.22 : (data 0.00% / prediction 1.30%) 75.47 < Midterm <= 90 and Tutorial <= 89.385 and Assignment > 88.62 and 93.24 < TakeHome <= 100.37 [Error: 2.94988] + + +92.78 : (data 0.00% / prediction 1.30%) 75.47 < Midterm <= 90 and 89.385 < Tutorial <= 100.63 and Assignment > 88.62 and 93.24 < TakeHome <= 100.37 [Error: 2.94988] + + +94.44 : (data 0.00% / prediction 1.30%) 90.935 < Midterm <= 95.31 and 100.745 < Tutorial <= 103.425 [Error: 38.03236] + + +95 : (data 0.00% / prediction 1.30%) Midterm > 95.31 and TakeHome <= 102.13 and 88.34 < Tutorial <= 98.1 [Error: 21.96606] + + +99.17 : (data 1.30% / prediction 1.30%) 90 < Midterm <= 90.935 and Tutorial <= 91.33 [Error: 10.21922] + + +101.11 : (data 0.00% / prediction 1.30%) 90 < Midterm <= 90.935 and Tutorial > 91.33 [Error: 10.21922] + + +102.22 : (data 0.00% / prediction 1.30%) 95.31 < Midterm <= 98.435 and TakeHome > 102.13 [Error: 18.16333] + + +102.78 : (data 0.00% / prediction 1.30%) Midterm > 95.31 and TakeHome <= 102.13 and 98.1 < Tutorial <= 105.45 [Error: 30.82504] + + +107.78 : (data 0.00% / prediction 1.30%) Midterm > 98.435 and TakeHome > 102.13 and Tutorial <= 98.3 [Error: 5.84708] + + +108.89 : (data 0.00% / prediction 1.30%) Midterm > 98.435 and TakeHome > 102.13 and Tutorial > 98.3 [Error: 5.84708] \ No newline at end of file diff --git a/data/model/summarize_iris.txt b/data/model/summarize_iris.txt new file mode 100644 index 00000000..b1738b5d --- /dev/null +++ b/data/model/summarize_iris.txt @@ -0,0 +1,35 @@ +Data distribution: + Iris-setosa: 33.33% (50 instances) + Iris-versicolor: 33.33% (50 instances) + Iris-virginica: 33.33% (50 instances) + + +Predicted distribution: + Iris-setosa: 33.33% (50 instances) + Iris-versicolor: 33.33% (50 
instances) + Iris-virginica: 33.33% (50 instances) + + +Field importance: + 1. petal length: 70.19% + 2. petal width: 29.09% + 3. sepal width: 0.71% + + +Rules summary: + +Iris-setosa : (data 33.33% / prediction 33.33%) petal length <= 2.45 [Confidence: 92.86%] + + +Iris-versicolor : (data 33.33% / prediction 33.33%) petal length > 2.45 + · 94.00%: 2.45 < petal length <= 4.95 and petal width <= 1.65 [Confidence: 92.44%] + · 4.00%: 4.95 < petal length <= 5.45 and 1.55 < petal width <= 1.75 [Confidence: 34.24%] + · 2.00%: 2.45 < petal length <= 4.85 and petal width > 1.75 and sepal width > 3.1 [Confidence: 20.65%] + + +Iris-virginica : (data 33.33% / prediction 33.33%) petal length > 2.45 + · 86.00%: petal length > 4.85 and petal width > 1.75 [Confidence: 91.80%] + · 6.00%: petal length > 4.95 and petal width <= 1.55 [Confidence: 43.85%] + · 4.00%: 2.45 < petal length <= 4.85 and petal width > 1.75 and sepal width <= 3.1 [Confidence: 34.24%] + · 2.00%: 2.45 < petal length <= 4.95 and 1.65 < petal width <= 1.75 [Confidence: 20.65%] + · 2.00%: petal length > 5.45 and 1.55 < petal width <= 1.75 [Confidence: 20.65%] \ No newline at end of file diff --git a/data/model/summarize_iris_missing2.txt b/data/model/summarize_iris_missing2.txt new file mode 100644 index 00000000..66ee498e --- /dev/null +++ b/data/model/summarize_iris_missing2.txt @@ -0,0 +1,26 @@ +Data distribution: + Iris-setosa: 34.04% (16 instances) + Iris-versicolor: 29.79% (14 instances) + Iris-virginica: 36.17% (17 instances) + + +Predicted distribution: + Iris-setosa: 34.04% (16 instances) + Iris-versicolor: 29.79% (14 instances) + Iris-virginica: 36.17% (17 instances) + + +Field importance: + 1. petal width: 70.69% + 2. 
petal length: 29.31% + + +Rules summary: + +Iris-setosa : (data 34.04% / prediction 34.04%) petal width <= 0.7 [Confidence: 80.64%] + + +Iris-versicolor : (data 29.79% / prediction 29.79%) petal width > 0.7 and petal length <= 4.75 [Confidence: 78.47%] + + +Iris-virginica : (data 36.17% / prediction 36.17%) petal width > 0.7 and petal length > 4.75 [Confidence: 81.57%] \ No newline at end of file diff --git a/data/model/summarize_iris_sp_chars.txt b/data/model/summarize_iris_sp_chars.txt new file mode 100644 index 00000000..e41725b3 Binary files /dev/null and b/data/model/summarize_iris_sp_chars.txt differ diff --git a/data/model/summarize_spam.txt b/data/model/summarize_spam.txt new file mode 100644 index 00000000..ea7672a5 --- /dev/null +++ b/data/model/summarize_spam.txt @@ -0,0 +1,65 @@ +Data distribution: + ham: 87.96% (577 instances) + spam: 12.04% (79 instances) + + +Predicted distribution: + ham: 87.96% (577 instances) + spam: 12.04% (79 instances) + + +Field importance: + 1. Message: 100.00% + + +Rules summary: + +ham : (data 87.96% / prediction 87.96%) + · 89.77%: Message does not contain call or free or txt or text or currently or cost or 50 or rate or http or lost or station or girls [Confidence: 99.26%] + · 5.89%: Message contains call and Message does not contain mobile or claim or landline or text or free or private or message or 50 or booked or contact or luv or visit or miss [Confidence: 89.85%] + · 0.69%: Message contains text and time and Message does not contain call or free or txt [Confidence: 51.01%] + · 0.52%: Message contains free and ü and Message does not contain call [Confidence: 43.85%] + · 0.52%: Message contains call and message and Message does not contain mobile or claim or landline or text or free or private or please [Confidence: 20.77%; impurity: 0.44%] + · 0.35%: Message contains girls and Message does not contain call or free or txt or text or currently or cost or 50 or rate or http or lost or station or story [Confidence: 
34.24%] + · 0.35%: Message contains text and phone and Message does not contain call or free or txt or time [Confidence: 34.24%] + · 0.17%: Message contains station and Message does not contain call or free or txt or text or currently or cost or 50 or rate or http or lost or news [Confidence: 20.65%] + · 0.17%: Message contains lost and thanks and Message does not contain call or free or txt or text or currently or cost or 50 or rate or http [Confidence: 20.65%] + · 0.17%: Message contains cost and month and Message does not contain call or free or txt or text or currently or apply [Confidence: 20.65%] + · 0.17%: Message contains cost and apply and Message does not contain call or free or txt or text or currently [Confidence: 20.65%] + · 0.17%: Message contains text and shit and Message does not contain call or free or txt or time or phone or yes or luv [Confidence: 20.65%] + · 0.17%: Message contains text and luv and Message does not contain call or free or txt or time or phone or yes [Confidence: 20.65%] + · 0.17%: Message contains text and yes and Message does not contain call or free or txt or time or phone [Confidence: 20.65%] + · 0.17%: Message contains txt and tomorrow and Message does not contain call or free [Confidence: 20.65%] + · 0.17%: Message contains free and hav and Message does not contain call or ü or ah or oso [Confidence: 20.65%] + · 0.17%: Message contains free and oso and Message does not contain call or ü or ah [Confidence: 20.65%] + · 0.17%: Message contains free and ah and Message does not contain call or ü [Confidence: 20.65%] + + +spam : (data 12.04% / prediction 12.04%) + · 12.66%: Message contains call and mobile [Confidence: 72.25%] + · 11.39%: Message contains call and claim and Message does not contain mobile [Confidence: 70.08%] + · 7.59%: Message contains text and Message does not contain call or free or txt or time or phone or yes or luv or shit [Confidence: 60.97%] + · 7.59%: Message contains txt and Message does not contain call 
or free or tomorrow [Confidence: 60.97%] + · 7.59%: Message contains free and txt and Message does not contain call or ü or ah or oso or hav [Confidence: 60.97%] + · 6.33%: Message contains free and Message does not contain call or ü or ah or oso or hav or txt or text or send [Confidence: 37.55%; impurity: 0.32%] + · 5.06%: Message contains free and text and Message does not contain call or ü or ah or oso or hav or txt [Confidence: 51.01%] + · 5.06%: Message contains call and text and Message does not contain mobile or claim or landline [Confidence: 51.01%] + · 5.06%: Message contains call and landline and Message does not contain mobile or claim [Confidence: 51.01%] + · 3.80%: Message contains call and free and Message does not contain mobile or claim or landline or text [Confidence: 43.85%] + · 2.53%: Message contains cost and Message does not contain call or free or txt or text or currently or apply or month [Confidence: 34.24%] + · 2.53%: Message contains currently and Message does not contain call or free or txt or text [Confidence: 34.24%] + · 2.53%: Message contains free and send and Message does not contain call or ü or ah or oso or hav or txt or text [Confidence: 34.24%] + · 2.53%: Message contains call and message and please and Message does not contain mobile or claim or landline or text or free or private [Confidence: 34.24%] + · 2.53%: Message contains call and private and Message does not contain mobile or claim or landline or text or free [Confidence: 34.24%] + · 1.27%: Message contains girls and story and Message does not contain call or free or txt or text or currently or cost or 50 or rate or http or lost or station [Confidence: 20.65%] + · 1.27%: Message contains station and news and Message does not contain call or free or txt or text or currently or cost or 50 or rate or http or lost [Confidence: 20.65%] + · 1.27%: Message contains lost and Message does not contain call or free or txt or text or currently or cost or 50 or rate or http or thanks 
[Confidence: 20.65%] + · 1.27%: Message contains http and Message does not contain call or free or txt or text or currently or cost or 50 or rate [Confidence: 20.65%] + · 1.27%: Message contains rate and Message does not contain call or free or txt or text or currently or cost or 50 [Confidence: 20.65%] + · 1.27%: Message contains 50 and Message does not contain call or free or txt or text or currently or cost [Confidence: 20.65%] + · 1.27%: Message contains call and miss and Message does not contain mobile or claim or landline or text or free or private or message or 50 or booked or contact or luv or visit [Confidence: 20.65%] + · 1.27%: Message contains call and visit and Message does not contain mobile or claim or landline or text or free or private or message or 50 or booked or contact or luv [Confidence: 20.65%] + · 1.27%: Message contains call and luv and Message does not contain mobile or claim or landline or text or free or private or message or 50 or booked or contact [Confidence: 20.65%] + · 1.27%: Message contains call and contact and Message does not contain mobile or claim or landline or text or free or private or message or 50 or booked [Confidence: 20.65%] + · 1.27%: Message contains call and booked and Message does not contain mobile or claim or landline or text or free or private or message or 50 [Confidence: 20.65%] + · 1.27%: Message contains call and 50 and Message does not contain mobile or claim or landline or text or free or private or message [Confidence: 20.65%] \ No newline at end of file diff --git a/data/model/summarize_tiny_kdd.txt b/data/model/summarize_tiny_kdd.txt new file mode 100644 index 00000000..05733d19 --- /dev/null +++ b/data/model/summarize_tiny_kdd.txt @@ -0,0 +1,38 @@ +Data distribution: + 0: 97.00% (194 instances) + 0.01: 3.00% (6 instances) + + +Predicted distribution: + 0: 19.00% (38 instances) + 6e-05: 78.50% (157 instances) + 0.01: 2.50% (5 instances) + + +Field importance: + 1. src_bytes: 36.16% + 2. 
dst_host_count: 32.23% + 3. dst_host_same_src_port_rate: 16.55% + 4. count: 13.32% + 5. dst_bytes: 1.74% + + +Rules summary: + +0 : (data 97.00% / prediction 19.00%) + · 39.47%: src_bytes > 325 and dst_host_same_src_port_rate <= 0.75 [Error: 0.00136] + · 21.05%: src_bytes <= 315 and dst_bytes > 25045 and dst_host_count > 8 [Error: 0.00302] + · 18.42%: src_bytes <= 161 and dst_bytes <= 25045 [Error: 0.00293] + · 15.79%: 315 < src_bytes <= 323 and dst_host_same_src_port_rate <= 0.75 and count <= 10 [Error: 0.00447] + · 5.26%: 161 < src_bytes <= 167 and dst_bytes <= 25045 and dst_host_count > 97 [Error: 0.02075] + + +6e-05 : (data 0.00% / prediction 78.50%) 167 < src_bytes <= 315 and dst_bytes <= 25045 [Error: 0.00101] + + +0.01 : (data 3.00% / prediction 2.50%) + · 20.00%: 161 < src_bytes <= 167 and dst_bytes <= 25045 and dst_host_count <= 97 [Error: 0.02934] + · 20.00%: src_bytes <= 315 and dst_bytes > 25045 and dst_host_count <= 8 [Error: 0.00855] + · 20.00%: 323 < src_bytes <= 325 and dst_host_same_src_port_rate <= 0.75 and count <= 10 [Error: 0.01095] + · 20.00%: 315 < src_bytes <= 325 and dst_host_same_src_port_rate <= 0.75 and count > 10 [Error: 0.01254] + · 20.00%: src_bytes > 315 and dst_host_same_src_port_rate > 0.75 [Error: 0.0061] \ No newline at end of file diff --git a/data/model/tree_csv.txt b/data/model/tree_csv.txt new file mode 100644 index 00000000..f3285ad0 --- /dev/null +++ b/data/model/tree_csv.txt @@ -0,0 +1 @@ +[["species", "confidence", "impurity", "Iris-setosa", "Iris-versicolor", "Iris-virginica"], ["Iris-setosa", 0.26289, 0.6666666666666667, 50, 50, 50], ["Iris-versicolor", 0.40383, 0.5, null, 50, 50], ["Iris-virginica", 0.88664, 0.04253308128544431, null, 1, 45], ["Iris-versicolor", 0.8009, 0.16803840877914955, null, 49, 5], ["Iris-setosa", 0.92865, 0.0, 50, null, null]] \ No newline at end of file diff --git a/data/model/w_iris.json b/data/model/w_iris.json new file mode 100644 index 00000000..cf39da14 --- /dev/null +++ 
b/data/model/w_iris.json @@ -0,0 +1 @@ +{"code": 200, "resource": "model/5f40433be84f94782b000774", "location": "https://bigml.io/andromeda/model/5f40433be84f94782b000774", "object": {"balance_objective": true, "boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-08-21T21:57:15.487000", "creator": "mmartin", "credits": 0, "credits_per_prediction": 0.0, "dataset": "dataset/5f29a563529963736c0116e9", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": false, "ensemble_id": "", "ensemble_index": 0, "excluded_fields": [], "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000002", "000003"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 54], ["Iris-virginica", 46]]}, "training": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]]}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, 
"median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "order": 3, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 
1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000002", 0.70392], ["000003", 0.29608]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "preferred": true}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "preferred": true}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "preferred": true, "term_analysis": {"enabled": true}}}, "node_threshold": 5, "root": {"children": [{"children": [{"confidence": 0.88664, "count": 46, "id": 2, "objective_summary": {"categories": [["Iris-virginica", 45], ["Iris-versicolor", 1]]}, "output": "Iris-virginica", "predicate": {"field": "000003", "operator": ">", "value": 1.75}, "weight": 46, "weighted_objective_summary": {"categories": [["Iris-virginica", 45], ["Iris-versicolor", 1]]}}, {"confidence": 0.8009, "count": 54, "id": 3, "objective_summary": {"categories": [["Iris-versicolor", 49], ["Iris-virginica", 5]]}, "output": 
"Iris-versicolor", "predicate": {"field": "000003", "operator": "<=", "value": 1.75}, "weight": 54, "weighted_objective_summary": {"categories": [["Iris-versicolor", 49], ["Iris-virginica", 5]]}}], "confidence": 0.40383, "count": 100, "id": 1, "objective_summary": {"categories": [["Iris-versicolor", 50], ["Iris-virginica", 50]]}, "output": "Iris-versicolor", "predicate": {"field": "000002", "operator": ">", "value": 2.45}, "weight": 100, "weighted_objective_summary": {"categories": [["Iris-versicolor", 50], ["Iris-virginica", 50]]}}, {"confidence": 0.92865, "count": 50, "id": 4, "objective_summary": {"categories": [["Iris-setosa", 50]]}, "output": "Iris-setosa", "predicate": {"field": "000002", "operator": "<=", "value": 2.45}, "weight": 50, "weighted_objective_summary": {"categories": [["Iris-setosa", 50]]}}], "confidence": 0.26289, "count": 150, "id": 0, "objective_summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]]}, "output": "Iris-setosa", "predicate": true, "weight": 150, "weighted_objective_summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]]}}}, "name": "XX", "name_options": "5-node, pruned, deterministic order, balanced", "node_threshold": 5, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000004", "objective_field_name": "species", "objective_field_type": "categorical", "objective_fields": ["000004"], "objective_weights": [["Iris-setosa", 1], ["Iris-versicolor", 1], ["Iris-virginica", 1]], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": null, "randomize": false, "range": [1, 150], "replacement": false, "resource": "model/5f40433be84f94782b000774", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4608, "source": "source/5f29a560529963736c0116e6", "source_status": true, 
"split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 762, "message": "The model has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-08-21T21:57:16.729000", "white_box": false}, "error": null} \ No newline at end of file diff --git a/data/model/w_regression.json b/data/model/w_regression.json new file mode 100644 index 00000000..e969d8f7 --- /dev/null +++ b/data/model/w_regression.json @@ -0,0 +1 @@ +{"code": 200, "resource": "model/5f4031772fb31c3272000193", "location": "https://bigml.io/andromeda/model/5f4031772fb31c3272000193", "object": {"boosted_ensemble": false, "boosting": {}, "category": 0, "cluster": null, "cluster_status": false, "code": 200, "columns": 5, "configuration": null, "configuration_status": false, "created": "2020-08-21T20:41:27.095000", "creator": "mmartin", "credits": 0, "credits_per_prediction": 0.0, "dataset": "dataset/5f29a563529963736c0116e9", "dataset_field_types": {"categorical": 1, "datetime": 0, "items": 0, "numeric": 4, "preferred": 5, "text": 0, "total": 5}, "dataset_status": true, "depth_threshold": 512, "description": "", "ensemble": false, "ensemble_id": "", "ensemble_index": 0, "excluded_fields": [], "fields_meta": {"count": 5, "limit": -1, "offset": 0, "query_total": 5, "total": 5}, "focus_field": null, "input_fields": ["000000", "000001", "000004"], "locale": "en_US", "max_columns": 5, "max_rows": 150, "missing_splits": false, "model": {"depth_threshold": 512, "distribution": {"predictions": {"counts": [[0.25007, 50], [1.34282, 50], [2.03462, 50]]}, "training": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "maximum": 
2.5, "median": 1.3, "minimum": 0.1}}, "fields": {"000000": {"column_number": 0, "datatype": "double", "name": "sepal length", "optype": "numeric", "order": 0, "preferred": true, "summary": {"bins": [[4.3, 1], [4.425, 4], [4.6, 4], [4.77143, 7], [4.9625, 16], [5.1, 9], [5.2, 4], [5.3, 1], [5.4, 6], [5.5, 7], [5.6, 6], [5.7, 8], [5.8, 7], [5.9, 3], [6, 6], [6.1, 6], [6.2, 4], [6.3, 9], [6.4, 7], [6.5, 5], [6.6, 2], [6.7, 8], [6.8, 3], [6.9, 4], [7, 1], [7.1, 1], [7.2, 3], [7.3, 1], [7.4, 1], [7.6, 1], [7.7, 4], [7.9, 1]], "exact_histogram": {"populations": [1, 4, 6, 11, 19, 5, 13, 14, 10, 12, 13, 12, 10, 7, 2, 4, 1, 5, 1], "start": 4.2, "width": 0.2}, "kurtosis": -0.57357, "maximum": 7.9, "mean": 5.84333, "median": 5.8, "minimum": 4.3, "missing_count": 0, "population": 150, "skewness": 0.31175, "standard_deviation": 0.82807, "sum": 876.5, "sum_squares": 5223.85, "variance": 0.68569}}, "000001": {"column_number": 1, "datatype": "double", "name": "sepal width", "optype": "numeric", "order": 1, "preferred": true, "summary": {"counts": [[2, 1], [2.2, 3], [2.3, 4], [2.4, 3], [2.5, 8], [2.6, 5], [2.7, 9], [2.8, 14], [2.9, 10], [3, 26], [3.1, 11], [3.2, 13], [3.3, 6], [3.4, 12], [3.5, 6], [3.6, 4], [3.7, 3], [3.8, 6], [3.9, 2], [4, 1], [4.1, 1], [4.2, 1], [4.4, 1]], "exact_histogram": {"populations": [1, 7, 11, 14, 24, 37, 19, 18, 7, 8, 2, 1, 1], "start": 2, "width": 0.2}, "kurtosis": 0.18098, "maximum": 4.4, "mean": 3.05733, "median": 3, "minimum": 2, "missing_count": 0, "population": 150, "skewness": 0.31577, "standard_deviation": 0.43587, "sum": 458.6, "sum_squares": 1430.4, "variance": 0.18998}}, "000002": {"column_number": 2, "datatype": "double", "name": "petal length", "optype": "numeric", "order": 2, "preferred": true, "summary": {"bins": [[1, 1], [1.16667, 3], [1.3, 7], [1.4, 13], [1.5, 13], [1.6, 7], [1.7, 4], [1.9, 2], [3, 1], [3.3, 2], [3.5, 2], [3.6, 1], [3.75, 2], [3.9, 3], [4.0375, 8], [4.23333, 6], [4.46667, 12], [4.6, 3], [4.74444, 9], [4.94444, 9], [5.1, 
8], [5.25, 4], [5.4, 2], [5.56667, 9], [5.75, 6], [5.95, 4], [6.1, 3], [6.3, 1], [6.4, 1], [6.6, 1], [6.7, 2], [6.9, 1]], "exact_histogram": {"populations": [2, 9, 26, 11, 2, 0, 0, 0, 0, 0, 1, 2, 2, 2, 4, 8, 6, 12, 8, 9, 12, 4, 5, 9, 5, 5, 1, 1, 3, 1], "start": 1, "width": 0.2}, "kurtosis": -1.39554, "maximum": 6.9, "mean": 3.758, "median": 4.35, "minimum": 1, "missing_count": 0, "population": 150, "skewness": -0.27213, "standard_deviation": 1.7653, "sum": 563.7, "sum_squares": 2582.71, "variance": 3.11628}}, "000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "order": 3, "preferred": true, "summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "kurtosis": -1.33607, "maximum": 2.5, "mean": 1.19933, "median": 1.3, "minimum": 0.1, "missing_count": 0, "population": 150, "skewness": -0.10193, "standard_deviation": 0.76224, "sum": 179.9, "sum_squares": 302.33, "variance": 0.58101}}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "order": 4, "preferred": true, "summary": {"categories": [["Iris-setosa", 50], ["Iris-versicolor", 50], ["Iris-virginica", 50]], "missing_count": 0}, "term_analysis": {"enabled": true}}}, "importance": [["000004", 1]], "kind": "mtree", "missing_tokens": ["", "NaN", "NULL", "N/A", "null", "-", "#REF!", "#VALUE!", "?", "#NULL!", "#NUM!", "#DIV/0", "n/a", "#NAME?", "NIL", "nil", "na", "#N/A", "NA"], "model_fields": {"000003": {"column_number": 3, "datatype": "double", "name": "petal width", "optype": "numeric", "preferred": true}, "000004": {"column_number": 4, "datatype": "string", "name": "species", "optype": "categorical", "preferred": true, "term_analysis": 
{"enabled": true}}}, "node_threshold": 5, "root": {"children": [{"confidence": 0.15226, "count": 50, "id": 1, "objective_summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1]], "exact_histogram": {"populations": [5, 29, 7, 7, 1, 1], "start": 0.1, "width": 0.1}, "maximum": 0.6, "median": 0.2, "minimum": 0.1}, "output": 0.25007, "predicate": {"field": "000004", "operator": "=", "value": "Iris-setosa"}, "weight": 73.1, "weighted_objective_summary": {"counts": [[0.1, 6.9], [0.2, 41.9], [0.3, 10], [0.4, 11], [0.5, 1.7], [0.6, 1.6]], "exact_histogram": {"populations": [7, 42, 10, 11, 2, 2], "start": 0.1, "width": 0.1}, "maximum": 0.6, "median": 0.2, "minimum": 0.1}}, {"children": [{"confidence": 0.23853, "count": 50, "id": 3, "objective_summary": {"counts": [[1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 7], [1.5, 10], [1.6, 3], [1.7, 1], [1.8, 1]], "exact_histogram": {"populations": [7, 3, 5, 13, 7, 10, 3, 1, 1], "start": 1, "width": 0.1}, "maximum": 1.8, "median": 1.3, "minimum": 1}, "output": 1.34282, "predicate": {"field": "000004", "operator": "=", "value": "Iris-versicolor"}, "weight": 213, "weighted_objective_summary": {"counts": [[1, 25.4], [1.1, 10.7], [1.2, 21.2], [1.3, 54.3], [1.4, 31.5], [1.5, 45.8], [1.6, 14.3], [1.7, 5], [1.8, 4.8]], "exact_histogram": {"populations": [25, 11, 21, 54, 32, 46, 14, 5, 5], "start": 1, "width": 0.1}, "maximum": 1.8, "median": 1.3, "minimum": 1}}, {"confidence": 0.3259, "count": 50, "id": 4, "objective_summary": {"counts": [[1.4, 1], [1.5, 2], [1.6, 1], [1.7, 1], [1.8, 11], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [3, 2, 16, 12, 11, 6], "start": 1.4, "width": 0.2}, "maximum": 2.5, "median": 2, "minimum": 1.4}, "output": 2.03462, "predicate": {"field": "000004", "operator": "!=", "value": "Iris-versicolor"}, "weight": 277.6, "weighted_objective_summary": {"counts": [[1.4, 5.6], [1.5, 10.1], [1.6, 5.8], [1.7, 4.5], [1.8, 59.2], [1.9, 26.6], 
[2, 33.3], [2.1, 34.7], [2.2, 18.1], [2.3, 45.6], [2.4, 16.3], [2.5, 17.8]], "exact_histogram": {"populations": [16, 10, 86, 68, 64, 34], "start": 1.4, "width": 0.2}, "maximum": 2.5, "median": 2, "minimum": 1.4}}], "confidence": 0.48126, "count": 100, "id": 2, "objective_summary": {"counts": [[1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [7, 3, 5, 13, 8, 12, 4, 2, 12, 5, 6, 6, 3, 8, 3, 3], "start": 1, "width": 0.1}, "maximum": 2.5, "median": 1.6, "minimum": 1}, "output": 1.73426, "predicate": {"field": "000004", "operator": "!=", "value": "Iris-setosa"}, "weight": 490.6, "weighted_objective_summary": {"counts": [[1, 25.4], [1.1, 10.7], [1.2, 21.2], [1.3, 54.3], [1.4, 37.1], [1.5, 55.9], [1.6, 20.1], [1.7, 9.5], [1.8, 64], [1.9, 26.6], [2, 33.3], [2.1, 34.7], [2.2, 18.1], [2.3, 45.6], [2.4, 16.3], [2.5, 17.8]], "exact_histogram": {"populations": [25, 11, 21, 54, 37, 56, 20, 10, 64, 27, 33, 35, 18, 46, 16, 18], "start": 1, "width": 0.1}, "maximum": 2.5, "median": 1.8, "minimum": 1}}], "confidence": 0.72256, "count": 150, "id": 0, "objective_summary": {"counts": [[0.1, 5], [0.2, 29], [0.3, 7], [0.4, 7], [0.5, 1], [0.6, 1], [1, 7], [1.1, 3], [1.2, 5], [1.3, 13], [1.4, 8], [1.5, 12], [1.6, 4], [1.7, 2], [1.8, 12], [1.9, 5], [2, 6], [2.1, 6], [2.2, 3], [2.3, 8], [2.4, 3], [2.5, 3]], "exact_histogram": {"populations": [5, 36, 8, 1, 0, 10, 18, 20, 6, 17, 12, 11, 6], "start": 0, "width": 0.2}, "maximum": 2.5, "median": 1.3, "minimum": 0.1}, "output": 1.5418, "predicate": true, "weight": 563.7, "weighted_objective_summary": {"counts": [[0.1, 6.9], [0.2, 41.9], [0.3, 10], [0.4, 11], [0.5, 1.7], [0.6, 1.6], [1, 25.4], [1.1, 10.7], [1.2, 21.2], [1.3, 54.3], [1.4, 37.1], [1.5, 55.9], [1.6, 20.1], [1.7, 9.5], [1.8, 64], [1.9, 26.6], [2, 33.3], [2.1, 34.7], [2.2, 18.1], [2.3, 45.6], [2.4, 16.3], [2.5, 17.8]], "exact_histogram": 
{"populations": [7, 52, 13, 2, 0, 36, 76, 93, 30, 91, 68, 64, 34], "start": 0, "width": 0.2}, "maximum": 2.5, "median": 1.6, "minimum": 0.1}}}, "name": "regression weighted", "name_options": "5-node, pruned, deterministic order, weight field: 000002", "node_threshold": 5, "number_of_batchpredictions": 0, "number_of_evaluations": 0, "number_of_predictions": 0, "number_of_public_predictions": 0, "objective_field": "000003", "objective_field_name": "petal width", "objective_field_type": "numeric", "objective_fields": ["000003"], "optiml": null, "optiml_status": false, "ordering": 0, "out_of_bag": false, "price": 0.0, "private": true, "project": null, "randomize": false, "range": [1, 150], "replacement": false, "resource": "model/5f4031772fb31c3272000193", "rows": 150, "sample_rate": 1.0, "selective_pruning": true, "shared": false, "size": 4608, "source": "source/5f29a560529963736c0116e6", "source_status": true, "split_candidates": 32, "split_field": null, "stat_pruning": true, "status": {"code": 5, "elapsed": 1026, "message": "The model has been created", "progress": 1}, "subscription": true, "support_threshold": 0.0, "tags": [], "type": 0, "updated": "2020-08-25T10:21:25.313000", "weight_field": "000002", "white_box": false}, "error": null} \ No newline at end of file diff --git a/data/model/wdistribution_iris.txt b/data/model/wdistribution_iris.txt new file mode 100644 index 00000000..464efae8 --- /dev/null +++ b/data/model/wdistribution_iris.txt @@ -0,0 +1,3 @@ + Iris-setosa: 33.33% (50 instances) + Iris-versicolor: 33.33% (50 instances) + Iris-virginica: 33.33% (50 instances) diff --git a/data/model/wlist_fields.txt b/data/model/wlist_fields.txt new file mode 100644 index 00000000..e58c097a --- /dev/null +++ b/data/model/wlist_fields.txt @@ -0,0 +1,3 @@ + +[petal length : numeric] +[petal width : numeric] diff --git a/data/model/wrdistribution_iris.txt b/data/model/wrdistribution_iris.txt new file mode 100644 index 00000000..41258bcf --- /dev/null +++ 
b/data/model/wrdistribution_iris.txt @@ -0,0 +1,22 @@ + 0.1: 3.33% (5 instances) + 0.2: 19.33% (29 instances) + 0.3: 4.67% (7 instances) + 0.4: 4.67% (7 instances) + 0.5: 0.67% (1 instance) + 0.6: 0.67% (1 instance) + 1: 4.67% (7 instances) + 1.1: 2.00% (3 instances) + 1.2: 3.33% (5 instances) + 1.3: 8.67% (13 instances) + 1.4: 5.33% (8 instances) + 1.5: 8.00% (12 instances) + 1.6: 2.67% (4 instances) + 1.7: 1.33% (2 instances) + 1.8: 8.00% (12 instances) + 1.9: 3.33% (5 instances) + 2: 4.00% (6 instances) + 2.1: 4.00% (6 instances) + 2.2: 2.00% (3 instances) + 2.3: 5.33% (8 instances) + 2.4: 2.00% (3 instances) + 2.5: 2.00% (3 instances) diff --git a/data/model/wrlist_fields.txt b/data/model/wrlist_fields.txt new file mode 100644 index 00000000..93d0b61f --- /dev/null +++ b/data/model/wrlist_fields.txt @@ -0,0 +1,2 @@ + +[species : categorical] diff --git a/data/model/wrtree_csv.txt b/data/model/wrtree_csv.txt new file mode 100644 index 00000000..ba98237c --- /dev/null +++ b/data/model/wrtree_csv.txt @@ -0,0 +1 @@ +[["petal width", "error", "bin0_value", "bin0_instances", "bin1_value", "bin1_instances", "bin2_value", "bin2_instances", "bin3_value", "bin3_instances", "bin4_value", "bin4_instances", "bin5_value", "bin5_instances", "bin6_value", "bin6_instances", "bin7_value", "bin7_instances", "bin8_value", "bin8_instances", "bin9_value", "bin9_instances", "bin10_value", "bin10_instances", "bin11_value", "bin11_instances", "bin12_value", "bin12_instances", "bin13_value", "bin13_instances", "bin14_value", "bin14_instances", "bin15_value", "bin15_instances", "bin16_value", "bin16_instances", "bin17_value", "bin17_instances", "bin18_value", "bin18_instances", "bin19_value", "bin19_instances", "bin20_value", "bin20_instances", "bin21_value", "bin21_instances"], [1.5418, 0.72256, 0.1, 5, 0.2, 29, 0.3, 7, 0.4, 7, 0.5, 1, 0.6, 1, 1, 7, 1.1, 3, 1.2, 5, 1.3, 13, 1.4, 8, 1.5, 12, 1.6, 4, 1.7, 2, 1.8, 12, 1.9, 5, 2, 6, 2.1, 6, 2.2, 3, 2.3, 8, 2.4, 3, 2.5, 3], [0.25007, 
0.15226, 0.1, 5, 0.2, 29, 0.3, 7, 0.4, 7, 0.5, 1, 0.6, 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], [1.73426, 0.48126, 1, 7, 1.1, 3, 1.2, 5, 1.3, 13, 1.4, 8, 1.5, 12, 1.6, 4, 1.7, 2, 1.8, 12, 1.9, 5, 2, 6, 2.1, 6, 2.2, 3, 2.3, 8, 2.4, 3, 2.5, 3, null, null, null, null, null, null, null, null, null, null, null, null], [1.34282, 0.23853, 1, 7, 1.1, 3, 1.2, 5, 1.3, 13, 1.4, 7, 1.5, 10, 1.6, 3, 1.7, 1, 1.8, 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], [2.03462, 0.3259, 1.4, 1, 1.5, 2, 1.6, 1, 1.7, 1, 1.8, 11, 1.9, 5, 2, 6, 2.1, 6, 2.2, 3, 2.3, 8, 2.4, 3, 2.5, 3, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null]] \ No newline at end of file diff --git a/data/model/wtree_csv.txt b/data/model/wtree_csv.txt new file mode 100644 index 00000000..f3285ad0 --- /dev/null +++ b/data/model/wtree_csv.txt @@ -0,0 +1 @@ +[["species", "confidence", "impurity", "Iris-setosa", "Iris-versicolor", "Iris-virginica"], ["Iris-setosa", 0.26289, 0.6666666666666667, 50, 50, 50], ["Iris-versicolor", 0.40383, 0.5, null, 50, 50], ["Iris-virginica", 0.88664, 0.04253308128544431, null, 1, 45], ["Iris-versicolor", 0.8009, 0.16803840877914955, null, 49, 5], ["Iris-setosa", 0.92865, 0.0, 50, null, null]] \ No newline at end of file diff --git a/data/movies.csv b/data/movies.csv new file mode 100644 index 00000000..edb88334 --- /dev/null +++ b/data/movies.csv @@ -0,0 +1,291 @@ +user_id;gender;age_range;occupation;zipcode;movie_id;title;genres;timestamp;rating +1;Female;Under 18;K-12 student;48067;1193;One Flew Over the Cuckoo's Nest (1975);Drama;978300760;5 +1;Female;Under 18;K-12 student;48067;661;James and the Giant Peach (1996);Animation$Children's$Musical;978302109;3 
+1;Female;Under 18;K-12 student;48067;914;My Fair Lady (1964);Musical$Romance;978301968;3 +1;Female;Under 18;K-12 student;48067;3408;Erin Brockovich (2000);Drama;978300275;4 +1;Female;Under 18;K-12 student;48067;2355;Bug's Life, A (1998);Animation$Children's$Comedy;978824291;5 +1;Female;Under 18;K-12 student;48067;1197;Princess Bride, The (1987);Action$Adventure$Comedy$Romance;978302268;3 +1;Female;Under 18;K-12 student;48067;1287;Ben-Hur (1959);Action$Adventure$Drama;978302039;5 +1;Female;Under 18;K-12 student;48067;2804;Christmas Story, A (1983);Comedy$Drama;978300719;5 +1;Female;Under 18;K-12 student;48067;594;Snow White and the Seven Dwarfs (1937);Animation$Children's$Musical;978302268;4 +1;Female;Under 18;K-12 student;48067;919;Wizard of Oz, The (1939);Adventure$Children's$Drama$Musical;978301368;4 +1;Female;Under 18;K-12 student;48067;595;Beauty and the Beast (1991);Animation$Children's$Musical;978824268;5 +1;Female;Under 18;K-12 student;48067;938;Gigi (1958);Musical;978301752;4 +1;Female;Under 18;K-12 student;48067;2398;Miracle on 34th Street (1947);Drama;978302281;4 +1;Female;Under 18;K-12 student;48067;2918;Ferris Bueller's Day Off (1986);Comedy;978302124;4 +1;Female;Under 18;K-12 student;48067;1035;Sound of Music, The (1965);Musical;978301753;5 +1;Female;Under 18;K-12 student;48067;2791;Airplane! (1980);Comedy;978302188;4 +1;Female;Under 18;K-12 student;48067;1097;E.T. 
the Extra-Terrestrial (1982);Children's$Drama$Fantasy$Sci-Fi;978301953;4 +1;Female;Under 18;K-12 student;48067;1721;Titanic (1997);Drama$Romance;978300055;4 +1;Female;Under 18;K-12 student;48067;1545;Ponette (1996);Drama;978824139;4 +1;Female;Under 18;K-12 student;48067;745;Close Shave, A (1995);Animation$Comedy$Thriller;978824268;3 +1;Female;Under 18;K-12 student;48067;2294;Antz (1998);Animation$Children's;978824291;4 +1;Female;Under 18;K-12 student;48067;3186;Girl, Interrupted (1999);Drama;978300019;4 +2;Male;56+;self-employed;70072;3451;Guess Who's Coming to Dinner (1967);Comedy$Drama;978298924;4 +2;Male;56+;self-employed;70072;3095;Grapes of Wrath, The (1940);Drama;978298517;4 +2;Male;56+;self-employed;70072;780;Independence Day (ID4) (1996);Action$Sci-Fi$War;978299966;3 +2;Male;56+;self-employed;70072;498;Mr. Jones (1993);Drama$Romance;978299418;3 +2;Male;56+;self-employed;70072;2728;Spartacus (1960);Drama;978298881;3 +2;Male;56+;self-employed;70072;2002;Lethal Weapon 3 (1992);Action$Comedy$Crime$Drama;978300100;5 +2;Male;56+;self-employed;70072;1962;Driving Miss Daisy (1989);Drama;978298813;5 +2;Male;56+;self-employed;70072;1784;As Good As It Gets (1997);Comedy$Drama;978298841;5 +2;Male;56+;self-employed;70072;2943;Indochine (1992);Drama$Romance;978298372;4 +2;Male;56+;self-employed;70072;2006;Mask of Zorro, The (1998);Action$Adventure$Romance;978299861;3 +2;Male;56+;self-employed;70072;318;Shawshank Redemption, The (1994);Drama;978298413;5 +2;Male;56+;self-employed;70072;1207;To Kill a Mockingbird (1962);Drama;978298478;4 +2;Male;56+;self-employed;70072;1968;Breakfast Club, The (1985);Comedy$Drama;978298881;2 +2;Male;56+;self-employed;70072;3678;Man with the Golden Arm, The (1955);Drama;978299250;3 +2;Male;56+;self-employed;70072;1244;Manhattan (1979);Comedy$Drama$Romance;978299143;3 +2;Male;56+;self-employed;70072;356;Forrest Gump (1994);Comedy$Romance$War;978299686;5 +2;Male;56+;self-employed;70072;1245;Miller's Crossing (1990);Drama;978299200;2 
+2;Male;56+;self-employed;70072;1246;Dead Poets Society (1989);Drama;978299418;5 +2;Male;56+;self-employed;70072;3893;Nurse Betty (2000);Comedy$Thriller;978299535;1 +2;Male;56+;self-employed;70072;1247;Graduate, The (1967);Drama$Romance;978298652;5 +3;Male;25-34;scientist;55117;3421;Animal House (1978);Comedy;978298147;4 +3;Male;25-34;scientist;55117;1641;Full Monty, The (1997);Comedy;978298430;2 +3;Male;25-34;scientist;55117;648;Mission: Impossible (1996);Action$Adventure$Mystery;978297867;3 +3;Male;25-34;scientist;55117;1394;Raising Arizona (1987);Comedy;978298147;4 +3;Male;25-34;scientist;55117;3534;28 Days (2000);Comedy;978297068;3 +3;Male;25-34;scientist;55117;104;Happy Gilmore (1996);Comedy;978298486;4 +3;Male;25-34;scientist;55117;2735;Golden Child, The (1986);Action$Adventure$Comedy;978297867;4 +3;Male;25-34;scientist;55117;1210;Star Wars: Episode VI - Return of the Jedi (1983);Action$Adventure$Romance$Sci-Fi$War;978297600;4 +3;Male;25-34;scientist;55117;1431;Beverly Hills Ninja (1997);Action$Comedy;978297095;3 +3;Male;25-34;scientist;55117;3868;Naked Gun: From the Files of Police Squad!, The (1988);Comedy;978298486;3 +3;Male;25-34;scientist;55117;1079;Fish Called Wanda, A (1988);Comedy;978298296;5 +3;Male;25-34;scientist;55117;2997;Being John Malkovich (1999);Comedy;978298147;3 +3;Male;25-34;scientist;55117;1615;Edge, The (1997);Adventure$Thriller;978297710;5 +3;Male;25-34;scientist;55117;1291;Indiana Jones and the Last Crusade (1989);Action$Adventure;978297600;4 +3;Male;25-34;scientist;55117;1259;Stand by Me (1986);Adventure$Comedy$Drama;978298296;5 +3;Male;25-34;scientist;55117;653;Dragonheart (1996);Action$Adventure$Fantasy;978297757;4 +3;Male;25-34;scientist;55117;2167;Blade (1998);Action$Adventure$Horror;978297600;5 +3;Male;25-34;scientist;55117;1580;Men in Black (1997);Action$Adventure$Comedy$Sci-Fi;978297663;3 +3;Male;25-34;scientist;55117;3619;Hollywood Knights, The (1980);Comedy;978298201;2 +3;Male;25-34;scientist;55117;260;Star Wars: Episode IV - 
A New Hope (1977);Action$Adventure$Fantasy$Sci-Fi;978297512;5 +3;Male;25-34;scientist;55117;2858;American Beauty (1999);Comedy$Drama;978297039;4 +3;Male;25-34;scientist;55117;3114;Toy Story 2 (1999);Animation$Children's$Comedy;978298103;3 +3;Male;25-34;scientist;55117;1049;Ghost and the Darkness, The (1996);Action$Adventure;978297805;4 +3;Male;25-34;scientist;55117;1261;Evil Dead II (Dead By Dawn) (1987);Action$Adventure$Comedy$Horror;978297663;1 +3;Male;25-34;scientist;55117;552;Three Musketeers, The (1993);Action$Adventure$Comedy;978297837;4 +3;Male;25-34;scientist;55117;480;Jurassic Park (1993);Action$Adventure$Sci-Fi;978297690;4 +3;Male;25-34;scientist;55117;1265;Groundhog Day (1993);Comedy$Romance;978298316;2 +3;Male;25-34;scientist;55117;1266;Unforgiven (1992);Western;978297396;5 +3;Male;25-34;scientist;55117;733;Rock, The (1996);Action$Adventure$Thriller;978297757;5 +3;Male;25-34;scientist;55117;1196;Star Wars: Episode V - The Empire Strikes Back (1980);Action$Adventure$Drama$Sci-Fi$War;978297539;4 +4;Male;45-49;executive/managerial;2460;1210;Star Wars: Episode VI - Return of the Jedi (1983);Action$Adventure$Romance$Sci-Fi$War;978293924;3 +4;Male;45-49;executive/managerial;2460;2951;Fistful of Dollars, A (1964);Action$Western;978294282;4 +4;Male;45-49;executive/managerial;2460;1214;Alien (1979);Action$Horror$Sci-Fi$Thriller;978294260;4 +4;Male;45-49;executive/managerial;2460;1036;Die Hard (1988);Action$Thriller;978294282;4 +4;Male;45-49;executive/managerial;2460;260;Star Wars: Episode IV - A New Hope (1977);Action$Adventure$Fantasy$Sci-Fi;978294199;5 +4;Male;45-49;executive/managerial;2460;2028;Saving Private Ryan (1998);Action$Drama$War;978294230;5 +4;Male;45-49;executive/managerial;2460;480;Jurassic Park (1993);Action$Adventure$Sci-Fi;978294008;4 +4;Male;45-49;executive/managerial;2460;1196;Star Wars: Episode V - The Empire Strikes Back (1980);Action$Adventure$Drama$Sci-Fi$War;978294199;2 +4;Male;45-49;executive/managerial;2460;1198;Raiders of the Lost Ark 
(1981);Action$Adventure;978294199;5 +4;Male;45-49;executive/managerial;2460;1954;Rocky (1976);Action$Drama;978294282;5 +4;Male;45-49;executive/managerial;2460;1097;E.T. the Extra-Terrestrial (1982);Children's$Drama$Fantasy$Sci-Fi;978293964;4 +4;Male;45-49;executive/managerial;2460;3418;Thelma & Louise (1991);Action$Drama;978294260;4 +4;Male;45-49;executive/managerial;2460;3702;Mad Max (1979);Action$Sci-Fi;978294260;4 +4;Male;45-49;executive/managerial;2460;2366;King Kong (1933);Action$Adventure$Horror;978294230;4 +4;Male;45-49;executive/managerial;2460;1387;Jaws (1975);Action$Horror;978294199;5 +4;Male;45-49;executive/managerial;2460;3527;Predator (1987);Action$Sci-Fi$Thriller;978294008;1 +4;Male;45-49;executive/managerial;2460;1201;Good, The Bad and The Ugly, The (1966);Action$Western;978294230;5 +4;Male;45-49;executive/managerial;2460;2692;Run Lola Run (Lola rennt) (1998);Action$Crime$Romance;978294230;5 +4;Male;45-49;executive/managerial;2460;2947;Goldfinger (1964);Action;978294230;5 +4;Male;45-49;executive/managerial;2460;1240;Terminator, The (1984);Action$Sci-Fi$Thriller;978294260;5 +5;Male;25-34;writer;55455;2987;Who Framed Roger Rabbit? (1988);Adventure$Animation$Film-Noir;978243170;4 +5;Male;25-34;writer;55455;2333;Gods and Monsters (1998);Drama;978242607;4 +5;Male;25-34;writer;55455;1175;Delicatessen (1991);Comedy$Sci-Fi;978244759;5 +5;Male;25-34;writer;55455;39;Clueless (1995);Comedy$Romance;978245037;3 +5;Male;25-34;writer;55455;288;Natural Born Killers (1994);Action$Thriller;978246585;2 +5;Male;25-34;writer;55455;2337;Velvet Goldmine (1998);Drama;978243121;5 +5;Male;25-34;writer;55455;1535;Love! Valour! Compassion! 
(1997);Drama$Romance;978245513;4 +5;Male;25-34;writer;55455;299;Priest (1994);Drama;978242934;3 +5;Male;25-34;writer;55455;3079;Mansfield Park (1999);Drama;978246162;2 +5;Male;25-34;writer;55455;2560;Ravenous (1999);Drama$Horror;978242977;4 +5;Male;25-34;writer;55455;1909;X-Files: Fight the Future, The (1998);Mystery$Sci-Fi$Thriller;978246479;3 +5;Male;25-34;writer;55455;150;Apollo 13 (1995);Drama;978245763;2 +5;Male;25-34;writer;55455;224;Don Juan DeMarco (1995);Comedy$Drama$Romance;978245829;3 +5;Male;25-34;writer;55455;3728;One False Move (1991);Thriller;978244568;2 +5;Male;25-34;writer;55455;229;Death and the Maiden (1994);Drama$Thriller;978246528;3 +6;Female;50-55;homemaker;55117;914;My Fair Lady (1964);Musical$Romance;978237767;5 +6;Female;50-55;homemaker;55117;3408;Erin Brockovich (2000);Drama;978238230;5 +6;Female;50-55;homemaker;55117;1806;Paulie (1998);Adventure$Children's$Comedy;978236876;3 +6;Female;50-55;homemaker;55117;3624;Shanghai Noon (2000);Action;978238256;4 +6;Female;50-55;homemaker;55117;2469;Peggy Sue Got Married (1986);Comedy$Romance;978236670;3 +6;Female;50-55;homemaker;55117;2396;Shakespeare in Love (1998);Comedy$Romance;978236809;4 +6;Female;50-55;homemaker;55117;2100;Splash (1984);Comedy$Fantasy$Romance;978236567;3 +6;Female;50-55;homemaker;55117;1959;Out of Africa (1985);Drama$Romance;978236612;3 +6;Female;50-55;homemaker;55117;2321;Pleasantville (1998);Comedy;978237034;3 +6;Female;50-55;homemaker;55117;1380;Grease (1978);Comedy$Musical$Romance;978237691;5 +6;Female;50-55;homemaker;55117;920;Gone with the Wind (1939);Drama$Romance$War;978238851;4 +6;Female;50-55;homemaker;55117;569;Little Big League (1994);Children's$Comedy;978236876;4 +6;Female;50-55;homemaker;55117;1674;Witness (1985);Drama$Romance$Thriller;978236567;4 +6;Female;50-55;homemaker;55117;3565;Where the Heart Is (2000);Comedy$Drama;978238288;4 +6;Female;50-55;homemaker;55117;1028;Mary Poppins (1964);Children's$Comedy$Musical;978237767;4 
+6;Female;50-55;homemaker;55117;34;Babe (1995);Children's$Comedy$Drama;978237444;4 +7;Male;35-44;academic/educator;6810;648;Mission: Impossible (1996);Action$Adventure$Mystery;978234737;4 +7;Male;35-44;academic/educator;6810;861;Supercop (1992);Action$Thriller;978234874;4 +7;Male;35-44;academic/educator;6810;2916;Total Recall (1990);Action$Adventure$Sci-Fi$Thriller;978234842;5 +7;Male;35-44;academic/educator;6810;3578;Gladiator (2000);Action$Drama;978234737;3 +7;Male;35-44;academic/educator;6810;3793;X-Men (2000);Action$Sci-Fi;978234737;3 +7;Male;35-44;academic/educator;6810;480;Jurassic Park (1993);Action$Adventure$Sci-Fi;978234607;4 +7;Male;35-44;academic/educator;6810;349;Clear and Present Danger (1994);Action$Adventure$Thriller;978234874;5 +7;Male;35-44;academic/educator;6810;3418;Thelma & Louise (1991);Action$Drama;978234810;3 +8;Male;25-34;programmer;11413;39;Clueless (1995);Comedy$Romance;978229571;3 +8;Male;25-34;programmer;11413;2336;Elizabeth (1998);Drama;978230120;3 +8;Male;25-34;programmer;11413;288;Natural Born Killers (1994);Action$Thriller;978229391;5 +8;Male;25-34;programmer;11413;3425;Mo' Better Blues (1990);Drama;978231982;3 +8;Male;25-34;programmer;11413;2268;Few Good Men, A (1992);Crime$Drama;978230852;3 +8;Male;25-34;programmer;11413;1466;Donnie Brasco (1997);Crime$Drama;978230052;4 +8;Male;25-34;programmer;11413;1393;Jerry Maguire (1996);Drama$Romance;978229702;5 +8;Male;25-34;programmer;11413;1682;Truman Show, The (1998);Drama;978230852;4 +8;Male;25-34;programmer;11413;2916;Total Recall (1990);Action$Adventure$Sci-Fi$Thriller;978229172;5 +8;Male;25-34;programmer;11413;506;Orlando (1993);Drama;978230483;3 +8;Male;25-34;programmer;11413;508;Philadelphia (1993);Drama;978230435;3 +8;Male;25-34;programmer;11413;3213;Batman: Mask of the Phantasm (1993);Animation$Children's;978233462;3 +8;Male;25-34;programmer;11413;42;Dead Presidents (1995);Action$Crime$Drama;978232754;3 +8;Male;25-34;programmer;11413;650;Moll Flanders (1996);Drama;978230943;5 
+9;Male;25-34;technician/engineer;61614;1307;When Harry Met Sally... (1989);Comedy$Romance;978225429;4 +9;Male;25-34;technician/engineer;61614;25;Leaving Las Vegas (1995);Drama$Romance;978226041;4 +9;Male;25-34;technician/engineer;61614;2324;Life Is Beautiful (La Vita Ռ bella) (1997);Comedy$Drama;978226066;5 +9;Male;25-34;technician/engineer;61614;349;Clear and Present Danger (1994);Action$Adventure$Thriller;978226564;4 +9;Male;25-34;technician/engineer;61614;920;Gone with the Wind (1939);Drama$Romance$War;978225401;3 +9;Male;25-34;technician/engineer;61614;3270;Cutting Edge, The (1992);Drama;978226448;3 +9;Male;25-34;technician/engineer;61614;2762;Sixth Sense, The (1999);Thriller;978225984;4 +9;Male;25-34;technician/engineer;61614;1961;Rain Man (1988);Drama;978224859;5 +9;Male;25-34;technician/engineer;61614;2692;Run Lola Run (Lola rennt) (1998);Action$Crime$Romance;978225429;4 +9;Male;25-34;technician/engineer;61614;1310;Hype! (1996);Documentary;978226006;3 +9;Male;25-34;technician/engineer;61614;428;Bronx Tale, A (1993);Drama;978226580;3 +10;Female;35-44;academic/educator;95370;2622;Midsummer Night's Dream, A (1999);Comedy$Fantasy;978228212;5 +10;Female;35-44;academic/educator;95370;648;Mission: Impossible (1996);Action$Adventure$Mystery;978224925;4 +10;Female;35-44;academic/educator;95370;2628;Star Wars: Episode I - The Phantom Menace (1999);Action$Adventure$Fantasy$Sci-Fi;978228408;3 +10;Female;35-44;academic/educator;95370;3358;Defending Your Life (1991);Comedy$Romance;978226378;5 +10;Female;35-44;academic/educator;95370;3359;Breaking Away (1979);Drama;978227125;3 +10;Female;35-44;academic/educator;95370;1682;Truman Show, The (1998);Drama;978226319;5 +10;Female;35-44;academic/educator;95370;1756;Prophecy II, The (1998);Horror;978228655;4 +10;Female;35-44;academic/educator;95370;1320;Alienë_ (1992);Action$Horror$Sci-Fi$Thriller;978230837;3 +10;Female;35-44;academic/educator;95370;2124;Addams Family, The (1991);Comedy;978229208;3 
+11;Female;25-34;academic/educator;4093;2325;Orgazmo (1997);Comedy;978904615;3 +11;Female;25-34;academic/educator;4093;3129;Sweet and Lowdown (1999);Comedy$Drama;978903701;4 +11;Female;25-34;academic/educator;4093;2329;American History X (1998);Drama;978220030;5 +11;Female;25-34;academic/educator;4093;784;Cable Guy, The (1996);Comedy;978904376;2 +11;Female;25-34;academic/educator;4093;2907;Superstar (1999);Comedy;978904916;2 +11;Female;25-34;academic/educator;4093;2762;Sixth Sense, The (1999);Thriller;978219815;5 +11;Female;25-34;academic/educator;4093;788;Nutty Professor, The (1996);Comedy$Fantasy$Romance$Sci-Fi;978904053;1 +11;Female;25-34;academic/educator;4093;2766;Adventures of Sebastian Cole, The (1998);Comedy$Drama;978903209;4 +11;Female;25-34;academic/educator;4093;3499;Misery (1990);Horror;978902477;2 +11;Female;25-34;academic/educator;4093;356;Forrest Gump (1994);Comedy$Romance$War;978903209;5 +11;Female;25-34;academic/educator;4093;357;Four Weddings and a Funeral (1994);Comedy$Romance;978903395;1 +11;Female;25-34;academic/educator;4093;36;Dead Man Walking (1995);Drama;978902405;3 +12;Male;25-34;programmer;32793;3897;Almost Famous (2000);Comedy$Drama;978218949;4 +12;Male;25-34;programmer;32793;2804;Christmas Story, A (1983);Comedy$Drama;978220237;5 +12;Male;25-34;programmer;32793;919;Wizard of Oz, The (1939);Adventure$Children's$Drama$Musical;978220120;5 +12;Male;25-34;programmer;32793;923;Citizen Kane (1941);Drama;978220237;5 +12;Male;25-34;programmer;32793;858;Godfather, The (1972);Action$Crime$Drama;978218949;5 +12;Male;25-34;programmer;32793;934;Father of the Bride (1950);Comedy;978218568;2 +12;Male;25-34;programmer;32793;3658;Quatermass and the Pit (1967);Sci-Fi;978220216;4 +12;Male;25-34;programmer;32793;1641;Full Monty, The (1997);Comedy;978218568;3 +12;Male;25-34;programmer;32793;111;Taxi Driver (1976);Drama$Thriller;978220179;5 +12;Male;25-34;programmer;32793;1221;Godfather: Part II, The (1974);Action$Crime$Drama;978218949;5 
+12;Male;25-34;programmer;32793;3265;Hard-Boiled (Lashou shentan) (1992);Action$Crime;978218916;4 +12;Male;25-34;programmer;32793;1303;Man Who Would Be King, The (1975);Adventure;978218916;4 +12;Male;25-34;programmer;32793;1233;Boat, The (Das Boot) (1981);Action$Drama$War;978220120;3 +12;Male;25-34;programmer;32793;999;2 Days in the Valley (1996);Crime;978218568;4 +12;Male;25-34;programmer;32793;2616;Dick Tracy (1990);Action$Crime;978218568;1 +12;Male;25-34;programmer;32793;3785;Scary Movie (2000);Comedy$Horror;978218568;3 +12;Male;25-34;programmer;32793;1247;Graduate, The (1967);Drama$Romance;978220216;3 +13;Male;45-49;academic/educator;93304;2987;Who Framed Roger Rabbit? (1988);Adventure$Animation$Film-Noir;978202328;3 +13;Male;45-49;academic/educator;93304;648;Mission: Impossible (1996);Action$Adventure$Mystery;978201927;3 +13;Male;45-49;academic/educator;93304;2628;Star Wars: Episode I - The Phantom Menace (1999);Action$Adventure$Fantasy$Sci-Fi;978201987;3 +13;Male;45-49;academic/educator;93304;2054;Honey, I Shrunk the Kids (1989);Adventure$Children's$Comedy$Fantasy$Sci-Fi;978202563;3 +13;Male;45-49;academic/educator;93304;1259;Stand by Me (1986);Adventure$Comedy$Drama;978202246;4 +13;Male;45-49;academic/educator;93304;589;Terminator 2: Judgment Day (1991);Action$Sci-Fi$Thriller;978201811;5 +13;Male;45-49;academic/educator;93304;1690;Alien: Resurrection (1997);Action$Horror$Sci-Fi;978202057;3 +13;Male;45-49;academic/educator;93304;2;Jumanji (1995);Adventure$Children's$Fantasy;978202563;3 +13;Male;45-49;academic/educator;93304;153;Batman Forever (1995);Action$Adventure$Comedy$Crime;978202125;3 +13;Male;45-49;academic/educator;93304;1331;Audrey Rose (1977);Horror;978201342;4 +13;Male;45-49;academic/educator;93304;2135;Doctor Dolittle (1967);Adventure$Musical;978202543;3 +13;Male;45-49;academic/educator;93304;1262;Great Escape, The (1963);Adventure$War;978202201;4 +13;Male;45-49;academic/educator;93304;1196;Star Wars: Episode V - The Empire Strikes Back 
(1980);Action$Adventure$Drama$Sci-Fi$War;978201342;5 +14;Male;35-44;other;60126;2243;Broadcast News (1987);Comedy$Drama$Romance;978200300;3 +14;Male;35-44;other;60126;3623;Mission: Impossible 2 (2000);Action$Thriller;978200924;3 +14;Male;35-44;other;60126;2826;13th Warrior, The (1999);Action$Horror$Thriller;978200645;2 +14;Male;35-44;other;60126;2686;Red Violin, The (Le Violon rouge) (1998);Drama$Mystery;978200975;5 +14;Male;35-44;other;60126;2762;Sixth Sense, The (1999);Thriller;978201003;5 +14;Male;35-44;other;60126;2692;Run Lola Run (Lola rennt) (1998);Action$Crime$Romance;978200975;4 +14;Male;35-44;other;60126;2694;Big Daddy (1999);Comedy;978200689;1 +14;Male;35-44;other;60126;1968;Breakfast Club, The (1985);Comedy$Drama;978200300;2 +14;Male;35-44;other;60126;3578;Gladiator (2000);Action$Drama;978200828;4 +14;Male;35-44;other;60126;296;Pulp Fiction (1994);Crime$Drama;978201244;5 +14;Male;35-44;other;60126;2920;Children of Paradise (Les enfants du paradis) (1945);Drama$Romance;978200528;5 +14;Male;35-44;other;60126;1982;Halloween (1978);Horror;978200340;1 +14;Male;35-44;other;60126;2858;American Beauty (1999);Comedy$Drama;978200645;3 +14;Male;35-44;other;60126;3081;Sleepy Hollow (1999);Horror$Romance;978201003;4 +14;Male;35-44;other;60126;608;Fargo (1996);Crime$Drama$Thriller;978201244;1 +14;Male;35-44;other;60126;2959;Fight Club (1999);Drama;978200800;2 +14;Male;35-44;other;60126;1225;Amadeus (1984);Drama;978201317;4 +14;Male;35-44;other;60126;2396;Shakespeare in Love (1998);Comedy$Romance;978201003;4 +14;Male;35-44;other;60126;2976;Bringing Out the Dead (1999);Drama$Horror;978200300;3 +15;Male;25-34;executive/managerial;22903;3421;Animal House (1978);Comedy;978196170;4 +15;Male;25-34;executive/managerial;22903;648;Mission: Impossible (1996);Action$Adventure$Mystery;978212463;4 +15;Male;25-34;executive/managerial;22903;3354;Mission to Mars (2000);Sci-Fi;978196692;2 +15;Male;25-34;executive/managerial;22903;2485;She's All That (1999);Comedy$Romance;978196817;3 
+15;Male;25-34;executive/managerial;22903;141;Birdcage, The (1996);Comedy;978198350;4 +15;Male;25-34;executive/managerial;22903;2126;Snake Eyes (1998);Action$Crime$Mystery$Thriller;978212274;3 +15;Male;25-34;executive/managerial;22903;2058;Negotiator, The (1998);Action$Thriller;978198516;3 +15;Male;25-34;executive/managerial;22903;3798;What Lies Beneath (2000);Thriller;978196866;4 +15;Male;25-34;executive/managerial;22903;2997;Being John Malkovich (1999);Comedy;978196418;2 +15;Male;25-34;executive/managerial;22903;653;Dragonheart (1996);Action$Adventure$Fantasy;978212570;2 +16;Female;35-44;other;20670;2369;Desperately Seeking Susan (1985);Comedy$Romance;978174535;5 +16;Female;35-44;other;20670;3175;Galaxy Quest (1999);Adventure$Comedy$Sci-Fi;978174568;4 +16;Female;35-44;other;20670;2888;Drive Me Crazy (1999);Comedy$Romance;978174535;3 +16;Female;35-44;other;20670;2392;Jack Frost (1998);Comedy$Drama;978174639;2 +16;Female;35-44;other;20670;2394;Prince of Egypt, The (1998);Animation$Musical;978174720;5 +16;Female;35-44;other;20670;2975;Best Man, The (1999);Drama;978174443;5 +17;Male;50-55;academic/educator;95350;1179;Grifters, The (1990);Crime$Drama$Film-Noir;978160157;5 +17;Male;50-55;academic/educator;95350;2553;Village of the Damned (1960);Horror$Sci-Fi$Thriller;978160616;4 +17;Male;50-55;academic/educator;95350;2554;Children of the Damned (1963);Horror$Sci-Fi$Thriller;978160739;3 +17;Male;50-55;academic/educator;95350;3932;Invisible Man, The (1933);Horror$Sci-Fi;978160437;3 +17;Male;50-55;academic/educator;95350;3863;Cell, The (2000);Sci-Fi$Thriller;978158779;3 +17;Male;50-55;academic/educator;95350;3793;X-Men (2000);Action$Sci-Fi;978158689;4 +17;Male;50-55;academic/educator;95350;1253;Day the Earth Stood Still, The (1951);Drama$Sci-Fi;978160616;5 +17;Male;50-55;academic/educator;95350;720;Wallace & Gromit: The Best of Aardman Animation (1996);Animation;978159210;5 +17;Male;50-55;academic/educator;95350;2058;Negotiator, The (1998);Action$Thriller;978160129;3 
+17;Male;50-55;academic/educator;95350;1185;My Left Foot (1989);Drama;978158471;5 +17;Male;50-55;academic/educator;95350;3503;Solaris (Solyaris) (1972);Drama$Sci-Fi;978160490;5 +18;Female;18-24;clerical/admin;95825;2616;Dick Tracy (1990);Action$Crime;978152203;4 +18;Female;18-24;clerical/admin;95825;2470;Crocodile Dundee (1986);Adventure$Comedy;978154505;3 +18;Female;18-24;clerical/admin;95825;2617;Mummy, The (1999);Action$Adventure$Horror$Thriller;978153540;1 +18;Female;18-24;clerical/admin;95825;1676;Starship Troopers (1997);Action$Adventure$Sci-Fi$War;978153683;3 +18;Female;18-24;clerical/admin;95825;1678;Joy Luck Club, The (1993);Drama;978156714;5 +18;Female;18-24;clerical/admin;95825;208;Waterworld (1995);Action$Adventure;978153771;1 +18;Female;18-24;clerical/admin;95825;2115;Indiana Jones and the Temple of Doom (1984);Action$Adventure;978153303;5 +18;Female;18-24;clerical/admin;95825;2042;D2: The Mighty Ducks (1994);Children's$Comedy;978155392;1 +18;Female;18-24;clerical/admin;95825;1240;Terminator, The (1984);Action$Sci-Fi$Thriller;978153104;5 +18;Female;18-24;clerical/admin;95825;2116;Lord of the Rings, The (1978);Adventure$Animation$Children's$Sci-Fi;978154452;4 +18;Female;18-24;clerical/admin;95825;1242;Glory (1989);Action$Drama$War;978152999;5 +18;Female;18-24;clerical/admin;95825;1246;Dead Poets Society (1989);Drama;978156549;5 +19;Male;Under 18;K-12 student;48073;2987;Who Framed Roger Rabbit? (1988);Adventure$Animation$Film-Noir;978555881;4 +19;Male;Under 18;K-12 student;48073;2989;For Your Eyes Only (1981);Action;978147099;4 +19;Male;Under 18;K-12 student;48073;3421;Animal House (1978);Comedy;983074250;3 +19;Male;Under 18;K-12 student;48073;1301;Forbidden Planet (1956);Sci-Fi;978557301;3 +19;Male;Under 18;K-12 student;48073;1307;When Harry Met Sally... 
(1989);Comedy$Romance;978554335;4 +19;Male;Under 18;K-12 student;48073;1234;Sting, The (1973);Comedy$Crime;994556636;5 +19;Male;Under 18;K-12 student;48073;1090;Platoon (1986);Drama$War;978300207;3 +19;Male;Under 18;K-12 student;48073;2976;Bringing Out the Dead (1999);Drama$Horror;978850058;5 +19;Male;Under 18;K-12 student;48073;1097;E.T. the Extra-Terrestrial (1982);Children's$Drama$Fantasy$Sci-Fi;978557204;3 +19;Male;Under 18;K-12 student;48073;3273;Scream 3 (2000);Horror$Mystery$Thriller;978300170;3 +19;Male;Under 18;K-12 student;48073;2115;Indiana Jones and the Temple of Doom (1984);Action$Adventure;978146747;3 +19;Male;Under 18;K-12 student;48073;1240;Terminator, The (1984);Action$Sci-Fi$Thriller;978146863;3 +19;Male;Under 18;K-12 student;48073;3928;Abbott and Costello Meet Frankenstein (1948);Comedy$Horror;978555222;2 +20;Male;25-34;sales/marketing;55113;648;Mission: Impossible (1996);Action$Adventure$Mystery;978143369;4 +20;Male;25-34;sales/marketing;55113;3863;Cell, The (2000);Sci-Fi$Thriller;978143355;3 +20;Male;25-34;sales/marketing;55113;589;Terminator 2: Judgment Day (1991);Action$Sci-Fi$Thriller;978143508;4 +20;Male;25-34;sales/marketing;55113;1694;Apostle, The (1997);Drama;1009669071;3 +20;Male;25-34;sales/marketing;55113;2641;Superman II (1980);Action$Adventure$Sci-Fi;1009669115;4 +20;Male;25-34;sales/marketing;55113;1912;Out of Sight (1998);Action$Crime$Romance;978143508;5 +20;Male;25-34;sales/marketing;55113;2571;Matrix, The (1999);Action$Sci-Fi$Thriller;978143508;5 +20;Male;25-34;sales/marketing;55113;3527;Predator (1987);Action$Sci-Fi$Thriller;1009669181;4 +20;Male;25-34;sales/marketing;55113;1923;There's Something About Mary (1998);Comedy;978143576;4 +20;Male;25-34;sales/marketing;55113;1371;Star Trek: The Motion Picture (1979);Action$Adventure$Sci-Fi;1009669227;4 +20;Male;25-34;sales/marketing;55113;1375;Star Trek III: The Search for Spock (1984);Action$Adventure$Sci-Fi;1009669115;3 +20;Male;25-34;sales/marketing;55113;1527;Fifth Element, The 
(1997);Action$Sci-Fi;1009669181;5 diff --git a/data/one_plus_one.whizzml b/data/one_plus_one.whizzml new file mode 100644 index 00000000..3ed6dc80 --- /dev/null +++ b/data/one_plus_one.whizzml @@ -0,0 +1 @@ +(+ 1 1) diff --git a/data/predictions_c.json b/data/predictions_c.json new file mode 100644 index 00000000..825a6f1f --- /dev/null +++ b/data/predictions_c.json @@ -0,0 +1 @@ +[{"prediction": "a", "confidence": 0.2365895936154873, "order": 0, "distribution": [["a", 5], ["b", 5], ["c", 0]], "count": 10}, {"prediction": "b", "confidence": 0.2992949144298199, "order": 1, "distribution": [["a", 5], ["b", 10], ["c", 5]], "count": 20}, {"prediction": "a", "confidence": 0.6643529481431982, "order": 2, "distribution": [["a", 25], ["b", 5], ["c", 0]], "count": 30}] diff --git a/data/predictions_r.json b/data/predictions_r.json new file mode 100644 index 00000000..c56d190a --- /dev/null +++ b/data/predictions_r.json @@ -0,0 +1 @@ +[{"prediction": 1.5, "confidence": 0.2365895936154873, "order": 0, "distribution": [[1, 5], [2, 5], [3, 0]], "count": 10}, {"prediction": 2, "confidence": 0.2992949144298199, "order": 1, "distribution": [[1, 5], [2, 10], [3, 5]], "count": 20}, {"prediction": 1.1666667, "confidence": 0.6643529481431982, "order": 2, "distribution": [[1, 25], [2, 5], [3, 0]], "count": 30}] diff --git a/data/price.csv b/data/price.csv new file mode 100644 index 00000000..b2902a93 --- /dev/null +++ b/data/price.csv @@ -0,0 +1,100 @@ +Transaction_date,Price,Product +1/2/09 6:17,1200,Product1 +1/2/09 4:53,1200,Product1 +1/2/09 13:08,1200,Product1 +1/3/09 14:44,1200,Product1 +1/4/09 12:56,3600,Product2 +1/4/09 13:19,1200,Product1 +1/4/09 20:11,1200,Product1 +1/2/09 20:09,1200,Product1 +1/4/09 13:17,1200,Product1 +1/4/09 14:11,1200,Product1 +1/5/09 2:42,1200,Product1 +1/5/09 5:39,1200,Product1 +1/2/09 9:16,1200,Product1 +1/5/09 10:08,1200,Product1 +1/2/09 14:18,1200,Product1 +1/4/09 1:05,1200,Product1 +1/5/09 11:37,1200,Product1 +1/6/09 5:02,1200,Product1 +1/6/09 
7:45,3600,Product2 +1/2/09 7:35,1200,Product1 +1/6/09 12:56,1200,Product1 +1/1/09 11:05,1200,Product1 +1/5/09 4:10,1200,Product1 +1/6/09 7:18,1200,Product1 +1/2/09 1:11,1200,Product1 +1/1/09 2:24,1200,Product1 +1/7/09 8:08,1200,Product1 +1/2/09 2:57,1200,Product1 +1/1/09 20:21,1200,Product1 +1/8/09 0:42,1200,Product1 +1/8/09 3:56,1200,Product1 +1/8/09 3:16,1200,Product1 +1/8/09 1:59,1200,Product1 +1/3/09 9:03,1200,Product1 +1/5/09 13:17,1200,Product1 +1/6/09 7:46,1200,Product1 +1/5/09 20:00,3600,Product2 +1/8/09 16:24,1200,Product1 +1/9/09 6:39,1200,Product1 +1/6/09 22:19,3600,Product2 +1/6/09 23:00,3600,Product2 +1/7/09 7:44,1200,Product1 +1/3/09 13:24,1200,Product1 +1/7/09 15:12,3600,Product2 +1/7/09 20:15,1200,Product1 +1/3/09 10:11,3600,Product2 +1/9/09 15:58,1200,Product1 +1/3/09 13:11,1200,Product1 +1/10/09 12:57,1200,Product1 +1/10/09 14:43,1200,Product1 +1/10/09 12:05,1200,Product1 +1/6/09 1:20,1200,Product1 +1/10/09 14:56,1200,Product1 +1/7/09 10:01,1200,Product1 +1/1/09 1:26,1200,Product1 +1/11/09 2:04,1200,Product1 +1/11/09 14:17,1200,Product1 +1/10/09 21:38,1200,Product1 +1/7/09 6:18,1200,Product1 +1/4/09 8:39,3600,Product2 +1/5/09 0:31,1200,Product1 +1/2/09 6:07,1200,Product1 +1/12/09 3:25,1200,Product1 +1/4/09 13:56,1200,Product1 +1/7/09 0:12,3600,Product2 +1/12/09 5:18,1200,Product1 +1/8/09 15:16,1200,Product1 +1/11/09 11:33,1200,Product1 +1/7/09 20:22,1200,Product1 +1/2/09 22:00,1200,Product1 +1/5/09 13:52,1200,Product1 +1/8/09 20:32,1200,Product1 +1/8/09 13:34,1200,Product1 +1/11/09 13:08,1200,Product1 +1/12/09 19:04,1200,Product1 +1/12/09 13:41,1200,Product1 +1/11/09 10:38,1200,Product1 +1/13/09 5:57,1200,Product1 +1/13/09 6:13,1200,Product1 +1/8/09 13:14,1200,Product1 +1/2/09 11:41,1200,Product1 +1/7/09 19:50,1200,Product1 +1/1/09 20:28,1200,Product1 +1/3/09 15:22,1200,Product1 +1/12/09 3:03,3600,Product2 +1/5/09 8:58,3600,Product2 +1/10/09 14:03,1200,Product1 +1/13/09 11:26,1200,Product1 +1/2/09 12:18,1200,Product1 +1/14/09 4:54,1200,Product1 
+1/6/09 17:15,1200,Product1 +1/3/09 13:56,1200,Product1 +1/4/09 7:54,1200,Product1 +1/12/09 7:28,1200,Product1 +1/6/09 15:15,1200,Product1 +1/13/09 23:56,1200,Product1 +1/14/09 19:32,1200,Product1 +1/6/09 21:13,1200,Product1 +1/14/09 11:19,1200,Product1 diff --git a/data/regression_evaluation.json b/data/regression_evaluation.json new file mode 100644 index 00000000..6bb17e30 --- /dev/null +++ b/data/regression_evaluation.json @@ -0,0 +1 @@ +{"code": 200, "resource": "evaluation/64adcb654a1a2c0c57cb8784", "location": "https://bigml.io/andromeda/evaluation/64adcb654a1a2c0c57cb8784", "object": {"boosted_ensemble": false, "category": 0, "code": 200, "combiner": null, "configuration": null, "configuration_status": false, "created": "2023-07-11T21:36:37.670000", "creator": "mmartin", "dataset": "dataset/64adcb5f79c60236c3593ef5", "dataset_status": true, "datasets": [], "deepnet": "", "description": "", "ensemble": "", "evaluations": null, "excluded_fields": [], "fields_map": {"000000": "000000", "000001": "000001", "000002": "000002", "000003": "000003", "000004": "000004", "000005": "000005", "000006": "000006", "000007": "000007"}, "fusion": "", "input_fields": [], "linearregression": "", "locale": "en-US", "logisticregression": "", "max_rows": 4128, "missing_strategy": 0, "model": "model/64ad258d79c60271f4826e23", "model_status": true, "model_type": 0, "name": "Stdin input vs. 
Stdin input", "name_options": "512-node, pruned, deterministic order, operating kind=probability", "number_of_models": 1, "objective_field_descriptors": {"000007": {"column_number": 7, "datatype": "double", "name": "Longitude", "optype": "numeric", "order": 7, "preferred": true}}, "objective_fields": ["000007"], "objective_fields_names": ["Longitude"], "operating_kind": "probability", "optiml": null, "optiml_status": false, "out_of_bag": false, "performance": 0.9288, "private": true, "project": null, "range": null, "replacement": false, "resource": "evaluation/64adcb654a1a2c0c57cb8784", "result": {"mean": {"mean_absolute_error": 1.83374, "mean_squared_error": 4.0345, "r_squared": 0}, "model": {"mean_absolute_error": 0.30921, "mean_squared_error": 0.28725, "r_squared": 0.9288}, "random": {"mean_absolute_error": 2.93722, "mean_squared_error": 12.60007, "r_squared": -2.12308}}, "rows": 4128, "sample_rate": 1.0, "sampled_rows": 4128, "shared": false, "size": 354722, "status": {"code": 5, "elapsed": 3590, "message": "The evaluation has been created", "progress": 1}, "subscription": false, "tags": [], "timeseries": "", "type": 1, "updated": "2023-07-11T21:36:43.498000"}, "error": null} \ No newline at end of file diff --git a/data/repeat_iris.csv b/data/repeat_iris.csv new file mode 100644 index 00000000..ef2fa68f --- /dev/null +++ b/data/repeat_iris.csv @@ -0,0 +1,14 @@ +sepal length,sepal width,petal length,petal width,species +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +5.1,3.5,1.4,0.2,Iris-setosa +6.4,3.2,4.5,1.5,Iris-versicolor diff --git a/data/spam.csv b/data/spam.csv new file mode 100644 index 00000000..5faca0c2 --- /dev/null +++ b/data/spam.csv @@ -0,0 +1,657 @@ +Type Message +ham Go until 
jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat... +ham Ok lar... Joking wif u oni... +spam Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's +ham U dun say so early hor... U c already then say... +ham Nah I don't think he goes to usf, he lives around here though +spam FreeMsg Hey there darling it's been 3 week's now and no word back! I'd like some fun you up for it still? Tb ok! XxX std chgs to send, £1.50 to rcv +ham Even my brother is not like to speak with me. They treat me like aids patent. +ham As per your request 'Melle Melle (Oru Minnaminunginte Nurungu Vettam)' has been set as your callertune for all Callers. Press *9 to copy your friends Callertune +spam WINNER!! As a valued network customer you have been selected to receivea £900 prize reward! To claim call 09061701461. Claim code KL341. Valid 12 hours only. +spam Had your mobile 11 months or more? U R entitled to Update to the latest colour mobiles with camera for Free! Call The Mobile Update Co FREE on 08002986030 +ham I'm gonna be home soon and i don't want to talk about this stuff anymore tonight, k? I've cried enough today. +spam SIX chances to win CASH! From 100 to 20,000 pounds txt> CSH11 and send to 87575. Cost 150p/day, 6days, 16+ TsandCs apply Reply HL 4 info +spam URGENT! You have won a 1 week FREE membership in our £100,000 Prize Jackpot! Txt the word: CLAIM to No: 81010 T&C www.dbuk.net LCCLTD POBOX 4403LDNW1A7RW18 +ham Finally the match heading towards draw as your prediction. +ham Tired. I haven't slept well the past few nights. +ham Easy ah?sen got selected means its good.. +ham I have to take exam with march 3 +ham Yeah you should. I think you can use your gt atm now to register. Not sure but if there's anyway i can help let me know. But when you do be sure you are ready. +ham Ok no prob. Take ur time. 
+ham There is os called ubandu which will run without installing in hard disk...you can use that os to copy the important files in system and give it to repair shop.. +ham Sorry, I'll call later +ham U say leh... Of course nothing happen lar. Not say v romantic jus a bit only lor. I thk e nite scenery not so nice leh. +spam 500 New Mobiles from 2004, MUST GO! Txt: NOKIA to No: 89545 & collect yours today!From ONLY £1 www.4-tc.biz 2optout 087187262701.50gbp/mtmsg18 +ham Would really appreciate if you call me. Just need someone to talk to. +spam Will u meet ur dream partner soon? Is ur career off 2 a flyng start? 2 find out free, txt HORO followed by ur star sign, e. g. HORO ARIES +ham Hey company elama po mudyadhu. +ham Life is more strict than teacher... Bcoz Teacher teaches lesson & then conducts exam, But Life first conducts Exam & then teaches Lessons. Happy morning. . . +ham Dear good morning now only i am up +ham Get down in gandhipuram and walk to cross cut road. Right side <#> street road and turn at first right. +ham Dear we are going to our rubber place +ham Sorry battery died, yeah I'm here +ham Yes:)here tv is always available in work place.. +spam Text & meet someone sexy today. U can find a date or even flirt its up to U. Join 4 just 10p. REPLY with NAME & AGE eg Sam 25. 18 -msg recd@thirtyeight pence +ham I have printed it oh. So <#> come upstairs +ham Or ill be a little closer like at the bus stop on the same street +ham Where are you?when wil you reach here? +ham New Theory: Argument wins d SITUATION, but loses the PERSON. So dont argue with ur friends just.. . . . kick them & say, I'm always correct.! +ham I love to give massages. I use lots of baby oil... What is your fave position? +ham Dude we should go sup again +ham Yoyyooo u know how to change permissions for a drive in mac. My usb flash drive +ham Gibbs unsold.mike hussey +ham I like to talk pa but am not able to. I dont know y. +ham Y dun cut too short leh. U dun like ah? She failed. 
She's quite sad. +ham You unbelievable faglord +ham Wife.how she knew the time of murder exactly +ham Why do you ask princess? +ham I am great princess! What are you thinking about me? :) +ham Nutter. Cutter. Ctter. Cttergg. Cttargg. Ctargg. Ctagg. ie you +ham It's ok i noe u're busy but i'm really too bored so i msg u. I oso dunno wat colour she choose 4 me one. +ham Doesn't g have class early tomorrow and thus shouldn't be trying to smoke at <#> +ham Superb Thought- "Be grateful that u dont have everything u want. That means u still have an opportunity to be happier tomorrow than u are today.":-) +ham Hope you are having a good week. Just checking in +ham I'm used to it. I just hope my agents don't drop me since i've only booked a few things this year. This whole me in boston, them in nyc was an experiment. +ham Thursday night? Yeah, sure thing, we'll work it out then +spam Your free ringtone is waiting to be collected. Simply text the password "MIX" to 85069 to verify. Get Usher and Britney. FML, PO Box 5249, MK17 92H. 450Ppw 16 +ham Probably money worries. Things are coming due and i have several outstanding invoices for work i did two and three months ago. +ham How is it possible to teach you. And where. +ham I wonder if your phone battery went dead ? I had to tell you, I love you babe +ham Lovely smell on this bus and it ain't tobacco... +ham We're all getting worried over here, derek and taylor have already assumed the worst +ham Hey what's up charles sorry about the late reply. +spam all the lastest from Stereophonics, Marley, Dizzee Racal, Libertines and The Strokes! Win Nookii games with Flirt!! Click TheMob WAP Bookmark or text WAP to 82468 +ham I.ll give her once i have it. Plus she said grinule greet you whenever we speak +ham WHITE FUDGE OREOS ARE IN STORES +spam January Male Sale! Hot Gay chat now cheaper, call 08709222922. National rate from 1.5p/min cheap to 7.8p/min peak! To stop texts call 08712460324 (10p/min) +ham My love ! 
How come it took you so long to leave for Zaher's? I got your words on ym and was happy to see them but was sad you had left. I miss you +ham I am sorry it hurt you. +ham Can't. I feel nauseous. I'm so pissed. I didn't eat any sweets all week cause today I was planning to pig out. I was dieting all week. And now I'm not hungry :/ +ham Ok lor but not too early. Me still having project meeting now. +ham Call me da, i am waiting for your call. +ham I could ask carlos if we could get more if anybody else can chip in +ham Was actually about to send you a reminder today. Have a wonderful weekend +ham When people see my msgs, They think Iam addicted to msging... They are wrong, Bcoz They don\'t know that Iam addicted to my sweet Friends..!! BSLVYL +ham Hey you gave them your photo when you registered for driving ah? Tmr wanna meet at yck? +ham Dont talk to him ever ok its my word. +ham When u wana see it then +ham On ma way to school. Can you pls send me ashley's number +ham It shall be fine. I have avalarr now. Will hollalater +ham She went to attend another two rounds today..but still did't reach home.. +ham Actually i deleted my old website..now i m blogging at magicalsongs.blogspot.com +ham K, wait chikku..il send aftr <#> mins +ham But I'm on a diet. And I ate 1 too many slices of pizza yesterday. Ugh I'm ALWAYS on a diet. +ham K:)i will give my kvb acc details:) +ham Oh all have to come ah? +spam money!!! you r a lucky winner ! 2 claim your prize text money 2 88600 over £1million to give away ! ppt150x3+normal text rate box403 w1t1jy +ham I'm really sorry i won't b able 2 do this friday.hope u can find an alternative.hope yr term's going ok:-) +ham Congratulations ore mo owo re wa. Enjoy it and i wish you many happy moments to and fro wherever you go +ham So do you have samus shoulders yet +ham What time you think you'll have it? 
Need to know when I should be near campus +spam Dear Matthew please call 09063440451 from a landline, your complimentary 4*Lux Tenerife holiday or £1000 CASH await collection. ppm150 SAE T&Cs Box334 SK38XH. +ham Then dun wear jeans lor... +ham Since when, which side, any fever, any vomitin. +ham K:)k.are you in college? +spam Urgent! call 09061749602 from Landline. Your complimentary 4* Tenerife Holiday or £10,000 cash await collection SAE T&Cs BOX 528 HP20 1YF 150ppm 18+ +ham Better. Made up for Friday and stuffed myself like a pig yesterday. Now I feel bleh. But at least its not writhing pain kind of bleh. +ham No we sell it all so we'll have tons if coins. Then sell our coins to someone thru paypal. Voila! Money back in life pockets:) +ham Theyre doing it to lots of places. Only hospitals and medical places are safe. +spam How about getting in touch with folks waiting for company? Just txt back your NAME and AGE to opt in! Enjoy the community (150p/SMS) +ham And also I've sorta blown him off a couple times recently so id rather not text him out of the blue looking for weed +ham I sent my scores to sophas and i had to do secondary application for a few schools. I think if you are thinking of applying, do a research on cost also. Contact joke ogunrinde, her school is one me the less expensive ones +ham I cant wait to see you! How were the photos were useful? :) +spam Ur cash-balance is currently 500 pounds - to maximize ur cash-in now send GO to 86688 only 150p/msg. CC: 08718720201 PO BOX 114/14 TCR/W1 +ham Hey i booked the kb on sat already... what other lessons are we going for ah? Keep your sat night free we need to meet and confirm our lodging +ham Chk in ur belovd ms dict +ham Is that what time you want me to come? +ham Awesome, lemme know whenever you're around +ham Shb b ok lor... Thanx... +ham Beautiful Truth against Gravity.. Read carefully: "Our heart feels light when someone is in it.. But it feels very heavy when someone leaves it.." 
GOOD NIGHT +ham Also remember to get dobby's bowl from your car +spam Filthy stories and GIRLS waiting for your +ham Sorry i now then c ur msg... Yar lor so poor thing... But only 4 one night... Tmr u'll have a brand new room 2 sleep in... +ham Love isn't a decision, it's a feeling. If we could decide who to love, then, life would be much simpler, but then less magical +ham Welp apparently he retired +ham My sort code is and acc no is . The bank is natwest. Can you reply to confirm i've sent this to the right person! +ham Where @ +ham U sure u can't take any sick time? +spam URGENT! We are trying to contact U. Todays draw shows that you have won a £800 prize GUARANTEED. Call 09050001808 from land line. Claim M95. Valid12hrs only +ham Watching cartoon, listening music & at eve had to go temple & church.. What about u? +ham Yo chad which gymnastics class do you wanna take? The site says Christians class is full.. +ham Are you this much buzy +ham Or better still can you catch her and let ask her if she can sell <#> for me. +ham I am not sure about night menu. . . I know only about noon menu +ham What do u want when i come back?.a beautiful necklace as a token of my heart for you.thats what i will give but ONLY to MY WIFE OF MY LIKING.BE THAT AND SEE..NO ONE can give you that.dont call me.i will wait till i come. +ham Are you willing to go for aptitude class. +ham It wont b until 2.15 as trying 2 sort house out, is that ok? +ham Yar lor he wan 2 go c horse racing today mah, so eat earlier lor. I ate chicken rice. U? +ham Haha awesome, omw back now then +ham Yup i thk so until e shop closes lor. +ham what is your account number? +ham Eh u send wrongly lar... +ham Hey no I ad a crap nite was borin without ya 2 boggy with me u boring biatch! Thanx but u wait til nxt time il ave ya +ham Ok i shall talk to him +ham Dont hesitate. You know this is the second time she has had weakness like that. 
So keep i notebook of what she eat and did the day before or if anything changed the day before so that we can be sure its nothing +ham Hey you can pay. With salary de. Only <#> . +ham Another month. I need chocolate weed and alcohol. +ham If he started searching he will get job in few days.he have great potential and talent. +ham Reckon need to be in town by eightish to walk from * carpark. +spam Congrats! 2 mobile 3G Videophones R yours. call 09063458130 now! videochat wid your mates, play java games, Dload polyPH music, noline rentl. +ham LOOK AT THE FUCKIN TIME. WHAT THE FUCK YOU THINK IS UP +ham Yo guess what I just dropped +ham Carlos says he'll be at mu in <#> minutes +ham I'm in office now . I will call you <#> min:) +ham Geeee ... I miss you already, you know ? Your all I can think about. Fuck, I can't wait till next year when we will be together ... *loving kiss* +ham Yun ah.the ubi one say if ü wan call by tomorrow.call 67441233 look for irene.ere only got bus8,22,65,61,66,382. Ubi cres,ubi tech park.6ph for 1st 5wkg days.èn +ham Ugh. Gotta drive back to sd from la. My butt is sore. +ham I will once i get home +ham Waaaat?? Lololo ok next time then! +ham The table's occupied, I'm waiting by the tree +ham I surely dont forgot to come:)i will always be in touch in with you:-) +ham Hi kindly give us back our documents which we submitted for loan from STAPATI +ham I dont have i shall buy one dear +ham Oh god i am happy to see your message after 3 days +ham What year. And how many miles. +ham Hey cutie. How goes it? Here in WALES its kinda ok. There is like hills and shit but i still avent killed myself. +ham Sad story of a Man - Last week was my b'day. My Wife did'nt wish me. My Parents forgot n so did my Kids . I went to work. Even my Colleagues did not wish. As I entered my cabin my PA said, '' Happy B'day Boss !!''. I felt special. She askd me 4 lunch. After lunch she invited me to her apartment. We went there. 
She said,'' do u mind if I go into the bedroom for a minute ? '' ''OK'', I sed in a sexy mood. She came out 5 minuts latr wid a cake...n My Wife, My Parents, My Kidz, My Friends n My Colleagues. All screaming.. SURPRISE !! and I was waiting on the sofa.. ... ..... ' NAKED...! +ham I think you should go the honesty road. Call the bank tomorrow. Its the tough decisions that make us great people. +spam FREE for 1st week! No1 Nokia tone 4 ur mob every week just txt NOKIA to 87077 Get txting and tell ur mates. zed POBox 36504 W45WQ norm150p/tone 16+ +ham No. Its not specialisation. Can work but its slave labor. Will look for it this month sha cos no shakara 4 beggar. +ham Is she replying. Has boye changed his phone number +ham 1) Go to write msg 2) Put on Dictionary mode 3)Cover the screen with hand, 4)Press <#> . 5)Gently remove Ur hand.. Its interesting..:) +ham hi my darlin im on my way to London and we have just been smashed into by another driver! and have a big dent! im really missing u what have u been up to? xxx +ham Nothing really, just making sure everybody's up to speed +ham I'm not coming home 4 dinner. +ham Thank you. And by the way, I just lost. +ham Yes.he have good crickiting mind +ham Thx. All will be well in a few months +spam Shop till u Drop, IS IT YOU, either 10K, 5K, £500 Cash or £100 Travel voucher, Call now, 09064011000. NTT PO Box CR01327BT fixedline Cost 150ppm mobile vary +ham "CAN I PLEASE COME UP NOW IMIN TOWN.DONTMATTER IF URGOIN OUTL8R,JUST REALLYNEED 2DOCD.PLEASE DONTPLEASE DONTIGNORE MYCALLS,U NO THECD ISV.IMPORTANT TOME 4 2MORO" +ham I wont. So wat's wit the guys +ham Yavnt tried yet and never played original either +ham Hiya, had a good day? Have you spoken to since the weekend? +ham See? I thought it all through +ham I'm at work. Please call +ham get ready to moan and scream :) +ham Oh k :)why you got job then whats up? +ham I don,t think so. You don't need to be going out that late on a school night. 
ESPECIALLY when the one class you have is the one you missed last wednesday and probably failed a test in on friday +ham And popping <#> ibuprofens was no help. +ham Babe ! How goes that day ? What are you doing ? Where are you ? I sip my cappuccino and think of you, my love ... I send a kiss to you from across the sea +ham Ok. +ham PS U no ur a grown up now right? +ham Chinatown got porridge, claypot rice, yam cake, fishhead beehoon... Either we eat cheap den go cafe n tok or go nydc or somethin... +ham I know a few people I can hit up and fuck to the yes +ham Purity of friendship between two is not about smiling after reading the forwarded message..Its about smiling just by seeing the name. Gud evng +ham So is there anything specific I should be doing with regards to jaklin or what because idk what the fuck +ham Oh god. I'm gonna Google nearby cliffs now. +spam FREE camera phones with linerental from 4.49/month with 750 cross ntwk mins. 1/2 price txt bundle deals also avble. Call 08001950382 or call2optout/J MF +ham Yup i shd haf ard 10 pages if i add figures... Ü all got how many pages? +ham Ooh, 4got, i'm gonna start belly dancing in moseley weds 6.30 if u want 2 join me, they have a cafe too. +ham Thankyou so much for the call. I appreciate your care. +ham Congrats ! Treat pending.i am not on mail for 2 days.will mail once thru.Respect mother at home.check mails. +ham I called but no one pick up e phone. I ask both of them already they said ok. +ham Hi my email address has changed now it is +ham V-aluable. A-ffectionate. L-oveable. E-ternal. N-oble. T-ruthful. I-ntimate. N-atural. E-namous. Happy "VALENTINES DAY" in advance +ham Not much, just some textin'. How bout you? +ham Bring it if you got it +ham I'm in a movie. Call me 4 wat? +ham Not sure I have the stomach for it ... +ham Haha... can... But i'm having dinner with my cousin... +ham A boy was late 2 home. 
His father: "POWER OF FRNDSHIP" +ham (And my man carlos is definitely coming by mu tonight, no excuses) +ham soon you will have the real thing princess! Do i make you wet? :) +ham Raji..pls do me a favour. Pls convey my Birthday wishes to Nimya. Pls. Today is her birthday. +ham Haha, my legs and neck are killing me and my amigos are hoping to end the night with a burn, think I could swing by in like an hour? +spam URGENT! Your mobile No 07xxxxxxxxx won a £2,000 bonus caller prize on 02/06/03! this is the 2nd attempt to reach YOU! call 09066362231 ASAP! BOX97N7QP, 150PPM +ham Usually the body takes care of it buy making sure it doesnt progress. Can we pls continue this talk on saturday. +spam URGENT!! Your 4* Costa Del Sol Holiday or £5000 await collection. Call 09050090044 Now toClaim. SAE, TC s, POBox334, Stockport, SK38xh, Cost£1.50/pm, Max10mins +ham Hmm well, night night +ham Just wanted to say holy shit you guys weren't kidding about this bud +ham Just gettin a bit arty with my collages at the mo, well tryin 2 ne way! Got a roast in a min lovely i shall enjoy that! +ham This is one of the days you have a billion classes, right? +ham Goodmorning, today i am late for 2hrs. Because of back pain. +ham Ok then i'll let him noe later n ask him call u tmr... +ham Prabha..i'm soryda..realy..frm heart i'm sory +ham OK i'm waliking ard now... Do u wan me 2 buy anything go ur house? +ham * Will have two more cartons off u and is very pleased with shelves +ham Nice talking to you! please dont forget my pix :) i want to see all of you... +spam You have WON a guaranteed £1000 cash or a £2000 prize. To claim yr prize call our customer service representative on 08714712379 between 10am-7pm Cost 10p +ham But really quite funny lor wat... Then u shd haf run shorter distance wat... +ham I notice you like looking in the shit mirror youre turning into a right freak +ham Great. I was getting worried about you. 
Just know that a wonderful and caring person like you will have only the best in life. Know that u r wonderful and God's love is yours. +spam Thanks for your ringtone order, ref number K718. Your mobile will be charged £4.50. Should your tone not arrive please call customer services on 09065069120 +ham I prefer my free days... Tues, wed, fri oso can... Ü ask those workin lor... +ham Alrite jod hows the revision goin? Keris bin doin a smidgin. N e way u wanna cum over after college?xx +ham If you have belive me. Come to my home. +ham Oh k.k..where did you take test? +ham Those were my exact intentions +ham haha but no money leh... Later got to go for tuition... Haha and looking for empty slots for driving lessons +ham Hey... Thk we juz go accordin to wat we discussed yest lor, except no kb on sun... Cos there's nt much lesson to go if we attend kb on sat... +ham K, wen ur free come to my home and also tel vikky i hav sent mail to him also.. Better come evening il be free today aftr 6pm..:-) +ham Nothing just getting msgs by dis name wit different no's.. +ham Good Morning plz call me sir +ham What's your room number again? Wanna make sure I'm knocking on the right door +ham "Si.como no?!listened2the plaid album-quite gd&the new air1 which is hilarious-also bought”braindance”a comp.ofstuff on aphex’s ;abel,u hav2hear it!c u sn xxxx" +ham Pls tell nelson that the bb's are no longer comin. The money i was expecting aint coming +ham Give her something to drink, if she takes it and doesn't vomit then you her temp might drop. If she unmits however let me know. +ham Think you sent the text to the home phone. That cant display texts. If you still want to send it his number is +ham Every day i use to sleep after <#> so only. +ham K I'll call you when I'm close +ham U buy newspapers already? +ham Nope wif my sis lor... Aft bathing my dog then i can bathe... Looks like it's going 2 rain soon. +ham Boo I'm on my way to my moms. She's making tortilla soup. 
Yummmm +ham No management puzzeles. +ham How did you find out in a way that didn't include all of these details +spam Hi ya babe x u 4goten bout me?' scammers getting smart..Though this is a regular vodafone no, if you respond you get further prem rate msg/subscription. Other nos used also. Beware! +spam Back 2 work 2morro half term over! Can U C me 2nite 4 some sexy passion B4 I have 2 go back? Chat NOW 09099726481 Luv DENA Calls £1/minMobsmoreLKPOBOX177HP51FL +ham will you like to be spoiled? :) +spam Thanks for your ringtone order, ref number R836. Your mobile will be charged £4.50. Should your tone not arrive please call customer services on 09065069154 +ham I am getting threats from your sales executive Shifad as i raised complaint against him. Its an official message. +ham hope things went well at 'doctors' ;) reminds me i still need 2go.did u c d little thing i left in the lounge? +ham Den wat will e schedule b lk on sun? +ham Lol enjoy role playing much? +ham Ok. Me watching tv too. +ham I just lov this line: "Hurt me with the truth, I don't mind,i wil tolerat.bcs ur my someone..... But, Never comfort me with a lie" gud ni8 and sweet dreams +ham Just checked out, heading out to drop off my stuff now +ham Here got lots of hair dresser fr china. +ham Sad story of a Man - Last week was my b'day. My Wife did'nt wish me. My Parents forgot n so did my Kids . I went to work. Even my Colleagues did not wish. +ham Ill call you evening ill some ideas. +spam SplashMobile: Choose from 1000s of gr8 tones each wk! This is a subscrition service with weekly tones costing 300p. U have one credit - kick back and ENJOY +ham Did you show him and wot did he say or could u not c him 4 dust? +ham It should take about <#> min +spam Not heard from U4 a while. Call 4 rude chat private line 01223585334 to cum. Wan 2C pics of me gettin shagged then text PIX to 8552. 2End send STOP 8552 SAM xxx +ham Ok . . now i am in bus. . 
If i come soon i will come otherwise tomorrow +ham I cant pick the phone right now. Pls send a message +spam FREE entry into our £250 weekly comp just send the word ENTER to 88877 NOW. 18 T&C www.textcomp.com +ham Finish liao... U? +spam 88066 FROM 88066 LOST 3POUND HELP +ham Haha i think i did too +ham U know we watchin at lido? +ham Life spend with someone for a lifetime may be meaningless but a few moments spent with someone who really love you means more than life itself.. +ham Haha awesome, I've been to 4u a couple times. Who all's coming? +ham Cold. Dont be sad dear +ham Think I could stop by in like an hour or so? My roommate's looking to stock up for a trip +ham Is that on the telly? No its Brdget Jones! +ham Love you aathi..love u lot.. +ham Hello! How r u? Im bored. Inever thought id get bored with the tv but I am. Tell me something exciting has happened there? Anything! =/ +ham Hmm...Bad news...Hype park plaza $700 studio taken...Only left 2 bedrm-$900... +ham Sorry, I'll call later in meeting +ham R ü comin back for dinner? +ham I hav almost reached. Call, i m unable to connect u. +ham Whom you waited for yesterday +ham I reach home safe n sound liao... +ham Velly good, yes please! +ham Hi, wkend ok but journey terrible. Wk not good as have huge back log of marking to do +ham I have had two more letters from . I will copy them for you cos one has a message for you. Speak soon +ham Alex knows a guy who sells mids but he's down in south tampa and I don't think I could set it up before like 8 +ham Dont you have message offer +spam Had your mobile 11mths ? Update for FREE to Oranges latest colour camera mobiles & unlimited weekend calls. Call Mobile Upd8 on freefone 08000839402 or 2StopTx +ham HEY THERE BABE, HOW U DOIN? WOT U UP 2 2NITE LOVE ANNIE X. +ham Remind me how to get there and I shall do so +ham :-( that's not v romantic! +ham Hello. Damn this christmas thing. I think i have decided to keep this mp3 that doesnt work. +spam You have 1 new message. 
Please call 08718738034. +ham HI DARLIN IM MISSIN U HOPE YOU ARE HAVING A GOOD TIME. WHEN ARE U BACK AND WHAT TIME IF U CAN GIVE ME A CALL AT HOME. JESS XX +spam Hi - this is your Mailbox Messaging SMS alert. You have 4 messages. You have 21 matches. Please call back on 09056242159 to retrieve your messages and matches +ham Draw va?i dont think so:) +ham Dont pick up d call when something important is There to tell. Hrishi +spam Congrats! 1 year special cinema pass for 2 is yours. call 09061209465 now! C Suprman V, Matrix3, StarWars3, etc all 4 FREE! bx420-ip4-5we. 150pm. Dont miss out! +ham Nothin comes to my mind. Ü help me buy hanger lor. Ur laptop not heavy? +ham <#> , that's all? Guess that's easy enough +ham We can make a baby in yo tho +ham Should I tell my friend not to come round til like <#> ish? +ham Friendship poem: Dear O Dear U R Not Near But I Can Hear Dont Get Fear Live With Cheer No More Tear U R Always my Dear. Gud ni8 +ham Still in the area of the restaurant. Ill try to come back soon +ham Aight that'll work, thanks +spam WIN a year supply of CDs 4 a store of ur choice worth £500 & enter our £100 Weekly draw txt MUSIC to 87066 Ts&Cs www.Ldew.com.subs16+1win150ppmx3 +spam Moby Pub Quiz.Win a £100 High Street prize if u know who the new Duchess of Cornwall will be? Txt her first name to 82277.unsub STOP £1.50 008704050406 SP Arrow +ham I have 2 sleeping bags, 1 blanket and paper and phone details. Anything else? +spam You have won a Nokia 7250i. This is what you get when you win our FREE auction. To take part send Nokia to 86021 now. HG/Suite342/2Lands Row/W1JHL 16+ +spam Congratulations! Thanks to a good friend U have WON the £2,000 Xmas prize. 2 claim is easy, just call 08718726971 NOW! Only 10p per minute. BT-national-rate. +spam tddnewsletter@emc1.co.uk (More games from TheDailyDraw) Dear Helen, Dozens of Free Games - with great prizesWith.. +ham So what do you guys do. 
+ham Also that chat was awesome but don't make it regular unless you can see her in person +ham That's significant but dont worry. +ham That's cause your old. I live to be high. +ham Waqt se pehle or naseeb se zyada kisi ko kuch nahi milta,Zindgi wo nahi he jo hum sochte hai Zindgi wo hai jo ham jeetey hai.......... +ham On the way to office da.. +ham In which place do you want da. +ham This pain couldn't have come at a worse time. +ham Ok... +ham Should I be stalking u? +ham Sorry dude. Dont know how i forgot. Even after Dan reminded me. Sorry. Hope you guys had fun. +ham Ok lor. +ham Apps class varaya elaya. +ham The Xmas story is peace.. The Xmas msg is love.. The Xmas miracle is jesus.. Hav a blessed month ahead & wish U Merry Xmas... +spam URGENT! Your mobile number *************** WON a £2000 Bonus Caller prize on 10/06/03! This is the 2nd attempt to reach you! Call 09066368753 ASAP! Box 97N7QP, 150ppm +ham That day you asked about anand number. Why:-) +ham Am surfing online store. For offers do you want to buy any thing. +ham Long beach lor. Expected... U having dinner now? +ham At home by the way +ham We are both fine. Thanks +ham What happen to her tell the truth +ham Do you like Italian food? +ham Which is weird because I know I had it at one point +ham "Aww you must be nearly dead!Well Jez isComing over toDo some workAnd that whillTake forever!" +ham Tell your friends what you plan to do on Valentines day @ <URL> +ham Alright, see you in a bit +ham Cheers for the message Zogtorius. I’ve been staring at my phone for an age deciding whether to text or not. +ham I will take care of financial problem.i will help:) +ham Tell dear what happen to you. Why you talking to me like an alian +spam Double your mins & txts on Orange or 1/2 price linerental - Motorola and SonyEricsson with B/Tooth FREE-Nokia FREE Call MobileUpd8 on 08000839402 or2optout/HV9D +ham 1) Go to write msg 2) Put on Dictionary mode 3)Cover the screen with hand, 4)Press <#> . 
5)Gently remove Ur hand.. Its interesting..:) +ham Okie... +ham Hi this is yijue, can i meet u at 11 tmr? +ham Its posible dnt live in <#> century cm frwd n thnk different +ham But i dint slept in afternoon. +ham That seems unnecessarily affectionate +ham Yar else i'll thk of all sorts of funny things. +ham You will be in the place of that man +spam Download as many ringtones as u like no restrictions, 1000s 2 choose. U can even send 2 yr buddys. Txt Sir to 80082 £3 +ham Thats cool. How was your day? +spam This message is free. Welcome to the new & improved Sex & Dogging club! To unsubscribe from this service reply STOP. msgs@150p 18 only +ham Honeybee Said: *I'm d Sweetest in d World* God Laughed & Said: *Wait,U Havnt Met d Person Reading This Msg* MORAL: Even GOD Can Crack Jokes! GM+GN+GE+GN:) +ham Just do what ever is easier for you +spam RCT' THNQ Adrian for U text. Rgds Vatian +ham Stop calling everyone saying I might have cancer. My throat hurts to talk. I can't be answering everyones calls. If I get one more call I'm not babysitting on Monday +ham It'll be tough, but I'll do what I have to +ham IM GONNAMISSU SO MUCH!!I WOULD SAY IL SEND U A POSTCARD BUTTHERES ABOUTAS MUCH CHANCE OF MEREMEMBERIN ASTHERE IS OFSI NOT BREAKIN HIS CONTRACT!! LUV Yaxx +ham Ee msg na poortiyagi odalebeku: Hanumanji 7 name 1-Hanuman 2-Bajarangabali 3-Maruti 4-Pavanaputra 5-Sankatmochan 6-Ramaduth 7-Mahaveer ee 7 name <#> janarige ivatte kalisidare next saturday olage ondu good news keluviri...! Maretare inde 1 dodda problum nalli siguviri idu matra <#> % true.. Don't neglet. +ham HI DARLIN I FINISH AT 3 DO U 1 2 PICK ME UP OR MEET ME? TEXT BACK ON THIS NUMBER LUV KATE XXX +ham Set a place for me in your heart and not in your mind, as the mind easily forgets but the heart will always remember. Wish you Happy Valentines Day! +ham But i'm surprised she still can guess right lor... +ham Okie ü wan meet at bishan? Cos me at bishan now. I'm not driving today. +ham Oh ho. 
Is this the first time u use these type of words +ham HI DARLIN HOW WAS WORK DID U GET INTO TROUBLE? IJUST TALKED TO YOUR MUM ALL MORNING! I HAD A REALLY GOOD TIME LAST NIGHT IM GOIN OUT SOON BUT CALL ME IF U CAN +ham I know you are serving. I mean what are you doing now. +ham Huh... Hyde park not in mel ah, opps, got confused... Anyway, if tt's e best choice den we juz have to take it... +ham Oh gei. That happend to me in tron. Maybe ill dl it in 3d when its out +spam FREE MESSAGE Activate your 500 FREE Text Messages by replying to this message with the word FREE For terms & conditions, visit www.07781482378.com +ham I know girls always safe and selfish know i got it pa. Thank you. good night. +ham No worries, hope photo shoot went well. have a spiffing fun at workage. +ham I'm freezing and craving ice. Fml +ham Kay... Since we are out already +ham Eh sorry leh... I din c ur msg. Not sad already lar. Me watching tv now. U still in office? +ham Yo im right by yo work +ham Ok darlin i supose it was ok i just worry too much.i have to do some film stuff my mate and then have to babysit again! But you can call me there.xx +ham She said,'' do u mind if I go into the bedroom for a minute ? '' ''OK'', I sed in a sexy mood. She came out 5 minuts latr wid a cake...n My Wife, +ham I don wake since. I checked that stuff and saw that its true no available spaces. Pls call the embassy or send a mail to them. +ham Nope... Juz off from work... +ham Huh so fast... Dat means u havent finished painting? +ham what number do u live at? Is it 11? +ham No we put party 7 days a week and study lightly, I think we need to draw in some custom checkboxes so they know we're hardcore +ham Sac will score big hundred.he is set batsman:-) +ham Send me yetty's number pls. +ham How much it will cost approx . Per month. +ham Ok... The theory test? when are ü going to book? I think it's on 21 may. Coz thought wanna go out with jiayin. 
But she isnt free +spam You are being contacted by our dating service by someone you know! To find out who it is, call from a land line 09050000928. PoBox45W2TG150P +ham That's fine, have him give me a call if he knows what he wants or has any questions +ham Sorry, got a late start, we're on the way +ham Then u go back urself lor... +ham I AM AT THE GAS STATION. GO THERE. +ham K, if u bored up just come to my home.. +ham Babe !!!! I LOVE YOU !!!! *covers your face in kisses* +ham Like I made him throw up when we were smoking in our friend's car one time, it was awesome +ham Still i have not checked it da. . . +ham You will go to walmart. I.ll stay. +ham I haven't forgotten you, i might have a couple bucks to send you tomorrow, k? I love ya too +ham Oh great. I.ll disturb him more so that we can talk. +ham Reverse is cheating. That is not mathematics. +ham U're welcome... Caught u using broken english again... +ham No problem baby. Is this is a good time to talk? I called and left a message. +ham Sorry, I'll call later +ham Oh is it! Which brand? +ham Sorry i cant take your call right now. It so happens that there r 2waxsto do wat you want. She can come and ill get her medical insurance. And she'll be able to deliver and have basic care. I'm currently shopping for the right medical insurance for her. So just give me til friday morning. Thats when i.ll see the major person that can guide me to the right insurance. +ham At what time are you coming. +ham Call him and say you not coming today ok and tell them not to fool me like this ok +ham I emailed yifeng my part oredi.. Can ü get it fr him.. +ham R u sure they'll understand that! Wine * good idea just had a slurp! +ham Minimum walk is 3miles a day. +ham Ok not a problem will get them a taxi. C ing tomorrow and tuesday. On tuesday think we r all going to the cinema. +ham Brainless Baby Doll..:-D;-), vehicle sariyag drive madoke barolla.. +ham I don't run away frm u... 
I walk slowly & it kills me that u don't care enough to stop me... +spam Sorry I missed your call let's talk when you have the time. I'm on 07090201529 +ham Please attend the phone:) +ham You only hate me. You can call any but you didnt accept even a single call of mine. Or even you messaged +ham No messages on her phone. I'm holding it now +ham Can... I'm free... +ham Gal n boy walking in d park. gal-can i hold ur hand? boy-y? do u think i would run away? gal-no, jst wana c how it feels walking in heaven with an prince..GN:-) +ham What makes you most happy? +ham Wishing you a wonderful week. +ham Sweet heart how are you? +ham Sir, waiting for your letter. +ham Dude im no longer a pisces. Im an aquarius now. +ham X course it 2yrs. Just so her messages on messenger lik you r sending me +ham I think steyn surely get one wicket:) +ham Neither [in sterm voice] - i'm studying. All fine with me! Not sure the thing will be resolved, tho. Anyway. Have a fab hols +ham Garbage bags, eggs, jam, bread, hannaford wheat chex +ham No. It's not pride. I'm almost <#> years old and shouldn't be takin money from my kid. You're not supposed to have to deal with this stuff. This is grownup stuff--why i don't tell you. +ham Sounds better than my evening im just doing my costume. Im not sure what time i finish tomorrow but i will txt you at the end. +ham My birthday is on feb <#> da. . +ham So when do you wanna gym? +ham You'd like that wouldn't you? Jerk! +ham Are u awake? Is there snow there? +ham And of course you should make a stink! +spam u r subscribed 2 TEXTCOMP 250 wkly comp. 1st wk?s free question follows, subsequent wks charged@150p/msg.2 unsubscribe txt STOP 2 84128,custcare 08712405020 +ham No go. No openings for that room 'til after thanksgiving without an upcharge. +ham When you guys planning on coming over? +ham Wat ü doing now? +ham My Parents, My Kidz, My Friends n My Colleagues. All screaming.. SURPRISE !! and I was waiting on the sofa.. ... ..... ' NAKED...! 
+ham No sir. That's why i had an 8-hr trip on the bus last week. Have another audition next wednesday but i think i might drive this time. +ham Do I? I thought I put it back in the box +ham I'm home... +ham No one interested. May be some business plan. +ham Yup it's at paragon... I havent decided whether 2 cut yet... Hee... +ham Good morning princess! Have a great day! +ham Guai... Ü shd haf seen him when he's naughty... Ü so free today? Can go jogging... +ham Aiyo cos i sms ü then ü neva reply so i wait 4 ü to reply lar. I tot ü havent finish ur lab wat. +ham Living is very simple.. Loving is also simple.. Laughing is too simple.. Winning is tooo simple.. But, Being 'SIMPLE' is very difficult...;-) :-) +ham Tell me something. Thats okay. +ham Ok +ham Hmm. Shall i bring a bottle of wine to keep us amused? Just joking! I'll still bring a bottle. Red or white? See you tomorrow +ham This is ur face test ( 1 2 3 4 5 6 7 8 9 <#> ) select any number i will tell ur face astrology.... am waiting. quick reply... +ham Hey, iouri gave me your number, I'm wylie, ryan's friend +ham Yep get with the program. You're slacking. +ham I'm in inside office..still filling forms.don know when they leave me. +ham I think your mentor is , but not 100 percent sure. +spam Call 09095350301 and send our girls into erotic ecstacy. Just 60p/min. To stop texts call 08712460324 (nat rate) +spam Camera - You are awarded a SiPix Digital Camera! call 09061221066 fromm landline. Delivery within 28 days. +spam A £400 XMAS REWARD IS WAITING FOR YOU! Our computer has randomly picked you from our loyal mobile customers to receive a £400 reward. Just call 09066380611 +ham Just trying to figure out when I'm suppose to see a couple different people this week. We said we'd get together but I didn't set dates +spam IMPORTANT MESSAGE. This is a final contact attempt. You have important messages waiting out our customer claims dept. Expires 13/4/04. Call 08717507382 NOW! 
+ham Hi mom we might be back later than <#> +spam dating:i have had two of these. Only started after i sent a text to talk sport radio last week. Any connection do you think or coincidence? +ham Lol, oh you got a friend for the dog ? +ham Ok., is any problem to u frm him? Wats matter? +ham K I'll head out in a few mins, see you there +ham Do u konw waht is rael FRIENDSHIP Im gving yuo an exmpel: Jsut ese tihs msg.. Evrey splleing of tihs msg is wrnog.. Bt sitll yuo can raed it wihtuot ayn mitsake.. GOODNIGHT & HAVE A NICE SLEEP..SWEET DREAMS.. +ham I cant pick the phone right now. Pls send a message +ham I don't want you to leave. But i'm barely doing what i can to stay sane. fighting with you constantly isn't helping. +spam The current leading bid is 151. To pause this auction send OUT. Customer Care: 08718726270 +spam Free entry to the gr8prizes wkly comp 4 a chance to win the latest Nokia 8800, PSP or £250 cash every wk.TXT GREAT to 80878 http//www.gr8prizes.com 08715705022 +ham Somebody set up a website where you can play hold em using eve online spacebucks +ham Its sunny in california. The weather's just cool +spam You have 1 new message. Call 0207-083-6089 +ham I can make it up there, squeezed <#> bucks out of my dad +ham Good day to You too.Pray for me.Remove the teeth as its painful maintaining other stuff. +ham How are you babes. Hope your doing ok. I had a shit nights sleep. I fell asleep at 5.I’m knackered and i’m dreading work tonight. What are thou upto tonight. X +ham How do friends help us in problems? They give the most stupid suggestion that Lands us into another problem and helps us forgt the previous problem +ham I'm at work. Please call +ham I will be gentle baby! Soon you will be taking all <#> inches deep inside your tight pussy... +ham NOT MUCH NO FIGHTS. IT WAS A GOOD NITE!! +ham Ok.ok ok..then..whats ur todays plan +ham Nt joking seriously i told +ham Watching ajith film ah? 
+ham Ooooooh I forgot to tell u I can get on yoville on my phone +ham All done, all handed in. Don't know if mega shop in asda counts as celebration but thats what i'm doing! +ham I dont know exactly could you ask chechi. +ham Dunno lei shd b driving lor cos i go sch 1 hr oni. +ham As in i want custom officer discount oh. +ham That's necessarily respectful +ham Hi. Hope you had a good day. Have a better night. +ham And he's apparently bffs with carly quick now +ham HARD BUT TRUE: How much you show & express your love to someone....that much it will hurt when they leave you or you get seperated...!鈥┾??〨ud evening... +ham Babes I think I got ur brolly I left it in English wil bring it in 2mrw 4 u luv Franxx +ham Hi babe its me thanks for coming even though it didnt go that well!i just wanted my bed! Hope to see you soon love and kisses xxx +ham So gd got free ice cream... I oso wan... +ham Pls give her prometazine syrup. 5mls then <#> mins later feed. +ham So how many days since then? +ham Dear are you angry i was busy dear +ham Yup he msg me: is tat yijue? Then i tot it's my group mate cos we meeting today mah... I'm askin if ü leaving earlier or wat mah cos mayb ü haf to walk v far... +ham ... Are you in the pub? +ham There is a first time for everything :) +ham Daddy, shu shu is looking 4 u... U wan me 2 tell him u're not in singapore or wat? +ham I ask if u meeting da ge tmr nite... +ham Gr8. So how do you handle the victoria island traffic. Plus when's the album due +ham Nite nite pocay wocay luv u more than n e thing 4eva I promise ring u 2morrowxxxx +ham East coast +ham You should get more chicken broth if you want ramen unless there's some I don't know about +ham My slave! I want you to take 2 or 3 pictures of yourself today in bright light on your cell phone! Bright light! +ham Nope. I just forgot. Will show next week +ham So how are you really. What are you up to. How's the masters. And so on. 
+ham I'm at bruce & fowler now but I'm in my mom's car so I can't park (long story) +ham I dont know oh. Hopefully this month. +ham Hi elaine, is today's meeting confirmed? +ham Ok k..sry i knw 2 siva..tats y i askd.. +ham Sorry, I'll call later +ham U horrible gal... U knew dat i was going out wif him yest n u still come n ask me... +ham Otherwise had part time job na-tuition.. +ham Oh yeah! And my diet just flew out the window +spam Santa Calling! Would your little ones like a call from Santa Xmas eve? Call 09058094583 to book your time. +ham You didnt complete your gist oh. +ham Er yeah, i will b there at 15:26, sorry! Just tell me which pub/cafe to sit in and come wen u can +ham If you can make it any time tonight or whenever you can it's cool, just text me whenever you're around +ham If I was I wasn't paying attention +ham Thanx a lot 4 ur help! +ham You're gonna have to be way more specific than that +ham Jesus armand really is trying to tell everybody he can find +ham I'm wif him now buying tix lar... +ham Mode men or have you left. +ham Am slow in using biola's fne +ham "What are youdoing later? Sar xxx" +ham Hey i've booked the 2 lessons on sun liao... +ham Thank you. do you generally date the brothas? +ham By the way, make sure u get train to worc foregate street not shrub hill. Have fun night x +ham I thought i'd get him a watch, just cos thats the kind of thing u get4an18th. And he loves so much! +spam You have won a guaranteed 32000 award or maybe even £1000 cash to claim ur award call free on 0800 ..... (18+). Its a legitimat efreefone number wat do u think??? +ham Good morning. At the repair shop--the ONLY reason i'm up at this hour. +ham And that's fine, I got enough bud to last most of the night at least +ham I am back. Good journey! Let me know if you need any of the receipts. Shall i tell you like the pendent? +ham So that takes away some money worries +ham aight we can pick some up, you open before tonight? +spam Latest News! 
Police station toilet stolen, cops have nothing to go on! +ham Sac needs to carry on:) +ham Just sing HU. I think its also important to find someone female that know the place well preferably a citizen that is also smart to help you navigate through. Even things like choosing a phone plan require guidance. When in doubt ask especially girls. +ham What???? Hello wats talks email address? +ham Except theres a chick with huge boobs. +ham Im just wondering what your doing right now? +ham Wishing you a beautiful day. Each moment revealing even more things to keep you smiling. Do enjoy it. +spam "For the most sparkling shopping breaks from 45 per person; call 0121 2025050 or visit www.shortbreaks.org.uk" +ham Arun can u transfr me d amt +ham Sorry, I'll call later +ham If you hear a loud scream in about <#> minutes its cause my Gyno will be shoving things up me that don't belong :/ +spam December only! Had your mobile 11mths+? You are entitled to update to the latest colour camera mobile for Free! Call The Mobile Update Co FREE on 08002986906 +ham Ok i thk i got it. Then u wan me 2 come now or wat? +spam Txt: CALL to No: 86888 & claim your reward of 3 hours talk time to use from your phone now! Subscribe6GBP/mnth inc 3hrs 16 stop?txtStop www.gamb.tv +ham U GOIN OUT 2NITE? +ham I will treasure every moment we spend together... +ham Shall I bring us a bottle of wine to keep us amused? Only joking! I‘ll bring one anyway +spam http//tms. widelive.com/index. wml?id=820554ad0a1705572711&first=true¡C C Ringtone¡ +spam Get your garden ready for summer with a FREE selection of summer bulbs and seeds worth £33:50 only with The Scotsman this Saturday. To stop go2 notxt.co.uk +spam URGENT! Last weekend's draw shows that you have won £1000 cash or a Spanish holiday! CALL NOW 09050000332 to claim. T&C: RSTM, SW7 3SS. 150ppm +ham Ok lor. +ham I thought slide is enough. 
+ham Yup +ham Well obviously not because all the people in my cool college life went home ;_; +ham Ok lor ü reaching then message me. +ham Where's mummy's boy ? Is he being good or bad ? Is he being positive or negative ? Why is mummy being made to wait? Hmmmm? +ham Dhoni have luck to win some big title.so we will win:) +ham Yes princess! I want to please you every night. Your wish is my command... +ham What Today-sunday..sunday is holiday..so no work.. +ham No probably <#> %. +ham Really do hope the work doesnt get stressful. Have a gr8 day. +ham Have you seen who's back at Holby?! +ham Shall call now dear having food +spam URGENT We are trying to contact you Last weekends draw shows u have won a £1000 prize GUARANTEED Call 09064017295 Claim code K52 Valid 12hrs 150p pm +ham So li hai... Me bored now da lecturer repeating last weeks stuff waste time... +ham , , and picking them up from various points | going 2 yeovil | and they will do the motor project 4 3 hours | and then u take them home. || 12 2 5.30 max. || Very easy +ham Also fuck you and your family for going to rhode island or wherever the fuck and leaving me all alone the week I have a new bong >:( +ham Ofcourse I also upload some songs +spam 2p per min to call Germany 08448350055 from your BT line. Just 2p per min. Check PlanetTalkInstant.com for info & T's & C's. Text stop to opt out +ham K. I will sent it again +ham Oh thanks a lot..i already bought 2 eggs .. +ham K. I will sent it again +ham U studying in sch or going home? Anyway i'll b going 2 sch later. +spam Marvel Mobile Play the official Ultimate Spider-man game (£4.50) on ur mobile right now. Text SPIDER to 83338 for the game & we ll send u a FREE 8Ball wallpaper +ham I think if he rule tamilnadu..then its very tough for our people. +ham Cool, we shall go and see, have to go to tip anyway. Are you at home, got something to drop in later? So lets go to town tonight! Maybe mum can take us in. +ham Good afternoon, my love ... How goes your day ? 
How did you sleep ? I hope your well, my boytoy ... I think of you ... +ham Yes... I trust u to buy new stuff ASAP so I can try it out +spam SMS SERVICES. for your inclusive text credits, pls goto www.comuk.net login= 3qxj9 unsubscribe with STOP, no extra charge. help 08702840625.COMUK. 220-CM2 9AE +ham Why did I wake up on my own >:( +ham Now get step 2 outta the way. Congrats again. +ham Love has one law; Make happy the person you love. In the same way friendship has one law; Never make ur friend feel alone until you are alive.... Gud night +spam PRIVATE! Your 2003 Account Statement for 07808247860 shows 800 un-redeemed S. I. M. points. Call 08719899229 Identifier Code: 40411 Expires 06/11/04 +ham Apo all other are mokka players only +ham Perhaps * is much easy give your account identification, so i will tomorrow at UNI +ham Wait . I will msg after <#> min. +ham What i told before i tell. Stupid hear after i wont tell anything to you. You dad called to my brother and spoken. Not with me. +ham God's love has no limit. God's grace has no measure. God's power has no boundaries. May u have God's endless blessings always in ur life...!! Gud ni8 +ham I want to be inside you every night... +ham Machan you go to gym tomorrow, i wil come late goodnight. +ham Lol they were mad at first but then they woke up and gave in. +ham I went to project centre +ham It‘s reassuring, in this crazy world. +ham Just making dinner, you ? +ham Yes. Please leave at <#> . So that at <#> we can leave +ham Oh... Okie lor...We go on sat... +ham You are a great role model. You are giving so much and i really wish each day for a miracle but God as a reason for everything and i must say i wish i knew why but i dont. I've looked up to you since i was young and i still do. Have a great day. +ham Ya, i'm referin to mei's ex wat... No ah, waitin 4 u to treat, somebody shld b rich liao...So gd, den u dun have to work frm tmr onwards... 
+ham Miles and smiles r made frm same letters but do u know d difference..? smile on ur face keeps me happy even though I am miles away from u.. :-)keep smiling.. Good nyt +ham By the way, i've put a skip right outside the front of the house so you can see which house it is. Just pull up before it. +ham Can you pls send me that company name. In saibaba colany +ham No. I dont want to hear anything +ham You are a big chic. Common. Declare +ham Thats cool. I want to please you... +ham Going to join tomorrow. +spam You are awarded a SiPix Digital Camera! call 09061221061 from landline. Delivery within 28days. T Cs Box177. M221BP. 2yr warranty. 150ppm. 16 . p p£3.99 +ham I want to tell you how bad I feel that basically the only times I text you lately are when I need drugs +spam PRIVATE! Your 2003 Account Statement for shows 800 un-redeemed S.I.M. points. Call 08718738001 Identifier Code: 49557 Expires 26/11/04 +ham Total disappointment, when I texted you was the craziest shit got :( +ham Its just the effect of irritation. Just ignore it +ham What about this one then. +ham I think that tantrum's finished so yeah I'll be by at some point +ham Compliments to you. Was away from the system. How your side. +ham happened here while you were adventuring +ham Hey chief, can you give me a bell when you get this. Need to talk to you about this royal visit on the 1st june. +ham Ok which your another number +ham I know you are thinkin malaria. But relax, children cant handle malaria. She would have been worse and its gastroenteritis. If she takes enough to replace her loss her temp will reduce. And if you give her malaria meds now she will just vomit. Its a self limiting illness she has which means in a few days it will completely stop +ham Aiyah ok wat as long as got improve can already wat... +spam Want explicit SEX in 30 secs? Ring 02073162414 now! Costs 20p/min Gsex POBOX 2667 WC1N 3XX +ham I can't believe how attached I am to seeing you every day. 
I know you will do the best you can to get to me babe. I will go to teach my class at your midnight +ham Just sleeping..and surfing +spam ASKED 3MOBILE IF 0870 CHATLINES INCLU IN FREE MINS. INDIA CUST SERVs SED YES. L8ER GOT MEGA BILL. 3 DONT GIV A SHIT. BAILIFF DUE IN DAYS. I O £250 3 WANT £800 +ham Yeah it's jus rite... +ham Armand says get your ass over to epsilon +ham U still havent got urself a jacket ah? +ham I'm taking derek & taylor to walmart, if I'm not back by the time you're done just leave the mouse on my desk and I'll text you when priscilla's ready +ham Hi its in durban are you still on this number +ham Ic. There are a lotta childporn cars then. +spam Had your contract mobile 11 Mnths? Latest Motorola, Nokia etc. all FREE! Double Mins & Text on Orange tariffs. TEXT YES for callback, no to remove from records. +ham No, I was trying it all weekend ;V +ham You know, wot people wear. T shirts, jumpers, hat, belt, is all we know. We r at Cribbs +ham Cool, what time you think you can get here? +ham Wen did you get so spiritual and deep. That's great +ham Have a safe trip to Nigeria. Wish you happiness and very soon company to share moments with +ham Hahaha..use your brain dear +ham Well keep in mind I've only got enough gas for one more round trip barring a sudden influx of cash +ham Yeh. Indians was nice. Tho it did kane me off a bit he he. We shud go out 4 a drink sometime soon. Mite hav 2 go 2 da works 4 a laugh soon. Love Pete x x +ham Yes i have. So that's why u texted. Pshew...missing you so much +ham No. I meant the calculation is the same. That <#> units at <#> . This school is really expensive. Have you started practicing your accent. Because its important. And have you decided if you are doing 4years of dental school or if you'll just do the nmde exam. +ham Sorry, I'll call later +ham if you aren't here in the next <#> hours imma flip my shit +ham Anything lor. Juz both of us lor. +ham Get me out of this dump heap. 
My mom decided to come to lowes. BORING. +ham Ok lor... Sony ericsson salesman... I ask shuhui then she say quite gd 2 use so i considering... +ham Ard 6 like dat lor. +ham Why don't you wait 'til at least wednesday to see if you get your . diff --git a/data/spam_4w.csv b/data/spam_4w.csv new file mode 100644 index 00000000..bbf43ed7 --- /dev/null +++ b/data/spam_4w.csv @@ -0,0 +1,8 @@ +Type,Message +spam,mobile free help +ham,free +spam,mobile help house +ham,free mobile +spam,house help +ham,free house +spam,mobile diff --git a/data/spam_tiny.csv b/data/spam_tiny.csv new file mode 100644 index 00000000..17811ef5 --- /dev/null +++ b/data/spam_tiny.csv @@ -0,0 +1,50 @@ +Type Message +ham Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat... +ham Ok lar... Joking wif u oni... +spam Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's +ham U dun say so early hor... U c already then say... +ham Nah I don't think he goes to usf, he lives around here though +spam FreeMsg Hey there darling it's been 3 week's now and no word back! I'd like some fun you up for it still? Tb ok! XxX std chgs to send, £1.50 to rcv +ham Even my brother is not like to speak with me. They treat me like aids patent. +ham As per your request 'Melle Melle (Oru Minnaminunginte Nurungu Vettam)' has been set as your callertune for all Callers. Press *9 to copy your friends Callertune +spam WINNER!! As a valued network customer you have been selected to receivea £900 prize reward! To claim call 09061701461. Claim code KL341. Valid 12 hours only. +spam Had your mobile 11 months or more? U R entitled to Update to the latest colour mobiles with camera for Free! Call The Mobile Update Co FREE on 08002986030 +ham I'm gonna be home soon and i don't want to talk about this stuff anymore tonight, k? I've cried enough today. 
+spam SIX chances to win CASH! From 100 to 20,000 pounds txt> CSH11 and send to 87575. Cost 150p/day, 6days, 16+ TsandCs apply Reply HL 4 info +spam URGENT! You have won a 1 week FREE membership in our £100,000 Prize Jackpot! Txt the word: CLAIM to No: 81010 T&C www.dbuk.net LCCLTD POBOX 4403LDNW1A7RW18 +ham Finally the match heading towards draw as your prediction. +ham Tired. I haven't slept well the past few nights. +ham Easy ah?sen got selected means its good.. +ham I have to take exam with march 3 +ham Yeah you should. I think you can use your gt atm now to register. Not sure but if there's anyway i can help let me know. But when you do be sure you are ready. +ham Ok no prob. Take ur time. +ham There is os called ubandu which will run without installing in hard disk...you can use that os to copy the important files in system and give it to repair shop.. +ham Sorry, I'll call later +ham U say leh... Of course nothing happen lar. Not say v romantic jus a bit only lor. I thk e nite scenery not so nice leh. +spam 500 New Mobiles from 2004, MUST GO! Txt: NOKIA to No: 89545 & collect yours today!From ONLY £1 www.4-tc.biz 2optout 087187262701.50gbp/mtmsg18 +ham Would really appreciate if you call me. Just need someone to talk to. +spam Will u meet ur dream partner soon? Is ur career off 2 a flyng start? 2 find out free, txt HORO followed by ur star sign, e. g. HORO ARIES +ham Hey company elama po mudyadhu. +ham Life is more strict than teacher... Bcoz Teacher teaches lesson & then conducts exam, But Life first conducts Exam & then teaches Lessons. Happy morning. . . +ham Dear good morning now only i am up +ham Get down in gandhipuram and walk to cross cut road. Right side <#> street road and turn at first right. +ham Dear we are going to our rubber place +ham Sorry battery died, yeah I'm here +ham Yes:)here tv is always available in work place.. +spam Text & meet someone sexy today. U can find a date or even flirt its up to U. Join 4 just 10p. 
REPLY with NAME & AGE eg Sam 25. 18 -msg recd@thirtyeight pence +ham I have printed it oh. So <#> come upstairs +ham Or ill be a little closer like at the bus stop on the same street +ham Where are you?when wil you reach here? +ham New Theory: Argument wins d SITUATION, but loses the PERSON. So dont argue with ur friends just.. . . . kick them & say, I'm always correct.! +ham I love to give massages. I use lots of baby oil... What is your fave position? +ham Dude we should go sup again +ham Yoyyooo u know how to change permissions for a drive in mac. My usb flash drive +ham Gibbs unsold.mike hussey +ham I like to talk pa but am not able to. I dont know y. +ham Y dun cut too short leh. U dun like ah? She failed. She's quite sad. +ham You unbelievable faglord +ham Wife.how she knew the time of murder exactly +ham Why do you ask princess? +ham I am great princess! What are you thinking about me? :) +ham Nutter. Cutter. Ctter. Cttergg. Cttargg. Ctargg. Ctagg. ie you +ham It's ok i noe u're busy but i'm really too bored so i msg u. I oso dunno wat colour she choose 4 me one. 
diff --git a/data/text_missing.csv b/data/text_missing.csv new file mode 100644 index 00000000..ecacd564 --- /dev/null +++ b/data/text_missing.csv @@ -0,0 +1,10 @@ +text1,text2,category1,subject +my brief case,is full of papers,a,paperwork +and in case,it has no appeal,a,paperwork +anyhow,none work,b,swap +hardly,because it's true,b,swap +iprobably it helps,is to blame,b,swap +but you can see for yourself,The quality of the platform,a,web +to help yourself,and use your own platform,b,web +let yourself,is in may a platform,a,web +let yourself go,a rest platform,b,web diff --git a/data/tiny_kdd.csv b/data/tiny_kdd.csv new file mode 100644 index 00000000..ef09d1b0 --- /dev/null +++ b/data/tiny_kdd.csv @@ -0,0 +1,201 @@ +duration,protocol_type,service,flag,src_bytes,dst_bytes,land,wrong_fragment,urgent,hot,num_failed_logins,logged_in,num_compromised,root_shell,su_attempted,num_root,num_file_creations,num_shells,num_access_files,num_outbound_cmds,is_host_login,is_guest_login,count,srv_count,serror_rate,srv_serror_rate,rerror_rate,srv_rerror_rate,same_srv_rate,diff_srv_rate,srv_diff_host_rate,dst_host_count,dst_host_srv_count,dst_host_same_srv_rate,dst_host_diff_srv_rate,dst_host_same_src_port_rate,dst_host_srv_diff_host_rate,dst_host_serror_rate,dst_host_srv_serror_rate,dst_host_rerror_rate,dst_host_srv_rerror_rate,tag +0,tcp,http,SF,181,5450,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,8,0.00,0.00,0.00,0.00,1.00,0.00,0.00,9,9,1.00,0.00,0.11,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,239,486,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,8,0.00,0.00,0.00,0.00,1.00,0.00,0.00,19,19,1.00,0.00,0.05,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,235,1337,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,8,0.00,0.00,0.00,0.00,1.00,0.00,0.00,29,29,1.00,0.00,0.03,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,219,1337,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,6,6,0.00,0.00,0.00,0.00,1.00,0.00,0.00,39,39,1.00,0.00,0.03,0.00,0.00,0.00,0.00,0.00,normal. 
+0,tcp,http,SF,217,2032,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,6,6,0.00,0.00,0.00,0.00,1.00,0.00,0.00,49,49,1.00,0.00,0.02,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,217,2032,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,6,6,0.00,0.00,0.00,0.00,1.00,0.00,0.00,59,59,1.00,0.00,0.02,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,212,1940,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,2,0.00,0.00,0.00,0.00,1.00,0.00,1.00,1,69,1.00,0.00,1.00,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,159,4087,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,5,5,0.00,0.00,0.00,0.00,1.00,0.00,0.00,11,79,1.00,0.00,0.09,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,210,151,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,8,0.00,0.00,0.00,0.00,1.00,0.00,0.00,8,89,1.00,0.00,0.12,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,212,786,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,8,8,0.00,0.00,0.00,0.00,1.00,0.00,0.00,8,99,1.00,0.00,0.12,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,210,624,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,18,18,0.00,0.00,0.00,0.00,1.00,0.00,0.00,18,109,1.00,0.00,0.06,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,177,1985,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,28,119,1.00,0.00,0.04,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,222,773,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,11,11,0.00,0.00,0.00,0.00,1.00,0.00,0.00,38,129,1.00,0.00,0.03,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,256,1169,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0.00,0.00,0.00,0.00,1.00,0.00,0.00,4,139,1.00,0.00,0.25,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,241,259,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,14,149,1.00,0.00,0.07,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,260,1837,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,11,11,0.00,0.00,0.00,0.00,1.00,0.00,0.00,24,159,1.00,0.00,0.04,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,241,261,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,34,169,1.00,0.00,0.03,0.04,0.00,0.00,0.00,0.00,normal. 
+0,tcp,http,SF,257,818,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,12,12,0.00,0.00,0.00,0.00,1.00,0.00,0.00,44,179,1.00,0.00,0.02,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,233,255,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,8,0.00,0.00,0.00,0.00,1.00,0.00,0.25,54,189,1.00,0.00,0.02,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,233,504,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7,0.00,0.00,0.00,0.00,1.00,0.00,0.00,64,199,1.00,0.00,0.02,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,256,1273,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,17,17,0.00,0.00,0.00,0.00,1.00,0.00,0.00,74,209,1.00,0.00,0.01,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,234,255,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,5,5,0.00,0.00,0.00,0.00,1.00,0.00,0.00,84,219,1.00,0.00,0.01,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,241,259,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,12,12,0.00,0.00,0.00,0.00,1.00,0.00,0.00,94,229,1.00,0.00,0.01,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,239,968,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,3,0.00,0.00,0.00,0.00,1.00,0.00,0.00,3,239,1.00,0.00,0.33,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,245,1919,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,13,13,0.00,0.00,0.00,0.00,1.00,0.00,0.00,13,249,1.00,0.00,0.08,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,248,2129,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,23,23,0.00,0.00,0.00,0.00,1.00,0.00,0.00,23,255,1.00,0.00,0.04,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,354,1752,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,5,255,1.00,0.00,0.20,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,193,3991,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1,255,1.00,0.00,1.00,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,214,14959,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,6,6,0.00,0.00,0.00,0.00,1.00,0.00,0.00,11,255,1.00,0.00,0.09,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,212,1309,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,10,0.00,0.00,0.00,0.00,1.00,0.00,0.20,21,255,1.00,0.00,0.05,0.05,0.00,0.00,0.00,0.00,normal. 
+0,tcp,http,SF,215,3670,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,3,0.00,0.00,0.00,0.00,1.00,0.00,0.00,31,255,1.00,0.00,0.03,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,217,18434,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,41,255,1.00,0.00,0.02,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,205,424,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,25,0.00,0.00,0.00,0.00,1.00,0.00,0.12,2,255,1.00,0.00,0.50,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,155,424,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,13,0.00,0.00,0.00,0.00,1.00,0.00,0.15,12,255,1.00,0.00,0.08,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,202,424,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,3,0.00,0.00,0.00,0.00,1.00,0.00,0.00,22,255,1.00,0.00,0.05,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,235,6627,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,32,255,1.00,0.00,0.03,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,259,3917,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,42,255,1.00,0.00,0.02,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,301,2653,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,52,255,1.00,0.00,0.02,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,322,424,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,62,255,1.00,0.00,0.02,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,370,520,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,3,0.00,0.00,0.00,0.00,1.00,0.00,0.00,72,255,1.00,0.00,0.01,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,370,520,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,3,0.00,0.00,0.00,0.00,1.00,0.00,0.00,82,255,1.00,0.00,0.01,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,172,5884,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,6,6,0.00,0.00,0.00,0.00,1.00,0.00,0.00,10,255,1.00,0.00,0.10,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,264,16123,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,13,0.00,0.00,0.00,0.00,1.00,0.00,0.23,20,255,1.00,0.00,0.05,0.05,0.00,0.00,0.00,0.00,normal. 
+0,tcp,http,SF,255,1948,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,14,0.00,0.00,0.00,0.00,1.00,0.00,0.14,30,255,1.00,0.00,0.03,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,274,19790,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,6,6,0.00,0.00,0.00,0.00,1.00,0.00,0.00,40,255,1.00,0.00,0.03,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,313,293,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,3,0.00,0.00,0.00,0.00,1.00,0.00,0.00,3,255,1.00,0.00,0.33,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,145,4466,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0.00,0.00,0.00,0.00,1.00,0.00,0.00,13,255,1.00,0.00,0.08,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,290,460,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,23,255,1.00,0.00,0.04,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,309,17798,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,2,255,1.00,0.00,0.50,0.06,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,317,2075,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0.00,0.00,0.00,0.00,1.00,0.00,0.00,8,255,1.00,0.00,0.12,0.06,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,300,42747,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,5,0.00,0.00,0.00,0.00,1.00,0.00,0.40,18,255,1.00,0.00,0.06,0.06,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,307,1377,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,8,0.00,0.00,0.00,0.00,1.00,0.00,0.00,8,255,1.00,0.00,0.12,0.06,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,309,1030,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,18,18,0.00,0.00,0.00,0.00,1.00,0.00,0.00,18,255,1.00,0.00,0.06,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,306,1030,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,28,28,0.00,0.00,0.00,0.00,1.00,0.00,0.00,28,255,1.00,0.00,0.04,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,237,1691,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,10,10,0.00,0.00,0.00,0.00,1.00,0.00,0.00,10,255,1.00,0.00,0.10,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,237,2964,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7,0.00,0.00,0.00,0.00,1.00,0.00,0.00,20,255,1.00,0.00,0.05,0.05,0.00,0.00,0.00,0.00,normal. 
+0,tcp,http,SF,239,1691,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0.00,0.00,0.00,0.00,1.00,0.00,0.00,30,255,1.00,0.00,0.03,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,160,170,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1,255,1.00,0.00,1.00,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,231,2281,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,9,10,0.00,0.00,0.00,0.00,1.00,0.00,0.20,11,255,1.00,0.00,0.09,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,227,1247,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,19,20,0.00,0.00,0.00,0.00,1.00,0.00,0.10,21,255,1.00,0.00,0.05,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,232,8766,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,10,10,0.00,0.00,0.00,0.00,1.00,0.00,0.00,31,255,1.00,0.00,0.03,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,233,3609,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,6,6,0.00,0.00,0.00,0.00,1.00,0.00,0.00,41,255,1.00,0.00,0.02,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,236,468,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,16,17,0.00,0.00,0.00,0.00,1.00,0.00,0.12,51,255,1.00,0.00,0.02,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,322,8766,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7,0.00,0.00,0.00,0.00,1.00,0.00,0.00,61,255,1.00,0.00,0.02,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,327,2112,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,17,17,0.00,0.00,0.00,0.00,1.00,0.00,0.00,71,255,1.00,0.00,0.01,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,225,2063,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,8,0.00,0.00,0.00,0.00,1.00,0.00,0.00,81,255,1.00,0.00,0.01,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,224,1658,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,18,18,0.00,0.00,0.00,0.00,1.00,0.00,0.00,91,255,1.00,0.00,0.01,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,224,3609,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,9,10,0.00,0.00,0.00,0.00,1.00,0.00,0.20,101,255,1.00,0.00,0.02,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,230,2118,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,19,20,0.00,0.00,0.00,0.00,1.00,0.00,0.10,111,255,1.00,0.00,0.02,0.04,0.00,0.00,0.00,0.00,normal. 
+0,tcp,http,SF,239,304,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,10,20,0.00,0.00,0.00,0.00,1.00,0.00,0.20,121,255,1.00,0.00,0.01,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,240,2164,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,6,34,0.00,0.00,0.00,0.00,1.00,0.00,0.06,131,255,1.00,0.00,0.01,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,237,2112,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,16,33,0.00,0.00,0.00,0.00,1.00,0.00,0.06,141,255,1.00,0.00,0.01,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,229,304,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7,0.00,0.00,0.00,0.00,1.00,0.00,0.00,151,255,1.00,0.00,0.01,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,164,4460,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,17,18,0.00,0.00,0.00,0.00,1.00,0.00,0.11,161,255,1.00,0.00,0.01,0.02,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,242,2118,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,27,28,0.00,0.00,0.00,0.00,1.00,0.00,0.07,171,255,1.00,0.00,0.01,0.02,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,238,296,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,10,0.00,0.00,0.00,0.00,1.00,0.00,0.20,181,255,1.00,0.00,0.01,0.02,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,238,2112,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,14,20,0.00,0.00,0.00,0.00,1.00,0.00,0.10,191,255,1.00,0.00,0.01,0.01,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,208,2000,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,5,19,0.00,0.00,0.00,0.00,1.00,0.00,0.16,201,255,1.00,0.00,0.00,0.01,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,213,2164,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,15,34,0.00,0.00,0.00,0.00,1.00,0.00,0.12,211,255,1.00,0.00,0.00,0.01,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,297,2000,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,6,6,0.00,0.00,0.00,0.00,1.00,0.00,0.00,221,255,1.00,0.00,0.00,0.01,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,302,2164,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,16,16,0.00,0.00,0.00,0.00,1.00,0.00,0.00,231,255,1.00,0.00,0.00,0.01,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,310,2195,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7,0.00,0.00,0.00,0.00,1.00,0.00,0.00,241,255,1.00,0.00,0.00,0.01,0.00,0.00,0.00,0.00,normal. 
+0,tcp,http,SF,314,2298,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,9,0.00,0.00,0.00,0.00,1.00,0.00,0.22,251,255,1.00,0.00,0.00,0.01,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,309,296,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,13,19,0.00,0.00,0.00,0.00,1.00,0.00,0.11,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,307,468,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,9,14,0.00,0.00,0.00,0.00,1.00,0.00,0.14,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,310,304,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,5,5,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,187,4460,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,201,3421,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,11,11,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,155,2026,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,222,1981,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,12,12,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,219,2000,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,8,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,220,304,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,7,0.00,0.00,0.00,0.00,1.00,0.00,0.29,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,230,2395,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,14,18,0.00,0.00,0.00,0.00,1.00,0.00,0.11,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,221,2112,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,10,10,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,329,1735,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,11,0.00,0.00,0.00,0.00,1.00,0.00,0.18,2,255,1.00,0.00,0.50,0.01,0.00,0.00,0.00,0.00,normal. 
+0,tcp,http,SF,337,330,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,12,12,0.00,0.00,0.00,0.00,1.00,0.00,0.00,12,255,1.00,0.00,0.08,0.01,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,293,38125,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,22,22,0.00,0.00,0.00,0.00,1.00,0.00,0.00,22,255,1.00,0.00,0.05,0.01,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,171,280,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,26,0.00,0.00,0.00,0.00,1.00,0.00,0.08,6,255,1.00,0.00,0.17,0.02,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,178,1973,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,16,255,1.00,0.00,0.06,0.02,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,331,8415,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,26,255,1.00,0.00,0.04,0.02,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,314,308,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,23,0.00,0.00,0.00,0.00,1.00,0.00,0.09,4,255,1.00,0.00,0.25,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,330,13179,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,14,255,1.00,0.00,0.07,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,341,1473,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7,0.00,0.00,0.00,0.00,1.00,0.00,0.00,24,255,1.00,0.00,0.04,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,302,3966,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0.00,0.00,0.00,0.00,1.00,0.00,0.00,34,255,1.00,0.00,0.03,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,172,524,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,34,0.00,0.00,0.00,0.00,1.00,0.00,0.06,10,255,1.00,0.00,0.10,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,218,13340,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,2,255,1.00,0.00,0.50,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,223,384,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,5,5,0.00,0.00,0.00,0.00,1.00,0.00,0.00,5,255,1.00,0.00,0.20,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,232,1302,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0.00,0.00,0.00,0.00,1.00,0.00,0.00,4,255,1.00,0.00,0.25,0.05,0.00,0.00,0.00,0.00,normal. 
+0,tcp,http,SF,220,8970,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,14,14,0.00,0.00,0.00,0.00,1.00,0.00,0.00,14,255,1.00,0.00,0.07,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,228,2917,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,10,29,0.00,0.00,0.00,0.00,1.00,0.00,0.07,24,255,1.00,0.00,0.04,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,232,1302,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0.00,0.00,0.00,0.00,1.00,0.00,0.00,34,255,1.00,0.00,0.03,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,229,4966,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,14,14,0.00,0.00,0.00,0.00,1.00,0.00,0.00,44,255,1.00,0.00,0.02,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,204,8970,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,10,10,0.00,0.00,0.00,0.00,1.00,0.00,0.00,54,255,1.00,0.00,0.02,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,212,4433,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,6,6,0.00,0.00,0.00,0.00,1.00,0.00,0.00,64,255,1.00,0.00,0.02,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,224,1302,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,74,255,1.00,0.00,0.01,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,215,1869,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,10,14,0.00,0.00,0.00,0.00,1.00,0.00,0.14,84,255,1.00,0.00,0.01,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,267,14496,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,8,0.00,0.00,0.00,0.00,1.00,0.00,0.25,3,255,1.00,0.00,0.33,0.06,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,284,43129,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,3,0.00,0.00,0.00,0.00,1.00,0.00,0.00,13,255,1.00,0.00,0.08,0.06,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,307,3727,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,6,6,0.00,0.00,0.00,0.00,1.00,0.00,0.00,23,255,1.00,0.00,0.04,0.06,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,212,43129,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,33,255,1.00,0.00,0.03,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,267,9317,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,43,255,1.00,0.00,0.02,0.05,0.00,0.00,0.00,0.00,normal. 
+0,tcp,http,SF,156,4027,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,17,0.00,0.00,0.00,0.00,1.00,0.00,0.12,53,255,1.00,0.00,0.02,0.05,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,218,437,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,6,6,0.00,0.00,0.00,0.00,1.00,0.00,0.00,63,255,1.00,0.00,0.02,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,298,9442,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0.00,0.00,0.00,0.00,1.00,0.00,0.00,73,255,1.00,0.00,0.01,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,257,4027,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7,0.00,0.00,0.00,0.00,1.00,0.00,0.00,83,255,1.00,0.00,0.01,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,141,4027,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,93,255,1.00,0.00,0.01,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,213,261,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,103,255,1.00,0.00,0.01,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,284,43129,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0.00,0.00,0.00,0.00,1.00,0.00,0.00,113,255,1.00,0.00,0.01,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,183,8654,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0.25,0.25,0.00,0.00,1.00,0.00,0.00,123,255,1.00,0.00,0.01,0.04,0.01,0.00,0.00,0.00,normal. +0,tcp,http,SF,191,3727,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,133,255,1.00,0.00,0.01,0.03,0.01,0.00,0.00,0.00,normal. +0,tcp,http,SF,183,8654,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0.00,0.00,0.00,0.00,1.00,0.00,0.00,143,255,1.00,0.00,0.01,0.03,0.01,0.00,0.00,0.00,normal. +0,tcp,http,SF,207,6192,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7,0.00,0.00,0.00,0.00,1.00,0.00,0.00,7,255,1.00,0.00,0.14,0.02,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,234,18000,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,3,0.00,0.00,0.00,0.00,1.00,0.00,0.00,17,255,1.00,0.00,0.06,0.02,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,297,5246,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,27,255,1.00,0.00,0.04,0.02,0.00,0.00,0.00,0.00,normal. 
+0,tcp,http,SF,230,3512,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,3,0.00,0.00,0.00,0.00,1.00,0.00,0.00,3,255,1.00,0.00,0.33,0.02,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,288,1105,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,13,255,1.00,0.00,0.08,0.02,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,317,6333,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,8,0.00,0.00,0.00,0.00,1.00,0.00,0.00,23,255,1.00,0.00,0.04,0.02,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,342,11368,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,3,0.00,0.00,0.00,0.00,1.00,0.00,0.00,3,255,1.00,0.00,0.33,0.02,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,342,5401,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,13,13,0.00,0.00,0.00,0.00,1.00,0.00,0.00,13,255,1.00,0.00,0.08,0.02,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,339,10250,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,23,23,0.00,0.00,0.00,0.00,1.00,0.00,0.00,23,255,1.00,0.00,0.04,0.02,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,284,2810,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1,255,1.00,0.00,1.00,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,208,471,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,8,0.00,0.00,0.00,0.00,1.00,0.00,0.00,8,255,1.00,0.00,0.12,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,198,471,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,6,0.00,0.00,0.00,0.00,1.00,0.00,0.33,18,255,1.00,0.00,0.06,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,208,15642,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,5,5,0.00,0.00,0.00,0.00,1.00,0.00,0.00,5,255,1.00,0.00,0.20,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,226,74301,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0.00,0.00,0.00,0.00,1.00,0.00,0.00,15,255,1.00,0.00,0.07,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,284,2081,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,5,5,0.00,0.00,0.00,0.00,1.00,0.00,0.00,25,255,1.00,0.00,0.04,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,212,6107,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7,0.00,0.00,0.00,0.00,1.00,0.00,0.00,35,255,1.00,0.00,0.03,0.04,0.00,0.00,0.00,0.00,normal. 
+0,tcp,http,SF,198,2081,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7,0.00,0.00,0.00,0.00,1.00,0.00,0.00,45,255,1.00,0.00,0.02,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,319,5036,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1,255,1.00,0.00,1.00,0.04,0.00,0.01,0.00,0.00,normal. +0,tcp,http,SF,285,25519,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0.00,0.00,0.00,0.00,1.00,0.00,0.00,4,255,1.00,0.00,0.25,0.04,0.00,0.01,0.00,0.00,normal. +0,tcp,http,SF,324,1875,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,14,255,1.00,0.00,0.07,0.04,0.00,0.01,0.00,0.00,normal. +0,tcp,http,SF,322,1721,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,12,12,0.00,0.00,0.00,0.00,1.00,0.00,0.00,24,255,1.00,0.00,0.04,0.04,0.00,0.01,0.00,0.00,normal. +0,tcp,http,SF,162,3253,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,34,255,1.00,0.00,0.03,0.04,0.00,0.01,0.00,0.00,normal. +0,tcp,http,SF,233,1645,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7,0.00,0.00,0.00,0.00,1.00,0.00,0.00,44,255,1.00,0.00,0.02,0.04,0.00,0.01,0.00,0.00,normal. +0,tcp,http,SF,228,3798,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,54,255,1.00,0.00,0.02,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,233,1694,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,12,12,0.00,0.00,0.00,0.00,1.00,0.00,0.00,64,255,1.00,0.00,0.02,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,306,1721,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,5,5,0.00,0.00,0.00,0.00,1.00,0.00,0.00,74,255,1.00,0.00,0.01,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,285,25519,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,84,255,1.00,0.00,0.01,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,309,909,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,8,0.00,0.00,0.00,0.00,1.00,0.00,0.00,94,255,1.00,0.00,0.01,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,297,1645,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,3,0.00,0.00,0.00,0.00,1.00,0.00,0.00,104,255,1.00,0.00,0.01,0.04,0.00,0.00,0.00,0.00,normal. 
+0,tcp,http,SF,295,1680,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,13,13,0.00,0.00,0.00,0.00,1.00,0.00,0.00,114,255,1.00,0.00,0.01,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,253,1680,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,5,0.00,0.00,0.00,0.00,1.00,0.00,0.40,124,255,1.00,0.00,0.01,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,257,1695,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,12,15,0.00,0.00,0.00,0.00,1.00,0.00,0.13,134,255,1.00,0.00,0.01,0.04,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,229,1484,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,144,255,1.00,0.00,0.01,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,234,1713,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,11,11,0.00,0.00,0.00,0.00,1.00,0.00,0.00,154,255,1.00,0.00,0.01,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,219,1651,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,6,0.00,0.00,0.00,0.00,1.00,0.00,0.33,164,255,1.00,0.00,0.01,0.03,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,212,24572,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,174,255,1.00,0.00,0.01,0.02,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,236,1680,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,3,0.00,0.00,0.00,0.00,1.00,0.00,0.00,184,255,1.00,0.00,0.01,0.02,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,234,1651,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,13,13,0.00,0.00,0.00,0.00,1.00,0.00,0.00,194,255,1.00,0.00,0.01,0.01,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,223,1719,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,10,10,0.00,0.00,0.00,0.00,1.00,0.00,0.00,204,255,1.00,0.00,0.00,0.01,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,232,1721,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7,0.00,0.00,0.00,0.00,1.00,0.00,0.00,214,255,1.00,0.00,0.00,0.01,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,247,1680,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,224,255,1.00,0.00,0.00,0.01,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,246,1694,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,12,12,0.00,0.00,0.00,0.00,1.00,0.00,0.00,234,255,1.00,0.00,0.00,0.01,0.00,0.00,0.00,0.00,normal. 
+0,tcp,http,SF,259,1108,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,9,9,0.00,0.00,0.00,0.00,1.00,0.00,0.00,244,255,1.00,0.00,0.00,0.01,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,163,3253,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,254,255,1.00,0.00,0.00,0.01,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,231,1718,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,212,24572,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,247,1719,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,8,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,334,1718,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,335,1680,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,11,11,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,277,4183,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,331,1694,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,14,14,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,323,1680,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,9,9,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,212,25519,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,297,1695,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,8,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,287,4479,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. 
+0,tcp,http,SF,300,1651,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,11,11,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,292,1719,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,8,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,318,1680,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,5,18,0.00,0.00,0.00,0.00,1.00,0.00,0.11,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,211,24572,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,233,1719,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,8,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,222,1651,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,218,1484,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,11,11,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,235,1718,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,27,0.00,0.00,0.00,0.00,1.00,0.00,0.07,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,232,1719,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,5,5,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,232,1721,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,15,15,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,246,1718,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,10,12,0.00,0.00,0.00,0.00,1.00,0.00,0.25,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,218,1484,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. +0,tcp,http,SF,222,1651,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. 
+0,tcp,http,SF,215,1108,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0.00,0.00,0.00,0.00,1.00,0.00,0.00,255,255,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,normal. diff --git a/data/tiny_mushrooms.csv b/data/tiny_mushrooms.csv new file mode 100644 index 00000000..252cff62 --- /dev/null +++ b/data/tiny_mushrooms.csv @@ -0,0 +1,251 @@ +Edible?,Cap shape,Cap surface,Cap color,Bruises?,Odor,Gill attachment,Gill spacing,Gill size,Gill color,Stalk shape,Stalk root,Stalk surface above ring,Stalk surface below ring,Stalk color above ring,Stalk color below ring,Veil type,Veil color,Ring number,Ring type,Spore print color,Population,Habitat +Poisonous,Convex cap,Smooth cap,Brown cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in urban areas +Edible,Convex cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in grasses +Edible,Bell cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in meadows +Poisonous,Convex cap,Scaly cap,White cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Brown gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in urban areas +Edible,Convex cap,Smooth cap,Gray cap,No Bruises,No odor,Free gills,Crowded gills,Broad gills,Black gills,Tapering stalk,Equal base,Smooth above ring,Smooth below 
ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Brown spore print,Abundant,Grows in grasses +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in grasses +Edible,Bell cap,Smooth cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in meadows +Edible,Bell cap,Scaly cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in meadows +Poisonous,Convex cap,Scaly cap,White cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Ping gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Several,Grows in grasses +Edible,Bell cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in meadows +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in grasses +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad 
gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in meadows +Edible,Bell cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Poisonous,Convex cap,Scaly cap,White cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in urban areas +Edible,Convex cap,Fibrous cap,Brown cap,No Bruises,No odor,Free gills,Crowded gills,Broad gills,Brown gills,Tapering stalk,Equal base,Smooth above ring,Fibrous below ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Black spore print,Abundant,Grows in grasses +Edible,Sunken cap,Fibrous cap,Gray cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Solitary,Grows in urban areas +Edible,Flat cap,Fibrous cap,White cap,No Bruises,No odor,Free gills,Crowded gills,Broad gills,Black gills,Tapering stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Brown spore print,Abundant,Grows in grasses +Poisonous,Convex cap,Smooth cap,Brown cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Brown gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore 
print,Scattered,Grows in grasses +Poisonous,Convex cap,Scaly cap,White cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Brown gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in urban areas +Poisonous,Convex cap,Smooth cap,Brown cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in urban areas +Edible,Bell cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in meadows +Poisonous,Convex cap,Scaly cap,Brown cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Brown gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in grasses +Edible,Bell cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in meadows +Edible,Bell cap,Scaly cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in meadows +Edible,Bell cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White 
above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in meadows +Poisonous,Flat cap,Smooth cap,White cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Brown gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in grasses +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in meadows +Edible,Convex cap,Scaly cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in meadows +Edible,Flat cap,Fibrous cap,Brown cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Solitary,Grows in urban areas +Edible,Convex cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Crowded gills,Narrow gills,Brown gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in the woods +Edible,Bell cap,Smooth cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in meadows +Poisonous,Convex cap,Scaly cap,White cap,Bruises,Pungent odor,Free gills,Close gills,Narrow 
gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in urban areas +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in meadows +Edible,Convex cap,Scaly cap,Brown cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Ping gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Solitary,Grows in paths +Edible,Bell cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in meadows +Edible,Convex cap,Fibrous cap,Yellow cap,Bruises,Anise odor,Free gills,Crowded gills,Narrow gills,White gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in the woods +Edible,Sunken cap,Fibrous cap,Gray cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Several,Grows in urban areas +Poisonous,Convex cap,Scaly cap,Brown cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,White gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in urban 
areas +Edible,Convex cap,Fibrous cap,Yellow cap,Bruises,Almond odor,Free gills,Crowded gills,Narrow gills,Ping gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in the woods +Edible,Bell cap,Smooth cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in meadows +Edible,Bell cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Solitary,Grows in paths +Edible,Convex cap,Fibrous cap,Brown cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Gray gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Solitary,Grows in urban areas +Poisonous,Convex cap,Scaly cap,White cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Ping gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in grasses +Edible,Convex cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below 
ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in meadows +Edible,Convex cap,Scaly cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in grasses +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in meadows +Edible,Convex cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in meadows +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in paths +Edible,Flat cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in paths +Edible,Convex cap,Scaly cap,Brown cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Convex cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club 
base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Bell cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in meadows +Poisonous,Convex cap,Scaly cap,Brown cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in urban areas +Poisonous,Convex cap,Smooth cap,White cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Several,Grows in urban areas +Edible,Bell cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in meadows +Edible,Flat cap,Fibrous cap,Gray cap,No Bruises,No odor,Free gills,Crowded gills,Broad gills,Brown gills,Tapering stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Brown spore print,Abundant,Grows in grasses +Edible,Bell cap,Smooth cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in grasses +Edible,Convex cap,Smooth cap,Yellow 
cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in grasses +Edible,Convex cap,Scaly cap,Brown cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Ping gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Solitary,Grows in paths +Edible,Sunken cap,Fibrous cap,Gray cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in urban areas +Edible,Bell cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in meadows +Edible,Bell cap,Smooth cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in meadows +Edible,Bell cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in meadows +Edible,Bell cap,Scaly cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore 
print,Scattered,Grows in grasses +Edible,Flat cap,Smooth cap,Brown cap,No Bruises,No odor,Free gills,Crowded gills,Broad gills,Black gills,Tapering stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Black spore print,Abundant,Grows in grasses +Edible,Convex cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Flat cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in grasses +Edible,Convex cap,Fibrous cap,Gray cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Ping gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in urban areas +Edible,Flat cap,Fibrous cap,Yellow cap,Bruises,Anise odor,Free gills,Crowded gills,Narrow gills,Ping gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in the woods +Edible,Bell cap,Scaly cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above 
ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in meadows +Edible,Flat cap,Fibrous cap,Yellow cap,Bruises,Anise odor,Free gills,Crowded gills,Narrow gills,White gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in the woods +Edible,Convex cap,Scaly cap,Brown cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Ping gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in paths +Edible,Bell cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Flat cap,Smooth cap,Yellow cap,Bruises,Anise odor,Free gills,Crowded gills,Narrow gills,Ping gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in the woods +Edible,Convex cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Crowded gills,Narrow gills,Brown gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Purple spore print,Several,Grows in the woods +Edible,Flat cap,Scaly cap,Brown cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Ping gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Solitary,Grows in paths +Poisonous,Convex cap,Scaly cap,Brown cap,Bruises,Pungent odor,Free gills,Close gills,Narrow 
gills,White gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in urban areas +Edible,Flat cap,Scaly cap,Brown cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Solitary,Grows in grasses +Edible,Convex cap,Smooth cap,Brown cap,No Bruises,No odor,Free gills,Crowded gills,Broad gills,Black gills,Tapering stalk,Equal base,Fibrous above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Brown spore print,Scattered,Grows in grasses +Poisonous,Convex cap,Scaly cap,White cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,White gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Flat cap,Fibrous cap,Gray cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Brown gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Solitary,Grows in urban areas +Edible,Convex cap,Fibrous cap,Gray cap,No Bruises,No odor,Free gills,Crowded gills,Broad gills,Brown gills,Tapering stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in 
grasses +Edible,Convex cap,Smooth cap,Brown cap,No Bruises,No odor,Free gills,Crowded gills,Broad gills,Black gills,Tapering stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Black spore print,Scattered,Grows in grasses +Edible,Bell cap,Smooth cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Convex cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Flat cap,Scaly cap,Brown cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Solitary,Grows in grasses +Edible,Sunken cap,Fibrous cap,Brown cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Brown gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in urban areas +Edible,Convex cap,Fibrous cap,Brown cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Brown gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Solitary,Grows in urban areas +Edible,Bell cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below 
ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in meadows +Edible,Convex cap,Smooth cap,Brown cap,No Bruises,No odor,Free gills,Crowded gills,Broad gills,Brown gills,Tapering stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Brown spore print,Abundant,Grows in grasses +Edible,Convex cap,Smooth cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Flat cap,Scaly cap,Brown cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Ping gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Convex cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in grasses +Edible,Bell cap,Smooth cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club 
base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Convex cap,Scaly cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Convex cap,Fibrous cap,Brown cap,No Bruises,No odor,Free gills,Crowded gills,Broad gills,Ping gills,Tapering stalk,Equal base,Fibrous above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Black spore print,Scattered,Grows in grasses +Edible,Bell cap,Smooth cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in grasses +Edible,Flat cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Solitary,Grows in paths +Edible,Bell cap,Scaly cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in grasses +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Almond 
odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in meadows +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Solitary,Grows in grasses +Edible,Bell cap,Scaly cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in meadows +Edible,Bell cap,Scaly cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in meadows +Edible,Convex cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in meadows +Edible,Convex cap,Smooth cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in grasses +Edible,Sunken cap,Fibrous cap,Gray cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Gray gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore 
print,Solitary,Grows in urban areas +Edible,Convex cap,Fibrous cap,White cap,Bruises,Almond odor,Free gills,Crowded gills,Narrow gills,White gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Purple spore print,Several,Grows in the woods +Edible,Convex cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in meadows +Poisonous,Convex cap,Scaly cap,White cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Brown gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in urban areas +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Ping gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Sunken cap,Fibrous cap,Gray cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Ping gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Solitary,Grows in urban areas +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Solitary,Grows in grasses +Edible,Convex cap,Smooth cap,Yellow cap,Bruises,Anise odor,Free gills,Crowded gills,Narrow gills,Ping gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth 
below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Purple spore print,Several,Grows in the woods +Edible,Sunken cap,Fibrous cap,Brown cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Solitary,Grows in urban areas +Poisonous,Convex cap,Smooth cap,White cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Several,Grows in grasses +Edible,Convex cap,Scaly cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in meadows +Poisonous,Flat cap,Scaly cap,Brown cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Ping gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Several,Grows in grasses +Edible,Flat cap,Smooth cap,Gray cap,No Bruises,No odor,Free gills,Crowded gills,Broad gills,Black gills,Tapering stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Brown spore print,Abundant,Grows in grasses +Edible,Convex cap,Smooth cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in meadows +Edible,Convex cap,Smooth cap,White cap,No Bruises,No odor,Free 
gills,Crowded gills,Broad gills,Brown gills,Tapering stalk,Equal base,Smooth above ring,Fibrous below ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Black spore print,Scattered,Grows in grasses +Edible,Bell cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in grasses +Edible,Flat cap,Fibrous cap,Gray cap,No Bruises,No odor,Free gills,Crowded gills,Broad gills,Chocolate gills,Tapering stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Brown spore print,Abundant,Grows in grasses +Edible,Convex cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in grasses +Edible,Bell cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in meadows +Edible,Bell cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Bell cap,Scaly cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows 
in meadows +Edible,Flat cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Crowded gills,Narrow gills,White gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Purple spore print,Several,Grows in the woods +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in meadows +Edible,Flat cap,Smooth cap,White cap,Bruises,Almond odor,Free gills,Crowded gills,Narrow gills,Ping gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in the woods +Poisonous,Convex cap,Scaly cap,White cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,White gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in urban areas +Edible,Flat cap,Fibrous cap,White cap,Bruises,Anise odor,Free gills,Crowded gills,Narrow gills,White gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in the woods +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Poisonous,Convex cap,Smooth cap,Brown cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Ping gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above 
ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in grasses +Edible,Bell cap,Smooth cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in grasses +Edible,Convex cap,Scaly cap,Brown cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Solitary,Grows in paths +Edible,Bell cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in meadows +Edible,Sunken cap,Fibrous cap,Brown cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in urban areas +Edible,Flat cap,Scaly cap,Brown cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Solitary,Grows in paths +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in grasses +Edible,Convex cap,Fibrous cap,Gray cap,No Bruises,No odor,Free gills,Crowded gills,Broad gills,Black gills,Tapering 
stalk,Equal base,Fibrous above ring,Fibrous below ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Black spore print,Scattered,Grows in grasses +Edible,Flat cap,Fibrous cap,White cap,No Bruises,No odor,Free gills,Crowded gills,Broad gills,Black gills,Tapering stalk,Equal base,Smooth above ring,Fibrous below ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Brown spore print,Abundant,Grows in grasses +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in meadows +Edible,Bell cap,Smooth cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in grasses +Edible,Bell cap,Scaly cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in meadows +Edible,Convex cap,Scaly cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Convex cap,Smooth cap,Brown cap,No Bruises,No odor,Free gills,Crowded gills,Broad gills,Ping gills,Tapering stalk,Equal base,Fibrous above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Brown spore print,Abundant,Grows in grasses +Edible,Convex cap,Scaly cap,White 
cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Sunken cap,Fibrous cap,Brown cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Several,Grows in urban areas +Edible,Convex cap,Smooth cap,White cap,Bruises,Almond odor,Free gills,Crowded gills,Narrow gills,White gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Purple spore print,Several,Grows in the woods +Edible,Convex cap,Scaly cap,Brown cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Bell cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in grasses +Edible,Convex cap,Scaly cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in grasses +Edible,Bell cap,Scaly cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant 
ring(s),Black spore print,Scattered,Grows in meadows +Edible,Bell cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Bell cap,Smooth cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in meadows +Edible,Bell cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in grasses +Edible,Convex cap,Fibrous cap,Brown cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Solitary,Grows in urban areas +Edible,Flat cap,Scaly cap,Brown cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Solitary,Grows in grasses +Edible,Convex cap,Scaly cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Flat cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White 
above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Solitary,Grows in paths +Edible,Bell cap,Smooth cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Bell cap,Smooth cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in meadows +Edible,Convex cap,Scaly cap,Brown cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Solitary,Grows in grasses +Edible,Bell cap,Smooth cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Convex cap,Fibrous cap,Gray cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Gray gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Solitary,Grows in urban areas +Edible,Bell cap,Smooth cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Convex cap,Fibrous cap,Yellow cap,Bruises,Anise odor,Free gills,Crowded gills,Narrow gills,Brown 
gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Purple spore print,Several,Grows in the woods +Edible,Bell cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Flat cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Ping gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Bell cap,Scaly cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in meadows +Edible,Bell cap,Scaly cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in meadows +Edible,Bell cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in meadows +Edible,Bell cap,Smooth cap,Yellow 
cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in grasses +Poisonous,Convex cap,Scaly cap,White cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Ping gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in urban areas +Edible,Sunken cap,Fibrous cap,Brown cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Gray gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Solitary,Grows in urban areas +Edible,Flat cap,Fibrous cap,Brown cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Gray gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Several,Grows in urban areas +Edible,Convex cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in meadows +Edible,Flat cap,Scaly cap,Brown cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Ping gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in paths +Poisonous,Convex cap,Scaly cap,White cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant 
ring(s),Black spore print,Scattered,Grows in grasses +Edible,Bell cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in meadows +Edible,Flat cap,Fibrous cap,Gray cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Ping gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Several,Grows in urban areas +Edible,Bell cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in grasses +Edible,Convex cap,Scaly cap,Brown cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Solitary,Grows in paths +Edible,Convex cap,Fibrous cap,White cap,No Bruises,No odor,Free gills,Crowded gills,Broad gills,Ping gills,Tapering stalk,Equal base,Smooth above ring,Fibrous below ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Black spore print,Scattered,Grows in grasses +Edible,Convex cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Crowded gills,Narrow gills,White gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in the woods +Edible,Bell cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below 
ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in meadows +Edible,Flat cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Crowded gills,Narrow gills,White gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in the woods +Edible,Convex cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in meadows +Edible,Flat cap,Fibrous cap,Gray cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Gray gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Solitary,Grows in urban areas +Edible,Bell cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Convex cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in meadows +Edible,Convex cap,Scaly cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in meadows +Edible,Flat cap,Smooth cap,White cap,Bruises,Almond odor,Free gills,Crowded 
gills,Narrow gills,Brown gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in the woods +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Bell cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Convex cap,Smooth cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in grasses +Edible,Convex cap,Fibrous cap,White cap,No Bruises,No odor,Free gills,Crowded gills,Broad gills,Chocolate gills,Tapering stalk,Equal base,Fibrous above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Black spore print,Scattered,Grows in grasses +Edible,Flat cap,Scaly cap,Brown cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Solitary,Grows in paths +Poisonous,Convex cap,Smooth cap,White cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in 
urban areas +Edible,Bell cap,Smooth cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in grasses +Edible,Bell cap,Smooth cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in meadows +Edible,Bell cap,Scaly cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in grasses +Edible,Bell cap,Scaly cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in grasses +Edible,Convex cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Bell cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in meadows +Edible,Convex cap,Fibrous cap,Yellow cap,Bruises,Almond odor,Free gills,Crowded gills,Narrow gills,Brown gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial 
veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in the woods +Edible,Convex cap,Fibrous cap,Gray cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Brown gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Solitary,Grows in urban areas +Edible,Flat cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Bell cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in meadows +Edible,Convex cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Convex cap,Scaly cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Sunken cap,Fibrous cap,Gray cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Ping gills,Enlarging stalk,Equal 
base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in urban areas +Edible,Convex cap,Smooth cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in meadows +Poisonous,Convex cap,Smooth cap,White cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Flat cap,Fibrous cap,White cap,Bruises,Almond odor,Free gills,Crowded gills,Narrow gills,Ping gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in the woods +Edible,Convex cap,Scaly cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Numerous,Grows in meadows +Edible,Bell cap,Scaly cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Convex cap,Smooth cap,White 
cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in grasses +Edible,Convex cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Gray gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Poisonous,Convex cap,Scaly cap,Brown cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Ping gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Several,Grows in urban areas +Edible,Bell cap,Smooth cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in grasses +Edible,Convex cap,Fibrous cap,Gray cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Brown gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Solitary,Grows in urban areas +Poisonous,Convex cap,Scaly cap,White cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in urban areas +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One 
ring,Pendant ring(s),Brown spore print,Solitary,Grows in paths +Edible,Flat cap,Fibrous cap,Brown cap,No Bruises,No odor,Free gills,Close gills,Narrow gills,Black gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Several,Grows in urban areas +Edible,Bell cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Black gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in grasses +Edible,Convex cap,Fibrous cap,White cap,Bruises,Anise odor,Free gills,Crowded gills,Narrow gills,White gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Several,Grows in the woods +Edible,Convex cap,Scaly cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Bell cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Convex cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Flat cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Ping gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly 
below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in paths +Edible,Flat cap,Scaly cap,Yellow cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Solitary,Grows in paths +Edible,Convex cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in grasses +Edible,Convex cap,Smooth cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Numerous,Grows in meadows +Poisonous,Convex cap,Smooth cap,White cap,Bruises,Pungent odor,Free gills,Close gills,Narrow gills,Brown gills,Enlarging stalk,Equal base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Several,Grows in urban areas +Edible,Flat cap,Fibrous cap,White cap,Bruises,Almond odor,Free gills,Crowded gills,Narrow gills,Ping gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Purple spore print,Several,Grows in the woods +Edible,Convex cap,Smooth cap,White cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Brown spore print,Scattered,Grows in grasses +Edible,Convex cap,Smooth cap,White cap,Bruises,Anise odor,Free 
gills,Crowded gills,Narrow gills,Ping gills,Tapering stalk,Bulbous base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Purple spore print,Several,Grows in the woods +Edible,Convex cap,Scaly cap,White cap,Bruises,Almond odor,Free gills,Close gills,Broad gills,Brown gills,Enlarging stalk,Club base,Smooth above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Scattered,Grows in meadows +Edible,Flat cap,Scaly cap,Yellow cap,Bruises,Anise odor,Free gills,Close gills,Broad gills,White gills,Enlarging stalk,Rooted base,Smooth above ring,Scaly below ring,White above ring,White below ring,Partial veil,White veil,One ring,Pendant ring(s),Black spore print,Solitary,Grows in paths +Edible,Convex cap,Smooth cap,Brown cap,No Bruises,No odor,Free gills,Crowded gills,Broad gills,Ping gills,Tapering stalk,Equal base,Fibrous above ring,Smooth below ring,White above ring,White below ring,Partial veil,White veil,One ring,Evanescent ring(s),Black spore print,Scattered,Grows in grasses diff --git a/docs/101_anomaly.rst b/docs/101_anomaly.rst new file mode 100644 index 00000000..03fc9c31 --- /dev/null +++ b/docs/101_anomaly.rst @@ -0,0 +1,132 @@ +.. toctree:: + :hidden: + +101 - Anomaly detector usage +============================ + +Following the schema described in the `prediction workflow `_, +document, this is the code snippet that shows the minimal workflow to +create an anomaly detector to produce a single anomaly score. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + # check how to set your credentials in the Authentication section + api = BigML() + # step 1: creating a source from the data in your local "data/iris.csv" file + source = api.create_source("data/iris.csv") + # waiting for the source to be finished. 
Results will be stored in `source` + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating an anomaly detector + anomaly = api.create_anomaly(dataset) + # waiting for the anomaly detector to be finished + api.ok(anomaly) + # the input data to score + input_data = {"petal length": 4, "sepal length": 2, "petal width": 1, + "sepal witdh": 3} + # assigning an anomaly score to it + anomaly_score = api.create_anomaly_score(anomaly, input_data) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok`call because the next +`create` method would internally do the waiting when needed. + +If you want to configure some of the attributes of your anomaly detector, +like the number of top anomalies retrieved, +you can use the second argument in the create call. + + +.. code-block:: python + + # step 5: creating an anomaly detector with a list of the 20 top anomalies + anomaly = api.create_anomaly(dataset, {"top_n": 20}) + # waiting for the anomaly detector to be finished + api.ok(anomaly) + +You can check all the available creation arguments in the `API documentation +`_. + +If you want to assign scores to the original dataset (or a different dataset), +you can do so by creating +a `batch_anomaly_score` resource. In the example, we'll be assuming you already +created an `anomaly` following the steps 0 to 5 in the previous snippet and +that you want to score the same data you used in the anomaly detector. + +.. 
code-block:: python + + test_dataset = dataset + # step 10: creating a batch anomaly score + batch_anomaly_score = api.create_batch_anomaly_score(anomaly, test_dataset) + # waiting for the batch_anomaly_score to be finished + api.ok(batch_anomaly_score) + # downloading the results to your computer + api.download_batch_anomaly_score(batch_anomaly_score, + filename='my_dir/my_anomaly_scores.csv') + +The batch anomaly score output (as well as any of the resources created) +can be configured using additional arguments in the corresponding create calls. +For instance, to include all the information in the original dataset in the +output you would change `step 10` to: + +.. code-block:: python + + batch_anomaly_score = api.create_batch_anomaly_score(anomaly, test_dataset, + {"all_fields": True}) + +Check the `API documentation `_ to learn about the +available configuration options for any BigML resource. + +You can also score your data locally using the `Anomaly` +class in the `anomaly` module. A simple example of that is: + +.. code-block:: python + + from bigml.anomaly import Anomaly + local_anomaly = Anomaly("anomaly/5968ec46983efc21b000001b") + # assigning the anomaly score to some input data + local_anomaly.anomaly_score({"petal length": 4, "sepal length": 2, + "petal width": 1, "sepal witdh": 3}) + +Or you could store first your anomaly information in a file and use that +file to create the local `Anomaly` object: + +.. 
code-block:: python + + # downloading the anomaly detector JSON to a local file + from bigml.api import BigML + api = BigML() + api.export("anomaly/5968ec46983efc21b000001b", + filename="my_anomaly.json") + # creating an anomaly object using the information in the file + from bigml.anomaly import Anomaly + local_anomaly = Anomaly("my_anomaly.json") + # assigning the anomaly score to some input data + local_anomaly.anomaly_score({"petal length": 4, "sepal length": 2, + "petal width": 1, "sepal width": 3}) + +If you want to assign the anomaly score +locally for all the rows in a CSV file (first line +should contain the field headers): + +.. code-block:: python + + import csv + from bigml.anomaly import Anomaly + local_anomaly = Anomaly("anomaly/5a414c667811dd5057000ab5") + with open("test_data.csv") as test_handler: + reader = csv.DictReader(test_handler) + for input_data in reader: + # predicting for all rows + print(local_anomaly.anomaly_score(input_data)) + +Every modeling resource in BigML has its corresponding local class. Check +the `Local resources `_ section of the +documentation to learn more about them. diff --git a/docs/101_association.rst b/docs/101_association.rst new file mode 100644 index 00000000..371456a2 --- /dev/null +++ b/docs/101_association.rst @@ -0,0 +1,69 @@ +.. toctree:: + :hidden: + +101 - Association Discovery usage +================================= + +Following the schema described in the `prediction workflow `_, +document, this is the code snippet that shows the minimal workflow to +create an association and produce association sets. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a source from the data in your local "data/groceries.csv" file + source = api.create_source("data/groceries.csv") + # waiting for the source to be finished. 
Results will be stored in `source` + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating an association + association = api.create_association(dataset) + # waiting for the association to be finished + api.ok(association) + # the new input data to predict for + input_data = {"Products": "Fruit, Wine"} + # creating a single association set + association_set = api.create_association_set(association, input_data) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok`call because the next +`create` method would internally do the waiting when needed. + +You can also create association sets locally using the `Association` +class in the `association` module. A simple example of that is: + +.. code-block:: python + + from bigml.association import Association + local_association = Association("association/5968ec46983efc21b000001b") + # association set for some input data + local_association.association_set({"Products": "Fruit, Wine"}) + +Or you could store first your association information in a file and use that +file to create the local `Association` object: + +.. code-block:: python + + # downloading the association JSON to a local file + from bigml.api import BigML + api = BigML() + api.export("association/5968ec46983efc21b000001b", + filename="my_association.json") + # creating the association from the file + from bigml.association import Association + local_association = Association("my_association.json") + # association set for some input data + local_association.association_set({"Products": "Fruit, Wine"}) + + +Every modeling resource in BigML has its corresponding local class. 
Check +the `Local resources `_ section of the +documentation to learn more about them. diff --git a/docs/101_cluster.rst b/docs/101_cluster.rst new file mode 100644 index 00000000..d4998463 --- /dev/null +++ b/docs/101_cluster.rst @@ -0,0 +1,140 @@ +.. toctree:: + :hidden: + +101 - Cluster Usage +=================== + +Following the schema described in the `prediction workflow `_, +document, this is the code snippet that shows the minimal workflow to +create a cluster and find the centroid associated to a single instance. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + # check how to set your credentials in the Authentication section + api = BigML() + # step 1: creating a source from the data in your local "data/iris.csv" file + source = api.create_source("data/iris.csv") + # waiting for the source to be finished. Results will be stored in `source` + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating a cluster + cluster = api.create_cluster(dataset) + # waiting for the cluster to be finished + api.ok(cluster) + # the new input data to find the centroid. All numeric fields are to be + # provided. + input_data = {"petal length": 4, "sepal length": 2, "petal width": 3, + "sepal width": 1, "species": "Iris-setosa"} + # getting the associated centroid + centroid = api.create_centroid(cluster, input_data) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok`call because the next +`create` method would internally do the waiting when needed. + +If you want to find the centroids for many inputs at once, you can do so by +creating a `batch_centroid` resource. 
You can create a `batch_centroid` using +the same `dataset` that you used to build the `cluster` and this will produce a +new dataset with a new column that contains the name of the cluster each +instance has been assigned to. + +Of course, you can also apply the `cluster` +to new data to find the associated centroids. Then, you will first +need to upload to the platform +all the input data that you want to use and create the corresponding +`source` and `dataset` resources. In the example, we'll be assuming you already +created a `cluster` following the `steps 0 to 5` in the previous snippet. +In the +next example, `steps 6 and 8` will only be necessary if you want +to use new data +to be clustered. If you just want the information about the cluster assigned +to each instance in the clustering algorithm, you can go to `step 10` and use +the dataset created in `step 3` as `test_dataset`. + +.. code-block:: python + + # step 6: creating a source from the data in your local "data/test_iris.csv" file + test_source = api.create_source("data/test_iris.csv") + # waiting for the source to be finished. Results will be stored in `test_source` + api.ok(test_source) + # step 8: creating a dataset from the previously created `test_source` + test_dataset = api.create_dataset(test_source) + # waiting for the dataset to be finished + api.ok(test_dataset) + # step 10: creating a batch centroid + batch_centroid = api.create_batch_centroid(cluster, test_dataset) + # waiting for the batch_centroid to be finished + api.ok(batch_centroid) + # downloading the results to your computer + api.download_batch_centroid(batch_centroid, + filename='my_dir/my_centroids.csv') + +The batch centroid output (as well as any of the resources created) +can be configured using additional arguments in the corresponding create calls. +For instance, to include all the information in the original dataset in the +output you would change `step 10` to: + +.. 
code-block:: python + + batch_centroid = api.create_batch_centroid(cluster, test_dataset, + {"all_fields": True}) + + +Check the `API documentation `_ to learn about the +available configuration options for any BigML resource. + +You can also associate centroids locally using the `Cluster` +class in the `cluster` module. A simple example of that is: + +.. code-block:: python + + from bigml.cluster import Cluster + local_cluster = Cluster("cluster/5968ec46983efc21b000001b") + # associated centroid for some input data + local_cluster.centroid({"petal length": 4, "sepal length": 2, + "petal width": 1, "sepal width": 3}) + +Or you could first store your cluster information in a file and use that +file to create the local `Cluster` object: + +.. code-block:: python + + # downloading the cluster JSON to a local file + from bigml.api import BigML + api = BigML() + api.export("cluster/5968ec46983efc21b000001b", + filename="my_cluster.json") + # creating the cluster from the file + from bigml.cluster import Cluster + local_cluster = Cluster("my_cluster.json") + # associated centroid for some input data + local_cluster.centroid({"petal length": 4, "sepal length": 2, + "petal width": 1, "sepal width": 3}) + + +And if you want to find out locally the associated centroids +for all the rows in a CSV file (first line +should contain the field headers): + +.. code-block:: python + + import csv + from bigml.cluster import Cluster + local_cluster = Cluster("cluster/5a414c667811dd5057000ab5") + with open("test_data.csv") as test_handler: + reader = csv.DictReader(test_handler) + for input_data in reader: + # finding the centroid for all rows + print(local_cluster.centroid(input_data)) + +Every modeling resource in BigML has its corresponding local class. Check +the `Local resources `_ section of the +documentation to learn more about them. 
diff --git a/docs/101_deepnet.rst b/docs/101_deepnet.rst new file mode 100644 index 00000000..c8f1d2c6 --- /dev/null +++ b/docs/101_deepnet.rst @@ -0,0 +1,123 @@ +.. toctree:: + :hidden: + +101 - Deepnet usage +=================== + +Following the schema described in the `prediction workflow `_, +document, this is the code snippet that shows the minimal workflow to +create a deepnet and produce a single prediction. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a source from the data in your local "data/iris.csv" file + source = api.create_source("data/iris.csv") + # waiting for the source to be finished. Results will be stored in `source` + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating a deepnet + deepnet = api.create_deepnet(dataset) + # waiting for the deepnet to be finished + api.ok(deepnet) + # the new input data to predict for + input_data = {"petal width": 1.75, "petal length": 2.45} + # creating a single prediction + prediction = api.create_prediction(deepnet, input_data) + + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok`call because the next +`create` method would internally do the waiting when needed. + +If you want to create predictions for many new inputs, you can do so by +creating +a `batch_prediction` resource. First, you will need to upload to the platform +all the input data that you want to predict for and create the corresponding +`source` and `dataset` resources. In the example, we'll be assuming you already +created a `deepnet` following the steps 0 to 5 in the previous snippet. + +.. 
code-block:: python + + # step 6: creating a source from the data in your local "data/test_iris.csv" file + test_source = api.create_source("data/test_iris.csv") + # waiting for the source to be finished. Results will be stored in `source` + api.ok(test_source) + # step 8: creating a dataset from the previously created `source` + test_dataset = api.create_dataset(test_source) + # waiting for the dataset to be finished + api.ok(test_dataset) + # step 10: creating a batch prediction + batch_prediction = api.create_batch_prediction(deepnet, test_dataset) + # waiting for the batch_prediction to be finished + api.ok(batch_prediction) + # downloading the results to your computer + api.download_batch_prediction(batch_prediction, + filename='my_dir/my_predictions.csv') + +The batch prediction output (as well as any of the resources created) +can be configured using additional arguments in the corresponding create calls. +For instance, to include all the information in the original dataset in the +output you would change `step 10` to: + +.. code-block:: python + + batch_prediction = api.create_batch_prediction(deepnet, test_dataset, + {"all_fields": True}) +Check the `API documentation `_ to learn about the +available configuration options for any BigML resource. + +You can also predict locally using the `Deepnet` +class in the `deepnet` module. A simple example of that is: + +.. code-block:: python + + from bigml.deepnet import Deepnet + local_deepnet = Deepnet("deepnet/5968ec46983efc21b000001c") + # predicting for some input data + local_deepnet.predict({"petal length": 2.45, "sepal length": 2, + "petal width": 1.75, "sepal witdh": 3}) + +Or you could store first your deepnet information in a file and use that +file to create the local `Deepnet` object: + +.. 
code-block:: python + + # downloading the deepnet JSON to a local file + from bigml.api import BigML + api = BigML() + api.export("deepnet/5968ec46983efc21b000001b", + filename="my_deepnet.json") + # creating the deepnet from the file + from bigml.deepnet import Deepnet + local_deepnet = Deepnet("my_deepnet.json") + # predicting for some input data + local_deepnet.predict({"petal length": 2.45, "sepal length": 2, + "petal width": 1.75, "sepal width": 3}) + + +And if you want to predict locally for all the rows in a CSV file (first line +should contain the field headers): + +.. code-block:: python + + import csv + from bigml.deepnet import Deepnet + local_deepnet = Deepnet("deepnet/5a414c667811dd5057000ab5") + with open("test_data.csv") as test_handler: + reader = csv.DictReader(test_handler) + for input_data in reader: + # predicting for all rows + print(local_deepnet.predict(input_data)) + +Every modeling resource in BigML has its corresponding local class. Check +the `Local resources `_ section of the +documentation to learn more about them. diff --git a/docs/101_ensemble.rst b/docs/101_ensemble.rst new file mode 100644 index 00000000..0ca3f747 --- /dev/null +++ b/docs/101_ensemble.rst @@ -0,0 +1,122 @@ +.. toctree:: + :hidden: + +101 - Ensemble usage +==================== + +Following the schema described in the `prediction workflow `_, +document, this is the code snippet that shows the minimal workflow to +create an Ensemble and produce a single prediction. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a source from the data in your local "data/iris.csv" file + source = api.create_source("data/iris.csv") + # waiting for the source to be finished. 
Results will be stored in `source` + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating an Ensemble + ensemble = api.create_ensemble(dataset) + # waiting for the ensemble to be finished + api.ok(ensemble) + # the new input data to predict for + input_data = {"petal length": 4, "sepal length": 2} + # creating a single prediction + prediction = api.create_prediction(ensemble, input_data) + +If you want to create predictions for many new inputs, you can do so by +creating +a `batch_prediction` resource. First, you will need to upload to the platform +all the input data that you want to predict for and create the corresponding +`source` and `dataset` resources. In the example, we'll be assuming you already +created a `model` following the steps 0 to 5 in the previous snippet. + +.. code-block:: python + + # step 6: creating a source from the data in your local "data/test_iris.csv" file + test_source = api.create_source("data/test_iris.csv") + # waiting for the source to be finished. Results will be stored in `source` + api.ok(test_source) + # step 8: creating a dataset from the previously created `source` + test_dataset = api.create_dataset(test_source) + # waiting for the dataset to be finished + api.ok(test_dataset) + # step 10: creating a batch prediction + batch_prediction = api.create_batch_prediction(ensemble, test_dataset) + # waiting for the batch_prediction to be finished + api.ok(batch_prediction) + # downloading the results to your computer + api.download_batch_prediction(batch_prediction, + filename='my_dir/my_predictions.csv') + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. 
+In the first case, we could skip that `api.ok`call because the next +`create` method would internally do the waiting when needed. + +The batch prediction output (as well as any of the resources created) +can be configured using additional arguments in the corresponding create calls. +For instance, to include all the information in the original dataset in the +output you would change `step 10` to: + +.. code-block:: python + + batch_prediction = api.create_batch_prediction(ensemble, test_dataset, + {"all_fields": True}) +Check the `API documentation `_ to learn about the +available configuration options for any BigML resource. + +You can also predict locally using the `Ensemble` +class in the `ensemble` module. A simple example of that is: + +.. code-block:: python + + from bigml.ensemble import Ensemble + local_ensemble = Ensemble("ensemble/5968ec46983efc21b000001b") + # predicting for some input data + local_ensemble.predict({"petal length": 4, "sepal length": 2, + "petal width": 1, "sepal witdh": 3}) + +Or you could store first your ensemble information in a file and use that +file to create the local `Ensemble` object: + +.. code-block:: python + + # downloading the ensemble JSON to a local file + from bigml.api import BigML + api = BigML() + api.export("ensemble/5968ec46983efc21b000001b", + filename="my_ensemble.json") + # creating the ensemble from the file + from bigml.ensemble import Ensemble + local_ensemble = Ensemble("my_ensemble.json") + # predicting for some input data + local_ensemble.predict({"petal length": 4, "sepal length": 2, + "petal width": 1, "sepal witdh": 3}) + + +And if you want to predict locally for all the rows in a CSV file (first line +should contain the field headers): + +.. 
code-block:: python + + import csv + from bigml.ensemble import Ensemble + local_ensemble = Ensemble("ensemble/5a414c667811dd5057000ab5") + with open("test_data.csv") as test_handler: + reader = csv.DictReader(test_handler) + for input_data in reader: + # predicting for all rows + print(local_ensemble.predict(input_data)) + +Every modeling resource in BigML has its corresponding local class. Check +the `Local resources `_ section of the +documentation to learn more about them. diff --git a/docs/101_fusion.rst b/docs/101_fusion.rst new file mode 100644 index 00000000..8b549759 --- /dev/null +++ b/docs/101_fusion.rst @@ -0,0 +1,121 @@ +.. toctree:: + :hidden: + +101 - Fusion usage +================== + +Following the schema described in the `prediction workflow `_, +document, this is the code snippet that shows the minimal workflow to +create a fusion model (assuming that some component models have +already been created) and produce a single prediction. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 5: creating a fusion model from a preexisting model and a logistic + # regression with equal weight + fusion = api.create_fusion([{"id": "model/1111111111111111111111111", + "weight": 1}, + {"id": "logisticregression/222222222222222222222222", + "weight": 1}]) + # waiting for the fusion to be finished + api.ok(fusion) + # the new input data to predict for + input_data = {"petal width": 1.75, "petal length": 2.45} + # creating a single prediction + prediction = api.create_prediction(fusion, input_data) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. 
+ +If you want to create predictions for many new inputs, you can do so by +creating +a `batch_prediction` resource. First, you will need to upload to the platform +all the input data that you want to predict for and create the corresponding +`source` and `dataset` resources. In the example, we'll be assuming you already +created a `model` following the steps 0 to 5 in the previous snippet. + +.. code-block:: python + + # step 6: creating a source from the data in your local "data/test_iris.csv" file + test_source = api.create_source("data/test_iris.csv") + # waiting for the source to be finished. Results will be stored in `source` + api.ok(test_source) + # step 8: creating a dataset from the previously created `source` + test_dataset = api.create_dataset(test_source) + # waiting for the dataset to be finished + api.ok(test_dataset) + # step 10: creating a batch prediction + batch_prediction = api.create_batch_prediction(fusion, test_dataset) + # waiting for the batch_prediction to be finished + api.ok(batch_prediction) + # downloading the results to your computer + api.download_batch_prediction(batch_prediction, + filename='my_dir/my_predictions.csv') + +The batch prediction output (as well as any of the resources created) +can be configured using additional arguments in the corresponding create calls. +For instance, to include all the information in the original dataset in the +output you would change `step 10` to: + +.. code-block:: python + + batch_prediction = api.create_batch_prediction(fusion, test_dataset, + {"all_fields": True}) +Check the `API documentation `_ to learn about the +available configuration options for any BigML resource. + +You can also predict locally using the `Fusion` +class in the `fusion` module. A simple example of that is: + +.. 
code-block:: python + + from bigml.fusion import Fusion + local_fusion = Fusion("fusion/5968ec46983efc21b000001b") + # predicting for some input data + local_fusion.predict({"petal length": 2.45, "sepal length": 2, + "petal width": 1.75, "sepal width": 3}) + +Or you could first store your fusion information (together with the included +models) in a file per model and use those +files to create the local `Fusion` object: + +.. code-block:: python + + # downloading the fusion JSON to a local file + from bigml.api import BigML + api = BigML() + api.export("fusion/5968ec46983efc21b000001b", + filename="my_dir/my_fusion.json") + # creating the local fusion from the file + from bigml.fusion import Fusion + local_fusion = Fusion("my_dir/my_fusion.json") + # predicting for some input data + local_fusion.predict({"petal length": 2.45, "sepal length": 2, + "petal width": 1.75, "sepal width": 3}) + + +And if you want to predict locally for all the rows in a CSV file (first line +should contain the field headers): + +.. code-block:: python + + import csv + from bigml.fusion import Fusion + local_fusion = Fusion("fusion/5a414c667811dd5057000ab5") + with open("test_data.csv") as test_handler: + reader = csv.DictReader(test_handler) + for input_data in reader: + # predicting for all rows + print(local_fusion.predict(input_data)) + + +Every modeling resource in BigML has its corresponding local class. Check +the `Local resources `_ section of the +documentation to learn more about them. diff --git a/docs/101_images_classification.rst b/docs/101_images_classification.rst new file mode 100644 index 00000000..dd6fc4eb --- /dev/null +++ b/docs/101_images_classification.rst @@ -0,0 +1,62 @@ +.. toctree:: + :hidden: + +101 - Images Classification +=========================== + +Following the schema described in the `prediction workflow `_, +document, this is the code snippet that shows the minimal workflow to +create a deepnet from an images dataset and produce a single prediction. + +.. 
code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a source from the data in your local + # "data/images/fruits_hist.zip" file. The file contains two folders, each + # of which contains a collection of images. The folder name will be used + # as label for each image it contains. + # The source is created disabling image analysis, as we want the deepnet + # model to take care of extracting the features. If not said otherwise, + # the analysis would be enabled and features like the histogram of + # gradients would be extracted to become part of the resulting dataset. + source = api.create_source("data/images/fruits_hist.zip", + args={"image_analysis": {"enabled": False}}) + # waiting for the source to be finished. Results will be stored in `source` + # and the new ``image_id`` and ``label`` fields will be generated in the + # source + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating a deepnet + deepnet = api.create_deepnet(dataset) + # waiting for the deepnet to be finished + api.ok(deepnet) + # the new input data to predict for should contain the path to the + # new image to be used for testing + input_data = {"image_id": "data/images/f2/fruits2.png"} + # creating a single prediction: The image file is uploaded to BigML, + # a new source is created for it and its ID is used as value + # for the ``image_id`` field in the input data to generate the prediction + prediction = api.create_prediction(deepnet, input_data) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. 
+In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + +You can also predict locally using the `Deepnet` +class in the `deepnet` module. A simple example of that is: + +.. code-block:: python + + from bigml.deepnet import Deepnet + local_deepnet = Deepnet("deepnet/5968ec46983efc21b000001c") + # predicting for some input data + input_data = {"image_id": "data/images/f2/fruits2.png"} + local_deepnet.predict(input_data) diff --git a/docs/101_images_feature_extraction.rst b/docs/101_images_feature_extraction.rst new file mode 100644 index 00000000..f649d650 --- /dev/null +++ b/docs/101_images_feature_extraction.rst @@ -0,0 +1,71 @@ +.. toctree:: + :hidden: + +101 - Images Feature Extraction +=============================== + +Following the schema described in the `prediction workflow `_, +document, this is the code snippet that shows the minimal workflow to +extract features from images and generate an enriched dataset that can be +used to train any kind of model. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a source from the data in your local + # "data/images/fruits_hist.zip" file. The file contains two folders, each + # of which contains a collection of images. The folder name will be used + # as label for each image it contains. + # The source is created enabling image analysis and setting some of the + # available features (see the API documentation at + # https://bigml.com/api/sources?id=source-arguments + # for details). In particular, we extract histogram of gradients and + # average pixels. + extracted_features = ["average_pixels", "histogram_of_gradients"] + source = api.create_source("data/images/fruits_hist.zip", + args={"image_analysis": {"enabled": True, + "extracted_features": extracted_features}}) + # waiting for the source to be finished.
Results will be stored in `source` + # and the new extracted features will be generated. + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating an anomaly detector + anomaly = api.create_anomaly(dataset) + # waiting for the anomaly detector to be finished + api.ok(anomaly) + # the new input data to predict for should contain the path to the + # new image to be used for testing + input_data = {"image_id": "data/images/f2/fruits2.png"} + # creating a single anomaly score: The image file is uploaded to BigML, + # a new source is created for it using the same image_analysis + # used in the image field, and its ID is used as value + # for the ``image_id`` field in the input data to generate the prediction + anomaly_score = api.create_anomaly_score(anomaly, input_data) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok`call because the next +`create` method would internally do the waiting when needed. + +You can also create a local anomaly score using the `Anomaly` +class in the `anomaly` module. A simple example of that is: + +.. code-block:: python + + from bigml.anomaly import Anomaly + local_anomaly = Anomaly("anomaly/5968ec46983efc21b000001c") + # creating a pipeline to store the feature extraction transformations + feature_extraction_pipeline = local_anomaly.data_transformations() + # scoring for some input data. 
As pipelines transform lists of rows + # we build a list with the single input data and get the first + # element of the output list + input_data = feature_extraction_pipeline.transform( + [{"image_id": "data/images/f2/fruits2.png"}])[0] + local_anomaly.anomaly_score(input_data) diff --git a/docs/101_linear_regression.rst b/docs/101_linear_regression.rst new file mode 100644 index 00000000..08f87889 --- /dev/null +++ b/docs/101_linear_regression.rst @@ -0,0 +1,132 @@ +.. toctree:: + :hidden: + +101 - Linear Regression usage +============================= + +Following the schema described in the `prediction workflow `_, +document, this is the code snippet that shows the minimal workflow to +create a linear regression model and produce a single prediction. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a source from the data in your local "data/iris.csv" file + source = api.create_source("data/iris.csv") + # waiting for the source to be finished. Results will be stored in `source` + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating a linear regression + linear_regression = api.create_linear_regression(dataset) + # waiting for the linear regression to be finished + api.ok(linear_regression) + # the new input data to predict for + input_data = {"sepal width": 4, "sepal length": 2, + "petal width": 1, "species": "Iris-setosa"} + # creating a single prediction + prediction = api.create_prediction(linear_regression, input_data) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. 
+In the first case, we could skip that `api.ok`call because the next +`create` method would internally do the waiting when needed. + +If you want to create predictions for many new inputs, you can do so by +creating +a `batch_prediction` resource. First, you will need to upload to the platform +all the input data that you want to predict for and create the corresponding +`source` and `dataset` resources. In the example, we'll be assuming you already +created a `linear regression` following the steps 0 to 5 +in the previous snippet. + +.. code-block:: python + + # step 6: creating a source from the data in your local "data/test_iris.csv" file + test_source = api.create_source("data/test_iris.csv") + # waiting for the source to be finished. Results will be stored in `source` + api.ok(test_source) + # step 8: creating a dataset from the previously created `source` + test_dataset = api.create_dataset(test_source) + # waiting for the dataset to be finished + api.ok(test_dataset) + # step 10: creating a batch prediction + batch_prediction = api.create_batch_prediction(linear_regression, + test_dataset) + # waiting for the batch_prediction to be finished + api.ok(batch_prediction) + # downloading the results to your computer + api.download_batch_prediction(batch_prediction, + filename='my_dir/my_predictions.csv') + +The batch prediction output (as well as any of the resources created) +can be configured using additional arguments in the corresponding create calls. +For instance, to include all the information in the original dataset in the +output you would change `step 10` to: + +.. code-block:: python + + batch_prediction = api.create_batch_prediction(linear_regression, + test_dataset, + {"all_fields": True}) +Check the `API documentation `_ to learn about the +available configuration options for any BigML resource. + +You can also predict locally using the `LinearRegression` +class in the `linear` module. A simple example of that is: + +.. 
code-block:: python + + from bigml.linear import LinearRegression + local_linear_regression = LinearRegression( \ + "linearregression/5968ec46983efc21b000001b") + # predicting for some input data + local_linear_regression.predict({"sepal width": 4, "sepal length": 2, + "petal width": 1, + "species": "Iris-setosa"}) + +Or you could store first your linear regression +information in a file and use that +file to create the local `LinearRegression` object: + +.. code-block:: python + + # downloading the linear regression JSON to a local file + from bigml.api import BigML + api = BigML() + api.export("linearregression/5968ec46983efc21b000001b", + filename="my_linear_regression.json") + # creating the linear regression from the file + from bigml.linear import LinearRegression + local_linear_regression = LinearRegression( \ + "my_linear_regression.json") + # predicting for some input data + local_linear_regression.predict({"sepal width": 4, "sepal length": 2, + "petal width": 1, + "species": "Iris-setosa"}) + + +And if you want to predict locally for all the rows in a CSV file (first line +should contain the field headers): + +.. code-block:: python + + import csv + from bigml.linear import LinearRegression + local_linear_regression = LinearRegression( \ + "linearregression/5a414c667811dd5057000ab5") + with open("test_data.csv") as test_handler: + reader = csv.DictReader(test_handler) + for input_data in reader: + # predicting for all rows + print local_linear_regression.predict(input_data) + +Every modeling resource in BigML has its corresponding local class. Check +the `Local resources `_ section of the +documentation to learn more about them. diff --git a/docs/101_logistic_regression.rst b/docs/101_logistic_regression.rst new file mode 100644 index 00000000..8cda0471 --- /dev/null +++ b/docs/101_logistic_regression.rst @@ -0,0 +1,129 @@ +.. 
toctree:: + :hidden: + +101 - Logistic Regression usage +=============================== + +Following the schema described in the `prediction workflow `_, +document, this is the code snippet that shows the minimal workflow to +create a logistic regression model and produce a single prediction. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a source from the data in your local "data/iris.csv" file + source = api.create_source("data/iris.csv") + # waiting for the source to be finished. Results will be stored in `source` + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating a logistic regression + logistic_regression = api.create_logistic_regression(dataset) + # waiting for the logistic regression to be finished + api.ok(logistic_regression) + # the new input data to predict for + input_data = {"petal length": 4, "sepal length": 2} + # creating a single prediction + prediction = api.create_prediction(logistic_regression, input_data) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok`call because the next +`create` method would internally do the waiting when needed. + +If you want to create predictions for many new inputs, you can do so by +creating +a `batch_prediction` resource. First, you will need to upload to the platform +all the input data that you want to predict for and create the corresponding +`source` and `dataset` resources. In the example, we'll be assuming you already +created a `logistic regression` following the steps 0 to 5 +in the previous snippet. + +.. 
code-block:: python + + # step 6: creating a source from the data in your local "data/test_iris.csv" file + test_source = api.create_source("data/test_iris.csv") + # waiting for the source to be finished. Results will be stored in `source` + api.ok(test_source) + # step 8: creating a dataset from the previously created `source` + test_dataset = api.create_dataset(test_source) + # waiting for the dataset to be finished + api.ok(test_dataset) + # step 10: creating a batch prediction + batch_prediction = api.create_batch_prediction(logistic_regression, + test_dataset) + # waiting for the batch_prediction to be finished + api.ok(batch_prediction) + # downloading the results to your computer + api.download_batch_prediction(batch_prediction, + filename='my_dir/my_predictions.csv') + +The batch prediction output (as well as any of the resources created) +can be configured using additional arguments in the corresponding create calls. +For instance, to include all the information in the original dataset in the +output you would change `step 10` to: + +.. code-block:: python + + batch_prediction = api.create_batch_prediction(logistic_regression, + test_dataset, + {"all_fields": True}) +Check the `API documentation `_ to learn about the +available configuration options for any BigML resource. + +You can also predict locally using the `LogisticRegression` +class in the `logistic` module. A simple example of that is: + +.. code-block:: python + + from bigml.logistic import LogisticRegression + local_logistic_regression = LogisticRegression( \ + "logisticregression/5968ec46983efc21b000001b") + # predicting for some input data + local_logistic_regression.predict({"petal length": 4, "sepal length": 2, + "petal width": 1, "sepal witdh": 3}) + +Or you could store first your logistic regression +information in a file and use that +file to create the local `LogisticRegression` object: + +.. 
code-block:: python + + # downloading the logistic regression JSON to a local file + from bigml.api import BigML + api = BigML() + api.export("logisticregression/5968ec46983efc21b000001b", + filename="my_logistic_regression.json") + # creating the logistic regression from the file + from bigml.logistic import LogisticRegression + local_logistic_regression = LogisticRegression( \ + "my_logistic_regression.json") + # predicting for some input data + local_logistic_regression.predict({"petal length": 4, "sepal length": 2, + "petal width": 1, "sepal witdh": 3}) + + +And if you want to predict locally for all the rows in a CSV file (first line +should contain the field headers): + +.. code-block:: python + + import csv + from bigml.logistic import LogisticRegression + local_logistic_regression = LogisticRegression( \ + "logisticregression/5a414c667811dd5057000ab5") + with open("test_data.csv") as test_handler: + reader = csv.DictReader(test_handler) + for input_data in reader: + # predicting for all rows + print local_logistic_regression.predict(input_data) + +Every modeling resource in BigML has its corresponding local class. Check +the `Local resources `_ section of the +documentation to learn more about them. diff --git a/docs/101_model.rst b/docs/101_model.rst new file mode 100644 index 00000000..a7bf1915 --- /dev/null +++ b/docs/101_model.rst @@ -0,0 +1,123 @@ +.. toctree:: + :hidden: + +101 - Decision Tree usage +========================= + +Following the schema described in the `prediction workflow `_, +document, this is the code snippet that shows the minimal workflow to +create a decision tree model and produce a single prediction. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a source from the data in your local "data/iris.csv" file + source = api.create_source("data/iris.csv") + # waiting for the source to be finished. 
Results will be stored in `source` + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating a decision tree model + model = api.create_model(dataset) + # waiting for the model to be finished + api.ok(model) + # the new input data to predict for + input_data = {"petal width": 1.75, "petal length": 2.45} + # creating a single prediction + prediction = api.create_prediction(model, input_data) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok`call because the next +`create` method would internally do the waiting when needed. + +If you want to create predictions for many new inputs, you can do so by +creating +a `batch_prediction` resource. First, you will need to upload to the platform +all the input data that you want to predict for and create the corresponding +`source` and `dataset` resources. In the example, we'll be assuming you already +created a `model` following the steps 0 to 5 in the previous snippet. + +.. code-block:: python + + # step 6: creating a source from the data in your local "data/test_iris.csv" file + test_source = api.create_source("data/test_iris.csv") + # waiting for the source to be finished. 
Results will be stored in `source` + api.ok(test_source) + # step 8: creating a dataset from the previously created `source` + test_dataset = api.create_dataset(test_source) + # waiting for the dataset to be finished + api.ok(test_dataset) + # step 10: creating a batch prediction + batch_prediction = api.create_batch_prediction(model, test_dataset) + # waiting for the batch_prediction to be finished + api.ok(batch_prediction) + # downloading the results to your computer + api.download_batch_prediction(batch_prediction, + filename='my_dir/my_predictions.csv') + +The batch prediction output (as well as any of the resources created) +can be configured using additional arguments in the corresponding create calls. +For instance, to include all the information in the original dataset in the +output you would change `step 10` to: + +.. code-block:: python + + batch_prediction = api.create_batch_prediction(model, test_dataset, + {"all_fields": True}) +Check the `API documentation `_ to learn about the +available configuration options for any BigML resource. + +You can also predict locally using the `Model` +class in the `model` module. A simple example of that is: + +.. code-block:: python + + from bigml.model import Model + local_model = Model("model/5968ec46983efc21b000001b") + # predicting for some input data + local_model.predict({"petal length": 2.45, "sepal length": 2, + "petal width": 1.75, "sepal witdh": 3}) + +Or you could store first your model information in a file and use that +file to create the local `Model` object: + +.. 
code-block:: python + + # downloading the model JSON to a local file + from bigml.api import BigML + api = BigML() + api.export("model/5968ec46983efc21b000001b", + filename="my_model.json") + # creating the model from the file + from bigml.model import Model + local_model = Model("my_model.json") + # predicting for some input data + local_model.predict({"petal length": 2.45, "sepal length": 2, + "petal width": 1.75, "sepal witdh": 3}) + + +And if you want to predict locally for all the rows in a CSV file (first line +should contain the field headers): + +.. code-block:: python + + import csv + from bigml.model import Model + local_model = Model("model/5a414c667811dd5057000ab5") + with open("test_data.csv") as test_handler: + reader = csv.DictReader(test_handler) + for input_data in reader: + # predicting for all rows + print local_model.predict(input_data) + + +Every modeling resource in BigML has its corresponding local class. Check +the `Local resources `_ section of the +documentation to learn more about them. diff --git a/docs/101_object_detection.rst b/docs/101_object_detection.rst new file mode 100644 index 00000000..b851366d --- /dev/null +++ b/docs/101_object_detection.rst @@ -0,0 +1,52 @@ +.. toctree:: + :hidden: + +101 - Images Object Detection +============================= + +Following the schema described in the `prediction workflow `_, +document, this is the code snippet that shows the minimal workflow to +create a deepnet and produce a single prediction. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a source from the data in your local + # "data/images/cats.zip" file, that contains a collection of images + # and an "annotations.json" file with the corresponding annotations per + # image describing the regions labeled in the image + source = api.create_source("data/images/cats.zip") + # waiting for the source to be finished. 
Results will be stored in `source` + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating a deepnet + deepnet = api.create_deepnet(dataset) + # waiting for the deepnet to be finished + api.ok(deepnet) + # the new input data to predict for + input_data = "data/images/cats_test/pexels-pixabay-33358.jpg" + # creating a single prediction + prediction = api.create_prediction(deepnet, input_data) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok`call because the next +`create` method would internally do the waiting when needed. + +You can also predict locally using the `Deepnet` +class in the `deepnet` module. A simple example of that is: + +.. code-block:: python + + from bigml.deepnet import Deepnet + local_deepnet = Deepnet("deepnet/5968ec46983efc21b000001c") + # predicting for some input data + input_data = "data/images/cats_test/pexels-pixabay-33358.jpg" + local_deepnet.predict(input_data) diff --git a/docs/101_optiml.rst b/docs/101_optiml.rst new file mode 100644 index 00000000..cd1f7d2e --- /dev/null +++ b/docs/101_optiml.rst @@ -0,0 +1,45 @@ +.. toctree:: + :hidden: + +101 - OptiML usage +================== + +Following the schema described in the `prediction workflow `_, +document, this is the code snippet that shows the minimal workflow to +create an OptiML. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a source from the data in your local "data/iris.csv" file + source = api.create_source("data/iris.csv") + # waiting for the source to be finished. 
Results will be stored in `source` + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating an optiml + optiml = api.create_optiml(dataset) + # waiting for the optiml to be finished + api.ok(optiml) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + +If you want to configure some of the attributes of your optiml, like the +maximum training time, you can use the second argument in the create call:: + + # step 5: creating an optiml with a maximum training time of 3600 seconds + optiml = api.create_optiml(dataset, {"max_training_time": 3600}) + # waiting for the optiml to be finished + api.ok(optiml) + +You can check all the available creation arguments in the `API documentation +`_. diff --git a/docs/101_pca.rst b/docs/101_pca.rst new file mode 100644 index 00000000..2138470a --- /dev/null +++ b/docs/101_pca.rst @@ -0,0 +1,135 @@ +.. toctree:: + :hidden: + +101 - PCA usage +=============== + +The PCA model is used to find the linear combination of your original +features that best describes your data. In that sense, the goal of the model +is to provide a transformation that allows dimensionality reduction. +Following the schema described in the `prediction workflow `_, +document, this is the code snippet that shows the minimal workflow to +create a PCA model and produce a single projection. + + +..
code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + # check how to set your credentials in the Authentication section + api = BigML() + # step 1: creating a source from the data in your local "data/iris.csv" file + source = api.create_source("data/iris.csv") + # waiting for the source to be finished. Results will be stored in `source` + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating a PCA model + pca = api.create_pca(dataset) + # waiting for the PCA to be finished + api.ok(pca) + # the input data to project + input_data = {"petal length": 4, "sepal length": 2, "petal width": 1, + "sepal width": 3} + # getting the transformed components, the projection + projection = api.create_projection(pca, input_data) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + +If you want to configure some of the attributes of your PCA, +like selecting a default numeric value, you can use the second argument +in the create call. + + +.. code-block:: python + + # step 5: creating a PCA and using mean as numeric value when missing + pca = api.create_pca(dataset, {"default_numeric_value": "mean"}) + # waiting for the PCA to be finished + api.ok(pca) + +You can check all the available creation arguments in the `API documentation +`_. + +If you want to add the generated principal components to the original +dataset (or a different dataset), you can do so by creating +a `batch_projection` resource.
In the example, we'll be assuming you already +created a `PCA` following the steps 0 to 5 in the previous snippet and +that you want to score the same data you used in the PCA model. + +.. code-block:: python + + test_dataset = dataset + # step 10: creating a batch projection + batch_projection = api.create_batch_projection(pca, test_dataset) + # waiting for the batch_projection to be finished + api.ok(batch_projection) + # downloading the results to your computer + api.download_batch_projection(batch_projection, + filename='my_dir/my_projection.csv') + +The batch projection output (as well as any of the resources created) +can be configured using additional arguments in the corresponding create calls. +For instance, to include all the information in the original dataset in the +output you would change `step 10` to: + +.. code-block:: python + + batch_projection = api.create_batch_projection(pca, test_dataset, + {"all_fields": True}) + +Check the `API documentation `_ to learn about the +available configuration options for any BigML resource. + +You can also project your data locally using the `PCA` +class in the `pca` module. A simple example of that is: + +.. code-block:: python + + from bigml.pca import PCA + local_pca = PCA("pca/6878ec46983efc21b000001b") + # Getting the projection of some input data + local_pca.projection({"petal length": 4, "sepal length": 2, + "petal width": 1, "sepal witdh": 3}) + +Or you could store first your PCA information in a file and use that +file to create the local `PCA` object: + +.. 
code-block:: python + + # downloading the PCA JSON to a local file + from bigml.api import BigML + api = BigML() + api.export("pca/6878ec46983efc21b000001b", + filename="my_pca.json") + # creating a PCA object using the information in the file + from bigml.pca import PCA + local_pca = PCA("my_pca.json") + # getting the projection for some input data + local_pca.projection({"petal length": 4, "sepal length": 2, + "petal width": 1, "sepal width": 3}) + +If you want to get the projection locally for all the rows in a CSV file +(first line should contain the field headers): + +.. code-block:: python + + import csv + from bigml.pca import PCA + local_pca = PCA("pca/68714c667811dd5057000ab5") + with open("test_data.csv") as test_handler: + reader = csv.DictReader(test_handler) + for input_data in reader: + # getting the projection for each row + print(local_pca.projection(input_data)) + +Every modeling resource in BigML has its corresponding local class. Check +the `Local resources `_ section of the +documentation to learn more about them. diff --git a/docs/101_scripting.rst b/docs/101_scripting.rst new file mode 100644 index 00000000..aa0f05a2 --- /dev/null +++ b/docs/101_scripting.rst @@ -0,0 +1,207 @@ +.. toctree:: + :hidden: + +101 - Creating and executing scripts +==================================== + +The bindings offer methods to create and execute `WhizzML `_ +scripts in the platform. +WhizzML is the DSL that allows you to automate tasks in BigML. + +These code snippets show examples to illustrate how to create and execute +simple scripts: + +Basic script, no inputs +----------------------- + +This is the code to create a simple script that creates a source from an +existing CSV file that is available in a remote URL: + +..
code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a script that uploads a remote file and creates a source + script = api.create_script( \ + "(create-source {\"remote\" \"https://static.bigml.com/csv/iris.csv\"})") + # waiting for the script to be finished. + api.ok(script) + # step 2: executing the script (it takes no inputs) + execution = api.create_execution(script) + # waiting for the execution to be finished + api.ok(execution) + # step 3: retrieving the result (e.g. "source/5ce6a55dc984177cf7000891") + result = execution['object']['execution']['result'] + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + +In this example, the `url` used is always the same, so no inputs are provided +to the script. This is not a realistic situation, because usually scripts +need user-provided inputs. The next example shows how to +add two variables, whose values will be provided as inputs. + +Basic script with inputs +------------------------ + +Scripts usually need some inputs to work. When defining the script, you need +to provide both the code and the description of the inputs that it will +accept. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a script that adds two numbers + script = api.create_script( \ + "(+ a b)", + {"inputs": [{"name": "a", + "type": "number"}, + {"name": "b", + "type": "number"}]}) + # waiting for the script to be finished.
+ api.ok(script) + # step 2: executing the script with some particular inputs: a=1, b=2 + execution = api.create_execution( \ + script, + {"inputs": [["a", 1], + ["b", 2]]}) + # waiting for the execution to be finished + api.ok(execution) + # step 3: retrieving the result (e.g. 3) + result = execution['object']['execution']['result'] + +And of course, you will usually store your code, inputs and outputs in files. +The ``create_script`` method can receive as first argument the path to a file +that contains the source code and the rest of arguments can be retrieved from +a JSON file using the standard tools available in Python. The previous +example could also be created from a file that contains the WhizzML code +and a metadata file that contains the inputs and outputs description as a +JSON. + +.. code-block:: python + + import json + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a script from the code stored in `my_script.whizzml` + # and the inputs and outputs metadata stored in `metadata.json` + + with open('./metadata.json') as json_file: + metadata = json.load(json_file) + script = api.create_script("./my_script.whizzml", metadata) + # waiting for the script to be finished. + api.ok(script) + +Or load the files from a gist url: + +.. code-block:: python + + import json + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a script from a gist + + gist_url = "https://gist.github.com/mmerce/49e0a69cab117b6a11fb490140326020" + script = api.create_script(gist_url) + # waiting for the script to be finished. + api.ok(script) + +Basic Execution +--------------- + +In a full-fledged script, you will also produce some outputs that can be used +in other scripts. This is an example of a script creating a dataset from a +source that was generated from a remote URL. 
Both the URL and the source +name are provided by the user. Once the script has been created, we +run it by creating an execution from it and placing the particular input values +that we want to apply it to. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a script that creates a `source` and a dataset from + # a user-given remote file + script = api.create_script( \ + "(define my-dataset (create-dataset (create-source {\"remote\" url \"name\" source-name})))", + {"inputs": [{"name": "url", + "type": "string"}, + {"name": "source-name", + "type": "string"}], + "outputs": [{"name": "my-dataset", + "type": "dataset"}]}) + # waiting for the script to be finished. + api.ok(script) + + # step 2: executing the script with some particular inputs + execution = api.create_execution( \ + script, + {"inputs": [["url", "https://static.bigml.com/csv/iris.csv"], + ["source-name", "my source"]]}) + # waiting for the dataset to be finished + api.ok(execution) + # step 3: retrieving the result (e.g. "dataset/5cae5ad4b72c6609d9000356") + result = execution['object']['execution']['result'] + + +You can also use the ``Execution`` class to easily access the results, +outputs and output resources of an existing execution. +Just instantiate the class with the execution resource or ID: + +.. code-block:: python + + from bigml.execution import Execution + execution = Execution("execution/5cae5ad4b72c6609d9000468") + print "The result of the execution is %s" % execution.result + print " and the output for variable 'my_variable': %s" % \ + execution.outputs["my_variable"] + +Local and remote scripting +-------------------------- + +Any operation in BigML can be scripted by using the bindings locally +to call the API. 
However, the highest +efficiency, scalability and reproducibility will come only by using +WhizzML scripts in the platform to handle the Machine Learning workflow that +you need. Thus, in most situations, the bindings are used merely to +upload the data to the platform and create an execution that uses that data to +reproduce the same operations. Let's say that you have a WhizzML script that +generates a batch prediction based on an existing model. The only input +for the script will be the source ID that will be used to predict, and the +rest of steps will be handled by the WhizzML script. Therefore, in order to +use that on new data you'll need to upload that data to the platform and use +the resulting ID as input. + + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a script that uploads local data to create a `source` + source = api.create_source("my_local_file") + # waiting for the source to be finished. + api.ok(source) + + # step 2: executing the script to do a batch prediction with the new + # source as input + script = "script/5cae5ad4b72c6609d9000235" + execution = api.create_execution( \ + script, + {"inputs": [["source", source["resource"]]]}) + # waiting for the workflow to be finished + api.ok(execution) + # step 3: retrieving the result (e.g. "dataset/5cae5ad4b72c6609d9000356") + result = execution['object']['execution']['result'] + # step 4: maybe storing the result as a CSV + api.download_dataset(result, "my_predictions.csv") diff --git a/docs/101_topic_model.rst b/docs/101_topic_model.rst new file mode 100644 index 00000000..065dcd2e --- /dev/null +++ b/docs/101_topic_model.rst @@ -0,0 +1,124 @@ +.. 
toctree:: + :hidden: + +101 - Topic Model usage +======================= + +Following the schema described in the `prediction workflow `_ +document, this is the code snippet that shows the minimal workflow to +create a topic model and produce a single topic distribution. + +.. code-block:: python + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a source from the data in your local "data/spam.csv" file + source = api.create_source("data/spam.csv") + # waiting for the source to be finished. Results will be stored in `source` + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating a topic model + topic_model = api.create_topic_model(dataset) + # waiting for the topic model to be finished + api.ok(topic_model) + # the new input data to predict for + input_data = {"Message": "Mobile offers, 20% discount."} + # creating a single topic distribution + topic_distribution = api.create_topic_distribution(topic_model, input_data) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + +Remember that your dataset needs to have at least a text field to be able +to create a topic model. +If you want to create topic distributions for many new inputs, you can do so by +creating +a `batch_topic_distribution` resource. First, you will need to upload +to the platform +all the input data that you want to use and create the corresponding +`source` and `dataset` resources. In the example, we'll be assuming you already +created a `topic model` following the steps 0 to 5 in the previous snippet. + +..
code-block:: python + + # step 6: creating a source from the data in your local "data/test_spam.csv" file + test_source = api.create_source("data/test_spam.csv") + # waiting for the source to be finished. Results will be stored in `source` + api.ok(test_source) + # step 8: creating a dataset from the previously created `source` + test_dataset = api.create_dataset(test_source) + # waiting for the dataset to be finished + api.ok(test_dataset) + # step 10: creating a batch topic distribution + batch_topic_distribution = api.create_batch_topic_distribution( \ + topic_model, test_dataset) + # waiting for the batch_topic_distribution to be finished + api.ok(batch_topic_distribution) + # downloading the results to your computer + api.download_batch_topic_distribution( \ + batch_topic_distribution, filename='my_dir/my_predictions.csv') + +The batch topic distribution output (as well as any of the resources created) +can be configured using additional arguments in the corresponding create calls. +For instance, to include all the information in the original dataset in the +output you would change `step 10` to: + +.. code-block:: python + + batch_topic_distribution = api.create_batch_topic_distribution( \ + topic_model, test_dataset, {"all_fields": True}) +Check the `API documentation `_ to learn about the +available configuration options for any BigML resource. + +You can also predict locally using the `TopicModel` +class in the `topicmodel` module. A simple example of that is: + +.. code-block:: python + + from bigml.topicmodel import TopicModel + local_topic_model = TopicModel("topicmodel/5968ec46983efc21b000001b") + # topic distribution for some input data + local_topic_model.distribution({"Message": "Mobile offers, 20% discount."}) + +Or you could store first your topic model information in a file and use that +file to create the local `TopicModel` object: + +.. 
code-block:: python + + # downloading the topic model JSON to a local file + from bigml.api import BigML + api = BigML() + api.export("topicmodel/5968ec46983efc21b000001b", + filename="my_topic_model.json") + # creating the topic model from the file + from bigml.topicmodel import TopicModel + local_topic_model = TopicModel("my_topic_model.json") + # topic distribution for some input data + local_topic_model.distribution({"Message": "Mobile offers, 20% discount."}) + + +And if you want to predict locally for all the rows in a CSV file (first line +should contain the field headers): + +.. code-block:: python + + import csv + from bigml.topicmodel import TopicModel + local_topic_model = TopicModel("topicmodel/5a414c667811dd5057000ab5") + with open("test_data.csv") as test_handler: + reader = csv.DictReader(test_handler) + for input_data in reader: + # predicting for all rows + print local_topic_model.distribution(input_data) + +Every modeling resource in BigML has its corresponding local class. Check +the `Local resources `_ section of the +documentation to learn more about them. diff --git a/docs/101_ts.rst b/docs/101_ts.rst new file mode 100644 index 00000000..ff5388b0 --- /dev/null +++ b/docs/101_ts.rst @@ -0,0 +1,74 @@ +.. toctree:: + :hidden: + +101 - Time Series usage +======================= + +Following the schema described in the `prediction workflow `_, +document, this is the code snippet that shows the minimal workflow to +create a time series and produce a forecast. + +.. code-block:: python + + + from bigml.api import BigML + # step 0: creating a connection to the service (default credentials) + api = BigML() + # step 1: creating a source from the data in your local "data/grades.csv" file + source = api.create_source("data/grades.csv") + # waiting for the source to be finished. 
Results will be stored in `source` + api.ok(source) + # step 3: creating a dataset from the previously created `source` + dataset = api.create_dataset(source) + # waiting for the dataset to be finished + api.ok(dataset) + # step 5: creating a time series to forecast any numeric field in the dataset + time_series = api.create_time_series(dataset) + # waiting for the time series to be finished + api.ok(time_series) # finished time series + input_data = {"000005": {"horizon": 10}} # 10 points forecast for field ID 000005 + forecast = api.create_forecast(time_series, {"000005": {"horizon": 10}}) + +In the previous code, the `api.ok `_ +method is used to wait for the resource +to be finished before calling the next create method +or accessing the resource properties. +In the first case, we could skip that `api.ok` call because the next +`create` method would internally do the waiting when needed. + +To learn more about the arguments that can be set in the `forecast` and +`timeseries` creation calls and the response properties, please have a look +at the `API documentation `_. + +If you want to produce your forecasts locally, you can use the `TimeSeries` +class in the `timeseries` module. A simple example of that is: + +.. code-block:: python + + from bigml.timeseries import TimeSeries + local_time_series = TimeSeries("timeseries/5968ec46983efc21b000001b") + # 10 points forecast for field "Final" and 5 points for field "Assignment" + local_time_series.forecast({"Final": {"horizon": 10}, + "Assignment": {"horizon": 5}}) + +Or you could first store your time series information in a file and use that +file to create the local `TimeSeries` object: + +..
code-block:: python + + # downloading the time series JSON to a local file + from bigml.api import BigML + api = BigML() + api.export("timeseries/5968ec46983efc21b000001b", + filename="my_time_series.json") + # creating the time series from the file + from bigml.timeseries import TimeSeries + local_time_series = TimeSeries("my_time_series.json") + # 10 points forecast for field "Final" and 5 points for field "Assignment" + local_time_series.forecast({"Final": {"horizon": 10}, + "Assignment": {"horizon": 5}}) + + +Every modeling resource in BigML has its corresponding local class. Check +the `Local resources `_ section of the +documentation to learn more about them. diff --git a/docs/api_sketch.rst b/docs/api_sketch.rst new file mode 100644 index 00000000..c5876449 --- /dev/null +++ b/docs/api_sketch.rst @@ -0,0 +1,309 @@ +.. toctree:: + :hidden: + +BigML Bindings: Modeling and prediction process +=============================================== + +After going through our ``Quick Start`` section that introduced some ML +resources +in **BigML**, you may still be left with a few questions, +such as: + +- how do I change my fields' types? +- how do I change my preferred fields? +- how do I configure my model? + +and even: + +- what am I doing exactly at each point of the code? + +This document will try to bridge that gap by explaining how you can +use different creation arguments or change some of the properties used by +default to adapt your resources to your own requirements. + +As general principles, let's remember that each +binding has its own syntax to build the REST API calls to BigML, so +from this moment on we'll use the common **create** and **update** words +as placeholders for the corresponding methods in the binding of your choice. +You'll need to check each binding documentation to know the specific names for +these methods. + +Another thing to keep in mind is that resources in **BigML** are not +totally mutable.
They are mostly immutable, and once they are created +there's only a limited subset of attributes +(like names, labels, descriptions, etc.) that can always be updated. +Besides those +common updatable properties, each type of resource has a different subset +of properties that can also be updated. +In this document we'll guide you as to which are the most frequently used, but +to learn about all of them and their allowed values +you can always refer to +the `API Documentation `_ descriptions of each +resource type. + +Finally, as resources in **BigML** are asynchronously created, when +the bindings' +**create** and **update** methods make a request to the **API** the +response will not usually be a resource in its final complete form. +The **API** response will always include a **resource ID** and some +`status information `_ +which keeps track of the evolution of your request. As for the rest, +the resource information +will be growing and evolving until +the final resource status is reached. Resources in **BigML** are created +using `*anytime +algorithms* `_ +so that they can be useful at any step of their construction process. +This also means that, to use the complete information of the resource, +you will need to repeatedly poll +for it using +the bindings' **get** method till the status reaches one of the +two possible terminal states: **finished** or **faulty**. +Usually, the bindings provide an +auxiliary function that takes care of this iterative +polling procedure. You will need +to call it always after the **create** or **update** calls +to ensure that a fully +formed resource is available for the next step. + +The detailed prediction workflow +-------------------------------- + +The process to create a model or to generate predictions in BigML goes through +several steps. In each step, a new resource is generated or updated. Some are +mandatory, others are optional. You can see a sketch of the +process in the following image.
Green arrows are creation steps +that generate new resources from old ones, and orange arrows are update steps. +The steps have been numbered from 1 to 10 and we'll discuss them in +some more detail. + +.. image:: images/steps.png + +Common pre-modeling steps (**S1 - S4**) +--------------------------------------- + +These are the steps that must be run to upload a file in BigML and get its +contents ready for modeling, so whatever the task you want to do in BigML, +you'll probably need to follow them. + +First step: uploading your data to create a **Source** +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The first step (**S1**) is compulsory: +**Creating** a **Source** by uploading your data. + +This means that you'll need to use the **create source** method in your +favourite bindings to create a **Source** object. The only mandatory argument +is the path to your data, and the rest of arguments that can be found in the +`API Documentation +`_ are +optional. + +The result of the **create_source** call includes +a JSON describing the **Source** +resource. All the resources in BigML can be identified using a +unique resource ID. This identifier can be found in the **resource** attribute +of the JSON response. The format of this ID for **Sources** would be +**source/** followed by an alphanumeric string +(e.g.: source/4f603fe203ce89bb2d000000). The rest of properties +are described in the +`properties section `_ of +the API Documentation. + + +Second step: updating your **Source** to change the inferred fields structure +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This step (**S2**) is optional: **Updating** a **Source**. + +In BigML, only the properties marked as updatable +in the previously mentioned table of the API Documentation +can be changed. 
For the **Source**, you can update the attributes that +control how your data has been parsed, which are the strings to be interpreted +as missing data, how text or items handling will be done and what +field types are to be assigned. Let us focus now on the **fields** +property, which needs some further explanation. + +The fields structure is inferred once you upload your data to BigML. This is +reflected in the contents of the **fields** attribute, +where you will see that an ID has been assigned to each of the detected fields +to identify the field uniquely. Keyed by this ID, there's a structure that +contains all the field information, like its name, type, column number, etc. +Its JSON would look like: + +.. code-block:: json + + "fields": { + "000000": { + "column_number": 0, + "name": "sepal length", + "optype": "numeric", + "order": 0 + }, + "000001": { + "column_number": 1, + "name": "label", + "optype": "numeric", + "order": 1 + }} + + +In the example, there are two fields, **sepal length** and **label** +which are both considered to be numeric. +The type has been inferred from the contents of the +first rows in your data, but let's suppose the second one is not really a +numeric field but a categorical one. In this case, we would need to change +the field's type and this could only be done by **updating** the **Source** +object with the new information: + +.. code-block:: json + + "fields":{ + "000001": { + "optype": "categorical" + }} + + +Mind that you'll need to know the ID of the field you intend to change. Some +bindings have utilities that will help you produce these IDs given the field +names. As BigML lets you use +columns with duplicated names, using the ID is the only way +to ensure that the change will be applied to the correct field. + +In order to **update** a **Source** the only compulsory arguments are the +**source ID** and the changes to be applied.
+ + +Third step: creating a **Dataset** to serialize the whole data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This step (**S3**) is compulsory: **Creating** a **Dataset** + +This will be achieved by using the **create dataset** method of your bindings +and the only mandatory argument for this call is the **Source ID** which +points to the **Source** object we want to create the **Dataset** from. The +rest of arguments described in the +`API Documentation `_ +are optional. The response for the **create dataset** call also contains the +corresponding **resource ID** for the dataset +(e.g. dataset/4f603fe203ce89bb2d000004) among other properties, +described in the +`datasets section of the API Documentation +`_. The dataset +summarizes and serializes the entire set of values that each field contains. +This can cause some of the fields to be marked as non-preferred if +they are not expected to be useful for the model. For instance, a constant +field will be marked as non-preferred. This feature will be a new property +of the **fields** structure in the dataset JSON response. In order to change +the ``preferred`` flag and the rest of updatable properties you'll need to go +to the next +step. + +Fourth step: updating your **Dataset** to prepare modeling +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This step (**S4**) is optional: **Updating** a **Dataset** + +Here too, a small +subset of properties can be updated (only the ones marked as such +in the `datasets section of the API Documentation +`_). +In particular, the **Dataset** has +a **fields** attribute too. In this case, the **fields** value only allows +modifying the **preferred**, **label**, **name**, and **description** +attributes. This can be achieved using the **update dataset** function in +your bindings.
+ +Fifth step: creating your model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This step (**S5**) is compulsory: **Creating** a **Model**, **Ensemble**, +**Cluster** or any +of the available ML modeling resources. + +The model can be created using the configuration options by default or +customizing some values to improve its performance. The only mandatory +argument for the **create model** call is the **dataset ID** to start from. +The rest of arguments described in the +`API Documentation `_ +are optional. +For instance, when +building a classification model you might like to limit the number +of nodes in your decision tree. Also, if the number of instances that belong +to each class in your data is very skewed, you should consider balancing them. +This can be achieved using a **balance_objective** argument, as explained +in the API Documentation. Also, a major attribute to be taken into account +when doing classification or regression is the **objective_field**, which +chooses the field that will be predicted. None of these properties can be +updated. If you want to change them, you'll need to create a new model with +the new configuration. + +Any other modeling resource (like clusters, anomaly detectors, etc.) +will also have its particular configuration +arguments (described in their corresponding API documentation section). +Again, to use a different configuration you'll +need to create a new resource with it. + + +Preparing test data for predictions +----------------------------------- + +When your model is ready and you want to create predictions for a new bunch +of test data, you will need to run through the **S1 - S4** previously described +steps using your test data file. +In the previous image, this process is labeled as **S6** to **S9**. +After this process, +your test data will be stored in a new **Dataset** and ready for prediction.
+ +Creating batch predictions +-------------------------- + +Once models and test datasets are ready, you can proceed to create the +predictions using both. + +Tenth step: creating batch predictions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This step (**S10**) is compulsory: **Creating batch predictions** + +The batch predictions can be created using the **create batch prediction** +method in your bindings. The mandatory +arguments for the **create batch prediction** call are the ID of the model +used to predict and the ID of the dataset that contains the test instances that +you want to predict for. +The rest of arguments described in the +`API Documentation `_ +are optional. + +Feature engineering and composed workflows +------------------------------------------ + +These are the steps that you should follow to model and predict using BigML. +Of course, this workflow can be more complex if you: + +- filter or extend your + datasets to build new ones (feature engineering using `flatline + `_) + +- compose different workflows to achieve your ML solution + +In both cases, you should run **S1 - S4** to get a first **Dataset** and then +add some more steps till you reach the **Dataset** you like. + +If you are using +feature engineering, you'll call the **create dataset** having as mandatory +argument the **dataset ID** that you start from and adding new fields to it +with +the transformations of your choice. This will generate a new +**Dataset** and you'll resume from **S5** using it. + +If you compose different workflows, +the final picture will be some composition of sketches like the one +enclosed. For instance, a **Dataset** generated in **S10** can be used +as origin +for a different modeling task. + +Solving your problem can also involve +both feature engineering and workflow composition, so steps might grow in +length and complexity, but in general their building blocks will +be similar to the depicted sketch. 
diff --git a/docs/conf.py b/docs/conf.py index 2be412f4..ac951f6b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,7 +30,9 @@ # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = [] +extensions = [ + 'sphinx_rtd_theme' +] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -46,7 +48,7 @@ # General information about the project. project = u'BigML' -copyright = u'2012, The BigML Team' +copyright = u'2011 - 2024, The BigML Team' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -56,7 +58,7 @@ # Read the version from bigml.__version__ without importing the package # (and thus attempting to import packages it depends on that may not be # installed yet). -init_py_path = os.path.join(project_path, 'bigml', '__init__.py') +init_py_path = os.path.join(project_path, 'bigml', 'version.py') version = re.search("__version__ = '([^']+)'", open(init_py_path).read()).group(1) # The full version, including alpha/beta/rc tags. @@ -101,7 +103,8 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'default' +# a list of builtin themes. +html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the diff --git a/docs/creating_resources.rst b/docs/creating_resources.rst new file mode 100644 index 00000000..2bd85d80 --- /dev/null +++ b/docs/creating_resources.rst @@ -0,0 +1,1552 @@ +.. toctree:: + :hidden: + +Creating Resources +================== + +Newly-created resources are returned in a dictionary with the following +keys: + +- **code**: If the request is successful you will get a + ``bigml.api.HTTP_CREATED`` (201) status code. 
In asynchronous file uploading + ``api.create_source`` calls, it will contain ``bigml.api.HTTP_ACCEPTED`` + (202) status code. Otherwise, it will be + one of the standard HTTP error codes `detailed in the + documentation `_. +- **resource**: The identifier of the new resource. +- **location**: The location of the new resource. +- **object**: The resource itself, as computed by BigML. +- **error**: If an error occurs and the resource cannot be created, it + will contain an additional code and a description of the error. In + this case, **location**, and **resource** will be ``None``. + +Resource creation is an asynchronous process, so the API will return a +response probably before the resource is totally processed and you'll need to +repeatedly call the API to see the evolution of the resource, reflected +in its status (see the `Statuses <#statuses>`_ section). +The bindings provide methods to help you do that. Please check the `waiting +for resources <#waiting_for_resources>`_ section to learn more about them. + +Statuses +~~~~~~~~ + +Resource creation is almost always asynchronous (with few exceptions, +like **projects**, **predictions**, and similar prediction-like results for +Unsupervised Models like **anomaly scores**, **centroids**, etc.) +Therefore, when you create a new source, a new dataset or a new model, even +if you receive an immediate response from the BigML servers, the full creation +of the resource can take from a few seconds to a few days, depending on +the size of the resource and BigML's load. A resource is not fully +created until its status is ``bigml.api.FINISHED``, or ``bigml.api.FAULTY`` if +some error occurs (like the one you would get when trying to upload an empty +file, a .ppt or a .doc). See the `documentation on status +codes `_ for the complete listing of +potential states and their semantics. + +Depending on your application +you might need to import the following constants: + +.. 
code-block:: python + + from bigml.api import WAITING + from bigml.api import QUEUED + from bigml.api import STARTED + from bigml.api import IN_PROGRESS + from bigml.api import SUMMARIZED + from bigml.api import FINISHED + from bigml.api import UPLOADING + from bigml.api import FAULTY + from bigml.api import UNKNOWN + from bigml.api import RUNNABLE + +Usually, you will simply need to wait until the resource is +in the ``bigml.api.FINISHED`` state for further processing. If that's the case, +the easiest way is calling the ``api.ok`` method and passing as first argument +the object that contains your resource: + +.. code-block:: python + + from bigml.api import BigML + api = BigML() # creates a connection to BigML's API + source = api.create_source('my_file.csv') # creates a source object + api.ok(source) # checks that the source is finished and updates ``source`` + +In this code, ``api.create_source`` will probably return a non-finished +``source`` object. Then, ``api.ok`` will query its status and update the +contents of the ``source`` variable with the retrieved information until it +reaches a ``bigml.api.FINISHED`` or ``bigml.api.FAULTY`` status. + + +Waiting for Resources +--------------------- + +As explained in the ``Create Resources`` section, the time needed to create +a completely finished resource can vary depending on many factors: the size +of the data to be used, the type of fields and the platform load, for +instance. In BigML, the API will answer to any creation request shortly +after receiving the creation call that starts the process. +Resources in BigML are any-time, meaning that the result contains partial but +correct information at any point of its evolution, so getting the information +of a resource which is still in progress can be useful. However, usually +you'll want to wait till the process ends to retrieve and use the resource. +The ``api.ok`` method is the mechanism provided for that, as: + +- It waits efficiently between API calls. 
The sleep time is modified to + be adapted to the resource process as given in its status. +- It adapts the parameters of the API call to minimize the amount of + information downloaded in each iteration while waiting for completion. +- It modifies the contents of the variable passed as argument to store there + the value of the resource returned by the API when it reaches the + finished or faulty state. +- It allows error handling and retries. + +Most of the time, no errors happen and a correctly finished resource is +generated. In this case, and following the example in the previous section, +the ``api.ok(source)`` method would return ``True`` and the variable +``source`` contents would be like: + +.. code-block:: python + + {"code": 200, + "resource": "source/5e4ee08e440ca1324410ccbd", + "location": "https://bigml.io/andromeda/source/5e4ee08e440ca1324410ccbd", + "error": None, + "object": {"code": 200, "fields": {...}, + ... + "status": {"code": 5, + "elapsed": 854, + "message": "The source has been created", + "progress": 1}} + } + +Where the ``object`` attribute of the dictionary would contain the response +of the last ``get`` call to the API. + +Nonetheless, two kinds of problem can arise when using ``api.ok``, +and both will cause the method to return ``False``. Firstly, +the HTTP connection that it needs to reach the API might fail. That will +prevent the resource information retrieval and will be reflected in the +``code`` and ``error`` first-level attributes of the ``source`` new contents. + +.. code-block:: python + + {"code": 500, + "resource": "source/5e4ee08e440ca1324410ccbd", + "location": "https://bigml.io/andromeda/source/5e4ee08e440ca1324410ccbd", + "error": {"status": + {"code": 500, + "message": "The resource couldn't be retrieved", + "type": "transient"}}, + "object": {"code": 201, "fields": {...}, + ...
+ "status": {"code": 1, + "elapsed": 15, + "message": "The request has been queued and will be processed soon", + "progress": 0}} + } + +and as the call could not reach the API, the ``object`` attribute will not +be modified. + +In this case, the cause was a transient error, and we can decide that transient +error calls should be retried a certain amount of times. Just +set an ``error_retries`` argument: e.g. ``api.ok(source, error_retries=10)``. + +The second kind of error appears when the API can be correctly reached and +it returns a faulty resource. There's also a variety of reasons for a resource +to end in a ``bigml.api.FAULTY`` state, but an example would be trying to +create a source by uploading an empty file, or some kind of non-supported +file, like an .ipynb file. The API will accept the create task, and add the +new resource ID. Afterwards, it will realize that the uploaded contents are not +correct, so the ``api.ok`` call will get a resource in a faulty status. Let's +see what happens when trying to upload a zip file that does not contain images +or a CSV-like file. + +.. code-block:: python + + {"code": 200, + "resource": "source/5e4ee08e440ca1324410ccbd", + "location": "https://bigml.io/andromeda/source/5e4ee08e440ca1324410ccbd", + "error": None, + "object": {"code": 500, "fields": {...}, + ... + "status": {"code": -1, + "elapsed": 225, + "error": -2020, + "message": "Spreadsheet not parseable (please try to export to CSV): Encoding: application/zip", + "progress": 0}} + } + +In this case, according to the outer ``code`` and ``error`` +attributes (associated to HTTP failures) everything went smoothly, which is +correct because the ``api.ok`` method was able to connect to the API. +However, the ``object`` attribute (that contains the API response) +will show an inner ``code`` attribute that describes the error and the +``status`` information will also contain a message describing the cause +of that error.
As this particular error is not transient, no retrying will +be done even if the ``error_retries`` argument is set. + +Based on what we've seen, a safe way to check if we have been able to +completely create a resource in BigML would be checking the return value of the +``api.ok`` method. + +.. code-block:: python + + from bigml.api import BigML + api = BigML() + source = api.create_source('my_file.csv') # creates a source object + if api.ok(source): + # code that uses the finished source contents + show_fields(source) + else: + # code that handles the error + handle_error(source) + +An alternative that can also be used to check for errors is using the +``raise_on_error`` argument of the ``api.ok`` method, that will cause an +error to be raised in both the HTTP problem and faulty resource scenarios. + +.. code-block:: python + + from bigml.api import BigML + from bigml.exceptions import FaultyResourceError + api = BigML() + source = api.create_source('my_file.csv') # creates a source object + try: + api.ok(source, raise_on_error=True) + except FaultyResourceError: + # code that handles the faulty resource error + handle_faulty_error(source) + except Exception: + # code that handles the HTTP connection errors + handle_http_error(source) + +The ``api.ok`` method is repeatedly calling the API but it sleeps for some +time between calls. The sleeping time is set by using an exponential function +that generates a random number in a range. The upper limit of that range is +increasing with the number of retries. When the progress of the resource +reaches 80%, the waiting times descend by applying a progress damping. +The parameters like the initial +waiting time, the number of retries or the estimate of the maximum elapsed +time can be provided to fit every particular case. + + +..
code-block:: python + + dataset = api.get_dataset("anomaly/5e4ee08e440ca13244102dbd") + api.ok(dataset, wait_time=60, max_elapsed_estimate=300) + # if the first call response is not a finished resource, the + # method will sleep for 60 seconds and increase this sleep time + # boundary till the elapsed time goes over 5 minutes. When that + # happens and the resource is still not created, counters are + # initialized again and the sleep period will start from 60s + # repeating the increasing process. + +Sometimes, it can be useful to report the progress of the resource. To that +end, ``api.ok`` accepts a ``progress_cb`` callback function that will be called +every time that the status is checked internally. The progress will be a +decimal number in the [0, 1] range + +.. code-block:: python + + def progress_log(progress, resource): + """Logs the progress of a resource""" + resource_id = resource["resource"] + progress_percentage = int(progress * 100) + print(f"The progress of {resource_id} is {progress_percentage}%") + + dataset = api.get_dataset("anomaly/5e4ee08e440ca13244102dbd") + api.ok(dataset, progress_cb=progress_log) + + +As explained previously, the ``api.ok`` method updates the contents of the +variable that is given as first argument. If you prefer to wait +for the resource without side effects on that variable, you can +also use the ``check_resource`` function: + +.. code-block:: python + + check_resource(resource, api.get_source) + +that will constantly query the API until the resource gets to a FINISHED or +FAULTY state, or can also be used with ``wait_time`` (in seconds) +and ``retries`` +arguments to control the polling: + +.. code-block:: python + + check_resource(resource, api.get_source, wait_time=2, retries=20) + +The ``wait_time`` value is used as seed to a wait +interval that grows exponentially with the number of retries up to the given +``retries`` limit. 
+ +However, in other scenarios you might need to control the complete +evolution of the resource, not only its final states. +There, you can query the status of any resource +with the ``status`` method, which simply returns its value and does not +update the contents of the associated variable: + +.. code-block:: python + + api.status(source) + api.status(dataset) + api.status(model) + api.status(prediction) + api.status(evaluation) + api.status(ensemble) + api.status(batch_prediction) + api.status(cluster) + api.status(centroid) + api.status(batch_centroid) + api.status(anomaly) + api.status(anomaly_score) + api.status(batch_anomaly_score) + api.status(sample) + api.status(correlation) + api.status(statistical_test) + api.status(logistic_regression) + api.status(association) + api.status(association_set) + api.status(topic_model) + api.status(topic_distribution) + api.status(batch_topic_distribution) + api.status(time_series) + api.status(forecast) + api.status(optiml) + api.status(fusion) + api.status(pca) + api.status(projection) + api.status(batch_projection) + api.status(linear_regression) + api.status(script) + api.status(execution) + api.status(library) + +Remember that, consequently, you will need to retrieve the resources +explicitly in your code to get the updated information. + + +Projects +~~~~~~~~ + +A special kind of resource is ``project``. Projects are repositories +for resources, intended to fulfill organizational purposes. Each project can +contain any other kind of resource, but the project that a certain resource +belongs to is determined by the one used in the ``source`` +they are generated from. Thus, when a source is created +and assigned a certain ``project_id``, the rest of resources generated from +this source will remain in this project. + +The REST calls to manage the ``project`` resemble the ones used to manage the +rest of resources. When you create a ``project``: + +.. 
code-block:: python + + from bigml.api import BigML + api = BigML() + + project = api.create_project({'name': 'my first project'}) + +the resulting resource is similar to the rest of resources, although shorter: + +.. code-block:: python + + {'code': 201, + 'resource': u'project/54a1bd0958a27e3c4c0002f0', + 'location': 'http://bigml.io/andromeda/project/54a1bd0958a27e3c4c0002f0', + 'object': {u'category': 0, + u'updated': u'2014-12-29T20:43:53.060045', + u'resource': u'project/54a1bd0958a27e3c4c0002f0', + u'name': u'my first project', + u'created': u'2014-12-29T20:43:53.060013', + u'tags': [], + u'private': True, + u'dev': None, + u'description': u''}, + 'error': None} + +and you can use its project id to get, update or delete it: + +.. code-block:: python + + project = api.get_project('project/54a1bd0958a27e3c4c0002f0') + api.update_project(project['resource'], + {'description': 'This is my first project'}) + + api.delete_project(project['resource']) + +**Important**: Deleting a non-empty project will also delete **all resources** +assigned to it, so please be extra-careful when using +the ``api.delete_project`` call. + +Creating External Connectors +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To create an external connector to an existing database you need to use the +``create_external_connector`` method. The only required parameter is the +dictionary that contains the information needed to connect to the particular +database/table. The attributes of the connection dictionary needed for the +method to work will depend on the type of database used. + +For instance, you can create a connection to an ``Elasticsearch`` database +hosted locally at port ``9200`` by calling: + +.. 
code-block:: python + + from bigml.api import BigML + api = BigML() + + external_connector = api.create_external_connector( \ + {"hosts": ["localhost:9200"]}, {"source": "elasticsearch"}) + +where the first argument contains the information about the host +and ``source`` contains the type of database to connect to (allowed types are: +``elasticsearch``, ``postgresql``, ``mysql``, ``sqlserver``). If no ``source`` +type is set, ``postgresql`` will be used as default value. + +You can add other properties to that second argument, like the name +to be used for this external +connector. All other arguments should be placed in the second parameter: + +.. code-block:: python + + from bigml.api import BigML + api = BigML() + + external_connector = api.create_external_connector( \ + {"hosts": ["localhost:9200"]}, + {"source": "elasticsearch", + "name": "My elasticsearch"}) + + +Creating Sources +~~~~~~~~~~~~~~~~ + +To create a source from a local data file, you can use the +``create_source`` method. The only required parameter is the path to the +data file (or file-like object). You can use a second optional parameter +to specify any of the +options for source creation described in the `BigML API +documentation `_. + +Here's a sample invocation: + +.. code-block:: python + + from bigml.api import BigML + api = BigML() + + source = api.create_source('./data/iris.csv', + {'name': 'my source', 'source_parser': {'missing_tokens': ['?']}}) + +or you may want to create a source from a file in a remote location: + +.. code-block:: python + + source = api.create_source('s3://bigml-public/csv/iris.csv', + {'name': 'my remote source', 'source_parser': {'missing_tokens': ['?']}}) + +or maybe reading the content from stdin: + +.. code-block:: python + + content = StringIO.StringIO(sys.stdin.read()) + source = api.create_source(content, + {'name': 'my stdin source', 'source_parser': {'missing_tokens': ['?']}}) + +or from an existing external connector: + +..
code-block:: python + + content = {"source": "postgresql", + "externalconnector_id": "5ea1d2f7c7736e160900001c", + "query": "select * from table_name"} + source = api.create_source(content, + {'name': 'my stdin source', 'source_parser': {'missing_tokens': ['?']}}) + +or using data stored in a local python variable. The following example +shows the two accepted formats: + +.. code-block:: python + + local = [['a', 'b', 'c'], [1, 2, 3], [4, 5, 6]] + local2 = [{'a': 1, 'b': 2, 'c': 3}, {'a': 4, 'b': 5, 'c': 6}] + source = api.create_source(local, {'name': 'inline source'}) + +As already mentioned, source creation is asynchronous. In both these examples, +the ``api.create_source`` call returns once the file is uploaded. +Then ``source`` will contain a resource whose status code will be either +``WAITING`` or ``QUEUED``. + +For local data files you can go one step further and use asynchronous +uploading: + +.. code-block:: python + + source = api.create_source('./data/iris.csv', + {'name': 'my source', 'source_parser': {'missing_tokens': ['?']}}, + async_load=True) + +In this case, the call fills `source` immediately with a primary resource like: + +.. code-block:: python + + {'code': 202, + 'resource': None, + 'location': None, + 'object': {'status': + {'progress': 0.99, + 'message': 'The upload is in progress', + 'code': 6}}, + 'error': None} + +where the ``source['object']`` status is set to ``UPLOADING`` and its +``progress`` is periodically updated with the current uploading +progress ranging from 0 to 1. When upload completes, this structure will be +replaced by the real resource info as computed by BigML. Therefore source's +status will eventually be (as it is in the synchronous upload case) +``WAITING`` or ``QUEUED``. + +You can retrieve the updated status at any time using the corresponding get +method. For example, to get the status of our source we would use: + +.. 
code-block:: python + + api.status(source) + +Creating Composite Sources (Images) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +BigML offers support to use images or collections of CSVs +in your Machine Learning models. +Uploading images to BigML is as easy as uploading any other file. Each +file will be ingested and a new source will be created from it. To build +Machine Learning models one typically needs lots of images and they are +usually uploaded in batches stored in +``.zip`` or ``.tar`` files. BigML is able to ingest such a file and creates a +``composite source`` from it +and for each file contained in the compressed file a +``component source`` will be created. Thus, a zip file containing two images +can be uploaded to BigML by using the ``create_source`` method: + + +.. code-block:: python + from bigml.api import BigML + api = BigML() + composite_source = api.create_source("images_zip.zip") + +and that operation will create three sources: one per image plus the composite +source that contains them. + +If you put together a bunch of image sources inside a composite, +that composite will also have format "image". If you create a dataset +from it, every row will correspond to one of the images in the composite, +and have a column representing the image data, and another its filename. +Also, BigML will extract around two hundred features per image by default, +representing its gradients histogram, and you can choose several others or +add labels to each image. Please, check the complete `API documentation about +composite sources `_ to +learn how to create them, update their contents while they are ``open`` +(editable) and ``close`` them so that you can create datasets and models +from them. Closing a source makes it immutable, but any source +can be cloned into a new source open to modification. + +..
code-block:: python + from bigml.api import BigML + api = BigML() + closed_source = "source/526fc344035d071ea3031d72" + open_source = api.clone_source(closed_source) + +Images are usually associated to other information, like labels or numeric +fields, which can be regarded as additional attributes related to that +image. The associated information can be described as annotations for +each of the images. These annotations can be +provided as a JSON file that contains the properties associated to +each image and the name of the image file, that is used as foreign key. +The meta information needed to create the structure of the composite source, +such as the fields to be associated and their types, +should also be included in the annotations file. +This is an example of the expected structure of the annotations file: + +.. code-block:: json + + {"description": "Fruit images to test colour distributions", + "images_file": "./fruits_hist.zip", + "new_fields": [{"name": "new_label", "optype": "categorical"}], + "source_id": null, + "annotations": [ + {"file": "f1/fruits1f.png", "new_label": "True"}, + {"file": "f1/fruits1.png", "new_label": "False"}, + {"file": "f2/fruits2e.png", "new_label": "False"}]} + +The ``images_file`` attribute should contain the path to zip-compressed +images file and the "annotations" attribute the corresponding +annotations. The ``new_fields`` attribute should be a list of the fields +used as annotations for the images. + +Also, if you prefer to keep your annotations in a separate file, you +can point to that file in the ``annotations`` attribute: + +.. 
code-block:: json + + {"description": "Fruit images to test colour distributions", + "images_file": "./fruits_hist.zip", + "new_fields": [{"name": "new_label", "optype": "categorical"}], + "source_id": null, + "annotations": "./annotations_detail.json"} + +The created source will contain the fields associated to the +uploaded images, plus an additional field named ``new_label`` with the +values defined in this file. + +If a source has already been created from this collection of images, +you can provide the ID of this source in the ``source_id`` attribute. +If the annotations file contains the source ID information, +the existing source will be updated to add the new annotations +(if still open for editing) or will be cloned (if the source is +closed for editing) and the new source will be updated. In both cases, +images won't be uploaded again. + + +Creating Datasets +~~~~~~~~~~~~~~~~~ + +Once you have created a source, you can create a dataset. The only +required argument to create a dataset is a source id. You can add all +the additional arguments accepted by BigML and documented in the +`Datasets section of the Developer's +documentation `_. + +For example, to create a dataset named "my dataset" with the first 1024 +bytes of a source, you can submit the following request: + +.. code-block:: python + + dataset = api.create_dataset(source, {"name": "my dataset", "size": 1024}) + +Upon success, the dataset creation job will be queued for execution, and +you can follow its evolution using ``api.status(dataset)``. + +As for the rest of resources, the create method will return an incomplete +object, that can be updated by issuing the corresponding +``api.get_dataset`` call until it reaches a ``FINISHED`` status. +Then you can export the dataset data to a CSV file using: + +.. 
code-block:: python + + api.download_dataset('dataset/526fc344035d071ea3031d75', + filename='my_dir/my_dataset.csv') + +You can also extract samples from an existing dataset and generate a new one +with them using the ``api.create_dataset`` method. The first argument should +be the origin dataset and the rest of arguments that set the range or the +sampling rate should be passed as a dictionary. For instance, to create a new +dataset extracting the 80% of instances from an existing one, you could use: + +.. code-block:: python + + dataset = api.create_dataset(origin_dataset, {"sample_rate": 0.8}) + +Similarly, if you want to split your source into training and test datasets, +you can set the `sample_rate` as before to create the training dataset and +use the `out_of_bag` option to assign the complementary subset of data to the +test dataset. If you set the `seed` argument to a value of your choice, you +will ensure a deterministic sampling, so that each time you execute this call +you will get the same datasets as a result and they will be complementary: + +.. code-block:: python + + origin_dataset = api.create_dataset(source) + train_dataset = api.create_dataset( + origin_dataset, {"name": "Dataset Name | Training", + "sample_rate": 0.8, "seed": "my seed"}) + test_dataset = api.create_dataset( + origin_dataset, {"name": "Dataset Name | Test", + "sample_rate": 0.8, "seed": "my seed", + "out_of_bag": True}) + +Sometimes, like for time series evaluations, it's important that the data +in your train and test datasets is ordered. In this case, the split +cannot be done at random. You will need to start from an ordered dataset and +decide the ranges devoted to training and testing using the ``range`` +attribute: + +.. 
code-block:: python + + origin_dataset = api.create_dataset(source) + train_dataset = api.create_dataset( + origin_dataset, {"name": "Dataset Name | Training", + "range": [1, 80]}) + test_dataset = api.create_dataset( + origin_dataset, {"name": "Dataset Name | Test", + "range": [81, 100]}) + + +It is also possible to generate a dataset from a list of datasets +(multidataset): + +.. code-block:: python + + dataset1 = api.create_dataset(source1) + dataset2 = api.create_dataset(source2) + multidataset = api.create_dataset([dataset1, dataset2]) + +Clusters can also be used to generate datasets containing the instances +grouped around each centroid. You will need the cluster id and the centroid id +to reference the dataset to be created. For instance, + +.. code-block:: python + + cluster = api.create_cluster(dataset) + cluster_dataset_1 = api.create_dataset(cluster, + args={'centroid': '000000'}) + +would generate a new dataset containing the subset of instances in the cluster +associated to the centroid id ``000000``. + +Existing datasets can also be cloned: + +.. code-block:: python + from bigml.api import BigML + api = BigML() + dataset = "dataset/526fc344035d071ea3031d76" + cloned_dataset = api.clone_dataset(dataset) + + +Creating Models +~~~~~~~~~~~~~~~ + +Once you have created a dataset you can create a model from it. If you don't +select one, the model will use the last field of the dataset as objective +field. The only required argument to create a model is a dataset id. +You can also +include in the request all the additional arguments accepted by BigML +and documented in the `Models section of the Developer's +documentation `_. + +For example, to create a model only including the first two fields and +the first 10 instances in the dataset, you can use the following +invocation: + +.. 
code-block:: python + + model = api.create_model(dataset, { + "name": "my model", "input_fields": ["000000", "000001"], "range": [1, 10]}) + +Again, the model is scheduled for creation, and you can retrieve its +status at any time by means of ``api.status(model)``. + +Models can also be created from lists of datasets. Just use the list of ids +as the first argument in the api call + +.. code-block:: python + + model = api.create_model([dataset1, dataset2], { + "name": "my model", "input_fields": ["000000", "000001"], "range": [1, 10]}) + +And they can also be generated as the result of a clustering procedure. When +a cluster is created, a model that predicts if a certain instance belongs to +a concrete centroid can be built by providing the cluster and centroid ids: + +.. code-block:: python + + model = api.create_model(cluster, { + "name": "model for centroid 000001", "centroid": "000001"}) + +if no centroid id is provided, the first one appearing in the cluster is used. + +Existing models can also be cloned: + +.. code-block:: python + from bigml.api import BigML + api = BigML() + model = "model/526fc344035d071ea3031d76" + cloned_model = api.clone_model(model) + + +Creating Clusters +~~~~~~~~~~~~~~~~~ + +If your dataset has no fields showing the objective information to +predict for the training data, you can still build a cluster +that will group similar data around +some automatically chosen points (centroids). Again, the only required +argument to create a cluster is the dataset id. You can also +include in the request all the additional arguments accepted by BigML +and documented in the `Clusters section of the Developer's +documentation `_. + +Let's create a cluster from a given dataset: + +.. code-block:: python + + cluster = api.create_cluster(dataset, {"name": "my cluster", + "k": 5}) + +that will create a cluster with 5 centroids. + +Existing clusters can also be cloned: + +.. 
code-block:: python + from bigml.api import BigML + api = BigML() + cluster = "cluster/526fc344035d071ea3031d76" + cloned_cluster = api.clone_cluster(cluster) + + +Creating Anomaly Detectors +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your problem is finding the anomalous data in your dataset, you can +build an anomaly detector, that will use iforest to single out the +anomalous records. Again, the only required +argument to create an anomaly detector is the dataset id. You can also +include in the request all the additional arguments accepted by BigML +and documented in the `Anomaly detectors section of the Developer's +documentation `_. + +Let's create an anomaly detector from a given dataset: + +.. code-block:: python + + anomaly = api.create_anomaly(dataset, {"name": "my anomaly"}) + +that will create an anomaly resource with a `top_anomalies` block of the +most anomalous points. + +Existing anomaly detectors can also be cloned: + +.. code-block:: python + from bigml.api import BigML + api = BigML() + anomaly = "anomaly/526fc344035d071ea3031d76" + cloned_anomaly = api.clone_anomaly(anomaly) + + +Creating Associations +~~~~~~~~~~~~~~~~~~~~~ + +To find relations between the field values you can create an association +discovery resource. The only required argument to create an association +is a dataset id. +You can also +include in the request all the additional arguments accepted by BigML +and documented in the `Association section of the Developer's +documentation `_. + +For example, to create an association only including the first two fields and +the first 10 instances in the dataset, you can use the following +invocation: + +.. code-block:: python + + association = api.create_association(dataset, { \ + "name": "my association", "input_fields": ["000000", "000001"], \ + "range": [1, 10]}) + +Again, the association is scheduled for creation, and you can retrieve its +status at any time by means of ``api.status(association)``. 
+ +Associations can also be created from lists of datasets. Just use the +list of ids as the first argument in the api call + +.. code-block:: python + + association = api.create_association([dataset1, dataset2], { \ + "name": "my association", "input_fields": ["000000", "000001"], \ + "range": [1, 10]}) + +Existing associations can also be cloned: + +.. code-block:: python + from bigml.api import BigML + api = BigML() + association = "association/526fc344035d071ea3031d76" + cloned_association = api.clone_association(association) + + +Creating Topic Models +~~~~~~~~~~~~~~~~~~~~~ + +To find which topics do your documents refer to you can create a topic model. +The only required argument to create a topic model +is a dataset id. +You can also +include in the request all the additional arguments accepted by BigML +and documented in the `Topic Model section of the Developer's +documentation `_. + +For example, to create a topic model including exactly 32 topics +you can use the following +invocation: + +.. code-block:: python + + topic_model = api.create_topic_model(dataset, { \ + "name": "my topics", "number_of_topics": 32}) + +Again, the topic model is scheduled for creation, and you can retrieve its +status at any time by means of ``api.status(topic_model)``. + +Topic models can also be created from lists of datasets. Just use the +list of ids as the first argument in the api call + +.. code-block:: python + + topic_model = api.create_topic_model([dataset1, dataset2], { \ + "name": "my topics", "number_of_topics": 32}) + +Existing topic models can also be cloned: + +.. code-block:: python + from bigml.api import BigML + api = BigML() + topic_model = "topicmodel/526fc344035d071ea3031d76" + cloned_topic_model = api.clone_topic_model(topic_model) + +Creating Time Series +~~~~~~~~~~~~~~~~~~~~ + +To forecast the behaviour of any numeric variable that depends on its +historical records you can use a time series. 
+The only required argument to create a time series +is a dataset id. +You can also +include in the request all the additional arguments accepted by BigML +and documented in the `Time Series section of the Developer's +documentation `_. + +For example, to create a time series including a forecast of 10 points +for the numeric values you can use the following +invocation: + +.. code-block:: python + + time_series = api.create_time_series(dataset, { \ + "name": "my time series", "horizon": 10}) + +Again, the time series is scheduled for creation, and you can retrieve its +status at any time by means of ``api.status(time_series)``. + +Time series can also be created from lists of datasets. Just use the +list of ids as the first argument in the api call: + +.. code-block:: python + + time_series = api.create_time_series([dataset1, dataset2], { \ + "name": "my time series", "horizon": 10}) + +Existing time series can also be cloned: + +.. code-block:: python + from bigml.api import BigML + api = BigML() + time_series = "timeseries/526fc344035d071ea3031d76" + cloned_time_series = api.clone_time_series(time_series) + + +Creating OptiML +~~~~~~~~~~~~~~~ + +To create an OptiML, the only required argument is a dataset id. +You can also +include in the request all the additional arguments accepted by BigML +and documented in the `OptiML section of the Developer's +documentation `_. + +For example, to create an OptiML which optimizes the accuracy of the model you +can use the following method + +.. code-block:: python + + optiml = api.create_optiml(dataset, { \ + "name": "my optiml", "metric": "accuracy"}) + +The OptiML is then scheduled for creation, and you can retrieve its +status at any time by means of ``api.status(optiml)``. + + +Creating Fusions +~~~~~~~~~~~~~~~~ + +To create a Fusion, the only required argument is a list of models.
+You can also +include in the request all the additional arguments accepted by BigML +and documented in the `Fusion section of the Developer's +documentation `_. + +For example, to create a Fusion you can use this connection method: + +.. code-block:: python + + fusion = api.create_fusion(["model/5af06df94e17277501000010", + "model/5af06df84e17277502000019", + "deepnet/5af06df84e17277502000016", + "ensemble/5af06df74e1727750100000d"], + {"name": "my fusion"}) + +The Fusion is then scheduled for creation, and you can retrieve its +status at any time by means of ``api.status(fusion)``. + +Fusions can also be created by assigning some weights to each model in the +list. In this case, the argument for the create call will be a list of +dictionaries that contain the ``id`` and ``weight`` keys: + +.. code-block:: python + + fusion = api.create_fusion([{"id": "model/5af06df94e17277501000010", + "weight": 10}, + {"id": "model/5af06df84e17277502000019", + "weight": 20}, + {"id": "deepnet/5af06df84e17277502000016", + "weight": 5}], + {"name": "my weighted fusion"}) + + +Creating Predictions +~~~~~~~~~~~~~~~~~~~~ + +You can now use the model resource identifier together with some input +parameters to ask for predictions, using the ``create_prediction`` +method. You can also give the prediction a name: + +.. code-block:: python + + prediction = api.create_prediction(model, + {"sepal length": 5, + "sepal width": 2.5}, + {"name": "my prediction"}) + +To see the prediction you can use ``pprint``: + +.. code-block:: python + + api.pprint(prediction) + +Predictions can be created using any supervised model (model, ensemble, +logistic regression, linear regression, deepnet and fusion) as first argument. + +Creating Centroids +~~~~~~~~~~~~~~~~~~ + +To obtain the centroid associated to new input data, you +can now use the ``create_centroid`` method. Give the method a cluster +identifier and the input data to obtain the centroid. 
+You can also give the centroid prediction a name: + +.. code-block:: python + + centroid = api.create_centroid(cluster, + {"pregnancies": 0, + "plasma glucose": 118, + "blood pressure": 84, + "triceps skin thickness": 47, + "insulin": 230, + "bmi": 45.8, + "diabetes pedigree": 0.551, + "age": 31, + "diabetes": "true"}, + {"name": "my centroid"}) + +Creating Anomaly Scores +~~~~~~~~~~~~~~~~~~~~~~~ + +To obtain the anomaly score associated to new input data, you +can now use the ``create_anomaly_score`` method. Give the method an anomaly +detector identifier and the input data to obtain the score: + +.. code-block:: python + + anomaly_score = api.create_anomaly_score(anomaly, {"src_bytes": 350}, + args={"name": "my score"}) + +Creating Association Sets +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Using the association resource, you can obtain the consequent items associated +by its rules to your input data. These association sets can be obtained calling +the ``create_association_set`` method. The first argument is the association +ID or object and the next one is the input data. + +.. code-block:: python + + association_set = api.create_association_set( \ + association, {"genres": "Action$Adventure"}, \ + args={"name": "my association set"}) + + +Creating Topic Distributions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To obtain the topic distributions associated to new input data, you +can now use the ``create_topic_distribution`` method. Give +the method a topic model identifier and the input data to obtain the distribution: + +.. code-block:: python + + topic_distribution = api.create_topic_distribution( \ + topic_model, + {"Message": "The bubble exploded in 2007."}, + args={"name": "my topic distribution"}) + + +Creating Forecasts +~~~~~~~~~~~~~~~~~~ + +To obtain the forecast associated to a numeric variable, you +can now use the ``create_forecast`` method. Give +the method a time series identifier and the input data to obtain the forecast: + +..
code-block:: python + + forecast = api.create_forecast( \ + time_series, + {"Final": {"horizon": 10}}) + + +Creating Projections +~~~~~~~~~~~~~~~~~~~~ + +You can now use the PCA resource identifier together with some input +parameters to ask for the corresponding projections, +using the ``create_projection`` +method. You can also give the projection a name: + +.. code-block:: python + + projection = api.create_projection(pca, + {"sepal length": 5, + "sepal width": 2.5}, + {"name": "my projection"}) + + + +Creating Evaluations +~~~~~~~~~~~~~~~~~~~~ + +Once you have created a supervised learning model, +you can measure its performance by running a +dataset of test data through it and comparing its predictions to the objective +field real values. Thus, the required arguments to create an evaluation are +a model id and a dataset id. You can also +include in the request all the additional arguments accepted by BigML +and documented in the `Evaluations section of the Developer's +documentation `_. + +For instance, to evaluate a previously created model using an existing dataset +you can use the following call: + +.. code-block:: python + + evaluation = api.create_evaluation(model, dataset, { + "name": "my model"}) + +Again, the evaluation is scheduled for creation and ``api.status(evaluation)`` +will show its state. + +Evaluations can also check the ensembles' performance. To evaluate an ensemble +you can do exactly what we just did for the model case, using the ensemble +object instead of the model as first argument: + +.. code-block:: python + + evaluation = api.create_evaluation(ensemble, dataset) + +Evaluations can be created using any supervised model (including time series) +as first argument. + +Creating ensembles +~~~~~~~~~~~~~~~~~~ + +To improve the performance of your predictions, you can create an ensemble +of models and combine their individual predictions. +The only required argument to create an ensemble is the dataset id: + +.. 
code-block:: python + + ensemble = api.create_ensemble('dataset/5143a51a37203f2cf7000972') + +BigML offers three kinds of ensembles. Two of them are known as ``Decision +Forests`` because they are built as collections of ``Decision trees`` +whose predictions +are aggregated using different combiners (``plurality``, +``confidence weighted``, ``probability weighted``) or setting a ``threshold`` +to issue the ensemble's +prediction. All ``Decision Forests`` use bagging to sample the +data used to build the underlying models. + +As an example of how to create a ``Decision Forest`` +with `20` models, you only need to provide the dataset ID that you want to +build the ensemble from and the number of models: + +.. code-block:: python + + args = {'number_of_models': 20} + ensemble = api.create_ensemble('dataset/5143a51a37203f2cf7000972', args) + +If no ``number_of_models`` is provided, the ensemble will contain 10 models. + +``Random Decision Forests`` fall +also into the ``Decision Forest`` category, +but they only use a subset of the fields chosen +at random at each split. To create this kind of ensemble, just use the +``randomize`` option: + +.. code-block:: python + + args = {'number_of_models': 20, 'randomize': True} + ensemble = api.create_ensemble('dataset/5143a51a37203f2cf7000972', args) + +The third kind of ensemble is ``Boosted Trees``. This type of ensemble uses +quite a different algorithm. The trees used in the ensemble don't have as +objective field the one you want to predict, and they don't aggregate the +underlying models' votes. Instead, the goal is adjusting the coefficients +of a function that will be used to predict. The +models' objective is, therefore, the gradient that minimizes the error +of the predicting function (when comparing its output +with the real values). The process starts with +some initial values and computes these gradients. 
The next step uses the previous +fields plus the last computed gradient field as +the new initial state for the next iteration. +Finally, it stops when the error is smaller than a certain threshold +or iterations reach a user-defined limit. +In classification problems, every category in the ensemble's objective field +would be associated with a subset of the ``Boosted Trees``. The objective of +each subset of trees +is adjusting the function to the probability of belonging +to this particular category. + +In order to build +an ensemble of ``Boosted Trees`` you need to provide the ``boosting`` +attributes. You can learn about the existing attributes in the `ensembles' +section of the API documentation `_, +but a typical attribute to be set would +be the maximum number of iterations: + +.. code-block:: python + + args = {'boosting': {'iterations': 20}} + ensemble = api.create_ensemble('dataset/5143a51a37203f2cf7000972', args) + +Existing ensembles can also be cloned: + +.. code-block:: python + from bigml.api import BigML + api = BigML() + ensemble = "ensemble/526fc344035d071ea3031d76" + cloned_ensemble = api.clone_ensemble(ensemble) + + +Creating Linear Regressions +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For regression problems, you can choose also linear regressions to model +your data. Linear regressions expect the predicted value for the objective +field to be computable as a linear combination of the predictors. + +As the rest of models, linear regressions can be created from a dataset by +calling the corresponding create method: + +.. code-block:: python + + linear_regression = api.create_linear_regression( \ + 'dataset/5143a51a37203f2cf7000972', + {"name": "my linear regression", + "objective_field": "my_objective_field"}) + +In this example, we created a linear regression named +``my linear regression`` and set the objective field to be +``my_objective_field``. 
Other arguments, like ``bias``, +can also be specified as attributes in arguments dictionary at +creation time. +Particularly for categorical fields, there are three different available +``field_codings`` options (``contrast``, ``other`` or the ``dummy`` +default coding). For a more detailed description of the +``field_codings`` attribute and its syntax, please see the `Developers API +Documentation +`_. + +Existing linear regressions can also be cloned: + +.. code-block:: python + from bigml.api import BigML + api = BigML() + linear_regression = "linearregression/526fc344035d071ea3031d76" + cloned_linear_regression = api.clone_linear_regression(linear_regression) + + +Creating logistic regressions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For classification problems, you can choose also logistic regressions to model +your data. Logistic regressions compute a probability associated to each class +in the objective field. The probability is obtained using a logistic +function, whose argument is a linear combination of the field values. + +As the rest of models, logistic regressions can be created from a dataset by +calling the corresponding create method: + +.. code-block:: python + + logistic_regression = api.create_logistic_regression( \ + 'dataset/5143a51a37203f2cf7000972', + {"name": "my logistic regression", + "objective_field": "my_objective_field"}) + +In this example, we created a logistic regression named +``my logistic regression`` and set the objective field to be +``my_objective_field``. Other arguments, like ``bias``, ``missing_numerics`` +and ``c`` can also be specified as attributes in arguments dictionary at +creation time. +Particularly for categorical fields, there are four different available +``field_codings`` options (``dummy``, ``contrast``, ``other`` or the ``one-hot`` +default coding). For a more detailed description of the +``field_codings`` attribute and its syntax, please see the `Developers API +Documentation +`_. 
+ +Existing logistic regressions can also be cloned: + +.. code-block:: python + from bigml.api import BigML + api = BigML() + logistic_regression = "logisticregression/526fc344035d071ea3031d76" + cloned_logistic_regression = api.clone_logistic_regression( + logistic_regression) + +Creating Deepnets +~~~~~~~~~~~~~~~~~ + +Deepnets can also solve classification and regression problems. +Deepnets are an optimized version of Deep Neural Networks, +a class of machine-learned models inspired by the neural +circuitry of the human brain. In these classifiers, the input features +are fed to a group of "nodes" called a "layer". +Each node is essentially a function on the input that +transforms the input features into another value or collection of values. +Then the entire layer transforms an input vector into a new "intermediate" +feature vector. This new vector is fed as input to another layer of nodes. +This process continues layer by layer, until we reach the final "output" +layer of nodes, where the output is the network’s prediction: an array +of per-class probabilities for classification problems or a single, +real value for regression problems. + +Deepnets predictions compute a probability associated to each class +in the objective field for classification problems. +As the rest of models, deepnets can be created from a dataset by +calling the corresponding create method: + +.. code-block:: python + + deepnet = api.create_deepnet( \ + 'dataset/5143a51a37203f2cf7000972', + {"name": "my deepnet", + "objective_field": "my_objective_field"}) + +In this example, we created a deepnet named +``my deepnet`` and set the objective field to be +``my_objective_field``. Other arguments, like ``number_of_hidden_layers``, +``learning_rate`` +and ``missing_numerics`` can also be specified as attributes +in an arguments dictionary at +creation time. For a more detailed description of the +available attributes and its syntax, please see the `Developers API +Documentation +`_. 
+ +Existing deepnets can also be cloned: + +.. code-block:: python + from bigml.api import BigML + api = BigML() + deepnet = "deepnet/526fc344035d071ea3031d76" + cloned_deepnet = api.clone_deepnet(deepnet) + + +Creating PCAs +~~~~~~~~~~~~~ + +In order to reduce the number of features used in the modeling steps, +you can use a PCA (Principal Component Analysis) to find out the best +combination of features that describe the variance of your data. +As the rest of models, PCAs can be created from a dataset by +calling the corresponding create method: + +.. code-block:: python + + pca = api.create_pca( \ + 'dataset/5143a51a37203f2cf7000972', + {"name": "my PCA"}) + +In this example, we created a PCA named +``my PCA``. Other arguments, like ``standardized`` +can also be specified as attributes in arguments dictionary at +creation time. +Please see the `Developers API +Documentation +`_. + +Creating Batch Predictions +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We have shown how to create predictions individually, but when the amount +of predictions to make increases, this procedure is far from optimal. In this +case, the more efficient way of predicting remotely is to create a dataset +containing the input data you want your model to predict from and to give its +id and the one of the model to the ``create_batch_prediction`` api call: + +.. code-block:: python + + batch_prediction = api.create_batch_prediction(model, dataset, { + "name": "my batch prediction", "all_fields": True, + "header": True, + "confidence": True}) + +In this example, setting ``all_fields`` to true causes the input +data to be included in the prediction output, ``header`` controls whether a +headers line is included in the file or not and ``confidence`` set to true +causes the confidence of the prediction to be appended. If none of these +arguments is given, the resulting file will contain the name of the +objective field as a header row followed by the predictions. 
+ +As for the rest of resources, the create method will return an incomplete +object, that can be updated by issuing the corresponding +``api.get_batch_prediction`` call until it reaches a ``FINISHED`` status. +Then you can download the created predictions file using: + +.. code-block:: python + + api.download_batch_prediction('batchprediction/526fc344035d071ea3031d70', + filename='my_dir/my_predictions.csv') + +that will copy the output predictions to the local file given in +``filename``. If no ``filename`` is provided, the method returns a file-like +object that can be read as a stream: + +.. code-block:: python + + CHUNK_SIZE = 1024 + response = api.download_batch_prediction( + 'batchprediction/526fc344035d071ea3031d70') + chunk = response.read(CHUNK_SIZE) + if chunk: + print chunk + +The output of a batch prediction can also be transformed to a source object +using the ``source_from_batch_prediction`` method in the api: + +.. code-block:: python + + api.source_from_batch_prediction( + 'batchprediction/526fc344035d071ea3031d70', + args={'name': 'my_batch_prediction_source'}) + +This code will create a new source object, that can be used again as starting +point to generate datasets. + + +Creating Batch Centroids +~~~~~~~~~~~~~~~~~~~~~~~~ + +As described in the previous section, it is also possible to make centroids' +predictions in batch. First you create a dataset +containing the input data you want your cluster to relate to a centroid. +The ``create_batch_centroid`` call will need the id of the input +data dataset and the +cluster used to assign a centroid to each instance: + +.. code-block:: python + + batch_centroid = api.create_batch_centroid(cluster, dataset, { + "name": "my batch centroid", "all_fields": True, + "header": True}) + +Creating Batch Anomaly Scores +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Input data can also be assigned an anomaly score in batch. You train an +anomaly detector with your training data and then build a dataset from your +input data. 
The ``create_batch_anomaly_score`` call will need the id +of the dataset and of the +anomaly detector to assign an anomaly score to each input data instance: + +.. code-block:: python + + batch_anomaly_score = api.create_batch_anomaly_score(anomaly, dataset, { + "name": "my batch anomaly score", "all_fields": True, + "header": True}) + +Creating Batch Topic Distributions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Input data can also be assigned a topic distribution in batch. You train a +topic model with your training data and then build a dataset from your +input data. The ``create_batch_topic_distribution`` call will need the id +of the dataset and of the +topic model to assign a topic distribution to each input data instance: + +.. code-block:: python + + batch_topic_distribution = api.create_batch_topic_distribution( \ + topic_model, dataset, { + "name": "my batch topic distribution", "all_fields": True, + "header": True}) + +Creating Batch Projections +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Input data can also be assigned a projection in batch. You train a +PCA with your training data and then build a dataset from your +input data. The ``create_batch_projection`` call will need the id +of the input data dataset and of the +PCA to compute the projection that corresponds to each input data instance: + +.. code-block:: python + + batch_projection = api.create_batch_projection( \ + pca, dataset, { + "name": "my batch pca", "all_fields": True, + "header": True}) + +Cloning Resources +~~~~~~~~~~~~~~~~~ + +In the previous sections, you've been able to see that sources, +datasets and models can be cloned using the corresponding +``clone_[resource_type]`` method. + +.. 
code-block:: python + from bigml.api import BigML + api = BigML() + logistic_regression = "logisticregression/526fc344035d071ea3031d76" + cloned_logistic_regression = api.clone_logistic_regression( + logistic_regression) + +Usually, cloning is applied when someone +shares a resource with us and we need to use it in our account. In that case +the link to the shared resource contains a shared hash, which is at the end +of the URL. That shared ID can be used as input to clone it. + +.. code-block:: python + from bigml.api import BigML + api = BigML() + shared_deepnet = "shared/deepnet/s2KQBFQHMeIrbaTF5uncNsM8HKB" + cloned_deepnet = api.clone_deepnet(shared_deepnet) + +Sharing and cloning can be especially useful to users that belong to +one ``Organization``. For privacy reasons, the projects created inside the +``Organization`` are not visible from the private user account environment and +vice versa. If those users create a resource in their private account and then +want to share it in a project that belongs to the organization, they can +create the corresponding secret link and use it to clone it in the +organization's project. That will, of course, need the connection to be +pointing to that specific project. + +.. code-block:: python + from bigml.api import BigML + org_project = "project/526fc344035d071ea3031436" + # Creating a connection to the organization's project + api = BigML(project=org_project) + shared_model = "shared/model/s2KQBFQHMeIrbaTF5uncNsM8HKB" + cloned_model = api.clone_model(shared_model) diff --git a/docs/deleting_resources.rst b/docs/deleting_resources.rst new file mode 100644 index 00000000..56136fd9 --- /dev/null +++ b/docs/deleting_resources.rst @@ -0,0 +1,59 @@ +.. toctree:: + :hidden: + +Deleting Resources +================== + +Resources can be deleted individually using the corresponding method for +each type of resource. + +.. 
code-block:: python + + api.delete_source(source) + api.delete_dataset(dataset) + api.delete_model(model) + api.delete_prediction(prediction) + api.delete_evaluation(evaluation) + api.delete_ensemble(ensemble) + api.delete_batch_prediction(batch_prediction) + api.delete_cluster(cluster) + api.delete_centroid(centroid) + api.delete_batch_centroid(batch_centroid) + api.delete_anomaly(anomaly) + api.delete_anomaly_score(anomaly_score) + api.delete_batch_anomaly_score(batch_anomaly_score) + api.delete_sample(sample) + api.delete_correlation(correlation) + api.delete_statistical_test(statistical_test) + api.delete_logistic_regression(logistic_regression) + api.delete_linear_regression(linear_regression) + api.delete_association(association) + api.delete_association_set(association_set) + api.delete_topic_model(topic_model) + api.delete_topic_distribution(topic_distribution) + api.delete_batch_topic_distribution(batch_topic_distribution) + api.delete_time_series(time_series) + api.delete_forecast(forecast) + api.delete_fusion(fusion) + api.delete_pca(pca) + api.delete_deepnet(deepnet) + api.delete_projection(projection) + api.delete_batch_projection(batch_projection) + api.delete_project(project) + api.delete_script(script) + api.delete_library(library) + api.delete_execution(execution) + api.delete_external_connector(external_connector) + + +Each of the calls above will return a dictionary with the following +keys: + +- **code** If the request is successful, the code will be a + ``bigml.api.HTTP_NO_CONTENT`` (204) status code. Otherwise, it will be + one of the standard HTTP error codes. See the `documentation on + status codes `_ for more + info. +- **error** If the request does not succeed, it will contain a + dictionary with an error code and a message. It will be ``None`` + otherwise. 
diff --git a/docs/images/steps.png b/docs/images/steps.png new file mode 100644 index 00000000..1b11ad19 Binary files /dev/null and b/docs/images/steps.png differ diff --git a/docs/index.rst b/docs/index.rst index 7f680e38..b2f20837 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,12 +3,12 @@ BigML Python Bindings `BigML `_ makes machine learning easy by taking care of the details required to add data-driven decisions and predictive -power to your company. Unlike other machine learning services, BigML +power to your applications. Unlike other machine learning services, BigML creates `beautiful predictive models `_ that can be easily understood and interacted with. -These BigML Python bindings allow you to interact with BigML.io, the API +These BigML Python bindings allow you interacting with BigML.io, the API for BigML. You can use it to easily create, retrieve, list, update, and delete BigML resources (i.e., sources, datasets, models and, predictions). @@ -16,52 +16,144 @@ predictions). This module is licensed under the `Apache License, Version 2.0 `_. -Support -------- - -Please report problems and bugs to our `BigML.io issue -tracker `_. - -Discussions about the different bindings take place in the general -`BigML mailing list `_. Or join us -in our `Campfire chatroom `_. +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: Basic Usage + + quick_start + 101_model + 101_ensemble + 101_deepnet + 101_linear_regression + 101_logistic_regression + 101_optiml + 101_fusion + 101_ts + 101_cluster + 101_anomaly + 101_topic_model + 101_association + 101_pca + 101_scripting + 101_images_classification + 101_images_feature_extraction + 101_object_detection + + +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: Resource Management + + ml_resources + creating_resources + reading_resources + updating_resources + deleting_resources + + +.. 
toctree:: + :maxdepth: 2 + :hidden: + :caption: Client and Server Automation + + local_resources + whizzml_resources Requirements ------------ -Python 2.6 and Python 2.7 are currently supported by these bindings. - -The only mandatory third-party dependency is the -`requests `_ library. This -library is automatically installed during the setup. +Only ``Python 3`` versions are currently supported by these bindings. +Support for Python 2.7.X ended in version ``4.32.3``. + +The basic third-party dependencies are the +`requests `_, +`unidecode `_, +`requests-toolbelt `_, +`bigml-chronos `_, +`msgpack `_, +`numpy `_ and +`scipy `_ libraries. These +libraries are automatically installed during the basic setup. +Support for Google App Engine has been added as of version 3.0.0, +using the `urlfetch` package instead of `requests`. The bindings will also use ``simplejson`` if you happen to have it installed, but that is optional: we fall back to Python's built-in JSON libraries is ``simplejson`` is not found. +`Node.js `_ is not installed by default, but will be +needed for `Local Pipelines `_ to work +when datasets containing new added features are part of the transformation +workflow. + +The bindings provide support to use the ``BigML`` platform to create, update, +get and delete resources, but also to produce local predictions using the +models created in ``BigML``. Most of them will be actionable with the basic +installation, but some additional dependencies are needed to use local +``Topic Models`` and Image Processing models. Please, refer to the +`Installation <#installation>`_ section for details. + +OS Requirements +~~~~~~~~~~~~~~~ + +The basic installation of the bindings is compatible and can be used +on Linux and Windows based Operating Systems. +However, the extra options that allow working with +image processing models (``[images]`` and ``[full]``) are only supported +and tested on Linux-based Operating Systems. 
+For image models, Windows OS is not recommended and cannot be supported out of +the box, because the specific compiler versions or dlls required are +unavailable in general. + Installation ------------ -To install the latest stable release with -`pip `_:: +To install the basic latest stable release with +`pip `_, please use: + +.. code-block:: bash $ pip install bigml -You can also install the development version of the bindings directly -from the Git repository:: +Support for local Topic Distributions (Topic Models' predictions) +and local predictions for datasets that include Images will only be +available as extras, because the libraries used for that are not +usually available in all Operative Systems. If you need to support those, +please check the `Installation Extras <#installation-extras>`_ section. - $ pip install -e git://github.com/bigmlcom/python.git#egg=bigml_python +Installation Extras +------------------- -Importing the module --------------------- +Local Topic Distributions support can be installed using: -To import the module:: +.. code-block:: bash - import bigml.api + pip install bigml[topics] -Alternatively you can just import the BigML class:: +Images local predictions support can be installed using: - from bigml.api import BigML +.. code-block:: bash + + pip install bigml[images] + +The full set of features can be installed using: + +.. code-block:: bash + + pip install bigml[full] + + +WARNING: Mind that installing these extras can require some extra work, as +explained in the `Requirements <#requirements>`_ section. + +You can also install the development version of the bindings directly +from the Git repository + +.. code-block:: bash + + $ pip install -e git://github.com/bigmlcom/python.git#egg=bigml_python Authentication -------------- @@ -71,79 +163,178 @@ and `API key `_ and are always transmitted over HTTPS. 
This module will look for your username and API key in the environment -variables ``BIGML_USERNAME`` and ``BIGML_API_KEY`` respectively. You can +variables ``BIGML_USERNAME`` and ``BIGML_API_KEY`` respectively. + +Unix and MacOS +-------------- + +You can add the following lines to your ``.bashrc`` or ``.bash_profile`` to set -those variables automatically when you log in:: +those variables automatically when you log in: + +.. code-block:: bash export BIGML_USERNAME=myusername export BIGML_API_KEY=ae579e7e53fb9abd646a6ff8aa99d4afe83ac291 -With that environment set up, connecting to BigML is a breeze:: +refer to the next chapters to know how to do that in other operating systems. + +With that environment set up, connecting to BigML is a breeze: + +.. code-block:: python from bigml.api import BigML api = BigML() Otherwise, you can initialize directly when instantiating the BigML -class as follows:: +class as follows: + +.. code-block:: python api = BigML('myusername', 'ae579e7e53fb9abd646a6ff8aa99d4afe83ac291') -Also, you can initialize the library to work in the Sandbox -environment by passing the parameter ``dev_mode``:: +These credentials will allow you to manage any resource in your user +environment. - api = BigML(dev_mode=True) +In BigML a user can also work for an ``organization``. +In this case, the organization administrator should previously assign +permissions for the user to access one or several particular projects +in the organization. +Once permissions are granted, the user can work with resources in a project +according to his permission level by creating a special constructor for +each project. The connection constructor in this case +should include the ``project ID``: + +.. 
code-block:: python + + api = BigML('myusername', 'ae579e7e53fb9abd646a6ff8aa99d4afe83ac291', + project='project/53739b98d994972da7001d4a') + +If the project used in a connection object +does not belong to an existing organization but is one of the +projects under the user's account, all the resources +created or updated with that connection will also be assigned to the +specified project. + +When the resource to be managed is a ``project`` itself, the connection +needs to include the corresponding ``organization ID``: + +.. code-block:: python + + api = BigML('myusername', 'ae579e7e53fb9abd646a6ff8aa99d4afe83ac291', + organization='organization/53739b98d994972da7025d4a') + + +Authentication on Windows +------------------------- + +The credentials should be permanently stored in your system using + +.. code-block:: bash + + setx BIGML_USERNAME myusername + setx BIGML_API_KEY ae579e7e53fb9abd646a6ff8aa99d4afe83ac291 + +Note that ``setx`` will not change the environment variables of your current +console, so you will need to open a new one to start using them. + + +Authentication on Jupyter Notebook +---------------------------------- + +You can set the environment variables using the ``%env`` command in your +cells: -Quick Start ------------ - -Imagine that you want to use `this csv -file `_ containing the `Iris -flower dataset `_ to -predict the species of a flower whose ``sepal length`` is ``5`` and -whose ``sepal width`` is ``2.5``. A preview of the dataset is shown -below. It has 4 numeric fields: ``sepal length``, ``sepal width``, -``petal length``, ``petal width`` and a categorical field: ``species``. -By default, BigML considers the last field in the dataset as the -objective field (i.e., the field that you want to generate predictions -for). - -:: - - sepal length,sepal width,petal length,petal width,species - 5.1,3.5,1.4,0.2,Iris-setosa - 4.9,3.0,1.4,0.2,Iris-setosa - 4.7,3.2,1.3,0.2,Iris-setosa - ... 
- 5.8,2.7,3.9,1.2,Iris-versicolor - 6.0,2.7,5.1,1.6,Iris-versicolor - 5.4,3.0,4.5,1.5,Iris-versicolor - ... - 6.8,3.0,5.5,2.1,Iris-virginica - 5.7,2.5,5.0,2.0,Iris-virginica - 5.8,2.8,5.1,2.4,Iris-virginica - -You can easily generate a prediction following these steps:: +.. code-block:: bash + %env BIGML_USERNAME=myusername + %env BIGML_API_KEY=ae579e7e53fb9abd646a6ff8aa99d4afe83ac291 + + +Alternative domains +------------------- + + +The main public domain for the API service is ``bigml.io``, but there are some +alternative domains, either for Virtual Private Cloud setups or +the australian subdomain (``au.bigml.io``). You can change the remote +server domain +to the VPC particular one by either setting the ``BIGML_DOMAIN`` environment +variable to your VPC subdomain: + +.. code-block:: bash + + export BIGML_DOMAIN=my_VPC.bigml.io + +or setting it when instantiating your connection: + +.. code-block:: python + + api = BigML(domain="my_VPC.bigml.io") + +The corresponding SSL REST calls will be directed to your private domain +henceforth. + +You can also set up your connection to use a particular PredictServer +only for predictions. In order to do so, you'll need to specify a ``Domain`` +object, where you can set up the general domain name as well as the +particular prediction domain name. + +.. code-block:: python + + from bigml.domain import Domain from bigml.api import BigML - api = BigML() + domain_info = Domain(prediction_domain="my_prediction_server.bigml.com", + prediction_protocol="http") - source = api.create_source('./data/iris.csv') - dataset = api.create_dataset(source) - model = api.create_model(dataset) - prediction = api.create_prediction(model, {'sepal length': 5, 'sepal width': 2.5}) + api = BigML(domain=domain_info) -You can then print the prediction using the ``pprint`` method:: +Finally, you can combine all the options and change both the general domain +server, and the prediction domain server. 
- >>> api.pprint(prediction) - species for {"sepal width": 2.5, "sepal length": 5} is Iris-virginica +.. code-block:: python -Fields ------- + from bigml.domain import Domain + from bigml.api import BigML + domain_info = Domain(domain="my_VPC.bigml.io", + prediction_domain="my_prediction_server.bigml.com", + prediction_protocol="https") + + api = BigML(domain=domain_info) + +Some arguments for the Domain constructor are more unusual, but they can also +be used to set your special service endpoints: + +- protocol (string) Protocol for the service + (when different from HTTPS) +- verify (boolean) Sets on/off the SSL verification +- prediction_verify (boolean) Sets on/off the SSL verification + for the prediction server (when different from the general + SSL verification) + +**Note** that the previously existing ``dev_mode`` flag: + +.. code-block:: python + + api = BigML(dev_mode=True) + +that caused the connection to work with the Sandbox ``Development Environment`` +has been **deprecated** because this environment no longer exists. +The existing resources that were previously +created in this environment have been moved +to a special project in the now unique ``Production Environment``, so this +flag is no longer needed to work with them. + + +Fields Structure +---------------- BigML automatically generates idenfiers for each field. To see the fields and the ids and types that have been assigned to a source you can -use ``get_fields``:: +use ``get_fields``: + +.. code-block:: python >>> source = api.get_source(source) >>> api.pprint(api.get_fields(source)) @@ -163,425 +354,371 @@ use ``get_fields``:: u'name': u'species', u'optype': u'categorical'}} -Dataset -------- +When the number of fields becomes very large, it can be useful to exclude or +paginate them. 
This can be done using a query string expression, for instance: -If you want to get some basic statistics for each field you can retrieve -the ``fields`` from the dataset as follows to get a dictionary keyed by -field id:: +.. code-block:: python - >>> dataset = api.get_dataset(dataset) - >>> api.pprint(api.get_fields(dataset)) - { u'000000': { u'column_number': 0, - u'datatype': u'double', - u'name': u'sepal length', - u'optype': u'numeric', - u'summary': { u'maximum': 7.9, - u'median': 5.77889, - u'minimum': 4.3, - u'missing_count': 0, - u'population': 150, - u'splits': [ 4.51526, - 4.67252, - 4.81113, + >>> source = api.get_source(source, "offset=0;limit=10&order_by=name") - [... snip ... ] +would include in the retrieved dictionary the first 10 fields sorted by name. +There's a limit to the number of fields that will be included by default in +a resource description. If your resource has more than ``1000`` fields, +you can either paginate or force all the fields to be returned by using +``limit=-1`` as query string. +To handle field structures you can use the ``Fields`` class. See the +`Fields`_ section. - u'000004': { u'column_number': 4, - u'datatype': u'string', - u'name': u'species', - u'optype': u'categorical', - u'summary': { u'categories': [ [ u'Iris-versicolor', - 50], - [u'Iris-setosa', 50], - [ u'Iris-virginica', - 50]], - u'missing_count': 0}}} - -Model ------ - -One of the greatest things about BigML is that the models that it -generates for you are fully white-boxed. To get the explicit tree-like -predictive model for the example above:: - - >>> model = api.get_model(model) - >>> api.pprint(model['object']['model']['root']) - {u'children': [ - {u'children': [ - {u'children': [{u'count': 38, - u'distribution': [[u'Iris-virginica', 38]], - u'output': u'Iris-virginica', - u'predicate': {u'field': u'000002', - u'operator': u'>', - u'value': 5.05}}, - u'children': [ - - [ ...
] - - {u'count': 50, - u'distribution': [[u'Iris-setosa', 50]], - u'output': u'Iris-setosa', - u'predicate': {u'field': u'000002', - u'operator': u'<=', - u'value': 2.45}}]}, - {u'count': 150, - u'distribution': [[u'Iris-virginica', 50], - [u'Iris-versicolor', 50], - [u'Iris-setosa', 50]], - u'output': u'Iris-virginica', - u'predicate': True}]}}} - -(Note that we have abbreviated the output in the snippet above for -readability: the full predictive model you'll get is going to contain -much more details). - -Creating Resources +ML Resources +------------ + +You'll find a description of the basic resources available in BigML in +`ML Resources `_ + +WhizzML Resources +----------------- + +You'll learn about the scripting resources available in BigML in +`WhizzML Resources `_. WhizzML is our scripting +language that will allow you to create any workflow. + + +Managing Resources ------------------ -Newly-created resources are returned in a dictionary with the following -keys: - -- **code**: If the request is successful you will get a - ``bigml.api.HTTP_CREATED`` (201) status code. Otherwise, it will be - one of the standard HTTP error codes `detailed in the - documentation `_. -- **resource**: The identifier of the new resource. -- **location**: The location of the new resource. -- **object**: The resource itself, as computed by BigML. -- **error**: If an error occurs and the resource cannot be created, it - will contain an additional code and a description of the error. In - this case, **location**, and **resource** will be ``None``. - -Statuses -~~~~~~~~ - -Please, bear in mind that resource creation is almost always -asynchronous (**predictions** are the only exception). Therefore, when -you create a new source, a new dataset or a new model, even if you -receive an immediate response from the BigML servers, the full creation -of the resource can take from a few seconds to a few days, depending on -the size of the resource and BigML's load.
A resource is not fully -created until its status is ``bigml.api.FINISHED``. See the -`documentation on status -codes `_ for the listing of -potential states and their semantics. So depending on your application -you might need to import the following constants:: - - from bigml.api import WAITING - from bigml.api import QUEUED - from bigml.api import STARTED - from bigml.api import IN_PROGRESS - from bigml.api import SUMMARIZED - from bigml.api import FINISHED - from bigml.api import FAULTY - from bigml.api import UNKNOWN - from bigml.api import RUNNABLE - -You can query the status of any resource with the ``status`` method:: - - api.status(source) - api.status(dataset) - api.status(model) - api.status(prediction) - -Before invoking the creation of a new resource, the library checks that -the status of the resource that is passed as a parameter is -``FINISHED``. You can change how often the status will be checked with -the ``wait_time`` argument. By default, it is set to 3 seconds. - -Creating sources -~~~~~~~~~~~~~~~~ - -To create a source from a local data file, you can use the -``create_source`` method. The only required parameter is the path to the -data file. You can use a second optional parameter to specify any of the -options for source creation described in the `BigML API -documentation `_. - -Here's a sample invocation:: +You can learn how to create, update, retrieve, list and delete any resource in: - from bigml.api import BigML - api = BigML() +- `Creating Resources `_ +- `Updating Resources `_ +- `Deleting Resources `_ +- `Reading, listing and filtering Resources `_ - source = api.create_source('./data/iris.csv', - {'name': 'my source', 'source_parser': {'missing_tokens': ['?']}}) +Local Resources +--------------- -or you may want to create a source from a file in a remote location:: +You can learn how to download and use in your local environment any of the +models created in the BigML platform in +`Local Resources `_. 
- source = api.create_source('s3://bigml-public/csv/iris.csv', - {'name': 'my remote source', 'source_parser': {'missing_tokens': ['?']}}) +Fields +------ -As already mentioned, source creation is asynchronous: the initial -resource status code will be either ``WAITING`` or ``QUEUED``. You can -retrieve the updated status at any time using the corresponding get -method. For example, to get the status of our source we would use:: +Once you have a resource, you can use the ``Fields`` class to generate a +representation that will allow you to easily list fields, get fields ids, get a +field id by name, column number, etc. - api.status(source) +.. code-block:: python -Creating datasets -~~~~~~~~~~~~~~~~~ + from bigml.api import BigML + from bigml.fields import Fields + api = BigML() + source = api.get_source("source/5143a51a37203f2cf7000974") -Once you have created a source, you can create a dataset. The only -required argument to create a dataset is a source id. You can add all -the additional arguments accepted by BigML and documented in the -`Datasets section of the Developer's -documentation `_. + fields = Fields(source) -For example, to create a dataset named "my dataset" with the first 1024 -bytes of a source, you can submit the following request:: +you can also instantiate the Fields object from the fields dict itself: - dataset = api.create_dataset(source, {"name": "my dataset", "size": 1024}) +.. code-block:: python -Upon success, the dataset creation job will be queued for execution, and -you can follow its evolution using ``api.status(dataset)``. + from bigml.api import BigML + from bigml.fields import Fields + api = BigML() + source = api.get_source("source/5143a51a37203f2cf7000974") -Creating models -~~~~~~~~~~~~~~~ + fields = Fields(source['object']['fields']) -Once you have created a dataset, you can create a model. The only -required argument to create a model is a dataset id. 
You can also -include in the request all the additional arguments accepted by BigML -and documented in the `Models section of the Developer's -documentation `_. +The newly instantiated Fields object will give direct methods to retrieve +different fields properties: -For example, to create a model only including the first two fields and -the first 10 instances in the dataset, you can use the following -invocation:: +.. code-block:: python - model = api.create_model(dataset, { - "name": "my model", "input_fields": ["000000", "000001"], "range": [1, 10]}) + # Internal id of the 'sepal length' field + fields.field_id('sepal length') -Again, the model is scheduled for creation, and you can retrieve its -status at any time by means of ``api.status(model)`` . + # Field name of field with column number 0 + fields.field_name(0) -Creating predictions -~~~~~~~~~~~~~~~~~~~~ + # Column number of field name 'petal length' + fields.field_column_number('petal length') -You can now use the model resource identifier together with some input -parameters to ask for predictions, using the ``create_prediction`` -method. You can also give the prediction a name:: + # Statistics of values in field name 'petal length' + fields.stats('petal length') - prediction = api.create_prediction(model, - {"sepal length": 5, - "sepal width": 2.5}, - {"name": "my prediction"}) +Depending on the resource type, Fields information will vary. ``Sources`` will +have only the name, label, description, type of field (``optype``) while +``dataset`` resources will have also the ``preferred`` (whether a field is +selectable as predictor), ``missing_count``, ``errors`` and a summary of +the values found in each field. This is due to the fact that the ``source`` +object is built by inspecting the contents of a sample of the uploaded file, +while the ``dataset`` resource really reads all the uploaded information. Thus, +dataset's fields structure will always be more complete than source's.
-To see the prediction you can use ``pprint``:: +In both cases, you can extract the summarized information available using +the ``summary_csv`` method: - api.pprint(prediction) +.. code-block:: python -Reading Resources ------------------ + from bigml.api import BigML + from bigml.fields import Fields + api = BigML() + dataset = api.get_dataset("dataset/5143a51a37203f2cf7300974") -When retrieved individually, resources are returned as a dictionary -identical to the one you get when you create a new resource. However, -the status code will be ``bigml.api.HTTP_OK`` if the resource can be -retrieved without problems, or one of the HTTP standard error codes -otherwise. + fields = Fields(dataset) + fields.summary_csv("my_fields_summary.csv") -Listing Resources ------------------ +In this example, the information will be stored in the +``my_fields_summary.csv`` file. For the typical ``iris.csv`` data file, the +summary will read: -You can list resources with the appropriate api method:: +.. csv-table:: + :header: "field column","field ID","field name","field label","field description","field type","preferred","missing count","errors","contents summary","errors summary" + :widths: 5, 10, 20, 5, 5, 10, 10, 5, 5, 100, 10 - api.list_sources() - api.list_datasets() - api.list_models() - api.list_predictions() + 0,000000,sepal length,,,numeric,true,0,0,"[4.3, 7.9], mean: 5.84333", + 1,000001,sepal width,,,numeric,false,0,0,"[2, 4.4], mean: 3.05733", + 2,000002,petal length,,,numeric,true,0,0,"[1, 6.9], mean: 3.758", + 3,000003,petal width,,,numeric,true,0,0,"[0.1, 2.5], mean: 1.19933", + 4,000004,species,,,categorical,true,0,0,"3 categorìes: Iris-setosa (50), Iris-versicolor (50), Iris-virginica (50)", -you will receive a dictionary with the following keys: +Another utility in the ``Fields`` object will help you update the updatable +attributes of your source or dataset fields. 
For instance, if you +need to update the type associated to one field in your dataset, +you can change the ``field type`` +values in the previous file and use it to obtain the fields structure +needed to update your source: -- **code**: If the request is successful you will get a - ``bigml.api.HTTP_OK`` (200) status code. Otherwise, it will be one of - the standard HTTP error codes. See `BigML documentation on status - codes `_ for more info. -- **meta**: A dictionary including the following keys that can help you - paginate listings: +.. code-block:: python - - **previous**: Path to get the previous page or ``None`` if there - is no previous page. - - **next**: Path to get the next page or ``None`` if there is no - next page. - - **offset**: How far off from the first entry in the resources is - the first one listed in the resources key. - - **limit**: Maximum number of resources that you will get listed in - the resources key. - - **total\_count**: The total number of resources in BigML. + from bigml.api import BigML + from bigml.fields import Fields + api = BigML() + source = api.get_source("source/5143a51a37203f2cf7000974") -- **objects**: A list of resources as returned by BigML. -- **error**: If an error occurs and the resource cannot be created, it - will contain an additional code and a description of the error. In - this case, **meta**, and **resources** will be ``None``. + fields = Fields(source) + fields_update_info = fields.new_fields_structure("my_fields_summary.csv") + source = api.update_source(source, \ + fields.filter_fields_update(fields_update_info)) -Filtering Resources -~~~~~~~~~~~~~~~~~~~ +where ``filter_fields_update`` will make sure that only the attributes that +can be updated in a source will be sent in the update request. +For both sources and datasets, the updatable attributes are ``name``, ``label`` +and ``description``. 
+In ``sources`` you can also update the type of the field (``optype``), and +in ``datasets`` you can update the ``preferred`` attribute. -You can filter resources in listings using the syntax and fields labeled -as *filterable* in the `BigML -documentation `_ for each resource. +In addition to that, you can also easily ``pair`` a list of values with fields +ids, which is very +useful to make predictions. -A few examples: +For example, the following snippet may be useful to create local predictions +using a csv file as input: -Ids of the first 5 sources created before April 1st, 2012 -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: python -:: + test_reader = csv.reader(open(dir + test_set)) + local_model = Model(model) + for row in test_reader: + input_data = fields.pair([float(val) for val in row], objective_field) + prediction = local_model.predict(input_data) - [source['resource'] for source in - api.list_sources("limit=5;created__lt=2012-04-1")['objects']] +If you are interfacing with numpy-based libraries, you'll probably want to +generate or read the field values as a numpy array. The ``Fields`` object +offers the ``.from_numpy`` and ``.to_numpy`` methods to that end. In both, +categorical fields will be one-hot encoded automatically by assigning the +indices of the categories as presented in the corresponding field summary. -Name of the first 10 datasets bigger than 1MB -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +..
code-block:: python -:: + from bigml.api import BigML + from bigml.fields import Fields + api = BigML() + model = api.get_model("model/5143a51a37203f2cf7000979") + fields = Fields(model) + # creating a numpy array for the following input data + np_inputs = fields.to_numpy({"petal length": 1}) + # creating an input data dictionary from a numpy array + input_data = fields.from_numpy(np_inputs) - [dataset['name'] for dataset in - api.list_datasets("limit=10;size__gt=1048576")['objects']] +The numpy output of ``.to_numpy`` can be used in the +`ShapWrapper `_ object or other +functions that expect numpy arrays as inputs and the ``.from_numpy`` +output can be used in BigML local predictions as input. -Name of models with more than 5 fields (columns) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +If missing values are present, the ``Fields`` object can return a dict +with the ids of the fields that contain missing values and its count. The +following example: -:: +.. code-block:: python - [model['name'] for model in api.list_models("columns__gt=5")['objects']] + from bigml.fields import Fields + from bigml.api import BigML + api = BigML() + dataset = api.get_dataset("dataset/5339d42337203f233e000015") -Ids of predictions whose model has not been deleted -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + fields = Fields(dataset) + fields.missing_counts() -:: +would output: - [prediction['resource'] for prediction in - api.list_predictions("model_status=true")['objects']] +.. code-block:: python -Ordering Resources -~~~~~~~~~~~~~~~~~~ + {'000003': 1, '000000': 1, '000001': 1} -You can order resources in listings using the syntax and fields labeled -as *sortable* in the `BigML -documentation `_ for each resource. +if there was a missing value in each of the fields whose ids are +``000003``, ``000000``, ``000001``.
-A few examples: +You can also obtain the counts of errors per field using the ``error_counts`` +method of the api: -Name of sources ordered by size -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: python -:: + from bigml.api import BigML + api = BigML() + dataset = api.get_dataset("dataset/5339d42337203f233e000015") + api.error_counts(dataset) - [source['name'] for source in api.list_sources("order_by=size")['objects']] +The generated output is like the one in ``missing_counts``, that is, the error +counts per field: -Number of instances in datasets created before April 1st, 2012 ordered by size -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: python -:: + {'000000': 1} - [dataset['rows'] for dataset in - api.list_datasets("created__lt=2012-04-1;order_by=size")['objects']] -Model ids ordered by number of predictions (in descending order). -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Account and tasks +----------------- -:: +In BigML, every account has an associated subscription level. The subscription +level will determine the number of tasks that can be performed in parallel in +the platform and the maximum allowed dataset size. This kind of information is +available through the methods ``.get_account_status`` and ``get_tasks_status`` +in the connection object: - [model['resource'] for model in - api.list_models("order_by=-number_of_predictions")['objects']] +.. code-block:: python -Name of predictions ordered by name. -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + from bigml.api import BigML + api = BigML() + api.get_tasks_status() -:: +The result will be a dictionary that contains the number of tasks in use +and their status, the maximum number of tasks and the number of tasks +available. This information can be used to manage the complexity of sending +new creation tasks to BigML.
- [prediction['name'] for prediction in - api.list_predictions("order_by=name")['objects']] +However, we strongly discourage the use of this kind of mechanism, because +it's clearly suboptimal and cumbersome compared to using the scripting +utilities in the platform described in next sections and the ``101`` +documents in the `quick start <#quick-start>`_ section. Scalability, +reproducibility and reusability are the key points in Machine Learning +automation and using WhizzML, BigML's Domain Specific Language for +Machine Learning, provides them out of the box. Client-side approaches +and/or general languages are definitely not the best fit for that. -Updating Resources ------------------- +Environment variables +--------------------- -When you update a resource, it is returned in a dictionary exactly like -the one you get when you create a new one. However the status code will -be ``bigml.api.HTTP_ACCEPTED`` if the resource can be updated without -problems or one of the HTTP standard error codes otherwise. +The bindings will read some configuration values from environment variables. -:: +- ``BIGML_USERNAME``: The name of the user in BigML +- ``BIGML_API_KEY``: The API key for authentication in BigML - api.update_source(source, {"name": "new name"}) - api.update_dataset(dataset, {"name": "new name"}) - api.update_model(model, {"name": "new name"}) - api.update_prediction(prediction, {"name": "new name"}) +For VPCs or on-site API installs, +other than the general public ``bigml.io`` domain: -Deleting Resources ------------------- +- ``BIGML_DOMAIN``: The domain of the BigML API endpoints +- ``BIGML_PROTOCOL``: ``http``/``https`` protocol +- ``BIGML_API_VERSION``: ``andromeda`` version name (empty string if using + PredictServer) +- ``BIGML_SSL_VERIFY``: (``0``/``1``) to set SSL verification -Resources can be deleted individually using the corresponding method for -each type of resource.
+If you are using a Predict Server (or a different API url only for predictions) -:: +- ``BIGML_PREDICTION_DOMAIN``: The domain of the BigML API prediction endpoint +- ``BIGML_PREDICTION_PROTOCOL``: ``http``/``https`` for prediction domain +- ``BIGML_PREDICTION_SSL_VERIFY``: (``0``/``1``) to set SSL verification for + predictions - api.delete_source(source) - api.delete_dataset(dataset) - api.delete_model(model) - api.delete_prediction(prediction) +For users working in an organization: -Each of the calls above will return a dictionary with the following -keys: +- ``BIGML_ORGANIZATION``: The ID of the organization -- **code** If the request is successful, the code will be a - ``bigml.api.HTTP_NO_CONTENT`` (204) status code. Otherwise, it wil be - one of the standard HTTP error codes. See the `documentation on - status codes `_ for more - info. -- **error** If the request does not succeed, it will contain a - dictionary with an error code and a message. It will be ``None`` - otherwise. +To use external data connectors: + +- ``BIGML_EXTERNAL_CONN_HOST``: Host name or IP for the external database +- ``BIGML_EXTERNAL_CONN_PORT``: Port for the external database +- ``BIGML_EXTERNAL_CONN_DB``: Database name +- ``BIGML_EXTERNAL_CONN_USER``: Database user name +- ``BIGML_EXTERNAL_CONN_PWD``: Database user password +- ``BIGML_EXTERNAL_CONN_SOURCE``: Type of database: ``mysql``, ``postgresql``, + ``elasticsearch``, etc. (see details in the + `API documentation for external connectors `_) Running the Tests ----------------- -To run the tests you will need to install -`lettuce `_:: +The tests will be run using `pytest `_. +You'll need to set up your authentication +via environment variables, as explained +in the authentication section.
Also some of the tests need other environment +variables like ``BIGML_ORGANIZATION`` to test calls when used by Organization +members and ``BIGML_EXTERNAL_CONN_HOST``, ``BIGML_EXTERNAL_CONN_PORT``, +``BIGML_EXTERNAL_CONN_DB``, ``BIGML_EXTERNAL_CONN_USER``, +``BIGML_EXTERNAL_CONN_PWD`` and ``BIGML_EXTERNAL_CONN_SOURCE`` +in order to test external data connectors. - $ pip install lettuce +With that in place, you can run the test suite simply by issuing -and set up your authentication via environment variables, as explained -above. With that in place, you can run the test suite simply by:: +.. code-block:: bash - $ cd tests - $ lettuce + $ pytest Additionally, `Tox `_ can be used to automatically run the test suite in virtual environments for all -supported Python versions. To install Tox:: +supported Python versions. To install Tox: + +.. code-block:: bash $ pip install tox -Then run the tests from the top-level project directory:: +Then run the tests from the top-level project directory: - $ tox +.. code-block:: bash -Note that tox checks the exit status from the test command (lettuce) to -determine pass/fail, but the latest version of lettuce (0.2.5) -erroneously exits with a non-zero exit status indicating an error. So, -tox will report failures even if the test suite is passing. This -`should be fixed `_ -in the next release of lettuce. + $ tox Building the Documentation -------------------------- -Install the tools required to build the documentation:: +Install the tools required to build the documentation: + +.. code-block:: bash $ pip install sphinx + $ pip install sphinx-rtd-theme + +To build the HTML version of the documentation: -To build the HTML version of the documentation:: +.. code-block:: bash $ cd docs/ $ make html Then launch ``docs/_build/html/index.html`` in your browser. + +Support +------- + +Please report problems and bugs to our `BigML.io issue +tracker `_. 
+ +Discussions about the different bindings take place in the general +`BigML mailing list `_. + + Additional Information ---------------------- For additional information about the API, see the -`BigML developer's documentation `_. +`BigML developer's documentation `_. diff --git a/docs/local_resources.rst b/docs/local_resources.rst new file mode 100644 index 00000000..8cd90ae9 --- /dev/null +++ b/docs/local_resources.rst @@ -0,0 +1,2970 @@ +.. toctree:: + :hidden: + +Local Resources +=============== + +All the resources in BigML can be downloaded and used afterwards locally, with +no connection whatsoever to BigML's servers. This is especially important +for all Supervised and Unsupervised models, that can be used to generate +predictions in any programmable device. The next sections describe how to +do that for each type of resource, but as a general rule, resources can be +exported to a JSON file in your file system using the ``export`` method. + +.. code-block:: python + + api.export('model/5143a51a37203f2cf7000956', + filename='my_dir/my_model.json') + +The contents of the generated file can be used just as the remote model +to generate predictions. As you'll see in next section, the local ``Model`` +object can be instantiated by giving the path to this file as first argument: + +.. code-block:: python + + from bigml.model import Model + local_model = Model("my_dir/my_model.json") + local_model.predict({"petal length": 3, "petal width": 1}) + Iris-versicolor + +These bindings define a particular class for each type of Machine Learning +model that is able to interpret the corresponding JSON and create +the local predictions.
The classes can be instantiated using: + +- The ID of the resource: In this case, the class looks for the JSON + information of the resource first locally (expecting to find a file + in the local storage directory --``./storage`` by default -- + whose name is the ID of the model after replacing ``/`` by ``_``) + and also remotely if absent. + +.. code-block:: python + + from bigml.model import Model + from bigml.api import BigML + + local_model = Model('model/502fdbff15526876610002615') + +- A dictionary containing the resource information. In this case, the class + checks that this information belongs to a finished resource and + contains the attributes needed to create predictions, like the fields + structure. If any of these attributes is absent, retrieves the ID of the + model and tries to download the correct JSON from the API to store it + locally for further use. + + +.. code-block:: python + + from bigml.anomaly import Anomaly + from bigml.api import BigML + api = BigML() + anomaly = api.get_anomaly('anomaly/502fdbff15526876610002615', + query_string='only_model=true;limit=-1') + + local_anomaly = Anomaly(anomaly) + +- A path to the file that contains the JSON information for the resource. + In this case, the + file is read and the same checks mentioned above are done. If any of these + checks fails, it tries to retrieve the correct JSON from the API to store + it locally for further use. + +.. code-block:: python + + from bigml.logistic import LogisticRegression + local_logistic_regression = LogisticRegression('./my_logistic.json') + +Internally, these classes need a connection object +(``api = BigML(storage="./storage")``) to: + +- Set the local storage in your file system. +- Download the JSON of the resource if the information provided is not the + full finished resource content. 
+ +Users can provide the connection as a second argument when instantiating the +class, but if they do and want the resource to be available locally, the +connection object must be created with a ``storage`` setting: + +.. code-block:: python + + from bigml.cluster import Cluster + from bigml.api import BigML + + local_cluster = Cluster('cluster/502fdbff15526876610002435', + api=BigML(my_username, + my_api_key, + storage="my_storage")) + +If no connection is provided, a default connection will be +instantiated internally. This default connection will use ``./storage`` +as default storage directory and the credentials used to connect to +the API when needed are retrieved from the ``BIGML_USERNAME`` and +``BIGML_API_KEY`` environment variables. If no credentials are found in your +environment, any attempt to download the information will raise a condition +asking the user to set these variables. + +If a connection with no ``storage`` information is provided, then the models +will never be stored in your local file system, and will be retrieved from +BigML's API each time the local model is instantiated. + +Ensembles and composite objects, like Fusions, need more than one resource +to be downloaded and stored locally for the class to work. In this case, +the class needs all the component models, +so providing only a local file or a dictionary containing the +JSON for the resource is not enough for the ``Ensemble`` or ``Fusion`` +objects to be fully instantiated. If you only provide that partial information, +the class will use the internal API connection the first time +to download the components. +However, using the ``api.export`` method for ensembles or fusions +will download these component models for you +and will store them in the same directory as the file used to store +the ensemble or fusion information. After that, you can +instantiate the object using the path to the file where the ensemble +or fusion information was stored.
The class will look internally for the +rest of components in the same directory and find them, so no connection to +the API will be done. + +If you use a tag to label the resource, you can also ask for the last resource +that has the tag: + +.. code-block:: python + + api.export_last('my_tag', + resource_type='ensemble', + filename='my_dir/my_ensemble.json') + +and even for a resource inside a project: + +.. code-block:: python + + api.export_last('my_tag', + resource_type='dataset', + project='project/5143a51a37203f2cf7000959', + filename='my_dir/my_dataset.json') + + +Local Datasets +-------------- + +You can instantiate a local version of a dataset so that you can reproduce +its transformations to generate new fields using Flatline expressions. + +.. code-block:: python + + from bigml.dataset import Dataset + local_dataset = Dataset('dataset/502fdbff15526876610003215') + +This will retrieve the remote dataset information, using an implicitly built +``BigML()`` connection object (see the `Authentication <#authentication>`_ +section for more +details on how to set your credentials) and return a Dataset object +that will be stored in the ``./storage`` directory. If you want to use a +specific connection object for the remote retrieval or a different storage +directory, you can set it as second parameter: + +.. code-block:: python + + from bigml.dataset import Dataset + from bigml.api import BigML + + local_dataset = Dataset('dataset/502fdbff15526876610003215', + api=BigML(my_username, + my_api_key, + storage="my_storage")) + +or even use the remote dataset information previously retrieved to build the +local dataset object: + +..
code-block:: python + + from bigml.dataset import Dataset + from bigml.api import BigML + api = BigML() + dataset = api.get_dataset('dataset/502fdbff15526876610003215', + query_string='limit=-1') + + local_dataset = Dataset(dataset) + +As you can see, the ``query_string`` used to retrieve the dataset is +``limit=-1``, which avoids the pagination of fields that is used by default and +includes them all at once. These details are already taken care of in the +two previous examples, where the dataset ID is used as argument. + +You can also build a local dataset from a dataset previously retrieved and +stored in a JSON file: + +.. code-block:: python + + from bigml.dataset import Dataset + local_dataset = Dataset('./my_dataset.json') + +Adding new properties to an existing dataset is achieved by +defining some expressions based on the fields +of a previously existing origin dataset. The expressions are written using +the ``Flatline`` language. These transformations are +stored in a ``new_fields`` attribute and the +``Dataset`` object will store them, if available. +That information can be used to reproduce the same transformations +using new inputs. Of course, the fields in the input data to be transformed +are expected to match the fields structure of the dataset that was +used as origin to create the present one. + + +.. code-block:: python + + from bigml.dataset import Dataset + local_dataset = Dataset('./my_dataset.json') + # The dataset in my_dataset.json was created from a dataset whose fields + # were ``foo`` and ``baz``. The transformation that generated the new + # dataset added a new field ``qux`` whose value is ``baz`` divided by 2 + input_data_list = [{"foo": "bar", "baz": 32}] + output_data_list = local_dataset.transform(input_data_list) + # output_data_list: [{"foo": "bar", "baz": 32, "qux": 16}] + +The ``Dataset`` object offers a method to download a sample of the rows +that can be found in the dataset. + + +.. 
code-block:: python + + from bigml.dataset import Dataset + local_dataset = Dataset('dataset/502fdbff15526876610003215') + rows = local_dataset.get_sample(rows_number=50) + +The result will be a list of lists, which are the row values sorted as +described in the fields structure of the dataset. Of course, +this operation cannot be performed locally. BigML's API will be +called behind the scenes to create a ``Sample`` object and retrieve the +corresponding rows. Similarly, you can use the ``get_input_sample`` +method to get a sample of rows of the origin dataset (if available in BigML). + +.. code-block:: python + + from bigml.dataset import Dataset + local_dataset = Dataset('dataset/502fdbff15526876610003215') + rows = local_dataset.get_input_sample(rows_number=50) + # these rows will represent the values available in the dataset + # that was used as origin to create dataset/502fdbff15526876610003215 + + +Local Models +------------ + +You can instantiate a local version of a remote model. + +.. code-block:: python + + from bigml.model import Model + local_model = Model('model/502fdbff15526876610002615') + +This will retrieve the remote model information, using an implicitly built +``BigML()`` connection object (see the `Authentication <#authentication>`_ +section for more +details on how to set your credentials) and return a Model object +that will be stored in the ``./storage`` directory and +you can use to make local predictions. If you want to use a +specific connection object for the remote retrieval or a different storage +directory, you can set it as second parameter: + +.. code-block:: python + + from bigml.model import Model + from bigml.api import BigML + + local_model = Model('model/502fdbff15526876610002615', + api=BigML(my_username, + my_api_key, + storage="my_storage")) + +or even use the remote model information previously retrieved to build the +local model object: + +.. 
code-block:: python + + from bigml.model import Model + from bigml.api import BigML + api = BigML() + model = api.get_model('model/502fdbff15526876610002615', + query_string='only_model=true;limit=-1') + + local_model = Model(model) + +As you can see, the ``query_string`` used to retrieve the model has two parts. +They both act on the ``fields`` +information that is added to the JSON response. First +``only_model=true`` is used to restrict the fields described in the +``fields`` structure of the response to those used as +predictors in the model. Also +``limit=-1`` avoids the pagination of fields which is used by default and +includes them all at once. These details are already taken care of in the +two previous examples, where the model ID is used as argument. + +Any of these methods will return a ``Model`` object that you can use to make +local predictions, generate IF-THEN rules, Tableau rules +or a Python function that implements the model. + +You can also build a local model from a model previously retrieved and stored +in a JSON file: + +.. code-block:: python + + from bigml.model import Model + local_model = Model('./my_model.json') + + +Local Predictions +----------------- + +Once you have a local model, you can use it to generate predictions locally. + +.. code-block:: python + + local_model.predict({"petal length": 3, "petal width": 1}) + Iris-versicolor + +Local predictions have three clear advantages: + +- Removing the dependency on BigML to make new predictions. + +- No cost (i.e., you do not spend BigML credits). + +- Extremely low latency to generate predictions for huge volumes of data. + +The default output for local predictions is the prediction itself, but you can +also add other properties associated to the prediction, like its +confidence or probability, the distribution of values in the predicted node +(for decision tree models), and the number of instances supporting the +prediction. 
To obtain a +dictionary with the prediction and the available additional +properties use the ``full=True`` argument: + +.. code-block:: python + + local_model.predict({"petal length": 3, "petal width": 1}, full=True) + +that will return: + +.. code-block:: python + + {'count': 47, + 'confidence': 0.92444, + 'probability': 0.9861111111111112, + 'prediction': u'Iris-versicolor', + 'distribution_unit': 'categories', + 'path': [u'petal length > 2.45', + u'petal width <= 1.75', + u'petal length <= 4.95', + u'petal width <= 1.65'], + 'distribution': [[u'Iris-versicolor', 47]]} + +Note that the ``path`` attribute for the ``proportional`` missing strategy +shows the path leading to a final unique node, that gives the prediction, or +to the first split where a missing value is found. Other optional +attributes are +``next`` which contains the field that determines the next split after +the prediction node and ``distribution`` that adds the distribution +that leads to the prediction. For regression models, ``min`` and +``max`` will add the limit values for the data that supports the +prediction. + +When your test data has missing values, you can choose between ``last +prediction`` or ``proportional`` strategy to compute the +prediction. The ``last prediction`` strategy is the one used by +default. To compute a prediction, the algorithm goes down the model's +decision tree and checks the condition it finds at each node (e.g.: +'sepal length' > 2). If the field checked is missing in your input +data you have two options: by default (``last prediction`` strategy) +the algorithm will stop and issue the last prediction it computed in +the previous node. If you chose ``proportional`` strategy instead, the +algorithm will continue to go down the tree considering both branches +from that node on. Thus, it will store a list of possible predictions +from then on, one per valid node. 
In this case, the final prediction +will be the majority (for categorical models) or the average (for +regressions) of the values in that list of possible predictions. + +You can set this strategy by using the ``missing_strategy`` +argument with code ``0`` to use ``last prediction`` and ``1`` for +``proportional``. + +.. code-block:: python + + from bigml.model import LAST_PREDICTION, PROPORTIONAL + # LAST_PREDICTION = 0; PROPORTIONAL = 1 + local_model.predict({"petal length": 3, "petal width": 1}, + missing_strategy=PROPORTIONAL) + +For classification models, it is sometimes useful to obtain a +probability or confidence prediction for each possible class of the +objective field. To do this, you can use the ``predict_probability`` +and ``predict_confidence`` methods respectively. The former gives a +prediction based on the distribution of instances at the appropriate +leaf node, with a Laplace correction based on the root node +distribution. The latter returns a lower confidence bound on the leaf +node probability based on the Wilson score interval. + +Each of these methods takes the ``missing_strategy`` +argument that functions as it does in ``predict``, and one additional +argument, ``compact``. If ``compact`` is ``False`` (the default), the +output of these functions is a list of maps, each with the keys +``prediction`` and ``probability`` (or ``confidence``) mapped to the +class name and its associated probability (or confidence). Note that these +methods replace the functionality of the deprecated ``multiple`` parameter +in the ``predict`` method. + +So, for example, the following: + +.. code-block:: python + + local_model.predict_probability({"petal length": 3}) + +would result in + +.. 
code-block:: python + + [{'prediction': u'Iris-setosa', + 'probability': 0.0033003300330033}, + {'prediction': u'Iris-versicolor', + 'probability': 0.4983498349834984}, + {'prediction': u'Iris-virginica', + 'probability': 0.4983498349834984}] + +If ``compact`` is ``True``, only the probabilities themselves are +returned, as a list in class name order. Note that, for reference, +the attribute ``Model.class_names`` contains the class names in the +appropriate ordering. + +To illustrate, the following: + +.. code-block:: python + + local_model.predict_probability({"petal length": 3}, compact=True) + +would result in + +.. code-block:: python + + [0.0033003300330033, 0.4983498349834984, 0.4983498349834984] + +The output of ``predict_confidence`` is the same, except that the +output maps are keyed with ``confidence`` instead of ``probability``. + + +For classifications, the prediction of a local model will be one of the +available categories in the objective field and an associated ``confidence`` +or ``probability`` that is used to decide which is the predicted category. +If you prefer the model predictions to be operated using any of them, you can +use the ``operating_kind`` argument in the ``predict`` method. +Here's the example +to use predictions based on ``confidence``: + +.. code-block:: python + + local_model.predict({"petal length": 3, "petal width": 1}, + operating_kind="confidence") + +Previous versions of the bindings had additional arguments in the ``predict`` +method that were used to format the prediction attributes. The signature of +the method has been changed to accept only arguments that affect the +prediction itself (like ``missing_strategy``, ``operating_kind`` and +``operating_point``) and ``full``, which is a boolean that controls whether +the output is the prediction itself or a dictionary with all the available +properties associated to the prediction. Formatting can be achieved by using +the ``cast_prediction`` function: + +.. 
code-block:: python + + def cast_prediction(full_prediction, to=None, + confidence=False, probability=False, + path=False, distribution=False, + count=False, next=False, d_min=False, + d_max=False, median=False, + unused_fields=False): + +whose first argument is the prediction obtained with the ``full=True`` +argument, the second one defines the type of output (``None`` to obtain +the prediction output only, "list" or "dict") and the rest of booleans +cause the corresponding property to be included or not. + +Operating point's predictions +----------------------------- + +In classification problems, +Models, Ensembles and Logistic Regressions can be used at different +operating points, that is, associated to particular thresholds. Each +operating point is then defined by the kind of property you use as threshold, +its value and the class that is supposed to be predicted if the threshold +is reached. + +Let's assume you decide that you have a binary problem, with classes ``True`` +and ``False`` as possible outcomes. Imagine you want to be very sure to +predict the ``True`` outcome, so you don't want to predict that unless the +probability associated to it is over ``0.8``. You can achieve this with any +classification model by creating an operating point: + +.. code-block:: python + + operating_point = {"kind": "probability", + "positive_class": "True", + "threshold": 0.8} + +To predict using this restriction, you can use the ``operating_point`` +parameter: + +.. code-block:: python + + prediction = local_model.predict(input_data, + operating_point=operating_point) + +where ``input_data`` should contain the values for which you want to predict. +Local models allow two kinds of operating points: ``probability`` and +``confidence``. For both of them, the threshold can be set to any number +in the ``[0, 1]`` range. + + +Local feature generation for predictions +---------------------------------------- + +All kinds of local models (ensembles, clusters, etc.) 
offer a prediction-like +method that receives the input data to be used as test data and produces the +prediction output (prediction, centroid, etc.). However, one of BigML's +capabilities is automatic feature extraction from date-time +or image fields. Also, the Flatline language allows the user to create +new features from the raw data to be used in modelling. Thus, your model +might use features that have been derived from the original raw data and should +be replicated at prediction time. + +``Local pipelines`` are objects that will store all the +feature extraction and transformations used to produce the dataset that was +used for training (see `Local Pipelines <#local-pipelines>`_). +These objects provide a ``.transform`` method that can be +applied to the raw input data to reproduce the same transformations that +were used to define the training data used by the model from the raw training +data. Every local model class offers a ``.data_transformations`` method that +generates a ``BMLPipeline`` object, storing these transformations. +The user can apply them before calling the corresponding prediction method. + +.. code-block:: python + + from bigml.model import Model + local_model = Model('model/502fdbff15526876610002435') + local_pipeline = local_model.data_transformations() + # the pipeline transform method is applied to lists of dictionaries + # (one row per dictionary). + # For a single prediction, a list of one input is sent to be + # transformed and the result will be a list, whose + # first element is used as transformed input data + input_data = local_pipeline.transform( + [{"petal length": 4.4, "sepal width": 3.2}])[0] + prediction = local_model.predict(input_data) + + +Local Clusters +-------------- + +You can also instantiate a local version of a remote cluster. + +.. 
code-block:: python + + from bigml.cluster import Cluster + local_cluster = Cluster('cluster/502fdbff15526876610002435') + +This will retrieve the remote cluster information, using an implicitly built +``BigML()`` connection object (see the `Authentication <#authentication>`_ +section for more +details on how to set your credentials) and return a ``Cluster`` object +that will be stored in the ``./storage`` directory and +you can use to make local centroid predictions. If you want to use a +specific connection object for the remote retrieval or a different storage +directory, you can set it as second +parameter: + +.. code-block:: python + + from bigml.cluster import Cluster + from bigml.api import BigML + + local_cluster = Cluster('cluster/502fdbff15526876610002435', + api=BigML(my_username, + my_api_key, + storage="my_storage")) + +or even use the remote cluster information previously retrieved to build the +local cluster object: + +.. code-block:: python + + from bigml.cluster import Cluster + from bigml.api import BigML + api = BigML() + cluster = api.get_cluster('cluster/502fdbff15526876610002435', + query_string='limit=-1') + + local_cluster = Cluster(cluster) + +Note that in this example we used a ``limit=-1`` query string for the cluster +retrieval. This ensures that all fields are retrieved by the get method in the +same call (unlike in the standard calls where the number of fields returned is +limited). + +Local clusters also provide methods for the significant operations that +can be done using clusters: finding the centroid assigned to a certain data +point, sorting centroids according to their distance to a data point, +summarizing +the centroids intra-distances and inter-distances and also finding the +closest points to a given one. The `Local Centroids <#local-centroids>`_ +and the +`Summary generation <#summary-generation>`_ sections will +explain these methods. 
+ +Local Centroids +--------------- + +Using the local cluster object, you can predict the centroid associated to +an input data set: + +.. code-block:: python + + local_cluster.centroid({"pregnancies": 0, "plasma glucose": 118, + "blood pressure": 84, "triceps skin thickness": 47, + "insulin": 230, "bmi": 45.8, + "diabetes pedigree": 0.551, "age": 31, + "diabetes": "true"}) + {'distance': 0.454110207355, 'centroid_name': 'Cluster 4', + 'centroid_id': '000004'} + + +You must keep in mind, though, that to obtain a centroid prediction, input data +must have values for all the numeric fields. No missing values for the numeric +fields are allowed unless you provided a ``default_numeric_value`` in the +cluster construction configuration. If so, this value will be used to fill +the missing numeric fields. + +As in the local model predictions, producing local centroids can be done +independently of BigML servers, so no cost or connection latencies are +involved. + +Another interesting method in the cluster object is +``local_cluster.closests_in_cluster``, which given a reference data point +will provide the rest of points that fall into the same cluster sorted +in an ascending order according to their distance to this point. You can limit +the maximum number of points returned by setting the ``number_of_points`` +argument to any positive integer. + +.. code-block:: python + + local_cluster.closests_in_cluster( \ + {"pregnancies": 0, "plasma glucose": 118, + "blood pressure": 84, "triceps skin thickness": 47, + "insulin": 230, "bmi": 45.8, + "diabetes pedigree": 0.551, "age": 31, + "diabetes": "true"}, number_of_points=2) + +The response will be a dictionary with the centroid id of the cluster and +the list of closest points and their distances to the reference point. + +.. 
code-block:: python + + {'closest': [ \ + {'distance': 0.06912270988567025, + 'data': {'plasma glucose': '115', 'blood pressure': '70', + 'triceps skin thickness': '30', 'pregnancies': '1', + 'bmi': '34.6', 'diabetes pedigree': '0.529', + 'insulin': '96', 'age': '32', 'diabetes': 'true'}}, + {'distance': 0.10396456577958413, + 'data': {'plasma glucose': '167', 'blood pressure': '74', + 'triceps skin thickness': '17', 'pregnancies': '1', 'bmi': '23.4', + 'diabetes pedigree': '0.447', 'insulin': '144', 'age': '33', + 'diabetes': 'true'}}], + 'reference': {'age': 31, 'bmi': 45.8, 'plasma glucose': 118, + 'insulin': 230, 'blood pressure': 84, + 'pregnancies': 0, 'triceps skin thickness': 47, + 'diabetes pedigree': 0.551, 'diabetes': 'true'}, + 'centroid_id': u'000000'} + +No missing numeric values are allowed either in the reference data point. +If you want the data points to belong to a different cluster, you can +provide the ``centroid_id`` for the cluster as an additional argument. + +Other utility methods are ``local_cluster.sorted_centroids`` which given +a reference data point will provide the list of centroids sorted according +to the distance to it + +.. 
code-block:: python + + local_cluster.sorted_centroids( \ + {'plasma glucose': '115', 'blood pressure': '70', + 'triceps skin thickness': '30', 'pregnancies': '1', + 'bmi': '34.6', 'diabetes pedigree': '0.529', + 'insulin': '96', 'age': '32', 'diabetes': 'true'}) + {'centroids': [{'distance': 0.31656890408929705, + 'data': {u'000006': 0.34571, u'000007': 30.7619, + u'000000': 3.79592, u'000008': u'false'}, + 'centroid_id': u'000000'}, + {'distance': 0.4424198506958207, + 'data': {u'000006': 0.77087, u'000007': 45.50943, + u'000000': 5.90566, u'000008': u'true'}, + 'centroid_id': u'000001'}], + 'reference': {'age': '32', 'bmi': '34.6', 'plasma glucose': '115', + 'insulin': '96', 'blood pressure': '70', + 'pregnancies': '1', 'triceps skin thickness': '30', + 'diabetes pedigree': '0.529', 'diabetes': 'true'}} + + + +or ``points_in_cluster`` that returns the list of +data points assigned to a certain cluster, given its ``centroid_id``. + +.. code-block:: python + + centroid_id = "000000" + local_cluster.points_in_cluster(centroid_id) + + +Local Anomaly Detector +---------------------- + +You can also instantiate a local version of a remote anomaly. + +.. code-block:: python + + from bigml.anomaly import Anomaly + local_anomaly = Anomaly('anomaly/502fcbff15526876610002435') + +This will retrieve the remote anomaly detector information, using an implicitly +built ``BigML()`` connection object (see the `Authentication <#authentication>`_ +section for +more details on how to set your credentials) and return an ``Anomaly`` object +that will be stored in the ``./storage`` directory and +you can use to make local anomaly scores. If you want to use a +specific connection object for the remote retrieval or a different storage +directory, you can set it as second +parameter: + +.. 
code-block:: python + + from bigml.anomaly import Anomaly + from bigml.api import BigML + + local_anomaly = Anomaly('anomaly/502fcbff15526876610002435', + api=BigML(my_username, + my_api_key, + storage="my_storage_dir")) + +or even use the remote anomaly information retrieved previously to build the +local anomaly detector object: + +.. code-block:: python + + from bigml.anomaly import Anomaly + from bigml.api import BigML + api = BigML() + anomaly = api.get_anomaly('anomaly/502fcbff15526876610002435', + query_string='limit=-1') + + local_anomaly = Anomaly(anomaly) + +Note that in this example we used a ``limit=-1`` query string for the anomaly +retrieval. This ensures that all fields are retrieved by the get method in the +same call (unlike in the standard calls where the number of fields returned is +limited). + +The anomaly detector object has also the method ``anomalies_filter`` +that will build the LISP filter you would need to filter the original +dataset and create a new one excluding +the top anomalies. Setting the ``include`` parameter to True you can do the +inverse and create a dataset with only the most anomalous data points. + + +Local Anomaly Scores +-------------------- + +Using the local anomaly detector object, you can predict the anomaly score +associated to an input data set: + +.. code-block:: python + + local_anomaly.anomaly_score({"src_bytes": 350}) + 0.9268527808726705 + + +As in the local model predictions, producing local anomaly scores can be done +independently of BigML servers, so no cost or connection latencies are +involved. + +Local Anomaly caching +--------------------- + +Anomalies can become quite large objects. That's why their use of memory +resources can be heavy. If your usual scenario is using many of them +constantly in a disordered way, the best strategy is setting up a cache +system to store them. The local anomaly class provides helpers to +interact with that cache. Here's an example using ``Redis``. + +.. 
code-block:: python + + from bigml.anomaly import Anomaly + import redis + r = redis.Redis() + # First build as you would any core Anomaly object: + anomaly = Anomaly('anomaly/5126965515526876630001b2') + # Store a serialized version in Redis + anomaly.dump(cache_set=r.set) + # (retrieve the external rep from its convenient place) + # Speedy Build from external rep + anomaly = Anomaly('anomaly/5126965515526876630001b2', cache_get=r.get) + # Get scores same as always: + anomaly.anomaly_score({"src_bytes": 350}) + + +Local Logistic Regression +------------------------- + +You can also instantiate a local version of a remote logistic regression. + +.. code-block:: python + + from bigml.logistic import LogisticRegression + local_log_regression = LogisticRegression( + 'logisticregression/502fdbff15526876610042435') + +This will retrieve the remote logistic regression information, +using an implicitly built +``BigML()`` connection object (see the `Authentication <#authentication>`_ +section for more +details on how to set your credentials) and return a ``LogisticRegression`` +object that will be stored in the ``./storage`` directory and +you can use to make local predictions. If you want to use a +specific connection object for the remote retrieval or a different storage +directory, you can set it as second +parameter: + +.. code-block:: python + + from bigml.logistic import LogisticRegression + from bigml.api import BigML + + local_log_regression = LogisticRegression( + 'logisticregression/502fdbff15526876610602435', + api=BigML(my_username, my_api_key, storage="my_storage")) + +You can also reuse a remote logistic regression JSON structure +as previously retrieved to build the +local logistic regression object: + +.. 
code-block:: python + + from bigml.logistic import LogisticRegression + from bigml.api import BigML + api = BigML() + logistic_regression = api.get_logistic_regression( + 'logisticregression/502fdbff15526876610002435', + query_string='limit=-1') + + local_log_regression = LogisticRegression(logistic_regression) + +Note that in this example we used a ``limit=-1`` query string for the +logistic regression retrieval. This ensures that all fields are +retrieved by the get method in the same call (unlike in the standard +calls where the number of fields returned is limited). + +Local Logistic Regression Predictions +------------------------------------- + +Using the local logistic regression object, you can compute the prediction for +an input data set: + +.. code-block:: python + + local_log_regression.predict({"petal length": 2, "sepal length": 1.5, + "petal width": 0.5, "sepal width": 0.7}, + full=True) + {'distribution': [ + {'category': u'Iris-virginica', 'probability': 0.5041444478857267}, + {'category': u'Iris-versicolor', 'probability': 0.46926542042788333}, + {'category': u'Iris-setosa', 'probability': 0.02659013168639014}], + 'prediction': u'Iris-virginica', 'probability': 0.5041444478857267} + +As you can see, the prediction contains the predicted category and the +associated probability. It also shows the distribution of probabilities for +all the possible categories in the objective field. If you only need the +predicted value, you can remove the ``full`` argument. + +You must keep in mind, though, that to obtain a logistic regression +prediction, input data +must have values for all the numeric fields. No missing values for the numeric +fields are allowed. + +For consistency of interface with the ``Model`` class, logistic +regressions also have a ``predict_probability`` method, which takes +the same argument as ``Model.predict_probability``: +``compact``. As stated above, missing values are not allowed, and so +there is no ``missing_strategy`` argument. 
+ +As with local Models, if ``compact`` is ``False`` (the default), the +output is a list of maps, each with the keys ``prediction`` and +``probability`` mapped to the class name and its associated +probability. + +So, for example + +.. code-block:: python + + local_log_regression.predict_probability({"petal length": 2, "sepal length": 1.5, + "petal width": 0.5, "sepal width": 0.7}) + + [{'category': u'Iris-setosa', 'probability': 0.02659013168639014}, + {'category': u'Iris-versicolor', 'probability': 0.46926542042788333}, + {'category': u'Iris-virginica', 'probability': 0.5041444478857267}] + +If ``compact`` is ``True``, only the probabilities themselves are +returned, as a list in class name order, again, as is the case with +local Models. + +Operating point predictions are also available for local logistic regressions +and an example of it would be: + +.. code-block:: python + + operating_point = {"kind": "probability", + "positive_class": "True", + "threshold": 0.8} + local_log_regression.predict(input_data, operating_point=operating_point) + +You can check the +`Operating point's predictions <#operating-point's-predictions>`_ section +to learn about +operating points. For logistic regressions, the only available kind is +``probability``, that sets the threshold of probability to be reached for the +prediction to be the positive class. + +Local Linear Regression +----------------------- + +You can also instantiate a local version of a remote linear regression: + +.. 
code-block:: python + + from bigml.linear import LinearRegression + local_linear_regression = LinearRegression( + 'linearregression/502fdbff15526876610042435') + +This will retrieve the remote linear regression information, +using an implicitly built +``BigML()`` connection object (see the `Authentication <#authentication>`_ +section for more +details on how to set your credentials) and return a ``LinearRegression`` +object that will be stored in the ``./storage`` directory and +you can use to make local predictions. If you want to use a +specific connection object for the remote retrieval or a different storage +directory, you can set it as second +parameter: + +.. code-block:: python + + from bigml.linear import LinearRegression + from bigml.api import BigML + + local_linear_regression = LinearRegression( + 'linearregression/502fdbff15526876610602435', + api=BigML(my_username, my_api_key, storage="my_storage")) + +You can also reuse a remote linear regression JSON structure +as previously retrieved to build the +local linear regression object: + +.. code-block:: python + + from bigml.linear import LinearRegression + from bigml.api import BigML + api = BigML() + linear_regression = api.get_linear_regression( + 'linearregression/502fdbff15526876610002435', + query_string='limit=-1') + + local_linear_regression = LinearRegression(linear_regression) + +Note that in this example we used a ``limit=-1`` query string for the +linear regression retrieval. This ensures that all fields are +retrieved by the get method in the same call (unlike in the standard +calls where the number of fields returned is limited). + +Local Linear Regression Predictions +----------------------------------- + +Using the local ``LinearRegression`` class, you can compute the prediction for +an input data set: + +.. 
code-block:: python + + local_linear_regression.predict({"petal length": 2, "sepal length": 1.5, + "species": "Iris-setosa", + "sepal width": 0.7}, + full=True) + {'confidence_bounds': { + 'prediction_interval': 0.43783924497784293, + 'confidence_interval': 0.2561542783257394}, + 'prediction': -0.6109005499999999, 'unused_fields': ['petal length']} + + +To obtain a linear regression prediction, input data can only have missing +values for fields that already had some missing values in the training data. + +The ``full=True`` argument in the predict method will cause the prediction to include +``confidence bounds`` when available. Some linear regressions will not +contain such information by construction. Also, in order to compute these +bounds locally, you will need ``numpy`` and ``scipy`` in place. +As they are quite heavy libraries, they aren't automatically installed as +dependencies of these bindings. + +Local Deepnet +------------- + +You can also instantiate a local version of a remote Deepnet. + +.. code-block:: python + + from bigml.deepnet import Deepnet + local_deepnet = Deepnet( + 'deepnet/502fdbff15526876610022435') + +This will retrieve the remote deepnet information, +using an implicitly built +``BigML()`` connection object (see the `Authentication <#authentication>`_ +section for more +details on how to set your credentials) and return a ``Deepnet`` +object that will be stored in the ``./storage`` directory and +you can use to make local predictions. If you want to use a +specific connection object for the remote retrieval or a different storage +directory, you can set it as second +parameter: + +.. code-block:: python + + from bigml.deepnet import Deepnet + from bigml.api import BigML + + local_deepnet = Deepnet( + 'deepnet/502fdbff15526876610602435', + api=BigML(my_username, my_api_key, storage="my_storage")) + +You can also reuse a remote Deepnet JSON structure +as previously retrieved to build the +local Deepnet object: + +.. 
code-block:: python + + from bigml.deepnet import Deepnet + from bigml.api import BigML + api = BigML() + deepnet = api.get_deepnet( + 'deepnet/502fdbff15526876610002435', + query_string='limit=-1') + + local_deepnet = Deepnet(deepnet) + +Note that in this example we used a ``limit=-1`` query string for the +deepnet retrieval. This ensures that all fields are +retrieved by the get method in the same call (unlike in the standard +calls where the number of fields returned is limited). + +Local Deepnet Predictions +------------------------- + +Using the local deepnet object, you can compute the prediction for +an input data set: + +.. code-block:: python + + local_deepnet.predict({"petal length": 2, "sepal length": 1.5, + "petal width": 0.5, "sepal width": 0.7}, + full=True) + {'distribution': [ + {'category': u'Iris-virginica', 'probability': 0.5041444478857267}, + {'category': u'Iris-versicolor', 'probability': 0.46926542042788333}, + {'category': u'Iris-setosa', 'probability': 0.02659013168639014}], + 'prediction': u'Iris-virginica', 'probability': 0.5041444478857267} + +As you can see, the full prediction contains the predicted category and the +associated probability. It also shows the distribution of probabilities for +all the possible categories in the objective field. If you only need the +predicted value, you can remove the ``full`` argument. + +To be consistent with the ``Model`` class interface, deepnets +also have a ``predict_probability`` method, which takes +the same argument as ``Model.predict_probability``: +``compact``. + +As with local Models, if ``compact`` is ``False`` (the default), the +output is a list of maps, each with the keys ``prediction`` and +``probability`` mapped to the class name and its associated +probability. + +So, for example + +.. 
code-block:: python + + local_deepnet.predict_probability({"petal length": 2, "sepal length": 1.5, + "petal width": 0.5, "sepal width": 0.7}) + + [{'category': u'Iris-setosa', 'probability': 0.02659013168639014}, + {'category': u'Iris-versicolor', 'probability': 0.46926542042788333}, + {'category': u'Iris-virginica', 'probability': 0.5041444478857267}] + +If ``compact`` is ``True``, only the probabilities themselves are +returned, as a list in class name order, again, as is the case with +local Models. + +Operating point predictions are also available for local deepnets and an +example of it would be: + +.. code-block:: python + + operating_point = {"kind": "probability", + "positive_class": "True", + "threshold": 0.8}; + prediction = local_deepnet.predict(input_data, + operating_point=operating_point) + + +Local Deepnets for images supervised learning and object detection +------------------------------------------------------------------ + +Deepnets include Convolutional Neural Networks, so they can +be used to do classification, regression and object detection based on +images. For image classification and regression, the local Deepnets will just +need some image as input data when doing predictions. The image file should +be provided in input data as the contents to the corresponding image field. + +.. code-block:: python + + input_data = {"000002": "my_image.jpg"} + prediction = local_deepnet.predict(input_data) + +For object detection, as predictions are only based on one image, the input +to be provided is the plain image file itself. + +.. code-block:: python + + prediction = local_deepnet.predict("my_image.jpg") + +Also, object detection Deepnets allow some parameters to be set +at creation time. They slightly modify the operation of the ``Deepnet``, so +they are provided as ``operation_settings``. + +.. 
code-block:: python + + from bigml.deepnet import Deepnet + local_deepnet = Deepnet("deepnet/62a85964128d1c55610003cd", + operation_settings={"region_score_threshold": 0.6}) + prediction = local_deepnet.predict("my_image.jpg") + +The operation settings allowed are ``region_score_threshold``, that will set +the minimum accepted score in the predictions and ``max_objects`` which will +limit the number of regions returned. +The prediction will contain a list of dictionaries that contain the +label, score and box description of the found regions. Each box object is +an array that contains the ``xmin``, ``ymin``, ``xmax`` and ``ymax`` +coordinates: + +.. code-block:: python + + {'prediction': [{'box': [0.67742, 0.30469, 0.79472, 0.37109], + 'label': 'eye', + 'score': 0.83528}, + {'box': [0.3783, 0.27734, 0.50147, 0.35938], + 'label': 'eye', + 'score': 0.79117}, + {'box': [0.67742, 0.77344, 0.739, 0.81445], + 'label': 'eye', + 'score': 0.45094}]} + +**Note**: Local predictions for deepnets built on image datasets can differ +slightly from the predictions obtained by using BigML's API create prediction +call. When uploaded to BigML, images are standardized to a particular +resolution and compressed using the JPEG algorithm while local predictions +maintain the original image information. That can cause minor variations in +regression predictions or the probability associated to classification +predictions. Also object detection predictions can differ slightly, especially +if low ``region_score_threshold`` values are used. + +If anything, the local value will always be slightly more accurate, but if you +need to find results as close as possible to the ones produced in remote +predictions, you can use the ``remote_preprocess`` function in the ``deepnet`` +module. + +..
code-block:: python + + from bigml.deepnet import Deepnet, remote_preprocess + + ld = Deepnet("deepnet/62a85964128d1c55610003cd") + ld.predict(remote_preprocess("./data/images/cats/pexels-pixabay-33358.jpg")) + + +Local Fusion +------------ + +You can also instantiate a local version of a remote Fusion. + +.. code-block:: python + + from bigml.fusion import Fusion + local_fusion = Fusion( + 'fusion/502fdbff15526876610022438') + +This will retrieve the remote fusion information, +using an implicitly built +``BigML()`` connection object (see the `Authentication <#authentication>`_ +section for more +details on how to set your credentials) and return a ``Fusion`` +object that will be stored in the ``./storage`` directory and +you can use to make local predictions. If you want to use a +specific connection object for the remote retrieval or a different storage +directory, you can set it as second +parameter: + +.. code-block:: python + + from bigml.fusion import Fusion + from bigml.api import BigML + + local_fusion = Fusion( + 'fusion/502fdbff15526876610602435', + api=BigML(my_username, my_api_key, storage="my_storage")) + +You can also reuse a remote Fusion JSON structure +as previously retrieved to build the +local Fusion object: + +.. code-block:: python + + from bigml.fusion import Fusion + from bigml.api import BigML + api = BigML() + fusion = api.get_fusion( + 'fusion/502fdbff15526876610002435', + query_string='limit=-1') + + local_fusion = Fusion(fusion) + +Note that in this example we used a ``limit=-1`` query string for the +fusion retrieval. This ensures that all fields are +retrieved by the get method in the same call (unlike in the standard +calls where the number of fields returned is limited). + +Local Fusion Predictions +------------------------- + +Using the local fusion object, you can predict the prediction for +an input data set: + +..
code-block:: python + + local_fusion.predict({"petal length": 2, "sepal length": 1.5, + "petal width": 0.5, "sepal width": 0.7}, + full=True) + {'prediction': u'Iris-setosa', 'probability': 0.45224} + + +As you can see, the full prediction contains the predicted category and the +associated probability. If you only need the +predicted value, you can remove the ``full`` argument. + +To be consistent with the ``Model`` class interface, fusions +have also a ``predict_probability`` method, which takes +the same argument as ``Model.predict``: +``compact``. + +As with local Models, if ``compact`` is ``False`` (the default), the +output is a list of maps, each with the keys ``prediction`` and +``probability`` mapped to the class name and its associated +probability. + +So, for example + +.. code-block:: python + + local_fusion.predict_probability({"petal length": 2, "sepal length": 1.5, + "petal width": 0.5, "sepal width": 0.7}) + + [{'category': u'Iris-setosa', 'probability': 0.45224}, + {'category': u'Iris-versicolor', 'probability': 0.2854}, + {'category': u'Iris-virginica', 'probability': 0.26236}] + + +If ``compact`` is ``True``, only the probabilities themselves are +returned, as a list in class name order, again, as is the case with +local Models. + +Operating point predictions are also available with probability as threshold +for local fusions and an +example of it would be: + +.. code-block:: python + + operating_point = {"kind": "probability", + "positive_class": "True", + "threshold": 0.8}; + prediction = local_fusion.predict(inputData, + operating_point=operating_point) + +Local Association +----------------- + +You can also instantiate a local version of a remote association resource. + +.. 
code-block:: python + + from bigml.association import Association + local_association = Association('association/502fdcff15526876610002435') + +This will retrieve the remote association information, using an implicitly +built +``BigML()`` connection object (see the `Authentication <#authentication>`_ +section for more +details on how to set your credentials) and return an ``Association`` object +that will be stored in the ``./storage`` directory and +you can use to extract the rules found in the original dataset. +If you want to use a +specific connection object for the remote retrieval or a different storage +directory, you can set it as second +parameter: + +.. code-block:: python + + from bigml.association import Association + from bigml.api import BigML + + local_association = Association('association/502fdcff15526876610002435', + api=BigML(my_username, + my_api_key, + storage="my_storage")) + +or even use the remote association information retrieved previously +to build the +local association object: + +.. code-block:: python + + from bigml.association import Association + from bigml.api import BigML + api = BigML() + association = api.get_association('association/502fdcff15526876610002435', + query_string='limit=-1') + + local_association = Association(association) + +Note that in this example we used a ``limit=-1`` query string for the +association retrieval. This ensures that all fields are retrieved by the get +method in the +same call (unlike in the standard calls where the number of fields returned is +limited). + +The created ``Association`` object has some methods to help retrieve the +association rules found in the original data. The ``get_rules`` method will +return the association rules. Arguments can be set to filter the rules +returned according to their ``leverage``, ``strength``, ``support``, ``p_value``, +a list of items involved in the rule or a user-given filter function. + +..
code-block:: python + + from bigml.association import Association + local_association = Association('association/502fdcff15526876610002435') + local_association.get_rules(item_list=["Edible"], min_p_value=0.3) + +In this example, the only rules that will be returned by the ``get_rules`` +method will be the ones that mention ``Edible`` and their ``p_value`` +is greater or equal to ``0.3``. + +The rules can also be stored in a CSV file using ``rules_CSV``: + + +.. code-block:: python + + from bigml.association import Association + local_association = Association('association/502fdcff15526876610002435') + local_association.rules_CSV(file_name='/tmp/my_rules.csv', + min_strength=0.1) + +This example will store the rules whose strength is bigger or equal to 0.1 in +the ``/tmp/my_rules.csv`` file. + +You can also obtain the list of ``items`` parsed in the dataset using the +``get_items`` method. You can also filter the results by field name, by +item names and by a user-given function: + +.. code-block:: python + + from bigml.association import Association + local_association = Association('association/502fdcff15526876610002435') + local_association.get_items(field="Cap Color", + names=["Brown cap", "White cap", "Yellow cap"]) + +This will recover the ``Item`` objects found in the ``Cap Color`` field for +the names in the list, with their properties as described in the +`developers section `_ + + +Local Association Sets +---------------------- + +Using the local association object, you can predict the association sets +related to an input data set: + +.. 
code-block:: python + + local_association.association_set( \ + {"gender": "Female", "genres": "Adventure$Action", \ + "timestamp": 993906291, "occupation": "K-12 student", + "zipcode": 59583, "rating": 3}) + [{'item': {'complement': False, + 'count': 70, + 'field_id': u'000002', + 'name': u'Under 18'}, + 'rules': ['000000'], + 'score': 0.0969181441561211}, + {'item': {'complement': False, + 'count': 216, + 'field_id': u'000007', + 'name': u'Drama'}, + 'score': 0.025050115102862636}, + {'item': {'complement': False, + 'count': 108, + 'field_id': u'000007', + 'name': u'Sci-Fi'}, + 'rules': ['000003'], + 'score': 0.02384578264599424}, + {'item': {'complement': False, + 'count': 40, + 'field_id': u'000002', + 'name': u'56+'}, + 'rules': ['000008', + '000020'], + 'score': 0.021845366022721312}, + {'item': {'complement': False, + 'count': 66, + 'field_id': u'000002', + 'name': u'45-49'}, + 'rules': ['00000e'], + 'score': 0.019657155185835006}] + +As in the local model predictions, producing local association sets can be done +independently of BigML servers, so no cost or connection latencies are +involved. + +Local Topic Model +----------------- + +You can also instantiate a local version of a remote topic model. + +.. code-block:: python + + from bigml.topicmodel import TopicModel + local_topic_model = TopicModel( + 'topicmodel/502fdbcf15526876210042435') + +This will retrieve the remote topic model information, +using an implicitly built +``BigML()`` connection object (see the `Authentication <#authentication>`_ +section for more +details on how to set your credentials) and return a ``TopicModel`` +object that will be stored in the ``./storage`` directory and +you can use to obtain local topic distributions. +If you want to use a +specific connection object for the remote retrieval or a different storage +directory, you can set it as second +parameter: + +.. 
code-block:: python + + from bigml.topicmodel import TopicModel + from bigml.api import BigML + + local_topic_model = TopicModel( + 'topicmodel/502fdbcf15526876210042435', + api=BigML(my_username, my_api_key, storage="my_storage")) + +You can also reuse a remote topic model JSON structure +as previously retrieved to build the +local topic model object: + +.. code-block:: python + + from bigml.topicmodel import TopicModel + from bigml.api import BigML + api = BigML() + topic_model = api.get_topic_model( + 'topicmodel/502fdbcf15526876210042435', + query_string='limit=-1') + + local_topic_model = TopicModel(topic_model) + +Note that in this example we used a ``limit=-1`` query string for the topic +model retrieval. This ensures that all fields are retrieved by the get +method in the +same call (unlike in the standard calls where the number of fields returned is +limited). + +Local Topic Distributions +------------------------- + +Using the local topic model object, you can predict the local topic +distribution for +an input data set: + +.. 
code-block:: python + + local_topic_model.distribution({"Message": "Our mobile phone is free"}) + [ { 'name': u'Topic 00', 'probability': 0.002627154266498529}, + { 'name': u'Topic 01', 'probability': 0.003257671290458176}, + { 'name': u'Topic 02', 'probability': 0.002627154266498529}, + { 'name': u'Topic 03', 'probability': 0.1968263976460698}, + { 'name': u'Topic 04', 'probability': 0.002627154266498529}, + { 'name': u'Topic 05', 'probability': 0.002627154266498529}, + { 'name': u'Topic 06', 'probability': 0.13692728036990331}, + { 'name': u'Topic 07', 'probability': 0.6419714165615805}, + { 'name': u'Topic 08', 'probability': 0.002627154266498529}, + { 'name': u'Topic 09', 'probability': 0.002627154266498529}, + { 'name': u'Topic 10', 'probability': 0.002627154266498529}, + { 'name': u'Topic 11', 'probability': 0.002627154266498529}] + + +As you can see, the topic distribution contains the name of the +possible topics in the model and the +associated probabilities. + +Local Time Series +----------------- + +You can also instantiate a local version of a remote time series. + +.. code-block:: python + + from bigml.timeseries import TimeSeries + local_time_series = TimeSeries( + 'timeseries/502fdbcf15526876210042435') + +This will create a series of models from +the remote time series information, +using an implicitly built +``BigML()`` connection object (see the `Authentication <#authentication>`_ +section for more +details on how to set your credentials) and return a ``TimeSeries`` +object that will be stored in the ``./storage`` directory and +you can use to obtain local forecasts. +If you want to use a +specific connection object for the remote retrieval or a different storage +directory, you can set it as second +parameter: + +.. 
code-block:: python + + from bigml.timeseries import TimeSeries + from bigml.api import BigML + + local_time_series = TimeSeries( \ + 'timeseries/502fdbcf15526876210042435', + api=BigML(my_username, my_api_key, storage="my_storage")) + +You can also reuse a remote time series JSON structure +as previously retrieved to build the +local time series object: + +.. code-block:: python + + from bigml.timeseries import TimeSeries + from bigml.api import BigML + api = BigML() + time_series = api.get_time_series( \ + 'timeseries/502fdbcf15526876210042435', + query_string='limit=-1') + + local_time_series = TimeSeries(time_series) + +Note that in this example we used a ``limit=-1`` query string for the time +series retrieval. This ensures that all fields are retrieved by the get +method in the +same call (unlike in the standard calls where the number of fields returned is +limited). + + +Local Forecasts +--------------- + +Using the local time series object, you can forecast any of the objective +field values: + +.. code-block:: python + + local_time_series.forecast({"Final": {"horizon": 5}, "Assignment": { \ + "horizon": 10, "ets_models": {"criterion": "aic", "limit": 2}}}) + {u'000005': [ + {'point_forecast': [68.53181, 68.53181, 68.53181, 68.53181, 68.53181], + 'model': u'A,N,N'}], + u'000001': [{'point_forecast': [54.776650000000004, 90.00943000000001, + 83.59285000000001, 85.72403000000001, + 72.87196, 93.85872, 84.80786, 84.65522, + 92.52545, 88.78403], + 'model': u'A,N,A'}, + {'point_forecast': [55.882820120000005, 90.5255466567616, + 83.44908577909621, 87.64524353046498, + 74.32914583152592, 95.12372848262932, + 86.69298716626228, 85.31630744944385, + 93.62385478607113, 89.06905451921818], + 'model': u'A,Ad,A'}]} + + +As you can see, the forecast contains the ID of the forecasted field, the +computed points and the name of the models meeting the criterion. +For more details about the available parameters, please check the `API +documentation `_. 
+ + +Local PCAs +---------- + +The ``PCA`` class will create a local version of a remote PCA. + +.. code-block:: python + + from bigml.pca import PCA + local_pca = PCA( + 'pca/502fdbcf15526876210042435') + + +This will create an object that stores the remote information that defines +the PCA, needed to generate +projections to the new dimensionally reduced components. The remote resource +is automatically downloaded the first time the PCA is instantiated by +using an implicitly built +``BigML()`` connection object (see the +`Authentication <#authentication>`_ section for more +details on how to set your credentials). The JSON that contains this +information is stored in a ``./storage`` directory, which is the default +choice. If you want to use a +specific connection object to define the credentials for the authentication +in BigML or the directory where the JSON information is stored, +you can set it as the second parameter: + +.. code-block:: python + + from bigml.pca import PCA + from bigml.api import BigML + + local_pca = PCA( \ + 'pca/502fdbcf15526876210042435', + api=BigML(my_username, my_api_key, storage="my_storage")) + +You can also reuse a remote PCA JSON structure +as previously retrieved to build the +local PCA object: + +.. code-block:: python + + from bigml.pca import PCA + from bigml.api import BigML + api = BigML() + pca = api.get_pca( \ + 'pca/502fdbcf15526876210042435', + query_string='limit=-1') + + local_pca = PCA(pca) + +Note that in this example we used a ``limit=-1`` query string for the PCA +retrieval. This ensures that all fields are retrieved by the get +method in the +same call (unlike in the standard calls where the number of fields returned is +limited). + + +Local Projections +----------------- + +Using the local PCA object, you can compute the projection of +an input dataset into the new components: + +..
code-block:: python + + local_pca.projection({"species": "Iris-versicolor"}) + [6.03852, 8.35456, 5.04432, 0.75338, 0.06787, 0.03018] + +You can use the ``max_components`` and ``variance_threshold`` arguments +to limit the number of components generated. You can also use the ``full`` +argument to produce a dictionary whose keys are the names of the generated +components. + +.. code-block:: python + + local_pca.projection({"species": "Iris-versicolor"}, full=True) + {'PCA1': 6.03852, 'PCA2': 8.35456, 'PCA3': 5.04432, 'PCA4': 0.75338, + 'PCA5': 0.06787, 'PCA6': 0.03018} + +As in the local model predictions, producing local projections can be done +independently of BigML servers, so no cost or connection latencies are +involved. + + +Local Forecasts +--------------- + +Using the local time series object, you can forecast any of the objective +field values: + +.. code-block:: python + + local_time_series.forecast({"Final": {"horizon": 5}, "Assignment": { \ + "horizon": 10, "ets_models": {"criterion": "aic", "limit": 2}}}) + {u'000005': [ + {'point_forecast': [68.53181, 68.53181, 68.53181, 68.53181, 68.53181], + 'model': u'A,N,N'}], + u'000001': [{'point_forecast': [54.776650000000004, 90.00943000000001, + 83.59285000000001, 85.72403000000001, + 72.87196, 93.85872, 84.80786, 84.65522, + 92.52545, 88.78403], + 'model': u'A,N,A'}, + {'point_forecast': [55.882820120000005, 90.5255466567616, + 83.44908577909621, 87.64524353046498, + 74.32914583152592, 95.12372848262932, + 86.69298716626228, 85.31630744944385, + 93.62385478607113, 89.06905451921818], + 'model': u'A,Ad,A'}]} + + +As you can see, the forecast contains the ID of the forecasted field, the +computed points and the name of the models meeting the criterion. +For more details about the available parameters, please check the `API +documentation `_. + + +Multi Models +------------ + +Multi Models use a number of BigML remote models to build a local version +that can be used to generate predictions locally.
Predictions are generated +combining the outputs of each model. + +.. code-block:: python + + from bigml.api import BigML + from bigml.multimodel import MultiModel + + api = BigML() + + model = MultiModel([api.get_model(model['resource']) for model in + api.list_models(query_string="tags__in=my_tag") + ['objects']]) + + model.predict({"petal length": 3, "petal width": 1}) + +This will create a multi model using all the models that have been previously +tagged with ``my_tag`` and predict by combining each model's prediction. +The combination method used by default is ``plurality`` for categorical +predictions and mean value for numerical ones. You can also use ``confidence +weighted``: + +.. code-block:: python + + model.predict({"petal length": 3, "petal width": 1}, method=1) + +that will weight each vote using the confidence/error given by the model +to each prediction, or even ``probability weighted``: + +.. code-block:: python + + model.predict({"petal length": 3, "petal width": 1}, method=2) + +that weights each vote by using the probability associated to the training +distribution at the prediction node. + +There's also a ``threshold`` method that uses an additional set of options: +threshold and category. The category is predicted if and only if +the number of predictions for that category is at least the threshold value. +Otherwise, the prediction is plurality for the rest of predicted values. + +An example of ``threshold`` combination method would be: + +.. code-block:: python + + model.predict({'petal length': 0.9, 'petal width': 3.0}, method=3, + options={'threshold': 3, 'category': 'Iris-virginica'}) + + +When making predictions on a test set with a large number of models, +``batch_predict`` can be useful to log each model's predictions in a +separated file. It expects a list of input data values and the directory path +to save the prediction files in. + +.. 
code-block:: python + + model.batch_predict([{"petal length": 3, "petal width": 1}, + {"petal length": 1, "petal width": 5.1}], + "data/predictions") + +The predictions generated for each model will be stored in an output +file in `data/predictions` using the syntax +`model_[id of the model]__predictions.csv`. For instance, when using +`model/50c0de043b563519830001c2` to predict, the output file name will be +`model_50c0de043b563519830001c2__predictions.csv`. An additional feature is +that using ``reuse=True`` as argument will force the function to skip the +creation of the file if it already exists. This can be +helpful when using repeatedly a bunch of models on the same test set. + +.. code-block:: python + + model.batch_predict([{"petal length": 3, "petal width": 1}, + {"petal length": 1, "petal width": 5.1}], + "data/predictions", reuse=True) + +Prediction files can be subsequently retrieved and converted into a votes list +using ``batch_votes``: + +.. code-block:: python + + model.batch_votes("data/predictions") + +which will return a list of MultiVote objects. Each MultiVote contains a list +of predictions (e.g. ``[{'prediction': u'Iris-versicolor', 'confidence': 0.34, +'order': 0}, {'prediction': u'Iris-setosa', 'confidence': 0.25, +'order': 1}]``). +These votes can be further combined to issue a final +prediction for each input data element using the method ``combine`` + +.. code-block:: python + + for multivote in model.batch_votes("data/predictions"): + prediction = multivote.combine() + +Again, the default method of combination is ``plurality`` for categorical +predictions and mean value for numerical ones. You can also use ``confidence +weighted``: + +.. code-block:: python + + prediction = multivote.combine(1) + +or ``probability weighted``: + +.. code-block:: python + + prediction = multivote.combine(2) + +You can also get a confidence measure for the combined prediction: + +.. 
code-block:: python + + prediction = multivote.combine(0, with_confidence=True) + +For classification, the confidence associated to the combined prediction +is derived by first selecting the model's predictions that voted for the +resulting prediction and computing the weighted average of their individual +confidence. Nevertheless, when ``probability weighted`` is used, +the confidence is obtained by using each model's distribution at the +prediction node to build a probability distribution and combining them. +The confidence is then computed as the Wilson score interval of the +combined distribution (using as total number of instances the sum of all +the model's distributions original instances at the prediction node). + +In regression, all the models predictions' confidences contribute +to the weighted average confidence. + + +Local Ensembles +--------------- + +Remote ensembles can also be used locally through the ``Ensemble`` +class. The simplest way to access an existing ensemble and use it to +predict locally is: + +.. code-block:: python + + from bigml.ensemble import Ensemble + ensemble = Ensemble('ensemble/5143a51a37203f2cf7020351') + ensemble.predict({"petal length": 3, "petal width": 1}) + +This is the simplest method to create a local Ensemble. The +``Ensemble('ensemble/5143a51a37203f2cf7020351')`` constructor fetches +all the related JSON files and stores them in a ``./storage`` directory. Next +calls to ``Ensemble('ensemble/5143a51a37203f2cf7020351')`` will retrieve the +files from this local storage, so that internet connection will only be needed +the first time an ``Ensemble`` is built. + +However, that method can only be used to work with the ensembles in our +account in BigML. If we intend to use ensembles created under an +``Organization``, then +we need to provide the information about the ``project`` that the ensemble +is included in. You need to provide a connection object for that: + +..
code-block:: python + + from bigml.ensemble import Ensemble + from bigml.api import BigML + + # connection object that informs about the project ID and the + # directory where the ensemble will be stored for local use + + api = BigML(project="project/5143a51a37203f2cf7020001", + storage="my_storage_directory") + + ensemble = Ensemble('ensemble/5143a51a37203f2cf7020351', api=api) + ensemble.predict({"petal length": 3, "petal width": 1}) + +The local ensemble object can be used to manage the +three types of ensembles: ``Decision Forests`` (bagging or random) and +the ones using ``Boosted Trees``. Also, you can choose +the storage directory or even avoid storing at all. The ``api`` connection +object controls the storage strategy through the ``storage`` argument. + +.. code-block:: python + + from bigml.api import BigML + from bigml.ensemble import Ensemble + + # api connection using a user-selected storage + api = BigML(storage='./my_storage') + + # creating ensemble + ensemble = api.create_ensemble('dataset/5143a51a37203f2cf7000972') + + # Ensemble object to predict + ensemble = Ensemble(ensemble, api) + ensemble.predict({"petal length": 3, "petal width": 1}, + operating_kind="votes") + +In this example, we create +a new ensemble and store its information in the ``./my_storage`` +folder. Then this information is used to predict locally using the number of +votes (one per model) backing each category. + +The ``operating_kind`` argument overrides the legacy ``method`` argument, which +was previously used to define the combiner for the models' predictions. + +Similarly, local ensembles can also be created by giving a list of models to be +combined to issue the final prediction (note: only random decision forests and +bagging ensembles can be built using this method): + +..
code-block:: python + + from bigml.ensemble import Ensemble + ensemble = Ensemble(['model/50c0de043b563519830001c2', \ + 'model/50c0de043b5635198300031b']) + ensemble.predict({"petal length": 3, "petal width": 1}) + +or even a JSON file that contains the ensemble resource: + +.. code-block:: python + + from bigml.api import BigML + api = BigML() + api.export("ensemble/50c0de043b5635198300033c", + "my_directory/my_ensemble.json") + + from bigml.ensemble import Ensemble + local_ensemble = Ensemble("./my_directory/my_ensemble.json") + +Note: the ensemble JSON structure is not self-contained, meaning that it +contains references to the models that the ensemble is built of, but not the +information of the models themselves. +To use an ensemble locally with no connection to +the internet, you must make sure that not only a local copy of the ensemble +JSON file is available in your computer, but also the JSON files corresponding +to the models in it. The ``export`` method takes care of storing the +information of every model in the ensemble and storing it in the same directory +as the ensemble JSON file. The ``Ensemble`` class will also look for the +model files in the same directory when using a path to an ensemble file as +argument. + +If you have no memory limitations you can create the ensemble +from a list of local model +objects. Then, local model objects will be always in memory and +will only be instantiated once. This will increase +performance for large ensembles: + +.. code-block:: python + + from bigml.model import Model + from bigml.ensemble import Ensemble + model_ids = ['model/50c0de043b563519830001c2', \ + 'model/50c0de043b5635198300031b'] + local_models = [Model(model_id) for model_id in model_ids] + local_ensemble = Ensemble(local_models) + +Local Ensemble caching +---------------------- + +Ensembles can become quite large objects and demand large memory resources.
+If your usual scenario is using many of them +constantly in a disordered way, the best strategy is setting up a cache +system to store them. The local ensemble class provides helpers to +interact with that cache. Here's an example using ``Redis``. + +.. code-block:: python + + from bigml.ensemble import Ensemble + import redis + r = redis.Redis() + # First build as you would any core Ensemble object: + local_ensemble = Ensemble('ensemble/5126965515526876630001b2') + # Store a serialized version in Redis + local_ensemble.dump(cache_set=r.set) + # (retrieve the external rep from its convenient place) + # Speedy Build from external rep + local_ensemble = Ensemble('ensemble/5126965515526876630001b2', \ + cache_get=r.get) + # Get predictions same as always: + local_ensemble.predict({"src_bytes": 350}) + + +Local Ensemble's Predictions +---------------------------- + +As in the local model's case, you can use the local ensemble to create +new predictions for your test data, and set some arguments to configure +the final output of the ``predict`` method. + +The predictions' structure will vary depending on the kind of +ensemble used. For ``Decision Forests`` local predictions will just contain +the ensemble's final prediction if no other argument is used. + +.. code-block:: python + + from bigml.ensemble import Ensemble + ensemble = Ensemble('ensemble/5143a51a37203f2cf7020351') + ensemble.predict({"petal length": 3, "petal width": 1}) + u'Iris-versicolor' + +The final prediction of an ensemble is determined +by aggregating or selecting the predictions of the individual models therein. +For classifications, the most probable class is returned if no special +operating method is set. Using ``full=True`` you can see both the predicted +output and the associated probability: + +..
code-block:: python + + from bigml.ensemble import Ensemble + ensemble = Ensemble('ensemble/5143a51a37203f2cf7020351') + ensemble.predict({"petal length": 3, "petal width": 1}, \ + full=True) + + {'prediction': u'Iris-versicolor', + 'probability': 0.98566} + +In general, the prediction in a classification +will be one amongst the list of categories in the objective +field. When each model in the ensemble +is used to predict, each category has a confidence, a +probability or a vote associated with this prediction. +Then, through the collection +of models in the +ensemble, each category gets an averaged confidence, probability and number of +votes. Thus you can decide whether to operate the ensemble using the +``confidence``, the ``probability`` or the ``votes`` so that the predicted +category is the one that scores highest in any of these quantities. The +criterion can be set using the ``operating_kind`` option (default is set to +``probability``): + +.. code-block:: python + + ensemble.predict({"petal length": 3, "petal width": 1}, \ + operating_kind="votes") + +Regression will generate a prediction and an associated error; however, +``Boosted Trees`` don't have an associated confidence measure, so +only the prediction will be obtained in this case. + +For consistency of interface with the ``Model`` class, as well as +between boosted and non-boosted ensembles, local Ensembles again have +a ``predict_probability`` method. This takes the same optional +arguments as ``Model.predict``: ``missing_strategy`` and +``compact``. As with local Models, if ``compact`` is ``False`` (the default), +the output is a list of maps, each with the keys ``category`` and +``probability`` mapped to the class name and its associated +probability. + +So, for example: + +..
code-block:: python + + ensemble.predict_probability({"petal length": 3, "petal width": 1}) + + [{'category': u'Iris-setosa', 'probability': 0.006733220044732548}, + {'category': u'Iris-versicolor', 'probability': 0.9824478534614787}, + {'category': u'Iris-virginica', 'probability': 0.0108189264937886}] + +If ``compact`` is ``True``, only the probabilities themselves are +returned, as a list in class name order, again, as is the case with +local Models. + +Operating point predictions are also available for local ensembles and an +example of it would be: + +.. code-block:: python + + operating_point = {"kind": "probability", + "positive_class": "True", + "threshold": 0.8}; + prediction = local_ensemble.predict(inputData, + operating_point=operating_point) + +You can check the +`Operating point's predictions <#operating-point's-predictions>`_ section +to learn about +operating points. For ensembles, three kinds of operating points are available: +``votes``, ``probability`` and ``confidence``. ``Votes`` will use as threshold +the number of models in the ensemble that vote for the positive class. +The other two are already explained in the above mentioned section. + +Local Ensemble Predictor +------------------------ + +Predictions can take longer when the ensemble is formed by a large number of +models or when its models have a high number of nodes. In these cases, +predictions' speed can be increased and memory usage minimized by using the +``EnsemblePredictor`` object. The basic example to build it is: + +.. 
code-block:: python + + from bigml.ensemblepredictor import EnsemblePredictor + ensemble = EnsemblePredictor('ensemble/5143a51a37203f2cf7020351', + "./model_fns_directory") + ensemble.predict({"petal length": 3, "petal width": 1}, full=True) + {'prediction': u'Iris-versicolor', 'confidence': 0.91519} + +This constructor has two compulsory arguments: the ensemble ID (or the +corresponding API response) and the path to a directory that contains a file +for each of the ensemble models. Each file stores the ``predict`` function +needed to obtain the model's predictions. As in the ``Ensemble`` object, you +can also add an ``api`` argument with the connection to be used to download +the ensemble's JSON information. + +The functions stored in this directory are generated automatically the first +time you instantiate the ensemble. Once they are generated, the functions are +retrieved from the directory. + +Note that only the last prediction missing strategy is available for these +predictions and the combiners available are ``plurality``, ``confidence`` and +``distribution`` but no ``operating_kind`` or ``operating_point`` options +are provided at present. + +Local Supervised Model +---------------------- + +There's a general class that will allow you to predict using any supervised +model resource, regardless of its particular type (model, ensemble, +logistic regression, linear regression or deepnet). + +The ``SupervisedModel`` object will retrieve the resource information and +instantiate the corresponding local object, so that you can use its +``predict`` method to produce local predictions: + +..
code-block:: python + + from bigml.supervised import SupervisedModel + local_supervised_1 = SupervisedModel( \ + "logisticregression/5143a51a37203f2cf7020351") + local_supervised_2 = SupervisedModel( \ + "model/5143a51a37203f2cf7020351") + input_data = {"petal length": 3, "petal width": 1} + logistic_regression_prediction = local_supervised_1.predict(input_data) + model_prediction = local_supervised_2.predict(input_data) + + +Local BigML Model +----------------- + +Following the approach of the local SupervisedModel class, the ``LocalModel`` +class will allow you to predict using any BigML model resource, +either supervised or unsupervised. +This class provides two methods: ``predict`` and ``batch_predict`` with +total abstraction as to the result of the predictions +(real predictions, centroids, anomaly scores, etc.), their parameters and the +format of the prediction result. +The ``predict`` method can be used on any type of +model and delegates to the specific method of each local model class. +Therefore, it will be the programmer's responsibility to provide +only the parameters accepted in the low-level +method and the response will be a dictionary whose contents will vary depending +on the type of prediction. Similarly, the ``batch_predict`` method +accepts a list of inputs and adds the prediction information to each +element of the list. + +The ``LocalModel`` object will retrieve the resource information and +instantiate the corresponding local object, so that you can use its +``predict`` method to produce local predictions: + +..
code-block:: python + + from bigml.local_model import LocalModel + local_model_1 = LocalModel( \ + "logisticregression/5143a51a37203f2cf7020351") + local_model_2 = LocalModel( \ + "anomaly/5143a51a37203f2cf7020351") + input_data = {"petal length": 3, "petal width": 1} + logistic_regression_prediction = local_model_1.predict(input_data) + # {"prediction": "Iris-setosa", "probability": 0.56} + anomaly_prediction = local_model_2.predict(input_data) + # {"score": 0.84} + + +Local Pipelines +--------------- + +More often than not, the Machine Learning solution to a problem entails +using data transformations and different models that produce some predictions +or scores. They all are useful information that contributes to the final +Machine Learning based decision. Usually, the training workflow becomes +a sequence of functions, each of which adds new fields to our data: engineered +features, scores, predictions, etc. Of course, once the training sequence +is determined, the same steps will need to be reproduced to create +batch predictions for a new list of test input data rows. +The ``BMLPipeline`` class offers the tools to extract that sequence from +the existing BigML objects and create the prediction pipeline. + +The first obvious goal that we may have is reproducing the same feature +extraction and transformations that were used when training our data to create +our model. That is achieved by using a ``BMLPipeline`` object built +on the training dataset. Note that, if your datasets contain features derived +from the original fields in your data, ``Nodejs`` has to be previously +installed for the transformations to work locally. + +.. 
code-block:: python + + from bigml.pipeline.pipeline import BMLPipeline + local_pipeline = BMLPipeline("my transformations pipeline", + ["dataset/5143a55637203f2cf7020351"]) + +Starting from ``dataset/5143a55637203f2cf7020351`` +and tracing the previous datasets up till the original source built from +our data, the pipeline will store all the steps that were done +to transform it. Maybe some year, month and day new features were +automatically extracted from our date-time fields, or even +the features corresponding to the histogram of gradients were +obtained from an image field (if your dataset had one of those). +Also, if transformations were defined using ``Flatline`` to +generate new fields, they will be detected and stored as a transformation +step. They are all retrieved and ready to be applied to a +list of dictionaries representing your rows information using the +``.transform`` method. + +.. code-block:: python + + local_pipeline.transform([{"plasma glucose": 130, "bmi":3}, + {"age":26, "plasma glucose": 70}]) + + +As a more powerful example, let's think about an entire workflow where +models have been built on a dataset adding a new field with a +simple feature engineering transformation, like the ratio of two fields. +Suppose a model has been created from the new dataset. +Also, an anomaly detector has been created from the same dataset +to check whether the new input data is too different from the original +examples used to train the model. +If the score is low, the model is still valid, so we accept its prediction. +If the score is too high, the model predictions might be inaccurate, and we +should not rely on them. Therefore, in order to take a decision on what to do +for new input data, we will need not only the values of the fields of that +new test case but also the prediction (plus the associated probability) +and anomaly score that the trained model and anomaly detector provide for it. 
+ +To solve the problem, the process will be: on receiving new data, +the transformation to generate the ratio between the raw input fields +should be applied and a new ``ratio`` field should be added. +After that, both the prediction and the anomaly score should be computed +and they also should be added to the initial data as new fields. +The ``BMLPipeline`` class will help us do that. + +First, we instantiate the ``BMLPipeline`` object by providing the models +that we want it to use and a name for it: + +.. code-block:: python + + from bigml.pipeline.pipeline import BMLPipeline + local_pipeline = BMLPipeline("my new pipeline", + ["model/5143a51a37203f2cf7020351", + "anomaly/5143a51a37203f2cf7027551"]) + +This code will retrieve all the datasets previous to the model and anomaly +detector construction and will store any transformation that they contain. +It creates a sequence starting from the first dataset that was created to +summarize the uploaded data, adding the datasets that store transformations, +and finally the model and anomaly detector. Every transformation that was +done when training those models will be reflected as a new step in the +``BMLPipeline`` and every model that was added to the list will also be +added as an additional transformation step: the model will transform +our data by adding its prediction and associated probability and the +anomaly detector will transform the input by adding the computed +anomaly score. The result is obtained using the ``BMLPipeline`` object, which +offers a ``.transform`` method which accepts a list of input data dictionaries +or a DataFrame. For every row, it will execute the stored transformations +and generate the model's prediction and the anomaly's score. +All of them will be added to the original input data. + +..
code-block:: python + + local_pipeline.transform([{"plasma glucose": 130, "bmi":3}, + {"age":26, "plasma glucose": 70}]) + """That could produce a result such as + [{"plasma glucose": 130, "bmi":3, "prediction": "True", + "probability": 0.578, "score": 0.753}, + {"age": 26, "plasma glucose": 70, "prediction": "False", + "probability": 0.573, "score": 0.54}] + """ + +As with the rest of the local resources, you can pass additional arguments to define +the API connection info and/or a ``cache_get`` function to be used when +resources are stored in memory caches. + +.. code-block:: python + + from bigml.pipeline.pipeline import BMLPipeline + local_pipeline = BMLPipeline("my new pipeline", + ["model/5143a51a37203f2cf7020351", + "anomaly/5143a51a37203f2cf7027551"], + api=BigML("my user", "my api", + storage="my_storage")) + +If no API connection is passed, or if the one given has no +``api.storage`` value, we use the default ``./storage`` directory +followed by the name of the pipeline as storage folder for the +JSON of the resources used in the pipeline. +In this case, four resources will be stored: the dataset created from +the uploaded data, the dataset generated when we added the ratio +field, the model and the anomaly detector. The ``BMLPipeline`` object +offers an ``.export`` method that can compress the entire directory to +a ``.zip`` file whose name is the name of the ``BMLPipeline`` +(conveniently encoded) and will be placed in the ``output_directory`` +given by the user: + +.. code-block:: python + + from bigml.pipeline.pipeline import BMLPipeline + local_pipeline = BMLPipeline("my new pipeline", + ["model/5143a51a37203f2cf7020351", + "anomaly/5143a51a37203f2cf7027551"], + api=BigML("my user", "my api", + storage="my_storage")) + local_pipeline.export(output_directory="my_export_dir") + +In this example, we will find a ``my_export_dir/my_new_pipeline.zip`` file +in the current directory.
The file contains a ``my new pipeline`` folder where +the four JSONs for the two datasets and two models are stored. + +The ``BMLPipeline`` also provides methods to ``dump`` and ``load`` the +data transformers it contains, in order to save them in a cache or in the file +system. As an example, we can create a ``BMLPipeline``, dump its contents to +a file system folder and build a second pipeline from them. The name of +the pipeline will be used as reference to know which object to load. + + +.. code-block:: python + + from bigml.pipeline.pipeline import BMLPipeline + local_pipeline = BMLPipeline("pipeline1", + "model/5143a51a37203f2cf7020351") + local_pipeline.dump("./pipeline1_storage") + # the `pipeline1_storage` folder is created and all the objects + # used in the pipeline are stored there, one file each + new_pipeline = BMLPipeline.load("pipeline1", "./pipeline1_storage") + # a new pipeline has been built with the same properties and steps + # that local_pipeline had + + +If using a cache system, the same methods described in the +`local caching <#local-caching>`_ section are available. + +.. code-block:: python + + from bigml.pipeline.pipeline import BMLPipeline + local_pipeline = BMLPipeline("pipeline1", + "model/631a6a6f8f679a2d31000445") + import redis + r = redis.Redis() + local_pipeline.dump(cache_set=r.set) + new_pipeline = BMLPipeline("pipeline1", cache_get=r.get) + # the new_pipeline has been recovered from Redis + + +Sometimes, you may want to aggregate pre-existing transformations +on your original data before loading it to BigML. In that case, you can use +the more general ``Pipeline`` class to store any sequence of transformations +made outside of BigML. As both ``Pipeline`` and ``BMLPipeline`` offer the +``.transform`` method, they are also data transformers, meaning that they +can be used as steps of a more general ``Pipeline`` as well.
+Thus, combining pre-existing transformations +based on scikit-learn or Pandas with the transformations and models generated +in BigML is totally possible. For that, we will use the +``SKDataTransformer`` and ``DFDataTransformer`` classes, which provide a +``.transform`` method too. + +As an example of use, we'll create a ``Pipeline`` based on an existing +scikit pipeline. + +.. code-block:: python + + import pandas as pd + + from sklearn.tree import DecisionTreeClassifier + from sklearn.preprocessing import StandardScaler + from sklearn.model_selection import train_test_split + from sklearn.pipeline import Pipeline as SKPipeline + + # Building a prediction pipeline using a scikit learn + # scaler and decision tree and adding the prediction + # to the initial dataframe + + from bigml.pipeline.transformer import Pipeline, SKDataTransformer + from bigml.constants import OUT_NEW_HEADERS + + # pre-existing code to build the scikit pipeline + df = pd.read_csv("data/diabetes.csv") + X = df.drop('diabetes', axis=1) + y = df['diabetes'] + + X_train, X_test, y_train, y_test = train_test_split(X, y, + random_state=0) + + pipe = SKPipeline([('scaler', StandardScaler()), + ('DTC', DecisionTreeClassifier())]) + pipe.fit(X_train, y_train) + # end of pre-existing code + + pipeline = Pipeline( + "skpipeline", # pipeline name + steps=[SKDataTransformer(pipe, + "skDTC", + output={OUT_NEW_HEADERS: ["sk_prediction"]})]) + # the `pipe` scikit pipeline is wrapped as a SKDataTransformer to offer + # a `.transform` method + pipeline.transform(X_test) + +This new pipeline can be combined with a ``BMLPipeline`` and will accumulate +the insights of both. + +.. code-block:: python + + from bigml.pipeline.pipeline import BMLPipeline + + bml_pipeline = BMLPipeline("bml_pipeline", + "anomaly/631a6a6f8f679a2d31000445") + extended_pipeline = Pipeline("extended", + steps=[pipeline, bml_pipeline]) + extended_pipeline.transform([{"plasma glucose": 80}]) + +The same can be done for a Pandas' pipe sequence: + +..
code-block:: python + + # based on https://www.kdnuggets.com/2021/01/cleaner-data-analysis-pandas-pipes.html + + import pandas as pd + import numpy as np + + from bigml.pipeline.transformer import DFDataTransformer, Pipeline + + marketing = pd.read_csv("./data/DirectMarketing.csv") + + # code to define the transformations + + def drop_missing(df): + thresh = len(df) * 0.6 + df.dropna(axis=1, thresh=thresh, inplace=True) + return df + + def remove_outliers(df, column_name): + low = np.quantile(df[column_name], 0.05) + high = np.quantile(df[column_name], 0.95) + return df[df[column_name].between(low, high, inclusive=True)] + + def copy_df(df): + return df.copy() + + pipeline = Pipeline("pandas_pipeline", + steps=[DFDataTransformer([copy_df, + drop_missing, + (remove_outliers, + ['Salary'])])]) + # the list of functions are wrapped as a DFDataTransformer to offer + # a `.transform` method that generates the output using Pandas' `.pipe` + marketing_clean = pipeline.transform(marketing) + +where again, the pipeline could be combined with any ``BMLPipeline`` to +produce a more general transformation sequence. + +Of course, new classes could be built to support other transformation tools +and libraries. A new data transformer can be created by deriving the +``DataTransformer`` class and customizing its ``.data_transform`` method +to cover the particulars of the functions to be used in the generation of +new fields. + +Local Evaluations +----------------- + +You can instantiate a local version of an evaluation that will contain the +main evaluation metrics. + +.. 
code-block:: python + + from bigml.evaluation import Evaluation + local_evaluation = Evaluation('evaluation/502fdbff15526876610003215') + +This will retrieve the remote evaluation information, using an implicitly built +``BigML()`` connection object (see the `Authentication <#authentication>`_ +section for more +details on how to set your credentials) and return an ``Evaluation`` object +that will be stored in the ``./storage`` directory. If you want to use a +specific connection object for the remote retrieval or a different storage +directory, you can set it as second parameter: + +.. code-block:: python + + from bigml.evaluation import Evaluation + from bigml.api import BigML + + local_evaluation = Evaluation('evaluation/502fdbff15526876610003215', + api=BigML(my_username, + my_api_key, + storage="my_storage")) + +or even use the remote evaluation information previously retrieved to build the +local evaluation object: + +.. code-block:: python + + from bigml.evaluation import Evaluation + from bigml.api import BigML + api = BigML() + evaluation = api.get_evaluation('evaluation/502fdbff15526876610003215') + + local_evaluation = Evaluation(evaluation) + +You can also build a local evaluation from a previously retrieved and +stored evaluation JSON file: + +.. code-block:: python + + from bigml.evaluation import Evaluation + local_evaluation = Evaluation('./my_evaluation.json') + +The Evaluation attributes depend on whether it belongs to a regression or a +classification. Regression evaluations will contain ``r_squared``, +``mean_absolute_error``, ``mean_squared_error``. Classification evaluations +will contain ``accuracy``, ``precision``, ``recall``, ``phi`` and ``f_measure`` +besides the ``confusion_matrix`` and a ``full`` attribute that will contain +the entire set of metrics as downloaded from the API. + +..
code-block:: python + + from bigml.evaluation import Evaluation + local_evaluation = Evaluation('evaluation/502fdbff15526876610003215') + local_evaluation.full # entire model evaluation metrics + if local_evaluation.regression: + local_evaluation.r_squared # r-squared metric value + else: + local_evaluation.confusion_matrix # confusion matrix + local_evaluation.accuracy + + +Local batch predictions +----------------------- + +As explained in the ``101s`` provided in the +`Quick Start `_ section, batch predictions for a +list of inputs can be obtained by iterating the single predictions discussed +in each different local model. However, we've also provided a +homogeneous ``batch_predict`` method in the following local objects: + +- SupervisedModel +- Anomaly +- Cluster +- PCA +- TopicModel + +which can receive the following parameters: + +- **input_data_list**: This can be a list of input data, expressed as a + dictionary containing ``field_name: field_value`` pairs or + a Pandas' DataFrame +- **outputs**: That's a dictionary that can contain ``output_fields`` + and/or ``output_headers`` information. Each one is + defined by default as the list of prediction keys to be + added to the inputs and the list of headers to be used + as keys in the output. E.g., for a supervised learning + model, the default if no information is provided would + be equivalent to ``{"output_fields": ["prediction", + "probability"], "output_headers": ["prediction", + "probability"]}`` and both the prediction and the + associated probability would be added to the input data. +- **\*\*kwargs**: Any other parameters allowed in the ``.predict`` method + could be added to the batch prediction too. For instance, + we could add the operating kind to a supervised model + batch prediction using ``operating_kind=probability`` as + argument. + + +Let's write some examples. If we are reading data from a CSV, we can use the +``csv`` library and pass the list of inputs as an array to an anomaly detector. 
+ +.. code-block:: python + + import csv + + from bigml.anomaly import Anomaly + + input_data_list = [] + with open("my_input_data.csv") as handler: + reader = csv.DictReader(handler) + for row_dict in reader: + input_data_list.append(row_dict) + + local_anomaly = Anomaly("anomaly/5143a51a37203f2cf7027551") + scored_data_list = local_anomaly.batch_predict(input_data_list) + +Or if we are using a Pandas' ``DataFrame`` instead to read the data, we could +also use the DataFrame directly as input argument: + +.. code-block:: python + + import pandas as pd + + from bigml.anomaly import Anomaly + dataframe = pd.read_csv("my_input_data.csv") + + local_anomaly = Anomaly("anomaly/5143a51a37203f2cf7027551") + scored_dataframe = local_anomaly.batch_predict(dataframe) + +Now, let's add some complexity and use a supervised model. We'd like to +add both the predicted value and the associated probability but we'd like +to use an ``operating point`` when predicting. The operating point requires +specifying a positive class, the kind of metric to compare (probability or +confidence) and the threshold to use. We also want the prediction to +be added to the input data using the key ``sm_prediction``. In this case, the +code would be similar to + +.. code-block:: python + + import pandas as pd + + from bigml.supervised import SupervisedModel + dataframe = pd.read_csv("my_input_data.csv") + + local_supervised = SupervisedModel("ensemble/5143a51a37203f2cf7027551") + operating_point = {"positive_class": "yes", + "kind": "probability", + "threshold": 0.7} + predicted_dataframe = local_supervised.batch_predict( + dataframe, + outputs={"output_headers": ["sm_prediction", "probability"]}, + operating_point=operating_point) + +and the result would be like the one below: + +.. code-block:: python + + >>>predicted_dataframe + pregnancies plasma glucose ... sm_prediction probability + 0 6 148 ... true 0.95917 + 1 1 85 ... false 0.99538 + 2 8 183 ... true 0.93701 + 3 1 89 ...
false 0.99452 + 4 0 137 ... true 0.90622 + .. ... ... ... ... ... + 195 1 117 ... false 0.90906 + 196 5 123 ... false 0.97179 + 197 2 120 ... false 0.99300 + 198 1 106 ... false 0.99452 + 199 2 155 ... false 0.51737 + + [200 rows x 11 columns] + + +Local Shap Wrapper +------------------ + +The Shap library accepts customized predict functions as long as they provide +a particular input/output interface that uses numpy arrays. The previously +described local models can be used to generate such a predict function. +The ``ShapWrapper`` class has been created to help users connect the +Shap library to BigML supervised models and provides the ``.predict`` and +``.predict_proba`` functions especially built to be used with that library. + +.. code-block:: python + + from bigml.shapwrapper import ShapWrapper + shap_wrapper = ShapWrapper("model/5143a51a37203f2cf7027551") + # computing the Explainer on the X_test numpy array + explainer = shap.Explainer(shap_wrapper.predict, + X_test, algorithm='partition', + feature_names=shap_wrapper.x_headers) + shap_values = explainer(X_test) + + +Local predictions with shared models +------------------------------------ + +BigML's resources are private to the owner of the account where they were +created. However, owners can decide to share their resources with other +BigML users by creating +`Secret links `_ +to them. The users that receive the link will be able to inspect the +resource and can also download it. This is especially important in the case +of models, as they will be able to generate local predictions from them. + +The ``Secret link`` URLs leading to shared resources end in a shared ID +(starting with the string ``shared/`` followed by the type of resource and +the particular sharing key). In order to use them locally, use this +string as first argument for the local model constructor.
For instance, let's +say that someone shares with you the link to a shared ensemble +``https://bigml.com/shared/ensemble/qbXem5XoEiVKcq8MPmwjHnXunFj``. + +You could use that in local predictions by instantiating the corresponding +``Ensemble`` object. + +.. code-block:: python + + from bigml.ensemble import Ensemble + local_ensemble = Ensemble("shared/ensemble/qbXem5XoEiVKcq8MPmwjHnXunFj") + +And the new ``local_ensemble`` would be ready to predict using the ``.predict`` +method, as discussed in the `Local Ensembles <#local-ensembles>`_ section. + + +Local caching +------------- + +All local models can use an external cache system to manage memory storage and +recovery. The ``get`` and ``set`` functions of the cache manager should be +passed to the constructor or ``dump`` function. Here's an example of how to +cache a linear regression: + +.. code-block:: python + + from bigml.linear import LinearRegression + lm = LinearRegression("linearregression/5e827ff85299630d22007198") + lm.predict({"petal length": 4, "sepal length":4, "petal width": 4, \ + "sepal width": 4, "species": "Iris-setosa"}, full=True) + import redis + r = redis.Redis() + # First build as you would any core LinearRegression object: + # Store a serialized version in Redis + lm.dump(cache_set=r.set) + # (retrieve the external rep from its convenient place) + # Speedy Build from external rep + lm = LinearRegression("linearregression/5e827ff85299630d22007198", \ + cache_get=r.get) + # Get predictions same as always: + lm.predict({"petal length": 4, "sepal length":4, "petal width": 4, \ + "sepal width": 4, "species": "Iris-setosa"}, full=True) + + +Rule Generation +--------------- + +You can also use a local model to generate an IF-THEN rule set that can be very +helpful to understand how the model works internally. + +..
code-block:: python + + local_model.rules() + IF petal_length > 2.45 AND + IF petal_width > 1.65 AND + IF petal_length > 5.05 THEN + species = Iris-virginica + IF petal_length <= 5.05 AND + IF sepal_width > 2.9 AND + IF sepal_length > 5.95 AND + IF petal_length > 4.95 THEN + species = Iris-versicolor + IF petal_length <= 4.95 THEN + species = Iris-virginica + IF sepal_length <= 5.95 THEN + species = Iris-versicolor + IF sepal_width <= 2.9 THEN + species = Iris-virginica + IF petal_width <= 1.65 AND + IF petal_length > 4.95 AND + IF sepal_length > 6.05 THEN + species = Iris-virginica + IF sepal_length <= 6.05 AND + IF sepal_width > 2.45 THEN + species = Iris-versicolor + IF sepal_width <= 2.45 THEN + species = Iris-virginica + IF petal_length <= 4.95 THEN + species = Iris-versicolor + IF petal_length <= 2.45 THEN + species = Iris-setosa + + +Python, Tableau and Hadoop-ready Generation +------------------------------------------- + +If you prefer, you can also generate a Python function that implements the model +and that can be useful to make the model actionable right away with ``local_model.python()``. + +.. 
code-block:: python + + local_model.python() + def predict_species(sepal_length=None, + sepal_width=None, + petal_length=None, + petal_width=None): + """ Predictor for species from model/50a8e2d9eabcb404d2000293 + + Predictive model by BigML - Machine Learning Made Easy + """ + if (petal_length is None): + return 'Iris-virginica' + if (petal_length <= 2.45): + return 'Iris-setosa' + if (petal_length > 2.45): + if (petal_width is None): + return 'Iris-virginica' + if (petal_width <= 1.65): + if (petal_length <= 4.95): + return 'Iris-versicolor' + if (petal_length > 4.95): + if (sepal_length is None): + return 'Iris-virginica' + if (sepal_length <= 6.05): + if (petal_width <= 1.55): + return 'Iris-virginica' + if (petal_width > 1.55): + return 'Iris-versicolor' + if (sepal_length > 6.05): + return 'Iris-virginica' + if (petal_width > 1.65): + if (petal_length <= 5.05): + if (sepal_width is None): + return 'Iris-virginica' + if (sepal_width <= 2.9): + return 'Iris-virginica' + if (sepal_width > 2.9): + if (sepal_length is None): + return 'Iris-virginica' + if (sepal_length <= 6.4): + if (sepal_length <= 5.95): + return 'Iris-versicolor' + if (sepal_length > 5.95): + return 'Iris-virginica' + if (sepal_length > 6.4): + return 'Iris-versicolor' + if (petal_length > 5.05): + return 'Iris-virginica' + +The ``local_model.python(hadoop=True)`` call will generate the code that you need +for the Hadoop map-reduce engine to produce batch predictions using `Hadoop +streaming `_ . +Saving the mapper and reducer generated functions in their corresponding files +(let's say ``/home/hduser/hadoop_mapper.py`` and +``/home/hduser/hadoop_reducer.py``) you can start a Hadoop job +to generate predictions by issuing +the following Hadoop command in your system console: + +..
code-block:: bash + + bin/hadoop jar contrib/streaming/hadoop-*streaming*.jar \ + -file /home/hduser/hadoop_mapper.py -mapper hadoop_mapper.py \ + -file /home/hduser/hadoop_reducer.py -reducer hadoop_reducer.py \ + -input /home/hduser/hadoop/input.csv \ + -output /home/hduser/hadoop/output_dir + +assuming you are in the Hadoop home directory, your input file is in the +corresponding dfs directory +(``/home/hduser/hadoop/input.csv`` in this example) and the output will +be placed at ``/home/hduser/hadoop/output_dir`` (inside the dfs directory). + +Tableau-ready rules are also available through ``local_model.tableau()`` for +all the models except those that use text predictors. + +.. code-block:: python + + local_model.tableau() + IF ISNULL([petal width]) THEN 'Iris-virginica' + ELSEIF [petal width]>0.8 AND [petal width]>1.75 AND ISNULL([petal length]) THEN 'Iris-virginica' + ELSEIF [petal width]>0.8 AND [petal width]>1.75 AND [petal length]>4.85 THEN 'Iris-virginica' + ELSEIF [petal width]>0.8 AND [petal width]>1.75 AND [petal length]<=4.85 AND ISNULL([sepal width]) THEN 'Iris-virginica' + ELSEIF [petal width]>0.8 AND [petal width]>1.75 AND [petal length]<=4.85 AND [sepal width]>3.1 THEN 'Iris-versicolor' + ELSEIF [petal width]>0.8 AND [petal width]>1.75 AND [petal length]<=4.85 AND [sepal width]<=3.1 THEN 'Iris-virginica' + ELSEIF [petal width]>0.8 AND [petal width]<=1.75 AND ISNULL([petal length]) THEN 'Iris-versicolor' + ELSEIF [petal width]>0.8 AND [petal width]<=1.75 AND [petal length]>4.95 AND [petal width]>1.55 AND [petal length]>5.45 THEN 'Iris-virginica' + ELSEIF [petal width]>0.8 AND [petal width]<=1.75 AND [petal length]>4.95 AND [petal width]>1.55 AND [petal length]<=5.45 THEN 'Iris-versicolor' + ELSEIF [petal width]>0.8 AND [petal width]<=1.75 AND [petal length]>4.95 AND [petal width]<=1.55 THEN 'Iris-virginica' + ELSEIF [petal width]>0.8 AND [petal width]<=1.75 AND [petal length]<=4.95 AND [petal width]>1.65 THEN 'Iris-virginica' + ELSEIF [petal 
width]>0.8 AND [petal width]<=1.75 AND [petal length]<=4.95 AND [petal width]<=1.65 THEN 'Iris-versicolor' + ELSEIF [petal width]<=0.8 THEN 'Iris-setosa' + END + + +Summary generation +------------------ + +You can also print the model from the point of view of the classes it predicts +with ``local_model.summarize()``. +It shows a header section with the training data initial distribution per class +(instances and percentage) and the final predicted distribution per class. + +Then each class distribution is detailed. First a header section +shows the percentage of the total data that belongs to the class (in the +training set and in the predicted results) and the rules applicable to +all +the instances of that class (if any). Just after that, a detail section shows +each of the leaves in which the class members are distributed. +They are sorted in descending +order by the percentage of predictions of the class that fall into that leaf +and also show the full rule chain that leads to it. + +:: + + Data distribution: + Iris-setosa: 33.33% (50 instances) + Iris-versicolor: 33.33% (50 instances) + Iris-virginica: 33.33% (50 instances) + + + Predicted distribution: + Iris-setosa: 33.33% (50 instances) + Iris-versicolor: 33.33% (50 instances) + Iris-virginica: 33.33% (50 instances) + + + Field importance: + 1. petal length: 53.16% + 2. petal width: 46.33% + 3. sepal length: 0.51% + 4. 
sepal width: 0.00% + + + Iris-setosa : (data 33.33% / prediction 33.33%) petal length <= 2.45 + · 100.00%: petal length <= 2.45 [Confidence: 92.86%] + + + Iris-versicolor : (data 33.33% / prediction 33.33%) petal length > 2.45 + · 94.00%: petal length > 2.45 and petal width <= 1.65 and petal length <= 4.95 [Confidence: 92.44%] + · 2.00%: petal length > 2.45 and petal width <= 1.65 and petal length > 4.95 and sepal length <= 6.05 and petal width > 1.55 [Confidence: 20.65%] + · 2.00%: petal length > 2.45 and petal width > 1.65 and petal length <= 5.05 and sepal width > 2.9 and sepal length > 6.4 [Confidence: 20.65%] + · 2.00%: petal length > 2.45 and petal width > 1.65 and petal length <= 5.05 and sepal width > 2.9 and sepal length <= 6.4 and sepal length <= 5.95 [Confidence: 20.65%] + + + Iris-virginica : (data 33.33% / prediction 33.33%) petal length > 2.45 + · 76.00%: petal length > 2.45 and petal width > 1.65 and petal length > 5.05 [Confidence: 90.82%] + · 12.00%: petal length > 2.45 and petal width > 1.65 and petal length <= 5.05 and sepal width <= 2.9 [Confidence: 60.97%] + · 6.00%: petal length > 2.45 and petal width <= 1.65 and petal length > 4.95 and sepal length > 6.05 [Confidence: 43.85%] + · 4.00%: petal length > 2.45 and petal width > 1.65 and petal length <= 5.05 and sepal width > 2.9 and sepal length <= 6.4 and sepal length > 5.95 [Confidence: 34.24%] + · 2.00%: petal length > 2.45 and petal width <= 1.65 and petal length > 4.95 and sepal length <= 6.05 and petal width <= 1.55 [Confidence: 20.65%] + + +You can also use ``local_model.get_data_distribution()`` and +``local_model.get_prediction_distribution()`` to obtain the training and +prediction basic distribution +information as a list (suitable to draw histograms or any further processing). +The tree nodes' information (prediction, confidence, impurity and distribution) +can also be retrieved in a CSV format using the method +``local_model.tree_CSV()``. 
The output can be sent to a file by providing a +``file_name`` argument or used as a list. + +Local ensembles have a ``local_ensemble.summarize()`` method too, the output +in this case shows only the data distribution (only available in +``Decision Forests``) and field importance sections. + +For local clusters, the ``local_cluster.summarize()`` method prints also the +data distribution, the training data statistics per cluster and the basic +intercentroid distance statistics. There's also a +``local_cluster.statistics_CSV(file_name)`` method that stores in a CSV format +the values shown by the ``summarize()`` method. If no file name is provided, +the function returns the rows that would have been stored in the file as +a list. diff --git a/docs/ml_resources.rst b/docs/ml_resources.rst new file mode 100644 index 00000000..45ba0020 --- /dev/null +++ b/docs/ml_resources.rst @@ -0,0 +1,3880 @@ +.. toctree:: + :hidden: + +ML Resources +============ + +This section describes the resources available in the BigML API. When retrieved +with the corresponding bindings ``get_[resource_type]`` method, they will +have some common attributes, like: + +- ``resource`` which contains their ID +- ``category`` which can be set to the list of categories as defined in the + API documentation. +- ``creator`` which refers to the creator username. + +To name some. + +Besides, every resource type will have different properties as required +by its nature, that can be checked in the +`API documentation +`_. Here's a list of the different +resource types and their associated structures and properties. + +Data Ingestion and Preparation +------------------------------ + +External Connectors +~~~~~~~~~~~~~~~~~~~ + +The ``Externalconnector`` object is an abstract resource that helps +you create ``Sources`` from several external data sources +like relational databases or ElasticSearch engines. This is not strictly +a Machine Learning resource, but a helper to connect your data repos to BigML. + +.. 
code-block:: python + + >>> external_connector = api.get_external_connector( \ + "externalconnector/5e30b685e476845dd901df83") + +You can check the external connector properties at the `API documentation +`_. + +Source +~~~~~~ + +The ``Source`` is the first resource that you build in BigML when uploading +a file. BigML infers the structure of the file, whether it has headers or not, +the column separator or the field types and names and stores the results in +the ``Source`` information: + +.. code-block:: python + + >>> source = api.get_source("source/5e30b685e476845dd901df83") + >>> api.pprint(source["object"]) + { 'category': 0, + 'charset': 'UTF-8', + 'code': 200, + 'configuration': None, + 'configuration_status': False, + 'content_type': 'text/plain;UTF-8', + 'created': '2020-01-28T22:32:37.290000', + 'creator': 'mmartin', + 'credits': 0, + 'description': '', + 'disable_datetime': False, + 'field_types': { 'categorical': 0, + 'datetime': 0, + 'items': 0, + 'numeric': 4, + 'text': 1, + 'total': 5}, + 'fields': { '000000': { 'column_number': 0, + 'name': 'sepal length', + 'optype': 'numeric', + 'order': 0}, + '000001': { 'column_number': 1, + 'name': 'sepal width', + 'optype': 'numeric', + 'order': 1}, + '000002': { 'column_number': 2, + 'name': 'petal length', + 'optype': 'numeric', + 'order': 2}, + '000003': { 'column_number': 3, + 'name': 'petal width', + 'optype': 'numeric', + 'order': 3}, + '000004': { 'column_number': 4, + 'name': 'species', + 'optype': 'text', + 'order': 4, + 'term_analysis': { 'enabled': True}}}, + 'fields_meta': { 'count': 5, + 'image': 0, + 'limit': 1000, + 'offset': 0, + 'query_total': 5, + 'total': 5}, + ... + } + +You can check the source properties at the `API documentation +`_. + +Dataset +~~~~~~~ + +If you want to get some basic statistics for each field you can retrieve +the ``fields`` from the dataset as follows to get a dictionary keyed by +field id: + +.. 
code-block:: python + + >>> dataset = api.get_dataset(dataset) + >>> api.pprint(api.get_fields(dataset)) + { '000000': { 'column_number': 0, + 'datatype': 'double', + 'name': 'sepal length', + 'optype': 'numeric', + 'summary': { 'maximum': 7.9, + 'median': 5.77889, + 'minimum': 4.3, + 'missing_count': 0, + 'population': 150, + 'splits': [ 4.51526, + 4.67252, + 4.81113, + + [... snip ... ] + + + '000004': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'summary': { 'categories': [ [ 'Iris-versicolor', + 50], + ['Iris-setosa', 50], + [ 'Iris-virginica', + 50]], + 'missing_count': 0}}} + + +The field filtering options are also available using a query string expression, +for instance: + +.. code-block:: python + + >>> dataset = api.get_dataset(dataset, "limit=20") + +limits the number of fields that will be included in ``dataset`` to 20. + +You can check the dataset properties at the `API documentation +`_. + +Samples +~~~~~~~ + +To provide quick access to your row data you can create a ``sample``. Samples +are in-memory objects that can be queried for subsets of data by limiting +their size, the fields or the rows returned. The structure of a sample would +be: + + +.. 
code-block:: python + + >>> from bigml.api import BigML + >>> api = BigML() + >>> sample = api.create_sample('dataset/55b7a6749841fa2500000d41', + {"max_rows": 150}) + >>> api.ok(sample) + >>> api.pprint(sample['object']) + { + "category": 0, + "code": 201, + "columns": 0, + "configuration": null, + "configuration_status": false, + "created": "2021-03-02T14:32:59.603699", + "creator": "alfred", + "dataset": "dataset/603e20a91f386f43db000004", + "dataset_status": true, + "description": "", + "excluded_fields": [], + "fields_meta": { + "count": 0, + "limit": 1000, + "offset": 0, + "total": 0 + }, + "input_fields": [ + "000000", + "000001", + "000002", + "000003", + "000004" + ], + "locale": "en_US", + "max_columns": 0, + "max_rows": 150, + "name": "iris", + "name_options": "", + "private": true, + "project": null, + "resource": "sample/603e4c9b1f386fdea6000000", + "rows": 0, + "seed": "d1dc0a2819344a079af521507b7e7ea8", + "shared": false, + "size": 4608, + "status": { + "code": 1, + "message": "The sample creation request has been queued and will be processed soon", + "progress": 0 + }, + "subscription": true, + "tags": [], + "type": 0, + "updated": "2021-03-02T14:32:59.603751" + } + + +Samples are not permanent objects. Once they are created, they will be +available as long as GETs are requested within periods smaller than +a pre-established TTL (Time to Live). The expiration timer of a sample is +reset every time a new GET is received. + +If requested, a sample can also perform linear regression and compute +Pearson's and Spearman's correlations for either one numeric field +against all other numeric fields or between two specific numeric fields. + +You can check the sample properties at the `API documentation +`_. + +Correlations +~~~~~~~~~~~~ + +A ``correlation`` resource contains a series of computations that reflect the +degree of dependence between the field set as objective for your predictions +and the rest of fields in your dataset. 
The dependence degree is obtained by +comparing the distributions in every objective and non-objective field pair, +as independent fields should have probabilistic +independent distributions. Depending on the types of the fields to compare, +the metrics used to compute the correlation degree will be: + +- for numeric to numeric pairs: + `Pearson's `_ + and `Spearman's correlation `_ + coefficients. +- for numeric to categorical pairs: + `One-way Analysis of Variance `_, with the + categorical field as the predictor variable. +- for categorical to categorical pairs: + `contingency table (or two-way table) `_, + `Chi-square test of independence `_ + , and `Cramer's V `_ + and `Tschuprow's T `_ coefficients. + +An example of the correlation resource JSON structure is: + +.. code-block:: python + + >>> from bigml.api import BigML + >>> api = BigML() + >>> correlation = api.create_correlation('dataset/55b7a6749841fa2500000d41') + >>> api.ok(correlation) + >>> api.pprint(correlation['object']) + { 'category': 0, + 'clones': 0, + 'code': 200, + 'columns': 5, + 'correlations': { 'correlations': [ { 'name': 'one_way_anova', + 'result': { '000000': { 'eta_square': 0.61871, + 'f_ratio': 119.2645, + 'p_value': 0, + 'significant': [ True, + True, + True]}, + '000001': { 'eta_square': 0.40078, + 'f_ratio': 49.16004, + 'p_value': 0, + 'significant': [ True, + True, + True]}, + '000002': { 'eta_square': 0.94137, + 'f_ratio': 1180.16118, + 'p_value': 0, + 'significant': [ True, + True, + True]}, + '000003': { 'eta_square': 0.92888, + 'f_ratio': 960.00715, + 'p_value': 0, + 'significant': [ True, + True, + True]}}}], + 'fields': { '000000': { 'column_number': 0, + 'datatype': 'double', + 'idx': 0, + 'name': 'sepal length', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'bins': [ [ 4.3, + 1], + [ 4.425, + 4], + ... 
+ [ 7.9, + 1]], + 'kurtosis': -0.57357, + 'maximum': 7.9, + 'mean': 5.84333, + 'median': 5.8, + 'minimum': 4.3, + 'missing_count': 0, + 'population': 150, + 'skewness': 0.31175, + 'splits': [ 4.51526, + 4.67252, + 4.81113, + 4.89582, + 4.96139, + 5.01131, + ... + 6.92597, + 7.20423, + 7.64746], + 'standard_deviation': 0.82807, + 'sum': 876.5, + 'sum_squares': 5223.85, + 'variance': 0.68569}}, + '000001': { 'column_number': 1, + 'datatype': 'double', + 'idx': 1, + 'name': 'sepal width', + 'optype': 'numeric', + 'order': 1, + 'preferred': True, + 'summary': { 'counts': [ [ 2, + 1], + [ 2.2, + ... + '000004': { 'column_number': 4, + 'datatype': 'string', + 'idx': 4, + 'name': 'species', + 'optype': 'categorical', + 'order': 4, + 'preferred': True, + 'summary': { 'categories': [ [ 'Iris-setosa', + 50], + [ 'Iris-versicolor', + 50], + [ 'Iris-virginica', + 50]], + 'missing_count': 0}, + 'term_analysis': { 'enabled': True}}}, + 'significance_levels': [0.01, 0.05, 0.1]}, + 'created': '2015-07-28T18:07:37.010000', + 'credits': 0.017581939697265625, + 'dataset': 'dataset/55b7a6749841fa2500000d41', + 'dataset_status': True, + 'dataset_type': 0, + 'description': '', + 'excluded_fields': [], + 'fields_meta': { 'count': 5, + 'limit': 1000, + 'offset': 0, + 'query_total': 5, + 'total': 5}, + 'input_fields': ['000000', '000001', '000002', '000003'], + 'locale': 'en_US', + 'max_columns': 5, + 'max_rows': 150, + 'name': u"iris' dataset correlation", + 'objective_field_details': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'order': 4}, + 'out_of_bag': False, + 'price': 0.0, + 'private': True, + 'project': None, + 'range': [1, 150], + 'replacement': False, + 'resource': 'correlation/55b7c4e99841fa24f20009bf', + 'rows': 150, + 'sample_rate': 1.0, + 'shared': False, + 'size': 4609, + 'source': 'source/55b7a6729841fa24f100036a', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 274, + 'message': 'The correlation has been 
created', + 'progress': 1.0}, + 'subscription': True, + 'tags': [], + 'updated': '2015-07-28T18:07:49.057000', + 'white_box': False} + +Note that the output in the snippet above has been abbreviated. As you see, the +``correlations`` attribute contains the information about each field +correlation to the objective field. + +You can check the correlations properties at the `API documentation +`_. + + +Statistical Tests +~~~~~~~~~~~~~~~~~ + +A ``statisticaltest`` resource contains a series of tests +that compare the +distribution of data in each numeric field of a dataset +to certain canonical distributions, +such as the +`normal distribution `_ +or `Benford's law `_ +distribution. Statistical tests are useful in tasks such as fraud, normality, +or outlier detection. + +- Fraud Detection Tests: +Benford: This statistical test performs a comparison of the distribution of +first significant digits (FSDs) of each value of the field to the Benford's +law distribution. Benford's law applies to numerical distributions spanning +several orders of magnitude, such as the values found on financial balance +sheets. It states that the frequency distribution of leading, or first +significant digits (FSD) in such distributions is not uniform. +On the contrary, lower digits like 1 and 2 occur disproportionately +often as leading significant digits. The test compares the distribution +in the field to Benford's distribution using a Chi-square goodness-of-fit +test, and Cho-Gaines d test. If a field has a dissimilar distribution, +it may contain anomalous or fraudulent values. + +- Normality tests: +These tests can be used to confirm the assumption that the data in each field +of a dataset is distributed according to a normal distribution. The results +are relevant because many statistical and machine learning techniques rely on +this assumption. 
+Anderson-Darling: The Anderson-Darling test computes a test statistic based on +the difference between the observed cumulative distribution function (CDF) and +that of a normal distribution. A significant result indicates that the +assumption of normality is rejected. +Jarque-Bera: The Jarque-Bera test computes a test statistic based on the third +and fourth central moments (skewness and kurtosis) of the data. Again, a +significant result indicates that the normality assumption is rejected. +Z-score: For a given sample size, the maximum deviation from the mean that +would be expected in a sampling of a normal distribution can be computed based +on the 68-95-99.7 rule. This test simply reports this expected deviation and +the actual deviation observed in the data, as a sort of sanity check. + +- Outlier tests: +Grubbs: When the values of a field are normally distributed, a few values may +still deviate from the mean distribution. The outlier test reports whether +at least one value in each numeric field differs significantly from the mean +using Grubbs' test for outliers. If an outlier is found, then its value will +be returned. + +The JSON structure for ``statisticaltest`` resources is similar to this one: + +.. 
code-block:: python + + >>> statistical_test = api.create_statistical_test('dataset/55b7a6749841fa2500000d41') + >>> api.ok(statistical_test) + True + >>> api.pprint(statistical_test['object']) + { 'category': 0, + 'clones': 0, + 'code': 200, + 'columns': 5, + 'created': '2015-07-28T18:16:40.582000', + 'credits': 0.017581939697265625, + 'dataset': 'dataset/55b7a6749841fa2500000d41', + 'dataset_status': True, + 'dataset_type': 0, + 'description': '', + 'excluded_fields': [], + 'fields_meta': { 'count': 5, + 'limit': 1000, + 'offset': 0, + 'query_total': 5, + 'total': 5}, + 'input_fields': ['000000', '000001', '000002', '000003'], + 'locale': 'en_US', + 'max_columns': 5, + 'max_rows': 150, + 'name': u"iris' dataset test", + 'out_of_bag': False, + 'price': 0.0, + 'private': True, + 'project': None, + 'range': [1, 150], + 'replacement': False, + 'resource': 'statisticaltest/55b7c7089841fa25000010ad', + 'rows': 150, + 'sample_rate': 1.0, + 'shared': False, + 'size': 4609, + 'source': 'source/55b7a6729841fa24f100036a', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 302, + 'message': 'The test has been created', + 'progress': 1.0}, + 'subscription': True, + 'tags': [], + 'statistical_tests': { 'ad_sample_size': 1024, + 'fields': { '000000': { 'column_number': 0, + 'datatype': 'double', + 'idx': 0, + 'name': 'sepal length', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'bins': [ [ 4.3, + 1], + [ 4.425, + 4], + ... + [ 7.9, + 1]], + 'kurtosis': -0.57357, + 'maximum': 7.9, + 'mean': 5.84333, + 'median': 5.8, + 'minimum': 4.3, + 'missing_count': 0, + 'population': 150, + 'skewness': 0.31175, + 'splits': [ 4.51526, + 4.67252, + 4.81113, + 4.89582, + ... + 7.20423, + 7.64746], + 'standard_deviation': 0.82807, + 'sum': 876.5, + 'sum_squares': 5223.85, + 'variance': 0.68569}}, + ... 
+ '000004': { 'column_number': 4, + 'datatype': 'string', + 'idx': 4, + 'name': 'species', + 'optype': 'categorical', + 'order': 4, + 'preferred': True, + 'summary': { 'categories': [ [ 'Iris-setosa', + 50], + [ 'Iris-versicolor', + 50], + [ 'Iris-virginica', + 50]], + 'missing_count': 0}, + 'term_analysis': { 'enabled': True}}}, + 'fraud': [ { 'name': 'benford', + 'result': { '000000': { 'chi_square': { 'chi_square_value': 506.39302, + 'p_value': 0, + 'significant': [ True, + True, + True]}, + 'cho_gaines': { 'd_statistic': 7.124311073683573, + 'significant': [ True, + True, + True]}, + 'distribution': [ 0, + 0, + 0, + 22, + 61, + 54, + 13, + 0, + 0], + 'negatives': 0, + 'zeros': 0}, + '000001': { 'chi_square': { 'chi_square_value': 396.76556, + 'p_value': 0, + 'significant': [ True, + True, + True]}, + 'cho_gaines': { 'd_statistic': 7.503503138331123, + 'significant': [ True, + True, + True]}, + 'distribution': [ 0, + 57, + 89, + 4, + 0, + 0, + 0, + 0, + 0], + 'negatives': 0, + 'zeros': 0}, + '000002': { 'chi_square': { 'chi_square_value': 154.20728, + 'p_value': 0, + 'significant': [ True, + True, + True]}, + 'cho_gaines': { 'd_statistic': 3.9229974017266054, + 'significant': [ True, + True, + True]}, + 'distribution': [ 50, + 0, + 11, + 43, + 35, + 11, + 0, + 0, + 0], + 'negatives': 0, + 'zeros': 0}, + '000003': { 'chi_square': { 'chi_square_value': 111.4438, + 'p_value': 0, + 'significant': [ True, + True, + True]}, + 'cho_gaines': { 'd_statistic': 4.103257341299901, + 'significant': [ True, + True, + True]}, + 'distribution': [ 76, + 58, + 7, + 7, + 1, + 1, + 0, + 0, + 0], + 'negatives': 0, + 'zeros': 0}}}], + 'normality': [ { 'name': 'anderson_darling', + 'result': { '000000': { 'p_value': 0.02252, + 'significant': [ False, + True, + True]}, + '000001': { 'p_value': 0.02023, + 'significant': [ False, + True, + True]}, + '000002': { 'p_value': 0, + 'significant': [ True, + True, + True]}, + '000003': { 'p_value': 0, + 'significant': [ True, + True, + 
True]}}}, + { 'name': 'jarque_bera', + 'result': { '000000': { 'p_value': 0.10615, + 'significant': [ False, + False, + False]}, + '000001': { 'p_value': 0.25957, + 'significant': [ False, + False, + False]}, + '000002': { 'p_value': 0.0009, + 'significant': [ True, + True, + True]}, + '000003': { 'p_value': 0.00332, + 'significant': [ True, + True, + True]}}}, + { 'name': 'z_score', + 'result': { '000000': { 'expected_max_z': 2.71305, + 'max_z': 2.48369}, + '000001': { 'expected_max_z': 2.71305, + 'max_z': 3.08044}, + '000002': { 'expected_max_z': 2.71305, + 'max_z': 1.77987}, + '000003': { 'expected_max_z': 2.71305, + 'max_z': 1.70638}}}], + 'outliers': [ { 'name': 'grubbs', + 'result': { '000000': { 'p_value': 1, + 'significant': [ False, + False, + False]}, + '000001': { 'p_value': 0.26555, + 'significant': [ False, + False, + False]}, + '000002': { 'p_value': 1, + 'significant': [ False, + False, + False]}, + '000003': { 'p_value': 1, + 'significant': [ False, + False, + False]}}}], + 'significance_levels': [0.01, 0.05, 0.1]}, + 'updated': '2015-07-28T18:17:11.829000', + 'white_box': False} + +Note that the output in the snippet above has been abbreviated. As you see, the +``statistical_tests`` attribute contains the ``fraud``, ``normality`` +and ``outliers`` +sections where the information for each field's distribution is stored. + +You can check the statistical tests properties at the `API documentation +`_. + + +Supervised Models +----------------- + +Model +~~~~~ + +One of the greatest things about BigML is that the models that it +generates for you are fully white-boxed. To get the explicit tree-like +predictive model for the example above: + +.. 
code-block:: python + + >>> model = api.get_model(model) + >>> api.pprint(model['object']['model']['root']) + {'children': [ + {'children': [ + {'children': [{'count': 38, + 'distribution': [['Iris-virginica', 38]], + 'output': 'Iris-virginica', + 'predicate': {'field': '000002', + 'operator': '>', + 'value': 5.05}}, + 'children': [ + + [ ... ] + + {'count': 50, + 'distribution': [['Iris-setosa', 50]], + 'output': 'Iris-setosa', + 'predicate': {'field': '000002', + 'operator': '<=', + 'value': 2.45}}]}, + {'count': 150, + 'distribution': [['Iris-virginica', 50], + ['Iris-versicolor', 50], + ['Iris-setosa', 50]], + 'output': 'Iris-virginica', + 'predicate': True}]}}} + +(Note that we have abbreviated the output in the snippet above for +readability: the full predictive model you'll get is going to contain +many more details). + +Again, filtering options are also available using a query string expression, +for instance: + +.. code-block:: python + + >>> model = api.get_model(model, "limit=5") + +limits the number of fields that will be included in ``model`` to 5. + +You can check the model properties at the `API documentation +`_. + + +Linear Regressions +~~~~~~~~~~~~~~~~~~ + +A linear regression is a supervised machine learning method for +solving regression problems by computing the objective as a linear +combination of factors. The implementation is a multiple linear regression +that models the output as a linear combination of the predictors. +The coefficients are estimated doing a least-squares fit on the training data. + +As a linear combination can only be done using numeric values, non-numeric +fields need to be transformed to numeric ones following some rules: + +- Categorical fields will be encoded and each class appearance in input data + will convey a different contribution to the input vector. +- Text and items fields will be expanded to several numeric predictors, + each one indicating the number of occurrences for a specific term. 
+ Text fields without term analysis are excluded from the model. + +Therefore, the initial input data is transformed into an input vector with one +or more components per field. Also, if a field in the training data contains +missing data, the components corresponding to that field will include an +additional 1 or 0 value depending on whether the field is missing in the +input data or not. + +The JSON structure for a linear regression is: + +.. code-block:: python + + >>> api.pprint(linear_regression["object"]) + { 'category': 0, + 'code': 200, + 'columns': 4, + 'composites': None, + 'configuration': None, + 'configuration_status': False, + 'created': '2019-02-20T21:02:40.027000', + 'creator': 'merce', + 'credits': 0.0, + 'credits_per_prediction': 0.0, + 'dataset': 'dataset/5c6dc06a983efc18e2000084', + 'dataset_field_types': { 'categorical': 0, + 'datetime': 0, + 'items': 0, + 'numeric': 6, + 'preferred': 6, + 'text': 0, + 'total': 6}, + 'dataset_status': True, + 'datasets': [], + 'default_numeric_value': None, + 'description': '', + 'excluded_fields': [], + 'execution_id': None, + 'execution_status': None, + 'fields_maps': None, + 'fields_meta': { 'count': 4, + 'limit': 1000, + 'offset': 0, + 'query_total': 4, + 'total': 4}, + 'fusions': None, + 'input_fields': ['000000', '000001', '000002'], + 'linear_regression': { 'bias': True, + 'coefficients': [ [-1.88196], + [0.475633], + [0.122468], + [30.9141]], + 'fields': { '000000': { 'column_number': 0, + 'datatype': 'int8', + 'name': 'Prefix', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'counts': [ [ 4, + 1], + + ... 
+ 'stats': { 'confidence_intervals': [ [ 5.63628], + [ 0.375062], + [ 0.348577], + [ 44.4112]], + 'mean_squared_error': 342.206, + 'number_of_parameters': 4, + 'number_of_samples': 77, + 'p_values': [ [0.512831], + [0.0129362], + [0.491069], + [0.172471]], + 'r_squared': 0.136672, + 'standard_errors': [ [ 2.87571], + [ 0.191361], + [ 0.177849], + [ 22.6592]], + 'sum_squared_errors': 24981, + 'xtx': [ [ 4242, + 48396.9, + 51273.97, + 568], + [ 48396.9, + 570177.6584, + 594274.3274, + 6550.52], + [ 51273.97, + 594274.3274, + 635452.7068, + 6894.24], + [ 568, + 6550.52, + 6894.24, + 77]], + 'z_scores': [ [-0.654436], + [2.48552], + [0.688609], + [1.36431]]}}, + 'locale': 'en_US', + 'max_columns': 6, + 'max_rows': 80, + 'name': 'grades', + 'name_options': 'bias', + 'number_of_batchpredictions': 0, + 'number_of_evaluations': 0, + 'number_of_predictions': 2, + 'number_of_public_predictions': 0, + 'objective_field': '000005', + 'objective_field_name': 'Final', + 'objective_field_type': 'numeric', + 'objective_fields': ['000005'], + 'operating_point': { }, + 'optiml': None, + 'optiml_status': False, + 'ordering': 0, + 'out_of_bag': False, + 'out_of_bags': None, + 'price': 0.0, + 'private': True, + 'project': 'project/5c6dc062983efc18d5000129', + 'range': None, + 'ranges': None, + 'replacement': False, + 'replacements': None, + 'resource': 'linearregression/5c6dc070983efc18e00001f1', + 'rows': 80, + 'sample_rate': 1.0, + 'sample_rates': None, + 'seed': None, + 'seeds': None, + 'shared': False, + 'size': 2691, + 'source': 'source/5c6dc064983efc18e00001ed', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 62086, + 'message': 'The linear regression has been created', + 'progress': 1}, + 'subscription': True, + 'tags': [], + 'type': 0, + 'updated': '2019-02-27T18:01:18.539000', + 'user_metadata': { }, + 'webhook': None, + 'weight_field': None, + 'white_box': False} + +Note that the output in the snippet above has been abbreviated. 
As you see, +the ``linear_regression`` attribute stores the coefficients used in the +linear function as well as the configuration parameters described in +the `developers section `_ . + + +Logistic Regressions +~~~~~~~~~~~~~~~~~~~~ + +A logistic regression is a supervised machine learning method for +solving classification problems. Each of the classes in the field +you want to predict, the objective field, is assigned a probability depending +on the values of the input fields. The probability is computed +as the value of a logistic function, +whose argument is a linear combination of the predictors' values. +You can create a logistic regression selecting which fields from your +dataset you want to use as input fields (or predictors) and which +categorical field you want to predict, the objective field. Then the +created logistic regression is defined by the set of coefficients in the +linear combination of the values. Categorical +and text fields need some prior work to be modelled using this method. They +are expanded as a set of new fields, one per category or term (respectively) +where the number of occurrences of the category or term is stored. Thus, +the linear combination is made on the frequency of the categories or terms. + +The JSON structure for a logistic regression is: + +.. 
code-block:: python + + >>> api.pprint(logistic_regression['object']) + { 'balance_objective': False, + 'category': 0, + 'code': 200, + 'columns': 5, + 'created': '2015-10-09T16:11:08.444000', + 'credits': 0.017581939697265625, + 'credits_per_prediction': 0.0, + 'dataset': 'dataset/561304f537203f4c930001ca', + 'dataset_field_types': { 'categorical': 1, + 'datetime': 0, + 'effective_fields': 5, + 'numeric': 4, + 'preferred': 5, + 'text': 0, + 'total': 5}, + 'dataset_status': True, + 'description': '', + 'excluded_fields': [], + 'fields_meta': { 'count': 5, + 'limit': 1000, + 'offset': 0, + 'query_total': 5, + 'total': 5}, + 'input_fields': ['000000', '000001', '000002', '000003'], + 'locale': 'en_US', + 'logistic_regression': { 'bias': 1, + 'c': 1, + 'coefficients': [ [ 'Iris-virginica', + [ -1.7074433493289376, + -1.533662474502423, + 2.47026986670851, + 2.5567582221085563, + -1.2158200612711925]], + [ 'Iris-setosa', + [ 0.41021712519841674, + 1.464162165246765, + -2.26003266131107, + -1.0210350909174153, + 0.26421852991732514]], + [ 'Iris-versicolor', + [ 0.42702327817072505, + -1.611817241669904, + 0.5763832839459982, + -1.4069842681625884, + 1.0946877732663143]]], + 'eps': 1e-05, + 'fields': { '000000': { 'column_number': 0, + 'datatype': 'double', + 'name': 'sepal length', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'bins': [ [ 4.3, + 1], + [ 4.425, + 4], + [ 4.6, + 4], + ... + [ 7.9, + 1]], + 'kurtosis': -0.57357, + 'maximum': 7.9, + 'mean': 5.84333, + 'median': 5.8, + 'minimum': 4.3, + 'missing_count': 0, + 'population': 150, + 'skewness': 0.31175, + 'splits': [ 4.51526, + 4.67252, + 4.81113, + ... + 6.92597, + 7.20423, + 7.64746], + 'standard_deviation': 0.82807, + 'sum': 876.5, + 'sum_squares': 5223.85, + 'variance': 0.68569}}, + '000001': { 'column_number': 1, + 'datatype': 'double', + 'name': 'sepal width', + 'optype': 'numeric', + 'order': 1, + 'preferred': True, + 'summary': { 'counts': [ [ 2, + 1], + [ 2.2, + 3], + ... 
+ [ 4.2, + 1], + [ 4.4, + 1]], + 'kurtosis': 0.18098, + 'maximum': 4.4, + 'mean': 3.05733, + 'median': 3, + 'minimum': 2, + 'missing_count': 0, + 'population': 150, + 'skewness': 0.31577, + 'standard_deviation': 0.43587, + 'sum': 458.6, + 'sum_squares': 1430.4, + 'variance': 0.18998}}, + '000002': { 'column_number': 2, + 'datatype': 'double', + 'name': 'petal length', + 'optype': 'numeric', + 'order': 2, + 'preferred': True, + 'summary': { 'bins': [ [ 1, + 1], + [ 1.16667, + 3], + ... + [ 6.6, + 1], + [ 6.7, + 2], + [ 6.9, + 1]], + 'kurtosis': -1.39554, + 'maximum': 6.9, + 'mean': 3.758, + 'median': 4.35, + 'minimum': 1, + 'missing_count': 0, + 'population': 150, + 'skewness': -0.27213, + 'splits': [ 1.25138, + 1.32426, + 1.37171, + ... + 6.02913, + 6.38125], + 'standard_deviation': 1.7653, + 'sum': 563.7, + 'sum_squares': 2582.71, + 'variance': 3.11628}}, + '000003': { 'column_number': 3, + 'datatype': 'double', + 'name': 'petal width', + 'optype': 'numeric', + 'order': 3, + 'preferred': True, + 'summary': { 'counts': [ [ 0.1, + 5], + [ 0.2, + 29], + ... 
+ [ 2.4, + 3], + [ 2.5, + 3]], + 'kurtosis': -1.33607, + 'maximum': 2.5, + 'mean': 1.19933, + 'median': 1.3, + 'minimum': 0.1, + 'missing_count': 0, + 'population': 150, + 'skewness': -0.10193, + 'standard_deviation': 0.76224, + 'sum': 179.9, + 'sum_squares': 302.33, + 'variance': 0.58101}}, + '000004': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'order': 4, + 'preferred': True, + 'summary': { 'categories': [ [ 'Iris-setosa', + 50], + [ 'Iris-versicolor', + 50], + [ 'Iris-virginica', + 50]], + 'missing_count': 0}, + 'term_analysis': { 'enabled': True}}}, + 'normalize': False, + 'regularization': 'l2'}, + 'max_columns': 5, + 'max_rows': 150, + 'name': u"iris' dataset's logistic regression", + 'number_of_batchpredictions': 0, + 'number_of_evaluations': 0, + 'number_of_predictions': 1, + 'objective_field': '000004', + 'objective_field_name': 'species', + 'objective_field_type': 'categorical', + 'objective_fields': ['000004'], + 'out_of_bag': False, + 'private': True, + 'project': 'project/561304c137203f4c9300016c', + 'range': [1, 150], + 'replacement': False, + 'resource': 'logisticregression/5617e71c37203f506a000001', + 'rows': 150, + 'sample_rate': 1.0, + 'shared': False, + 'size': 4609, + 'source': 'source/561304f437203f4c930001c3', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 86, + 'message': 'The logistic regression has been created', + 'progress': 1.0}, + 'subscription': False, + 'tags': ['species'], + 'updated': '2015-10-09T16:14:02.336000', + 'white_box': False} + +Note that the output in the snippet above has been abbreviated. As you see, +the ``logistic_regression`` attribute stores the coefficients used in the +logistic function as well as the configuration parameters described in +the `developers section +`_ . + +Ensembles +~~~~~~~~~ + +Ensembles are supervised machine learning models that contain several decision +tree models. 
In BigML, we offer different flavors of ensembles: bagging, +boosted and random decision forests. + +The structure of an ensemble can be obtained as follows: + +.. code-block:: python + + >>> ensemble = api.get_ensemble("ensemble/5d5aea06e476842219000add") + >>> api.pprint(ensemble["object"]) + { 'boosting': None, + 'category': 0, + 'code': 200, + 'columns': 5, + 'configuration': None, + 'configuration_status': False, + 'created': '2019-08-19T18:27:18.529000', + 'creator': 'mmartin', + 'dataset': 'dataset/5d5ae9f97811dd0195009c17', + 'dataset_field_types': { 'categorical': 1, + 'datetime': 0, + 'items': 0, + 'numeric': 4, + 'preferred': 5, + 'text': 0, + 'total': 5}, + 'dataset_status': False, + 'depth_threshold': 512, + 'description': '', + 'distributions': [ { 'importance': [ ['000002', 0.72548], + ['000003', 0.24971], + ['000001', 0.02481]], + 'predictions': { 'categories': [ [ 'Iris-setosa', + 52], + [ 'Iris-versicolor', + 49], + [ 'Iris-virginica', + 49]]}, + 'training': { 'categories': [ [ 'Iris-setosa', + 52], + [ 'Iris-versicolor', + 49], + [ 'Iris-virginica', + 49]]}}, + { 'importance': [ ['000002', 0.7129], + ['000003', 0.2635], + ['000000', 0.01485], + ['000001', 0.00875]], + 'predictions': { 'categories': [ [ 'Iris-setosa', + 52], + [ 'Iris-versicolor', + 46], + [ 'Iris-virginica', + 52]]}, + 'training': { 'categories': [ [ 'Iris-setosa', + 52], + [ 'Iris-versicolor', + 46], + [ 'Iris-virginica', + 52]]}}], + 'ensemble': { 'fields': { '000000': { 'column_number': 0, + 'datatype': 'double', + 'name': 'sepal length', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': + ... 
+ 'missing_count': 0}, + 'term_analysis': { 'enabled': True}}}}, + 'ensemble_sample': { 'rate': 1, + 'replacement': True, + 'seed': '820c4aa0a34a4fb69392476c6ffc38dc'}, + 'error_models': 0, + 'fields_meta': { 'count': 5, + 'limit': 1000, + 'offset': 0, + 'query_total': 5, + 'total': 5}, + 'finished_models': 2, + 'focus_field': None, + 'focus_field_name': None, + 'fusions': ['fusion/6488ab197411b45de19f1e19'], + 'importance': { '000000': 0.00743, + '000001': 0.01678, + '000002': 0.71919, + '000003': 0.2566}, + 'input_fields': ['000000', '000001', '000002', '000003'], + 'locale': 'en_US', + 'max_columns': 5, + 'max_rows': 150, + 'missing_splits': False, + 'models': [ 'model/5d5aea073514cd6bf200a630', + 'model/5d5aea083514cd6bf200a632'], + 'name': 'iris', + 'name_options': 'bootstrap decision forest, 512-node, 2-model, pruned, ' + 'deterministic order', + 'node_threshold': 512, + 'number_of_batchpredictions': 0, + 'number_of_evaluations': 0, + 'number_of_models': 2, + 'number_of_predictions': 0, + 'number_of_public_predictions': 0, + 'objective_field': '000004', + 'objective_field_details': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'order': 4}, + 'objective_field_name': 'species', + 'objective_field_type': 'categorical', + 'objective_fields': ['000004'], + 'optiml': None, + 'optiml_status': False, + 'ordering': 0, + 'out_of_bag': False, + 'price': 0.0, + 'private': True, + 'project': None, + 'randomize': False, + 'range': None, + 'replacement': False, + 'resource': 'ensemble/5d5aea06e476842219000add', + 'rows': 150, + 'sample_rate': 1.0, + 'selective_pruning': True, + 'shared': True, + 'shared_clonable': True, + 'shared_hash': 'qfCR2ezORt5u8GNyGaTtJqwJemh', + 'sharing_key': '125380a1560a8efdc0e3eedee7bd2ccce1c4936c', + 'size': 4608, + 'source': 'source/5d5ae9f7e47684769e001337', + 'source_status': False, + 'split_candidates': 32, + 'split_field': None, + 'split_field_name': None, + 'stat_pruning': True, + 'status': 
{ 'code': 5, + 'elapsed': 804, + 'message': 'The ensemble has been created', + 'progress': 1}, + 'subscription': False, + 'support_threshold': 0.0, + 'tags': [], + 'type': 0, + 'updated': '2023-06-13T17:44:57.780000', + 'white_box': False} + +Note that the output in the snippet above has been abbreviated. As you see, +the ``number_of_models`` attribute stores the number of decision trees used in the +ensemble and the rest of the dictionary contains the configuration parameters described in the `developers section +`_ . + +Deepnets +~~~~~~~~ + +Deepnets are supervised machine learning models based on deep neural +networks. In BigML, they can be used to solve both classification and +regression problems. + +The structure of a deepnet can be obtained as follows: + +.. code-block:: python + + >>> deepnet = api.get_deepnet("deepnet/64f2193379c602359ec90197") + >>> api.pprint(deepnet["object"]) + { 'category': 0, + 'code': 200, + 'columns': 11, + 'configuration': None, + 'configuration_status': False, + 'created': '2023-09-01T17:02:43.222000', + 'creator': 'mmartin', + 'dataset': 'dataset/64f2192251595a5d90394c1e', + 'dataset_field_types': { 'categorical': 1, + 'datetime': 1, + 'image': 0, + 'items': 0, + 'numeric': 9, + 'path': 0, + 'preferred': 10, + 'regions': 0, + 'text': 0, + 'total': 11}, + 'dataset_status': True, + 'deepnet': { 'batch_normalization': False, + 'deepnet_seed': 'bigml', + 'deepnet_version': 'alpha', + 'dropout_rate': 0.0, + 'fields': { '000000': { 'column_number': 0, + 'datatype': 'string', + 'name': 'cat-0', + 'optype': 'categorical', + 'order': 0, + 'preferred': True, + 'summary': { + ... 
+ 1954.26254, + 'variance': 0.9737}}}, + 'hidden_layers': [ { 'activation_function': 'tanh', + 'number_of_nodes': 64, + 'offset': 'zeros', + 'seed': 0, + 'type': 'dense', + 'weights': 'glorot_uniform'}], + 'holdout_metrics': { 'mean_absolute_error': 0.8178046941757202, + 'mean_squared_error': 1.0125617980957031, + 'median_absolute_error': 0.6850314736366272, + 'r_squared': -0.009405492794412496, + 'spearman_r': 0.07955370033562714}, + 'learn_residuals': False, + 'learning_rate': 0.01, + 'max_iterations': 100, + 'missing_numerics': True, + 'network': { 'image_network': None, + 'layers': [ { 'activation_function': 'tanh', + 'mean': None, + 'number_of_nodes': 64, + 'offset': [ -0.01426, + 0.06489, + 0.00609, + ... + -0.06769, + 0.2289, + 0.03777]]}], + 'output_exposition': { 'mean': -0.06256, + 'stdev': 0.98676, + 'type': 'numeric'}, + 'preprocess': [ { 'index': 0, + 'type': 'categorical', + 'values': [ 'cat0', + 'cat1', + 'cat2']}, + { 'index': 1, + 'mean': 1974.3085, + 'stdev': 43.39534, + 'type': 'numeric'}, + { 'index': 2, + 'mean': 6.459, + 'stdev': 3.4764, + 'type': 'numeric'}, + { 'index': 3, + 'mean': 15.537, + 'stdev': 8.7924, + 'type': 'numeric'}, + { 'index': 4, + 'mean': 4.0015, + 'stdev': 2.02893, + 'type': 'numeric'}, + { 'index': 5, + 'mean': 11.8105, + 'stdev': 6.84646, + 'type': 'numeric'}, + { 'index': 6, + 'mean': 29.3555, + 'stdev': 17.3928, + 'type': 'numeric'}, + { 'index': 7, + 'mean': 29.715, + 'stdev': 17.14149, + 'type': 'numeric'}, + { 'index': 8, + 'mean': 501.6185, + 'stdev': 292.27451, + 'type': 'numeric'}], + 'trees': None}, + 'network_structure': { 'image_network': None, + 'layers': [ { 'activation_function': 'tanh', + 'mean': None, + 'number_of_nodes': 64, + 'offset': 'zeros', + 'residuals': False, + 'scale': None, + 'stdev': None, + 'weights': 'glorot_uniform'}, + { 'activation_function': 'linear', + 'mean': None, + 'number_of_nodes': 1, + 'offset': 'zeros', + 'residuals': False, + 'scale': None, + 'stdev': None, + 'weights': 
'glorot_uniform'}], + 'output_exposition': { 'mean': -0.06256, + 'stdev': 0.98676, + 'type': 'numeric'}, + 'preprocess': [ { 'index': 0, + 'type': 'categorical', + 'values': [ 'cat0', + 'cat1', + 'cat2']}, + { 'index': 1, + 'mean': 1974.3085, + 'stdev': 43.39534, + 'type': 'numeric'}, + { 'index': 2, + 'mean': 6.459, + 'stdev': 3.4764, + 'type': 'numeric'}, + { 'index': 3, + 'mean': 15.537, + 'stdev': 8.7924, + 'type': 'numeric'}, + { 'index': 4, + 'mean': 4.0015, + 'stdev': 2.02893, + 'type': 'numeric'}, + { 'index': 5, + 'mean': 11.8105, + 'stdev': 6.84646, + 'type': 'numeric'}, + { 'index': 6, + 'mean': 29.3555, + 'stdev': 17.3928, + 'type': 'numeric'}, + { 'index': 7, + 'mean': 29.715, + 'stdev': 17.14149, + 'type': 'numeric'}, + { 'index': 8, + 'mean': 501.6185, + 'stdev': 292.27451, + 'type': 'numeric'}], + 'trees': None}, + 'number_of_hidden_layers': 1, + 'number_of_iterations': 100, + 'optimizer': { 'adam': { 'beta1': 0.9, + 'beta2': 0.999, + 'epsilon': 1e-08}}, + 'search': False, + 'suggest_structure': False, + 'tree_embedding': False}, + 'description': '', + 'excluded_fields': [], + 'fields_meta': { 'count': 11, + 'limit': 1000, + 'offset': 0, + 'query_total': 11, + 'total': 11}, + 'importance': { '000000': 0.12331, + '000001-0': 0.25597, + '000001-1': 0.07716, + '000001-2': 0.15659, + '000001-3': 0.11564, + '000001-4': 0.0644, + '000001-5': 0.09814, + '000001-6': 0.0555, + '000001-7': 0.05329}, + 'input_fields': [ '000000', + '000001-0', + '000001-1', + '000001-2', + '000001-3', + '000001-4', + '000001-5', + '000001-6', + '000001-7'], + 'locale': 'en_US', + 'max_columns': 11, + 'max_rows': 2000, + 'name': 'dates2', + 'name_options': '1 hidden layers, adam, learning rate=0.01, 100-iteration, ' + 'beta1=0.9, beta2=0.999, epsilon=1e-08, missing values', + 'number_of_batchpredictions': 0, + 'number_of_evaluations': 0, + 'number_of_predictions': 0, + 'number_of_public_predictions': 0, + 'objective_field': '000002', + 'objective_field_name': 'target-2', + 
'objective_field_type': 'numeric', + 'objective_fields': ['000002'], + 'optiml': None, + 'optiml_status': False, + 'ordering': 0, + 'out_of_bag': False, + 'price': 0.0, + 'private': True, + 'project': 'project/64f2191c4a1a2c29a1084943', + 'range': None, + 'regression_weight_ratio': None, + 'replacement': False, + 'resource': 'deepnet/64f2193379c602359ec90197', + 'rows': 2000, + 'sample_rate': 1.0, + 'shared': False, + 'size': 96976, + 'source': 'source/64f2191f51595a5d8cbf7883', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 10013, + 'message': 'The deepnet has been created', + 'progress': 1.0}, + 'subscription': False, + 'tags': [], + 'type': 0, + 'updated': '2023-09-01T17:11:28.762000', + 'white_box': False} + + +Note that the output in the snippet above has been abbreviated. As you see, +the ``network`` attribute stores the coefficients used in the +neural network structure and the rest of the dictionary shows the +configuration parameters described in the `developers section +`_ . + +OptiMLs +~~~~~~~ + +An OptiML is the result of an automated optimization process to find the +best model (type and configuration) to solve a particular +classification or regression problem. + +The selection process automates the usual time-consuming task of trying +different models and parameters and evaluating their results to find the +best one. Using the OptiML, non-experts can build top-performing models. + +You can create an OptiML by selecting the objective field to be predicted, the +evaluation metric to be used to rank the models tested in the process and +a maximum time for the task to be run. + +The JSON structure for an OptiML is: + +.. 
code-block:: python + + >>> api.pprint(optiml["object"]) + { 'category': 0, + 'code': 200, + 'configuration': None, + 'configuration_status': False, + 'created': '2018-05-17T20:23:00.060000', + 'creator': 'mmartin', + 'dataset': 'dataset/5afdb7009252732d930009e8', + 'dataset_status': True, + 'datasets': [ 'dataset/5afde6488bf7d551ee00081c', + 'dataset/5afde6488bf7d551fd00511f', + 'dataset/5afde6488bf7d551fe002e0f', + ... + 'dataset/5afde64d8bf7d551fd00512e'], + 'description': '', + 'evaluations': [ 'evaluation/5afde65c8bf7d551fd00514c', + 'evaluation/5afde65c8bf7d551fd00514f', + ... + 'evaluation/5afde6628bf7d551fd005161'], + 'excluded_fields': [], + 'fields_meta': { 'count': 5, + 'limit': 1000, + 'offset': 0, + 'query_total': 5, + 'total': 5}, + 'input_fields': ['000000', '000001', '000002', '000003'], + 'model_count': { 'logisticregression': 1, 'model': 8, 'total': 9}, + 'models': [ 'model/5afde64e8bf7d551fd005131', + 'model/5afde64f8bf7d551fd005134', + 'model/5afde6518bf7d551fd005137', + 'model/5afde6538bf7d551fd00513a', + 'logisticregression/5afde6558bf7d551fd00513d', + ... + 'model/5afde65a8bf7d551fd005149'], + 'models_meta': { 'count': 9, 'limit': 1000, 'offset': 0, 'total': 9}, + 'name': 'iris', + 'name_options': '9 total models (logisticregression: 1, model: 8), metric=max_phi, model candidates=18, max. 
training time=300', + 'objective_field': '000004', + 'objective_field_details': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'order': 4}, + 'objective_field_name': 'species', + 'objective_field_type': 'categorical', + 'objective_fields': ['000004'], + 'optiml': { 'created_resources': { 'dataset': 10, + 'logisticregression': 11, + 'logisticregression_evaluation': 11, + 'model': 29, + 'model_evaluation': 29}, + 'datasets': [ { 'id': 'dataset/5afde6488bf7d551ee00081c', + 'name': 'iris', + 'name_options': '120 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.8'}, + { 'id': 'dataset/5afde6488bf7d551fd00511f', + 'name': 'iris', + 'name_options': '30 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.2, out of bag'}, + { 'id': 'dataset/5afde6488bf7d551fe002e0f', + 'name': 'iris', + 'name_options': '120 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.8'}, + ... + { 'id': 'dataset/5afde64d8bf7d551fd00512e', + 'name': 'iris', + 'name_options': '120 instances, 5 fields (1 categorical, 4 numeric), sample rate=0.8'}], + 'fields': { '000000': { 'column_number': 0, + 'datatype': 'double', + 'name': 'sepal length', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'bins': [ [ 4.3, + 1], + ... + [ 7.9, + 1]], + ... + 'sum': 179.9, + 'sum_squares': 302.33, + 'variance': 0.58101}}, + '000004': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'order': 4, + 'preferred': True, + 'summary': { 'categories': [ [ 'Iris-setosa', + 50], + [ 'Iris-versicolor', + 50], + [ 'Iris-virginica', + 50]], + 'missing_count': 0}, + 'term_analysis': { 'enabled': True}}}, + 'max_training_time': 300, + 'metric': 'max_phi', + 'model_types': ['model', 'logisticregression'], + 'models': [ { 'evaluation': { 'id': 'evaluation/5afde65c8bf7d551fd00514c', + 'info': { 'accuracy': 0.96667, + 'average_area_under_pr_curve': 0.97867, + ... 
+ 'per_class_statistics': [ { 'accuracy': 1, + 'area_under_pr_curve': 1, + ... + 'spearmans_rho': 0.82005}]}, + 'metric_value': 0.95356, + 'metric_variance': 0.00079, + 'name': 'iris vs. iris', + 'name_options': '279-node, deterministic order, operating kind=probability'}, + 'evaluation_count': 3, + 'id': 'model/5afde64e8bf7d551fd005131', + 'importance': [ [ '000002', + 0.70997], + [ '000003', + 0.27289], + [ '000000', + 0.0106], + [ '000001', + 0.00654]], + 'kind': 'model', + 'name': 'iris', + 'name_options': '279-node, deterministic order'}, + { 'evaluation': { 'id': 'evaluation/5afde65c8bf7d551fd00514f', + 'info': { 'accuracy': 0.93333, + + ... + [ '000001', + 0.02133]], + 'kind': 'model', + 'name': 'iris', + 'name_options': '12-node, randomize, deterministic order, balanced'}], + 'number_of_model_candidates': 18, + 'recent_evaluations': [ 0.90764, + 0.94952, + ... + 0.90427], + 'search_complete': True, + 'summary': { 'logisticregression': { 'best': 'logisticregression/5afde6558bf7d551fd00513d', + 'count': 1}, + 'model': { 'best': 'model/5afde64e8bf7d551fd005131', + 'count': 8}}}, + 'private': True, + 'project': None, + 'resource': 'optiml/5afde4a42a83475c1b0008a2', + 'shared': False, + 'size': 3686, + 'source': 'source/5afdb6fb9252732d930009e5', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 448878.0, + 'message': 'The optiml has been created', + 'progress': 1}, + 'subscription': False, + 'tags': [], + 'test_dataset': None, + 'type': 0, + 'updated': '2018-05-17T20:30:29.063000'} + + +You can check the optiml properties at the `API documentation +`_. + + +Fusions +~~~~~~~ + +A Fusion is a special type of composed resource for which all +submodels satisfy the following constraints: they're all either +classifications or regressions over the same kind of data or +compatible fields, with the same objective field. Given those +properties, a fusion can be considered a supervised model, +and therefore one can predict with fusions and evaluate them. 
+Ensembles can be viewed as a kind of fusion subject to the additional +constraints that all its submodels are tree models that, moreover, +have been built from the same base input data, but sampled in particular ways. + +The model types allowed to be a submodel of a fusion are: +deepnet, ensemble, fusion, model, logistic regression and linear regression. + +The JSON structure for a Fusion is: + +.. code-block:: python + + >>> api.pprint(fusion["object"]) + { + "category": 0, + "code": 200, + "configuration": null, + "configuration_status": false, + "created": "2018-05-09T20:11:05.821000", + "credits_per_prediction": 0, + "description": "", + "fields_meta": { + "count": 5, + "limit": 1000, + "offset": 0, + "query_total": 5, + "total": 5 + }, + "fusion": { + "models": [ + { + "id": "ensemble/5af272eb4e1727d378000050", + "kind": "ensemble", + "name": "Iris ensemble", + "name_options": "boosted trees, 1999-node, 16-iteration, deterministic order, balanced" + }, + { + "id": "model/5af272fe4e1727d3780000d6", + "kind": "model", + "name": "Iris model", + "name_options": "1999-node, pruned, deterministic order, balanced" + }, + { + "id": "logisticregression/5af272ff4e1727d3780000d9", + "kind": "logisticregression", + "name": "Iris LR", + "name_options": "L2 regularized (c=1), bias, auto-scaled, missing values, eps=0.001" + } + ] + }, + "importance": { + "000000": 0.05847, + "000001": 0.03028, + "000002": 0.13582, + "000003": 0.4421 + }, + "model_count": { + "ensemble": 1, + "logisticregression": 1, + "model": 1, + "total": 3 + }, + "models": [ + "ensemble/5af272eb4e1727d378000050", + "model/5af272fe4e1727d3780000d6", + "logisticregression/5af272ff4e1727d3780000d9" + ], + "models_meta": { + "count": 3, + "limit": 1000, + "offset": 0, + "total": 3 + }, + "name": "iris", + "name_options": "3 total models (ensemble: 1, logisticregression: 1, model: 1)", + "number_of_batchpredictions": 0, + "number_of_evaluations": 0, + "number_of_predictions": 0, + 
"number_of_public_predictions": 0, + "objective_field": "000004", + "objective_field_details": { + "column_number": 4, + "datatype": "string", + "name": "species", + "optype": "categorical", + "order": 4 + }, + "objective_field_name": "species", + "objective_field_type": "categorical", + "objective_fields": [ + "000004" + ], + "private": true, + "project": null, + "resource":"fusion/59af8107b8aa0965d5b61138", + "shared": false, + "status": { + "code": 5, + "elapsed": 8420, + "message": "The fusion has been created", + "progress": 1 + }, + "subscription": false, + "tags": [], + "type": 0, + "updated": "2018-05-09T20:11:14.258000" + } + +You can check the fusion properties at the `API documentation +`_. + + +Time Series +~~~~~~~~~~~ + +A time series model is a supervised learning method to forecast the future +values of a field based on its previously observed values. +It is used to analyze time based data when historical patterns can explain +the future behavior such as stock prices, sales forecasting, +website traffic, production and inventory analysis, weather forecasting, etc. +A time series model needs to be trained with time series data, +i.e., a field containing a sequence of equally distributed data points in time. + +BigML implements exponential smoothing to train time series models. +Time series data is modeled as a level component and it can optionally +include a trend (damped or not damped) and a seasonality +component. You can learn more about how to include these components and their +use in the `API documentation page `_. + +You can create a time series model by selecting one or several fields from +your dataset, that will be the objective fields. The forecast will compute +their future values. + + +The JSON structure for a time series is: + +.. 
code-block:: python + + >>> api.pprint(time_series['object']) + { 'category': 0, + 'clones': 0, + 'code': 200, + 'columns': 1, + 'configuration': None, + 'configuration_status': False, + 'created': '2017-07-15T12:49:42.601000', + 'credits': 0.0, + 'dataset': 'dataset/5968ec42983efc21b0000016', + 'dataset_field_types': { 'categorical': 0, + 'datetime': 0, + 'effective_fields': 6, + 'items': 0, + 'numeric': 6, + 'preferred': 6, + 'text': 0, + 'total': 6}, + 'dataset_status': True, + 'dataset_type': 0, + 'description': '', + 'fields_meta': { 'count': 1, + 'limit': 1000, + 'offset': 0, + 'query_total': 1, + 'total': 1}, + 'forecast': { '000005': [ { 'lower_bound': [ 30.14111, + 30.14111, + ... + 30.14111], + 'model': 'A,N,N', + 'point_forecast': [ 68.53181, + 68.53181, + ... + 68.53181, + 68.53181], + 'time_range': { 'end': 129, + 'interval': 1, + 'interval_unit': 'milliseconds', + 'start': 80}, + 'upper_bound': [ 106.92251, + 106.92251, + ... + 106.92251, + 106.92251]}, + { 'lower_bound': [ 35.44118, + 35.5032, + ... + 35.28083], + 'model': 'A,Ad,N', + ... 
+ 66.83537, + 66.9465], + 'time_range': { 'end': 129, + 'interval': 1, + 'interval_unit': 'milliseconds', + 'start': 80}}]}, + 'horizon': 50, + 'locale': 'en_US', + 'max_columns': 6, + 'max_rows': 80, + 'name': 'my_ts_data', + 'name_options': 'period=1, range=[1, 80]', + 'number_of_evaluations': 0, + 'number_of_forecasts': 0, + 'number_of_public_forecasts': 0, + 'objective_field': '000005', + 'objective_field_name': 'Final', + 'objective_field_type': 'numeric', + 'objective_fields': ['000005'], + 'objective_fields_names': ['Final'], + 'price': 0.0, + 'private': True, + 'project': None, + 'range': [1, 80], + 'resource': 'timeseries/596a0f66983efc53f3000000', + 'rows': 80, + 'shared': False, + 'short_url': '', + 'size': 2691, + 'source': 'source/5968ec3c983efc218c000006', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 8358, + 'message': 'The time series has been created', + 'progress': 1.0}, + 'subscription': True, + 'tags': [], + 'time_series': { 'all_numeric_objectives': False, + 'datasets': { '000005': 'dataset/596a0f70983efc53f3000003'}, + 'ets_models': { '000005': [ { 'aic': 831.30903, + 'aicc': 831.84236, + 'alpha': 0.00012, + 'beta': 0, + 'bic': 840.83713, + 'final_state': { 'b': 0, + 'l': 68.53181, + 's': [ 0]}, + 'gamma': 0, + 'initial_state': { 'b': 0, + 'l': 68.53217, + 's': [ 0]}, + 'name': 'A,N,N', + 'period': 1, + 'phi': 1, + 'r_squared': -0.0187, + 'sigma': 19.19535}, + { 'aic': 834.43049, + ... + 'slope': 0.11113, + 'value': 61.39}]}, + 'fields': { '000005': { 'column_number': 5, + 'datatype': 'double', + 'name': 'Final', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'bins': [ [ 28.06, + 1], + [ 34.44, + ... + [ 108.335, + 2]], + ... 
+ 'sum_squares': 389814.3944, + 'variance': 380.73315}}}, + 'period': 1, + 'time_range': { 'end': 79, + 'interval': 1, + 'interval_unit': 'milliseconds', + 'start': 0}}, + 'type': 0, + 'updated': '2017-07-15T12:49:52.549000', + 'white_box': False} + + +You can check the time series properties at the `API documentation +`_. + + +Unsupervised Models +------------------- + +Cluster +~~~~~~~ + +For unsupervised learning problems, the cluster is used to classify in a +limited number of groups your training data. The cluster structure is defined +by the centers of each group of data, named centroids, and the data enclosed +in the group. As for in the model's case, the cluster is a white-box resource +and can be retrieved as a JSON: + +.. code-block:: python + + >>> cluster = api.get_cluster(cluster) + >>> api.pprint(cluster['object']) + { 'balance_fields': True, + 'category': 0, + 'cluster_datasets': { '000000': '', '000001': '', '000002': ''}, + 'cluster_datasets_ids': { '000000': '53739b9ae4b0dad82b0a65e6', + '000001': '53739b9ae4b0dad82b0a65e7', + '000002': '53739b9ae4b0dad82b0a65e8'}, + 'cluster_seed': '2c249dda00fbf54ab4cdd850532a584f286af5b6', + 'clusters': { 'clusters': [ { 'center': { '000000': 58.5, + '000001': 26.8314, + '000002': 44.27907, + '000003': 14.37209}, + 'count': 56, + 'distance': { 'bins': [ [ 0.69602, + 2], + [ ... ] + [ 3.77052, + 1]], + 'maximum': 3.77052, + 'mean': 1.61711, + 'median': 1.52146, + 'minimum': 0.69237, + 'population': 56, + 'standard_deviation': 0.6161, + 'sum': 90.55805, + 'sum_squares': 167.31926, + 'variance': 0.37958}, + 'id': '000000', + 'name': 'Cluster 0'}, + { 'center': { '000000': 50.06, + '000001': 34.28, + '000002': 14.62, + '000003': 2.46}, + 'count': 50, + 'distance': { 'bins': [ [ 0.16917, + 1], + [ ... 
] + [ 4.94699, + 1]], + 'maximum': 4.94699, + 'mean': 1.50725, + 'median': 1.3393, + 'minimum': 0.16917, + 'population': 50, + 'standard_deviation': 1.00994, + 'sum': 75.36252, + 'sum_squares': 163.56918, + 'variance': 1.01998}, + 'id': '000001', + 'name': 'Cluster 1'}, + { 'center': { '000000': 68.15625, + '000001': 31.25781, + '000002': 55.48438, + '000003': 19.96875}, + 'count': 44, + 'distance': { 'bins': [ [ 0.36825, + 1], + [ ... ] + [ 3.87216, + 1]], + 'maximum': 3.87216, + 'mean': 1.67264, + 'median': 1.63705, + 'minimum': 0.36825, + 'population': 44, + 'standard_deviation': 0.78905, + 'sum': 73.59627, + 'sum_squares': 149.87194, + 'variance': 0.6226}, + 'id': '000002', + 'name': 'Cluster 2'}], + 'fields': { '000000': { 'column_number': 0, + 'datatype': 'int8', + 'name': 'sepal length', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'bins': [ [ 43.75, + 4], + [ ... ] + [ 79, + 1]], + 'maximum': 79, + 'mean': 58.43333, + 'median': 57.7889, + 'minimum': 43, + 'missing_count': 0, + 'population': 150, + 'splits': [ 45.15258, + 46.72525, + 72.04226, + 76.47461], + 'standard_deviation': 8.28066, + 'sum': 8765, + 'sum_squares': 522385, + 'variance': 68.56935}}, + [ ... 
] + [ 25, + 3]], + 'maximum': 25, + 'mean': 11.99333, + 'median': 13.28483, + 'minimum': 1, + 'missing_count': 0, + 'population': 150, + 'standard_deviation': 7.62238, + 'sum': 1799, + 'sum_squares': 30233, + 'variance': 58.10063}}}}, + 'code': 202, + 'columns': 4, + 'created': '2014-05-14T16:36:40.993000', + 'credits': 0.017578125, + 'credits_per_prediction': 0.0, + 'dataset': 'dataset/53739b88c8db63122b000411', + 'dataset_field_types': { 'categorical': 1, + 'datetime': 0, + 'numeric': 4, + 'preferred': 5, + 'text': 0, + 'total': 5}, + 'dataset_status': True, + 'dataset_type': 0, + 'description': '', + 'excluded_fields': ['000004'], + 'field_scales': None, + 'fields_meta': { 'count': 4, + 'limit': 1000, + 'offset': 0, + 'query_total': 4, + 'total': 4}, + 'input_fields': ['000000', '000001', '000002', '000003'], + 'k': 3, + 'locale': 'es-ES', + 'max_columns': 5, + 'max_rows': 150, + 'name': 'my iris', + 'number_of_batchcentroids': 0, + 'number_of_centroids': 0, + 'number_of_public_centroids': 0, + 'out_of_bag': False, + 'price': 0.0, + 'private': True, + 'range': [1, 150], + 'replacement': False, + 'resource': 'cluster/53739b98d994972da7001de9', + 'rows': 150, + 'sample_rate': 1.0, + 'scales': { '000000': 0.22445382597655375, + '000001': 0.4264213814821549, + '000002': 0.10528680248949522, + '000003': 0.2438379900517961}, + 'shared': False, + 'size': 4608, + 'source': 'source/53739b24d994972da7001ddd', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 1009, + 'message': 'The cluster has been created', + 'progress': 1.0}, + 'subscription': True, + 'tags': [], + 'updated': '2014-05-14T16:40:26.234728', + 'white_box': False} + +(Note that we have abbreviated the output in the snippet above for +readability: the full predictive cluster you'll get is going to contain +many more details). + +You can check the cluster properties at the `API documentation +`_. 
+ +Anomaly detector +~~~~~~~~~~~~~~~~ + +For anomaly detection problems, BigML anomaly detector uses iforest as an +unsupervised kind of model that detects anomalous data in a dataset. The +information it returns encloses a `top_anomalies` block +that contains a list of the most anomalous +points. For each, we capture a `score` from 0 to 1. The closer to 1, +the more anomalous. We also capture the `row` which gives values for +each field in the order defined by `input_fields`. Similarly we give +a list of `importances` which match the `row` values. These +importances tell us which values contributed most to the anomaly +score. Thus, the structure of an anomaly detector is similar to: + +.. code-block:: python + + { 'category': 0, + 'code': 200, + 'columns': 14, + 'constraints': False, + 'created': '2014-09-08T18:51:11.893000', + 'credits': 0.11653518676757812, + 'credits_per_prediction': 0.0, + 'dataset': 'dataset/540dfa9d9841fa5c88000765', + 'dataset_field_types': { 'categorical': 21, + 'datetime': 0, + 'numeric': 21, + 'preferred': 14, + 'text': 0, + 'total': 42}, + 'dataset_status': True, + 'dataset_type': 0, + 'description': '', + 'excluded_fields': [], + 'fields_meta': { 'count': 14, + 'limit': 1000, + 'offset': 0, + 'query_total': 14, + 'total': 14}, + 'forest_size': 128, + 'input_fields': [ '000004', + '000005', + '000009', + '000016', + '000017', + '000018', + '000019', + '00001e', + '00001f', + '000020', + '000023', + '000024', + '000025', + '000026'], + 'locale': 'en_US', + 'max_columns': 42, + 'max_rows': 200, + 'model': { 'fields': { '000004': { 'column_number': 4, + 'datatype': 'int16', + 'name': 'src_bytes', + 'optype': 'numeric', + 'order': 0, + 'preferred': True, + 'summary': { 'bins': [ [ 143, + 2], + ... + [ 370, + 2]], + 'maximum': 370, + 'mean': 248.235, + 'median': 234.57157, + 'minimum': 141, + 'missing_count': 0, + 'population': 200, + 'splits': [ 159.92462, + 173.73312, + 188, + ... 
+ 339.55228], + 'standard_deviation': 49.39869, + 'sum': 49647, + 'sum_squares': 12809729, + 'variance': 2440.23093}}, + '000005': { 'column_number': 5, + 'datatype': 'int32', + 'name': 'dst_bytes', + 'optype': 'numeric', + 'order': 1, + 'preferred': True, + ... + 'sum': 1030851, + 'sum_squares': 22764504759, + 'variance': 87694652.45224}}, + '000009': { 'column_number': 9, + 'datatype': 'string', + 'name': 'hot', + 'optype': 'categorical', + 'order': 2, + 'preferred': True, + 'summary': { 'categories': [ [ '0', + 199], + [ '1', + 1]], + 'missing_count': 0}, + 'term_analysis': { 'enabled': True}}, + '000016': { 'column_number': 22, + 'datatype': 'int8', + 'name': 'count', + 'optype': 'numeric', + 'order': 3, + 'preferred': True, + ... + 'population': 200, + 'standard_deviation': 5.42421, + 'sum': 1351, + 'sum_squares': 14981, + 'variance': 29.42209}}, + '000017': { ... }}}, + 'kind': 'iforest', + 'mean_depth': 12.314174107142858, + 'top_anomalies': [ { 'importance': [ 0.06768, + 0.01667, + 0.00081, + 0.02437, + 0.04773, + 0.22197, + 0.18208, + 0.01868, + 0.11855, + 0.01983, + 0.01898, + 0.05306, + 0.20398, + 0.00562], + 'row': [ 183.0, + 8654.0, + '0', + 4.0, + 4.0, + 0.25, + 0.25, + 0.0, + 123.0, + 255.0, + 0.01, + 0.04, + 0.01, + 0.0], + 'score': 0.68782}, + { 'importance': [ 0.05645, + 0.02285, + 0.0015, + 0.05196, + 0.04435, + 0.0005, + 0.00056, + 0.18979, + 0.12402, + 0.23671, + 0.20723, + 0.05651, + 0.00144, + 0.00612], + 'row': [ 212.0, + 1940.0, + '0', + 1.0, + 2.0, + 0.0, + 0.0, + 1.0, + 1.0, + 69.0, + 1.0, + 0.04, + 0.0, + 0.0], + 'score': 0.6239}, + ...], + 'trees': [ { 'root': { 'children': [ { 'children': [ { 'children': [ { 'children': [ { 'children': + [ { 'population': 1, + 'predicates': [ { 'field': '00001f', + 'op': '>', + 'value': 35.54357}]}, + + ... 
+ { 'population': 1, + 'predicates': [ { 'field': '00001f', + 'op': '<=', + 'value': 35.54357}]}], + 'population': 2, + 'predicates': [ { 'field': '000005', + 'op': '<=', + 'value': 1385.5166}]}], + 'population': 3, + 'predicates': [ { 'field': '000020', + 'op': '<=', + 'value': 65.14308}, + { 'field': '000019', + 'op': '=', + 'value': 0}]}], + 'population': 105, + 'predicates': [ { 'field': '000017', + 'op': '<=', + 'value': 13.21754}, + { 'field': '000009', + 'op': 'in', + 'value': [ '0']}]}], + 'population': 126, + 'predicates': [ True, + { 'field': '000018', + 'op': '=', + 'value': 0}]}, + 'training_mean_depth': 11.071428571428571}]}, + 'name': "tiny_kdd's dataset anomaly detector", + 'number_of_batchscores': 0, + 'number_of_public_predictions': 0, + 'number_of_scores': 0, + 'out_of_bag': False, + 'price': 0.0, + 'private': True, + 'project': None, + 'range': [1, 200], + 'replacement': False, + 'resource': 'anomaly/540dfa9f9841fa5c8800076a', + 'rows': 200, + 'sample_rate': 1.0, + 'sample_size': 126, + 'seed': 'BigML', + 'shared': False, + 'size': 30549, + 'source': 'source/540dfa979841fa5c7f000363', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 32397, + 'message': 'The anomaly detector has been created', + 'progress': 1.0}, + 'subscription': False, + 'tags': [], + 'updated': '2014-09-08T23:54:28.647000', + 'white_box': False} + +Note that we have abbreviated the output in the snippet above for +readability: the full anomaly detector you'll get is going to contain +many more details. + +The `trees` list contains the actual isolation forest, and it can usually be +quite large. That's why this part of the resource should only be included +in downloads when needed. If you are only interested in other properties, such +as `top_anomalies`, you'll improve performance by excluding it, using the +`excluded=trees` query string in the API call: + +.. 
code-block:: python + + anomaly = api.get_anomaly('anomaly/540dfa9f9841fa5c8800076a', \ + query_string='excluded=trees') + +Each node in an isolation tree can have multiple predicates. +For the node to be a valid branch when evaluated with a data point, all of its +predicates must be true. + +You can check the anomaly detector properties at the `API documentation +`_. + +Associations +~~~~~~~~~~~~ + +Association Discovery is a popular method to find out relations among values +in high-dimensional datasets. + +A common case where association discovery is often used is +market basket analysis. This analysis looks for customer shopping +patterns across large transactional +datasets. For instance, do customers who buy hamburgers and ketchup also +consume bread? + +Businesses use those insights to make decisions on promotions and product +placements. +Association Discovery can also be used for other purposes such as early +incident detection, web usage analysis, or software intrusion detection. + +In BigML, the Association resource object can be built from any dataset, and +its results are a list of association rules between the items in the dataset. +In the example case, the corresponding +association rule would have hamburgers and ketchup as the items at the +left hand side of the association rule and bread would be the item at the +right hand side. Both sides in this association rule are related, +in the sense that observing +the items in the left hand side implies observing the items in the right hand +side. There are some metrics to assess the quality of these association rules: + +- Support: the proportion of instances which contain an itemset. + +For an association rule, it means the number of instances in the dataset which +contain the rule's antecedent and rule's consequent together +over the total number of instances (N) in the dataset. + +It gives a measure of the importance of the rule. 
Association rules have +to satisfy a minimum support constraint (i.e., min_support). + +- Coverage: the support of the antecedent of an association rule. +It measures how often a rule can be applied. + +- Confidence (or strength): The probability of seeing the rule's consequent +under the condition that the instances also contain the rule's antecedent. +Confidence is computed using the support of the association rule over the +coverage. That is, the number of instances which contain the consequent +and antecedent together over the number of instances which contain +the antecedent. + +Confidence is directed and gives different values for the association +rules Antecedent → Consequent and Consequent → Antecedent. Association +rules also need to satisfy a minimum confidence constraint +(i.e., min_confidence). + +- Leverage: the difference of the support of the association +rule (i.e., the antecedent and consequent appearing together) and what would +be expected if antecedent and consequent were statistically independent. +This is a value between -1 and 1. A positive value suggests a positive +relationship and a negative value suggests a negative relationship. +0 indicates independence. + +- Lift: how many times more often antecedent and consequent occur together +than expected if they were statistically independent. +A value of 1 suggests that there is no relationship between the antecedent +and the consequent. Higher values suggest stronger positive relationships. +Lower values suggest stronger negative relationships (the presence of the +antecedent reduces the likelihood of the consequent). + +As to the items used in association rules, each type of field is parsed to +extract items for the rules as follows: + +- Categorical: each different value (class) will be considered a separate item. +- Text: each unique term will be considered a separate item. +- Items: each different item in the items summary will be considered. 
+- Numeric: Values will be converted into categorical by making a +segmentation of the values. +For example, a numeric field with values ranging from 0 to 600 split +into 3 segments: +segment 1 → [0, 200), segment 2 → [200, 400), segment 3 → [400, 600]. +You can refine the behavior of the transformation using +`discretization `_ +and `field_discretizations `_. + +The JSON structure for an association resource is: + +.. code-block:: python + + + >>> api.pprint(association['object']) + { + "associations":{ + "complement":false, + "discretization":{ + "pretty":true, + "size":5, + "trim":0, + "type":"width" + }, + "items":[ + { + "complement":false, + "count":32, + "field_id":"000000", + "name":"Segment 1", + "bin_end":5, + "bin_start":null + }, + { + "complement":false, + "count":49, + "field_id":"000000", + "name":"Segment 3", + "bin_end":7, + "bin_start":6 + }, + { + "complement":false, + "count":12, + "field_id":"000000", + "name":"Segment 4", + "bin_end":null, + "bin_start":7 + }, + { + "complement":false, + "count":19, + "field_id":"000001", + "name":"Segment 1", + "bin_end":2.5, + "bin_start":null + }, + ... + { + "complement":false, + "count":50, + "field_id":"000004", + "name":"Iris-versicolor" + }, + { + "complement":false, + "count":50, + "field_id":"000004", + "name":"Iris-virginica" + } + ], + "max_k": 100, + "min_confidence":0, + "min_leverage":0, + "min_lift":1, + "min_support":0, + "rules":[ + { + "confidence":1, + "id":"000000", + "leverage":0.22222, + "lhs":[ + 13 + ], + "lhs_cover":[ + 0.33333, + 50 + ], + "lift":3, + "p_value":0.000000000, + "rhs":[ + 6 + ], + "rhs_cover":[ + 0.33333, + 50 + ], + "support":[ + 0.33333, + 50 + ] + }, + { + "confidence":1, + "id":"000001", + "leverage":0.22222, + "lhs":[ + 6 + ], + "lhs_cover":[ + 0.33333, + 50 + ], + "lift":3, + "p_value":0.000000000, + "rhs":[ + 13 + ], + "rhs_cover":[ + 0.33333, + 50 + ], + "support":[ + 0.33333, + 50 + ] + }, + ... 
+ { + "confidence":0.26, + "id":"000029", + "leverage":0.05111, + "lhs":[ + 13 + ], + "lhs_cover":[ + 0.33333, + 50 + ], + "lift":2.4375, + "p_value":0.0000454342, + "rhs":[ + 5 + ], + "rhs_cover":[ + 0.10667, + 16 + ], + "support":[ + 0.08667, + 13 + ] + }, + { + "confidence":0.18, + "id":"00002a", + "leverage":0.04, + "lhs":[ + 15 + ], + "lhs_cover":[ + 0.33333, + 50 + ], + "lift":3, + "p_value":0.0000302052, + "rhs":[ + 9 + ], + "rhs_cover":[ + 0.06, + 9 + ], + "support":[ + 0.06, + 9 + ] + }, + { + "confidence":1, + "id":"00002b", + "leverage":0.04, + "lhs":[ + 9 + ], + "lhs_cover":[ + 0.06, + 9 + ], + "lift":3, + "p_value":0.0000302052, + "rhs":[ + 15 + ], + "rhs_cover":[ + 0.33333, + 50 + ], + "support":[ + 0.06, + 9 + ] + } + ], + "rules_summary":{ + "confidence":{ + "counts":[ + [ + 0.18, + 1 + ], + [ + 0.24, + 1 + ], + [ + 0.26, + 2 + ], + ... + [ + 0.97959, + 1 + ], + [ + 1, + 9 + ] + ], + "maximum":1, + "mean":0.70986, + "median":0.72864, + "minimum":0.18, + "population":44, + "standard_deviation":0.24324, + "sum":31.23367, + "sum_squares":24.71548, + "variance":0.05916 + }, + "k":44, + "leverage":{ + "counts":[ + [ + 0.04, + 2 + ], + [ + 0.05111, + 4 + ], + [ + 0.05316, + 2 + ], + ... + [ + 0.22222, + 2 + ] + ], + "maximum":0.22222, + "mean":0.10603, + "median":0.10156, + "minimum":0.04, + "population":44, + "standard_deviation":0.0536, + "sum":4.6651, + "sum_squares":0.61815, + "variance":0.00287 + }, + "lhs_cover":{ + "counts":[ + [ + 0.06, + 2 + ], + [ + 0.08, + 2 + ], + [ + 0.10667, + 4 + ], + [ + 0.12667, + 1 + ], + ... + [ + 0.5, + 4 + ] + ], + "maximum":0.5, + "mean":0.29894, + "median":0.33213, + "minimum":0.06, + "population":44, + "standard_deviation":0.13386, + "sum":13.15331, + "sum_squares":4.70252, + "variance":0.01792 + }, + "lift":{ + "counts":[ + [ + 1.40625, + 2 + ], + [ + 1.5067, + 2 + ], + ... 
+ [ + 2.63158, + 4 + ], + [ + 3, + 10 + ], + [ + 4.93421, + 2 + ], + [ + 12.5, + 2 + ] + ], + "maximum":12.5, + "mean":2.91963, + "median":2.58068, + "minimum":1.40625, + "population":44, + "standard_deviation":2.24641, + "sum":128.46352, + "sum_squares":592.05855, + "variance":5.04635 + }, + "p_value":{ + "counts":[ + [ + 0.000000000, + 2 + ], + [ + 0.000000000, + 4 + ], + [ + 0.000000000, + 2 + ], + ... + [ + 0.0000910873, + 2 + ] + ], + "maximum":0.0000910873, + "mean":0.0000106114, + "median":0.00000000, + "minimum":0.000000000, + "population":44, + "standard_deviation":0.0000227364, + "sum":0.000466903, + "sum_squares":0.0000000, + "variance":0.000000001 + }, + "rhs_cover":{ + "counts":[ + [ + 0.06, + 2 + ], + [ + 0.08, + 2 + ], + ... + [ + 0.42667, + 2 + ], + [ + 0.46667, + 3 + ], + [ + 0.5, + 4 + ] + ], + "maximum":0.5, + "mean":0.29894, + "median":0.33213, + "minimum":0.06, + "population":44, + "standard_deviation":0.13386, + "sum":13.15331, + "sum_squares":4.70252, + "variance":0.01792 + }, + "support":{ + "counts":[ + [ + 0.06, + 4 + ], + [ + 0.06667, + 2 + ], + [ + 0.08, + 2 + ], + [ + 0.08667, + 4 + ], + [ + 0.10667, + 4 + ], + [ + 0.15333, + 2 + ], + [ + 0.18667, + 4 + ], + [ + 0.19333, + 2 + ], + [ + 0.20667, + 2 + ], + [ + 0.27333, + 2 + ], + [ + 0.28667, + 2 + ], + [ + 0.3, + 4 + ], + [ + 0.32, + 2 + ], + [ + 0.33333, + 6 + ], + [ + 0.37333, + 2 + ] + ], + "maximum":0.37333, + "mean":0.20152, + "median":0.19057, + "minimum":0.06, + "population":44, + "standard_deviation":0.10734, + "sum":8.86668, + "sum_squares":2.28221, + "variance":0.01152 + } + }, + "search_strategy":"leverage", + "significance_level":0.05 + }, + "category":0, + "clones":0, + "code":200, + "columns":5, + "created":"2015-11-05T08:06:08.184000", + "credits":0.017581939697265625, + "dataset":"dataset/562fae3f4e1727141d00004e", + "dataset_status":true, + "dataset_type":0, + "description":"", + "excluded_fields":[ ], + "fields_meta":{ + "count":5, + "limit":1000, + "offset":0, + 
"query_total":5, + "total":5 + }, + "input_fields":[ + "000000", + "000001", + "000002", + "000003", + "000004" + ], + "locale":"en_US", + "max_columns":5, + "max_rows":150, + "name":"iris' dataset's association", + "out_of_bag":false, + "price":0, + "private":true, + "project":null, + "range":[ + 1, + 150 + ], + "replacement":false, + "resource":"association/5621b70910cb86ae4c000000", + "rows":150, + "sample_rate":1, + "shared":false, + "size":4609, + "source":"source/562fae3a4e1727141d000048", + "source_status":true, + "status":{ + "code":5, + "elapsed":1072, + "message":"The association has been created", + "progress":1 + }, + "subscription":false, + "tags":[ ], + "updated":"2015-11-05T08:06:20.403000", + "white_box":false + } +Note that the output in the snippet above has been abbreviated. As you see, +the ``associations`` attribute stores items, rules and metrics extracted +from the datasets as well as the configuration parameters described in +the `developers section `_ . + + +Topic Models +~~~~~~~~~~~~ + +A topic model is an unsupervised machine learning method +for unveiling all the different topics +underlying a collection of documents. +BigML uses Latent Dirichlet Allocation (LDA), one of the most popular +probabilistic methods for topic modeling. +In BigML, each instance (i.e. each row in your dataset) will +be considered a document and the contents of all the text fields +given as inputs will be automatically concatenated and considered the +document bag of words. + +Topic model is based on the assumption that any document +exhibits a mixture of topics. Each topic is composed of a set of words +which are thematically related. The words from a given topic have different +probabilities for that topic. At the same time, each word can be attributable +to one or several topics. So for example the word "sea" may be found in +a topic related with sea transport but also in a topic related to holidays. 
+Topic model automatically discards stop words and high +frequency words. + +Topic model's main applications include browsing, organizing and understanding +large archives of documents. It can be applied to information retrieval, +collaborative filtering and assessing document similarity, among others. +The topics found in the dataset can also be very useful new features +before applying other models like classification, clustering, or +anomaly detection. + +The JSON structure for a topic model is: + +.. code-block:: python + + >>> api.pprint(topic['object']) + { 'category': 0, + 'code': 200, + 'columns': 1, + 'configuration': None, + 'configuration_status': False, + 'created': '2016-11-23T23:47:54.703000', + 'credits': 0.0, + 'credits_per_prediction': 0.0, + 'dataset': 'dataset/58362aa0983efc45a0000005', + 'dataset_field_types': { 'categorical': 1, + 'datetime': 0, + 'effective_fields': 672, + 'items': 0, + 'numeric': 0, + 'preferred': 2, + 'text': 1, + 'total': 2}, + 'dataset_status': True, + 'dataset_type': 0, + 'description': '', + 'excluded_fields': [], + 'fields_meta': { 'count': 1, + 'limit': 1000, + 'offset': 0, + 'query_total': 1, + 'total': 1}, + 'input_fields': ['000001'], + 'locale': 'en_US', + 'max_columns': 2, + 'max_rows': 656, + 'name': u"spam dataset's Topic Model ", + 'number_of_batchtopicdistributions': 0, + 'number_of_public_topicdistributions': 0, + 'number_of_topicdistributions': 0, + 'ordering': 0, + 'out_of_bag': False, + 'price': 0.0, + 'private': True, + 'project': None, + 'range': [1, 656], + 'replacement': False, + 'resource': 'topicmodel/58362aaa983efc45a1000007', + 'rows': 656, + 'sample_rate': 1.0, + 'shared': False, + 'size': 54740, + 'source': 'source/58362a69983efc459f000001', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 3222, + 'message': 'The topic model has been created', + 'progress': 1.0}, + 'subscription': True, + 'tags': [], + 'topic_model': { 'alpha': 4.166666666666667, + 'beta': 0.1, + 'bigrams': False, + 
'case_sensitive': False, + 'fields': { '000001': { 'column_number': 1, + 'datatype': 'string', + 'name': 'Message', + 'optype': 'text', + 'order': 0, + 'preferred': True, + 'summary': { 'average_length': 78.14787, + 'missing_count': 0, + 'tag_cloud': [ [ 'call', + 72], + [ 'ok', + 36], + [ 'gt', + 34], + ... + [ 'worse', + 2], + [ 'worth', + 2], + [ 'write', + 2], + [ 'yest', + 2], + [ 'yijue', + 2]], + 'term_forms': { }}, + 'term_analysis': { 'case_sensitive': False, + 'enabled': True, + 'language': 'en', + 'stem_words': False, + 'token_mode': 'all', + 'use_stopwords': False}}}, + 'hashed_seed': 62146850, + 'language': 'en', + 'number_of_topics': 12, + 'term_limit': 4096, + 'term_topic_assignments': [ [ 0, + 5, + 0, + 1, + 0, + 19, + 0, + 0, + 19, + 0, + 1, + 0], + [ 0, + 0, + 0, + 13, + 0, + 0, + 0, + 0, + 5, + 0, + 0, + 0], + ... + [ 0, + 7, + 27, + 0, + 112, + 0, + 0, + 0, + 0, + 0, + 14, + 2]], + 'termset': [ '000', + '03', + '04', + '06', + '08000839402', + '08712460324', + ... + + 'yes', + 'yest', + 'yesterday', + 'yijue', + 'yo', + 'yr', + 'yup', + '\xfc'], + 'top_n_terms': 10, + 'topicmodel_seed': '26c386d781963ca1ea5c90dab8a6b023b5e1d180', + 'topics': [ { 'id': '000000', + 'name': 'Topic 00', + 'probability': 0.09375, + 'top_terms': [ [ 'im', + 0.04849], + [ 'hi', + 0.04717], + [ 'love', + 0.04585], + [ 'please', + 0.02867], + [ 'tomorrow', + 0.02867], + [ 'cos', + 0.02823], + [ 'sent', + 0.02647], + [ 'da', + 0.02383], + [ 'meet', + 0.02207], + [ 'dinner', + 0.01898]]}, + { 'id': '000001', + 'name': 'Topic 01', + 'probability': 0.08215, + 'top_terms': [ [ 'lt', + 0.1015], + [ 'gt', + 0.1007], + [ 'wish', + 0.03958], + [ 'feel', + 0.0272], + [ 'shit', + 0.02361], + [ 'waiting', + 0.02281], + [ 'stuff', + 0.02001], + [ 'name', + 0.01921], + [ 'comp', + 0.01522], + [ 'forgot', + 0.01482]]}, + ... 
+ { 'id': '00000b', + 'name': 'Topic 11', + 'probability': 0.0826, + 'top_terms': [ [ 'call', + 0.15084], + [ 'min', + 0.05003], + [ 'msg', + 0.03185], + [ 'home', + 0.02648], + [ 'mind', + 0.02152], + [ 'lt', + 0.01987], + [ 'bring', + 0.01946], + [ 'camera', + 0.01905], + [ 'set', + 0.01905], + [ 'contact', + 0.01781]]}], + 'use_stopwords': False}, + 'updated': '2016-11-23T23:48:03.336000', + 'white_box': False} + +Note that the output in the snippet above has been abbreviated. + + +The topic model returns a list of top terms for each topic found in the data. +Note that topics are not labeled, so you have to infer their meaning according +to the words they are composed of. + +Once you build the topic model you can calculate each topic probability +for a given document by using Topic Distribution. +This information can be useful to find document similarities based +on their topics. + +As you see, +the ``topic_model`` attribute stores the topics, the termset and the term to +topic assignments, +as well as the configuration parameters described in +the `developers section `_ . + +PCAs +~~~~ + +A PCA (Principal Component Analysis) resource fits a number of orthogonal +projections (components) to maximally capture the variance in a dataset. This +is a dimensionality reduction technique, as it can be used to reduce +the number of inputs for the modeling step. PCA models belong to the +unsupervised class of models (there is no objective field). + +The JSON structure for a PCA is: + +.. 
code-block:: python + + + {'code': 200, + 'error': None, + 'location': 'https://strato.dev.bigml.io/andromeda/pca/5c002572983efc0ac5000003', + 'object': {'category': 0, + 'code': 200, + 'columns': 2, + 'configuration': None, + 'configuration_status': False, + 'created': '2018-11-29T17:44:18.359000', + 'creator': 'merce', + 'credits': 0.0, + 'credits_per_prediction': 0.0, + 'dataset': 'dataset/5c00256a983efc0acf000000', + 'dataset_field_types': {'categorical': 1, + 'datetime': 0, + 'items': 0, + 'numeric': 0, + 'preferred': 2, + 'text': 1, + 'total': 2}, + 'dataset_status': True, + 'description': '', + 'excluded_fields': [], + 'fields_meta': {'count': 2, + 'limit': 1000, + 'offset': 0, + 'query_total': 2, + 'total': 2}, + 'input_fields': ['000000', '000001'], + 'locale': 'en-us', + 'max_columns': 2, + 'max_rows': 7, + 'name': 'spam 4 words', + 'name_options': 'standardized', + 'number_of_batchprojections': 2, + 'number_of_projections': 0, + 'number_of_public_projections': 0, + 'ordering': 0, + 'out_of_bag': False, + 'pca': {'components': [[-0.64757, + 0.83392, + 0.1158, + 0.83481, + ... + -0.09426, + -0.08544, + -0.03457]], + 'cumulative_variance': [0.43667, + 0.74066, + 0.87902, + 0.98488, + 0.99561, + 1], + 'eigenvectors': [[-0.3894, + 0.50146, + 0.06963, + ... + -0.56542, + -0.5125, + -0.20734]], + 'fields': {'000000': {'column_number': 0, + 'datatype': 'string', + 'name': 'Type', + ... 
+ 'token_mode': 'all', + 'use_stopwords': False}}}, + 'pca_seed': '2c249dda00fbf54ab4cdd850532a584f286af5b6', + 'standardized': True, + 'text_stats': {'000001': {'means': [0.71429, + 0.71429, + 0.42857, + 0.28571], + 'standard_deviations': [0.75593, + 0.75593, + 0.53452, + 0.48795]}}, + 'variance': [0.43667, + 0.30399, + 0.13837, + 0.10585, + 0.01073, + 0.00439]}, + 'price': 0.0, + 'private': True, + 'project': None, + 'range': None, + 'replacement': False, + 'resource': 'pca/5c002572983efc0ac5000003', + 'rows': 7, + 'sample_rate': 1.0, + 'shared': False, + 'size': 127, + 'source': 'source/5c00255e983efc0acd00001b', + 'source_status': True, + 'status': {'code': 5, + 'elapsed': 1571, + 'message': 'The pca has been created', + 'progress': 1}, + 'subscription': True, + 'tags': [], + 'type': 0, + 'updated': '2018-11-29T18:13:19.714000', + 'white_box': False}, + 'resource': 'pca/5c002572983efc0ac5000003'} + +You can check the PCA properties at the `API documentation +`_. + +Predictions and Evaluations +--------------------------- + +Prediction +~~~~~~~~~~ + +The output of a supervised learning model for a particular input is its +prediction. In BigML, a model is ready to produce predictions immediately, so +there's no need of a special deployment in order to start using it. Here's how +you create a prediction for a model and its response: + +.. 
code-block:: python + + >>> input_data = {"petal length": 4} + >>> prediction = api.create_prediction(model_id, input_data) + >>> api.pprint(prediction["object"]) + { 'boosted_ensemble': False, + 'category': 12, + 'code': 201, + 'confidence': 0.40383, + 'confidence_bounds': {}, + 'confidences': [ ['Iris-setosa', 0], + ['Iris-versicolor', 0.40383], + ['Iris-virginica', 0.40383]], + 'configuration': None, + 'configuration_status': False, + 'created': '2024-09-09T15:48:58.918313', + 'creator': 'mmartin', + 'dataset': 'dataset/6668805ad7413f90007ab83e', + 'dataset_status': True, + 'description': 'Created using BigMLer', + 'expanded_input_data': {'000002': 4.0}, + 'explanation': None, + 'fields': { '000002': { 'column_number': 2, + 'datatype': 'double', + 'name': 'petal length', + 'optype': 'numeric', + 'order': 2, + 'preferred': True}, + '000003': { 'column_number': 3, + 'datatype': 'double', + 'name': 'petal width', + 'optype': 'numeric', + 'order': 3, + 'preferred': True}, + '000004': { 'column_number': 4, + 'datatype': 'string', + 'name': 'species', + 'optype': 'categorical', + 'order': 4, + 'preferred': True, + 'term_analysis': {'enabled': True}}}, + 'importance': {'000002': 1}, + 'input_data': {'petal length': 4}, + 'locale': 'en_US', + 'missing_strategy': 0, + 'model': 'model/6668805f002883f09483369d', + 'model_status': True, + 'model_type': 0, + 'name': 'iris.csv', + 'name_options': 'operating kind=probability, 1 inputs', + 'number_of_models': 1, + 'objective_field': '000004', + 'objective_field_name': 'species', + 'objective_field_type': 'categorical', + 'objective_fields': ['000004'], + 'operating_kind': 'probability', + 'output': 'Iris-versicolor', + 'prediction': {'000004': 'Iris-versicolor'}, + 'prediction_path': { 'confidence': 0.40383, + 'next_predicates': [ { 'count': 46, + 'field': '000003', + 'operator': '>', + 'value': 1.75}, + { 'count': 54, + 'field': '000003', + 'operator': '<=', + 'value': 1.75}], + 'node_id': 1, + 'objective_summary': { 
'categories': [ [ 'Iris-versicolor', + 50], + [ 'Iris-virginica', + 50]]}, + 'path': [ { 'field': '000002', + 'operator': '>', + 'value': 2.45}]}, + 'private': True, + 'probabilities': [ ['Iris-setosa', 0.0033], + ['Iris-versicolor', 0.49835], + ['Iris-virginica', 0.49835]], + 'probability': 0.49835, + 'project': None, + 'query_string': '', + 'resource': 'prediction/66df18eac6f7849b7b3f10ec', + 'shared': False, + 'source': 'source/66688055450bc914a2c147e0', + 'source_status': True, + 'status': { 'code': 5, + 'elapsed': 227, + 'message': 'The prediction has been created', + 'progress': 1}, + 'subscription': True, + 'tags': ['BigMLer', 'BigMLer_TueJun1124_094957'], + 'task': 'classification', + 'type': 0, + 'updated': '2024-09-09T15:48:58.918335'} + +As you see, +the ``output`` attribute stores the prediction value and the ``confidence`` +and ``probability`` attributes show the respective values. The rest of the +dictionary contains the configuration parameters described in +the `developers section `_. + +Evaluation +~~~~~~~~~~ + +The predictive performance of a model can be measured using many different +measures. In BigML these measures can be obtained by creating evaluations. To +create an evaluation you need the id of the model you are evaluating and the id +of the dataset that contains the data to be tested with. The result is shown +as: + +.. 
code-block:: python + + >>> evaluation = api.get_evaluation(evaluation) + >>> api.pprint(evaluation['object']['result']) + { 'class_names': ['0', '1'], + 'mode': { 'accuracy': 0.9802, + 'average_f_measure': 0.495, + 'average_phi': 0, + 'average_precision': 0.5, + 'average_recall': 0.4901, + 'confusion_matrix': [[99, 0], [2, 0]], + 'per_class_statistics': [ { 'accuracy': 0.9801980198019802, + 'class_name': '0', + 'f_measure': 0.99, + 'phi_coefficient': 0, + 'precision': 1.0, + 'present_in_test_data': True, + 'recall': 0.9801980198019802}, + { 'accuracy': 0.9801980198019802, + 'class_name': '1', + 'f_measure': 0, + 'phi_coefficient': 0, + 'precision': 0.0, + 'present_in_test_data': True, + 'recall': 0}]}, + 'model': { 'accuracy': 0.9901, + 'average_f_measure': 0.89746, + 'average_phi': 0.81236, + 'average_precision': 0.99495, + 'average_recall': 0.83333, + 'confusion_matrix': [[98, 1], [0, 2]], + 'per_class_statistics': [ { 'accuracy': 0.9900990099009901, + 'class_name': '0', + 'f_measure': 0.9949238578680203, + 'phi_coefficient': 0.8123623944599232, + 'precision': 0.98989898989899, + 'present_in_test_data': True, + 'recall': 1.0}, + { 'accuracy': 0.9900990099009901, + 'class_name': '1', + 'f_measure': 0.8, + 'phi_coefficient': 0.8123623944599232, + 'precision': 1.0, + 'present_in_test_data': True, + 'recall': 0.6666666666666666}]}, + 'random': { 'accuracy': 0.50495, + 'average_f_measure': 0.36812, + 'average_phi': 0.13797, + 'average_precision': 0.74747, + 'average_recall': 0.51923, + 'confusion_matrix': [[49, 50], [0, 2]], + 'per_class_statistics': [ { 'accuracy': 0.504950495049505, + 'class_name': '0', + 'f_measure': 0.6621621621621622, + 'phi_coefficient': 0.1379728923974526, + 'precision': 0.494949494949495, + 'present_in_test_data': True, + 'recall': 1.0}, + { 'accuracy': 0.504950495049505, + 'class_name': '1', + 'f_measure': 0.07407407407407407, + 'phi_coefficient': 0.1379728923974526, + 'precision': 1.0, + 'present_in_test_data': True, + 'recall': 
0.038461538461538464}]}} + +where two levels of detail are easily identified. For classifications, +the first level shows these keys: + +- **class_names**: A list with the names of all the categories for the objective field (i.e., all the classes) +- **mode**: A detailed result object. Measures the performance of the classifier that predicts the mode class for all the instances in the dataset. +- **model**: A detailed result object. +- **random**: A detailed result object. Measures the performance of the classifier that predicts a random class for all the instances in the dataset. + +and the detailed result objects include ``accuracy``, ``average_f_measure``, ``average_phi``, +``average_precision``, ``average_recall``, ``confusion_matrix`` +and ``per_class_statistics``. + +For regressions, the first level will contain these keys: + +- **mean**: A detailed result object. Measures the performance of the model that predicts the mean for all the instances in the dataset. +- **model**: A detailed result object. +- **random**: A detailed result object. Measures the performance of the model that predicts a random value for all the instances in the dataset. + +where the detailed result objects include ``mean_absolute_error``, +``mean_squared_error`` and ``r_squared`` (refer to +`developers documentation `_ for +more info on the meaning of these measures). + +You can check the evaluation properties at the `API documentation +`_. + +Centroid +~~~~~~~~ + +A ``centroid`` is the value predicted by a cluster model. Here's how to create +a centroid: + + +.. code-block:: python + + >>> input_data = {"petal length": 4} + >>> centroid = api.create_centroid(cluster_id, input_data) + +Mind that you will need to provide values for all the input fields in order to +create a centroid. To know more details about the centroid properties and +parameters you can check the corresponding +`API documentation `_. 
+ +Anomaly Score +~~~~~~~~~~~~~ + +An ``anomaly score`` is the value predicted by an anomaly detector. +Here's how to create an anomaly score: + + +.. code-block:: python + + >>> input_data = {"petal length": 4} + >>> anomaly_score = api.create_anomaly_score(anomaly_id, input_data) + +To know more details about the anomaly score properties and +parameters you can check the corresponding +`API documentation `_. + +Association Set +~~~~~~~~~~~~~~~ + +An ``association set`` is the value predicted by an association discovery model. +Here's how to create an association set: + + +.. code-block:: python + + >>> input_data = {"petal length": 4} + >>> association_set = api.create_association_set(association_id, input_data) + +To know more details about the association set properties and +parameters you can check the corresponding +`API documentation `_. + +Topic Distribution +~~~~~~~~~~~~~~~~~~ + +A ``topic distribution`` is the value predicted by a topic model. +Here's how to create a topic distribution: + + +.. code-block:: python + + >>> input_data = {"text": "Now is the winter of our discontent"} + >>> topic_distribution = api.create_topic_distribution(topic_model_id, input_data) + +To know more details about the topic distribution properties and +parameters you can check the corresponding +`API documentation `_. + +Batch Prediction +~~~~~~~~~~~~~~~~ + +In BigML, you can create predictions for all the inputs provided as rows of a +dataset, i.e. a batch prediction. +The result of a batch prediction can either be downloaded as a CSV or +become a new dataset. As with predictions, a model is ready to produce batch +predictions immediately, so there's no need of a special deployment in order +to start using it. Here's how you create a batch prediction for a model +and its response: + +.. 
code-block:: python + + >>> batch_prediction = api.create_batch_prediction(model_id, test_dataset) + +To know more details about the batch prediction properties and +parameters you can check the corresponding +`API documentation `_. + +Batch Centroid +~~~~~~~~~~~~~~ + +In BigML, you can create centroids for all the inputs provided as rows of a +dataset, i.e. a batch centroid. +The result of a batch centroid can either be downloaded as a CSV or +become a new dataset. As with predictions, a cluster is ready to produce batch +centroids immediately, so there's no need of a special deployment in order +to start using it. Here's how you create a batch centroid for a cluster +and its response: + +.. code-block:: python + + >>> batch_centroid = api.create_batch_centroid(cluster_id, test_dataset) + +To know more details about the batch centroid properties and +parameters you can check the corresponding +`API documentation `_. + +Batch Anomaly Score +~~~~~~~~~~~~~~~~~~~ + +In BigML, you can create anomaly scores for all the inputs provided as rows of a +dataset, i.e. a batch anomaly score. +The result of a batch anomaly score can either be downloaded as a CSV or +become a new dataset. As with predictions, an anomaly detector +is ready to produce batch anomaly scores immediately, +so there's no need of a special deployment in order +to start using it. Here's how you create a batch anomaly score for an anomaly +detector and its response: + +.. code-block:: python + + >>> batch_anomaly_score = api.create_batch_anomaly_score( + anomaly_id, test_dataset) + +To know more details about the batch anomaly score properties and +parameters you can check the corresponding +`API documentation `_. + +Batch Topic Distribution +~~~~~~~~~~~~~~~~~~~~~~~~ + +In BigML, you can create topic distributions for all the inputs +provided as rows of a dataset, i.e. a batch topic distribution. +The result of a batch topic distribution can either be downloaded as a CSV or +become a new dataset. 
As with predictions, a topic model is ready to produce +batch topic distributions immediately, so there's no need of a +special deployment in order to start using it. +Here's how you create a batch topic distribution for a topic model +and its response: + +.. code-block:: python + + >>> batch_topic_distribution = api.create_batch_topic_distribution( + topic_id, test_dataset) + +To know more details about the batch topic distribution properties and +parameters you can check the corresponding +`API documentation `_. diff --git a/docs/quick_start.rst b/docs/quick_start.rst new file mode 100644 index 00000000..2ff7b0ac --- /dev/null +++ b/docs/quick_start.rst @@ -0,0 +1,284 @@ +Quick Start +=========== + +Imagine that you want to use `this csv +file `_ containing the `Iris +flower dataset `_ to +predict the species of a flower whose ``petal length`` is ``2.45`` and +whose ``petal width`` is ``1.75``. A preview of the dataset is shown +below. It has 4 numeric fields: ``sepal length``, ``sepal width``, +``petal length``, ``petal width`` and a categorical field: ``species``. +By default, BigML considers the last field in the dataset as the +objective field (i.e., the field that you want to generate predictions +for). + +:: + + sepal length,sepal width,petal length,petal width,species + 5.1,3.5,1.4,0.2,Iris-setosa + 4.9,3.0,1.4,0.2,Iris-setosa + 4.7,3.2,1.3,0.2,Iris-setosa + ... + 5.8,2.7,3.9,1.2,Iris-versicolor + 6.0,2.7,5.1,1.6,Iris-versicolor + 5.4,3.0,4.5,1.5,Iris-versicolor + ... + 6.8,3.0,5.5,2.1,Iris-virginica + 5.7,2.5,5.0,2.0,Iris-virginica + 5.8,2.8,5.1,2.4,Iris-virginica + +You can easily generate a prediction following these steps: + +.. 
code-block:: python + + from bigml.api import BigML + + api = BigML() + + source = api.create_source('./data/iris.csv') + dataset = api.create_dataset(source) + model = api.create_model(dataset) + prediction = api.create_prediction(model, \ + {"petal width": 1.75, "petal length": 2.45}) + +You can then print the prediction using the ``pprint`` method: + +.. code-block:: python + + >>> api.pprint(prediction) + species for {"petal width": 1.75, "petal length": 2.45} is Iris-setosa + +Certainly, any of the resources created in BigML can be configured using +several arguments described in the `API documentation `_. +Any of these configuration arguments can be added to the ``create`` method +as a dictionary in the last optional argument of the calls: + +.. code-block:: python + + from bigml.api import BigML + + api = BigML() + + source_args = {"name": "my source", + "source_parser": {"missing_tokens": ["NULL"]}} + source = api.create_source('./data/iris.csv', source_args) + dataset_args = {"name": "my dataset"} + dataset = api.create_dataset(source, dataset_args) + model_args = {"objective_field": "species"} + model = api.create_model(dataset, model_args) + prediction_args = {"name": "my prediction"} + prediction = api.create_prediction(model, \ + {"petal width": 1.75, "petal length": 2.45}, + prediction_args) + +The ``iris`` dataset has a small number of instances, and usually will be +instantly created, so the ``api.create_`` calls will probably return the +finished resources outright. As BigML's API is asynchronous, +in general you will need to ensure +that objects are finished before using them by using ``api.ok``. + +.. 
code-block:: python + + from bigml.api import BigML + + api = BigML() + + source = api.create_source('./data/iris.csv') + api.ok(source) + dataset = api.create_dataset(source) + api.ok(dataset) + model = api.create_model(dataset) + api.ok(model) + prediction = api.create_prediction(model, \ + {"petal width": 1.75, "petal length": 2.45}) + +Note that the prediction +call is not followed by the ``api.ok`` method. Predictions are so quick to be +generated that, unlike the +rest of the resources, they will be generated synchronously as a finished object. + +Alternatively to the ``api.ok`` method, BigML offers +`webhooks `_ that can be set +when creating a resource and will call the URL of your choice when the +finished or failed event is reached. A secret can be included in the call to +verify the webhook call authenticity, and a + +.. code-block:: python + + bigml.webhooks.check_signature(request, signature) + +function is offered to that end. As an example, this snippet creates a source +and sets a webhook to call ``https://my_webhook.com/endpoint`` when finished: + +.. code-block:: python + + from bigml.api import BigML + api = BigML() + # using a webhook with a secret + api.create_source("https://static.bigml.com/csv/iris.csv", + {"webhook": {"url": "https://my_webhook.com/endpoint", + "secret": "mysecret"}}) + + +The ``iris`` prediction example assumed that your objective +field (the one you want to predict) is the last field in the dataset. +If that's not the case, you can explicitly +set the name of this field in the creation call using the ``objective_field`` +argument: + + +.. 
code-block:: python + + from bigml.api import BigML + + api = BigML() + + source = api.create_source('./data/iris.csv') + api.ok(source) + dataset = api.create_dataset(source) + api.ok(dataset) + model = api.create_model(dataset, {"objective_field": "species"}) + api.ok(model) + prediction = api.create_prediction(model, \ + {'sepal length': 5, 'sepal width': 2.5}) + + +You can also generate an evaluation for the model by using: + +.. code-block:: python + + test_source = api.create_source('./data/test_iris.csv') + api.ok(test_source) + test_dataset = api.create_dataset(test_source) + api.ok(test_dataset) + evaluation = api.create_evaluation(model, test_dataset) + api.ok(evaluation) + + +The API object also offers the ``create``, ``get``, ``update`` and ``delete`` +generic methods to manage all types of resources. The type of resource to be +created is passed as the first argument to the ``create`` method: + +.. code-block:: python + + from bigml.api import BigML + + api = BigML() + + source = api.create('source', './data/iris.csv') + source = api.update(source, {"name": "my new source name"}) + +Note that these methods don't need the ``api.ok`` method to be called +to wait for the resource to be finished. +The method waits internally for it by default. +This can be avoided by using ``finished=False`` as one of the arguments. + + +.. code-block:: python + + from bigml.api import BigML + + api = BigML() + + source = api.create('source', './data/iris.csv') + dataset = api.create('dataset', source, finished=False) # unfinished + api.ok(dataset) # waiting explicitly for the dataset to finish + dataset = api.update(dataset, {"name": "my_new_dataset_name"}, + finished=False) + api.ok(dataset) + +As an example for the ``delete`` and ``get`` methods, we could +create a batch prediction, put the predictions in a +dataset object and delete the ``batch_prediction``. + +.. 
code-block:: python + + from bigml.api import BigML + + api = BigML() + + batch_prediction = api.create('batchprediction', + 'model/5f3c3d2b5299637102000882', + 'dataset/5f29a563529963736c0116e9', + args={"output_dataset": True}) + batch_prediction_dataset = api.get(batch_prediction["object"][ \ + "output_dataset_resource"]) + api.delete(batch_prediction) + +If you set the ``storage`` argument in the ``api`` instantiation: + +.. code-block:: python + + api = BigML(storage='./storage') + +all the generated, updated or retrieved resources will be automatically +saved to the chosen directory. Once they are stored locally, the +``retrieve_resource`` method will look for the resource information +first in the local storage before trying to download the information from +the API. + +.. code-block:: python + + dataset = api.retrieve_resource("dataset/5e8e5672c7736e3d830037b5", + query_string="limit=-1") + + +Alternatively, you can use the ``export`` method to explicitly +download the JSON information +that describes any of your resources in BigML to a particular file: + +.. code-block:: python + + api.export('model/5acea49a08b07e14b9001068', + filename="my_dir/my_model.json") + +This example downloads the JSON for the model and stores it in +the ``my_dir/my_model.json`` file. + +In the case of models that can be represented in a `PMML` syntax, the +export method can be used to produce the corresponding `PMML` file. + +.. code-block:: python + + api.export('model/5acea49a08b07e14b9001068', + filename="my_dir/my_model.pmml", + pmml=True) + +You can also retrieve the last resource with some previously given tag: + +.. code-block:: python + + api.export_last("foo", + resource_type="ensemble", + filename="my_dir/my_ensemble.json") + +which selects the last ensemble that has a ``foo`` tag. This mechanism can +be specially useful when retrieving retrained models that have been created +with a shared unique keyword as tag. 
+ +For a descriptive overview of the steps that you will usually need to +follow to model +your data and obtain predictions, please see the `basic Workflow sketch +`_ +document. You can also check other simple examples in the following documents: + +- `model 101 <101_model.html>`_ +- `logistic regression 101 <101_logistic_regression.html>`_ +- `linear regression 101 <101_linear_regression.html>`_ +- `ensemble 101 <101_ensemble.html>`_ +- `cluster 101 <101_cluster.html>`_ +- `anomaly detector 101 <101_anomaly.html>`_ +- `association 101 <101_association.html>`_ +- `topic model 101 <101_topic_model.html>`_ +- `deepnet 101 <101_deepnet.html>`_ +- `time series 101 <101_ts.html>`_ +- `fusion 101 <101_fusion.html>`_ +- `optiml 101 <101_optiml.html>`_ +- `PCA 101 <101_pca.html>`_ +- `scripting 101 <101_scripting.html>`_ + +And for examples on Image Processing: + +- `Images Classification 101 <101_images_classification.html>`_ +- `Object Detection 101 <101_object_detection.html>`_ +- `Images Feature Extraction 101 <101_images_feature_extraction.html>`_ diff --git a/docs/reading_resources.rst b/docs/reading_resources.rst new file mode 100644 index 00000000..541125e4 --- /dev/null +++ b/docs/reading_resources.rst @@ -0,0 +1,240 @@ +.. toctree:: + :hidden: + +Reading Resources +----------------- + +When retrieved individually, resources are returned as a dictionary +identical to the one you get when you create a new resource. However, +the status code will be ``bigml.api.HTTP_OK`` if the resource can be +retrieved without problems, or one of the HTTP standard error codes +otherwise. To know more about the errors that can happen when retrieving +a resource and what to expect if a resource is not correctly created, please +refer to the +`Waiting for Resources `_ +section. + +To retrieve an existing resource, you just need to use the corresponding +``get_[resource type]`` method. 
There's a query string argument +that can be used to filter out or limit the attributes obtained: + +.. 
code-block:: python + + # gets the source information with no filters + api.get_source("source/5143a51a37203f2cf7000956") + # gets the dataset information with only 10 of the fields + api.get_dataset("dataset/5143a51a37203f2cf7000936", + query_string="limit=10") + # gets the model information excluding the model predicates tree + api.get_model("model/5143a51a37203f2cf7000956", + query_string="exclude=root") + + +Public and shared resources +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The previous examples use resources that were created by the same user +that asks for their retrieval or modification. If a user wants to share one +of her resources, she can make them public or share them. Declaring a resource +public means that anyone can see the resource. This can be applied to datasets +and models. To turn a dataset public, just update its ``private`` property: + +.. code-block:: python + + api.update_dataset('dataset/5143a51a37203f2cf7000972', {'private': False}) + +and any user will be able to download it using its id prepended by ``public``: + +.. code-block:: python + + api.get_dataset('public/dataset/5143a51a37203f2cf7000972') + +In the models' case, you can also choose if you want the model to be fully +downloadable or just accessible to make predictions. This is controlled with the +``white_box`` property. If you want to publish your model completely, just +use: + +.. code-block:: python + + api.update_model('model/5143a51a37203f2cf7000956', {'private': False, + 'white_box': True}) + +Both public models and datasets will be openly accessible for anyone, +registered or not, from the web +gallery. + +Still, you may want to share your models with other users, but without making +them public for everyone. This can be achieved by setting the ``shared`` +property: + +.. 
code-block:: python + + api.update_model('model/5143a51a37203f2cf7000956', {'shared': True}) + +Shared models can be accessed using their share hash (property ``shared_hash`` +in the original model): + +.. 
code-block:: python + + api.get_model('shared/model/d53iw39euTdjsgesj7382ufhwnD') + +or by using their original id with the creator user as username and a specific +sharing api_key you will find as property ``sharing_api_key`` in the updated +model: + +.. code-block:: python + + api.get_model('model/5143a51a37203f2cf7000956', shared_username='creator', + shared_api_key='c972018dc5f2789e65c74ba3170fda31d02e00c3') + +Only users with the share link or credentials information will be able to +access your shared models. + +Listing Resources +----------------- + +You can list resources with the appropriate api method: + +.. code-block:: python + + api.list_sources() + api.list_datasets() + api.list_models() + api.list_predictions() + api.list_evaluations() + api.list_ensembles() + api.list_batch_predictions() + api.list_clusters() + api.list_centroids() + api.list_batch_centroids() + api.list_anomalies() + api.list_anomaly_scores() + api.list_batch_anomaly_scores() + api.list_projects() + api.list_samples() + api.list_correlations() + api.list_statistical_tests() + api.list_logistic_regressions() + api.list_linear_regressions() + api.list_associations() + api.list_association_sets() + api.list_topic_models() + api.list_topic_distributions() + api.list_batch_topic_distributions() + api.list_time_series() + api.list_deepnets() + api.list_fusions() + api.list_pcas() + api.list_projections() + api.list_batch_projections() + api.list_forecasts() + api.list_scripts() + api.list_libraries() + api.list_executions() + api.list_external_connectors() + + +you will receive a dictionary with the following keys: + +- **code**: If the request is successful you will get a + ``bigml.api.HTTP_OK`` (200) status code. Otherwise, it will be one of + the standard HTTP error codes. See `BigML documentation on status + codes `_ for more info. 
+- **meta**: A dictionary including the following keys that can help you + paginate listings: + + - **previous**: Path to get the previous page or ``None`` if there + is no previous page. + - **next**: Path to get the next page or ``None`` if there is no + next page. + - **offset**: How far off from the first entry in the resources is + the first one listed in the objects key. + - **limit**: Maximum number of resources that you will get listed in + the objects key. + - **total\_count**: The total number of resources in BigML. + +- **objects**: A list of resources as returned by BigML. +- **error**: If an error occurs and the resources cannot be listed, it + will contain an additional code and a description of the error. In + this case, **meta** and **objects** will be ``None``. + +Filtering Resources +~~~~~~~~~~~~~~~~~~~ + +You can filter resources in listings using the syntax and fields labeled +as *filterable* in the `BigML +documentation `_ for each resource. + +A few examples: + +Ids of the first 5 sources created before April 1st, 2012 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + [source['resource'] for source in + api.list_sources("limit=5;created__lt=2012-04-1")['objects']] + +Name of the first 10 datasets bigger than 1MB +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + [dataset['name'] for dataset in + api.list_datasets("limit=10;size__gt=1048576")['objects']] + +Name of models with more than 5 fields (columns) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + [model['name'] for model in api.list_models("columns__gt=5")['objects']] + +Ids of predictions whose model has not been deleted +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + [prediction['resource'] for prediction in + api.list_predictions("model_status=true")['objects']] + +Ordering Resources +~~~~~~~~~~~~~~~~~~ + +You can order resources in listings using the syntax and fields labeled +as *sortable* in the `BigML +documentation `_ for 
each resource. + +A few examples: + +Name of sources ordered by size +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + [source['name'] for source in api.list_sources("order_by=size")['objects']] + +Number of instances in datasets created before April 1st, 2012 ordered by size +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + [dataset['rows'] for dataset in + api.list_datasets( + "created__lt=2012-04-01T00:00:00.00000;order_by=size")['objects']] + +Model ids ordered by number of predictions (in descending order). +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + [model['resource'] for model in + api.list_models("order_by=-number_of_predictions")['objects']] + +Name of predictions ordered by name. +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + [prediction['name'] for prediction in + api.list_predictions("order_by=name")['objects']] diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..6daf89af --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,2 @@ +sphinx +sphinx_rtd_theme==2.0.0 diff --git a/docs/updating_resources.rst b/docs/updating_resources.rst new file mode 100644 index 00000000..c4cb88a5 --- /dev/null +++ b/docs/updating_resources.rst @@ -0,0 +1,99 @@ +.. toctree:: + :hidden: + +Updating Resources +================== + +When you update a resource, it is returned in a dictionary exactly like +the one you get when you create a new one. However the status code will +be ``bigml.api.HTTP_ACCEPTED`` if the resource can be updated without +problems or one of the HTTP standard error codes otherwise. + +.. 
code-block:: python + + api.update_source(source, {"name": "new name"}) + api.update_dataset(dataset, {"name": "new name"}) + api.update_model(model, {"name": "new name"}) + api.update_prediction(prediction, {"name": "new name"}) + api.update_evaluation(evaluation, {"name": "new name"}) + api.update_ensemble(ensemble, {"name": "new name"}) + api.update_batch_prediction(batch_prediction, {"name": "new name"}) + api.update_cluster(cluster, {"name": "new name"}) + api.update_centroid(centroid, {"name": "new name"}) + api.update_batch_centroid(batch_centroid, {"name": "new name"}) + api.update_anomaly(anomaly, {"name": "new name"}) + api.update_anomaly_score(anomaly_score, {"name": "new name"}) + api.update_batch_anomaly_score(batch_anomaly_score, {"name": "new name"}) + api.update_project(project, {"name": "new name"}) + api.update_correlation(correlation, {"name": "new name"}) + api.update_statistical_test(statistical_test, {"name": "new name"}) + api.update_logistic_regression(logistic_regression, {"name": "new name"}) + api.update_linear_regression(linear_regression, {"name": "new name"}) + api.update_association(association, {"name": "new name"}) + api.update_association_set(association_set, {"name": "new name"}) + api.update_topic_model(topic_model, {"name": "new name"}) + api.update_topic_distribution(topic_distribution, {"name": "new name"}) + api.update_batch_topic_distribution(\ + batch_topic_distribution, {"name": "new name"}) + api.update_time_series(\ + time_series, {"name": "new name"}) + api.update_forecast(\ + forecast, {"name": "new name"}) + api.update_deepnet(deepnet, {"name": "new name"}) + api.update_fusion(fusion, {"name": "new name"}) + api.update_pca(pca, {"name": "new name"}) + api.update_projection(projection, {"name": "new name"}) + api.update_batch_projection(batch_projection, {"name": "new name"}) + api.update_script(script, {"name": "new name"}) + api.update_library(library, {"name": "new name"}) + api.update_execution(execution, {"name": 
"new name"}) + api.update_external_connector(external_connector, {"name": "new name"}) + +Updates can change resource general properties, such as the ``name`` or +``description`` attributes of a dataset, or specific properties, like +the ``missing tokens`` (strings considered as missing values). As an example, +let's say that your source has a certain field whose contents are +numeric integers. BigML will assign a numeric type to the field, but you +might want it to be used as a categorical field. You could change +its type to ``categorical`` by calling: + +.. code-block:: python + + api.update_source(source, \ + {"fields": {"000001": {"optype": "categorical"}}}) + +where ``000001`` is the field id that corresponds to the updated field. + +Another usually needed update is changing a fields' ``non-preferred`` +attribute, +so that it can be used in the modeling process: + + +.. code-block:: python + + api.update_dataset(dataset, {"fields": {"000001": {"preferred": True}}}) + +where you would be setting as ``preferred`` the field whose id is ``000001``. + +You may also want to change the name of one of the clusters found in your +clustering: + + +.. code-block:: python + + api.update_cluster(cluster, \ + {"clusters": {"000001": {"name": "my cluster"}}}) + +which is changing the name of the cluster whose centroid id is ``000001`` to +``my_cluster``. Or, similarly, changing the name of one detected topic: + + +.. code-block:: python + + api.update_topic_model(topic_model, \ + {"topics": {"000001": {"name": "my topic"}}}) + + +You will find detailed information about +the updatable attributes of each resource in +`BigML developer's documentation `_. diff --git a/docs/whizzml_resources.rst b/docs/whizzml_resources.rst new file mode 100644 index 00000000..440f6de3 --- /dev/null +++ b/docs/whizzml_resources.rst @@ -0,0 +1,267 @@ +.. 
toctree:: + :hidden: + +WhizzML Resources +================= + +WhizzML is a Domain Specific Language that allows the definition and +execution of ML-centric workflows. Its objective is allowing BigML +users to define their own composite tasks, using as building blocks +the basic resources provided by BigML itself. Using WhizzML, they can be +glued together using a higher order, functional, Turing-complete language. +The WhizzML code can be stored and executed in BigML using three kinds of +resources: ``Scripts``, ``Libraries`` and ``Executions``. + +WhizzML ``Scripts`` can be executed in BigML's servers, that is, +in a controlled, fully-scalable environment which takes care of their +parallelization and fail-safe operation. Each execution uses an ``Execution`` +resource to store the arguments and results of the process. WhizzML +``Libraries`` store generic code to be shared or reused in other WhizzML +``Scripts``. + +Scripts +------- + +In BigML a ``Script`` resource stores WhizzML source code, and the results of +its compilation. Once a WhizzML script is created, it's automatically compiled; +if compilation succeeds, the script can be run, that is, +used as the input for a WhizzML execution resource. + +An example of a ``script`` that would create a ``source`` in BigML using the +contents of a remote file is: + +.. code-block:: python + + >>> from bigml.api import BigML + >>> api = BigML() + # creating a script directly from the source code. This script creates + # a source uploading data from an s3 repo. You could also create + # a script by using as first argument the path to a .whizzml file which + # contains your source code. 
+ >>> script = api.create_script( \ + "(create-source {\"remote\" \"s3://bigml-public/csv/iris.csv\"})") + >>> api.ok(script) # waiting for the script compilation to finish + >>> api.pprint(script['object']) + { u'approval_status': 0, + u'category': 0, + u'code': 200, + u'created': u'2016-05-18T16:54:05.666000', + u'description': u'', + u'imports': [], + u'inputs': None, + u'line_count': 1, + u'locale': u'en-US', + u'name': u'Script', + u'number_of_executions': 0, + u'outputs': None, + u'price': 0.0, + u'private': True, + u'project': None, + u'provider': None, + u'resource': u'script/573c9e2db85eee23cd000489', + u'shared': False, + u'size': 59, + u'source_code': u'(create-source {"remote" "s3://bigml-public/csv/iris.csv"})', + u'status': { u'code': 5, + u'elapsed': 4, + u'message': u'The script has been created', + u'progress': 1.0}, + u'subscription': True, + u'tags': [], + u'updated': u'2016-05-18T16:54:05.850000', + u'white_box': False} + +A ``script`` allows to define some variables as ``inputs``. In the previous +example, no input has been defined, but we could modify our code to +allow the user to set the remote file name as input: + +.. code-block:: python + + >>> from bigml.api import BigML + >>> api = BigML() + >>> script = api.create_script( \ + "(create-source {\"remote\" my_remote_data})", + {"inputs": [{"name": "my_remote_data", + "type": "string", + "default": "s3://bigml-public/csv/iris.csv", + "description": "Location of the remote data"}]}) + +The ``script`` can also use a ``library`` resource (please, see the +``Libraries`` section below for more details) by including its id in the +``imports`` attribute. Other attributes can be checked at the +`API Developers documentation for Scripts `_. + +Executions +---------- + +To execute in BigML a compiled WhizzML ``script`` you need to create an +``execution`` resource. It's also possible to execute a pipeline of +many compiled scripts in one request. 
+ +Each ``execution`` is run under its associated user credentials and its +particular environment constraints. As ``scripts`` can be shared, +different users can execute the same ``script`` using different inputs. +Each particular execution will generate an ``execution`` resource in BigML. + +As an example of an ``execution`` resource, let's create one for the first +script in the previous section. In this case, no inputs are required because +the ``script`` expects none: + +.. code-block:: python + + >>> from bigml.api import BigML + >>> api = BigML() + >>> execution = api.create_execution('script/573c9e2db85eee23cd000489') + >>> api.ok(execution) # waiting for the execution to finish + >>> api.pprint(execution['object']) + { u'category': 0, + u'code': 200, + u'created': u'2016-05-18T16:58:01.613000', + u'creation_defaults': { }, + u'description': u'', + u'execution': { u'output_resources': [ { u'code': 1, + u'id': u'source/573c9f19b85eee23c600024a', + u'last_update': 1463590681854, + u'progress': 0.0, + u'state': u'queued', + u'task': u'Queuing job', + u'variable': u''}], + u'outputs': [], + u'result': u'source/573c9f19b85eee23c600024a', + u'results': [u'source/573c9f19b85eee23c600024a'], + u'sources': [[ u'script/573c9e2db85eee23cd000489', + u'']], + u'steps': 16}, + u'inputs': None, + u'locale': u'en-US', + u'name': u"Script's Execution", + u'project': None, + u'resource': u'execution/573c9f19b85eee23bd000125', + u'script': u'script/573c9e2db85eee23cd000489', + u'script_status': True, + u'shared': False, + u'status': { u'code': 5, + u'elapsed': 249, + u'elapsed_times': { u'in-progress': 247, + u'queued': 62, + u'started': 2}, + u'message': u'The execution has been created', + u'progress': 1.0}, + u'subscription': True, + u'tags': [], + u'updated': u'2016-05-18T16:58:02.035000'} + +As you can see, the execution resource contains information about the result +of the execution, the resources that have been generated while executing and +users can define some 
variables in the code to be exported as outputs. + +An ``execution`` receives inputs, the ones defined in the ``script`` chosen +to be executed, and generates a result. It can also generate outputs and +create resources. To +execute a ``script`` that expects some inputs, you will need to specify the +concrete values of those inputs, unless a default value has been assigned +for them in the script's inputs definition. Following the second example in +the previous section, we can execute the script that creates a source from a +URL pointing to a CSV file: + +.. code-block:: python + + >>> from bigml.api import BigML + >>> api = BigML() + >>> execution = api.create_execution( \ + script, + {"inputs": [["my_remote_data", + "https://static.bigml.com/csv/iris.csv"]]}) + +For more details on executions' structure, please refer to the +`Developers documentation for Executions `_. + + +The results of an execution can be easily obtained by using the ``Execution`` +class. This class can be used to instantiate a local object that will +expose the result, outputs and output resources generated in the execution +in its attributes. + + +.. code-block:: python + + from bigml.execution import Execution + execution = Execution("execution/5cae5ad4b72c6609d9000468") + print("The result of the execution is %s" % execution.result) + print(" and the output for variable 'my_variable': %s" % + execution.outputs["my_variable"]) + print("The resources created in the execution are: %s" % + execution.output_resources) + +While an execution is in progress, the ``execution.result`` attribute will +contain the value of the last evaluated expression at that point. +Therefore, the value of the ``result`` attribute will change until it +contains the final result of the execution when finished. + +Also, if the execution fails, the error information can be found in the +corresponding attributes: + +.. 
code-block:: python + + from bigml.execution import Execution + execution = Execution("execution/5cae5ad4b72c6609d9000468") + print("The status of the execution is %s" % execution.status) + print("The execution failed at %s with error %s: %s" % ( + execution.error_location, execution.error, execution.error_message)) + + +Libraries +--------- + +The ``library`` resource in BigML stores a special kind of compiled WhizzML +source code that only defines functions and constants. The ``library`` is +intended as an import for executable scripts. +Thus, a compiled library cannot be executed, just used as an +import in other ``libraries`` and ``scripts`` (which then have access +to all identifiers defined in the ``library``). + +As an example, we build a ``library`` to store the definition of two functions: +``mu`` and ``g``. The first one adds one to the value set as argument and +the second one adds two variables and increments the result by one. + + +.. 
code-block:: python + + >>> from bigml.api import BigML + >>> api = BigML() + >>> library = api.create_library( \ + "(define (mu x) (+ x 1)) (define (g z y) (mu (+ y z)))") + >>> api.ok(library) # waiting for the library compilation to finish + >>> api.pprint(library['object']) + { u'approval_status': 0, + u'category': 0, + u'code': 200, + u'created': u'2016-05-18T18:58:50.838000', + u'description': u'', + u'exports': [ { u'name': u'mu', u'signature': [u'x']}, + { u'name': u'g', u'signature': [u'z', u'y']}], + u'imports': [], + u'line_count': 1, + u'name': u'Library', + u'price': 0.0, + u'private': True, + u'project': None, + u'provider': None, + u'resource': u'library/573cbb6ab85eee23c300018e', + u'shared': False, + u'size': 53, + u'source_code': u'(define (mu x) (+ x 1)) (define (g z y) (mu (+ y z)))', + u'status': { u'code': 5, + u'elapsed': 2, + u'message': u'The library has been created', + u'progress': 1.0}, + u'subscription': True, + u'tags': [], + u'updated': u'2016-05-18T18:58:52.432000', + u'white_box': False} +
+Libraries can be imported in scripts. The ``imports`` attribute of a ``script`` +can contain a list of ``library`` IDs whose defined functions +and constants will be ready to be used throughout the ``script``. Please, +refer to the `API Developers documentation for Libraries `_ +for more details. diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..1de495d4 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,8 @@ +[build-system] +requires=[ + "setuptools==69.0.0" +] + +[tool.black] +line-length = 80 +target-version = ['py312'] diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..24f5e88c --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[lint] +lint-exclude-packages=bigml.tests.my_ensemble diff --git a/setup.py b/setup.py index 0024cbe0..c7858b6c 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Copyright 2012 BigML, Inc +# Copyright 2012-2025 BigML, Inc # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -14,16 +14,10 @@ # License for the specific language governing permissions and limitations # under the License. -import distutils.core import os import re -# Importing setuptools adds some features like "setup.py develop", but -# it's optional so swallow the error if it's not there. 
-try: - import setuptools -except ImportError: - pass +import setuptools # Get the path to this project project_path = os.path.dirname(__file__) @@ -31,9 +25,12 @@ # Read the version from bigml.__version__ without importing the package # (and thus attempting to import packages it depends on that may not be # installed yet) -init_py_path = os.path.join(project_path, 'bigml', '__init__.py') +version_py_path = os.path.join(project_path, 'bigml', 'version.py') version = re.search("__version__ = '([^']+)'", - open(init_py_path).read()).group(1) + open(version_py_path).read()).group(1) + +TOPIC_MODELING_DEPENDENCIES = ["cython", "pystemmer==2.2.0.1"] +IMAGES_DEPENDENCIES = ["bigml-sensenet==0.7.5"] # Concatenate files into the long description file_contents = [] @@ -42,8 +39,7 @@ file_contents.append(open(path).read()) long_description = '\n\n'.join(file_contents) - -distutils.core.setup( +setuptools.setup( name="bigml", description="An open source binding to BigML.io, the public BigML API", long_description=long_description, @@ -53,10 +49,21 @@ url="https://bigml.com/developers", download_url="https://github.com/bigmlcom/python", license="http://www.apache.org/licenses/LICENSE-2.0", - setup_requires = [], - packages = ['bigml'], - include_package_data = True, - install_requires = ['requests'], + setup_requires = ['pytest'], + install_requires = ["setuptools==70.0.0", "unidecode", + "bigml-chronos>=0.4.3", "requests", + "requests-toolbelt", "msgpack", "numpy>=1.22", "scipy", + "javascript"], + extras_require={"images": IMAGES_DEPENDENCIES, + "topics": TOPIC_MODELING_DEPENDENCIES, + "full": IMAGES_DEPENDENCIES + TOPIC_MODELING_DEPENDENCIES}, + packages = ['bigml', 'bigml.tests', 'bigml.laminar', + 'bigml.tests.my_ensemble', + 'bigml.api_handlers', 'bigml.predicate_utils', + 'bigml.generators', 'bigml.predict_utils', + 'bigml.images', 'bigml.pipeline'], + package_data={'bigml':['generators/static/*', + 'flatline/*']}, classifiers=[ 'Development Status :: 4 - Beta', 
'Intended Audience :: Developers', @@ -64,9 +71,7 @@ 'Natural Language :: English', 'Operating System :: OS Independent', 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', 'Topic :: Software Development :: Libraries :: Python Modules', - ], + ] ) diff --git a/tests/features/common_steps.py b/tests/features/common_steps.py deleted file mode 100644 index d158aa00..00000000 --- a/tests/features/common_steps.py +++ /dev/null @@ -1,53 +0,0 @@ -from lettuce import * - -from bigml.api import HTTP_OK -from bigml.api import HTTP_CREATED -from bigml.api import HTTP_ACCEPTED -from bigml.api import HTTP_BAD_REQUEST -from bigml.api import HTTP_UNAUTHORIZED -from bigml.api import HTTP_NOT_FOUND - -@step(r'I get an OK response') -def i_get_an_OK_response(step): - assert world.status == HTTP_OK - -@step(r'I get a created response') -def i_get_a_created_response(step): - assert world.status == HTTP_CREATED - -@step(r'I get an accepted response') -def i_get_an_accepted_response(step): - assert world.status == HTTP_ACCEPTED - -@step(r'I get a bad request response') -def i_get_a_bad_request_response(step): - assert world.status == HTTP_BAD_REQUEST - -@step(r'I get a unauthorized response') -def i_get_a_unauthorized_response(step): - assert world.status == HTTP_UNAUTHORIZED - -@step(r'I get a not found response') -def i_get_a_not_found_response(step): - assert world.status == HTTP_NOT_FOUND - -@step(r'I want to use api in DEV mode') -def i_want_api_dev_mode(step): - world.api = world.api_dev_mode - # Update counters of resources for DEV mode - sources = world.api.list_sources() - assert sources['code'] == HTTP_OK - world.init_sources_count = sources['meta']['total_count'] - - datasets = world.api.list_datasets() - assert datasets['code'] == HTTP_OK - world.init_datasets_count = datasets['meta']['total_count'] - - models = 
world.api.list_models() - assert models['code'] == HTTP_OK - world.init_models_count = models['meta']['total_count'] - - predictions = world.api.list_predictions() - assert predictions['code'] == HTTP_OK - world.init_predictions_count = predictions['meta']['total_count'] - diff --git a/tests/features/create_dataset-steps.py b/tests/features/create_dataset-steps.py deleted file mode 100644 index 97dd0a29..00000000 --- a/tests/features/create_dataset-steps.py +++ /dev/null @@ -1,31 +0,0 @@ -import os -import time -from datetime import datetime, timedelta -from lettuce import * -from bigml.api import HTTP_CREATED -from bigml.api import FINISHED -from bigml.api import FAULTY - -@step(r'I create a dataset$') -def i_create_a_dataset(step): - resource = world.api.create_dataset(world.source['resource']) - world.status = resource['code'] - assert world.status == HTTP_CREATED - world.location = resource['location'] - world.dataset = resource['object'] - world.datasets.append(resource['resource']) - -@step(r'I wait until the dataset status code is either (\d) or (\d) less than (\d+)') -def wait_until_dataset_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - step.given('I get the dataset "{id}"'.format(id=world.dataset['resource'])) - while (world.dataset['status']['code'] != int(code1) and - world.dataset['status']['code'] != int(code2)): - time.sleep(3) - assert datetime.utcnow() - start < timedelta(seconds=int(secs)) - step.given('I get the dataset "{id}"'.format(id=world.dataset['resource'])) - assert world.dataset['status']['code'] == int(code1) - -@step(r'I wait until the dataset is ready less than (\d+)') -def the_dataset_is_finished_in_less_than(step, secs): - wait_until_dataset_status_code_is(step, FINISHED, FAULTY, secs) diff --git a/tests/features/create_model-steps.py b/tests/features/create_model-steps.py deleted file mode 100644 index 25c3d2fd..00000000 --- a/tests/features/create_model-steps.py +++ /dev/null @@ -1,50 +0,0 @@ -# -*- coding: 
utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2012 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import time -from datetime import datetime, timedelta -from lettuce import step, world - -from bigml.api import HTTP_CREATED -from bigml.api import FINISHED -from bigml.api import FAULTY - -@step(r'I create a model$') -def i_create_a_model(step): - dataset = world.dataset.get('resource') - resource = world.api.create_model(dataset) - world.status = resource['code'] - assert world.status == HTTP_CREATED - - world.location = resource['location'] - world.model = resource['object'] - world.models.append(resource['resource']) - -@step(r'I wait until the model status code is either (\d) or (-\d) less than (\d+)') -def wait_until_model_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - step.given('I get the model "{id}"'.format(id=world.model['resource'])) - while (world.model['status']['code'] != int(code1) and - world.model['status']['code'] != int(code2)): - time.sleep(3) - assert datetime.utcnow() - start < timedelta(seconds=int(secs)) - step.given('I get the model "{id}"'.format(id=world.model['resource'])) - assert world.model['status']['code'] == int(code1) - -@step(r'I wait until the model is ready less than (\d+)') -def the_model_is_finished_in_less_than(step, secs): - wait_until_model_status_code_is(step, FINISHED, FAULTY, secs) diff --git a/tests/features/create_prediction-steps.py b/tests/features/create_prediction-steps.py deleted 
file mode 100644 index 6528f700..00000000 --- a/tests/features/create_prediction-steps.py +++ /dev/null @@ -1,20 +0,0 @@ -import json -from lettuce import step, world -from bigml.api import HTTP_CREATED - -@step(r'I create a prediction for "(.*)"') -def i_create_a_prediction(step, data=None): - if data is None: - data = {} - model = world.model['resource'] - data = json.loads(data) - resource = world.api.create_prediction(model, data) - world.status = resource['code'] - assert world.status == HTTP_CREATED - world.location = resource['location'] - world.prediction = resource['object'] - world.predictions.append(resource['resource']) - -@step(r'the prediction for "(.*)" is "(.*)"') -def the_prediction_is(step, objective, prediction): - assert world.prediction['prediction'][objective] == prediction diff --git a/tests/features/create_prediction.feature b/tests/features/create_prediction.feature deleted file mode 100644 index 51a4800b..00000000 --- a/tests/features/create_prediction.feature +++ /dev/null @@ -1,31 +0,0 @@ -Feature: Create Predictions - In order to create a prediction - I need to create a model first - - Scenario: Successfully creating a prediction: - Given I create a data source uploading a "" file - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - When I create a prediction for "" - Then the prediction for "" is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | - | ../data/iris.csv | 10 | 10 | 10 | {"petal length": 1} | 000004 | Iris-setosa | - - Scenario: Successfully creating a prediction from a source in a remote location: - Given I create a data source using the url "" - And I wait until the source is ready less than secs - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the 
model is ready less than secs - When I create a prediction for "" - Then the prediction for "" is "" - - Examples: - | url | time_1 | time_2 | time_3 | data_input | objective | prediction | - | s3://bigml-public/csv/iris.csv | 10 | 10 | 10 | {"petal length": 1} | 000004 | Iris-setosa | diff --git a/tests/features/create_prediction_dev.feature b/tests/features/create_prediction_dev.feature deleted file mode 100644 index d381a676..00000000 --- a/tests/features/create_prediction_dev.feature +++ /dev/null @@ -1,20 +0,0 @@ -Feature: Create Predictions in DEV mode - In order to create a prediction in DEV mode - I need to change to an API instance in DEV mode - And I need to create a model first - - Scenario: Successfully creating a prediction in DEV mode: - Given I want to use api in DEV mode - When I create a data source uploading a "" file - And I wait until the source is ready less than secs - And the source has DEV True - And I create a dataset - And I wait until the dataset is ready less than secs - And I create a model - And I wait until the model is ready less than secs - When I create a prediction for "" - Then the prediction for "" is "" - - Examples: - | data | time_1 | time_2 | time_3 | data_input | objective | prediction | - | ../data/iris.csv | 10 | 10 | 10 | {"petal length": 1} | 000004 | Iris-setosa | diff --git a/tests/features/create_source-steps.py b/tests/features/create_source-steps.py deleted file mode 100644 index 5986aed8..00000000 --- a/tests/features/create_source-steps.py +++ /dev/null @@ -1,66 +0,0 @@ -# -*- coding: utf-8 -*- -#!/usr/bin/env python -# -# Copyright 2012 BigML -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import time -import json -from datetime import datetime, timedelta -from urllib import urlencode -from lettuce import step, world - -from bigml.api import HTTP_CREATED -from bigml.api import FINISHED -from bigml.api import FAULTY - -@step(r'I create a data source uploading a "(.*)" file$') -def i_upload_a_file(step, file): - resource = world.api.create_source(file) - # update status - world.status = resource['code'] - world.location = resource['location'] - world.source = resource['object'] - # save reference - world.sources.append(resource['resource']) - -@step(r'I create a data source using the url "(.*)"') -def i_create_using_url(step, url): - resource = world.api.create_source(url) - # update status - world.status = resource['code'] - world.location = resource['location'] - world.source = resource['object'] - # save reference - world.sources.append(resource['resource']) - -@step(r'the source has been created') -def the_source_has_been_created(step): - assert world.status == HTTP_CREATED - -@step(r'I wait until the source status code is either (\d) or (\d) less than (\d+)') -def wait_until_source_status_code_is(step, code1, code2, secs): - start = datetime.utcnow() - step.given('I get the source "{id}"'.format(id=world.source['resource'])) - while (world.source['status']['code'] != int(code1) and - world.source['status']['code'] != int(code2)): - time.sleep(3) - assert datetime.utcnow() - start < timedelta(seconds=int(secs)) - step.given('I get the source "{id}"'.format(id=world.source['resource'])) - assert world.source['status']['code'] 
== int(code1) - -@step(r'I wait until the source is ready less than (\d+)') -def the_source_is_finished(step, secs): - wait_until_source_status_code_is(step, FINISHED, FAULTY, secs) diff --git a/tests/features/read_dataset-steps.py b/tests/features/read_dataset-steps.py deleted file mode 100644 index 8311f3a8..00000000 --- a/tests/features/read_dataset-steps.py +++ /dev/null @@ -1,9 +0,0 @@ -from lettuce import step, world -from bigml.api import HTTP_OK - -@step(r'I get the dataset "(.*)"') -def i_get_the_dataset(step, dataset): - resource = world.api.get_dataset(dataset) - world.status = resource['code'] - assert world.status == HTTP_OK - world.dataset = resource['object'] diff --git a/tests/features/read_model-steps.py b/tests/features/read_model-steps.py deleted file mode 100644 index df0dba2c..00000000 --- a/tests/features/read_model-steps.py +++ /dev/null @@ -1,12 +0,0 @@ -import os -from lettuce import step, world - -from bigml.api import HTTP_OK - -@step(r'I get the model "(.*)"') -def i_get_the_model(step, model): - resource = world.api.get_model(model) - world.status = resource['code'] - assert world.status == HTTP_OK - world.model = resource['object'] - diff --git a/tests/features/read_prediction-steps.py b/tests/features/read_prediction-steps.py deleted file mode 100644 index d860223c..00000000 --- a/tests/features/read_prediction-steps.py +++ /dev/null @@ -1,10 +0,0 @@ -import os -from lettuce import step, world -from bigml.api import HTTP_OK - -@step(r'I get the prediction "(.*)"') -def i_get_the_prediction(step, prediction): - resource = world.api.get_prediction(prediction) - world.status = resource['code'] - assert world.status == HTTP_OK - world.prediction = resource['object'] diff --git a/tests/features/terrain.py b/tests/features/terrain.py deleted file mode 100644 index 06b381d6..00000000 --- a/tests/features/terrain.py +++ /dev/null @@ -1,75 +0,0 @@ -# terrain.py -import os -from lettuce import before, after, world - -from bigml.api import BigML 
-from bigml.api import HTTP_OK - -@before.each_feature -def setup_resources(feature): - world.USERNAME = os.environ['BIGML_USERNAME'] - world.API_KEY = os.environ['BIGML_API_KEY'] - assert world.USERNAME is not None - assert world.API_KEY is not None - world.api = BigML(world.USERNAME, world.API_KEY) - world.api_dev_mode = BigML(world.USERNAME, world.API_KEY, dev_mode=True) - - sources = world.api.list_sources() - assert sources['code'] == HTTP_OK - world.init_sources_count = sources['meta']['total_count'] - - datasets = world.api.list_datasets() - assert datasets['code'] == HTTP_OK - world.init_datasets_count = datasets['meta']['total_count'] - - models = world.api.list_models() - assert models['code'] == HTTP_OK - world.init_models_count = models['meta']['total_count'] - - predictions = world.api.list_predictions() - assert predictions['code'] == HTTP_OK - world.init_predictions_count = predictions['meta']['total_count'] - - world.sources = [] - world.datasets = [] - world.models = [] - world.predictions = [] - -@after.each_feature -def cleanup_resources(feature): - for id in world.sources: - world.api.delete_source(id) - world.sources = [] - - for id in world.datasets: - world.api.delete_dataset(id) - world.datasets = [] - - for id in world.models: - world.api.delete_model(id) - world.models = [] - - for id in world.predictions: - world.api.delete_prediction(id) - world.predictions = [] - - sources = world.api.list_sources() - assert sources['code'] == HTTP_OK - world.final_sources_count = sources['meta']['total_count'] - - datasets = world.api.list_datasets() - assert datasets['code'] == HTTP_OK - world.final_datasets_count = datasets['meta']['total_count'] - - models = world.api.list_models() - assert models['code'] == HTTP_OK - world.final_models_count = models['meta']['total_count'] - - predictions = world.api.list_predictions() - assert predictions['code'] == HTTP_OK - world.final_predictions_count = predictions['meta']['total_count'] - - assert 
world.final_sources_count == world.init_sources_count - assert world.final_datasets_count == world.init_datasets_count - assert world.final_models_count == world.init_models_count - assert world.final_predictions_count == world.init_predictions_count diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 75326c8f..00000000 --- a/tox.ini +++ /dev/null @@ -1,21 +0,0 @@ -# Tox (http://tox.testrun.org/) is a tool for running tests in multiple -# virtualenvs. This configuration file will run the test suite on all -# supported Python versions. To use it, "pip install tox" and then run -# "tox" from this directory. - -[tox] -envlist = py26, py27, rtfd - -[testenv] -changedir = tests -deps = lettuce -commands = lettuce - -[testenv:rtfd] -# Verify the documentation will build for readthedocs.org -basepython = python2.7 -changedir = docs -commands = - sphinx-build -W -b html -d {envtmpdir}/doctrees . {envtmpdir}/html -deps = - sphinx