diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index f8a8d40fb3..763cb52d0b 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -126,95 +126,83 @@ jobs: run: ./infra/scripts/helm/push-helm-charts.sh $VERSION_WITHOUT_PREFIX publish-python-sdk: + runs-on: ubuntu-latest + needs: [build-python-sdk, build-python-sdk-macos-py310] + steps: + - uses: actions/download-artifact@v2 + with: + name: wheels + path: dist + - uses: pypa/gh-action-pypi-publish@v1.4.2 + with: + user: __token__ + password: ${{ secrets.PYPI_PASSWORD }} + + + build-python-sdk: + name: Build wheels on ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: - fail-fast: false matrix: - python-version: [ "3.7", "3.8", "3.9", "3.10" ] - os: [ ubuntu-latest, macOS-latest ] - compile-go: [ True ] - include: - - python-version: "3.7" - os: ubuntu-latest - compile-go: False - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - COMPILE_GO: ${{ matrix.compile-go }} + os: [ ubuntu-latest, macos-10.15 ] + steps: - uses: actions/checkout@v2 - - name: Setup Python - id: setup-python - uses: actions/setup-python@v2 + + - name: Build wheels + uses: pypa/cibuildwheel@v2.4.0 with: - python-version: ${{ matrix.python-version }} - architecture: x64 - - name: Setup Go - id: setup-go - uses: actions/setup-go@v2 + package-dir: sdk/python + env: + CIBW_BUILD: "cp3*_x86_64" + CIBW_SKIP: "cp36-* *-musllinux_x86_64 cp310-macosx_x86_64" + CIBW_ARCHS: "native" + CIBW_ENVIRONMENT: > + COMPILE_GO=True + CIBW_BEFORE_ALL_LINUX: | + yum install -y golang + CIBW_BEFORE_ALL_MACOS: | + curl -o python.pkg https://www.python.org/ftp/python/3.9.12/python-3.9.12-macosx10.9.pkg + sudo installer -pkg python.pkg -target / + CIBW_BEFORE_BUILD: | + make install-protoc-dependencies + make install-go-proto-dependencies + make install-go-ci-dependencies + + - uses: actions/upload-artifact@v2 with: - go-version: 1.17.7 - - name: Upgrade pip version - run: | - pip 
install --upgrade "pip>=21.3.1" - - name: Install pip-tools - run: pip install pip-tools - - name: Install dependencies - run: make install-python-ci-dependencies PYTHON=${{ matrix.python-version }} - - name: Publish Python Package - run: | - cd sdk/python - python3 -m pip install --user --upgrade setuptools wheel twine - python3 setup.py sdist bdist_wheel - python3 -m twine upload --verbose dist/*.whl + name: wheels + path: ./wheelhouse/*.whl - publish-python-sdk-no-telemetry: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - python-version: [ "3.7", "3.8", "3.9", "3.10" ] - os: [ ubuntu-latest, macOS-latest ] - compile-go: [ True ] - include: - - python-version: "3.7" - os: ubuntu-latest - compile-go: False - needs: get-version + + build-python-sdk-macos-py310: + runs-on: macos-10.15 env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - COMPILE_GO: ${{ matrix.compile-go }} + COMPILE_GO: True steps: - uses: actions/checkout@v2 - name: Setup Python id: setup-python uses: actions/setup-python@v2 with: - python-version: ${{ matrix.python-version }} + python-version: "3.10" architecture: x64 - - name: Setup Go - id: setup-go - uses: actions/setup-go@v2 - with: - go-version: 1.17.7 - - name: Upgrade pip version - run: | - pip install --upgrade "pip>=21.3.1" - - name: Install pip-tools - run: pip install pip-tools - name: Install dependencies - run: make install-python-ci-dependencies PYTHON=${{ matrix.python-version }} - - name: Publish Python Package - env: - SETUPTOOLS_SCM_PRETEND_VERSION: ${{ needs.get-version.outputs.version_without_prefix }} + run: | + pip install -U pip setuptools wheel twine + make install-protoc-dependencies + make install-go-proto-dependencies + make install-go-ci-dependencies + - name: Build run: | cd sdk/python - sed -i 's/DEFAULT_FEAST_USAGE_VALUE = "True"/DEFAULT_FEAST_USAGE_VALUE = "False"/g' feast/constants.py - sed -i 's/NAME = "feast"/NAME = "feast-no-telemetry"/g' setup.py - python3 -m 
pip install --user --upgrade setuptools wheel twine python3 setup.py sdist bdist_wheel - python3 -m twine upload --verbose dist/*.whl + + - uses: actions/upload-artifact@v2 + with: + name: wheels + path: sdk/python/dist/* + publish-java-sdk: container: maven:3.6-jdk-11 @@ -268,6 +256,7 @@ jobs: working-directory: ./ui run: yarn build:lib - name: Publish UI package + working-directory: ./ui run: npm publish env: NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}} diff --git a/.releaserc.js b/.releaserc.js index 8cdcc1f277..2acf9b7350 100644 --- a/.releaserc.js +++ b/.releaserc.js @@ -57,7 +57,8 @@ module.exports = { assets: [ "CHANGELOG.md", "java/pom.xml", - "infra/charts/**/*.*" + "infra/charts/**/*.*", + "ui/package.json" ], message: "chore(release): release ${nextRelease.version}\n\n${nextRelease.notes}" } diff --git a/CHANGELOG.md b/CHANGELOG.md index bcb6f8cde3..5c40558e1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,24 @@ # Changelog +## [0.20.1](https://github.com/feast-dev/feast/compare/v0.20.0...v0.20.1) (2022-04-20) + + +### Bug Fixes + +* Addresses ZeroDivisionError when materializing file source with same timestamps ([#2551](https://github.com/feast-dev/feast/issues/2551)) ([5539c51](https://github.com/feast-dev/feast/commit/5539c51646d3d2150df7476aa5ac9d075998b235)) +* Build platform specific python packages with ci-build-wheel ([#2555](https://github.com/feast-dev/feast/issues/2555)) ([1757639](https://github.com/feast-dev/feast/commit/17576396980a02e6ad7d70d69367df0823ef5408)) +* Enforce kw args featureservice ([#2575](https://github.com/feast-dev/feast/issues/2575)) ([4dce254](https://github.com/feast-dev/feast/commit/4dce254dc8c4f7de0c6005907ceba53b44f264ce)) +* Enforce kw args in datasources ([#2567](https://github.com/feast-dev/feast/issues/2567)) ([6374634](https://github.com/feast-dev/feast/commit/6374634c35b3820c4ed12edc7b2e70a9c561bfe5)) +* Fix `__hash__` methods ([#2556](https://github.com/feast-dev/feast/issues/2556)) 
([dd8b854](https://github.com/feast-dev/feast/commit/dd8b8546fce90fab099cab71ab318681c3a0c998)) +* Fix DynamoDB fetches when there are entities that are not found ([#2573](https://github.com/feast-dev/feast/issues/2573)) ([882328f](https://github.com/feast-dev/feast/commit/882328f9b6da45a310916e5af23e0926b4186a85)) +* Fix push sources and add docs / tests pushing via the python feature server ([#2561](https://github.com/feast-dev/feast/issues/2561)) ([c5006c2](https://github.com/feast-dev/feast/commit/c5006c2cf47fd489d8f740d300f06b8fab387148)) +* Fixed data mapping errors for Snowflake ([#2558](https://github.com/feast-dev/feast/issues/2558)) ([abd6be7](https://github.com/feast-dev/feast/commit/abd6be73ec0b795e1ea043d9db2744156f04c5d3)) +* Small typo in CLI ([#2578](https://github.com/feast-dev/feast/issues/2578)) ([8717bc8](https://github.com/feast-dev/feast/commit/8717bc8c19be13158eb7c3de42d38383803195b9)) +* Switch from `join_key` to `join_keys` in tests and docs ([#2580](https://github.com/feast-dev/feast/issues/2580)) ([6130b80](https://github.com/feast-dev/feast/commit/6130b80f64b0952ed209213a371d959f41b9a350)) +* Update build_go_protos to use a consistent python path ([#2550](https://github.com/feast-dev/feast/issues/2550)) ([1c523bf](https://github.com/feast-dev/feast/commit/1c523bf8acd1d554efa4b6211420185f2b66ec36)) +* Update RedisCluster to use redis-py official implementation ([#2554](https://github.com/feast-dev/feast/issues/2554)) ([c47fa2a](https://github.com/feast-dev/feast/commit/c47fa2a58ddaee892095b867a022cfcf236ff7c1)) +* Use cwd when getting module path ([#2577](https://github.com/feast-dev/feast/issues/2577)) ([28752f2](https://github.com/feast-dev/feast/commit/28752f23a365716d98b9266d449ee0aa0572165f)) + # [0.20.0](https://github.com/feast-dev/feast/compare/v0.19.0...v0.20.0) (2022-04-14) diff --git a/Makefile b/Makefile index 4d961a0472..41041d7c08 100644 --- a/Makefile +++ b/Makefile @@ -145,15 +145,15 @@ install-go-ci-dependencies: go get 
github.com/go-python/gopy go install golang.org/x/tools/cmd/goimports go install github.com/go-python/gopy + python -m pip install pybindgen==0.22.0 install-protoc-dependencies: - pip install grpcio-tools==1.34.0 + pip install grpcio-tools==1.44.0 mypy-protobuf==3.1.0 compile-protos-go: install-go-proto-dependencies install-protoc-dependencies cd sdk/python && python setup.py build_go_protos compile-go-lib: install-go-proto-dependencies install-go-ci-dependencies - python -m pip install pybindgen==0.22.0 cd sdk/python && python setup.py build_go_lib # Needs feast package to setup the feature store diff --git a/README.md b/README.md index b0cc61c91d..af4df06175 100644 --- a/README.md +++ b/README.md @@ -148,7 +148,7 @@ The list below contains the functionality that contributors are planning to deve * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) * [x] [Spark (community plugin)](https://docs.feast.dev/reference/data-sources/spark) - * [x] Kafka / Kinesis sources (via [push support into the online store](https://docs.feast.dev/reference/data-sources/push) + * [x] Kafka / Kinesis sources (via [push support into the online store](https://docs.feast.dev/reference/data-sources/push)) * [ ] HTTP source * **Offline Stores** * [x] [Snowflake](https://docs.feast.dev/reference/offline-stores/snowflake) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index e73996665e..11e20ab831 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -81,11 +81,10 @@ * [feature\_store.yaml](reference/feature-repository/feature-store-yaml.md) * [.feastignore](reference/feature-repository/feast-ignore.md) * [Feature servers](reference/feature-servers/README.md) - * [Local feature server](reference/feature-servers/local-feature-server.md) + * [Python feature server](reference/feature-servers/python-feature-server.md) * [Go-based feature retrieval](reference/feature-servers/go-feature-retrieval.md) 
* [\[Alpha\] Data quality monitoring](reference/dqm.md) * [\[Alpha\] On demand feature view](reference/alpha-on-demand-feature-view.md) -* [\[Alpha\] Stream ingestion](reference/alpha-stream-ingestion.md) * [\[Alpha\] AWS Lambda feature server](reference/alpha-aws-lambda-feature-server.md) * [Feast CLI reference](reference/feast-cli-commands.md) * [Python API reference](http://rtd.feast.dev) diff --git a/docs/getting-started/concepts/entity.md b/docs/getting-started/concepts/entity.md index bc8aa2ac99..77cfc0aff2 100644 --- a/docs/getting-started/concepts/entity.md +++ b/docs/getting-started/concepts/entity.md @@ -3,7 +3,7 @@ An entity is a collection of semantically related features. Users define entities to map to the domain of their use case. For example, a ride-hailing service could have customers and drivers as their entities, which group related features that correspond to these customers and drivers. ```python -driver = Entity(name='driver', value_type=ValueType.STRING, join_key='driver_id') +driver = Entity(name='driver', value_type=ValueType.STRING, join_keys=['driver_id']) ``` Entities are typically defined as part of feature views. Entity name is used to reference the entity from a feature view definition and join key is used to identify the physical primary key on which feature values should be stored and retrieved. These keys are used during the lookup of feature values from the online store and the join process in point-in-time joins. It is possible to define composite entities \(more than one entity object\) in a feature view. It is also possible for feature views to have zero entities. See [feature view](feature-view.md) for more details. 
diff --git a/docs/getting-started/concepts/feature-view.md b/docs/getting-started/concepts/feature-view.md index 80fd803d1f..e3decf39c9 100644 --- a/docs/getting-started/concepts/feature-view.md +++ b/docs/getting-started/concepts/feature-view.md @@ -79,7 +79,7 @@ It is suggested that you dynamically specify the new FeatureView name using `.wi from feast import BigQuerySource, Entity, FeatureView, Field, ValueType from feast.types import Int32 -location = Entity(name="location", join_key="location_id", value_type=ValueType.INT64) +location = Entity(name="location", join_keys=["location_id"], value_type=ValueType.INT64) location_stats_fv= FeatureView( name="location_stats", diff --git a/docs/getting-started/faq.md b/docs/getting-started/faq.md index ebae912962..21bad82312 100644 --- a/docs/getting-started/faq.md +++ b/docs/getting-started/faq.md @@ -3,7 +3,7 @@ {% hint style="info" %} **Don't see your question?** -We encourage you to ask questions on [Slack](https://slack.feast.dev) or [Github](https://github.com/feast-dev/feast). Even better, once you get an answer, add the answer to this FAQ via a [pull request](../project/development-guide.md)! +We encourage you to ask questions on [Slack](https://slack.feast.dev) or [GitHub](https://github.com/feast-dev/feast). Even better, once you get an answer, add the answer to this FAQ via a [pull request](../project/development-guide.md)! {% endhint %} ## Getting started @@ -38,7 +38,7 @@ Feast currently does not support any access control other than the access contro ### Does Feast support streaming sources? -Yes. In earlier versions of Feast, we used Feast Spark to manage ingestion from stream sources. In the current version of Feast, we support [push based ingestion](../reference/alpha-stream-ingestion.md). +Yes. In earlier versions of Feast, we used Feast Spark to manage ingestion from stream sources. In the current version of Feast, we support [push based ingestion](../reference/data-sources/push.md). 
### Does Feast support composite keys? @@ -83,7 +83,7 @@ Yes. For example, the Postgres [connector](https://github.com/nossrannug/feast-p Yes. There are two ways to use S3 in Feast: -* Using Redshift as a data source via Spectrum ([AWS tutorial](https://docs.aws.amazon.com/redshift/latest/dg/tutorial-nested-data-create-table.html)), and then continuing with the [Running Feast with GCP/AWS](../how-to-guides/feast-gcp-aws/) guide. See a [presentation](https://youtu.be/pMFbRJ7AnBk?t=9463) we did on this at our apply() meetup. +* Using Redshift as a data source via Spectrum ([AWS tutorial](https://docs.aws.amazon.com/redshift/latest/dg/tutorial-nested-data-create-table.html)), and then continuing with the [Running Feast with Snowflake/GCP/AWS](../how-to-guides/feast-snowflake-gcp-aws/) guide. See a [presentation](https://youtu.be/pMFbRJ7AnBk?t=9463) we did on this at our apply() meetup. * Using the `s3_endpoint_override` in a `FileSource` data source. This endpoint is more suitable for quick proof of concepts that won't necessarily scale for production use cases. ### How can I use Spark with Feast? diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index b188e0189d..e9a294d5fc 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -98,7 +98,7 @@ driver_hourly_stats = FileSource( # fetch features. # Entity has a name used for later reference (in a feature view, eg) # and join_key to identify physical field name used in storages -driver = Entity(name="driver", value_type=ValueType.INT64, join_key="driver_id", description="driver id",) +driver = Entity(name="driver", value_type=ValueType.INT64, join_keys=["driver_id"], description="driver id",) # Our parquet files contain sample data that includes a driver_id column, timestamps and # three feature column. Here we define a Feature View that will allow us to serve this @@ -168,7 +168,7 @@ driver_hourly_stats = FileSource( # fetch features. 
# Entity has a name used for later reference (in a feature view, eg) # and join_key to identify physical field name used in storages -driver = Entity(name="driver", value_type=ValueType.INT64, join_key="driver_id", description="driver id",) +driver = Entity(name="driver", value_type=ValueType.INT64, join_keys=["driver_id"], description="driver id",) # Our parquet files contain sample data that includes a driver_id column, timestamps and # three feature column. Here we define a Feature View that will allow us to serve this diff --git a/docs/getting-started/third-party-integrations.md b/docs/getting-started/third-party-integrations.md index a731fd85dc..0c233d7b69 100644 --- a/docs/getting-started/third-party-integrations.md +++ b/docs/getting-started/third-party-integrations.md @@ -21,7 +21,7 @@ Don't see your offline store or online store of choice here? Check out our guide * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) * [x] [Spark (community plugin)](https://docs.feast.dev/reference/data-sources/spark) -* [x] Kafka / Kinesis sources (via [push support into the online store](https://docs.feast.dev/reference/data-sources/push) +* [x] Kafka / Kinesis sources (via [push support into the online store](https://docs.feast.dev/reference/data-sources/push)) * [ ] HTTP source ### Offline Stores diff --git a/docs/how-to-guides/running-feast-in-production.md b/docs/how-to-guides/running-feast-in-production.md index 8518bb28d0..5380832609 100644 --- a/docs/how-to-guides/running-feast-in-production.md +++ b/docs/how-to-guides/running-feast-in-production.md @@ -274,7 +274,7 @@ For seamless integration with Kubernetes (including services created by Feast He ## 5. Ingesting features from a stream source -Recently Feast added functionality for [stream ingestion](../reference/alpha-stream-ingestion.md). 
+Recently Feast added functionality for [stream ingestion](../reference/data-sources/push.md). Please note that this is still in an early phase and new incompatible changes may be introduced. ### 5.1. Using Python SDK in your Apache Spark / Beam pipeline diff --git a/docs/reference/alpha-aws-lambda-feature-server.md b/docs/reference/alpha-aws-lambda-feature-server.md index 58a0f6862b..eadcf40bb4 100644 --- a/docs/reference/alpha-aws-lambda-feature-server.md +++ b/docs/reference/alpha-aws-lambda-feature-server.md @@ -8,7 +8,7 @@ To enable this feature, run **`feast alpha enable aws_lambda_feature_server`** ## Overview -The AWS Lambda feature server is an HTTP endpoint that serves features with JSON I/O, deployed as a Docker image through AWS Lambda and AWS API Gateway. This enables users to get features from Feast using any programming language that can make HTTP requests. A [local feature server](feature-server.md) is also available. A remote feature server on GCP Cloud Run is currently being developed. +The AWS Lambda feature server is an HTTP endpoint that serves features with JSON I/O, deployed as a Docker image through AWS Lambda and AWS API Gateway. This enables users to get features from Feast using any programming language that can make HTTP requests. A [local feature server](feature-servers/python-feature-server.md) is also available. A remote feature server on GCP Cloud Run is currently being developed. ## Deployment diff --git a/docs/reference/data-sources/push.md b/docs/reference/data-sources/push.md index 9f377d2099..e6eff312ec 100644 --- a/docs/reference/data-sources/push.md +++ b/docs/reference/data-sources/push.md @@ -1,5 +1,7 @@ # Push source +**Warning**: This is an _experimental_ feature. It's intended for early testing and feedback, and could change without warnings in future releases. + ## Description Push sources allow feature values to be pushed to the online store in real time. 
This allows fresh feature values to be made available to applications. Push sources supercede the @@ -31,10 +33,6 @@ from feast.types import Int64 push_source = PushSource( name="push_source", - schema=[ - Field(name="user_id", dtype=Int64), - Field(name="life_time_value", dtype=Int64) - ], batch_source=BigQuerySource(table="test.test"), ) @@ -42,7 +40,7 @@ fv = FeatureView( name="feature view", entities=["user_id"], schema=[Field(name="life_time_value", dtype=Int64)], - stream_source=push_source, + source=push_source, ) ``` @@ -53,6 +51,8 @@ import pandas as pd fs = FeatureStore(...) feature_data_frame = pd.DataFrame() -fs.push("push_source", feature_data_frame) +fs.push("push_source_name", feature_data_frame) ``` +See also [Python feature server](../feature-servers/python-feature-server.md) for instructions on how to push data to a deployed feature server. + diff --git a/docs/reference/feast-cli-commands.md b/docs/reference/feast-cli-commands.md index 7fb2ccbeb5..38e85843d4 100644 --- a/docs/reference/feast-cli-commands.md +++ b/docs/reference/feast-cli-commands.md @@ -2,7 +2,7 @@ ## Overview -The Feast CLI comes bundled with the Feast Python package. It is immediately available after [installing Feast](../how-to-guides/feast-gcp-aws/install-feast.md). +The Feast CLI comes bundled with the Feast Python package. It is immediately available after [installing Feast](../how-to-guides/feast-snowflake-gcp-aws/install-feast.md). ```text Usage: feast [OPTIONS] COMMAND [ARGS]... @@ -54,7 +54,7 @@ feast apply **What does Feast apply do?** 1. Feast will scan Python files in your feature repository and find all Feast object definitions, such as feature views, entities, and data sources. -2. Feast will validate your feature definitions +2. Feast will validate your feature definitions (e.g. for uniqueness of features) 3. Feast will sync the metadata about Feast objects to the registry. If a registry does not exist, then it will be instantiated. 
The standard registry is a simple protobuf binary file that is stored on disk \(locally or in an object store\). 4. Feast CLI will create all necessary feature store infrastructure. The exact infrastructure that is deployed or configured depends on the `provider` configuration that you have set in `feature_store.yaml`. For example, setting `local` as your provider will result in a `sqlite` online store being created. diff --git a/docs/reference/feature-servers/README.md b/docs/reference/feature-servers/README.md index e9e3afa4c0..301cea372c 100644 --- a/docs/reference/feature-servers/README.md +++ b/docs/reference/feature-servers/README.md @@ -2,4 +2,4 @@ Feast users can choose to retrieve features from a feature server, as opposed to through the Python SDK. -{% page-ref page="local-feature-server.md" %} +{% page-ref page="python-feature-server.md" %} diff --git a/docs/reference/feature-servers/go-feature-retrieval.md b/docs/reference/feature-servers/go-feature-retrieval.md index 999a142c07..05411a7f8c 100644 --- a/docs/reference/feature-servers/go-feature-retrieval.md +++ b/docs/reference/feature-servers/go-feature-retrieval.md @@ -2,7 +2,7 @@ ## Overview -The Go Feature Retrieval component is a Go implementation of the core feature serving logic, embedded in the Python SDK. It supports retrieval of feature references, feature services, and on demand feature views, and can be used either through the Python SDK or the [Python feature server](local-feature-server.md). +The Go Feature Retrieval component is a Go implementation of the core feature serving logic, embedded in the Python SDK. It supports retrieval of feature references, feature services, and on demand feature views, and can be used either through the Python SDK or the [Python feature server](python-feature-server.md). 
Currently, this component only supports online serving and does not have an offline component including APIs to create feast feature repositories or apply configuration to the registry to facilitate online materialization. It also does not expose its own dedicated cli to perform feast actions. Furthermore, this component is only meant to expose an online serving API that can be called through the python SDK to facilitate faster online feature retrieval. @@ -10,7 +10,7 @@ The Go Feature Retrieval component currently only supports Redis and Sqlite as o ## Installation -As long as you are running macOS or linux x86 with python version 3.7-3.10, the go component comes pre-compiled when you run install feast. +As long as you are running macOS or linux, on x86, with python version 3.7-3.10, the go component comes pre-compiled when you install feast. For developers, if you want to build from source, run `make compile-go-lib` to build and compile the go server. diff --git a/docs/reference/feature-servers/local-feature-server.md b/docs/reference/feature-servers/python-feature-server.md similarity index 64% rename from docs/reference/feature-servers/local-feature-server.md rename to docs/reference/feature-servers/python-feature-server.md index 4ea37d4f1e..352f0edc16 100644 --- a/docs/reference/feature-servers/local-feature-server.md +++ b/docs/reference/feature-servers/python-feature-server.md @@ -1,15 +1,23 @@ -# Local feature server +# Python feature server ## Overview -The local feature server is an HTTP endpoint that serves features with JSON I/O. This enables users to get features from Feast using any programming language that can make HTTP requests. A [remote feature server](../alpha-aws-lambda-feature-server.md) on AWS Lambda is also available. A remote feature server on GCP Cloud Run is currently being developed. +The feature server is an HTTP endpoint that serves features with JSON I/O. 
This enables users to write and read features from Feast online stores using any programming language that can make HTTP requests. ## CLI -There is a new CLI command that starts the server: `feast serve`. By default Feast uses port 6566; the port be overridden by a `--port` flag. +There is a CLI command that starts the server: `feast serve`. By default, Feast uses port 6566; the port can be overridden by a `--port` flag. + +## Deploying as a service + +One can also deploy a feature server by building a docker image that bundles in the project's `feature_store.yaml`. See [helm chart](https://github.com/feast-dev/feast/blob/master/infra/charts/feast-python-server) for an example. + +A [remote feature server](../alpha-aws-lambda-feature-server.md) on AWS Lambda is available. A remote feature server on GCP Cloud Run is currently being developed. + ## Example +### Initializing a feature server Here's the local feature server usage example with the local template: ```bash @@ -41,6 +49,7 @@ INFO: Uvicorn running on http://127.0.0.1:6566 (Press CTRL+C to quit) 09/10/2021 10:42:11 AM INFO:Uvicorn running on http://127.0.0.1:6566 (Press CTRL+C to quit) ``` +### Retrieving features from the online store After the server starts, we can execute cURL commands from another terminal tab: ```bash @@ -142,3 +151,45 @@ curl -X POST \ } }' | jq ``` + +### Pushing features to the online store +You can push data corresponding to a push source to the online store (note that timestamps need to be strings): + +```text +curl -X POST "http://localhost:6566/push" -d '{ + "push_source_name": "driver_hourly_stats_push_source", + "df": { + "driver_id": [1001], + "event_timestamp": ["2022-05-13 10:59:42"], + "created": ["2022-05-13 10:59:42"], + "conv_rate": [1.0], + "acc_rate": [1.0], + "avg_daily_trips": [1000] + } + }' | jq +``` + +or equivalently from Python: +```python +import json +import requests +import pandas as pd +from datetime import datetime + +event_dict = { + "driver_id": [1001], + "
"event_timestamp": [str(datetime(2021, 5, 13, 10, 59, 42))], + "created": [str(datetime(2021, 5, 13, 10, 59, 42))], + "conv_rate": [1.0], + "acc_rate": [1.0], + "avg_daily_trips": [1000], + "string_feature": "test2", +} +push_data = { + "push_source_name":"driver_stats_push_source", + "df":event_dict +} +requests.post( + "http://localhost:6566/push", + data=json.dumps(push_data)) +``` diff --git a/docs/roadmap.md b/docs/roadmap.md index 080cf16c02..3eb181c0da 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -16,7 +16,7 @@ The list below contains the functionality that contributors are planning to deve * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) * [x] [Spark (community plugin)](https://docs.feast.dev/reference/data-sources/spark) - * [x] Kafka / Kinesis sources (via [push support into the online store](https://docs.feast.dev/reference/data-sources/push) + * [x] Kafka / Kinesis sources (via [push support into the online store](https://docs.feast.dev/reference/data-sources/push)) * [ ] HTTP source * **Offline Stores** * [x] [Snowflake](https://docs.feast.dev/reference/offline-stores/snowflake) diff --git a/docs/tutorials/validating-historical-features.md b/docs/tutorials/validating-historical-features.md index 5f85e66c94..addd309902 100644 --- a/docs/tutorials/validating-historical-features.md +++ b/docs/tutorials/validating-historical-features.md @@ -129,7 +129,7 @@ batch_source = FileSource( ```python -taxi_entity = Entity(name='taxi', join_key='taxi_id') +taxi_entity = Entity(name='taxi', join_keys=['taxi_id']) ``` diff --git a/examples/quickstart/quickstart.ipynb b/examples/quickstart/quickstart.ipynb index 3679fcc778..60974d2751 100644 --- a/examples/quickstart/quickstart.ipynb +++ b/examples/quickstart/quickstart.ipynb @@ -59,7 +59,7 @@ "base_uri": "https://localhost:8080/" }, "id": "rXNMAAJKQPG5", - "outputId": "b27420ac-c6ba-4d9f-cae8-51a2007b4189" 
+ "outputId": "52297709-380b-4200-8e7c-3d0102a82ea4" }, "source": [ "%%sh\n", @@ -67,14 +67,14 @@ "pip install Pygments -q\n", "echo \"Please restart your runtime now (Runtime -> Restart runtime). This ensures that the correct dependencies are loaded.\"" ], - "execution_count": null, + "execution_count": 1, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Please restart your runtime now (Runtime -> Restart runtime). This ensures that the correct dependencies are loaded.\n" - ], - "name": "stdout" + ] } ] }, @@ -112,22 +112,22 @@ "base_uri": "https://localhost:8080/" }, "id": "IhirSkgUvYau", - "outputId": "a2a5631e-1703-4957-b896-9c432851a261" + "outputId": "df90af1a-06bd-48a1-94e6-7def19e87d5f" }, "source": [ "!feast init feature_repo" ], - "execution_count": null, + "execution_count": 1, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Feast is an open source project that collects anonymized error reporting and usage statistics. To opt out or learn more see https://docs.feast.dev/reference/usage\n", "\n", "Creating a new Feast repository in \u001b[1m\u001b[32m/content/feature_repo\u001b[0m.\n", "\n" - ], - "name": "stdout" + ] } ] }, @@ -155,25 +155,25 @@ "base_uri": "https://localhost:8080/" }, "id": "9jXuzt4ovzA3", - "outputId": "1ef1bf42-2306-4cc0-c959-1ea2d62e3149" + "outputId": "bff15f0c-9f8e-4a3c-e605-5ad84be30709" }, "source": [ "%cd feature_repo\n", "!ls -R" ], - "execution_count": null, + "execution_count": 2, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "/content/feature_repo\n", ".:\n", - "data example.py feature_store.yaml\n", + "data example.py feature_store.yaml __init__.py\n", "\n", "./data:\n", "driver_stats.parquet\n" - ], - "name": "stdout" + ] } ] }, @@ -192,8 +192,7 @@ "* gcp: use BigQuery/Snowflake with Google Cloud Datastore/Redis\n", "* aws: use Redshift/Snowflake with DynamoDB/Redis\n", "\n", - "Note that there are many other sources Feast works with, including Azure, 
Hive, Trino, and PostgreSQL via community plugins. See https://docs.feast.dev/getting-started/third-party-integrations for all supported datasources.", - "\n", + "Note that there are many other sources Feast works with, including Azure, Hive, Trino, and PostgreSQL via community plugins. See https://docs.feast.dev/getting-started/third-party-integrations for all supported datasources.\n", "A custom setup can also be made by following https://docs.feast.dev/v/master/how-to-guides/creating-a-custom-provider" ] }, @@ -204,23 +203,23 @@ "base_uri": "https://localhost:8080/" }, "id": "9_YJ--uYdtcP", - "outputId": "8d772619-aa4d-4cb4-e7e0-2ed45bc09a87" + "outputId": "89268e31-6be0-43fb-e576-6d335a2c1dd9" }, "source": [ "!pygmentize feature_store.yaml" ], - "execution_count": null, + "execution_count": 3, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "project: feature_repo\n", - "registry: data/registry.db\n", - "provider: local\n", - "online_store:\n", - " path: data/online_store.db\n" - ], - "name": "stdout" + "\u001b[94mproject\u001b[39;49;00m: feature_repo\n", + "\u001b[94mregistry\u001b[39;49;00m: data/registry.db\n", + "\u001b[94mprovider\u001b[39;49;00m: local\n", + "\u001b[94monline_store\u001b[39;49;00m:\n", + " \u001b[94mpath\u001b[39;49;00m: data/online_store.db\n" + ] } ] }, @@ -240,23 +239,55 @@ "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 419 + "height": 424 }, "id": "sIF2lO59dwzi", - "outputId": "3e7ff19e-1052-49a6-a889-de76cce61714" + "outputId": "80e798d5-df21-4ebd-de1c-9bde282bd742" }, "source": [ "import pandas as pd\n", "\n", "pd.read_parquet(\"data/driver_stats.parquet\")" ], - "execution_count": null, + "execution_count": 4, "outputs": [ { "output_type": "execute_result", "data": { + "text/plain": [ + " event_timestamp driver_id conv_rate acc_rate \\\n", + "0 2022-03-31 14:00:00+00:00 1005 0.313336 0.231481 \n", + "1 2022-03-31 15:00:00+00:00 1005 0.959499 0.942614 \n", + "2 2022-03-31 
16:00:00+00:00 1005 0.231786 0.313516 \n", + "3 2022-03-31 17:00:00+00:00 1005 0.886911 0.531613 \n", + "4 2022-03-31 18:00:00+00:00 1005 0.574945 0.718223 \n", + "... ... ... ... ... \n", + "1802 2022-04-15 12:00:00+00:00 1001 0.521622 0.266667 \n", + "1803 2022-04-15 13:00:00+00:00 1001 0.003188 0.535501 \n", + "1804 2021-04-12 07:00:00+00:00 1001 0.709081 0.823138 \n", + "1805 2022-04-08 02:00:00+00:00 1003 0.033297 0.053268 \n", + "1806 2022-04-08 02:00:00+00:00 1003 0.033297 0.053268 \n", + "\n", + " avg_daily_trips created \n", + "0 303 2022-04-15 14:34:10.056 \n", + "1 842 2022-04-15 14:34:10.056 \n", + "2 782 2022-04-15 14:34:10.056 \n", + "3 634 2022-04-15 14:34:10.056 \n", + "4 441 2022-04-15 14:34:10.056 \n", + "... ... ... \n", + "1802 406 2022-04-15 14:34:10.056 \n", + "1803 593 2022-04-15 14:34:10.056 \n", + "1804 997 2022-04-15 14:34:10.056 \n", + "1805 534 2022-04-15 14:34:10.056 \n", + "1806 534 2022-04-15 14:34:10.056 \n", + "\n", + "[1807 rows x 6 columns]" + ], "text/html": [ - "
\n", + "\n", + "
\n", + "
\n", + "
\n", "\n", + "\n", + " \n", + "
\n", + "
\n", + " " ] }, "metadata": {}, - "execution_count": 5 + "execution_count": 4 } ] }, @@ -438,53 +529,54 @@ "base_uri": "https://localhost:8080/" }, "id": "DPqXCoNpL0SX", - "outputId": "a252e224-61da-48ee-92b8-1780def99244" + "outputId": "be1308b2-0c83-4dd3-eb88-e79ffcbd20d6" }, "source": [ "!pygmentize -f terminal16m example.py" ], - "execution_count": null, + "execution_count": 5, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "\u001b[38;2;64;128;128m# This is an example feature definition file\u001b[39m\n", + "\u001b[38;2;64;128;128;03m# This is an example feature definition file\u001b[39;00m\n", "\n", - "\u001b[38;2;0;128;0;01mfrom\u001b[39;00m \u001b[38;2;0;0;255;01mgoogle.protobuf.duration_pb2\u001b[39;00m \u001b[38;2;0;128;0;01mimport\u001b[39;00m Duration\n", + "\u001b[38;2;0;128;0;01mfrom\u001b[39;00m \u001b[38;2;0;0;255;01mdatetime\u001b[39;00m \u001b[38;2;0;128;0;01mimport\u001b[39;00m timedelta\n", "\n", - "\u001b[38;2;0;128;0;01mfrom\u001b[39;00m \u001b[38;2;0;0;255;01mfeast\u001b[39;00m \u001b[38;2;0;128;0;01mimport\u001b[39;00m Entity, Feature, FeatureView, FileSource, ValueType\n", + "\u001b[38;2;0;128;0;01mfrom\u001b[39;00m \u001b[38;2;0;0;255;01mfeast\u001b[39;00m \u001b[38;2;0;128;0;01mimport\u001b[39;00m Entity, FeatureView, Field, FileSource, ValueType\n", + "\u001b[38;2;0;128;0;01mfrom\u001b[39;00m \u001b[38;2;0;0;255;01mfeast\u001b[39;00m\u001b[38;2;0;0;255;01m.\u001b[39;00m\u001b[38;2;0;0;255;01mtypes\u001b[39;00m \u001b[38;2;0;128;0;01mimport\u001b[39;00m Float32, Int64\n", "\n", - "\u001b[38;2;64;128;128m# Read data from parquet files. Parquet is convenient for local development mode. For\u001b[39m\n", - "\u001b[38;2;64;128;128m# production, you can use your favorite DWH, such as BigQuery. See Feast documentation\u001b[39m\n", - "\u001b[38;2;64;128;128m# for more info.\u001b[39m\n", + "\u001b[38;2;64;128;128;03m# Read data from parquet files. Parquet is convenient for local development mode. 
For\u001b[39;00m\n", + "\u001b[38;2;64;128;128;03m# production, you can use your favorite DWH, such as BigQuery. See Feast documentation\u001b[39;00m\n", + "\u001b[38;2;64;128;128;03m# for more info.\u001b[39;00m\n", "driver_hourly_stats \u001b[38;2;102;102;102m=\u001b[39m FileSource(\n", " path\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m\u001b[38;2;186;33;33m/content/feature_repo/data/driver_stats.parquet\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m,\n", - " event_timestamp_column\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m\u001b[38;2;186;33;33mevent_timestamp\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m,\n", + " timestamp_field\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m\u001b[38;2;186;33;33mevent_timestamp\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m,\n", " created_timestamp_column\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m\u001b[38;2;186;33;33mcreated\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m,\n", ")\n", "\n", - "\u001b[38;2;64;128;128m# Define an entity for the driver. You can think of entity as a primary key used to\u001b[39m\n", - "\u001b[38;2;64;128;128m# fetch features.\u001b[39m\n", - "driver \u001b[38;2;102;102;102m=\u001b[39m Entity(name\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m\u001b[38;2;186;33;33mdriver_id\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m, value_type\u001b[38;2;102;102;102m=\u001b[39mValueType\u001b[38;2;102;102;102m.\u001b[39mINT64, description\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m\u001b[38;2;186;33;33mdriver id\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m,)\n", + "\u001b[38;2;64;128;128;03m# Define an entity for the driver. 
You can think of entity as a primary key used to\u001b[39;00m\n", + "\u001b[38;2;64;128;128;03m# fetch features.\u001b[39;00m\n", + "driver \u001b[38;2;102;102;102m=\u001b[39m Entity(name\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m\u001b[38;2;186;33;33mdriver\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m, value_type\u001b[38;2;102;102;102m=\u001b[39mValueType\u001b[38;2;102;102;102m.\u001b[39mINT64, join_key\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m\u001b[38;2;186;33;33mdriver_id\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m,)\n", "\n", - "\u001b[38;2;64;128;128m# Our parquet files contain sample data that includes a driver_id column, timestamps and\u001b[39m\n", - "\u001b[38;2;64;128;128m# three feature column. Here we define a Feature View that will allow us to serve this\u001b[39m\n", - "\u001b[38;2;64;128;128m# data to our model online.\u001b[39m\n", + "\u001b[38;2;64;128;128;03m# Our parquet files contain sample data that includes a driver_id column, timestamps and\u001b[39;00m\n", + "\u001b[38;2;64;128;128;03m# three feature column. 
Here we define a Feature View that will allow us to serve this\u001b[39;00m\n", + "\u001b[38;2;64;128;128;03m# data to our model online.\u001b[39;00m\n", "driver_hourly_stats_view \u001b[38;2;102;102;102m=\u001b[39m FeatureView(\n", " name\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m\u001b[38;2;186;33;33mdriver_hourly_stats\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m,\n", - " entities\u001b[38;2;102;102;102m=\u001b[39m[\u001b[38;2;186;33;33m\"\u001b[39m\u001b[38;2;186;33;33mdriver_id\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m],\n", - " ttl\u001b[38;2;102;102;102m=\u001b[39mDuration(seconds\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;102;102;102m86400\u001b[39m \u001b[38;2;102;102;102m*\u001b[39m \u001b[38;2;102;102;102m1\u001b[39m),\n", - " features\u001b[38;2;102;102;102m=\u001b[39m[\n", - " Feature(name\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m\u001b[38;2;186;33;33mconv_rate\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m, dtype\u001b[38;2;102;102;102m=\u001b[39mValueType\u001b[38;2;102;102;102m.\u001b[39mFLOAT),\n", - " Feature(name\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m\u001b[38;2;186;33;33macc_rate\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m, dtype\u001b[38;2;102;102;102m=\u001b[39mValueType\u001b[38;2;102;102;102m.\u001b[39mFLOAT),\n", - " Feature(name\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m\u001b[38;2;186;33;33mavg_daily_trips\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m, dtype\u001b[38;2;102;102;102m=\u001b[39mValueType\u001b[38;2;102;102;102m.\u001b[39mINT64),\n", + " entities\u001b[38;2;102;102;102m=\u001b[39m[\u001b[38;2;186;33;33m\"\u001b[39m\u001b[38;2;186;33;33mdriver\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m],\n", + " ttl\u001b[38;2;102;102;102m=\u001b[39mtimedelta(days\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;102;102;102m1\u001b[39m),\n", + " schema\u001b[38;2;102;102;102m=\u001b[39m[\n", + " 
Field(name\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m\u001b[38;2;186;33;33mconv_rate\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m, dtype\u001b[38;2;102;102;102m=\u001b[39mFloat32),\n", + " Field(name\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m\u001b[38;2;186;33;33macc_rate\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m, dtype\u001b[38;2;102;102;102m=\u001b[39mFloat32),\n", + " Field(name\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m\u001b[38;2;186;33;33mavg_daily_trips\u001b[39m\u001b[38;2;186;33;33m\"\u001b[39m, dtype\u001b[38;2;102;102;102m=\u001b[39mInt64),\n", " ],\n", - " online\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;0;128;0mTrue\u001b[39m,\n", - " batch_source\u001b[38;2;102;102;102m=\u001b[39mdriver_hourly_stats,\n", + " online\u001b[38;2;102;102;102m=\u001b[39m\u001b[38;2;0;128;0;01mTrue\u001b[39;00m,\n", + " source\u001b[38;2;102;102;102m=\u001b[39mdriver_hourly_stats,\n", " tags\u001b[38;2;102;102;102m=\u001b[39m{},\n", ")\n" - ], - "name": "stdout" + ] } ] }, @@ -505,21 +597,23 @@ "base_uri": "https://localhost:8080/" }, "id": "RYKCKKrcxYZG", - "outputId": "d36b3fb2-9292-4b43-f26a-5441c301c92d" + "outputId": "9745d7eb-b4b8-4a43-bf47-189bbf07ae09" }, "source": [ "!feast apply" ], - "execution_count": null, + "execution_count": 6, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Registered entity \u001b[1m\u001b[32mdriver_id\u001b[0m\n", - "Registered feature view \u001b[1m\u001b[32mdriver_hourly_stats\u001b[0m\n", - "Deploying infrastructure for \u001b[1m\u001b[32mdriver_hourly_stats\u001b[0m\n" - ], - "name": "stdout" + "Created entity \u001b[1m\u001b[32mdriver_id\u001b[0m\n", + "Created feature view \u001b[1m\u001b[32mdriver_hourly_stats\u001b[0m\n", + "\n", + "Created sqlite table \u001b[1m\u001b[32mfeature_repo_driver_hourly_stats\u001b[0m\n", + "\n" + ] } ] }, @@ -544,7 +638,7 @@ "base_uri": "https://localhost:8080/" }, "id": "C6Fzia7YwBzz", - 
"outputId": "250e9be1-2283-4d74-cf48-297b8ae0d23a" + "outputId": "b99aedae-9c47-4b9f-acdd-cd02e2e091b7" }, "source": [ "from datetime import datetime, timedelta\n", @@ -583,21 +677,22 @@ "print(\"----- Example features -----\\n\")\n", "print(training_df.head())" ], - "execution_count": null, + "execution_count": 7, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "----- Feature schema -----\n", "\n", "\n", - "Int64Index: 3 entries, 0 to 2\n", + "Int64Index: 3 entries, 720 to 1081\n", "Data columns (total 6 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 event_timestamp 3 non-null datetime64[ns, UTC]\n", - " 1 driver_id 3 non-null int64 \n", - " 2 label_driver_reported_satisfaction 3 non-null int64 \n", + " 0 driver_id 3 non-null int64 \n", + " 1 label_driver_reported_satisfaction 3 non-null int64 \n", + " 2 event_timestamp 3 non-null datetime64[ns, UTC]\n", " 3 conv_rate 3 non-null float32 \n", " 4 acc_rate 3 non-null float32 \n", " 5 avg_daily_trips 3 non-null int32 \n", @@ -607,14 +702,16 @@ "\n", "----- Example features -----\n", "\n", - " event_timestamp driver_id ... acc_rate avg_daily_trips\n", - "0 2021-08-23 15:12:55.489091+00:00 1003 ... 0.120588 938\n", - "1 2021-08-23 15:49:55.489089+00:00 1002 ... 0.504881 635\n", - "2 2021-08-23 16:14:55.489075+00:00 1001 ... 
0.138416 606\n", + " driver_id label_driver_reported_satisfaction \\\n", + "720 1002 5 \n", + "359 1001 1 \n", + "1081 1003 3 \n", "\n", - "[3 rows x 6 columns]\n" - ], - "name": "stdout" + " event_timestamp conv_rate acc_rate avg_daily_trips \n", + "720 2022-04-15 13:58:30.900257+00:00 0.368052 0.417923 346 \n", + "359 2022-04-15 14:23:30.900240+00:00 0.003188 0.535501 593 \n", + "1081 2022-04-15 13:21:30.900260+00:00 0.214944 0.788695 904 \n" + ] } ] }, @@ -645,23 +742,23 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "efbb493a-89a2-41ce-b3b4-d0d05131a8ff" + "outputId": "d38a0e0a-2802-4408-ab92-a26baf82752e" }, "source": [ "from datetime import datetime\n", "!feast materialize-incremental {datetime.now().isoformat()}" ], - "execution_count": null, + "execution_count": 8, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ - "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2021-08-23 16:25:46+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", + "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2022-04-15 14:34:37+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", "\n", - "\u001b[1m\u001b[32mdriver_hourly_stats\u001b[0m from \u001b[1m\u001b[32m2021-08-22 16:25:47+00:00\u001b[0m to \u001b[1m\u001b[32m2021-08-23 16:25:46+00:00\u001b[0m:\n", - "\r 0%| | 0/5 [00:00 None: diff --git a/java/pom.xml b/java/pom.xml index 4a4049305b..65dca26725 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -38,7 +38,7 @@ - 0.20.0 + 0.20.1 https://github.com/feast-dev/feast UTF-8 @@ -68,7 +68,7 @@ 2.3.1 1.3.2 2.0.1.Final - 0.20.0 + 0.20.1 1.6.6 29.0-jre diff --git a/protos/feast/core/DataSource.proto b/protos/feast/core/DataSource.proto index d958281ca2..9e6028ccfa 100644 --- a/protos/feast/core/DataSource.proto +++ b/protos/feast/core/DataSource.proto @@ -222,8 +222,7 @@ message DataSource { // Defines options for DataSource that 
supports pushing data to it. This allows data to be pushed to // the online store on-demand, such as by stream consumers. message PushOptions { - // Mapping of feature name to type - map schema = 1; + reserved 1; } diff --git a/sdk/python/feast/base_feature_view.py b/sdk/python/feast/base_feature_view.py index 67435fa44c..80b3b0cec8 100644 --- a/sdk/python/feast/base_feature_view.py +++ b/sdk/python/feast/base_feature_view.py @@ -110,7 +110,7 @@ def __str__(self): return str(MessageToJson(self.to_proto())) def __hash__(self): - return hash((id(self), self.name)) + return hash((self.name)) def __getitem__(self, item): assert isinstance(item, list) @@ -134,6 +134,7 @@ def __eq__(self, other): if ( self.name != other.name or sorted(self.features) != sorted(other.features) + or self.projection != other.projection or self.description != other.description or self.tags != other.tags or self.owner != other.owner diff --git a/sdk/python/feast/cli.py b/sdk/python/feast/cli.py index 7dc8e19859..80cd1844b6 100644 --- a/sdk/python/feast/cli.py +++ b/sdk/python/feast/cli.py @@ -113,7 +113,7 @@ def version(): @click.pass_context def endpoint(ctx: click.Context): """ - Display feature server endpoints. 
+ Display feature server endpoints """ repo = ctx.obj["CHDIR"] cli_check_repo(repo) @@ -593,7 +593,7 @@ def serve_command(ctx: click.Context, host: str, port: int, no_access_log: bool) ) @click.pass_context def serve_transformations_command(ctx: click.Context, port: int): - """[Experimental] Start a the feature consumption server locally on a given port.""" + """[Experimental] Start a feature consumption server locally on a given port.""" repo = ctx.obj["CHDIR"] cli_check_repo(repo) store = FeatureStore(repo_path=str(repo)) diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index 0e264117ae..79c6cbdf51 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -21,7 +21,7 @@ from feast import type_map from feast.data_format import StreamFormat -from feast.field import Field, from_value_type +from feast.field import Field from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto from feast.repo_config import RepoConfig, get_data_source_class_from_type from feast.types import VALUE_TYPES_TO_FEAST_TYPES @@ -186,6 +186,7 @@ class DataSource(ABC): def __init__( self, + *, event_timestamp_column: Optional[str] = None, created_timestamp_column: Optional[str] = None, field_mapping: Optional[Dict[str, str]] = None, @@ -245,7 +246,7 @@ def __init__( self.owner = owner or "" def __hash__(self): - return hash((id(self), self.name)) + return hash((self.name, self.timestamp_field)) def __str__(self): return str(MessageToJson(self.to_proto())) @@ -263,9 +264,9 @@ def __eq__(self, other): or self.created_timestamp_column != other.created_timestamp_column or self.field_mapping != other.field_mapping or self.date_partition_column != other.date_partition_column + or self.description != other.description or self.tags != other.tags or self.owner != other.owner - or self.description != other.description ): return False @@ -354,11 +355,12 @@ def get_table_column_names_and_types( def __init__( self, - name: str, 
- event_timestamp_column: str, - bootstrap_servers: str, - message_format: StreamFormat, - topic: str, + *args, + name: Optional[str] = None, + event_timestamp_column: Optional[str] = "", + bootstrap_servers: Optional[str] = None, + message_format: Optional[StreamFormat] = None, + topic: Optional[str] = None, created_timestamp_column: Optional[str] = "", field_mapping: Optional[Dict[str, str]] = None, date_partition_column: Optional[str] = "", @@ -368,22 +370,62 @@ def __init__( timestamp_field: Optional[str] = "", batch_source: Optional[DataSource] = None, ): + positional_attributes = [ + "name", + "event_timestamp_column", + "bootstrap_servers", + "message_format", + "topic", + ] + _name = name + _event_timestamp_column = event_timestamp_column + _bootstrap_servers = bootstrap_servers or "" + _message_format = message_format + _topic = topic or "" + + if args: + warnings.warn( + ( + "Kafka parameters should be specified as a keyword argument instead of a positional arg." + "Feast 0.23+ will not support positional arguments to construct Kafka sources" + ), + DeprecationWarning, + ) + if len(args) > len(positional_attributes): + raise ValueError( + f"Only {', '.join(positional_attributes)} are allowed as positional args when defining " + f"Kafka sources, for backwards compatibility." 
+ ) + if len(args) >= 1: + _name = args[0] + if len(args) >= 2: + _event_timestamp_column = args[1] + if len(args) >= 3: + _bootstrap_servers = args[2] + if len(args) >= 4: + _message_format = args[3] + if len(args) >= 5: + _topic = args[4] + + if _message_format is None: + raise ValueError("Message format must be specified for Kafka source") + print("Asdfasdf") super().__init__( - event_timestamp_column=event_timestamp_column, + event_timestamp_column=_event_timestamp_column, created_timestamp_column=created_timestamp_column, field_mapping=field_mapping, date_partition_column=date_partition_column, description=description, tags=tags, owner=owner, - name=name, + name=_name, timestamp_field=timestamp_field, ) self.batch_source = batch_source self.kafka_options = KafkaOptions( - bootstrap_servers=bootstrap_servers, - message_format=message_format, - topic=topic, + bootstrap_servers=_bootstrap_servers, + message_format=_message_format, + topic=_topic, ) def __eq__(self, other): @@ -392,6 +434,9 @@ def __eq__(self, other): "Comparisons should only involve KafkaSource class objects." 
) + if not super().__eq__(other): + return False + if ( self.kafka_options.bootstrap_servers != other.kafka_options.bootstrap_servers @@ -402,6 +447,9 @@ def __eq__(self, other): return True + def __hash__(self): + return super().__hash__() + @staticmethod def from_proto(data_source: DataSourceProto): return KafkaSource( @@ -466,32 +514,56 @@ class RequestSource(DataSource): def __init__( self, - name: str, - schema: Union[Dict[str, ValueType], List[Field]], + *args, + name: Optional[str] = None, + schema: Optional[Union[Dict[str, ValueType], List[Field]]] = None, description: Optional[str] = "", tags: Optional[Dict[str, str]] = None, owner: Optional[str] = "", ): """Creates a RequestSource object.""" - super().__init__(name=name, description=description, tags=tags, owner=owner) - if isinstance(schema, Dict): + positional_attributes = ["name", "schema"] + _name = name + _schema = schema + if args: + warnings.warn( + ( + "Request source parameters should be specified as a keyword argument instead of a positional arg." + "Feast 0.23+ will not support positional arguments to construct request sources" + ), + DeprecationWarning, + ) + if len(args) > len(positional_attributes): + raise ValueError( + f"Only {', '.join(positional_attributes)} are allowed as positional args when defining " + f"feature views, for backwards compatibility." + ) + if len(args) >= 1: + _name = args[0] + if len(args) >= 2: + _schema = args[1] + + super().__init__(name=_name, description=description, tags=tags, owner=owner) + if not _schema: + raise ValueError("Schema needs to be provided for Request Source") + if isinstance(_schema, Dict): warnings.warn( "Schema in RequestSource is changing type. The schema data type Dict[str, ValueType] is being deprecated in Feast 0.23. 
" "Please use List[Field] instead for the schema", DeprecationWarning, ) schemaList = [] - for key, valueType in schema.items(): + for key, valueType in _schema.items(): schemaList.append( Field(name=key, dtype=VALUE_TYPES_TO_FEAST_TYPES[valueType]) ) self.schema = schemaList - elif isinstance(schema, List): - self.schema = schema + elif isinstance(_schema, List): + self.schema = _schema else: raise Exception( "Schema type must be either dictionary or list, not " - + str(type(schema)) + + str(type(_schema)) ) def validate(self, config: RepoConfig): @@ -507,13 +579,10 @@ def __eq__(self, other): raise TypeError( "Comparisons should only involve RequestSource class objects." ) - if ( - self.name != other.name - or self.description != other.description - or self.owner != other.owner - or self.tags != other.tags - ): + + if not super().__eq__(other): return False + if isinstance(self.schema, List) and isinstance(other.schema, List): for field1, field2 in zip(self.schema, other.schema): if field1 != field2: @@ -640,12 +709,13 @@ def get_table_query_string(self) -> str: def __init__( self, - name: str, - event_timestamp_column: str, - created_timestamp_column: str, - record_format: StreamFormat, - region: str, - stream_name: str, + *args, + name: Optional[str] = None, + event_timestamp_column: Optional[str] = "", + created_timestamp_column: Optional[str] = "", + record_format: Optional[StreamFormat] = None, + region: Optional[str] = "", + stream_name: Optional[str] = "", field_mapping: Optional[Dict[str, str]] = None, date_partition_column: Optional[str] = "", description: Optional[str] = "", @@ -654,10 +724,53 @@ def __init__( timestamp_field: Optional[str] = "", batch_source: Optional[DataSource] = None, ): + positional_attributes = [ + "name", + "event_timestamp_column", + "created_timestamp_column", + "record_format", + "region", + "stream_name", + ] + _name = name + _event_timestamp_column = event_timestamp_column + _created_timestamp_column = 
created_timestamp_column + _record_format = record_format + _region = region or "" + _stream_name = stream_name or "" + if args: + warnings.warn( + ( + "Kinesis parameters should be specified as a keyword argument instead of a positional arg." + "Feast 0.23+ will not support positional arguments to construct kinesis sources" + ), + DeprecationWarning, + ) + if len(args) > len(positional_attributes): + raise ValueError( + f"Only {', '.join(positional_attributes)} are allowed as positional args when defining " + f"kinesis sources, for backwards compatibility." + ) + if len(args) >= 1: + _name = args[0] + if len(args) >= 2: + _event_timestamp_column = args[1] + if len(args) >= 3: + _created_timestamp_column = args[2] + if len(args) >= 4: + _record_format = args[3] + if len(args) >= 5: + _region = args[4] + if len(args) >= 6: + _stream_name = args[5] + + if _record_format is None: + raise ValueError("Record format must be specified for kinesis source") + super().__init__( - name=name, - event_timestamp_column=event_timestamp_column, - created_timestamp_column=created_timestamp_column, + name=_name, + event_timestamp_column=_event_timestamp_column, + created_timestamp_column=_created_timestamp_column, field_mapping=field_mapping, date_partition_column=date_partition_column, description=description, @@ -667,21 +780,20 @@ def __init__( ) self.batch_source = batch_source self.kinesis_options = KinesisOptions( - record_format=record_format, region=region, stream_name=stream_name + record_format=_record_format, region=_region, stream_name=_stream_name ) def __eq__(self, other): - if other is None: - return False - if not isinstance(other, KinesisSource): raise TypeError( "Comparisons should only involve KinesisSource class objects." 
) + if not super().__eq__(other): + return False + if ( - self.name != other.name - or self.kinesis_options.record_format != other.kinesis_options.record_format + self.kinesis_options.record_format != other.kinesis_options.record_format or self.kinesis_options.region != other.kinesis_options.region or self.kinesis_options.stream_name != other.kinesis_options.stream_name ): @@ -689,6 +801,9 @@ def __eq__(self, other): return True + def __hash__(self): + return super().__hash__() + def to_proto(self) -> DataSourceProto: data_source_proto = DataSourceProto( name=self.name, @@ -714,45 +829,73 @@ class PushSource(DataSource): A source that can be used to ingest features on request """ - name: str - schema: List[Field] + # TODO(adchia): consider adding schema here in case where Feast manages pushing events to the offline store + # TODO(adchia): consider a "mode" to support pushing raw vs transformed events batch_source: DataSource - timestamp_field: str def __init__( self, - *, - name: str, - schema: List[Field], - batch_source: DataSource, + *args, + name: Optional[str] = None, + batch_source: Optional[DataSource] = None, description: Optional[str] = "", tags: Optional[Dict[str, str]] = None, owner: Optional[str] = "", - timestamp_field: Optional[str] = "", ): """ Creates a PushSource object. Args: name: Name of the push source - schema: Schema mapping from the input feature name to a ValueType batch_source: The batch source that backs this push source. It's used when materializing from the offline store to the online store, and when retrieving historical features. description (optional): A human-readable description. tags (optional): A dictionary of key-value pairs to store arbitrary metadata. owner (optional): The owner of the data source, typically the email of the primary maintainer. - timestamp_field (optional): Event timestamp foe;d used for point in time - joins of feature values. 
""" - super().__init__(name=name, description=description, tags=tags, owner=owner) - self.schema = sorted(schema) # TODO: add schema inference from a batch source - self.batch_source = batch_source - if not self.batch_source: - raise ValueError(f"batch_source is needed for push source {self.name}") - if not timestamp_field: - raise ValueError(f"timestamp field is needed for push source {self.name}") - self.timestamp_field = timestamp_field + positional_attributes = ["name", "batch_source"] + _name = name + _batch_source = batch_source + if args: + warnings.warn( + ( + "Push source parameters should be specified as a keyword argument instead of a positional arg." + "Feast 0.23+ will not support positional arguments to construct push sources" + ), + DeprecationWarning, + ) + if len(args) > len(positional_attributes): + raise ValueError( + f"Only {', '.join(positional_attributes)} are allowed as positional args when defining " + f"push sources, for backwards compatibility." + ) + if len(args) >= 1: + _name = args[0] + if len(args) >= 2: + _batch_source = args[1] + + super().__init__(name=_name, description=description, tags=tags, owner=owner) + if not _batch_source: + raise ValueError( + f"batch_source parameter is needed for push source {self.name}" + ) + self.batch_source = _batch_source + + def __eq__(self, other): + if not isinstance(other, PushSource): + raise TypeError("Comparisons should only involve PushSource class objects.") + + if not super().__eq__(other): + return False + + if self.batch_source != other.batch_source: + return False + + return True + + def __hash__(self): + return super().__hash__() def validate(self, config: RepoConfig): pass @@ -764,38 +907,25 @@ def get_table_column_names_and_types( @staticmethod def from_proto(data_source: DataSourceProto): - schema_pb = data_source.push_options.schema - schema = [] - for key, val in schema_pb.items(): - schema.append(Field(name=key, dtype=from_value_type(ValueType(val)))) - assert 
data_source.HasField("batch_source") batch_source = DataSource.from_proto(data_source.batch_source) return PushSource( name=data_source.name, - schema=sorted(schema), batch_source=batch_source, - timestamp_field=data_source.timestamp_field, description=data_source.description, tags=dict(data_source.tags), owner=data_source.owner, ) def to_proto(self) -> DataSourceProto: - schema_pb = {} - for field in self.schema: - schema_pb[field.name] = field.dtype.to_value_type().value batch_source_proto = None if self.batch_source: batch_source_proto = self.batch_source.to_proto() - options = DataSourceProto.PushOptions(schema=schema_pb,) data_source_proto = DataSourceProto( name=self.name, type=DataSourceProto.PUSH_SOURCE, - push_options=options, - timestamp_field=self.timestamp_field, description=self.description, tags=self.tags, owner=self.owner, diff --git a/sdk/python/feast/diff/registry_diff.py b/sdk/python/feast/diff/registry_diff.py index 10bd88c56f..b2caec2b68 100644 --- a/sdk/python/feast/diff/registry_diff.py +++ b/sdk/python/feast/diff/registry_diff.py @@ -177,7 +177,7 @@ def extract_objects_for_keep_delete_update_add( FeastObjectType, List[Any] ] = FeastObjectType.get_objects_from_registry(registry, current_project) registry_object_type_to_repo_contents: Dict[ - FeastObjectType, Set[Any] + FeastObjectType, List[Any] ] = FeastObjectType.get_objects_from_repo_contents(desired_repo_contents) for object_type in FEAST_OBJECT_TYPES: diff --git a/sdk/python/feast/entity.py b/sdk/python/feast/entity.py index 3988626860..2142900050 100644 --- a/sdk/python/feast/entity.py +++ b/sdk/python/feast/entity.py @@ -105,7 +105,7 @@ def __init__( self.last_updated_timestamp = None def __hash__(self) -> int: - return hash((id(self), self.name)) + return hash((self.name, self.join_key)) def __eq__(self, other): if not isinstance(other, Entity): diff --git a/sdk/python/feast/feature_server.py b/sdk/python/feast/feature_server.py index 20fcd410c2..8347bed6da 100644 --- 
a/sdk/python/feast/feature_server.py +++ b/sdk/python/feast/feature_server.py @@ -94,9 +94,7 @@ def push(body=Depends(get_body)): @app.post("/write-to-online-store") def write_to_online_store(body=Depends(get_body)): warnings.warn( - "write_to_online_store is an experimental feature. " - "This API is unstable and it could be changed in the future. " - "We do not guarantee that future changes will maintain backward compatibility.", + "write_to_online_store is deprecated. Please consider using /push instead", RuntimeWarning, ) try: diff --git a/sdk/python/feast/feature_service.py b/sdk/python/feast/feature_service.py index 40030b34ce..492d31a809 100644 --- a/sdk/python/feast/feature_service.py +++ b/sdk/python/feast/feature_service.py @@ -1,3 +1,4 @@ +import warnings from datetime import datetime from typing import Dict, List, Optional, Union @@ -47,8 +48,9 @@ class FeatureService: @log_exceptions def __init__( self, - name: str, - features: List[Union[FeatureView, OnDemandFeatureView]], + *args, + name: Optional[str] = None, + features: Optional[List[Union[FeatureView, OnDemandFeatureView]]] = None, tags: Dict[str, str] = None, description: str = "", owner: str = "", @@ -59,10 +61,38 @@ def __init__( Raises: ValueError: If one of the specified features is not a valid type. """ - self.name = name + positional_attributes = ["name", "features"] + _name = name + _features = features + if args: + warnings.warn( + ( + "Feature service parameters should be specified as a keyword argument instead of a positional arg." + "Feast 0.23+ will not support positional arguments to construct feature service" + ), + DeprecationWarning, + ) + if len(args) > len(positional_attributes): + raise ValueError( + f"Only {', '.join(positional_attributes)} are allowed as positional args when defining " + f"feature service, for backwards compatibility." 
+ ) + if len(args) >= 1: + _name = args[0] + if len(args) >= 2: + _features = args[1] + + if not _name: + raise ValueError("Feature service name needs to be specified") + + if not _features: + # Technically, legal to create feature service with no feature views before. + _features = [] + + self.name = _name self.feature_view_projections = [] - for feature_grouping in features: + for feature_grouping in _features: if isinstance(feature_grouping, BaseFeatureView): self.feature_view_projections.append(feature_grouping.projection) else: @@ -85,7 +115,7 @@ def __str__(self): return str(MessageToJson(self.to_proto())) def __hash__(self): - return hash((id(self), self.name)) + return hash((self.name)) def __eq__(self, other): if not isinstance(other, FeatureService): diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 33d297f3ca..4f456be384 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -92,7 +92,6 @@ warnings.simplefilter("once", DeprecationWarning) - if TYPE_CHECKING: from feast.embedded_go.online_features_service import EmbeddedOnlineFeatureServer @@ -534,25 +533,25 @@ def _plan( ... batch_source=driver_hourly_stats, ... ) >>> registry_diff, infra_diff, new_infra = fs._plan(RepoContents( - ... data_sources={driver_hourly_stats}, - ... feature_views={driver_hourly_stats_view}, - ... on_demand_feature_views=set(), - ... request_feature_views=set(), - ... entities={driver}, - ... feature_services=set())) # register entity and feature view + ... data_sources=[driver_hourly_stats], + ... feature_views=[driver_hourly_stats_view], + ... on_demand_feature_views=list(), + ... request_feature_views=list(), + ... entities=[driver], + ... feature_services=list())) # register entity and feature view """ # Validate and run inference on all the objects to be registered. 
self._validate_all_feature_views( - list(desired_repo_contents.feature_views), - list(desired_repo_contents.on_demand_feature_views), - list(desired_repo_contents.request_feature_views), + desired_repo_contents.feature_views, + desired_repo_contents.on_demand_feature_views, + desired_repo_contents.request_feature_views, ) - _validate_data_sources(list(desired_repo_contents.data_sources)) + _validate_data_sources(desired_repo_contents.data_sources) self._make_inferences( - list(desired_repo_contents.data_sources), - list(desired_repo_contents.entities), - list(desired_repo_contents.feature_views), - list(desired_repo_contents.on_demand_feature_views), + desired_repo_contents.data_sources, + desired_repo_contents.entities, + desired_repo_contents.feature_views, + desired_repo_contents.on_demand_feature_views, ) # Compute the desired difference between the current objects in the registry and @@ -1186,16 +1185,25 @@ def tqdm_builder(length): ) @log_exceptions_and_usage - def push(self, push_source_name: str, df: pd.DataFrame): + def push( + self, push_source_name: str, df: pd.DataFrame, allow_registry_cache: bool = True + ): """ Push features to a push source. This updates all the feature views that have the push source as stream source. Args: push_source_name: The name of the push source we want to push data to. df: the data being pushed. + allow_registry_cache: whether to allow cached versions of the registry. """ + warnings.warn( + "Push source is an experimental feature. " + "This API is unstable and it could and might change in the future. 
" + "We do not guarantee that future changes will maintain backward compatibility.", + RuntimeWarning, + ) from feast.data_source import PushSource - all_fvs = self.list_feature_views(allow_cache=True) + all_fvs = self.list_feature_views(allow_cache=allow_registry_cache) fvs_with_push_sources = { fv @@ -1208,7 +1216,9 @@ def push(self, push_source_name: str, df: pd.DataFrame): } for fv in fvs_with_push_sources: - self.write_to_online_store(fv.name, df, allow_registry_cache=True) + self.write_to_online_store( + fv.name, df, allow_registry_cache=allow_registry_cache + ) @log_exceptions_and_usage def write_to_online_store( diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index 7d29a4b69b..7060870780 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -45,7 +45,7 @@ DUMMY_ENTITY_NAME = "__dummy" DUMMY_ENTITY_VAL = "" DUMMY_ENTITY = Entity( - name=DUMMY_ENTITY_NAME, join_key=DUMMY_ENTITY_ID, value_type=ValueType.STRING, + name=DUMMY_ENTITY_NAME, join_keys=[DUMMY_ENTITY_ID], value_type=ValueType.STRING, ) @@ -137,7 +137,7 @@ def __init__( ValueError: A field mapping conflicts with an Entity or a Feature. 
""" - positional_attributes = ["name, entities, ttl"] + positional_attributes = ["name", "entities", "ttl"] _name = name _entities = entities @@ -270,7 +270,6 @@ def _initialize_sources(self, name, batch_source, stream_source, source): self.batch_source = batch_source self.source = source - # Note: Python requires redefining hash in child classes that override __eq__ def __hash__(self): return super().__hash__() @@ -298,19 +297,15 @@ def __eq__(self, other): return False if ( - self.tags != other.tags + sorted(self.entities) != sorted(other.entities) or self.ttl != other.ttl or self.online != other.online + or self.batch_source != other.batch_source + or self.stream_source != other.stream_source + or self.schema != other.schema ): return False - if sorted(self.entities) != sorted(other.entities): - return False - if self.batch_source != other.batch_source: - return False - if self.stream_source != other.stream_source: - return False - return True def ensure_valid(self): diff --git a/sdk/python/feast/inference.py b/sdk/python/feast/inference.py index 9d15a6a25f..1f03fc50fc 100644 --- a/sdk/python/feast/inference.py +++ b/sdk/python/feast/inference.py @@ -2,7 +2,7 @@ from typing import List from feast import BigQuerySource, Entity, FileSource, RedshiftSource, SnowflakeSource -from feast.data_source import DataSource, RequestSource +from feast.data_source import DataSource, PushSource, RequestSource from feast.errors import RegistryInferenceFailure from feast.feature_view import FeatureView from feast.field import Field, from_value_type @@ -32,7 +32,9 @@ def update_entities_with_inferred_types_from_feature_views( if not (incomplete_entities_keys & set(view.entities)): continue # skip if view doesn't contain any entities that need inference - col_names_and_types = view.batch_source.get_table_column_names_and_types(config) + col_names_and_types = list( + view.batch_source.get_table_column_names_and_types(config) + ) for entity_name in view.entities: if entity_name in 
incomplete_entities: entity = incomplete_entities[entity_name] @@ -74,6 +76,8 @@ def update_data_sources_with_inferred_event_timestamp_col( for data_source in data_sources: if isinstance(data_source, RequestSource): continue + if isinstance(data_source, PushSource): + data_source = data_source.batch_source if data_source.timestamp_field is None or data_source.timestamp_field == "": # prepare right match pattern for data source ts_column_type_regex_pattern = "" diff --git a/sdk/python/feast/infra/feature_servers/aws_lambda/Dockerfile b/sdk/python/feast/infra/feature_servers/aws_lambda/Dockerfile index 5b685dbcf6..0c342a77ce 100644 --- a/sdk/python/feast/infra/feature_servers/aws_lambda/Dockerfile +++ b/sdk/python/feast/infra/feature_servers/aws_lambda/Dockerfile @@ -10,7 +10,9 @@ COPY go go COPY README.md README.md # Install Feast for AWS with Lambda dependencies -RUN pip3 install -e 'sdk/python[aws,redis]' +# TODO(felixwang9817): Remove Snowflake dependencies once lazy loading of offline stores is supported. +# See https://github.com/feast-dev/feast/issues/2566 for more details. 
+RUN pip3 install -e 'sdk/python[aws,redis,snowflake]' RUN pip3 install -r sdk/python/feast/infra/feature_servers/aws_lambda/requirements.txt --target "${LAMBDA_TASK_ROOT}" # Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) diff --git a/sdk/python/feast/infra/offline_stores/bigquery_source.py b/sdk/python/feast/infra/offline_stores/bigquery_source.py index 31b0ed617e..cb4cd1b5be 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery_source.py +++ b/sdk/python/feast/infra/offline_stores/bigquery_source.py @@ -16,6 +16,7 @@ class BigQuerySource(DataSource): def __init__( self, + *, event_timestamp_column: Optional[str] = "", table: Optional[str] = None, created_timestamp_column: Optional[str] = "", diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py index 65997040cc..dc92e08a50 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py @@ -30,6 +30,7 @@ class SparkSourceFormat(Enum): class SparkSource(DataSource): def __init__( self, + *, name: Optional[str] = None, table: Optional[str] = None, query: Optional[str] = None, diff --git a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino_source.py b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino_source.py index 7d6280746e..b8fddee89f 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino_source.py @@ -88,7 +88,6 @@ def __init__( table: Optional[str] = None, created_timestamp_column: Optional[str] = "", field_mapping: Optional[Dict[str, str]] = None, - date_partition_column: Optional[str] = None, query: Optional[str] = None, name: Optional[str] = None, 
description: Optional[str] = "", diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py index cb6e874f8a..a7d8b25abf 100644 --- a/sdk/python/feast/infra/offline_stores/file.py +++ b/sdk/python/feast/infra/offline_stores/file.py @@ -299,11 +299,25 @@ def evaluate_offline_job(): if created_timestamp_column else [event_timestamp_column] ) + # try-catch block is added to deal with this issue https://github.com/dask/dask/issues/8939. + # TODO(kevjumba): remove try catch when fix is merged upstream in Dask. + try: + if created_timestamp_column: + source_df = source_df.sort_values(by=created_timestamp_column,) + + source_df = source_df.sort_values(by=event_timestamp_column) + + except ZeroDivisionError: + # Use 1 partition to get around case where everything in timestamp column is the same so the partition algorithm doesn't + # try to divide by zero. + if created_timestamp_column: + source_df = source_df.sort_values( + by=created_timestamp_column, npartitions=1 + ) - if created_timestamp_column: - source_df = source_df.sort_values(by=created_timestamp_column) - - source_df = source_df.sort_values(by=event_timestamp_column) + source_df = source_df.sort_values( + by=event_timestamp_column, npartitions=1 + ) source_df = source_df[ (source_df[event_timestamp_column] >= start_date) diff --git a/sdk/python/feast/infra/offline_stores/file_source.py b/sdk/python/feast/infra/offline_stores/file_source.py index 3df0db69b1..e177642a32 100644 --- a/sdk/python/feast/infra/offline_stores/file_source.py +++ b/sdk/python/feast/infra/offline_stores/file_source.py @@ -20,7 +20,8 @@ class FileSource(DataSource): def __init__( self, - path: str, + *args, + path: Optional[str] = None, event_timestamp_column: Optional[str] = "", file_format: Optional[FileFormat] = None, created_timestamp_column: Optional[str] = "", @@ -58,13 +59,31 @@ def __init__( >>> from feast import FileSource >>> file_source = FileSource(path="my_features.parquet", 
timestamp_field="event_timestamp") """ - if path is None: + positional_attributes = ["path"] + _path = path + if args: + if args: + warnings.warn( + ( + "File Source parameters should be specified as a keyword argument instead of a positional arg." + "Feast 0.23+ will not support positional arguments to construct File sources" + ), + DeprecationWarning, + ) + if len(args) > len(positional_attributes): + raise ValueError( + f"Only {', '.join(positional_attributes)} are allowed as positional args when defining " + f"File sources, for backwards compatibility." + ) + if len(args) >= 1: + _path = args[0] + if _path is None: raise ValueError( 'No "path" argument provided. Please set "path" to the location of your file source.' ) self.file_options = FileOptions( file_format=file_format, - uri=path, + uri=_path, s3_endpoint_override=s3_endpoint_override, ) diff --git a/sdk/python/feast/infra/offline_stores/redshift_source.py b/sdk/python/feast/infra/offline_stores/redshift_source.py index f099e307cc..dcfcb50aa6 100644 --- a/sdk/python/feast/infra/offline_stores/redshift_source.py +++ b/sdk/python/feast/infra/offline_stores/redshift_source.py @@ -16,6 +16,7 @@ class RedshiftSource(DataSource): def __init__( self, + *, event_timestamp_column: Optional[str] = "", table: Optional[str] = None, schema: Optional[str] = None, diff --git a/sdk/python/feast/infra/offline_stores/snowflake_source.py b/sdk/python/feast/infra/offline_stores/snowflake_source.py index 1d24cba44a..8f3f2f0bb5 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake_source.py +++ b/sdk/python/feast/infra/offline_stores/snowflake_source.py @@ -15,6 +15,7 @@ class SnowflakeSource(DataSource): def __init__( self, + *, database: Optional[str] = None, warehouse: Optional[str] = None, schema: Optional[str] = None, diff --git a/sdk/python/feast/infra/online_stores/dynamodb.py b/sdk/python/feast/infra/online_stores/dynamodb.py index 01562ad900..406bee525f 100644 --- 
a/sdk/python/feast/infra/online_stores/dynamodb.py +++ b/sdk/python/feast/infra/online_stores/dynamodb.py @@ -59,9 +59,6 @@ class DynamoDBOnlineStoreConfig(FeastConfigBaseModel): region: StrictStr """AWS Region Name""" - sort_response: bool = True - """Whether or not to sort BatchGetItem response.""" - table_name_template: StrictStr = "{project}.{table_name}" """DynamoDB table name template""" @@ -204,9 +201,6 @@ def online_read( """ Retrieve feature values from the online DynamoDB store. - Note: This method is currently not optimized to retrieve a lot of data at a time - as it does sequential gets from the DynamoDB table. - Args: config: The RepoConfig for the current FeatureStore. table: Feast FeatureView. @@ -224,7 +218,6 @@ def online_read( result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = [] entity_ids = [compute_entity_id(entity_key) for entity_key in entity_keys] batch_size = online_config.batch_size - sort_response = online_config.sort_response entity_ids_iter = iter(entity_ids) while True: batch = list(itertools.islice(entity_ids_iter, batch_size)) @@ -243,20 +236,27 @@ def online_read( response = response.get("Responses") table_responses = response.get(table_instance.name) if table_responses: - if sort_response: - table_responses = self._sort_dynamodb_response( - table_responses, entity_ids - ) + table_responses = self._sort_dynamodb_response( + table_responses, entity_ids + ) + entity_idx = 0 for tbl_res in table_responses: + entity_id = tbl_res["entity_id"] + while entity_id != batch[entity_idx]: + result.append((None, None)) + entity_idx += 1 res = {} for feature_name, value_bin in tbl_res["values"].items(): val = ValueProto() val.ParseFromString(value_bin.value) res[feature_name] = val result.append((datetime.fromisoformat(tbl_res["event_ts"]), res)) - else: - batch_size_nones = ((None, None),) * len(batch) - result.extend(batch_size_nones) + entity_idx += 1 + + # Not all entities in a batch may have responses + # Pad with 
remaining values in batch that were not found + batch_size_nones = ((None, None),) * (len(batch) - len(result)) + result.extend(batch_size_nones) return result def _get_dynamodb_client(self, region: str, endpoint_url: Optional[str] = None): diff --git a/sdk/python/feast/infra/online_stores/online_store.py b/sdk/python/feast/infra/online_stores/online_store.py index 1f177996de..04c6a065fb 100644 --- a/sdk/python/feast/infra/online_stores/online_store.py +++ b/sdk/python/feast/infra/online_stores/online_store.py @@ -76,9 +76,9 @@ def online_read( entity_keys: a list of entity keys that should be read from the FeatureStore. requested_features: (Optional) A subset of the features that should be read from the FeatureStore. Returns: - Data is returned as a list, one item per entity key. Each item in the list is a tuple - of event_ts for the row, and the feature data as a dict from feature names to values. - Values are returned as Value proto message. + Data is returned as a list, one item per entity key in the original order as the entity_keys argument. + Each item in the list is a tuple of event_ts for the row, and the feature data as a dict from feature names + to values. Values are returned as Value proto message. """ ... 
diff --git a/sdk/python/feast/infra/online_stores/redis.py b/sdk/python/feast/infra/online_stores/redis.py index a2e8e27d80..9ceceff0ac 100644 --- a/sdk/python/feast/infra/online_stores/redis.py +++ b/sdk/python/feast/infra/online_stores/redis.py @@ -42,7 +42,7 @@ try: from redis import Redis - from rediscluster import RedisCluster + from redis.cluster import ClusterNode, RedisCluster except ImportError as e: from feast.errors import FeastExtrasDependencyImportError @@ -164,7 +164,9 @@ def _get_client(self, online_store_config: RedisOnlineStoreConfig): online_store_config.connection_string ) if online_store_config.redis_type == RedisType.redis_cluster: - kwargs["startup_nodes"] = startup_nodes + kwargs["startup_nodes"] = [ + ClusterNode(**node) for node in startup_nodes + ] self._client = RedisCluster(**kwargs) else: kwargs["host"] = startup_nodes[0]["host"] diff --git a/sdk/python/feast/infra/online_stores/sqlite.py b/sdk/python/feast/infra/online_stores/sqlite.py index 710f4c386a..5657fbe372 100644 --- a/sdk/python/feast/infra/online_stores/sqlite.py +++ b/sdk/python/feast/infra/online_stores/sqlite.py @@ -230,7 +230,9 @@ def teardown( def _initialize_conn(db_path: str): Path(db_path).parent.mkdir(exist_ok=True) return sqlite3.connect( - db_path, detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES, + db_path, + detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES, + check_same_thread=False, ) diff --git a/sdk/python/feast/on_demand_feature_view.py b/sdk/python/feast/on_demand_feature_view.py index 790891b078..a807f3b4a4 100644 --- a/sdk/python/feast/on_demand_feature_view.py +++ b/sdk/python/feast/on_demand_feature_view.py @@ -234,14 +234,19 @@ def __copy__(self): return fv def __eq__(self, other): + if not isinstance(other, OnDemandFeatureView): + raise TypeError( + "Comparisons should only involve OnDemandFeatureView class objects." 
+ ) + if not super().__eq__(other): return False if ( - not self.source_feature_view_projections - == other.source_feature_view_projections - or not self.source_request_sources == other.source_request_sources - or not self.udf.__code__.co_code == other.udf.__code__.co_code + self.source_feature_view_projections + != other.source_feature_view_projections + or self.source_request_sources != other.source_request_sources + or self.udf.__code__.co_code != other.udf.__code__.co_code ): return False diff --git a/sdk/python/feast/registry.py b/sdk/python/feast/registry.py index da9c6c6b21..5f5d27318a 100644 --- a/sdk/python/feast/registry.py +++ b/sdk/python/feast/registry.py @@ -18,7 +18,7 @@ from enum import Enum from pathlib import Path from threading import Lock -from typing import Any, Dict, List, Optional, Set +from typing import Any, Dict, List, Optional from urllib.parse import urlparse import dill @@ -98,7 +98,7 @@ def get_objects_from_registry( @staticmethod def get_objects_from_repo_contents( repo_contents: RepoContents, - ) -> Dict["FeastObjectType", Set[Any]]: + ) -> Dict["FeastObjectType", List[Any]]: return { FeastObjectType.DATA_SOURCE: repo_contents.data_sources, FeastObjectType.ENTITY: repo_contents.entities, diff --git a/sdk/python/feast/repo_contents.py b/sdk/python/feast/repo_contents.py index b59adc34db..4d7c92f2a6 100644 --- a/sdk/python/feast/repo_contents.py +++ b/sdk/python/feast/repo_contents.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import NamedTuple, Set +from typing import List, NamedTuple from feast.data_source import DataSource from feast.entity import Entity @@ -27,12 +27,12 @@ class RepoContents(NamedTuple): Represents the objects in a Feast feature repo. 
""" - data_sources: Set[DataSource] - feature_views: Set[FeatureView] - on_demand_feature_views: Set[OnDemandFeatureView] - request_feature_views: Set[RequestFeatureView] - entities: Set[Entity] - feature_services: Set[FeatureService] + data_sources: List[DataSource] + feature_views: List[FeatureView] + on_demand_feature_views: List[OnDemandFeatureView] + request_feature_views: List[RequestFeatureView] + entities: List[Entity] + feature_services: List[FeatureService] def to_registry_proto(self) -> RegistryProto: registry_proto = RegistryProto() diff --git a/sdk/python/feast/repo_operations.py b/sdk/python/feast/repo_operations.py index 5e223aac8a..40f1a055a9 100644 --- a/sdk/python/feast/repo_operations.py +++ b/sdk/python/feast/repo_operations.py @@ -28,9 +28,9 @@ from feast.usage import log_exceptions_and_usage -def py_path_to_module(path: Path, repo_root: Path) -> str: +def py_path_to_module(path: Path) -> str: return ( - str(path.relative_to(repo_root))[: -len(".py")] + str(path.relative_to(os.getcwd()))[: -len(".py")] .replace("./", "") .replace("/", ".") ) @@ -94,36 +94,56 @@ def get_repo_files(repo_root: Path) -> List[Path]: def parse_repo(repo_root: Path) -> RepoContents: - """Collect feature table definitions from feature repo""" + """ + Collects unique Feast object definitions from the given feature repo. + + Specifically, if an object foo has already been added, bar will still be added if + (bar == foo), but not if (bar is foo). This ensures that import statements will + not result in duplicates, but defining two equal objects will. 
+ """ res = RepoContents( - data_sources=set(), - entities=set(), - feature_views=set(), - feature_services=set(), - on_demand_feature_views=set(), - request_feature_views=set(), + data_sources=[], + entities=[], + feature_views=[], + feature_services=[], + on_demand_feature_views=[], + request_feature_views=[], ) for repo_file in get_repo_files(repo_root): - module_path = py_path_to_module(repo_file, repo_root) + module_path = py_path_to_module(repo_file) module = importlib.import_module(module_path) for attr_name in dir(module): obj = getattr(module, attr_name) - if isinstance(obj, DataSource): - res.data_sources.add(obj) - if isinstance(obj, FeatureView): - res.feature_views.add(obj) - if isinstance(obj.stream_source, PushSource): - res.data_sources.add(obj.stream_source.batch_source) - elif isinstance(obj, Entity): - res.entities.add(obj) - elif isinstance(obj, FeatureService): - res.feature_services.add(obj) - elif isinstance(obj, OnDemandFeatureView): - res.on_demand_feature_views.add(obj) - elif isinstance(obj, RequestFeatureView): - res.request_feature_views.add(obj) - res.entities.add(DUMMY_ENTITY) + if isinstance(obj, DataSource) and not any( + (obj is ds) for ds in res.data_sources + ): + res.data_sources.append(obj) + if isinstance(obj, FeatureView) and not any( + (obj is fv) for fv in res.feature_views + ): + res.feature_views.append(obj) + if isinstance(obj.stream_source, PushSource) and not any( + (obj is ds) for ds in res.data_sources + ): + res.data_sources.append(obj.stream_source.batch_source) + elif isinstance(obj, Entity) and not any( + (obj is entity) for entity in res.entities + ): + res.entities.append(obj) + elif isinstance(obj, FeatureService) and not any( + (obj is fs) for fs in res.feature_services + ): + res.feature_services.append(obj) + elif isinstance(obj, OnDemandFeatureView) and not any( + (obj is odfv) for odfv in res.on_demand_feature_views + ): + res.on_demand_feature_views.append(obj) + elif isinstance(obj, RequestFeatureView) 
and not any( + (obj is rfv) for rfv in res.request_feature_views + ): + res.request_feature_views.append(obj) + res.entities.append(DUMMY_ENTITY) return res diff --git a/sdk/python/feast/saved_dataset.py b/sdk/python/feast/saved_dataset.py index 7a05a9ca22..aead7fe8ef 100644 --- a/sdk/python/feast/saved_dataset.py +++ b/sdk/python/feast/saved_dataset.py @@ -92,17 +92,23 @@ def __str__(self): return str(MessageToJson(self.to_proto())) def __hash__(self): - return hash((id(self), self.name)) + return hash((self.name)) def __eq__(self, other): if not isinstance(other, SavedDataset): raise TypeError( - "Comparisons should only involve FeatureService class objects." + "Comparisons should only involve SavedDataset class objects." ) - if self.name != other.name: - return False - if sorted(self.features) != sorted(other.features): + if ( + self.name != other.name + or sorted(self.features) != sorted(other.features) + or sorted(self.join_keys) != sorted(other.join_keys) + or self.storage != other.storage + or self.full_feature_names != other.full_feature_names + or self.tags != other.tags + or self.feature_service_name != other.feature_service_name + ): return False return True diff --git a/sdk/python/feast/templates/aws/driver_repo.py b/sdk/python/feast/templates/aws/driver_repo.py index 19ba44807b..5188f57cf8 100644 --- a/sdk/python/feast/templates/aws/driver_repo.py +++ b/sdk/python/feast/templates/aws/driver_repo.py @@ -8,11 +8,11 @@ # construction of feature vectors driver = Entity( # Name of the entity. Must be unique within a project - name="driver_id", - # The join key of an entity describes the storage level field/column on which - # features can be looked up. The join key is also used to join feature + name="driver", + # The join keys of an entity describe the storage level field/column on which + # features can be looked up. 
The join keys are also used to join feature # tables/views when building feature vectors - join_key="driver_id", + join_keys=["driver_id"], # The storage level type for an entity value_type=ValueType.INT64, ) @@ -41,7 +41,7 @@ # The list of entities specifies the keys required for joining or looking # up features from this feature view. The reference provided in this field # correspond to the name of a defined entity (or entities) - entities=["driver_id"], + entities=["driver"], # The timedelta is the maximum age that each feature value may have # relative to its lookup time. For historical features (used in training), # TTL is relative to each timestamp provided in the entity dataframe. @@ -60,7 +60,7 @@ # Batch sources are used to find feature values. In the case of this feature # view we will query a source table on Redshift for driver statistics # features - batch_source=driver_stats_source, + source=driver_stats_source, # Tags are user defined key/value pairs that are attached to each # feature view tags={"team": "driver_performance"}, diff --git a/sdk/python/feast/templates/gcp/driver_repo.py b/sdk/python/feast/templates/gcp/driver_repo.py index e494e021f2..7d137f996b 100644 --- a/sdk/python/feast/templates/gcp/driver_repo.py +++ b/sdk/python/feast/templates/gcp/driver_repo.py @@ -8,11 +8,11 @@ # construction of feature vectors driver = Entity( # Name of the entity. Must be unique within a project - name="driver_id", - # The join key of an entity describes the storage level field/column on which - # features can be looked up. The join key is also used to join feature + name="driver", + # The join keys of an entity describe the storage level field/column on which + # features can be looked up. 
The join keys are also used to join feature # tables/views when building feature vectors - join_key="driver_id", + join_keys=["driver_id"], # The storage level type for an entity value_type=ValueType.INT64, ) @@ -39,7 +39,7 @@ # The list of entities specifies the keys required for joining or looking # up features from this feature view. The reference provided in this field # correspond to the name of a defined entity (or entities) - entities=["driver_id"], + entities=["driver"], # The timedelta is the maximum age that each feature value may have # relative to its lookup time. For historical features (used in training), # TTL is relative to each timestamp provided in the entity dataframe. @@ -58,7 +58,7 @@ # Batch sources are used to find feature values. In the case of this feature # view we will query a source table on BigQuery for driver statistics # features - batch_source=driver_stats_source, + source=driver_stats_source, # Tags are user defined key/value pairs that are attached to each # feature view tags={"team": "driver_performance"}, diff --git a/sdk/python/feast/templates/local/example.py b/sdk/python/feast/templates/local/example.py index 076a331f91..1d441e0e99 100644 --- a/sdk/python/feast/templates/local/example.py +++ b/sdk/python/feast/templates/local/example.py @@ -16,14 +16,14 @@ # Define an entity for the driver. You can think of entity as a primary key used to # fetch features. -driver = Entity(name="driver_id", value_type=ValueType.INT64, description="driver id",) +driver = Entity(name="driver", join_keys=["driver_id"], value_type=ValueType.INT64,) # Our parquet files contain sample data that includes a driver_id column, timestamps and # three feature column. Here we define a Feature View that will allow us to serve this # data to our model online. 
driver_hourly_stats_view = FeatureView( name="driver_hourly_stats", - entities=["driver_id"], + entities=["driver"], ttl=timedelta(days=1), schema=[ Field(name="conv_rate", dtype=Float32), @@ -31,6 +31,6 @@ Field(name="avg_daily_trips", dtype=Int64), ], online=True, - batch_source=driver_hourly_stats, + source=driver_hourly_stats, tags={}, ) diff --git a/sdk/python/feast/templates/snowflake/driver_repo.py b/sdk/python/feast/templates/snowflake/driver_repo.py index c14e4c38cc..ecccb9863b 100644 --- a/sdk/python/feast/templates/snowflake/driver_repo.py +++ b/sdk/python/feast/templates/snowflake/driver_repo.py @@ -10,11 +10,11 @@ # construction of feature vectors driver = Entity( # Name of the entity. Must be unique within a project - name="driver_id", - # The join key of an entity describes the storage level field/column on which - # features can be looked up. The join key is also used to join feature + name="driver", + # The join keys of an entity describe the storage level field/column on which + # features can be looked up. The join keys are also used to join feature # tables/views when building feature vectors - join_key="driver_id", + join_keys=["driver_id"], ) # Indicates a data source from which feature values can be retrieved. Sources are queried when building training @@ -43,7 +43,7 @@ # The list of entities specifies the keys required for joining or looking # up features from this feature view. The reference provided in this field # correspond to the name of a defined entity (or entities) - entities=["driver_id"], + entities=["driver"], # The timedelta is the maximum age that each feature value may have # relative to its lookup time. For historical features (used in training), # TTL is relative to each timestamp provided in the entity dataframe. 
diff --git a/sdk/python/feast/templates/spark/example.py b/sdk/python/feast/templates/spark/example.py index c8c1c1257e..58f3df740f 100644 --- a/sdk/python/feast/templates/spark/example.py +++ b/sdk/python/feast/templates/spark/example.py @@ -16,9 +16,9 @@ # Entity definitions -driver = Entity(name="driver_id", value_type=ValueType.INT64, description="driver id",) +driver = Entity(name="driver", value_type=ValueType.INT64, description="driver id",) customer = Entity( - name="customer_id", value_type=ValueType.INT64, description="customer id", + name="customer", value_type=ValueType.INT64, description="customer id", ) # Sources @@ -40,7 +40,7 @@ # Feature Views driver_hourly_stats_view = FeatureView( name="driver_hourly_stats", - entities=["driver_id"], + entities=["driver"], ttl=timedelta(days=7), schema=[ Field(name="conv_rate", dtype=Float32), @@ -48,12 +48,12 @@ Field(name="avg_daily_trips", dtype=Int64), ], online=True, - batch_source=driver_hourly_stats, + source=driver_hourly_stats, tags={}, ) customer_daily_profile_view = FeatureView( name="customer_daily_profile", - entities=["customer_id"], + entities=["customer"], ttl=timedelta(days=7), schema=[ Field(name="current_balance", dtype=Float32), @@ -61,6 +61,6 @@ Field(name="lifetime_trip_count", dtype=Int64), ], online=True, - batch_source=customer_daily_profile, + source=customer_daily_profile, tags={}, ) diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 9798faf508..a94d8aa59b 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -529,7 +529,8 @@ def snowflake_python_type_to_feast_value_type( "uint8": ValueType.INT32, "int8": ValueType.INT32, "datetime64[ns]": ValueType.UNIX_TIMESTAMP, - "object": ValueType.UNKNOWN, + "object": ValueType.STRING, + "bool": ValueType.BOOL, } return type_map[snowflake_python_type_as_str.lower()] diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index 
e120f8c58e..346aa6da47 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -23,20 +23,16 @@ alabaster==0.7.12 altair==4.2.0 # via great-expectations anyio==3.5.0 - # via starlette + # via + # starlette + # watchgod appdirs==1.4.4 # via black -appnope==0.1.2 - # via - # ipykernel - # ipython -argon2-cffi==21.3.0 - # via notebook -argon2-cffi-bindings==21.2.0 - # via argon2-cffi +appnope==0.1.3 + # via ipython asgiref==3.5.0 # via uvicorn -asn1crypto==1.4.0 +asn1crypto==1.5.1 # via # oscrypto # snowflake-connector-python @@ -45,7 +41,9 @@ assertpy==1.1 asttokens==2.0.5 # via stack-data async-timeout==4.0.2 - # via aiohttp + # via + # aiohttp + # redis attrs==21.4.0 # via # aiohttp @@ -54,16 +52,16 @@ attrs==21.4.0 # pytest avro==1.10.0 # via feast (setup.py) -azure-core==1.23.0 +azure-core==1.23.1 # via # adlfs # azure-identity # azure-storage-blob azure-datalake-store==0.0.52 # via adlfs -azure-identity==1.8.0 +azure-identity==1.9.0 # via adlfs -azure-storage-blob==12.9.0 +azure-storage-blob==12.11.0 # via adlfs babel==2.9.1 # via sphinx @@ -71,13 +69,11 @@ backcall==0.2.0 # via ipython black==19.10b0 # via feast (setup.py) -bleach==4.1.0 - # via nbconvert -boto3==1.21.11 +boto3==1.21.41 # via # feast (setup.py) # moto -botocore==1.24.11 +botocore==1.24.41 # via # boto3 # moto @@ -94,7 +90,6 @@ certifi==2021.10.8 # snowflake-connector-python cffi==1.15.0 # via - # argon2-cffi-bindings # azure-datalake-store # cryptography # snowflake-connector-python @@ -105,7 +100,7 @@ charset-normalizer==2.0.12 # aiohttp # requests # snowflake-connector-python -click==8.0.4 +click==8.1.2 # via # black # feast (setup.py) @@ -115,30 +110,32 @@ click==8.0.4 cloudpickle==2.0.0 # via dask colorama==0.4.4 - # via feast (setup.py) + # via + # feast (setup.py) + # great-expectations coverage[toml]==6.3.2 # via pytest-cov -cryptography==3.3.2 +cryptography==3.4.8 # via # adal # azure-identity # azure-storage-blob # 
feast (setup.py) + # great-expectations # moto # msal - # pyjwt # pyopenssl # snowflake-connector-python dask==2022.1.1 # via feast (setup.py) -debugpy==1.5.1 - # via ipykernel +dataclasses==0.6 + # via great-expectations decorator==5.1.1 # via # gcsfs # ipython -defusedxml==0.7.1 - # via nbconvert +deprecated==1.2.13 + # via redis deprecation==2.1.0 # via testcontainers dill==0.3.4 @@ -154,20 +151,19 @@ docutils==0.17.1 # sphinx # sphinx-rtd-theme entrypoints==0.4 - # via - # altair - # jupyter-client - # nbconvert + # via altair execnet==1.9.0 # via pytest-xdist executing==0.8.3 # via stack-data -fastapi==0.74.1 +fastapi==0.75.1 # via feast (setup.py) -fastavro==1.4.9 +fastavro==1.4.10 # via # feast (setup.py) # pandavro +fastjsonschema==2.15.3 + # via nbformat filelock==3.6.0 # via virtualenv firebase-admin==4.5.2 @@ -178,12 +174,12 @@ frozenlist==1.3.0 # via # aiohttp # aiosignal -fsspec==2022.2.0 +fsspec==2022.3.0 # via # adlfs # dask # gcsfs -gcsfs==2022.2.0 +gcsfs==2022.3.0 # via feast (setup.py) google-api-core[grpc]==1.31.5 # via @@ -195,7 +191,7 @@ google-api-core[grpc]==1.31.5 # google-cloud-core # google-cloud-datastore # google-cloud-firestore -google-api-python-client==2.39.0 +google-api-python-client==2.44.0 # via firebase-admin google-auth==1.35.0 # via @@ -208,11 +204,11 @@ google-auth==1.35.0 # google-cloud-storage google-auth-httplib2==0.1.0 # via google-api-python-client -google-auth-oauthlib==0.5.0 +google-auth-oauthlib==0.5.1 # via gcsfs -google-cloud-bigquery==2.34.1 +google-cloud-bigquery==2.34.3 # via feast (setup.py) -google-cloud-bigquery-storage==2.12.0 +google-cloud-bigquery-storage==2.13.1 # via feast (setup.py) google-cloud-core==1.7.2 # via @@ -221,9 +217,9 @@ google-cloud-core==1.7.2 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-cloud-datastore==2.5.0 +google-cloud-datastore==2.5.1 # via feast (setup.py) -google-cloud-firestore==2.3.4 +google-cloud-firestore==2.4.0 # via firebase-admin 
google-cloud-storage==1.40.0 # via @@ -241,7 +237,7 @@ googleapis-common-protos==1.52.0 # feast (setup.py) # google-api-core # tensorflow-metadata -great-expectations==0.14.8 +great-expectations==0.14.13 # via feast (setup.py) grpcio==1.44.0 # via @@ -265,9 +261,9 @@ httplib2==0.20.4 # via # google-api-python-client # google-auth-httplib2 -httptools==0.3.0 +httptools==0.4.0 # via uvicorn -identify==2.4.11 +identify==2.4.12 # via pre-commit idna==3.3 # via @@ -277,24 +273,11 @@ idna==3.3 # yarl imagesize==1.3.0 # via sphinx -importlib-metadata==4.11.2 +importlib-metadata==4.11.3 # via great-expectations iniconfig==1.1.1 # via pytest -ipykernel==6.9.1 - # via - # ipywidgets - # notebook -ipython==8.1.1 - # via - # ipykernel - # ipywidgets -ipython-genutils==0.2.0 - # via - # ipywidgets - # nbformat - # notebook -ipywidgets==7.6.5 +ipython==8.2.0 # via great-expectations isodate==0.6.1 # via msrest @@ -308,16 +291,14 @@ jinja2==3.0.3 # feast (setup.py) # great-expectations # moto - # nbconvert - # notebook # sphinx -jmespath==0.10.0 +jmespath==1.0.0 # via # boto3 # botocore jsonpatch==1.32 # via great-expectations -jsonpointer==2.2 +jsonpointer==2.3 # via jsonpatch jsonschema==4.4.0 # via @@ -325,44 +306,27 @@ jsonschema==4.4.0 # feast (setup.py) # great-expectations # nbformat -jupyter-client==7.1.2 - # via - # ipykernel - # nbclient - # notebook jupyter-core==4.9.2 - # via - # jupyter-client - # nbconvert - # nbformat - # notebook -jupyterlab-pygments==0.1.2 - # via nbconvert -jupyterlab-widgets==1.0.2 - # via ipywidgets + # via nbformat locket==0.2.1 # via partd -markupsafe==2.1.0 +markupsafe==2.1.1 # via # jinja2 # moto matplotlib-inline==0.1.3 - # via - # ipykernel - # ipython + # via ipython mccabe==0.6.1 # via flake8 minio==7.1.0 # via feast (setup.py) -mistune==0.8.4 - # via - # great-expectations - # nbconvert +mistune==2.0.2 + # via great-expectations mmh3==3.0.0 # via feast (setup.py) mock==2.0.0 # via feast (setup.py) -moto==3.0.5 +moto==3.1.4 # via feast 
(setup.py) msal==1.17.0 # via @@ -386,29 +350,13 @@ mypy==0.931 # via feast (setup.py) mypy-extensions==0.4.3 # via mypy -mypy-protobuf==3.1.0 - # via feast (setup.py) -nbclient==0.5.11 - # via nbconvert -nbconvert==6.4.2 - # via notebook -nbformat==5.1.3 - # via - # ipywidgets - # nbclient - # nbconvert - # notebook -nest-asyncio==1.5.4 - # via - # ipykernel - # jupyter-client - # nbclient - # notebook +mypy-protobuf==3.1 + # via feast (setup.py) +nbformat==5.3.0 + # via great-expectations nodeenv==1.6.0 # via pre-commit -notebook==6.4.10 - # via widgetsnbextension -numpy==1.22.2 +numpy==1.22.3 # via # altair # great-expectations @@ -418,17 +366,17 @@ numpy==1.22.2 # scipy oauthlib==3.2.0 # via requests-oauthlib -oscrypto==1.2.1 +oscrypto==1.3.0 # via snowflake-connector-python packaging==21.3 # via - # bleach # dask # deprecation # google-api-core # google-cloud-bigquery - # google-cloud-firestore + # great-expectations # pytest + # redis # sphinx pandas==1.3.5 # via @@ -439,8 +387,6 @@ pandas==1.3.5 # snowflake-connector-python pandavro==1.5.2 # via feast (setup.py) -pandocfilters==1.5.0 - # via nbconvert parso==0.8.3 # via jedi partd==1.2.0 @@ -455,7 +401,7 @@ pexpect==4.8.0 # via ipython pickleshare==0.7.5 # via ipython -pip-tools==6.5.1 +pip-tools==6.6.0 # via feast (setup.py) platformdirs==2.5.1 # via virtualenv @@ -463,11 +409,9 @@ pluggy==1.0.0 # via pytest portalocker==2.4.0 # via msal-extensions -pre-commit==2.17.0 +pre-commit==2.18.1 # via feast (setup.py) -prometheus-client==0.13.1 - # via notebook -prompt-toolkit==3.0.28 +prompt-toolkit==3.0.29 # via ipython proto-plus==1.19.6 # via @@ -491,9 +435,7 @@ protobuf==3.19.4 psutil==5.9.0 # via feast (setup.py) ptyprocess==0.7.0 - # via - # pexpect - # terminado + # via pexpect pure-eval==0.2.2 # via stack-data py==1.11.0 @@ -514,7 +456,7 @@ pyasn1==0.4.8 # rsa pyasn1-modules==0.2.8 # via google-auth -pybindgen==0.22.0 +pybindgen==0.22.1 # via feast (setup.py) pycodestyle==2.8.0 # via flake8 @@ -531,8 
+473,6 @@ pyflakes==2.4.0 pygments==2.11.2 # via # ipython - # jupyterlab-pygments - # nbconvert # sphinx pyjwt[crypto]==2.3.0 # via @@ -550,7 +490,7 @@ pyrsistent==0.18.1 # via jsonschema pyspark==3.2.1 # via feast (setup.py) -pytest==7.0.1 +pytest==7.1.1 # via # feast (setup.py) # pytest-benchmark @@ -583,12 +523,11 @@ python-dateutil==2.8.2 # botocore # google-cloud-bigquery # great-expectations - # jupyter-client # moto # pandas -python-dotenv==0.19.2 +python-dotenv==0.20.0 # via uvicorn -pytz==2021.3 +pytz==2022.1 # via # babel # google-api-core @@ -596,6 +535,7 @@ pytz==2021.3 # moto # pandas # snowflake-connector-python + # trino pytz-deprecation-shim==0.1.0.post0 # via tzlocal pyyaml==6.0 @@ -604,17 +544,9 @@ pyyaml==6.0 # feast (setup.py) # pre-commit # uvicorn -pyzmq==22.3.0 - # via - # jupyter-client - # notebook -redis==3.5.3 - # via - # feast (setup.py) - # redis-py-cluster -redis-py-cluster==2.1.3 +redis==4.2.2 # via feast (setup.py) -regex==2022.3.2 +regex==2022.3.15 # via black requests==2.27.1 # via @@ -636,39 +568,32 @@ requests==2.27.1 # responses # snowflake-connector-python # sphinx + # trino requests-oauthlib==1.3.1 # via # google-auth-oauthlib # msrest -responses==0.18.0 +responses==0.20.0 # via moto rsa==4.8 # via google-auth -ruamel.yaml==0.17.17 +ruamel-yaml==0.17.17 # via great-expectations -ruamel.yaml.clib==0.2.6 - # via ruamel.yaml s3transfer==0.5.2 # via boto3 scipy==1.8.0 # via great-expectations -send2trash==1.8.0 - # via notebook six==1.16.0 # via # absl-py - # asttokens # azure-core # azure-identity - # bleach - # cryptography # google-api-core # google-auth # google-auth-httplib2 # google-cloud-core # google-resumable-media # grpcio - # isodate # mock # msrestazure # pandavro @@ -679,7 +604,7 @@ sniffio==1.2.0 # via anyio snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==2.7.4 +snowflake-connector-python[pandas]==2.7.6 # via feast (setup.py) sphinx==4.3.2 # via @@ -711,12 +636,8 @@ 
tensorflow-metadata==1.7.0 # via feast (setup.py) termcolor==1.1.0 # via great-expectations -terminado==0.13.2 - # via notebook -testcontainers==3.4.2 +testcontainers==3.5.3 # via feast (setup.py) -testpath==0.6.0 - # via nbconvert toml==0.10.2 # via # black @@ -733,49 +654,39 @@ toolz==0.11.2 # altair # dask # partd -tornado==6.1 - # via - # ipykernel - # jupyter-client - # notebook - # terminado -tqdm==4.63.0 +tqdm==4.64.0 # via # feast (setup.py) # great-expectations traitlets==5.1.1 # via - # ipykernel # ipython - # ipywidgets - # jupyter-client # jupyter-core # matplotlib-inline - # nbclient - # nbconvert # nbformat - # notebook +trino==0.312.0 + # via feast (setup.py) typed-ast==1.5.2 # via black -types-protobuf==3.19.12 +types-protobuf==3.19.15 # via # feast (setup.py) # mypy-protobuf -types-python-dateutil==2.8.9 +types-python-dateutil==2.8.10 # via feast (setup.py) -types-pytz==2021.3.5 +types-pytz==2021.3.6 # via feast (setup.py) -types-pyyaml==6.0.4 +types-pyyaml==6.0.5 # via feast (setup.py) -types-redis==4.1.17 +types-redis==4.1.19 # via feast (setup.py) -types-requests==2.27.11 +types-requests==2.27.16 # via feast (setup.py) -types-setuptools==57.4.9 +types-setuptools==57.4.12 # via feast (setup.py) -types-tabulate==0.8.5 +types-tabulate==0.8.6 # via feast (setup.py) -types-urllib3==1.26.10 +types-urllib3==1.26.11 # via types-requests typing-extensions==4.1.1 # via @@ -783,48 +694,47 @@ typing-extensions==4.1.1 # great-expectations # mypy # pydantic -tzdata==2021.5 +tzdata==2022.1 # via pytz-deprecation-shim -tzlocal==4.1 +tzlocal==4.2 # via great-expectations uritemplate==4.1.1 # via google-api-python-client -urllib3==1.26.8 +urllib3==1.26.9 # via # botocore # feast (setup.py) + # great-expectations # minio # requests # responses -uvicorn[standard]==0.17.5 +uvicorn[standard]==0.17.6 # via feast (setup.py) uvloop==0.16.0 # via uvicorn -virtualenv==20.13.2 +virtualenv==20.14.1 # via pre-commit -watchgod==0.7 +watchgod==0.8.2 # via uvicorn 
wcwidth==0.2.5 # via prompt-toolkit -webencodings==0.5.1 - # via bleach -websocket-client==1.3.1 +websocket-client==1.3.2 # via docker websockets==10.2 # via uvicorn -werkzeug==2.0.3 +werkzeug==2.1.1 # via moto wheel==0.37.1 # via pip-tools -widgetsnbextension==3.5.2 - # via ipywidgets -wrapt==1.13.3 - # via testcontainers +wrapt==1.14.0 + # via + # deprecated + # testcontainers xmltodict==0.12.0 # via moto yarl==1.7.2 # via aiohttp -zipp==3.7.0 +zipp==3.8.0 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index e21a4ba412..455d80b790 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -7,18 +7,20 @@ absl-py==1.0.0 # via tensorflow-metadata anyio==3.5.0 - # via starlette -asgiref==3.4.1 + # via + # starlette + # watchgod +asgiref==3.5.0 # via uvicorn attrs==21.4.0 # via jsonschema -cachetools==4.2.4 +cachetools==5.0.0 # via google-auth certifi==2021.10.8 # via requests -charset-normalizer==2.0.10 +charset-normalizer==2.0.12 # via requests -click==8.0.3 +click==8.1.2 # via # feast (setup.py) # uvicorn @@ -30,55 +32,55 @@ dask==2022.1.1 # via feast (setup.py) dill==0.3.4 # via feast (setup.py) -fastapi==0.72.0 +fastapi==0.75.1 # via feast (setup.py) -fastavro==1.4.9 +fastavro==1.4.10 # via # feast (setup.py) # pandavro -fsspec==2022.2.0 +fsspec==2022.3.0 # via dask -google-api-core==2.4.0 +google-api-core==2.7.2 # via feast (setup.py) -google-auth==2.3.3 +google-auth==2.6.5 # via google-api-core googleapis-common-protos==1.52.0 # via # feast (setup.py) # google-api-core # tensorflow-metadata -grpcio==1.43.0 +grpcio==1.44.0 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.43.0 +grpcio-reflection==1.44.0 # via feast (setup.py) h11==0.13.0 # via uvicorn -httptools==0.3.0 +httptools==0.4.0 # via uvicorn idna==3.3 # via # anyio 
# requests -jinja2==3.0.3 +jinja2==3.1.1 # via feast (setup.py) jsonschema==4.4.0 # via feast (setup.py) locket==0.2.1 # via partd -markupsafe==2.0.1 +markupsafe==2.1.1 # via jinja2 mmh3==3.0.0 # via feast (setup.py) -numpy==1.21.5 +numpy==1.22.3 # via # pandas # pandavro # pyarrow packaging==21.3 # via dask -pandas==1.3.5 +pandas==1.4.2 # via # feast (setup.py) # pandavro @@ -88,7 +90,7 @@ partd==1.2.0 # via dask proto-plus==1.19.6 # via feast (setup.py) -protobuf==3.19.3 +protobuf==3.19.4 # via # feast (setup.py) # google-api-core @@ -96,7 +98,7 @@ protobuf==3.19.3 # grpcio-reflection # proto-plus # tensorflow-metadata -pyarrow==6.0.1 +pyarrow==7.0.0 # via feast (setup.py) pyasn1==0.4.8 # via @@ -108,15 +110,15 @@ pydantic==1.9.0 # via # fastapi # feast (setup.py) -pyparsing==3.0.7 +pyparsing==3.0.8 # via packaging pyrsistent==0.18.1 # via jsonschema python-dateutil==2.8.2 # via pandas -python-dotenv==0.19.2 +python-dotenv==0.20.0 # via uvicorn -pytz==2021.3 +pytz==2022.1 # via pandas pyyaml==6.0 # via @@ -142,7 +144,7 @@ tabulate==0.8.9 # via feast (setup.py) tenacity==8.0.1 # via feast (setup.py) -tensorflow-metadata==1.6.0 +tensorflow-metadata==1.7.0 # via feast (setup.py) toml==0.10.2 # via feast (setup.py) @@ -150,20 +152,17 @@ toolz==0.11.2 # via # dask # partd -tqdm==4.62.3 +tqdm==4.64.0 # via feast (setup.py) -typing-extensions==4.0.1 +typing-extensions==4.1.1 # via pydantic -urllib3==1.26.8 +urllib3==1.26.9 # via requests -uvicorn[standard]==0.17.0 +uvicorn[standard]==0.17.6 # via feast (setup.py) uvloop==0.16.0 # via uvicorn -watchgod==0.7 +watchgod==0.8.2 # via uvicorn -websockets==10.1 +websockets==10.2 # via uvicorn - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/sdk/python/requirements/py3.7-ci-requirements.txt b/sdk/python/requirements/py3.7-ci-requirements.txt index 4ec4bde7c4..224840a6f7 100644 --- a/sdk/python/requirements/py3.7-ci-requirements.txt +++ 
b/sdk/python/requirements/py3.7-ci-requirements.txt @@ -43,7 +43,9 @@ asn1crypto==1.4.0 assertpy==1.1 # via feast (setup.py) async-timeout==4.0.2 - # via aiohttp + # via + # aiohttp + # redis asynctest==0.13.0 # via aiohttp attrs==21.4.0 @@ -122,7 +124,7 @@ colorama==0.4.4 # via feast (setup.py) coverage[toml]==6.3.2 # via pytest-cov -cryptography==3.3.2 +cryptography==3.4.8 # via # adal # azure-identity @@ -143,6 +145,8 @@ decorator==5.1.1 # ipython defusedxml==0.7.1 # via nbconvert +deprecated==1.2.13 + # via redis deprecation==2.1.0 # via testcontainers dill==0.3.4 @@ -290,6 +294,7 @@ importlib-metadata==4.2.0 # pluggy # pre-commit # pytest + # redis # virtualenv importlib-resources==5.4.0 # via jsonschema @@ -443,6 +448,7 @@ packaging==21.3 # google-cloud-bigquery # google-cloud-firestore # pytest + # redis # sphinx pandas==1.3.5 # via @@ -608,6 +614,7 @@ pytz==2021.3 # moto # pandas # snowflake-connector-python + # trino pytz-deprecation-shim==0.1.0.post0 # via tzlocal pyyaml==6.0 @@ -620,11 +627,7 @@ pyzmq==22.3.0 # via # jupyter-client # notebook -redis==3.5.3 - # via - # feast (setup.py) - # redis-py-cluster -redis-py-cluster==2.1.3 +redis==4.2.2 # via feast (setup.py) regex==2022.3.2 # via black @@ -648,6 +651,7 @@ requests==2.27.1 # responses # snowflake-connector-python # sphinx + # trino requests-oauthlib==1.3.1 # via # google-auth-oauthlib @@ -672,7 +676,6 @@ six==1.16.0 # azure-core # azure-identity # bleach - # cryptography # google-api-core # google-auth # google-auth-httplib2 @@ -722,7 +725,7 @@ termcolor==1.1.0 # via great-expectations terminado==0.13.2 # via notebook -testcontainers==3.4.2 +testcontainers==3.5.3 # via feast (setup.py) testpath==0.6.0 # via nbconvert @@ -764,6 +767,8 @@ traitlets==5.1.1 # nbconvert # nbformat # notebook +trino==0.312.0 + # via feast (setup.py) typed-ast==1.5.2 # via # black @@ -802,6 +807,7 @@ typing-extensions==4.1.1 # jsonschema # mypy # pydantic + # redis # starlette # uvicorn # yarl @@ -841,7 +847,9 @@ 
wheel==0.37.1 widgetsnbextension==3.5.2 # via ipywidgets wrapt==1.13.3 - # via testcontainers + # via + # deprecated + # testcontainers xmltodict==0.12.0 # via moto yarl==1.7.2 diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index 5e2da9baa7..34032ef7d2 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -23,20 +23,16 @@ alabaster==0.7.12 altair==4.2.0 # via great-expectations anyio==3.5.0 - # via starlette + # via + # starlette + # watchgod appdirs==1.4.4 # via black -appnope==0.1.2 - # via - # ipykernel - # ipython -argon2-cffi==21.3.0 - # via notebook -argon2-cffi-bindings==21.2.0 - # via argon2-cffi +appnope==0.1.3 + # via ipython asgiref==3.5.0 # via uvicorn -asn1crypto==1.4.0 +asn1crypto==1.5.1 # via # oscrypto # snowflake-connector-python @@ -45,7 +41,9 @@ assertpy==1.1 asttokens==2.0.5 # via stack-data async-timeout==4.0.2 - # via aiohttp + # via + # aiohttp + # redis attrs==21.4.0 # via # aiohttp @@ -54,16 +52,16 @@ attrs==21.4.0 # pytest avro==1.10.0 # via feast (setup.py) -azure-core==1.23.0 +azure-core==1.23.1 # via # adlfs # azure-identity # azure-storage-blob azure-datalake-store==0.0.52 # via adlfs -azure-identity==1.8.0 +azure-identity==1.9.0 # via adlfs -azure-storage-blob==12.9.0 +azure-storage-blob==12.11.0 # via adlfs babel==2.9.1 # via sphinx @@ -75,13 +73,11 @@ backports-zoneinfo==0.2.1 # tzlocal black==19.10b0 # via feast (setup.py) -bleach==4.1.0 - # via nbconvert -boto3==1.21.11 +boto3==1.21.41 # via # feast (setup.py) # moto -botocore==1.24.11 +botocore==1.24.41 # via # boto3 # moto @@ -98,7 +94,6 @@ certifi==2021.10.8 # snowflake-connector-python cffi==1.15.0 # via - # argon2-cffi-bindings # azure-datalake-store # cryptography # snowflake-connector-python @@ -109,7 +104,7 @@ charset-normalizer==2.0.12 # aiohttp # requests # snowflake-connector-python -click==8.0.4 +click==8.1.2 # via # black # feast 
(setup.py) @@ -119,30 +114,32 @@ click==8.0.4 cloudpickle==2.0.0 # via dask colorama==0.4.4 - # via feast (setup.py) + # via + # feast (setup.py) + # great-expectations coverage[toml]==6.3.2 # via pytest-cov -cryptography==3.3.2 +cryptography==3.4.8 # via # adal # azure-identity # azure-storage-blob # feast (setup.py) + # great-expectations # moto # msal - # pyjwt # pyopenssl # snowflake-connector-python dask==2022.1.1 # via feast (setup.py) -debugpy==1.5.1 - # via ipykernel +dataclasses==0.6 + # via great-expectations decorator==5.1.1 # via # gcsfs # ipython -defusedxml==0.7.1 - # via nbconvert +deprecated==1.2.13 + # via redis deprecation==2.1.0 # via testcontainers dill==0.3.4 @@ -158,20 +155,19 @@ docutils==0.17.1 # sphinx # sphinx-rtd-theme entrypoints==0.4 - # via - # altair - # jupyter-client - # nbconvert + # via altair execnet==1.9.0 # via pytest-xdist executing==0.8.3 # via stack-data -fastapi==0.74.1 +fastapi==0.75.1 # via feast (setup.py) -fastavro==1.4.9 +fastavro==1.4.10 # via # feast (setup.py) # pandavro +fastjsonschema==2.15.3 + # via nbformat filelock==3.6.0 # via virtualenv firebase-admin==4.5.2 @@ -182,12 +178,12 @@ frozenlist==1.3.0 # via # aiohttp # aiosignal -fsspec==2022.2.0 +fsspec==2022.3.0 # via # adlfs # dask # gcsfs -gcsfs==2022.2.0 +gcsfs==2022.3.0 # via feast (setup.py) google-api-core[grpc]==1.31.5 # via @@ -199,7 +195,7 @@ google-api-core[grpc]==1.31.5 # google-cloud-core # google-cloud-datastore # google-cloud-firestore -google-api-python-client==2.39.0 +google-api-python-client==2.44.0 # via firebase-admin google-auth==1.35.0 # via @@ -212,11 +208,11 @@ google-auth==1.35.0 # google-cloud-storage google-auth-httplib2==0.1.0 # via google-api-python-client -google-auth-oauthlib==0.5.0 +google-auth-oauthlib==0.5.1 # via gcsfs -google-cloud-bigquery==2.34.1 +google-cloud-bigquery==2.34.3 # via feast (setup.py) -google-cloud-bigquery-storage==2.12.0 +google-cloud-bigquery-storage==2.13.1 # via feast (setup.py) google-cloud-core==1.7.2 # 
via @@ -225,9 +221,9 @@ google-cloud-core==1.7.2 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-cloud-datastore==2.5.0 +google-cloud-datastore==2.5.1 # via feast (setup.py) -google-cloud-firestore==2.3.4 +google-cloud-firestore==2.4.0 # via firebase-admin google-cloud-storage==1.40.0 # via @@ -245,7 +241,7 @@ googleapis-common-protos==1.52.0 # feast (setup.py) # google-api-core # tensorflow-metadata -great-expectations==0.14.8 +great-expectations==0.14.13 # via feast (setup.py) grpcio==1.44.0 # via @@ -269,9 +265,9 @@ httplib2==0.20.4 # via # google-api-python-client # google-auth-httplib2 -httptools==0.3.0 +httptools==0.4.0 # via uvicorn -identify==2.4.11 +identify==2.4.12 # via pre-commit idna==3.3 # via @@ -281,26 +277,13 @@ idna==3.3 # yarl imagesize==1.3.0 # via sphinx -importlib-metadata==4.11.2 +importlib-metadata==4.11.3 # via great-expectations -importlib-resources==5.4.0 +importlib-resources==5.7.0 # via jsonschema iniconfig==1.1.1 # via pytest -ipykernel==6.9.1 - # via - # ipywidgets - # notebook -ipython==8.1.1 - # via - # ipykernel - # ipywidgets -ipython-genutils==0.2.0 - # via - # ipywidgets - # nbformat - # notebook -ipywidgets==7.6.5 +ipython==8.2.0 # via great-expectations isodate==0.6.1 # via msrest @@ -314,16 +297,14 @@ jinja2==3.0.3 # feast (setup.py) # great-expectations # moto - # nbconvert - # notebook # sphinx -jmespath==0.10.0 +jmespath==1.0.0 # via # boto3 # botocore jsonpatch==1.32 # via great-expectations -jsonpointer==2.2 +jsonpointer==2.3 # via jsonpatch jsonschema==4.4.0 # via @@ -331,44 +312,27 @@ jsonschema==4.4.0 # feast (setup.py) # great-expectations # nbformat -jupyter-client==7.1.2 - # via - # ipykernel - # nbclient - # notebook jupyter-core==4.9.2 - # via - # jupyter-client - # nbconvert - # nbformat - # notebook -jupyterlab-pygments==0.1.2 - # via nbconvert -jupyterlab-widgets==1.0.2 - # via ipywidgets + # via nbformat locket==0.2.1 # via partd -markupsafe==2.1.0 +markupsafe==2.1.1 # via # 
jinja2 # moto matplotlib-inline==0.1.3 - # via - # ipykernel - # ipython + # via ipython mccabe==0.6.1 # via flake8 minio==7.1.0 # via feast (setup.py) -mistune==0.8.4 - # via - # great-expectations - # nbconvert +mistune==2.0.2 + # via great-expectations mmh3==3.0.0 # via feast (setup.py) mock==2.0.0 # via feast (setup.py) -moto==3.0.5 +moto==3.1.4 # via feast (setup.py) msal==1.17.0 # via @@ -392,29 +356,13 @@ mypy==0.931 # via feast (setup.py) mypy-extensions==0.4.3 # via mypy -mypy-protobuf==3.1.0 - # via feast (setup.py) -nbclient==0.5.11 - # via nbconvert -nbconvert==6.4.2 - # via notebook -nbformat==5.1.3 - # via - # ipywidgets - # nbclient - # nbconvert - # notebook -nest-asyncio==1.5.4 - # via - # ipykernel - # jupyter-client - # nbclient - # notebook +mypy-protobuf==3.1 + # via feast (setup.py) +nbformat==5.3.0 + # via great-expectations nodeenv==1.6.0 # via pre-commit -notebook==6.4.10 - # via widgetsnbextension -numpy==1.22.2 +numpy==1.22.3 # via # altair # great-expectations @@ -424,17 +372,17 @@ numpy==1.22.2 # scipy oauthlib==3.2.0 # via requests-oauthlib -oscrypto==1.2.1 +oscrypto==1.3.0 # via snowflake-connector-python packaging==21.3 # via - # bleach # dask # deprecation # google-api-core # google-cloud-bigquery - # google-cloud-firestore + # great-expectations # pytest + # redis # sphinx pandas==1.3.5 # via @@ -445,8 +393,6 @@ pandas==1.3.5 # snowflake-connector-python pandavro==1.5.2 # via feast (setup.py) -pandocfilters==1.5.0 - # via nbconvert parso==0.8.3 # via jedi partd==1.2.0 @@ -461,7 +407,7 @@ pexpect==4.8.0 # via ipython pickleshare==0.7.5 # via ipython -pip-tools==6.5.1 +pip-tools==6.6.0 # via feast (setup.py) platformdirs==2.5.1 # via virtualenv @@ -469,11 +415,9 @@ pluggy==1.0.0 # via pytest portalocker==2.4.0 # via msal-extensions -pre-commit==2.17.0 +pre-commit==2.18.1 # via feast (setup.py) -prometheus-client==0.13.1 - # via notebook -prompt-toolkit==3.0.28 +prompt-toolkit==3.0.29 # via ipython proto-plus==1.19.6 # via @@ -497,9 
+441,7 @@ protobuf==3.19.4 psutil==5.9.0 # via feast (setup.py) ptyprocess==0.7.0 - # via - # pexpect - # terminado + # via pexpect pure-eval==0.2.2 # via stack-data py==1.11.0 @@ -520,7 +462,7 @@ pyasn1==0.4.8 # rsa pyasn1-modules==0.2.8 # via google-auth -pybindgen==0.22.0 +pybindgen==0.22.1 # via feast (setup.py) pycodestyle==2.8.0 # via flake8 @@ -537,8 +479,6 @@ pyflakes==2.4.0 pygments==2.11.2 # via # ipython - # jupyterlab-pygments - # nbconvert # sphinx pyjwt[crypto]==2.3.0 # via @@ -556,7 +496,7 @@ pyrsistent==0.18.1 # via jsonschema pyspark==3.2.1 # via feast (setup.py) -pytest==7.0.1 +pytest==7.1.1 # via # feast (setup.py) # pytest-benchmark @@ -589,12 +529,11 @@ python-dateutil==2.8.2 # botocore # google-cloud-bigquery # great-expectations - # jupyter-client # moto # pandas -python-dotenv==0.19.2 +python-dotenv==0.20.0 # via uvicorn -pytz==2021.3 +pytz==2022.1 # via # babel # google-api-core @@ -602,6 +541,7 @@ pytz==2021.3 # moto # pandas # snowflake-connector-python + # trino pytz-deprecation-shim==0.1.0.post0 # via tzlocal pyyaml==6.0 @@ -610,17 +550,9 @@ pyyaml==6.0 # feast (setup.py) # pre-commit # uvicorn -pyzmq==22.3.0 - # via - # jupyter-client - # notebook -redis==3.5.3 - # via - # feast (setup.py) - # redis-py-cluster -redis-py-cluster==2.1.3 +redis==4.2.2 # via feast (setup.py) -regex==2022.3.2 +regex==2022.3.15 # via black requests==2.27.1 # via @@ -642,39 +574,34 @@ requests==2.27.1 # responses # snowflake-connector-python # sphinx + # trino requests-oauthlib==1.3.1 # via # google-auth-oauthlib # msrest -responses==0.18.0 +responses==0.20.0 # via moto rsa==4.8 # via google-auth ruamel-yaml==0.17.17 # via great-expectations -ruamel.yaml.clib==0.2.6 - # via ruamel.yaml +ruamel-yaml-clib==0.2.6 + # via ruamel-yaml s3transfer==0.5.2 # via boto3 scipy==1.8.0 # via great-expectations -send2trash==1.8.0 - # via notebook six==1.16.0 # via # absl-py - # asttokens # azure-core # azure-identity - # bleach - # cryptography # google-api-core # 
google-auth # google-auth-httplib2 # google-cloud-core # google-resumable-media # grpcio - # isodate # mock # msrestazure # pandavro @@ -685,7 +612,7 @@ sniffio==1.2.0 # via anyio snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==2.7.4 +snowflake-connector-python[pandas]==2.7.6 # via feast (setup.py) sphinx==4.3.2 # via @@ -717,12 +644,8 @@ tensorflow-metadata==1.7.0 # via feast (setup.py) termcolor==1.1.0 # via great-expectations -terminado==0.13.2 - # via notebook -testcontainers==3.4.2 +testcontainers==3.5.3 # via feast (setup.py) -testpath==0.6.0 - # via nbconvert toml==0.10.2 # via # black @@ -739,49 +662,39 @@ toolz==0.11.2 # altair # dask # partd -tornado==6.1 - # via - # ipykernel - # jupyter-client - # notebook - # terminado -tqdm==4.63.0 +tqdm==4.64.0 # via # feast (setup.py) # great-expectations traitlets==5.1.1 # via - # ipykernel # ipython - # ipywidgets - # jupyter-client # jupyter-core # matplotlib-inline - # nbclient - # nbconvert # nbformat - # notebook +trino==0.312.0 + # via feast (setup.py) typed-ast==1.5.2 # via black -types-protobuf==3.19.12 +types-protobuf==3.19.15 # via # feast (setup.py) # mypy-protobuf -types-python-dateutil==2.8.9 +types-python-dateutil==2.8.10 # via feast (setup.py) -types-pytz==2021.3.5 +types-pytz==2021.3.6 # via feast (setup.py) -types-pyyaml==6.0.4 +types-pyyaml==6.0.5 # via feast (setup.py) -types-redis==4.1.17 +types-redis==4.1.19 # via feast (setup.py) -types-requests==2.27.11 +types-requests==2.27.16 # via feast (setup.py) -types-setuptools==57.4.9 +types-setuptools==57.4.12 # via feast (setup.py) -types-tabulate==0.8.5 +types-tabulate==0.8.6 # via feast (setup.py) -types-urllib3==1.26.10 +types-urllib3==1.26.11 # via types-requests typing-extensions==4.1.1 # via @@ -789,52 +702,51 @@ typing-extensions==4.1.1 # great-expectations # mypy # pydantic -tzdata==2021.5 +tzdata==2022.1 # via pytz-deprecation-shim -tzlocal==4.1 +tzlocal==4.2 # via great-expectations uritemplate==4.1.1 # via 
google-api-python-client -urllib3==1.26.8 +urllib3==1.26.9 # via # botocore # feast (setup.py) + # great-expectations # minio # requests # responses -uvicorn[standard]==0.17.5 +uvicorn[standard]==0.17.6 # via feast (setup.py) uvloop==0.16.0 # via uvicorn -virtualenv==20.13.2 +virtualenv==20.14.1 # via pre-commit -watchgod==0.7 +watchgod==0.8.2 # via uvicorn wcwidth==0.2.5 # via prompt-toolkit -webencodings==0.5.1 - # via bleach -websocket-client==1.3.1 +websocket-client==1.3.2 # via docker websockets==10.2 # via uvicorn -werkzeug==2.0.3 +werkzeug==2.1.1 # via moto wheel==0.37.1 # via pip-tools -widgetsnbextension==3.5.2 - # via ipywidgets -wrapt==1.13.3 - # via testcontainers +wrapt==1.14.0 + # via + # deprecated + # testcontainers xmltodict==0.12.0 # via moto yarl==1.7.2 # via aiohttp -zipp==3.7.0 +zipp==3.8.0 # via # importlib-metadata # importlib-resources # The following packages are considered to be unsafe in a requirements file: # pip -# setuptools \ No newline at end of file +# setuptools diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index 4b996ef075..9000c7b1f7 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -7,18 +7,20 @@ absl-py==1.0.0 # via tensorflow-metadata anyio==3.5.0 - # via starlette -asgiref==3.4.1 + # via + # starlette + # watchgod +asgiref==3.5.0 # via uvicorn attrs==21.4.0 # via jsonschema -cachetools==4.2.4 +cachetools==5.0.0 # via google-auth certifi==2021.10.8 # via requests -charset-normalizer==2.0.10 +charset-normalizer==2.0.12 # via requests -click==8.0.3 +click==8.1.2 # via # feast (setup.py) # uvicorn @@ -30,57 +32,57 @@ dask==2022.1.1 # via feast (setup.py) dill==0.3.4 # via feast (setup.py) -fastapi==0.72.0 +fastapi==0.75.1 # via feast (setup.py) -fastavro==1.4.9 +fastavro==1.4.10 # via # feast (setup.py) # pandavro -fsspec==2022.2.0 +fsspec==2022.3.0 # via dask -google-api-core==2.4.0 
+google-api-core==2.7.2 # via feast (setup.py) -google-auth==2.3.3 +google-auth==2.6.5 # via google-api-core googleapis-common-protos==1.52.0 # via # feast (setup.py) # google-api-core # tensorflow-metadata -grpcio==1.43.0 +grpcio==1.44.0 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.43.0 +grpcio-reflection==1.44.0 # via feast (setup.py) h11==0.13.0 # via uvicorn -httptools==0.3.0 +httptools==0.4.0 # via uvicorn idna==3.3 # via # anyio # requests -importlib-resources==5.4.0 +importlib-resources==5.7.0 # via jsonschema -jinja2==3.0.3 +jinja2==3.1.1 # via feast (setup.py) jsonschema==4.4.0 # via feast (setup.py) locket==0.2.1 # via partd -markupsafe==2.0.1 +markupsafe==2.1.1 # via jinja2 mmh3==3.0.0 # via feast (setup.py) -numpy==1.21.5 +numpy==1.22.3 # via # pandas # pandavro # pyarrow packaging==21.3 # via dask -pandas==1.3.5 +pandas==1.4.2 # via # feast (setup.py) # pandavro @@ -90,7 +92,7 @@ partd==1.2.0 # via dask proto-plus==1.19.6 # via feast (setup.py) -protobuf==3.19.3 +protobuf==3.19.4 # via # feast (setup.py) # google-api-core @@ -98,7 +100,7 @@ protobuf==3.19.3 # grpcio-reflection # proto-plus # tensorflow-metadata -pyarrow==6.0.1 +pyarrow==7.0.0 # via feast (setup.py) pyasn1==0.4.8 # via @@ -110,15 +112,15 @@ pydantic==1.9.0 # via # fastapi # feast (setup.py) -pyparsing==3.0.7 +pyparsing==3.0.8 # via packaging pyrsistent==0.18.1 # via jsonschema python-dateutil==2.8.2 # via pandas -python-dotenv==0.19.2 +python-dotenv==0.20.0 # via uvicorn -pytz==2021.3 +pytz==2022.1 # via pandas pyyaml==6.0 # via @@ -144,7 +146,7 @@ tabulate==0.8.9 # via feast (setup.py) tenacity==8.0.1 # via feast (setup.py) -tensorflow-metadata==1.6.0 +tensorflow-metadata==1.7.0 # via feast (setup.py) toml==0.10.2 # via feast (setup.py) @@ -152,22 +154,19 @@ toolz==0.11.2 # via # dask # partd -tqdm==4.62.3 +tqdm==4.64.0 # via feast (setup.py) -typing-extensions==4.0.1 +typing-extensions==4.1.1 # via pydantic -urllib3==1.26.8 +urllib3==1.26.9 # via requests 
-uvicorn[standard]==0.17.0 +uvicorn[standard]==0.17.6 # via feast (setup.py) uvloop==0.16.0 # via uvicorn -watchgod==0.7 +watchgod==0.8.2 # via uvicorn -websockets==10.1 +websockets==10.2 # via uvicorn -zipp==3.7.0 +zipp==3.8.0 # via importlib-resources - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index cf228b9412..1ab910a16d 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -23,20 +23,16 @@ alabaster==0.7.12 altair==4.2.0 # via great-expectations anyio==3.5.0 - # via starlette + # via + # starlette + # watchgod appdirs==1.4.4 # via black -appnope==0.1.2 - # via - # ipykernel - # ipython -argon2-cffi==21.3.0 - # via notebook -argon2-cffi-bindings==21.2.0 - # via argon2-cffi +appnope==0.1.3 + # via ipython asgiref==3.5.0 # via uvicorn -asn1crypto==1.4.0 +asn1crypto==1.5.1 # via # oscrypto # snowflake-connector-python @@ -45,7 +41,9 @@ assertpy==1.1 asttokens==2.0.5 # via stack-data async-timeout==4.0.2 - # via aiohttp + # via + # aiohttp + # redis attrs==21.4.0 # via # aiohttp @@ -54,16 +52,16 @@ attrs==21.4.0 # pytest avro==1.10.0 # via feast (setup.py) -azure-core==1.23.0 +azure-core==1.23.1 # via # adlfs # azure-identity # azure-storage-blob azure-datalake-store==0.0.52 # via adlfs -azure-identity==1.8.0 +azure-identity==1.9.0 # via adlfs -azure-storage-blob==12.9.0 +azure-storage-blob==12.11.0 # via adlfs babel==2.9.1 # via sphinx @@ -71,13 +69,11 @@ backcall==0.2.0 # via ipython black==19.10b0 # via feast (setup.py) -bleach==4.1.0 - # via nbconvert -boto3==1.21.11 +boto3==1.21.41 # via # feast (setup.py) # moto -botocore==1.24.11 +botocore==1.24.41 # via # boto3 # moto @@ -94,7 +90,6 @@ certifi==2021.10.8 # snowflake-connector-python cffi==1.15.0 # via - # argon2-cffi-bindings # azure-datalake-store # cryptography # 
snowflake-connector-python @@ -105,7 +100,7 @@ charset-normalizer==2.0.12 # aiohttp # requests # snowflake-connector-python -click==8.0.4 +click==8.1.2 # via # black # feast (setup.py) @@ -115,30 +110,32 @@ click==8.0.4 cloudpickle==2.0.0 # via dask colorama==0.4.4 - # via feast (setup.py) + # via + # feast (setup.py) + # great-expectations coverage[toml]==6.3.2 # via pytest-cov -cryptography==3.3.2 +cryptography==3.4.8 # via # adal # azure-identity # azure-storage-blob # feast (setup.py) + # great-expectations # moto # msal - # pyjwt # pyopenssl # snowflake-connector-python dask==2022.1.1 # via feast (setup.py) -debugpy==1.5.1 - # via ipykernel +dataclasses==0.6 + # via great-expectations decorator==5.1.1 # via # gcsfs # ipython -defusedxml==0.7.1 - # via nbconvert +deprecated==1.2.13 + # via redis deprecation==2.1.0 # via testcontainers dill==0.3.4 @@ -154,20 +151,19 @@ docutils==0.17.1 # sphinx # sphinx-rtd-theme entrypoints==0.4 - # via - # altair - # jupyter-client - # nbconvert + # via altair execnet==1.9.0 # via pytest-xdist executing==0.8.3 # via stack-data -fastapi==0.74.1 +fastapi==0.75.1 # via feast (setup.py) -fastavro==1.4.9 +fastavro==1.4.10 # via # feast (setup.py) # pandavro +fastjsonschema==2.15.3 + # via nbformat filelock==3.6.0 # via virtualenv firebase-admin==4.5.2 @@ -178,12 +174,12 @@ frozenlist==1.3.0 # via # aiohttp # aiosignal -fsspec==2022.2.0 +fsspec==2022.3.0 # via # adlfs # dask # gcsfs -gcsfs==2022.2.0 +gcsfs==2022.3.0 # via feast (setup.py) google-api-core[grpc]==1.31.5 # via @@ -195,7 +191,7 @@ google-api-core[grpc]==1.31.5 # google-cloud-core # google-cloud-datastore # google-cloud-firestore -google-api-python-client==2.39.0 +google-api-python-client==2.44.0 # via firebase-admin google-auth==1.35.0 # via @@ -208,11 +204,11 @@ google-auth==1.35.0 # google-cloud-storage google-auth-httplib2==0.1.0 # via google-api-python-client -google-auth-oauthlib==0.5.0 +google-auth-oauthlib==0.5.1 # via gcsfs -google-cloud-bigquery==2.34.1 
+google-cloud-bigquery==2.34.3 # via feast (setup.py) -google-cloud-bigquery-storage==2.12.0 +google-cloud-bigquery-storage==2.13.1 # via feast (setup.py) google-cloud-core==1.7.2 # via @@ -221,9 +217,9 @@ google-cloud-core==1.7.2 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-cloud-datastore==2.5.0 +google-cloud-datastore==2.5.1 # via feast (setup.py) -google-cloud-firestore==2.3.4 +google-cloud-firestore==2.4.0 # via firebase-admin google-cloud-storage==1.40.0 # via @@ -241,7 +237,7 @@ googleapis-common-protos==1.52.0 # feast (setup.py) # google-api-core # tensorflow-metadata -great-expectations==0.14.8 +great-expectations==0.14.13 # via feast (setup.py) grpcio==1.44.0 # via @@ -265,9 +261,9 @@ httplib2==0.20.4 # via # google-api-python-client # google-auth-httplib2 -httptools==0.3.0 +httptools==0.4.0 # via uvicorn -identify==2.4.11 +identify==2.4.12 # via pre-commit idna==3.3 # via @@ -277,24 +273,11 @@ idna==3.3 # yarl imagesize==1.3.0 # via sphinx -importlib-metadata==4.11.2 +importlib-metadata==4.11.3 # via great-expectations iniconfig==1.1.1 # via pytest -ipykernel==6.9.1 - # via - # ipywidgets - # notebook -ipython==8.1.1 - # via - # ipykernel - # ipywidgets -ipython-genutils==0.2.0 - # via - # ipywidgets - # nbformat - # notebook -ipywidgets==7.6.5 +ipython==8.2.0 # via great-expectations isodate==0.6.1 # via msrest @@ -308,16 +291,14 @@ jinja2==3.0.3 # feast (setup.py) # great-expectations # moto - # nbconvert - # notebook # sphinx -jmespath==0.10.0 +jmespath==1.0.0 # via # boto3 # botocore jsonpatch==1.32 # via great-expectations -jsonpointer==2.2 +jsonpointer==2.3 # via jsonpatch jsonschema==4.4.0 # via @@ -325,44 +306,27 @@ jsonschema==4.4.0 # feast (setup.py) # great-expectations # nbformat -jupyter-client==7.1.2 - # via - # ipykernel - # nbclient - # notebook jupyter-core==4.9.2 - # via - # jupyter-client - # nbconvert - # nbformat - # notebook -jupyterlab-pygments==0.1.2 - # via nbconvert -jupyterlab-widgets==1.0.2 
- # via ipywidgets + # via nbformat locket==0.2.1 # via partd -markupsafe==2.1.0 +markupsafe==2.1.1 # via # jinja2 # moto matplotlib-inline==0.1.3 - # via - # ipykernel - # ipython + # via ipython mccabe==0.6.1 # via flake8 minio==7.1.0 # via feast (setup.py) -mistune==0.8.4 - # via - # great-expectations - # nbconvert +mistune==2.0.2 + # via great-expectations mmh3==3.0.0 # via feast (setup.py) mock==2.0.0 # via feast (setup.py) -moto==3.0.5 +moto==3.1.4 # via feast (setup.py) msal==1.17.0 # via @@ -386,29 +350,13 @@ mypy==0.931 # via feast (setup.py) mypy-extensions==0.4.3 # via mypy -mypy-protobuf==3.1.0 - # via feast (setup.py) -nbclient==0.5.11 - # via nbconvert -nbconvert==6.4.2 - # via notebook -nbformat==5.1.3 - # via - # ipywidgets - # nbclient - # nbconvert - # notebook -nest-asyncio==1.5.4 - # via - # ipykernel - # jupyter-client - # nbclient - # notebook +mypy-protobuf==3.1 + # via feast (setup.py) +nbformat==5.3.0 + # via great-expectations nodeenv==1.6.0 # via pre-commit -notebook==6.4.10 - # via widgetsnbextension -numpy==1.22.2 +numpy==1.22.3 # via # altair # great-expectations @@ -418,17 +366,17 @@ numpy==1.22.2 # scipy oauthlib==3.2.0 # via requests-oauthlib -oscrypto==1.2.1 +oscrypto==1.3.0 # via snowflake-connector-python packaging==21.3 # via - # bleach # dask # deprecation # google-api-core # google-cloud-bigquery - # google-cloud-firestore + # great-expectations # pytest + # redis # sphinx pandas==1.3.5 # via @@ -439,8 +387,6 @@ pandas==1.3.5 # snowflake-connector-python pandavro==1.5.2 # via feast (setup.py) -pandocfilters==1.5.0 - # via nbconvert parso==0.8.3 # via jedi partd==1.2.0 @@ -455,7 +401,7 @@ pexpect==4.8.0 # via ipython pickleshare==0.7.5 # via ipython -pip-tools==6.5.1 +pip-tools==6.6.0 # via feast (setup.py) platformdirs==2.5.1 # via virtualenv @@ -463,11 +409,9 @@ pluggy==1.0.0 # via pytest portalocker==2.4.0 # via msal-extensions -pre-commit==2.17.0 +pre-commit==2.18.1 # via feast (setup.py) -prometheus-client==0.13.1 - # via 
notebook -prompt-toolkit==3.0.28 +prompt-toolkit==3.0.29 # via ipython proto-plus==1.19.6 # via @@ -491,9 +435,7 @@ protobuf==3.19.4 psutil==5.9.0 # via feast (setup.py) ptyprocess==0.7.0 - # via - # pexpect - # terminado + # via pexpect pure-eval==0.2.2 # via stack-data py==1.11.0 @@ -514,7 +456,7 @@ pyasn1==0.4.8 # rsa pyasn1-modules==0.2.8 # via google-auth -pybindgen==0.22.0 +pybindgen==0.22.1 # via feast (setup.py) pycodestyle==2.8.0 # via flake8 @@ -531,8 +473,6 @@ pyflakes==2.4.0 pygments==2.11.2 # via # ipython - # jupyterlab-pygments - # nbconvert # sphinx pyjwt[crypto]==2.3.0 # via @@ -550,7 +490,7 @@ pyrsistent==0.18.1 # via jsonschema pyspark==3.2.1 # via feast (setup.py) -pytest==7.0.1 +pytest==7.1.1 # via # feast (setup.py) # pytest-benchmark @@ -583,12 +523,11 @@ python-dateutil==2.8.2 # botocore # google-cloud-bigquery # great-expectations - # jupyter-client # moto # pandas -python-dotenv==0.19.2 +python-dotenv==0.20.0 # via uvicorn -pytz==2021.3 +pytz==2022.1 # via # babel # google-api-core @@ -596,6 +535,7 @@ pytz==2021.3 # moto # pandas # snowflake-connector-python + # trino pytz-deprecation-shim==0.1.0.post0 # via tzlocal pyyaml==6.0 @@ -604,17 +544,9 @@ pyyaml==6.0 # feast (setup.py) # pre-commit # uvicorn -pyzmq==22.3.0 - # via - # jupyter-client - # notebook -redis==3.5.3 - # via - # feast (setup.py) - # redis-py-cluster -redis-py-cluster==2.1.3 +redis==4.2.2 # via feast (setup.py) -regex==2022.3.2 +regex==2022.3.15 # via black requests==2.27.1 # via @@ -636,39 +568,34 @@ requests==2.27.1 # responses # snowflake-connector-python # sphinx + # trino requests-oauthlib==1.3.1 # via # google-auth-oauthlib # msrest -responses==0.18.0 +responses==0.20.0 # via moto rsa==4.8 # via google-auth -ruamel.yaml==0.17.17 +ruamel-yaml==0.17.17 # via great-expectations -ruamel.yaml.clib==0.2.6 - # via ruamel.yaml +ruamel-yaml-clib==0.2.6 + # via ruamel-yaml s3transfer==0.5.2 # via boto3 scipy==1.8.0 # via great-expectations -send2trash==1.8.0 - # via notebook 
six==1.16.0 # via # absl-py - # asttokens # azure-core # azure-identity - # bleach - # cryptography # google-api-core # google-auth # google-auth-httplib2 # google-cloud-core # google-resumable-media # grpcio - # isodate # mock # msrestazure # pandavro @@ -679,7 +606,7 @@ sniffio==1.2.0 # via anyio snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==2.7.4 +snowflake-connector-python[pandas]==2.7.6 # via feast (setup.py) sphinx==4.3.2 # via @@ -711,12 +638,8 @@ tensorflow-metadata==1.7.0 # via feast (setup.py) termcolor==1.1.0 # via great-expectations -terminado==0.13.2 - # via notebook -testcontainers==3.4.2 +testcontainers==3.5.3 # via feast (setup.py) -testpath==0.6.0 - # via nbconvert toml==0.10.2 # via # black @@ -733,49 +656,39 @@ toolz==0.11.2 # altair # dask # partd -tornado==6.1 - # via - # ipykernel - # jupyter-client - # notebook - # terminado -tqdm==4.63.0 +tqdm==4.64.0 # via # feast (setup.py) # great-expectations traitlets==5.1.1 # via - # ipykernel # ipython - # ipywidgets - # jupyter-client # jupyter-core # matplotlib-inline - # nbclient - # nbconvert # nbformat - # notebook +trino==0.312.0 + # via feast (setup.py) typed-ast==1.5.2 # via black -types-protobuf==3.19.12 +types-protobuf==3.19.15 # via # feast (setup.py) # mypy-protobuf -types-python-dateutil==2.8.9 +types-python-dateutil==2.8.10 # via feast (setup.py) -types-pytz==2021.3.5 +types-pytz==2021.3.6 # via feast (setup.py) -types-pyyaml==6.0.4 +types-pyyaml==6.0.5 # via feast (setup.py) -types-redis==4.1.17 +types-redis==4.1.19 # via feast (setup.py) -types-requests==2.27.11 +types-requests==2.27.16 # via feast (setup.py) -types-setuptools==57.4.9 +types-setuptools==57.4.12 # via feast (setup.py) -types-tabulate==0.8.5 +types-tabulate==0.8.6 # via feast (setup.py) -types-urllib3==1.26.10 +types-urllib3==1.26.11 # via types-requests typing-extensions==4.1.1 # via @@ -783,48 +696,47 @@ typing-extensions==4.1.1 # great-expectations # mypy # pydantic -tzdata==2021.5 
+tzdata==2022.1 # via pytz-deprecation-shim -tzlocal==4.1 +tzlocal==4.2 # via great-expectations uritemplate==4.1.1 # via google-api-python-client -urllib3==1.26.8 +urllib3==1.26.9 # via # botocore # feast (setup.py) + # great-expectations # minio # requests # responses -uvicorn[standard]==0.17.5 +uvicorn[standard]==0.17.6 # via feast (setup.py) uvloop==0.16.0 # via uvicorn -virtualenv==20.13.2 +virtualenv==20.14.1 # via pre-commit -watchgod==0.7 +watchgod==0.8.2 # via uvicorn wcwidth==0.2.5 # via prompt-toolkit -webencodings==0.5.1 - # via bleach -websocket-client==1.3.1 +websocket-client==1.3.2 # via docker websockets==10.2 # via uvicorn -werkzeug==2.0.3 +werkzeug==2.1.1 # via moto wheel==0.37.1 # via pip-tools -widgetsnbextension==3.5.2 - # via ipywidgets -wrapt==1.13.3 - # via testcontainers +wrapt==1.14.0 + # via + # deprecated + # testcontainers xmltodict==0.12.0 # via moto yarl==1.7.2 # via aiohttp -zipp==3.7.0 +zipp==3.8.0 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 67ef8ada9e..6413886c5b 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -7,18 +7,20 @@ absl-py==1.0.0 # via tensorflow-metadata anyio==3.5.0 - # via starlette -asgiref==3.4.1 + # via + # starlette + # watchgod +asgiref==3.5.0 # via uvicorn attrs==21.4.0 # via jsonschema -cachetools==4.2.4 +cachetools==5.0.0 # via google-auth certifi==2021.10.8 # via requests -charset-normalizer==2.0.10 +charset-normalizer==2.0.12 # via requests -click==8.0.3 +click==8.1.2 # via # feast (setup.py) # uvicorn @@ -30,55 +32,55 @@ dask==2022.1.1 # via feast (setup.py) dill==0.3.4 # via feast (setup.py) -fastapi==0.72.0 +fastapi==0.75.1 # via feast (setup.py) -fastavro==1.4.9 +fastavro==1.4.10 # via # feast (setup.py) # pandavro -fsspec==2022.2.0 +fsspec==2022.3.0 # via dask 
-google-api-core==2.4.0 +google-api-core==2.7.2 # via feast (setup.py) -google-auth==2.3.3 +google-auth==2.6.5 # via google-api-core googleapis-common-protos==1.52.0 # via # feast (setup.py) # google-api-core # tensorflow-metadata -grpcio==1.43.0 +grpcio==1.44.0 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.43.0 +grpcio-reflection==1.44.0 # via feast (setup.py) h11==0.13.0 # via uvicorn -httptools==0.3.0 +httptools==0.4.0 # via uvicorn idna==3.3 # via # anyio # requests -jinja2==3.0.3 +jinja2==3.1.1 # via feast (setup.py) jsonschema==4.4.0 # via feast (setup.py) locket==0.2.1 # via partd -markupsafe==2.0.1 +markupsafe==2.1.1 # via jinja2 mmh3==3.0.0 # via feast (setup.py) -numpy==1.21.5 +numpy==1.22.3 # via # pandas # pandavro # pyarrow packaging==21.3 # via dask -pandas==1.3.5 +pandas==1.4.2 # via # feast (setup.py) # pandavro @@ -88,7 +90,7 @@ partd==1.2.0 # via dask proto-plus==1.19.6 # via feast (setup.py) -protobuf==3.19.3 +protobuf==3.19.4 # via # feast (setup.py) # google-api-core @@ -96,7 +98,7 @@ protobuf==3.19.3 # grpcio-reflection # proto-plus # tensorflow-metadata -pyarrow==6.0.1 +pyarrow==7.0.0 # via feast (setup.py) pyasn1==0.4.8 # via @@ -108,15 +110,15 @@ pydantic==1.9.0 # via # fastapi # feast (setup.py) -pyparsing==3.0.7 +pyparsing==3.0.8 # via packaging pyrsistent==0.18.1 # via jsonschema python-dateutil==2.8.2 # via pandas -python-dotenv==0.19.2 +python-dotenv==0.20.0 # via uvicorn -pytz==2021.3 +pytz==2022.1 # via pandas pyyaml==6.0 # via @@ -142,7 +144,7 @@ tabulate==0.8.9 # via feast (setup.py) tenacity==8.0.1 # via feast (setup.py) -tensorflow-metadata==1.6.0 +tensorflow-metadata==1.7.0 # via feast (setup.py) toml==0.10.2 # via feast (setup.py) @@ -150,20 +152,17 @@ toolz==0.11.2 # via # dask # partd -tqdm==4.62.3 +tqdm==4.64.0 # via feast (setup.py) -typing-extensions==4.0.1 +typing-extensions==4.1.1 # via pydantic -urllib3==1.26.8 +urllib3==1.26.9 # via requests -uvicorn[standard]==0.17.0 +uvicorn[standard]==0.17.6 # 
via feast (setup.py) uvloop==0.16.0 # via uvicorn -watchgod==0.7 +watchgod==0.8.2 # via uvicorn -websockets==10.1 +websockets==10.2 # via uvicorn - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/sdk/python/setup.py b/sdk/python/setup.py index cc883da95e..ed1a1a7f9f 100644 --- a/sdk/python/setup.py +++ b/sdk/python/setup.py @@ -11,28 +11,33 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import copy import glob +import json import os import pathlib import re import shutil import subprocess +import sys from distutils.cmd import Command +from distutils.dir_util import copy_tree from pathlib import Path from subprocess import CalledProcessError -from setuptools import find_packages +from setuptools import find_packages, Extension try: from setuptools import setup from setuptools.command.build_py import build_py + from setuptools.command.build_ext import build_ext as _build_ext from setuptools.command.develop import develop from setuptools.command.install import install - from setuptools.dist import Distribution + except ImportError: from distutils.command.build_py import build_py + from distutils.command.build_ext import build_ext as _build_ext from distutils.core import setup - from distutils.dist import Distribution NAME = "feast" DESCRIPTION = "Python SDK for Feast" @@ -71,7 +76,7 @@ ] GCP_REQUIRED = [ - "google-cloud-bigquery>=2.28.1", + "google-cloud-bigquery>=2,<3", "google-cloud-bigquery-storage >= 2.0.0", "google-cloud-datastore>=2.1.*", "google-cloud-storage>=1.34.*,<1.41", @@ -79,8 +84,7 @@ ] REDIS_REQUIRED = [ - "redis==3.5.3", - "redis-py-cluster>=2.1.3", + "redis==4.2.2", "hiredis>=2.0.0", ] @@ -107,7 +111,7 @@ CI_REQUIRED = ( [ - "cryptography==3.3.2", + "cryptography==3.4.8", "flake8", "black==19.10b0", "isort>=5", @@ -189,7 +193,7 @@ class 
BuildPythonProtosCommand(Command): def initialize_options(self): self.python_protoc = [ - "python", + sys.executable, "-m", "grpc_tools.protoc", ] # find_executable("protoc") @@ -292,7 +296,7 @@ class BuildGoProtosCommand(Command): def initialize_options(self): self.go_protoc = [ - "python", + sys.executable, "-m", "grpc_tools.protoc", ] # find_executable("protoc") @@ -331,44 +335,6 @@ def run(self): self._generate_go_protos(f"feast/{sub_folder}/*.proto") -class BuildGoEmbeddedCommand(Command): - description = "Builds Go embedded library" - user_options = [] - - def initialize_options(self) -> None: - self.path_val = _generate_path_with_gopath() - - self.go_env = {} - for var in ("GOCACHE", "GOPATH"): - self.go_env[var] = subprocess \ - .check_output(["go", "env", var]) \ - .decode("utf-8") \ - .strip() - - def finalize_options(self) -> None: - pass - - def _compile_embedded_lib(self): - print("Compile embedded go") - subprocess.check_call([ - "gopy", - "build", - "-output", - "feast/embedded_go/lib", - "-vm", - "python3", - "-no-make", - "github.com/feast-dev/feast/go/embedded" - ], env={ - "PATH": self.path_val, - "CGO_LDFLAGS_ALLOW": ".*", - **self.go_env, - }) - - def run(self): - self._compile_embedded_lib() - - class BuildCommand(build_py): """Custom build command.""" @@ -377,7 +343,7 @@ def run(self): if os.getenv("COMPILE_GO", "false").lower() == "true": _ensure_go_and_proto_toolchain() self.run_command("build_go_protos") - self.run_command("build_go_lib") + build_py.run(self) @@ -389,15 +355,61 @@ def run(self): if os.getenv("COMPILE_GO", "false").lower() == "true": _ensure_go_and_proto_toolchain() self.run_command("build_go_protos") - self.run_command("build_go_lib") + develop.run(self) -class BinaryDistribution(Distribution): - """Distribution which forces a binary package with platform name - when go compilation is enabled""" - def has_ext_modules(self): - return os.getenv("COMPILE_GO", "false").lower() == "true" +class build_ext(_build_ext): + def 
finalize_options(self) -> None: + super().finalize_options() + if os.getenv("COMPILE_GO", "false").lower() == "false": + self.extensions = [e for e in self.extensions if not self._is_go_ext(e)] + + def _is_go_ext(self, ext: Extension): + return any(source.endswith('.go') or source.startswith('github') for source in ext.sources) + + def build_extension(self, ext: Extension): + if not self._is_go_ext(ext): + # the base class may mutate `self.compiler` + compiler = copy.deepcopy(self.compiler) + self.compiler, compiler = compiler, self.compiler + try: + return _build_ext.build_extension(self, ext) + finally: + self.compiler, compiler = compiler, self.compiler + + bin_path = _generate_path_with_gopath() + go_env = json.loads( + subprocess.check_output(["go", "env", "-json"]).decode("utf-8").strip() + ) + + destination = os.path.dirname(os.path.abspath(self.get_ext_fullpath(ext.name))) + subprocess.check_call([ + "gopy", + "build", + "-output", + destination, + "-vm", + sys.executable, + "-no-make", + *ext.sources + ], env={ + "PATH": bin_path, + "CGO_LDFLAGS_ALLOW": ".*", + **go_env, + }) + + def copy_extensions_to_source(self): + build_py = self.get_finalized_command('build_py') + for ext in self.extensions: + fullname = self.get_ext_fullname(ext.name) + modpath = fullname.split('.') + package = '.'.join(modpath[:-1]) + package_dir = build_py.get_package_dir(package) + src = os.path.join(self.build_lib, package_dir) + + # copy whole directory + copy_tree(src, package_dir) setup( @@ -452,9 +464,10 @@ def has_ext_modules(self): cmdclass={ "build_python_protos": BuildPythonProtosCommand, "build_go_protos": BuildGoProtosCommand, - "build_go_lib": BuildGoEmbeddedCommand, "build_py": BuildCommand, "develop": DevelopCommand, + "build_ext": build_ext, }, - distclass=BinaryDistribution, # generate wheel with platform-specific name + ext_modules=[Extension('feast.embedded_go.lib._embedded', + ["github.com/feast-dev/feast/go/embedded"])], ) diff --git 
a/sdk/python/tests/example_repos/example_feature_repo_1.py b/sdk/python/tests/example_repos/example_feature_repo_1.py index 76b42b2241..d8b6d7c89b 100644 --- a/sdk/python/tests/example_repos/example_feature_repo_1.py +++ b/sdk/python/tests/example_repos/example_feature_repo_1.py @@ -40,26 +40,19 @@ ) driver_locations_push_source = PushSource( - name="driver_locations_push", - schema=[ - Field(name="driver_id", dtype=String), - Field(name="driver_lat", dtype=Float32), - Field(name="driver_long", dtype=String), - ], - batch_source=driver_locations_source, - timestamp_field="event_timestamp", + name="driver_locations_push", batch_source=driver_locations_source, ) driver = Entity( name="driver", # The name is derived from this argument, not object name. - join_key="driver_id", + join_keys=["driver_id"], value_type=ValueType.INT64, description="driver id", ) customer = Entity( name="customer", # The name is derived from this argument, not object name. - join_key="customer_id", + join_keys=["customer_id"], value_type=ValueType.STRING, ) diff --git a/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py b/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py index 20ff666bd9..cbcc3ad172 100644 --- a/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py +++ b/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py @@ -10,7 +10,7 @@ name="driver_hourly_stats", # Intentionally use the same FeatureView name entities=["driver_id"], online=False, - batch_source=driver_hourly_stats, + source=driver_hourly_stats, ttl=timedelta(days=1), tags={}, ) @@ -19,7 +19,7 @@ name="driver_hourly_stats", # Intentionally use the same FeatureView name entities=["driver_id"], online=False, - batch_source=driver_hourly_stats, + source=driver_hourly_stats, ttl=timedelta(days=1), tags={}, ) diff --git 
a/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py b/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py index 3e1bbbba77..ba18cf84ba 100644 --- a/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py +++ b/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py @@ -15,7 +15,7 @@ name="driver_id", value_type=ValueType.INT64, description="driver id", - join_key="driver", + join_keys=["driver"], ) diff --git a/sdk/python/tests/integration/e2e/test_python_feature_server.py b/sdk/python/tests/integration/e2e/test_python_feature_server.py new file mode 100644 index 0000000000..a3048300a3 --- /dev/null +++ b/sdk/python/tests/integration/e2e/test_python_feature_server.py @@ -0,0 +1,121 @@ +import contextlib +import json +from datetime import datetime +from typing import List + +import pytest +from fastapi.testclient import TestClient + +from feast.feast_object import FeastObject +from feast.feature_server import get_app +from tests.integration.feature_repos.integration_test_repo_config import ( + IntegrationTestRepoConfig, +) +from tests.integration.feature_repos.repo_configuration import ( + construct_test_environment, + construct_universal_feature_views, + construct_universal_test_data, +) +from tests.integration.feature_repos.universal.entities import ( + customer, + driver, + location, +) + + +@pytest.mark.integration +@pytest.mark.universal +def test_get_online_features(): + with setup_python_fs_client() as client: + request_data_dict = { + "features": [ + "driver_stats:conv_rate", + "driver_stats:acc_rate", + "driver_stats:avg_daily_trips", + ], + "entities": {"driver_id": [5001, 5002]}, + } + response = client.post( + "/get-online-features", data=json.dumps(request_data_dict) + ) + + # Check entities and features are present + parsed_response = json.loads(response.text) + assert "metadata" in parsed_response + metadata = parsed_response["metadata"] + expected_features = 
["driver_id", "conv_rate", "acc_rate", "avg_daily_trips"] + response_feature_names = metadata["feature_names"] + assert len(response_feature_names) == len(expected_features) + for expected_feature in expected_features: + assert expected_feature in response_feature_names + assert "results" in parsed_response + results = parsed_response["results"] + for result in results: + # Same order as in metadata + assert len(result["statuses"]) == 2 # Requested two entities + for status in result["statuses"]: + assert status == "PRESENT" + results_driver_id_index = response_feature_names.index("driver_id") + assert ( + results[results_driver_id_index]["values"] + == request_data_dict["entities"]["driver_id"] + ) + + +@pytest.mark.integration +@pytest.mark.universal +def test_push(): + with setup_python_fs_client() as client: + initial_temp = get_temperatures(client, location_ids=[1])[0] + json_data = json.dumps( + { + "push_source_name": "location_stats_push_source", + "df": { + "location_id": [1], + "temperature": [initial_temp * 100], + "event_timestamp": [str(datetime.utcnow())], + "created": [str(datetime.utcnow())], + }, + } + ) + response = client.post("/push", data=json_data,) + + # Check new pushed temperature is fetched + assert response.status_code == 200 + assert get_temperatures(client, location_ids=[1]) == [initial_temp * 100] + + +def get_temperatures(client, location_ids: List[int]): + get_request_data = { + "features": ["pushable_location_stats:temperature"], + "entities": {"location_id": location_ids}, + } + response = client.post("/get-online-features", data=json.dumps(get_request_data)) + parsed_response = json.loads(response.text) + assert "metadata" in parsed_response + metadata = parsed_response["metadata"] + response_feature_names = metadata["feature_names"] + assert "results" in parsed_response + results = parsed_response["results"] + results_temperature_index = response_feature_names.index("temperature") + return 
results[results_temperature_index]["values"] + + +@contextlib.contextmanager +def setup_python_fs_client(): + config = IntegrationTestRepoConfig() + environment = construct_test_environment(config) + fs = environment.feature_store + try: + entities, datasets, data_sources = construct_universal_test_data(environment) + feature_views = construct_universal_feature_views(data_sources) + feast_objects: List[FeastObject] = [] + feast_objects.extend(feature_views.values()) + feast_objects.extend([driver(), customer(), location()]) + fs.apply(feast_objects) + fs.materialize(environment.start_date, environment.end_date) + client = TestClient(get_app(fs)) + yield client + finally: + fs.teardown() + environment.data_source_creator.teardown() diff --git a/sdk/python/tests/integration/feature_repos/universal/entities.py b/sdk/python/tests/integration/feature_repos/universal/entities.py index e8e90a6af6..b7a7583f1b 100644 --- a/sdk/python/tests/integration/feature_repos/universal/entities.py +++ b/sdk/python/tests/integration/feature_repos/universal/entities.py @@ -6,7 +6,7 @@ def driver(value_type: ValueType = ValueType.INT64): name="driver", # The name is derived from this argument, not object name. 
value_type=value_type, description="driver id", - join_key="driver_id", + join_keys=["driver_id"], ) diff --git a/sdk/python/tests/integration/feature_repos/universal/feature_views.py b/sdk/python/tests/integration/feature_repos/universal/feature_views.py index 5918e36753..a6786528e1 100644 --- a/sdk/python/tests/integration/feature_repos/universal/feature_views.py +++ b/sdk/python/tests/integration/feature_repos/universal/feature_views.py @@ -13,7 +13,7 @@ ValueType, ) from feast.data_source import DataSource, RequestSource -from feast.types import Array, FeastType, Float32, Float64, Int32, Int64 +from feast.types import Array, FeastType, Float32, Float64, Int32 from tests.integration.feature_repos.universal.entities import location @@ -65,19 +65,19 @@ def conv_rate_plus_100(features_df: pd.DataFrame) -> pd.DataFrame: def conv_rate_plus_100_feature_view( sources: Dict[str, Union[RequestSource, FeatureView]], infer_features: bool = False, - features: Optional[List[Feature]] = None, + features: Optional[List[Field]] = None, ) -> OnDemandFeatureView: # Test that positional arguments and Features still work for ODFVs. 
_features = features or [ - Feature(name="conv_rate_plus_100", dtype=ValueType.DOUBLE), - Feature(name="conv_rate_plus_val_to_add", dtype=ValueType.DOUBLE), - Feature(name="conv_rate_plus_100_rounded", dtype=ValueType.INT32), + Field(name="conv_rate_plus_100", dtype=Float64), + Field(name="conv_rate_plus_val_to_add", dtype=Float64), + Field(name="conv_rate_plus_100_rounded", dtype=Int32), ] return OnDemandFeatureView( - conv_rate_plus_100.__name__, - [] if infer_features else _features, - sources, - conv_rate_plus_100, + name=conv_rate_plus_100.__name__, + schema=[] if infer_features else _features, + sources=sources, + udf=conv_rate_plus_100, ) @@ -237,13 +237,7 @@ def create_field_mapping_feature_view(source): def create_pushable_feature_view(batch_source: DataSource): push_source = PushSource( - name="location_stats_push_source", - schema=[ - Field(name="location_id", dtype=Int64), - Field(name="temperature", dtype=Int32), - ], - timestamp_field="timestamp", - batch_source=batch_source, + name="location_stats_push_source", batch_source=batch_source, ) return FeatureView( name="pushable_location_stats", diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py index 0d6ef84ff4..b62f7cda24 100644 --- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py @@ -689,7 +689,7 @@ def test_historical_features_from_bigquery_sources_containing_backfills(environm created_timestamp_column="created", ) - driver = Entity(name="driver", join_key="driver_id", value_type=ValueType.INT64) + driver = Entity(name="driver", join_keys=["driver_id"], value_type=ValueType.INT64) driver_fv = FeatureView( name="driver_stats", entities=["driver"], diff --git a/sdk/python/tests/integration/registration/test_feature_store.py 
b/sdk/python/tests/integration/registration/test_feature_store.py index 39de7fc688..ca61734c78 100644 --- a/sdk/python/tests/integration/registration/test_feature_store.py +++ b/sdk/python/tests/integration/registration/test_feature_store.py @@ -219,7 +219,9 @@ def test_feature_view_inference_success(test_feature_store, dataframe_source): with prep_file_source( df=dataframe_source, event_timestamp_column="ts_1" ) as file_source: - entity = Entity(name="id", join_key="id_join_key", value_type=ValueType.INT64) + entity = Entity( + name="id", join_keys=["id_join_key"], value_type=ValueType.INT64 + ) fv1 = FeatureView( name="fv1", @@ -436,7 +438,7 @@ def test_reapply_feature_view_success(test_feature_store, dataframe_source): df=dataframe_source, event_timestamp_column="ts_1" ) as file_source: - e = Entity(name="id", join_key="id_join_key", value_type=ValueType.STRING) + e = Entity(name="id", join_keys=["id_join_key"], value_type=ValueType.STRING) # Create Feature View fv1 = FeatureView( diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index 526f422e9d..d41a4fdbc1 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -53,8 +53,8 @@ def test_update_entities_with_inferred_types_from_feature_views( name="fv2", entities=["id"], batch_source=file_source_2, ttl=None, ) - actual_1 = Entity(name="id", join_key="id_join_key") - actual_2 = Entity(name="id", join_key="id_join_key") + actual_1 = Entity(name="id", join_keys=["id_join_key"]) + actual_2 = Entity(name="id", join_keys=["id_join_key"]) update_entities_with_inferred_types_from_feature_views( [actual_1], [fv1], RepoConfig(provider="local", project="test") @@ -63,16 +63,16 @@ def test_update_entities_with_inferred_types_from_feature_views( [actual_2], [fv2], RepoConfig(provider="local", project="test") ) assert actual_1 == Entity( - name="id", 
join_key="id_join_key", value_type=ValueType.INT64 + name="id", join_keys=["id_join_key"], value_type=ValueType.INT64 ) assert actual_2 == Entity( - name="id", join_key="id_join_key", value_type=ValueType.STRING + name="id", join_keys=["id_join_key"], value_type=ValueType.STRING ) with pytest.raises(RegistryInferenceFailure): # two viable data types update_entities_with_inferred_types_from_feature_views( - [Entity(name="id", join_key="id_join_key")], + [Entity(name="id", join_keys=["id_join_key"])], [fv1, fv2], RepoConfig(provider="local", project="test"), ) @@ -229,7 +229,7 @@ def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame: @pytest.mark.parametrize( "request_source_schema", [ - [Field(name="some_date", dtype=PrimitiveFeastType.UNIX_TIMESTAMP)], + [Field(name="some_date", dtype=UnixTimestamp)], {"some_date": ValueType.UNIX_TIMESTAMP}, ], ) diff --git a/sdk/python/tests/integration/registration/test_registry.py b/sdk/python/tests/integration/registration/test_registry.py index 072be15bfe..5f72fb7125 100644 --- a/sdk/python/tests/integration/registration/test_registry.py +++ b/sdk/python/tests/integration/registration/test_registry.py @@ -29,7 +29,7 @@ from feast.protos.feast.types import Value_pb2 as ValueProto from feast.registry import Registry from feast.repo_config import RegistryConfig -from feast.types import Array, Bytes, Float32, Int32, Int64, PrimitiveFeastType, String +from feast.types import Array, Bytes, Float32, Int32, Int64, String from feast.value_type import ValueType @@ -240,10 +240,7 @@ def test_apply_feature_view_success(test_registry): # TODO(kevjumba): remove this in feast 0.23 when deprecating @pytest.mark.parametrize( "request_source_schema", - [ - [Field(name="my_input_1", dtype=PrimitiveFeastType.INT32)], - {"my_input_1": ValueType.INT32}, - ], + [[Field(name="my_input_1", dtype=Int32)], {"my_input_1": ValueType.INT32}], ) def test_modify_feature_views_success(test_registry, request_source_schema): # Create 
Feature Views diff --git a/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py b/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py index 7d6da0dc06..e1be890e57 100644 --- a/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py +++ b/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py @@ -11,6 +11,8 @@ DynamoDBOnlineStoreConfig, DynamoDBTable, ) +from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto +from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.repo_config import RepoConfig from tests.utils.online_store_utils import ( _create_n_customer_test_samples, @@ -49,7 +51,6 @@ def test_online_store_config_default(): assert dynamodb_store_config.batch_size == 40 assert dynamodb_store_config.endpoint_url is None assert dynamodb_store_config.region == aws_region - assert dynamodb_store_config.sort_response is True assert dynamodb_store_config.table_name_template == "{project}.{table_name}" @@ -70,20 +71,17 @@ def test_online_store_config_custom_params(): aws_region = "us-west-2" batch_size = 20 endpoint_url = "http://localhost:8000" - sort_response = False table_name_template = "feast_test.dynamodb_table" dynamodb_store_config = DynamoDBOnlineStoreConfig( region=aws_region, batch_size=batch_size, endpoint_url=endpoint_url, - sort_response=sort_response, table_name_template=table_name_template, ) assert dynamodb_store_config.type == "dynamodb" assert dynamodb_store_config.batch_size == batch_size assert dynamodb_store_config.endpoint_url == endpoint_url assert dynamodb_store_config.region == aws_region - assert dynamodb_store_config.sort_response == sort_response assert dynamodb_store_config.table_name_template == table_name_template @@ -175,6 +173,42 @@ def test_online_read(repo_config, n_samples): assert [item[1] for item in returned_items] == list(features) +@mock_dynamodb2 +def test_online_read_unknown_entity(repo_config): + """Test 
DynamoDBOnlineStore online_read method.""" + n_samples = 2 + _create_test_table(PROJECT, f"{TABLE_NAME}_{n_samples}", REGION) + data = _create_n_customer_test_samples(n=n_samples) + _insert_data_test_table(data, PROJECT, f"{TABLE_NAME}_{n_samples}", REGION) + + entity_keys, features, *rest = zip(*data) + # Append a nonsensical entity to search for + entity_keys = list(entity_keys) + features = list(features) + dynamodb_store = DynamoDBOnlineStore() + + # Have the unknown entity be in the beginning, middle, and end of the list of entities. + for pos in range(len(entity_keys)): + entity_keys_with_unknown = deepcopy(entity_keys) + entity_keys_with_unknown.insert( + pos, + EntityKeyProto( + join_keys=["customer"], entity_values=[ValueProto(string_val="12359")] + ), + ) + features_with_none = deepcopy(features) + features_with_none.insert(pos, None) + returned_items = dynamodb_store.online_read( + config=repo_config, + table=MockFeatureView(name=f"{TABLE_NAME}_{n_samples}"), + entity_keys=entity_keys_with_unknown, + ) + assert len(returned_items) == len(entity_keys_with_unknown) + assert [item[1] for item in returned_items] == list(features_with_none) + # The order should match the original entity key order + assert returned_items[pos] == (None, None) + + @mock_dynamodb2 def test_write_batch_non_duplicates(repo_config): """Test DynamoDBOnline Store deduplicate write batch request items.""" diff --git a/sdk/python/tests/unit/test_data_sources.py b/sdk/python/tests/unit/test_data_sources.py index ceb9ff4ce6..6bd4baf4fa 100644 --- a/sdk/python/tests/unit/test_data_sources.py +++ b/sdk/python/tests/unit/test_data_sources.py @@ -1,32 +1,29 @@ import pytest from feast import ValueType -from feast.data_source import PushSource, RequestDataSource, RequestSource +from feast.data_format import ProtoFormat +from feast.data_source import ( + KafkaSource, + KinesisSource, + PushSource, + RequestDataSource, + RequestSource, +) from feast.field import Field from 
feast.infra.offline_stores.bigquery_source import BigQuerySource -from feast.types import PrimitiveFeastType +from feast.types import Bool, Float32, Int64 def test_push_with_batch(): push_source = PushSource( - name="test", - schema=[ - Field(name="f1", dtype=PrimitiveFeastType.FLOAT32), - Field(name="f2", dtype=PrimitiveFeastType.BOOL), - ], - timestamp_field="event_timestamp", - batch_source=BigQuerySource(table="test.test"), + name="test", batch_source=BigQuerySource(table="test.test"), ) push_source_proto = push_source.to_proto() assert push_source_proto.HasField("batch_source") - assert push_source_proto.timestamp_field is not None - assert push_source_proto.push_options is not None push_source_unproto = PushSource.from_proto(push_source_proto) assert push_source.name == push_source_unproto.name - assert push_source.schema == push_source_unproto.schema - assert push_source.timestamp_field == push_source_unproto.timestamp_field assert push_source.batch_source.name == push_source_unproto.batch_source.name @@ -43,8 +40,8 @@ def test_request_data_source_deprecation(): def test_request_source_primitive_type_to_proto(): schema = [ - Field(name="f1", dtype=PrimitiveFeastType.FLOAT32), - Field(name="f2", dtype=PrimitiveFeastType.BOOL), + Field(name="f1", dtype=Float32), + Field(name="f2", dtype=Bool), ] request_source = RequestSource( name="source", schema=schema, description="desc", tags={}, owner="feast", @@ -52,3 +49,99 @@ def test_request_source_primitive_type_to_proto(): request_proto = request_source.to_proto() deserialized_request_source = RequestSource.from_proto(request_proto) assert deserialized_request_source == request_source + + +def test_hash(): + push_source_1 = PushSource( + name="test", batch_source=BigQuerySource(table="test.test"), + ) + push_source_2 = PushSource( + name="test", batch_source=BigQuerySource(table="test.test"), + ) + push_source_3 = PushSource( + name="test", batch_source=BigQuerySource(table="test.test2"), + ) + push_source_4 = 
PushSource( + name="test", + batch_source=BigQuerySource(table="test.test2"), + description="test", + ) + + s1 = {push_source_1, push_source_2} + assert len(s1) == 1 + + s2 = {push_source_1, push_source_3} + assert len(s2) == 2 + + s3 = {push_source_3, push_source_4} + assert len(s3) == 2 + + s4 = {push_source_1, push_source_2, push_source_3, push_source_4} + assert len(s4) == 3 + + +# TODO(kevjumba): Remove this test in feast 0.23 when positional arguments are removed. +def test_default_data_source_kw_arg_warning(): + # source_class = request.param + with pytest.warns(DeprecationWarning): + source = KafkaSource( + "name", "column", "bootstrap_servers", ProtoFormat("class_path"), "topic" + ) + assert source.name == "name" + assert source.timestamp_field == "column" + assert source.kafka_options.bootstrap_servers == "bootstrap_servers" + assert source.kafka_options.topic == "topic" + with pytest.raises(ValueError): + KafkaSource("name", "column", "bootstrap_servers", topic="topic") + + with pytest.warns(DeprecationWarning): + source = KinesisSource( + "name", + "column", + "c_column", + ProtoFormat("class_path"), + "region", + "stream_name", + ) + assert source.name == "name" + assert source.timestamp_field == "column" + assert source.created_timestamp_column == "c_column" + assert source.kinesis_options.region == "region" + assert source.kinesis_options.stream_name == "stream_name" + + with pytest.raises(ValueError): + KinesisSource( + "name", "column", "c_column", region="region", stream_name="stream_name" + ) + + with pytest.warns(DeprecationWarning): + source = RequestSource( + "name", [Field(name="val_to_add", dtype=Int64)], description="description" + ) + assert source.name == "name" + assert source.description == "description" + + with pytest.raises(ValueError): + RequestSource("name") + + with pytest.warns(DeprecationWarning): + source = PushSource( + "name", + BigQuerySource(name="bigquery_source", table="table"), + description="description", + ) + assert 
source.name == "name" + assert source.description == "description" + assert source.batch_source.name == "bigquery_source" + + with pytest.raises(ValueError): + PushSource("name") + + # No name warning for DataSource + with pytest.warns(UserWarning): + source = KafkaSource( + event_timestamp_column="column", + bootstrap_servers="bootstrap_servers", + message_format=ProtoFormat("class_path"), + topic="topic", + ) diff --git a/sdk/python/tests/unit/test_entity.py b/sdk/python/tests/unit/test_entity.py index fee8bd9f00..254a975f67 100644 --- a/sdk/python/tests/unit/test_entity.py +++ b/sdk/python/tests/unit/test_entity.py @@ -63,3 +63,22 @@ def test_multiple_args(): def test_name_keyword(recwarn): Entity(name="my-entity", value_type=ValueType.STRING) assert len(recwarn) == 0 + + +def test_hash(): + entity1 = Entity(name="my-entity", value_type=ValueType.STRING) + entity2 = Entity(name="my-entity", value_type=ValueType.STRING) + entity3 = Entity(name="my-entity", value_type=ValueType.FLOAT) + entity4 = Entity(name="my-entity", value_type=ValueType.FLOAT, description="test") + + s1 = {entity1, entity2} + assert len(s1) == 1 + + s2 = {entity1, entity3} + assert len(s2) == 2 + + s3 = {entity3, entity4} + assert len(s3) == 2 + + s4 = {entity1, entity2, entity3, entity4} + assert len(s4) == 3 diff --git a/sdk/python/tests/unit/test_feature_service.py b/sdk/python/tests/unit/test_feature_service.py index 522ac49de1..fc4fd70bcb 100644 --- a/sdk/python/tests/unit/test_feature_service.py +++ b/sdk/python/tests/unit/test_feature_service.py @@ -1,4 +1,10 @@ -from feast import FeatureService +import pytest + +from feast.feature_service import FeatureService +from feast.feature_view import FeatureView +from feast.field import Field +from feast.infra.offline_stores.file_source import FileSource +from feast.types import Float32 def test_feature_service_with_description(): @@ -12,3 +18,88 @@ def test_feature_service_without_description(): feature_service = 
FeatureService(name="my-feature-service", features=[]) # assert feature_service.to_proto().spec.description == "" + + +def test_hash(): + file_source = FileSource(name="my-file-source", path="test.parquet") + feature_view = FeatureView( + name="my-feature-view", + entities=[], + schema=[ + Field(name="feature1", dtype=Float32), + Field(name="feature2", dtype=Float32), + ], + source=file_source, + ) + feature_service_1 = FeatureService( + name="my-feature-service", features=[feature_view[["feature1", "feature2"]]] + ) + feature_service_2 = FeatureService( + name="my-feature-service", features=[feature_view[["feature1", "feature2"]]] + ) + feature_service_3 = FeatureService( + name="my-feature-service", features=[feature_view[["feature1"]]] + ) + feature_service_4 = FeatureService( + name="my-feature-service", + features=[feature_view[["feature1"]]], + description="test", + ) + + s1 = {feature_service_1, feature_service_2} + assert len(s1) == 1 + + s2 = {feature_service_1, feature_service_3} + assert len(s2) == 2 + + s3 = {feature_service_3, feature_service_4} + assert len(s3) == 2 + + s4 = {feature_service_1, feature_service_2, feature_service_3, feature_service_4} + assert len(s4) == 3 + + +def test_feature_view_kw_args_warning(): + with pytest.warns(DeprecationWarning): + service = FeatureService("name", [], tags={"tag_1": "tag"}, description="desc") + assert service.name == "name" + assert service.tags == {"tag_1": "tag"} + assert service.description == "desc" + + # More positional args than name and features + with pytest.raises(ValueError): + service = FeatureService("name", [], {"tag_1": "tag"}, "desc") + + # No name defined. 
+ with pytest.raises(ValueError): + service = FeatureService(features=[], tags={"tag_1": "tag"}, description="desc") + + +def no_warnings(func): + def wrapper_no_warnings(*args, **kwargs): + with pytest.warns(None) as warnings: + func(*args, **kwargs) + + if len(warnings) > 0: + raise AssertionError( + "Warnings were raised: " + ", ".join([str(w) for w in warnings]) + ) + + return wrapper_no_warnings + + +@no_warnings +def test_feature_view_kw_args_normal(): + file_source = FileSource(name="my-file-source", path="test.parquet") + feature_view = FeatureView( + name="my-feature-view", + entities=[], + schema=[ + Field(name="feature1", dtype=Float32), + Field(name="feature2", dtype=Float32), + ], + source=file_source, + ) + _ = FeatureService( + name="my-feature-service", features=[feature_view[["feature1", "feature2"]]] + ) diff --git a/sdk/python/tests/unit/test_feature_view.py b/sdk/python/tests/unit/test_feature_view.py new file mode 100644 index 0000000000..80a583806e --- /dev/null +++ b/sdk/python/tests/unit/test_feature_view.py @@ -0,0 +1,64 @@ +# Copyright 2022 The Feast Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from feast.feature_view import FeatureView +from feast.field import Field +from feast.infra.offline_stores.file_source import FileSource +from feast.types import Float32 + + +def test_hash(): + file_source = FileSource(name="my-file-source", path="test.parquet") + feature_view_1 = FeatureView( + name="my-feature-view", + entities=[], + schema=[ + Field(name="feature1", dtype=Float32), + Field(name="feature2", dtype=Float32), + ], + source=file_source, + ) + feature_view_2 = FeatureView( + name="my-feature-view", + entities=[], + schema=[ + Field(name="feature1", dtype=Float32), + Field(name="feature2", dtype=Float32), + ], + source=file_source, + ) + feature_view_3 = FeatureView( + name="my-feature-view", + entities=[], + schema=[Field(name="feature1", dtype=Float32)], + source=file_source, + ) + feature_view_4 = FeatureView( + name="my-feature-view", + entities=[], + schema=[Field(name="feature1", dtype=Float32)], + source=file_source, + description="test", + ) + + s1 = {feature_view_1, feature_view_2} + assert len(s1) == 1 + + s2 = {feature_view_1, feature_view_3} + assert len(s2) == 2 + + s3 = {feature_view_3, feature_view_4} + assert len(s3) == 2 + + s4 = {feature_view_1, feature_view_2, feature_view_3, feature_view_4} + assert len(s4) == 3 diff --git a/sdk/python/tests/unit/test_on_demand_feature_view.py b/sdk/python/tests/unit/test_on_demand_feature_view.py new file mode 100644 index 0000000000..9d45cfbb0b --- /dev/null +++ b/sdk/python/tests/unit/test_on_demand_feature_view.py @@ -0,0 +1,102 @@ +# Copyright 2022 The Feast Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pandas as pd + +from feast.feature_view import FeatureView +from feast.field import Field +from feast.infra.offline_stores.file_source import FileSource +from feast.on_demand_feature_view import OnDemandFeatureView +from feast.types import Float32 + + +def udf1(features_df: pd.DataFrame) -> pd.DataFrame: + df = pd.DataFrame() + df["output1"] = features_df["feature1"] + df["output2"] = features_df["feature2"] + return df + + +def udf2(features_df: pd.DataFrame) -> pd.DataFrame: + df = pd.DataFrame() + df["output1"] = features_df["feature1"] + 100 + df["output2"] = features_df["feature2"] + 100 + return df + + +def test_hash(): + file_source = FileSource(name="my-file-source", path="test.parquet") + feature_view = FeatureView( + name="my-feature-view", + entities=[], + schema=[ + Field(name="feature1", dtype=Float32), + Field(name="feature2", dtype=Float32), + ], + source=file_source, + ) + sources = {"my-feature-view": feature_view} + on_demand_feature_view_1 = OnDemandFeatureView( + name="my-on-demand-feature-view", + sources=sources, + schema=[ + Field(name="output1", dtype=Float32), + Field(name="output2", dtype=Float32), + ], + udf=udf1, + ) + on_demand_feature_view_2 = OnDemandFeatureView( + name="my-on-demand-feature-view", + sources=sources, + schema=[ + Field(name="output1", dtype=Float32), + Field(name="output2", dtype=Float32), + ], + udf=udf1, + ) + on_demand_feature_view_3 = OnDemandFeatureView( + name="my-on-demand-feature-view", + sources=sources, + schema=[ + Field(name="output1", dtype=Float32), + Field(name="output2", 
dtype=Float32), + ], + udf=udf2, + ) + on_demand_feature_view_4 = OnDemandFeatureView( + name="my-on-demand-feature-view", + sources=sources, + schema=[ + Field(name="output1", dtype=Float32), + Field(name="output2", dtype=Float32), + ], + udf=udf2, + description="test", + ) + + s1 = {on_demand_feature_view_1, on_demand_feature_view_2} + assert len(s1) == 1 + + s2 = {on_demand_feature_view_1, on_demand_feature_view_3} + assert len(s2) == 2 + + s3 = {on_demand_feature_view_3, on_demand_feature_view_4} + assert len(s3) == 2 + + s4 = { + on_demand_feature_view_1, + on_demand_feature_view_2, + on_demand_feature_view_3, + on_demand_feature_view_4, + } + assert len(s4) == 3 diff --git a/sdk/python/tests/utils/online_write_benchmark.py b/sdk/python/tests/utils/online_write_benchmark.py index 6d6b73d5da..82ffc8e98b 100644 --- a/sdk/python/tests/utils/online_write_benchmark.py +++ b/sdk/python/tests/utils/online_write_benchmark.py @@ -29,7 +29,7 @@ def create_driver_hourly_stats_feature_view(source): Field(name="acc_rate", dtype=Float32), Field(name="avg_daily_trips", dtype=Int32), ], - batch_source=source, + source=source, ttl=timedelta(hours=2), ) return driver_stats_feature_view diff --git a/ui/package.json b/ui/package.json index 37a5ddf431..d74c828f1a 100644 --- a/ui/package.json +++ b/ui/package.json @@ -1,6 +1,6 @@ { "name": "@feast-dev/feast-ui", - "version": "0.19.0", + "version": "0.20.1", "private": false, "files": [ "dist"