From f2079fbdcf25443c153a62fda1979042a814889c Mon Sep 17 00:00:00 2001 From: Lokesh Rangineni Date: Thu, 13 Jun 2024 19:36:11 -0400 Subject: [PATCH 1/2] squashing last 61 commits. Merge remote-tracking branch 'fork/feature/adding-remote-onlinestore-rebase' into feature/adding-remote-onlinestore-rebase Signed-off-by: Lokesh Rangineni --- .../fork_pr_integration_tests_aws.yml | 60 -- .github/workflows/build_wheels.yml | 2 +- .github/workflows/master_only.yml | 55 -- .github/workflows/nightly-ci.yml | 59 +- .github/workflows/pr_integration_tests.yml | 63 -- .github/workflows/publish.yml | 2 +- .gitpod.Dockerfile | 8 + .gitpod.yml | 48 +- .pre-commit-config.yaml | 4 +- CHANGELOG.md | 55 ++ Makefile | 24 +- OWNERS | 3 +- docs/SUMMARY.md | 4 + docs/project/release-process.md | 8 + docs/reference/alpha-vector-database.md | 18 + docs/reference/feature-servers/README.md | 4 + .../feature-servers/offline-feature-server.md | 35 + .../offline-stores/remote-offline-store.md | 28 + docs/reference/offline-stores/snowflake.md | 15 + docs/reference/online-stores/README.md | 4 + docs/reference/online-stores/remote.md | 21 + examples/remote-offline-store/README.md | 98 +++ .../offline_client}/__init__.py | 0 .../offline_client/feature_store.yaml | 10 + .../offline_client/test.py | 40 ++ .../offline_server}/__init__.py | 0 .../offline_server/feature_repo}/__init__.py | 0 .../feature_repo/data/driver_stats.parquet | Bin 0 -> 35102 bytes .../feature_repo/data/online_store.db | Bin 0 -> 28672 bytes .../feature_repo/example_repo.py | 140 ++++ .../feature_repo/feature_store.yaml | 9 + go/types/typeconversion.go | 146 ++-- go/types/typeconversion_test.go | 30 +- infra/charts/feast-feature-server/Chart.yaml | 2 +- infra/charts/feast-feature-server/README.md | 19 +- .../feast-feature-server/README.md.gotmpl | 13 +- .../templates/deployment.yaml | 35 +- .../templates/service.yaml | 2 +- infra/charts/feast-feature-server/values.yaml | 5 +- infra/charts/feast/Chart.yaml | 2 +- infra/charts/feast/README.md | 6 +- .../feast/charts/feature-server/Chart.yaml | 4 +- .../feast/charts/feature-server/README.md | 4 +- .../feast/charts/feature-server/values.yaml | 2 +- .../charts/transformation-service/Chart.yaml | 4 +- .../charts/transformation-service/README.md | 4 +- .../charts/transformation-service/values.yaml | 2 +- infra/charts/feast/requirements.yaml | 4 +- infra/feast-operator/Makefile | 2 +- .../config/manager/kustomization.yaml | 2 +- infra/scripts/pixi/pixi.lock | 306 +++++++- infra/scripts/pixi/pixi.toml | 2 +- infra/scripts/release/bump_file_versions.py | 15 +- java/pom.xml | 2 +- .../src/main/java/dev/feast/FeastClient.java | 52 +- .../test/java/dev/feast/FeastClientTest.java | 3 +- java/serving/pom.xml | 2 +- protos/feast/core/Transformation.proto | 2 - protos/feast/registry/RegistryServer.proto | 108 ++- sdk/python/docs/index.rst | 6 - sdk/python/docs/source/index.rst | 6 - sdk/python/feast/cli.py | 37 +- sdk/python/feast/constants.py | 3 + sdk/python/feast/feature_store.py | 84 ++- .../feature_servers/multicloud/Dockerfile | 4 +- .../feature_servers/multicloud/Dockerfile.dev | 5 +- .../athena_offline_store/tests/data_source.py | 1 - .../mssql_offline_store/tests/data_source.py | 1 - .../tests/data_source.py | 3 +- .../spark_offline_store/tests/data_source.py | 1 - .../trino_offline_store/tests/data_source.py | 1 - .../feast/infra/offline_stores/remote.py | 407 +++++++++++ .../feast/infra/online_stores/dynamodb.py | 191 +++-- sdk/python/feast/infra/online_stores/redis.py | 11 +- 
.../feast/infra/online_stores/remote.py | 167 +++++ .../feast/infra/online_stores/sqlite.py | 258 ++++++- .../feast/infra/registry/base_registry.py | 8 +- .../postgres/postgres_registry_store.py | 128 ---- sdk/python/feast/infra/registry/registry.py | 1 - sdk/python/feast/infra/registry/remote.py | 121 +++- sdk/python/feast/offline_server.py | 332 +++++++++ sdk/python/feast/registry_server.py | 158 +++- sdk/python/feast/repo_config.py | 2 + sdk/python/feast/templates/local/bootstrap.py | 1 + .../local/feature_repo/example_repo.py | 5 + .../postgres/feature_repo/feature_store.yaml | 14 +- sdk/python/feast/type_map.py | 71 +- sdk/python/feast/ui/package.json | 2 +- sdk/python/feast/ui/yarn.lock | 8 +- sdk/python/feast/ui_server.py | 2 +- .../requirements/py3.10-ci-requirements.txt | 247 +++++-- .../requirements/py3.10-requirements.txt | 52 +- .../requirements/py3.11-ci-requirements.txt | 247 +++++-- .../requirements/py3.11-requirements.txt | 50 +- .../requirements/py3.9-ci-requirements.txt | 252 +++++-- .../requirements/py3.9-requirements.txt | 52 +- sdk/python/tests/conftest.py | 10 +- .../example_repos/example_feature_repo_1.py | 23 +- .../feature_repos/repo_configuration.py | 16 +- .../universal/data_source_creator.py | 2 - .../universal/data_sources/file.py | 82 ++- .../universal/data_sources/redshift.py | 1 - .../universal/data_sources/snowflake.py | 1 - .../feature_repos/universal/feature_views.py | 8 +- .../universal/online_store/elasticsearch.py | 4 +- .../universal/online_store/postgres.py | 4 +- .../universal/online_store_creator.py | 5 +- .../test_universal_e2e.py | 0 .../offline_store/test_feature_logging.py | 13 +- .../test_universal_historical_retrieval.py | 45 +- .../{e2e => offline_store}/test_validation.py | 24 +- .../test_python_feature_server.py | 0 .../online_store/test_remote_online_store.py | 233 ++++++ .../online_store/test_universal_online.py | 2 +- .../registration/test_feature_store.py | 167 +---- .../integration/registration/test_registry.py | 232 ------ .../registration/test_universal_cli.py | 26 +- .../registration/test_universal_registry.py} | 567 ++++++++++++--- .../offline_stores/test_offline_store.py | 34 + .../tests/unit/infra/registry/test_remote.py | 69 -- .../tests/unit/online_store/__init__.py | 0 .../online_store/test_online_retrieval.py | 145 +++- sdk/python/tests/unit/test_offline_server.py | 250 +++++++ .../test_on_demand_python_transformation.py | 4 + sdk/python/tests/unit/test_registry_server.py | 60 -- sdk/python/tests/unit/test_sql_registry.py | 672 ------------------ sdk/python/tests/unit/test_type_map.py | 8 + sdk/python/tests/utils/e2e_test_validation.py | 71 +- sdk/python/tests/utils/http_server.py | 6 +- setup.py | 7 +- ui/package.json | 2 +- ui/yarn.lock | 16 +- 132 files changed, 5018 insertions(+), 2327 deletions(-) create mode 100644 .gitpod.Dockerfile create mode 100644 docs/reference/feature-servers/offline-feature-server.md create mode 100644 docs/reference/offline-stores/remote-offline-store.md create mode 100644 docs/reference/online-stores/remote.md create mode 100644 examples/remote-offline-store/README.md rename {sdk/python/feast/infra/registry/contrib/postgres => examples/remote-offline-store/offline_client}/__init__.py (100%) create mode 100644 examples/remote-offline-store/offline_client/feature_store.yaml create mode 100644 examples/remote-offline-store/offline_client/test.py rename {sdk/python/tests/integration/e2e => examples/remote-offline-store/offline_server}/__init__.py (100%) rename 
{sdk/python/tests/integration/scaffolding => examples/remote-offline-store/offline_server/feature_repo}/__init__.py (100%) create mode 100644 examples/remote-offline-store/offline_server/feature_repo/data/driver_stats.parquet create mode 100644 examples/remote-offline-store/offline_server/feature_repo/data/online_store.db create mode 100644 examples/remote-offline-store/offline_server/feature_repo/example_repo.py create mode 100644 examples/remote-offline-store/offline_server/feature_repo/feature_store.yaml create mode 100644 sdk/python/feast/infra/offline_stores/remote.py create mode 100644 sdk/python/feast/infra/online_stores/remote.py delete mode 100644 sdk/python/feast/infra/registry/contrib/postgres/postgres_registry_store.py create mode 100644 sdk/python/feast/offline_server.py rename sdk/python/tests/integration/{e2e => materialization}/test_universal_e2e.py (100%) rename sdk/python/tests/integration/{e2e => offline_store}/test_validation.py (93%) rename sdk/python/tests/integration/{e2e => online_store}/test_python_feature_server.py (100%) create mode 100644 sdk/python/tests/integration/online_store/test_remote_online_store.py delete mode 100644 sdk/python/tests/integration/registration/test_registry.py rename sdk/python/tests/{unit/infra/test_local_registry.py => integration/registration/test_universal_registry.py} (53%) delete mode 100644 sdk/python/tests/unit/infra/registry/test_remote.py create mode 100644 sdk/python/tests/unit/online_store/__init__.py create mode 100644 sdk/python/tests/unit/test_offline_server.py delete mode 100644 sdk/python/tests/unit/test_registry_server.py delete mode 100644 sdk/python/tests/unit/test_sql_registry.py diff --git a/.github/fork_workflows/fork_pr_integration_tests_aws.yml b/.github/fork_workflows/fork_pr_integration_tests_aws.yml index aa89ece1776..6eb8b8feff0 100644 --- a/.github/fork_workflows/fork_pr_integration_tests_aws.yml +++ b/.github/fork_workflows/fork_pr_integration_tests_aws.yml @@ -3,64 +3,6 @@ name: fork-pr-integration-tests-aws on: [pull_request] jobs: - build-docker-image: - if: github.repository == 'your github repo' # swap here with your project id - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - # pull_request_target runs the workflow in the context of the base repo - # as such actions/checkout needs to be explicit configured to retrieve - # code from the PR. 
- ref: refs/pull/${{ github.event.pull_request.number }}/merge - submodules: recursive - - name: Set up QEMU - uses: docker/setup-qemu-action@v1 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - with: - install: true - - name: Set up AWS SDK - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: us-west-2 - - name: Login to Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - - name: Set ECR image tag - id: image-tag - run: echo "::set-output name=DOCKER_IMAGE_TAG::`git rev-parse HEAD`" - - name: Cache Public ECR Image - id: lambda_python_3_11 - uses: actions/cache@v2 - with: - path: ~/cache - key: lambda_python_3_11 - - name: Handle Cache Miss (pull public ECR image & save it to tar file) - if: steps.cache-primes.outputs.cache-hit != 'true' - run: | - mkdir -p ~/cache - docker pull public.ecr.aws/lambda/python:3.11 - docker save public.ecr.aws/lambda/python:3.11 -o ~/cache/lambda_python_3_11.tar - - name: Handle Cache Hit (load docker image from tar file) - if: steps.cache-primes.outputs.cache-hit == 'true' - run: | - docker load -i ~/cache/lambda_python_3_11.tar - - name: Build and push - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: feast-python-server - run: | - docker build \ - --file sdk/python/feast/infra/feature_servers/aws_lambda/Dockerfile \ - --tag $ECR_REGISTRY/$ECR_REPOSITORY:${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} \ - --load \ - . - docker push $ECR_REGISTRY/$ECR_REPOSITORY:${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} - outputs: - DOCKER_IMAGE_TAG: ${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} integration-test-python: if: github.repository == 'your github repo' # swap here with your project id runs-on: ${{ matrix.os }} @@ -138,8 +80,6 @@ jobs: docker run -d -p 6001:6379 -p 6002:6380 -p 6003:6381 -p 6004:6382 -p 6005:6383 -p 6006:6384 --name redis-cluster vishnunair/docker-redis-cluster - name: Test python if: ${{ always() }} # this will guarantee that step won't be canceled and resources won't leak - env: - FEAST_SERVER_DOCKER_IMAGE_TAG: ${{ needs.build-docker-image.outputs.DOCKER_IMAGE_TAG }} run: | pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "aws and not Snowflake and not BigQuery and not minio_registry" pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "File and not Snowflake and not BigQuery and not minio_registry" diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 596eef2b52c..f04015a9892 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -116,7 +116,7 @@ jobs: needs: get-version strategy: matrix: - component: [feature-server, feature-server-python-aws, feature-server-java, feature-transformation-server] + component: [feature-server, feature-server-java, feature-transformation-server] env: REGISTRY: feastdev steps: diff --git a/.github/workflows/master_only.yml b/.github/workflows/master_only.yml index 1d6850e4d8e..1b401997a7b 100644 --- a/.github/workflows/master_only.yml +++ b/.github/workflows/master_only.yml @@ -6,61 +6,8 @@ on: - master jobs: - build-lambda-docker-image: - if: github.repository == 'feast-dev/feast' - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Set up QEMU 
- uses: docker/setup-qemu-action@v1 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - with: - install: true - - name: Set up AWS SDK - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: us-west-2 - - name: Login to Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - - name: Set ECR image tag - id: image-tag - run: echo "::set-output name=DOCKER_IMAGE_TAG::`git rev-parse HEAD`" - - name: Cache Public ECR Image - id: lambda_python_3_11 - uses: actions/cache@v2 - with: - path: ~/cache - key: lambda_python_3_11 - - name: Handle Cache Miss (pull public ECR image & save it to tar file) - if: steps.cache-primes.outputs.cache-hit != 'true' - run: | - mkdir -p ~/cache - docker pull public.ecr.aws/lambda/python:3.11 - docker save public.ecr.aws/lambda/python:3.11 -o ~/cache/lambda_python_3_11.tar - - name: Handle Cache Hit (load docker image from tar file) - if: steps.cache-primes.outputs.cache-hit == 'true' - run: | - docker load -i ~/cache/lambda_python_3_11.tar - - name: Build and push - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: feast-python-server - run: | - docker build \ - --file sdk/python/feast/infra/feature_servers/aws_lambda/Dockerfile \ - --tag $ECR_REGISTRY/$ECR_REPOSITORY:${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} \ - --load \ - . - docker push $ECR_REGISTRY/$ECR_REPOSITORY:${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} - outputs: - DOCKER_IMAGE_TAG: ${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} integration-test-python: if: github.repository == 'feast-dev/feast' - needs: build-lambda-docker-image runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -125,7 +72,6 @@ jobs: docker run -d -p 6001:6379 -p 6002:6380 -p 6003:6381 -p 6004:6382 -p 6005:6383 -p 6006:6384 --name redis-cluster vishnunair/docker-redis-cluster - name: Test python and go env: - FEAST_SERVER_DOCKER_IMAGE_TAG: ${{ needs.build-lambda-docker-image.outputs.DOCKER_IMAGE_TAG }} SNOWFLAKE_CI_DEPLOYMENT: ${{ secrets.SNOWFLAKE_CI_DEPLOYMENT }} SNOWFLAKE_CI_USER: ${{ secrets.SNOWFLAKE_CI_USER }} SNOWFLAKE_CI_PASSWORD: ${{ secrets.SNOWFLAKE_CI_PASSWORD }} @@ -134,7 +80,6 @@ jobs: run: make test-python-integration - name: Benchmark python env: - FEAST_SERVER_DOCKER_IMAGE_TAG: ${{ needs.build-lambda-docker-image.outputs.DOCKER_IMAGE_TAG }} SNOWFLAKE_CI_DEPLOYMENT: ${{ secrets.SNOWFLAKE_CI_DEPLOYMENT }} SNOWFLAKE_CI_USER: ${{ secrets.SNOWFLAKE_CI_USER }} SNOWFLAKE_CI_PASSWORD: ${{ secrets.SNOWFLAKE_CI_PASSWORD }} diff --git a/.github/workflows/nightly-ci.yml b/.github/workflows/nightly-ci.yml index 8a6ed2d7a73..11c91af2d7b 100644 --- a/.github/workflows/nightly-ci.yml +++ b/.github/workflows/nightly-ci.yml @@ -61,65 +61,9 @@ jobs: run: gcloud info - name: Run DynamoDB / Bigtable cleanup script run: python infra/scripts/cleanup_ci.py - build-docker-image: - if: github.repository == 'feast-dev/feast' - needs: [check_date] - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - ref: master - submodules: recursive - - name: Set up QEMU - uses: docker/setup-qemu-action@v1 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - with: - install: true - - name: Set up AWS SDK - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: us-west-2 - - name: Login to 
Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - - name: Set ECR image tag - id: image-tag - run: echo "::set-output name=DOCKER_IMAGE_TAG::`git rev-parse HEAD`" - - name: Cache Public ECR Image - id: lambda_python_3_11 - uses: actions/cache@v4 - with: - path: ~/cache - key: lambda_python_3_11 - - name: Handle Cache Miss (pull public ECR image & save it to tar file) - if: steps.lambda_python_3_11.outputs.cache-hit != 'true' - run: | - mkdir -p ~/cache - docker pull public.ecr.aws/lambda/python:3.11 - docker save public.ecr.aws/lambda/python:3.11 -o ~/cache/lambda_python_3_11.tar - - name: Handle Cache Hit (load docker image from tar file) - if: steps.lambda_python_3_11.outputs.cache-hit == 'true' - run: | - docker load -i ~/cache/lambda_python_3_11.tar - - name: Build and push - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: feast-python-server - run: | - docker build \ - --file sdk/python/feast/infra/feature_servers/aws_lambda/Dockerfile \ - --tag $ECR_REGISTRY/$ECR_REPOSITORY:${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} \ - --load \ - . - docker push $ECR_REGISTRY/$ECR_REPOSITORY:${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} - outputs: - DOCKER_IMAGE_TAG: ${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} integration-test-python: if: github.repository == 'feast-dev/feast' - needs: [check_date, build-docker-image, cleanup_dynamo_tables] + needs: [check_date, cleanup_dynamo_tables] runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -205,7 +149,6 @@ jobs: - name: Test python if: ${{ always() }} # this will guarantee that step won't be canceled and resources won't leak env: - FEAST_SERVER_DOCKER_IMAGE_TAG: ${{ needs.build-docker-image.outputs.DOCKER_IMAGE_TAG }} SNOWFLAKE_CI_DEPLOYMENT: ${{ secrets.SNOWFLAKE_CI_DEPLOYMENT }} SNOWFLAKE_CI_USER: ${{ secrets.SNOWFLAKE_CI_USER }} SNOWFLAKE_CI_PASSWORD: ${{ secrets.SNOWFLAKE_CI_PASSWORD }} diff --git a/.github/workflows/pr_integration_tests.yml b/.github/workflows/pr_integration_tests.yml index 3081d418fcf..f4a9132d292 100644 --- a/.github/workflows/pr_integration_tests.yml +++ b/.github/workflows/pr_integration_tests.yml @@ -13,75 +13,12 @@ on: # cancel-in-progress: true jobs: - build-docker-image: - # when using pull_request_target, all jobs MUST have this if check for 'ok-to-test' or 'approved' for security purposes. - if: - ((github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'lgtm' || github.event.label.name == 'ok-to-test')) || - (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved') || contains(github.event.pull_request.labels.*.name, 'lgtm')))) && - github.repository == 'feast-dev/feast' - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - # pull_request_target runs the workflow in the context of the base repo - # as such actions/checkout needs to be explicit configured to retrieve - # code from the PR. 
- ref: refs/pull/${{ github.event.pull_request.number }}/merge - submodules: recursive - - name: Set up QEMU - uses: docker/setup-qemu-action@v1 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - with: - install: true - - name: Set up AWS SDK - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: us-west-2 - - name: Login to Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - - name: Set ECR image tag - id: image-tag - run: echo "::set-output name=DOCKER_IMAGE_TAG::`git rev-parse HEAD`" - - name: Cache Public ECR Image - id: lambda_python_3_11 - uses: actions/cache@v2 - with: - path: ~/cache - key: lambda_python_3_11 - - name: Handle Cache Miss (pull public ECR image & save it to tar file) - if: steps.cache-primes.outputs.cache-hit != 'true' - run: | - mkdir -p ~/cache - docker pull public.ecr.aws/lambda/python:3.11 - docker save public.ecr.aws/lambda/python:3.11 -o ~/cache/lambda_python_3_11.tar - - name: Handle Cache Hit (load docker image from tar file) - if: steps.cache-primes.outputs.cache-hit == 'true' - run: | - docker load -i ~/cache/lambda_python_3_11.tar - - name: Build and push - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: feast-python-server - run: | - docker build \ - --file sdk/python/feast/infra/feature_servers/aws_lambda/Dockerfile \ - --tag $ECR_REGISTRY/$ECR_REPOSITORY:${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} \ - --load \ - . - docker push $ECR_REGISTRY/$ECR_REPOSITORY:${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} - outputs: - DOCKER_IMAGE_TAG: ${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} integration-test-python: # when using pull_request_target, all jobs MUST have this if check for 'ok-to-test' or 'approved' for security purposes. 
if: ((github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'lgtm' || github.event.label.name == 'ok-to-test')) || (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved') || contains(github.event.pull_request.labels.*.name, 'lgtm')))) && github.repository == 'feast-dev/feast' - needs: build-docker-image runs-on: ${{ matrix.os }} strategy: fail-fast: false diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 914e5a233c7..e56296ec4b4 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -49,7 +49,7 @@ jobs: needs: [get-version, publish-python-sdk] strategy: matrix: - component: [feature-server, feature-server-python-aws, feature-server-java, feature-transformation-server, feast-operator] + component: [feature-server, feature-server-java, feature-transformation-server, feast-operator] env: MAVEN_CACHE: gs://feast-templocation-kf-feast/.m2.2020-08-19.tar REGISTRY: feastdev diff --git a/.gitpod.Dockerfile b/.gitpod.Dockerfile new file mode 100644 index 00000000000..51796294700 --- /dev/null +++ b/.gitpod.Dockerfile @@ -0,0 +1,8 @@ +FROM gitpod/workspace-base +RUN sudo apt-get update && sudo apt-get install -y python3-dev python3-setuptools python3-pip python-is-python3 && sudo rm -rf /var/lib/apt/lists/* +RUN curl -LsSf https://astral.sh/uv/install.sh | sh +RUN curl -fsSL https://pixi.sh/install.sh | bash +ENV PATH=$PATH:/home/gitpod/.cargo/bin +RUN curl -s "https://get.sdkman.io" | bash +SHELL ["/bin/bash", "-c"] +RUN source "/home/gitpod/.sdkman/bin/sdkman-init.sh" && sdk install java 14.0.2-zulu \ No newline at end of file diff --git a/.gitpod.yml b/.gitpod.yml index b28dfbe49f5..480baefede4 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,42 +1,20 @@ # https://www.gitpod.io/docs/config-gitpod-file +image: + file: .gitpod.Dockerfile + tasks: - init: | - python -m venv venv - source venv/bin/activate - - pip install pre-commit + uv venv + uv pip install pre-commit pre-commit install --hook-type pre-commit --hook-type pre-push - pip install '.[dev]' - make compile-protos-python - make compile-protos-go - make compile-go-lib - env: - PYTHONUSERBASE: "/workspace/.pip-modules" - command: | - source venv/bin/activate - - git config --global alias.ci 'commit -s' - git config --global alias.sw switch - git config --global alias.st status - git config --global alias.co checkout - git config --global alias.br branch - git config --global alias.df diff -github: - prebuilds: - # enable for the default branch (defaults to true) - master: true - # enable for all branches in this repo (defaults to false) - branches: false - # enable for pull requests coming from this repo (defaults to true) - pullRequests: true - # enable for pull requests coming from forks (defaults to false) - pullRequestsFromForks: false - # add a check to pull requests (defaults to true) - addCheck: true - # add a "Review in Gitpod" button as a comment to pull requests (defaults to false) - addComment: false - # add a "Review in Gitpod" button to the pull request's description (defaults to false) - addBadge: false + source .venv/bin/activate + export PYTHON=3.10 && make install-python-ci-dependencies-uv-venv + # git config --global alias.ci 'commit -s' + # git config --global alias.sw switch + # git config --global alias.st status + # git config --global alias.co checkout + # git config --global alias.br branch + # git 
config --global alias.df diff vscode: extensions: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f46f2af604f..7ecde0ec5d3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,13 +7,13 @@ repos: name: Format stages: [ push ] language: system - entry: make format + entry: make format-python pass_filenames: false - id: lint name: Lint stages: [ push ] language: system - entry: make lint + entry: make lint-python pass_filenames: false - id: template name: Build Templates diff --git a/CHANGELOG.md b/CHANGELOG.md index 19dc5d86d7c..fc569e5fbba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,60 @@ # Changelog +# [0.38.0](https://github.com/feast-dev/feast/compare/v0.37.0...v0.38.0) (2024-05-24) + + +### Bug Fixes + +* Add vector database doc ([#4165](https://github.com/feast-dev/feast/issues/4165)) ([37f36b6](https://github.com/feast-dev/feast/commit/37f36b681bde0c1ae83303803c89d3ed0b2ac8a9)) +* Change checkout action back to v3 from v5 which isn't released yet ([#4147](https://github.com/feast-dev/feast/issues/4147)) ([9523fff](https://github.com/feast-dev/feast/commit/9523fff2dda2e0d53bffa7f5c0d6f2f69f6b8c02)) +* Change numpy version <1.25 dependency to <2 in setup.py ([#4085](https://github.com/feast-dev/feast/issues/4085)) ([2ba71ff](https://github.com/feast-dev/feast/commit/2ba71fff5f76ed05066e94f3b11d08bc30b54b39)), closes [#4084](https://github.com/feast-dev/feast/issues/4084) +* Changed the code the way mysql container is initialized. ([#4140](https://github.com/feast-dev/feast/issues/4140)) ([8b5698f](https://github.com/feast-dev/feast/commit/8b5698fefa965fc08fdb5e07d739d0ca276a3522)), closes [#4126](https://github.com/feast-dev/feast/issues/4126) +* Correct nightly install command, move all installs to uv ([#4164](https://github.com/feast-dev/feast/issues/4164)) ([c86d594](https://github.com/feast-dev/feast/commit/c86d594613b0fb1425451def4fc1d7a7496eea92)) +* Default value is not set in Redis connection string using environment variable ([#4136](https://github.com/feast-dev/feast/issues/4136)) ([95acfb4](https://github.com/feast-dev/feast/commit/95acfb4cefc10f96f8ed61f148e24b238d400a68)), closes [#3669](https://github.com/feast-dev/feast/issues/3669) +* Get container host addresses from testcontainers (java) ([#4125](https://github.com/feast-dev/feast/issues/4125)) ([9184dde](https://github.com/feast-dev/feast/commit/9184dde1fcd57de5765c850615eb5e70cbafe70f)) +* Get rid of empty string `name_alias` during feature view projection deserialization ([#4116](https://github.com/feast-dev/feast/issues/4116)) ([65056ce](https://github.com/feast-dev/feast/commit/65056cea6c4537834a1c40be2ad37e1659310a47)) +* Helm chart `feast-feature-server`, improve Service template name ([#4161](https://github.com/feast-dev/feast/issues/4161)) ([dedc164](https://github.com/feast-dev/feast/commit/dedc1645ef1f38aa9b50a0cf55e4bc23ec60d5ad)) +* Improve the code related to on-demand-featureview. 
([#4203](https://github.com/feast-dev/feast/issues/4203)) ([d91d7e0](https://github.com/feast-dev/feast/commit/d91d7e0da69d15c7aa14e736b608ed9f5ece3504)) +* Integration tests for async sdk method ([#4201](https://github.com/feast-dev/feast/issues/4201)) ([08c44ae](https://github.com/feast-dev/feast/commit/08c44ae35a4a91228f9f78c7323b4b7a73ef33aa)) +* Make sure schema is used when calling `get_table_query_string` method for Snowflake datasource ([#4131](https://github.com/feast-dev/feast/issues/4131)) ([c1579c7](https://github.com/feast-dev/feast/commit/c1579c77324cebb0514422235956812403316c80)) +* Make sure schema is used when generating `from_expression` for Snowflake ([#4177](https://github.com/feast-dev/feast/issues/4177)) ([5051da7](https://github.com/feast-dev/feast/commit/5051da75de81deed19b25fbc2826d504a8ebdc8b)) +* Pass native input values to `get_online_features` from feature server ([#4117](https://github.com/feast-dev/feast/issues/4117)) ([60756cb](https://github.com/feast-dev/feast/commit/60756cb4637a7961b6caffef3242e2886e77f78a)) +* Pass region to S3 client only if set (Java) ([#4151](https://github.com/feast-dev/feast/issues/4151)) ([b8087f7](https://github.com/feast-dev/feast/commit/b8087f7a181977e0e4d3bd29c857d8e137af1de2)) +* Pgvector patch ([#4108](https://github.com/feast-dev/feast/issues/4108)) ([ad45bb4](https://github.com/feast-dev/feast/commit/ad45bb4ac2dd83b530adda6196f85d46decaf98e)) +* Update doc ([#4153](https://github.com/feast-dev/feast/issues/4153)) ([e873636](https://github.com/feast-dev/feast/commit/e873636b4a5f3a05666f9284c31e488f27257ed0)) +* Update master-only benchmark bucket name due to credential update ([#4183](https://github.com/feast-dev/feast/issues/4183)) ([e88f1e3](https://github.com/feast-dev/feast/commit/e88f1e39778300fb443f1db230fe9589b74d9ed6)) +* Updating the instructions for quickstart guide. ([#4120](https://github.com/feast-dev/feast/issues/4120)) ([0c30e96](https://github.com/feast-dev/feast/commit/0c30e96da144babe725a3f168c05d2fbeca65507)) +* Upgrading the test container so that local tests works with updated d… ([#4155](https://github.com/feast-dev/feast/issues/4155)) ([93ddb11](https://github.com/feast-dev/feast/commit/93ddb11bf5a182cea44435147e39f40b30a69db7)) + + +### Features + +* Add a Kubernetes Operator for the Feast Feature Server ([#4145](https://github.com/feast-dev/feast/issues/4145)) ([4a696dc](https://github.com/feast-dev/feast/commit/4a696dc4b0fd96d51872a5e629ab5f3ca785d708)) +* Add delta format to `FileSource`, add support for it in ibis/duckdb ([#4123](https://github.com/feast-dev/feast/issues/4123)) ([2b6f1d0](https://github.com/feast-dev/feast/commit/2b6f1d0945e8dbf13d01e045f87c5e58546b4af6)) +* Add materialization support to ibis/duckdb ([#4173](https://github.com/feast-dev/feast/issues/4173)) ([369ca98](https://github.com/feast-dev/feast/commit/369ca98d88a5cb3c67b2363232b7c2eddfc4f333)) +* Add optional private key params to Snowflake config ([#4205](https://github.com/feast-dev/feast/issues/4205)) ([20f5419](https://github.com/feast-dev/feast/commit/20f5419d30c32b533e91043a9690007a84000512)) +* Add s3 remote storage export for duckdb ([#4195](https://github.com/feast-dev/feast/issues/4195)) ([6a04c48](https://github.com/feast-dev/feast/commit/6a04c48b4b84fb9905df638e5c4041c12532b053)) +* Adding DatastoreOnlineStore 'database' argument. 
([#4180](https://github.com/feast-dev/feast/issues/4180)) ([e739745](https://github.com/feast-dev/feast/commit/e739745482fed1b9c2d7b788ebb088041118c642)) +* Adding get_online_features_async to feature store sdk ([#4172](https://github.com/feast-dev/feast/issues/4172)) ([311efc5](https://github.com/feast-dev/feast/commit/311efc5005b24d1fc9bc389ee7579e102e2cd4ea)) +* Adding support for dictionary writes to online store ([#4156](https://github.com/feast-dev/feast/issues/4156)) ([abfac01](https://github.com/feast-dev/feast/commit/abfac011ad1f94caef001539591d03b1552f65e5)) +* Elasticsearch vector database ([#4188](https://github.com/feast-dev/feast/issues/4188)) ([bf99640](https://github.com/feast-dev/feast/commit/bf99640c0bcfd9ee7c1e66d24cb791bfa0e5ac4a)) +* Enable other distance metrics for Vector DB and Update docs ([#4170](https://github.com/feast-dev/feast/issues/4170)) ([ba9f4ef](https://github.com/feast-dev/feast/commit/ba9f4efd5eccd0548a39521a145c6573ac90c221)) +* Feast/IKV datetime edgecase errors ([#4211](https://github.com/feast-dev/feast/issues/4211)) ([bdae562](https://github.com/feast-dev/feast/commit/bdae562ea4582d8e47763736b639c70e56d79b2d)) +* Feast/IKV documenation language changes ([#4149](https://github.com/feast-dev/feast/issues/4149)) ([690a621](https://github.com/feast-dev/feast/commit/690a6212e9f2b14fc4bf65513e5d30e70e229d0a)) +* Feast/IKV online store contrib plugin integration ([#4068](https://github.com/feast-dev/feast/issues/4068)) ([f2b4eb9](https://github.com/feast-dev/feast/commit/f2b4eb94add8f86afa4e168236e8fcd11968510e)) +* Feast/IKV online store documentation ([#4146](https://github.com/feast-dev/feast/issues/4146)) ([73601e4](https://github.com/feast-dev/feast/commit/73601e45e2fc57dc889644b1d28115b3c94bd8ea)) +* Feast/IKV upgrade client version ([#4200](https://github.com/feast-dev/feast/issues/4200)) ([0e42150](https://github.com/feast-dev/feast/commit/0e4215060f97b7629015ab65ac526dfef0a1f7d4)) +* Incorporate substrait ODFVs into ibis-based offline store queries ([#4102](https://github.com/feast-dev/feast/issues/4102)) ([c3a102f](https://github.com/feast-dev/feast/commit/c3a102f1b1941c8681ec876b54d7d16a32862925)) +* Isolate input-dependent calculations in `get_online_features` ([#4041](https://github.com/feast-dev/feast/issues/4041)) ([2a6edea](https://github.com/feast-dev/feast/commit/2a6edeae42a2ebba7d9fc69af917bdc41ae6ecb0)) +* Make arrow primary interchange for online ODFV execution ([#4143](https://github.com/feast-dev/feast/issues/4143)) ([3fdb716](https://github.com/feast-dev/feast/commit/3fdb71631fbb1b9cfb8d1cad69dbc2d2d50cea0d)) +* Move data source validation entrypoint to offline store ([#4197](https://github.com/feast-dev/feast/issues/4197)) ([a17725d](https://github.com/feast-dev/feast/commit/a17725daec9e7355591e7ff2bc57202d5fa3f0c1)) +* Upgrading python version to 3.11, adding support for 3.11 as well. 
([#4159](https://github.com/feast-dev/feast/issues/4159)) ([4b1634f](https://github.com/feast-dev/feast/commit/4b1634f4da7ba47a29dfd4a0d573dfe515a8863d)), closes [#4152](https://github.com/feast-dev/feast/issues/4152) [#4114](https://github.com/feast-dev/feast/issues/4114) + + +### Reverts + +* Reverts "fix: Using version args to install the correct feast version" ([#4112](https://github.com/feast-dev/feast/issues/4112)) ([b66baa4](https://github.com/feast-dev/feast/commit/b66baa46f48c72f4704bfe3980a8df49e1a06507)), closes [#3953](https://github.com/feast-dev/feast/issues/3953) + ## [0.37.1](https://github.com/feast-dev/feast/compare/v0.37.0...v0.37.1) (2024-04-17) diff --git a/Makefile b/Makefile index aed58ed465b..b44aaf0ee5a 100644 --- a/Makefile +++ b/Makefile @@ -46,6 +46,11 @@ install-python-ci-dependencies-uv: uv pip install --system --no-deps -e . python setup.py build_python_protos --inplace +install-python-ci-dependencies-uv-venv: + uv pip sync sdk/python/requirements/py$(PYTHON)-ci-requirements.txt + uv pip install --no-deps -e . + python setup.py build_python_protos --inplace + lock-python-ci-dependencies: uv pip compile --system --no-strip-extras setup.py --extra ci --output-file sdk/python/requirements/py$(PYTHON)-ci-requirements.txt @@ -80,19 +85,16 @@ test-python-unit: python -m pytest -n 8 --color=yes sdk/python/tests test-python-integration: - python -m pytest -n 8 --integration -k "(not snowflake or not test_historical_features_main) and not minio_registry" --color=yes --durations=5 --timeout=1200 --timeout_method=thread sdk/python/tests + python -m pytest -n 8 --integration --color=yes --durations=10 --timeout=1200 --timeout_method=thread \ + -k "(not snowflake or not test_historical_features_main)" \ + sdk/python/tests test-python-integration-local: - @(docker info > /dev/null 2>&1 && \ - FEAST_IS_LOCAL_TEST=True \ - FEAST_LOCAL_ONLINE_CONTAINER=True \ - python -m pytest -n 8 --color=yes --integration \ - -k "not gcs_registry and \ - not s3_registry and \ - not test_lambda_materialization and \ - not test_snowflake_materialization" \ - sdk/python/tests \ - ) || echo "This script uses Docker, and it isn't running - please start the Docker Daemon and try again!"; + FEAST_IS_LOCAL_TEST=True \ + FEAST_LOCAL_ONLINE_CONTAINER=True \ + python -m pytest -n 8 --color=yes --integration --durations=5 --dist loadgroup \ + -k "not test_lambda_materialization and not test_snowflake_materialization" \ + sdk/python/tests test-python-integration-container: @(docker info > /dev/null 2>&1 && \ diff --git a/OWNERS b/OWNERS index 52c5e436d30..1072fc2187b 100644 --- a/OWNERS +++ b/OWNERS @@ -21,6 +21,7 @@ approvers: - haoxuai - jeremyary - shuchu + - tokoko reviewers: - woop @@ -43,4 +44,4 @@ reviewers: - haoxuai - jeremyary - shuchu - \ No newline at end of file + - tokoko diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 2e205dee0a1..06c5edcc8b0 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -85,6 +85,7 @@ * [PostgreSQL (contrib)](reference/offline-stores/postgres.md) * [Trino (contrib)](reference/offline-stores/trino.md) * [Azure Synapse + Azure SQL (contrib)](reference/offline-stores/mssql.md) + * [Remote Offline](reference/offline-stores/remote-offline-store.md) * [Online stores](reference/online-stores/README.md) * [Overview](reference/online-stores/overview.md) * [SQLite](reference/online-stores/sqlite.md) @@ -95,6 +96,7 @@ * [Datastore](reference/online-stores/datastore.md) * [DynamoDB](reference/online-stores/dynamodb.md) * [Bigtable](reference/online-stores/bigtable.md) 
+ * [Remote](reference/online-stores/remote.md)
* [PostgreSQL (contrib)](reference/online-stores/postgres.md)
* [Cassandra + Astra DB (contrib)](reference/online-stores/cassandra.md)
* [MySQL (contrib)](reference/online-stores/mysql.md)
@@ -117,6 +119,8 @@
* [Python feature server](reference/feature-servers/python-feature-server.md)
* [\[Alpha\] Go feature server](reference/feature-servers/go-feature-server.md)
* [\[Alpha\] AWS Lambda feature server](reference/feature-servers/alpha-aws-lambda-feature-server.md)
+ * [Offline Feature Server](reference/feature-servers/offline-feature-server.md)
+
* [\[Beta\] Web UI](reference/alpha-web-ui.md)
* [\[Alpha\] On demand feature view](reference/alpha-on-demand-feature-view.md)
* [\[Alpha\] Data quality monitoring](reference/dqm.md)
diff --git a/docs/project/release-process.md b/docs/project/release-process.md
index d3ff34bbc38..e6f75ffd413 100644
--- a/docs/project/release-process.md
+++ b/docs/project/release-process.md
@@ -4,6 +4,12 @@
For Feast maintainers, these are the concrete steps for making a new release.
+### 0. Cutting a minor release
+You only need to run the `release` workflow using [the GitHub action](https://github.com/feast-dev/feast/blob/master/.github/workflows/release.yml).
+First test with a `dry-run`, then run it live. This is all you need to do: all deployments to Docker Hub, PyPI, and npm are handled by the workflows.
+
+Also note that, as part of the workflow, the [infra/scripts/release/bump_file_versions.py](https://github.com/feast-dev/feast/blob/master/infra/scripts/release/bump_file_versions.py) script will increment the Feast versions in the appropriate files.
+
### 1. (for patch releases) Cherry-pick changes into the branch from master
If you were cutting Feast 0.22.3, for example, you might do:
1. `git checkout v0.22-branch` (or `git pull upstream v0.22-branch --rebase` if you've cut a release before)
@@ -16,6 +22,8 @@ If you were cutting Feast 0.22.3, for example, you might do:
After this step, you will have all the changes you need in the branch.
+Note: for patches you *do not need to run the `bump_file_versions.py` script.*
+
### 2. Pre-release verification (currently broken)
A lot of things can go wrong. One of the most common is getting the wheels to build correctly (and not accidentally building dev wheels from improper tagging or local code changes during the release process).
diff --git a/docs/reference/alpha-vector-database.md b/docs/reference/alpha-vector-database.md
index 37d9b9cdf87..b9ce7f408a0 100644
--- a/docs/reference/alpha-vector-database.md
+++ b/docs/reference/alpha-vector-database.md
@@ -13,7 +13,9 @@ Below are supported vector databases and implemented features:
| Elasticsearch | [x] | [x] |
| Milvus | [ ] | [ ] |
| Faiss | [ ] | [ ] |
+| SQLite | [x] | [ ] |
+Note: SQLite support is limited and currently works only on Python 3.10. It will be updated as [sqlite_vec](https://github.com/asg017/sqlite-vec/) progresses.
## Example
@@ -108,4 +110,20 @@ def print_online_features(features):
print(key, " : ", value)
print_online_features(features)
+```
+
+### Configuration
+We offer two online store options for vector databases: PGVector and SQLite. A minimal SQLite configuration is sketched below.
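As a rough sketch, a SQLite-backed `feature_store.yaml` could look like the block below. The `vec_enabled` and `vector_len` option names are assumptions for illustration; confirm the exact keys against the SQLite online store reference for your Feast version.

```yaml
project: my_project
provider: local
online_store:
  type: sqlite
  path: data/online_store.db
  vec_enabled: true  # assumed option name: enables sqlite-vec vector support
  vector_len: 384    # assumed option name: dimensionality of stored embeddings
entity_key_serialization_version: 2
```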
+
+#### Installation with SQLite
+If you are using `pyenv` to manage your Python versions, you can install a SQLite build with loadable extensions enabled via the following command (the library paths below assume Homebrew's SQLite on macOS):
+```bash
+PYTHON_CONFIGURE_OPTS="--enable-loadable-sqlite-extensions" \
+ LDFLAGS="-L/opt/homebrew/opt/sqlite/lib" \
+ CPPFLAGS="-I/opt/homebrew/opt/sqlite/include" \
+ pyenv install 3.10.14
+```
+And then you can install the Feast package via:
+```bash
+pip install feast[sqlite_vec]
+```
\ No newline at end of file
diff --git a/docs/reference/feature-servers/README.md b/docs/reference/feature-servers/README.md
index f9a40104c3a..d5a4312f73a 100644
--- a/docs/reference/feature-servers/README.md
+++ b/docs/reference/feature-servers/README.md
@@ -12,4 +12,8 @@ Feast users can choose to retrieve features from a feature server, as opposed to
{% content-ref url="alpha-aws-lambda-feature-server.md" %}
[alpha-aws-lambda-feature-server.md](alpha-aws-lambda-feature-server.md)
+{% endcontent-ref %}
+
+{% content-ref url="offline-feature-server.md" %}
+[offline-feature-server.md](offline-feature-server.md)
{% endcontent-ref %}
\ No newline at end of file
diff --git a/docs/reference/feature-servers/offline-feature-server.md b/docs/reference/feature-servers/offline-feature-server.md
new file mode 100644
index 00000000000..6c2fdf7a259
--- /dev/null
+++ b/docs/reference/feature-servers/offline-feature-server.md
@@ -0,0 +1,35 @@
+# Offline feature server
+
+## Description
+
+The Offline feature server is an Apache Arrow Flight server that uses the gRPC communication protocol to exchange data. This server wraps calls to existing offline store implementations and exposes them as Arrow Flight endpoints.
+
+## How to configure the server
+
+## CLI
+
+There is a CLI command that starts the Offline feature server: `feast serve_offline`. By default, the remote offline server uses port 8815; the port can be overridden with the `--port` flag, as in the sketch below.
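For a quick local illustration, the server can be started from a feature repository created with `feast init`; the repository path `feature_repo` and the alternate port below are placeholders:

```console
feast -c feature_repo apply
feast -c feature_repo serve_offline --port 8816
```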
+## Deploying as a service on Kubernetes
+
+The Offline feature server can be deployed on Kubernetes using the [feast-feature-server helm chart](https://github.com/feast-dev/feast/blob/master/infra/charts/feast-feature-server).
+
+Users need to set `feast_mode=offline` when installing the Offline feature server, as shown in the helm command below:
+
+```
+helm install feast-offline-server feast-charts/feast-feature-server --set feast_mode=offline --set feature_store_yaml_base64=$(base64 < feature_store.yaml)
+```
+
+## Server Example
+
+The complete example can be found under [remote-offline-store-example](../../../examples/remote-offline-store).
+
+## How to configure the client
+
+Please see [remote-offline-store.md](../offline-stores/remote-offline-store.md) for details on how to configure the offline store client.
+
+## Functionality Matrix
+
+The set of functionalities supported by remote offline stores is the same as those supported by offline stores with the SDK, which are described in detail [here](../offline-stores/overview.md#functionality).
+
diff --git a/docs/reference/offline-stores/remote-offline-store.md b/docs/reference/offline-stores/remote-offline-store.md
new file mode 100644
index 00000000000..0179e0f06f8
--- /dev/null
+++ b/docs/reference/offline-stores/remote-offline-store.md
@@ -0,0 +1,28 @@
+# Remote Offline Store
+
+## Description
+
+The Remote Offline Store is an Arrow Flight client for the offline store that implements the `RemoteOfflineStore` class using the existing `OfflineStore` interface.
+The client implements various methods, including `get_historical_features`, `pull_latest_from_table_or_query`, `write_logged_features`, and `offline_write_batch`.
+
+## How to configure the client
+
+Users need to create a client-side `feature_store.yaml` file, set the `offline_store` type to `remote`, and provide the server connection configuration: the host and the port (default 8815) that the Arrow Flight client uses to connect to the Arrow Flight server.
+
+{% code title="feature_store.yaml" %}
+```yaml
+offline_store:
+    type: remote
+    host: localhost
+    port: 8815
+```
+{% endcode %}
+
+## Client Example
+
+The complete example can be found under [remote-offline-store-example](../../../examples/remote-offline-store).
+
+## How to configure the server
+
+Please see [offline-feature-server.md](../feature-servers/offline-feature-server.md) for details on how to configure the offline feature server.
\ No newline at end of file
diff --git a/docs/reference/offline-stores/snowflake.md b/docs/reference/offline-stores/snowflake.md
index 9f2dafee671..39bbe3f8a03 100644
--- a/docs/reference/offline-stores/snowflake.md
+++ b/docs/reference/offline-stores/snowflake.md
@@ -34,6 +34,21 @@ offline_store:
The full set of configuration options is available in [SnowflakeOfflineStoreConfig](https://rtd.feast.dev/en/latest/#feast.infra.offline_stores.snowflake.SnowflakeOfflineStoreConfig).
+
+## Limitation
+There is a restriction on using SQL query strings in Feast with Snowflake: avoid single quotes inside the query string. For example, the following query will fail:
+```
+SELECT
+  some_column
+FROM
+  some_table
+WHERE
+  other_column = 'value'
+```
+The `'value'` literal will fail in Snowflake. Instead, use dollar-quoted string constants, i.e. `other_column = $$value$$`, as [described in the Snowflake documentation](https://docs.snowflake.com/en/sql-reference/data-types-text#label-dollar-quoted-string-constants).
+
+
## Functionality Matrix
The set of functionality supported by offline stores is described in detail [here](overview.md#functionality).
diff --git a/docs/reference/online-stores/README.md b/docs/reference/online-stores/README.md
index 686e820f4e7..b5f4eb8de89 100644
--- a/docs/reference/online-stores/README.md
+++ b/docs/reference/online-stores/README.md
@@ -61,3 +61,7 @@ Please see [Online Store](../../getting-started/architecture-and-components/onli
{% content-ref url="scylladb.md" %}
[scylladb.md](scylladb.md)
{% endcontent-ref %}
+
+{% content-ref url="remote.md" %}
+[remote.md](remote.md)
+{% endcontent-ref %}
diff --git a/docs/reference/online-stores/remote.md b/docs/reference/online-stores/remote.md
new file mode 100644
index 00000000000..c560fa6f223
--- /dev/null
+++ b/docs/reference/online-stores/remote.md
@@ -0,0 +1,21 @@
+# Remote online store
+
+## Description
+
+The remote online store lets you interact with a remote feature server. At the moment it supports only read operations: you can retrieve online features from the remote feature server with `store.get_online_features`.
+
+## Examples
+
+The registry should point to the registry of the remote feature store. If it is not directly accessible, the client should be configured to use a remote registry.
+
+{% code title="feature_store.yaml" %}
+```yaml
+project: my-local-project
+registry: /remote/data/registry.db
+provider: local
+online_store:
+    path: http://localhost:6566
+    type: remote
+entity_key_serialization_version: 2
+```
+{% endcode %}
\ No newline at end of file
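To make the read path concrete, here is a minimal client sketch against a configuration like the one above; the feature reference `driver_hourly_stats:conv_rate` and the entity key `driver_id` are illustrative placeholders:

```python
from feast import FeatureStore

# Loads the client-side feature_store.yaml; the online store "path"
# points at the remote feature server's endpoint.
store = FeatureStore(repo_path=".")

# Reads are the only operation the remote online store supports today.
features = store.get_online_features(
    features=["driver_hourly_stats:conv_rate"],  # illustrative feature view
    entity_rows=[{"driver_id": 1001}],           # illustrative entity key
).to_dict()
print(features)
```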
diff --git a/examples/remote-offline-store/README.md b/examples/remote-offline-store/README.md
new file mode 100644
index 00000000000..c07d7f30419
--- /dev/null
+++ b/examples/remote-offline-store/README.md
@@ -0,0 +1,98 @@
+# Feast Remote Offline Store Server
+
+This example demonstrates how to use an [Arrow Flight](https://arrow.apache.org/blog/2019/10/13/introducing-arrow-flight/) server/client pair as a remote Feast offline store.
+
+## Launch the offline server locally
+
+1. **Create a Feast project**: Use the `feast init` command; for example, the [offline_server](./offline_server) folder contains a sample Feast repository.
+
+2. **Start the remote offline server**: Use the `feast serve_offline` command to start serving remote offline requests. This command spins up an `Arrow Flight` server on the default port 8815.
+
+3. **Initialize the offline server**: The offline server can be initialized by providing the `feature_store.yaml` file, base64-encoded, via an environment variable named `FEATURE_STORE_YAML_BASE64`. A temporary directory will be created containing the provided YAML as `feature_store.yaml`.
+
+Example:
+
+```console
+cd offline_server
+feast -c feature_repo apply
+```
+
+```console
+feast -c feature_repo serve_offline
+```
+
+Sample output:
+```console
+Serving on grpc+tcp://127.0.0.1:8815
+```
+
+## Launch a remote offline client
+
+The [offline_client](./offline_client) folder includes a test Python script that uses an offline store of type `remote`, leveraging the remote server as the actual data provider.
+
+The test client is located under [offline_client](./offline_client/) and uses a remote configuration of the offline store to delegate the actual implementation to the offline store server:
+```yaml
+offline_store:
+    type: remote
+    host: localhost
+    port: 8815
+```
+
+The test code in [test.py](./offline_client/test.py) initializes the store from the local configuration and then fetches the historical features from the store like any other Feast client, but the actual implementation is delegated to the offline server:
+```py
+store = FeatureStore(repo_path=".")
+training_df = store.get_historical_features(entity_df, features).to_df()
+```
+
+Run the client:
+`cd offline_client; python test.py`
+
+Sample output:
+
+```console
+config.offline_store is 
+----- Feature schema -----
+
+<class 'pandas.core.frame.DataFrame'>
+RangeIndex: 3 entries, 0 to 2
+Data columns (total 10 columns):
+ # Column Non-Null Count Dtype
+--- ------ -------------- -----
+ 0 driver_id 3 non-null int64
+ 1 event_timestamp 3 non-null datetime64[ns, UTC]
+ 2 label_driver_reported_satisfaction 3 non-null int64
+ 3 val_to_add 3 non-null int64
+ 4 val_to_add_2 3 non-null int64
+ 5 conv_rate 3 non-null float32
+ 6 acc_rate 3 non-null float32
+ 7 avg_daily_trips 3 non-null int32
+ 8 conv_rate_plus_val1 3 non-null float64
+ 9 conv_rate_plus_val2 3 non-null float64
+dtypes: datetime64[ns, UTC](1), float32(2), float64(2), int32(1), int64(4)
+memory usage: 332.0 bytes
+None
+
+----- Features -----
+
+ driver_id event_timestamp label_driver_reported_satisfaction ... avg_daily_trips conv_rate_plus_val1 conv_rate_plus_val2
+0 1001 2021-04-12 10:59:42+00:00 1 ... 590 1.022378 10.022378
+1 1002 2021-04-12 08:12:10+00:00 5 ... 974 2.762213 20.762213
+2 1003 2021-04-12 16:40:26+00:00 3 ...
127 3.419828 30.419828 + +[3 rows x 10 columns] +------training_df---- + driver_id event_timestamp label_driver_reported_satisfaction ... avg_daily_trips conv_rate_plus_val1 conv_rate_plus_val2 +0 1001 2021-04-12 10:59:42+00:00 1 ... 590 1.022378 10.022378 +1 1002 2021-04-12 08:12:10+00:00 5 ... 974 2.762213 20.762213 +2 1003 2021-04-12 16:40:26+00:00 3 ... 127 3.419828 30.419828 + +[3 rows x 10 columns] +``` + diff --git a/sdk/python/feast/infra/registry/contrib/postgres/__init__.py b/examples/remote-offline-store/offline_client/__init__.py similarity index 100% rename from sdk/python/feast/infra/registry/contrib/postgres/__init__.py rename to examples/remote-offline-store/offline_client/__init__.py diff --git a/examples/remote-offline-store/offline_client/feature_store.yaml b/examples/remote-offline-store/offline_client/feature_store.yaml new file mode 100644 index 00000000000..24ee5d70426 --- /dev/null +++ b/examples/remote-offline-store/offline_client/feature_store.yaml @@ -0,0 +1,10 @@ +project: offline_server +# By default, the registry is a file (but can be turned into a more scalable SQL-backed registry) +registry: ../offline_server/feature_repo/data/registry.db +# The provider primarily specifies default offline / online stores & storing the registry in a given cloud +provider: local +offline_store: + type: remote + host: localhost + port: 8815 +entity_key_serialization_version: 2 diff --git a/examples/remote-offline-store/offline_client/test.py b/examples/remote-offline-store/offline_client/test.py new file mode 100644 index 00000000000..172ee73bf09 --- /dev/null +++ b/examples/remote-offline-store/offline_client/test.py @@ -0,0 +1,40 @@ +from datetime import datetime +from feast import FeatureStore +import pandas as pd + +entity_df = pd.DataFrame.from_dict( + { + "driver_id": [1001, 1002, 1003], + "event_timestamp": [ + datetime(2021, 4, 12, 10, 59, 42), + datetime(2021, 4, 12, 8, 12, 10), + datetime(2021, 4, 12, 16, 40, 26), + ], + "label_driver_reported_satisfaction": [1, 5, 3], + "val_to_add": [1, 2, 3], + "val_to_add_2": [10, 20, 30], + } +) + +features = [ + "driver_hourly_stats:conv_rate", + "driver_hourly_stats:acc_rate", + "driver_hourly_stats:avg_daily_trips", + "transformed_conv_rate:conv_rate_plus_val1", + "transformed_conv_rate:conv_rate_plus_val2", +] + +store = FeatureStore(repo_path=".") + +training_df = store.get_historical_features(entity_df, features).to_df() + +print("----- Feature schema -----\n") +print(training_df.info()) + +print() +print("----- Features -----\n") +print(training_df.head()) + +print("------training_df----") + +print(training_df) diff --git a/sdk/python/tests/integration/e2e/__init__.py b/examples/remote-offline-store/offline_server/__init__.py similarity index 100% rename from sdk/python/tests/integration/e2e/__init__.py rename to examples/remote-offline-store/offline_server/__init__.py diff --git a/sdk/python/tests/integration/scaffolding/__init__.py b/examples/remote-offline-store/offline_server/feature_repo/__init__.py similarity index 100% rename from sdk/python/tests/integration/scaffolding/__init__.py rename to examples/remote-offline-store/offline_server/feature_repo/__init__.py diff --git a/examples/remote-offline-store/offline_server/feature_repo/data/driver_stats.parquet b/examples/remote-offline-store/offline_server/feature_repo/data/driver_stats.parquet new file mode 100644 index 0000000000000000000000000000000000000000..19279202d84a63a921306eb5fcd8b88603fb54b6 GIT binary patch literal 35102 
[35102 bytes of base85-encoded binary data omitted]

literal 0
HcmV?d00001

diff --git a/examples/remote-offline-store/offline_server/feature_repo/data/online_store.db b/examples/remote-offline-store/offline_server/feature_repo/data/online_store.db
new file mode 100644
index 0000000000000000000000000000000000000000..d230f45b934c6f347589b35d87f58efa2ba4aa6d
GIT binary patch
literal 28672
[28672 bytes of base85-encoded binary data omitted]

literal 0
HcmV?d00001

diff --git a/examples/remote-offline-store/offline_server/feature_repo/example_repo.py b/examples/remote-offline-store/offline_server/feature_repo/example_repo.py
new file mode 100644
--- /dev/null
+++ b/examples/remote-offline-store/offline_server/feature_repo/example_repo.py
+# Define an on demand feature view which can generate new features based on
+# existing feature views and RequestSource features
+@on_demand_feature_view(
+    sources=[driver_stats_fv, input_request],
+    schema=[
+        Field(name="conv_rate_plus_val1", dtype=Float64),
+        Field(name="conv_rate_plus_val2", dtype=Float64),
+    ],
+)
+def transformed_conv_rate(inputs: pd.DataFrame) -> pd.DataFrame:
+    df = pd.DataFrame()
+    df["conv_rate_plus_val1"] = inputs["conv_rate"] + inputs["val_to_add"]
+    df["conv_rate_plus_val2"] = inputs["conv_rate"] + inputs["val_to_add_2"]
+    return df
+
+
+# This groups features into a model version
+driver_activity_v1 = FeatureService(
+    name="driver_activity_v1",
+    features=[
+        driver_stats_fv[["conv_rate"]],  # Sub-selects a feature from a feature view
+        transformed_conv_rate,  # Selects all features from the feature view
+    ],
+)
+driver_activity_v2 = FeatureService(
+    name="driver_activity_v2", features=[driver_stats_fv, transformed_conv_rate]
+)
+
+# Defines a way to push data (to be available offline, online or both) into Feast.
+driver_stats_push_source = PushSource(
+    name="driver_stats_push_source",
+    batch_source=driver_stats_source,
+)
+
+# Defines a slightly modified version of the feature view from above, where the source
+# has been changed to the push source. This allows fresh features to be directly pushed
+# to the online store for this feature view.
+driver_stats_fresh_fv = FeatureView( + name="driver_hourly_stats_fresh", + entities=[driver], + ttl=timedelta(days=1), + schema=[ + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int64), + ], + online=True, + source=driver_stats_push_source, # Changed from above + tags={"team": "driver_performance"}, +) + + +# Define an on demand feature view which can generate new features based on +# existing feature views and RequestSource features +@on_demand_feature_view( + sources=[driver_stats_fresh_fv, input_request], # relies on fresh version of FV + schema=[ + Field(name="conv_rate_plus_val1", dtype=Float64), + Field(name="conv_rate_plus_val2", dtype=Float64), + ], +) +def transformed_conv_rate_fresh(inputs: pd.DataFrame) -> pd.DataFrame: + df = pd.DataFrame() + df["conv_rate_plus_val1"] = inputs["conv_rate"] + inputs["val_to_add"] + df["conv_rate_plus_val2"] = inputs["conv_rate"] + inputs["val_to_add_2"] + return df + + +driver_activity_v3 = FeatureService( + name="driver_activity_v3", + features=[driver_stats_fresh_fv, transformed_conv_rate_fresh], +) diff --git a/examples/remote-offline-store/offline_server/feature_repo/feature_store.yaml b/examples/remote-offline-store/offline_server/feature_repo/feature_store.yaml new file mode 100644 index 00000000000..a751706d07a --- /dev/null +++ b/examples/remote-offline-store/offline_server/feature_repo/feature_store.yaml @@ -0,0 +1,9 @@ +project: offline_server +# By default, the registry is a file (but can be turned into a more scalable SQL-backed registry) +registry: data/registry.db +# The provider primarily specifies default offline / online stores & storing the registry in a given cloud +provider: local +online_store: + type: sqlite + path: data/online_store.db +entity_key_serialization_version: 2 diff --git a/go/types/typeconversion.go b/go/types/typeconversion.go index 45eeac52c6f..18b4769b4d7 100644 --- a/go/types/typeconversion.go +++ b/go/types/typeconversion.go @@ -11,6 +11,9 @@ import ( ) func ProtoTypeToArrowType(sample *types.Value) (arrow.DataType, error) { + if sample.Val == nil { + return nil, nil + } switch sample.Val.(type) { case *types.Value_BytesVal: return arrow.BinaryTypes.Binary, nil @@ -91,81 +94,71 @@ func ValueTypeEnumToArrowType(t types.ValueType_Enum) (arrow.DataType, error) { } func CopyProtoValuesToArrowArray(builder array.Builder, values []*types.Value) error { - switch fieldBuilder := builder.(type) { - case *array.BooleanBuilder: - for _, v := range values { - fieldBuilder.Append(v.GetBoolVal()) - } - case *array.BinaryBuilder: - for _, v := range values { - fieldBuilder.Append(v.GetBytesVal()) - } - case *array.StringBuilder: - for _, v := range values { - fieldBuilder.Append(v.GetStringVal()) - } - case *array.Int32Builder: - for _, v := range values { - fieldBuilder.Append(v.GetInt32Val()) - } - case *array.Int64Builder: - for _, v := range values { - fieldBuilder.Append(v.GetInt64Val()) - } - case *array.Float32Builder: - for _, v := range values { - fieldBuilder.Append(v.GetFloatVal()) + for _, value := range values { + if value == nil || value.Val == nil { + builder.AppendNull() + continue } - case *array.Float64Builder: - for _, v := range values { - fieldBuilder.Append(v.GetDoubleVal()) - } - case *array.TimestampBuilder: - for _, v := range values { - fieldBuilder.Append(arrow.Timestamp(v.GetUnixTimestampVal())) - } - case *array.ListBuilder: - for _, list := range values { + + switch fieldBuilder := builder.(type) { + + case 
*array.BooleanBuilder: + fieldBuilder.Append(value.GetBoolVal()) + case *array.BinaryBuilder: + fieldBuilder.Append(value.GetBytesVal()) + case *array.StringBuilder: + fieldBuilder.Append(value.GetStringVal()) + case *array.Int32Builder: + fieldBuilder.Append(value.GetInt32Val()) + case *array.Int64Builder: + fieldBuilder.Append(value.GetInt64Val()) + case *array.Float32Builder: + fieldBuilder.Append(value.GetFloatVal()) + case *array.Float64Builder: + fieldBuilder.Append(value.GetDoubleVal()) + case *array.TimestampBuilder: + fieldBuilder.Append(arrow.Timestamp(value.GetUnixTimestampVal())) + case *array.ListBuilder: fieldBuilder.Append(true) switch valueBuilder := fieldBuilder.ValueBuilder().(type) { case *array.BooleanBuilder: - for _, v := range list.GetBoolListVal().GetVal() { + for _, v := range value.GetBoolListVal().GetVal() { valueBuilder.Append(v) } case *array.BinaryBuilder: - for _, v := range list.GetBytesListVal().GetVal() { + for _, v := range value.GetBytesListVal().GetVal() { valueBuilder.Append(v) } case *array.StringBuilder: - for _, v := range list.GetStringListVal().GetVal() { + for _, v := range value.GetStringListVal().GetVal() { valueBuilder.Append(v) } case *array.Int32Builder: - for _, v := range list.GetInt32ListVal().GetVal() { + for _, v := range value.GetInt32ListVal().GetVal() { valueBuilder.Append(v) } case *array.Int64Builder: - for _, v := range list.GetInt64ListVal().GetVal() { + for _, v := range value.GetInt64ListVal().GetVal() { valueBuilder.Append(v) } case *array.Float32Builder: - for _, v := range list.GetFloatListVal().GetVal() { + for _, v := range value.GetFloatListVal().GetVal() { valueBuilder.Append(v) } case *array.Float64Builder: - for _, v := range list.GetDoubleListVal().GetVal() { + for _, v := range value.GetDoubleListVal().GetVal() { valueBuilder.Append(v) } case *array.TimestampBuilder: - for _, v := range list.GetUnixTimestampListVal().GetVal() { + for _, v := range value.GetUnixTimestampListVal().GetVal() { valueBuilder.Append(arrow.Timestamp(v)) } } + default: + return fmt.Errorf("unsupported array builder: %s", builder) } - default: - return fmt.Errorf("unsupported array builder: %s", builder) } return nil } @@ -249,41 +242,68 @@ func ArrowValuesToProtoValues(arr arrow.Array) ([]*types.Value, error) { switch arr.DataType() { case arrow.PrimitiveTypes.Int32: - for _, v := range arr.(*array.Int32).Int32Values() { - values = append(values, &types.Value{Val: &types.Value_Int32Val{Int32Val: v}}) + for idx := 0; idx < arr.Len(); idx++ { + if arr.IsNull(idx) { + values = append(values, &types.Value{}) + } else { + values = append(values, &types.Value{Val: &types.Value_Int32Val{Int32Val: arr.(*array.Int32).Value(idx)}}) + } } case arrow.PrimitiveTypes.Int64: - for _, v := range arr.(*array.Int64).Int64Values() { - values = append(values, &types.Value{Val: &types.Value_Int64Val{Int64Val: v}}) + for idx := 0; idx < arr.Len(); idx++ { + if arr.IsNull(idx) { + values = append(values, &types.Value{}) + } else { + values = append(values, &types.Value{Val: &types.Value_Int64Val{Int64Val: arr.(*array.Int64).Value(idx)}}) + } } case arrow.PrimitiveTypes.Float32: - for _, v := range arr.(*array.Float32).Float32Values() { - values = append(values, &types.Value{Val: &types.Value_FloatVal{FloatVal: v}}) + for idx := 0; idx < arr.Len(); idx++ { + if arr.IsNull(idx) { + values = append(values, &types.Value{}) + } else { + values = append(values, &types.Value{Val: &types.Value_FloatVal{FloatVal: arr.(*array.Float32).Value(idx)}}) + } } case 
arrow.PrimitiveTypes.Float64: - for _, v := range arr.(*array.Float64).Float64Values() { - values = append(values, &types.Value{Val: &types.Value_DoubleVal{DoubleVal: v}}) + for idx := 0; idx < arr.Len(); idx++ { + if arr.IsNull(idx) { + values = append(values, &types.Value{}) + } else { + values = append(values, &types.Value{Val: &types.Value_DoubleVal{DoubleVal: arr.(*array.Float64).Value(idx)}}) + } } case arrow.FixedWidthTypes.Boolean: for idx := 0; idx < arr.Len(); idx++ { - values = append(values, - &types.Value{Val: &types.Value_BoolVal{BoolVal: arr.(*array.Boolean).Value(idx)}}) + if arr.IsNull(idx) { + values = append(values, &types.Value{}) + } else { + values = append(values, &types.Value{Val: &types.Value_BoolVal{BoolVal: arr.(*array.Boolean).Value(idx)}}) + } } case arrow.BinaryTypes.Binary: for idx := 0; idx < arr.Len(); idx++ { - values = append(values, - &types.Value{Val: &types.Value_BytesVal{BytesVal: arr.(*array.Binary).Value(idx)}}) + if arr.IsNull(idx) { + values = append(values, &types.Value{}) + } else { + values = append(values, &types.Value{Val: &types.Value_BytesVal{BytesVal: arr.(*array.Binary).Value(idx)}}) + } } case arrow.BinaryTypes.String: for idx := 0; idx < arr.Len(); idx++ { - values = append(values, - &types.Value{Val: &types.Value_StringVal{StringVal: arr.(*array.String).Value(idx)}}) + if arr.IsNull(idx) { + values = append(values, &types.Value{}) + } else { + values = append(values, &types.Value{Val: &types.Value_StringVal{StringVal: arr.(*array.String).Value(idx)}}) + } } case arrow.FixedWidthTypes.Timestamp_s: for idx := 0; idx < arr.Len(); idx++ { - values = append(values, - &types.Value{Val: &types.Value_UnixTimestampVal{ - UnixTimestampVal: int64(arr.(*array.Timestamp).Value(idx))}}) + if arr.IsNull(idx) { + values = append(values, &types.Value{}) + } else { + values = append(values, &types.Value{Val: &types.Value_UnixTimestampVal{UnixTimestampVal: int64(arr.(*array.Timestamp).Value(idx))}}) + } } case arrow.Null: for idx := 0; idx < arr.Len(); idx++ { @@ -306,7 +326,9 @@ func ProtoValuesToArrowArray(protoValues []*types.Value, arrowAllocator memory.A if err != nil { return nil, err } - break + if fieldType != nil { + break + } } } diff --git a/go/types/typeconversion_test.go b/go/types/typeconversion_test.go index 1f89593ea01..4869369c186 100644 --- a/go/types/typeconversion_test.go +++ b/go/types/typeconversion_test.go @@ -1,27 +1,46 @@ package types import ( + "math" "testing" "time" "github.com/apache/arrow/go/v8/arrow/memory" - "github.com/golang/protobuf/proto" "github.com/stretchr/testify/assert" + "google.golang.org/protobuf/proto" "github.com/feast-dev/feast/go/protos/feast/types" ) +var nil_or_null_val = &types.Value{} + var ( PROTO_VALUES = [][]*types.Value{ + {{Val: nil}}, + {{Val: nil}, {Val: nil}}, + {nil_or_null_val, nil_or_null_val}, + {nil_or_null_val, {Val: nil}}, + {{Val: &types.Value_Int32Val{10}}, {Val: nil}, nil_or_null_val, {Val: &types.Value_Int32Val{20}}}, + {{Val: &types.Value_Int32Val{10}}, nil_or_null_val}, + {nil_or_null_val, {Val: &types.Value_Int32Val{20}}}, {{Val: &types.Value_Int32Val{10}}, {Val: &types.Value_Int32Val{20}}}, + {{Val: &types.Value_Int64Val{10}}, nil_or_null_val}, {{Val: &types.Value_Int64Val{10}}, {Val: &types.Value_Int64Val{20}}}, + {nil_or_null_val, {Val: &types.Value_FloatVal{2.0}}}, {{Val: &types.Value_FloatVal{1.0}}, {Val: &types.Value_FloatVal{2.0}}}, + {{Val: &types.Value_FloatVal{1.0}}, {Val: &types.Value_FloatVal{2.0}}, {Val: &types.Value_FloatVal{float32(math.NaN())}}}, {{Val: 
&types.Value_DoubleVal{1.0}}, {Val: &types.Value_DoubleVal{2.0}}}, + {{Val: &types.Value_DoubleVal{1.0}}, {Val: &types.Value_DoubleVal{2.0}}, {Val: &types.Value_DoubleVal{math.NaN()}}}, + {{Val: &types.Value_DoubleVal{1.0}}, nil_or_null_val}, + {nil_or_null_val, {Val: &types.Value_StringVal{"bbb"}}}, {{Val: &types.Value_StringVal{"aaa"}}, {Val: &types.Value_StringVal{"bbb"}}}, + {{Val: &types.Value_BytesVal{[]byte{1, 2, 3}}}, nil_or_null_val}, {{Val: &types.Value_BytesVal{[]byte{1, 2, 3}}}, {Val: &types.Value_BytesVal{[]byte{4, 5, 6}}}}, + {nil_or_null_val, {Val: &types.Value_BoolVal{false}}}, {{Val: &types.Value_BoolVal{true}}, {Val: &types.Value_BoolVal{false}}}, - {{Val: &types.Value_UnixTimestampVal{time.Now().Unix()}}, - {Val: &types.Value_UnixTimestampVal{time.Now().Unix()}}}, + {{Val: &types.Value_UnixTimestampVal{time.Now().Unix()}}, nil_or_null_val}, + {{Val: &types.Value_UnixTimestampVal{time.Now().Unix()}}, {Val: &types.Value_UnixTimestampVal{time.Now().Unix()}}}, + {{Val: &types.Value_UnixTimestampVal{time.Now().Unix()}}, {Val: &types.Value_UnixTimestampVal{time.Now().Unix()}}, {Val: &types.Value_UnixTimestampVal{-9223372036854775808}}}, { {Val: &types.Value_Int32ListVal{&types.Int32List{Val: []int32{0, 1, 2}}}}, @@ -55,6 +74,11 @@ var ( {Val: &types.Value_UnixTimestampListVal{&types.Int64List{Val: []int64{time.Now().Unix()}}}}, {Val: &types.Value_UnixTimestampListVal{&types.Int64List{Val: []int64{time.Now().Unix()}}}}, }, + { + {Val: &types.Value_UnixTimestampListVal{&types.Int64List{Val: []int64{time.Now().Unix(), time.Now().Unix()}}}}, + {Val: &types.Value_UnixTimestampListVal{&types.Int64List{Val: []int64{time.Now().Unix(), time.Now().Unix()}}}}, + {Val: &types.Value_UnixTimestampListVal{&types.Int64List{Val: []int64{-9223372036854775808, time.Now().Unix()}}}}, + }, } ) diff --git a/infra/charts/feast-feature-server/Chart.yaml b/infra/charts/feast-feature-server/Chart.yaml index 8d564f3b420..fca8f0c98c5 100644 --- a/infra/charts/feast-feature-server/Chart.yaml +++ b/infra/charts/feast-feature-server/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: feast-feature-server description: Feast Feature Server in Go or Python type: application -version: 0.37.1 +version: 0.38.0 keywords: - machine learning - big data diff --git a/infra/charts/feast-feature-server/README.md b/infra/charts/feast-feature-server/README.md index a9c609c3d62..9ff5652485a 100644 --- a/infra/charts/feast-feature-server/README.md +++ b/infra/charts/feast-feature-server/README.md @@ -1,6 +1,6 @@ # Feast Python / Go Feature Server Helm Charts -Current chart version is `0.37.1` +Current chart version is `0.38.0` ## Installation @@ -13,11 +13,19 @@ helm repo update Install Feast Feature Server on Kubernetes -A base64 encoded version of the `feature_store.yaml` file is needed. Helm install example: +- Feast Deployment Mode: The Feast Feature Server supports multiple deployment modes using the `feast_mode` property. Supported modes are `online` (default), `offline`, `ui`, and `registry`. +Users can set the `feast_mode` based on their deployment choice. The `online` mode is the default and maintains backward compatibility with previous Feast Feature Server implementations. + +- Feature Store File: A base64 encoded version of the `feature_store.yaml` file is needed. 
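+
+For example, once the chart is installed with `feast_mode=offline` (see the install examples below), a client repository can point its offline store at the deployed service. A minimal sketch of the client-side `feature_store.yaml` (the host shown is a hypothetical in-cluster service name, and the port assumes the offline server's default 8815; both depend on your release name and Service settings):
+
+```
+offline_store:
+  type: remote
+  host: feast-offline-server-feast-feature-server
+  port: 8815
+```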
+
+Helm install examples:
+
 ```
-helm install feast-feature-server feast-charts/feast-feature-server --set feature_store_yaml_base64=$(base64 feature_store.yaml)
+helm install feast-feature-server feast-charts/feast-feature-server --set feature_store_yaml_base64=$(base64 < feature_store.yaml)
+helm install feast-offline-server feast-charts/feast-feature-server --set feast_mode=offline --set feature_store_yaml_base64=$(base64 < feature_store.yaml)
+helm install feast-ui-server feast-charts/feast-feature-server --set feast_mode=ui --set feature_store_yaml_base64=$(base64 < feature_store.yaml)
+helm install feast-registry-server feast-charts/feast-feature-server --set feast_mode=registry --set feature_store_yaml_base64=$(base64 < feature_store.yaml)
+
 ```
-> Alternatively, deploy this helm chart with a [Kubernetes Operator](/infra/feast-operator).
 
 ## Tutorial
 See [here](https://github.com/feast-dev/feast/tree/master/examples/python-helm-demo) for a sample tutorial on testing this helm chart with a demo feature repository and a local Redis instance.
@@ -27,11 +35,12 @@ See [here](https://github.com/feast-dev/feast/tree/master/examples/python-helm-d
 | Key | Type | Default | Description |
 |-----|------|---------|-------------|
 | affinity | object | `{}` | |
+| feast_mode | string | `"online"` | Feast supported deployment modes - online (default), offline, ui and registry |
 | feature_store_yaml_base64 | string | `""` | [required] a base64 encoded version of feature_store.yaml |
 | fullnameOverride | string | `""` | |
 | image.pullPolicy | string | `"IfNotPresent"` | |
 | image.repository | string | `"feastdev/feature-server"` | Docker image for Feature Server repository |
-| image.tag | string | `"0.37.1"` | The Docker image tag (can be overwritten if custom feature server deps are needed for on demand transforms) |
+| image.tag | string | `"0.38.0"` | The Docker image tag (can be overwritten if custom feature server deps are needed for on demand transforms) |
 | imagePullSecrets | list | `[]` | |
 | livenessProbe.initialDelaySeconds | int | `30` | |
 | livenessProbe.periodSeconds | int | `30` | |
diff --git a/infra/charts/feast-feature-server/README.md.gotmpl b/infra/charts/feast-feature-server/README.md.gotmpl
index fb877208e06..be2fdae2482 100644
--- a/infra/charts/feast-feature-server/README.md.gotmpl
+++ b/infra/charts/feast-feature-server/README.md.gotmpl
@@ -13,9 +13,18 @@ helm repo update
 
 Install Feast Feature Server on Kubernetes
 
-A base64 encoded version of the `feature_store.yaml` file is needed. Helm install example:
+- Feast Deployment Mode: The Feast Feature Server supports multiple deployment modes using the `feast_mode` property. Supported modes are `online` (default), `offline`, `ui`, and `registry`.
+Users can set the `feast_mode` based on their deployment choice. The `online` mode is the default and maintains backward compatibility with previous Feast Feature Server implementations.
+
+- Feature Store File: A base64 encoded version of the `feature_store.yaml` file is needed.
+
+Helm install examples:
+
 ```
-helm install feast-feature-server feast-charts/feast-feature-server --set feature_store_yaml_base64=$(base64 feature_store.yaml)
+helm install feast-feature-server feast-charts/feast-feature-server --set feature_store_yaml_base64=$(base64 < feature_store.yaml)
+helm install feast-offline-server feast-charts/feast-feature-server --set feast_mode=offline --set feature_store_yaml_base64=$(base64 < feature_store.yaml)
+helm install feast-ui-server feast-charts/feast-feature-server --set feast_mode=ui --set feature_store_yaml_base64=$(base64 < feature_store.yaml)
+helm install feast-registry-server feast-charts/feast-feature-server --set feast_mode=registry --set feature_store_yaml_base64=$(base64 < feature_store.yaml)
+
 ```
 
 ## Tutorial
diff --git a/infra/charts/feast-feature-server/templates/deployment.yaml b/infra/charts/feast-feature-server/templates/deployment.yaml
index 94c56de9dda..85b323610d2 100644
--- a/infra/charts/feast-feature-server/templates/deployment.yaml
+++ b/infra/charts/feast-feature-server/templates/deployment.yaml
@@ -33,19 +33,46 @@ spec:
           env:
             - name: FEATURE_STORE_YAML_BASE64
               value: {{ .Values.feature_store_yaml_base64 }}
-          command: ["feast", "serve", "-h", "0.0.0.0"]
+          command:
+          {{- if eq .Values.feast_mode "offline" }}
+            - "feast"
+            - "serve_offline"
+            - "-h"
+            - "0.0.0.0"
+          {{- else if eq .Values.feast_mode "ui" }}
+            - "feast"
+            - "ui"
+            - "-h"
+            - "0.0.0.0"
+          {{- else if eq .Values.feast_mode "registry" }}
+            - "feast"
+            - "serve_registry"
+          {{- else }}
+            - "feast"
+            - "serve"
+            - "-h"
+            - "0.0.0.0"
+          {{- end }}
           ports:
-            - name: http
+            - name: {{ .Values.feast_mode }}
+              {{- if eq .Values.feast_mode "offline" }}
+              containerPort: 8815
+              {{- else if eq .Values.feast_mode "ui" }}
+              containerPort: 8888
+              {{- else if eq .Values.feast_mode "registry" }}
+              containerPort: 6570
+              {{- else }}
               containerPort: 6566
+              {{- end }}
               protocol: TCP
           livenessProbe:
             tcpSocket:
-              port: http
+              port: {{ .Values.feast_mode }}
             initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
             periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
           readinessProbe:
             tcpSocket:
-              port: http
+              port: {{ .Values.feast_mode }}
             initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }}
             periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
           resources:
diff --git a/infra/charts/feast-feature-server/templates/service.yaml b/infra/charts/feast-feature-server/templates/service.yaml
index db0ac8b10b8..68f096264e0 100644
--- a/infra/charts/feast-feature-server/templates/service.yaml
+++ b/infra/charts/feast-feature-server/templates/service.yaml
@@ -8,7 +8,7 @@ spec:
   type: {{ .Values.service.type }}
   ports:
     - port: {{ .Values.service.port }}
-      targetPort: http
+      targetPort: {{ .Values.feast_mode }}
       protocol: TCP
       name: http
   selector:
diff --git a/infra/charts/feast-feature-server/values.yaml b/infra/charts/feast-feature-server/values.yaml
index df5241ebb2d..a6dd2d0f945 100644
--- a/infra/charts/feast-feature-server/values.yaml
+++ b/infra/charts/feast-feature-server/values.yaml
@@ -9,7 +9,7 @@ image:
   repository: feastdev/feature-server
   pullPolicy: IfNotPresent
   # image.tag -- The Docker image tag (can be overwritten if custom feature server deps are needed for on demand transforms)
-  tag: 0.37.1
+  tag: 0.38.0
 
 imagePullSecrets: []
 nameOverride: ""
 fullnameOverride: ""
 
 # feature_store_yaml_base64 -- [required] a base64 encoded version of feature_store.yaml
 feature_store_yaml_base64: ""
 
+# feast_mode -- Feast supported deployment modes - online
(default), offline, ui and registry +feast_mode: "online" + podAnnotations: {} podSecurityContext: {} diff --git a/infra/charts/feast/Chart.yaml b/infra/charts/feast/Chart.yaml index 21c00e4483b..109b6713933 100644 --- a/infra/charts/feast/Chart.yaml +++ b/infra/charts/feast/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v1 description: Feature store for machine learning name: feast -version: 0.37.1 +version: 0.38.0 keywords: - machine learning - big data diff --git a/infra/charts/feast/README.md b/infra/charts/feast/README.md index 8ab816dc707..70296aa130c 100644 --- a/infra/charts/feast/README.md +++ b/infra/charts/feast/README.md @@ -8,7 +8,7 @@ This repo contains Helm charts for Feast Java components that are being installe ## Chart: Feast -Feature store for machine learning Current chart version is `0.37.1` +Feature store for machine learning Current chart version is `0.38.0` ## Installation @@ -65,8 +65,8 @@ See [here](https://github.com/feast-dev/feast/tree/master/examples/java-demo) fo | Repository | Name | Version | |------------|------|---------| | https://charts.helm.sh/stable | redis | 10.5.6 | -| https://feast-helm-charts.storage.googleapis.com | feature-server(feature-server) | 0.37.1 | -| https://feast-helm-charts.storage.googleapis.com | transformation-service(transformation-service) | 0.37.1 | +| https://feast-helm-charts.storage.googleapis.com | feature-server(feature-server) | 0.38.0 | +| https://feast-helm-charts.storage.googleapis.com | transformation-service(transformation-service) | 0.38.0 | ## Values diff --git a/infra/charts/feast/charts/feature-server/Chart.yaml b/infra/charts/feast/charts/feature-server/Chart.yaml index 08563c6e069..3df922d7994 100644 --- a/infra/charts/feast/charts/feature-server/Chart.yaml +++ b/infra/charts/feast/charts/feature-server/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v1 description: "Feast Feature Server: Online feature serving service for Feast" name: feature-server -version: 0.37.1 -appVersion: v0.37.1 +version: 0.38.0 +appVersion: v0.38.0 keywords: - machine learning - big data diff --git a/infra/charts/feast/charts/feature-server/README.md b/infra/charts/feast/charts/feature-server/README.md index 3018b31c96c..8266efeda3d 100644 --- a/infra/charts/feast/charts/feature-server/README.md +++ b/infra/charts/feast/charts/feature-server/README.md @@ -1,6 +1,6 @@ # feature-server -![Version: 0.37.1](https://img.shields.io/badge/Version-0.37.1-informational?style=flat-square) ![AppVersion: v0.37.1](https://img.shields.io/badge/AppVersion-v0.37.1-informational?style=flat-square) +![Version: 0.38.0](https://img.shields.io/badge/Version-0.38.0-informational?style=flat-square) ![AppVersion: v0.38.0](https://img.shields.io/badge/AppVersion-v0.38.0-informational?style=flat-square) Feast Feature Server: Online feature serving service for Feast @@ -17,7 +17,7 @@ Feast Feature Server: Online feature serving service for Feast | envOverrides | object | `{}` | Extra environment variables to set | | image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | | image.repository | string | `"feastdev/feature-server-java"` | Docker image for Feature Server repository | -| image.tag | string | `"0.37.1"` | Image tag | +| image.tag | string | `"0.38.0"` | Image tag | | ingress.grpc.annotations | object | `{}` | Extra annotations for the ingress | | ingress.grpc.auth.enabled | bool | `false` | Flag to enable auth | | ingress.grpc.class | string | `"nginx"` | Which ingress controller to use | diff --git a/infra/charts/feast/charts/feature-server/values.yaml 
b/infra/charts/feast/charts/feature-server/values.yaml index 1d86059c1fd..fac64c18c7b 100644 --- a/infra/charts/feast/charts/feature-server/values.yaml +++ b/infra/charts/feast/charts/feature-server/values.yaml @@ -5,7 +5,7 @@ image: # image.repository -- Docker image for Feature Server repository repository: feastdev/feature-server-java # image.tag -- Image tag - tag: 0.37.1 + tag: 0.38.0 # image.pullPolicy -- Image pull policy pullPolicy: IfNotPresent diff --git a/infra/charts/feast/charts/transformation-service/Chart.yaml b/infra/charts/feast/charts/transformation-service/Chart.yaml index bad9befa0bf..91f0781f523 100644 --- a/infra/charts/feast/charts/transformation-service/Chart.yaml +++ b/infra/charts/feast/charts/transformation-service/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v1 description: "Transformation service: to compute on-demand features" name: transformation-service -version: 0.37.1 -appVersion: v0.37.1 +version: 0.38.0 +appVersion: v0.38.0 keywords: - machine learning - big data diff --git a/infra/charts/feast/charts/transformation-service/README.md b/infra/charts/feast/charts/transformation-service/README.md index f912b4c02f7..7b33e4b4a13 100644 --- a/infra/charts/feast/charts/transformation-service/README.md +++ b/infra/charts/feast/charts/transformation-service/README.md @@ -1,6 +1,6 @@ # transformation-service -![Version: 0.37.1](https://img.shields.io/badge/Version-0.37.1-informational?style=flat-square) ![AppVersion: v0.37.1](https://img.shields.io/badge/AppVersion-v0.37.1-informational?style=flat-square) +![Version: 0.38.0](https://img.shields.io/badge/Version-0.38.0-informational?style=flat-square) ![AppVersion: v0.38.0](https://img.shields.io/badge/AppVersion-v0.38.0-informational?style=flat-square) Transformation service: to compute on-demand features @@ -13,7 +13,7 @@ Transformation service: to compute on-demand features | envOverrides | object | `{}` | Extra environment variables to set | | image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | | image.repository | string | `"feastdev/feature-transformation-server"` | Docker image for Transformation Server repository | -| image.tag | string | `"0.37.1"` | Image tag | +| image.tag | string | `"0.38.0"` | Image tag | | nodeSelector | object | `{}` | Node labels for pod assignment | | podLabels | object | `{}` | Labels to be added to Feast Serving pods | | replicaCount | int | `1` | Number of pods that will be created | diff --git a/infra/charts/feast/charts/transformation-service/values.yaml b/infra/charts/feast/charts/transformation-service/values.yaml index df5ea64c347..8c116cf7783 100644 --- a/infra/charts/feast/charts/transformation-service/values.yaml +++ b/infra/charts/feast/charts/transformation-service/values.yaml @@ -5,7 +5,7 @@ image: # image.repository -- Docker image for Transformation Server repository repository: feastdev/feature-transformation-server # image.tag -- Image tag - tag: 0.37.1 + tag: 0.38.0 # image.pullPolicy -- Image pull policy pullPolicy: IfNotPresent diff --git a/infra/charts/feast/requirements.yaml b/infra/charts/feast/requirements.yaml index 80b8c861326..d9c5f747b8a 100644 --- a/infra/charts/feast/requirements.yaml +++ b/infra/charts/feast/requirements.yaml @@ -1,12 +1,12 @@ dependencies: - name: feature-server alias: feature-server - version: 0.37.1 + version: 0.38.0 condition: feature-server.enabled repository: https://feast-helm-charts.storage.googleapis.com - name: transformation-service alias: transformation-service - version: 0.37.1 + version: 0.38.0 condition: 
transformation-service.enabled repository: https://feast-helm-charts.storage.googleapis.com - name: redis diff --git a/infra/feast-operator/Makefile b/infra/feast-operator/Makefile index 84e69d6eaca..1388778f9fe 100644 --- a/infra/feast-operator/Makefile +++ b/infra/feast-operator/Makefile @@ -3,7 +3,7 @@ # To re-generate a bundle for another specific version without changing the standard setup, you can: # - use the VERSION as arg of the bundle target (e.g make bundle VERSION=0.0.2) # - use environment variables to overwrite this value (e.g export VERSION=0.0.2) -VERSION ?= 0.37.1 +VERSION ?= 0.37.0 # CHANNELS define the bundle channels used in the bundle. # Add a new line here if you would like to change its default config. (E.g CHANNELS = "candidate,fast,stable") diff --git a/infra/feast-operator/config/manager/kustomization.yaml b/infra/feast-operator/config/manager/kustomization.yaml index be181e33472..226b87118d2 100644 --- a/infra/feast-operator/config/manager/kustomization.yaml +++ b/infra/feast-operator/config/manager/kustomization.yaml @@ -5,4 +5,4 @@ kind: Kustomization images: - name: controller newName: feastdev/feast-operator - newTag: 0.37.1 + newTag: 0.37.0 diff --git a/infra/scripts/pixi/pixi.lock b/infra/scripts/pixi/pixi.lock index 19a32f32ae8..f1ce2d26585 100644 --- a/infra/scripts/pixi/pixi.lock +++ b/infra/scripts/pixi/pixi.lock @@ -1,4 +1,4 @@ -version: 4 +version: 5 environments: default: channels: @@ -11,6 +11,9 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h807b86a_5.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h95c4c6d_6.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/uv-0.1.39-h0ea3d13_0.conda + osx-arm64: + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-17.0.6-h5f092b4_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/uv-0.1.45-hc069d6b_0.conda py310: channels: - url: https://conda.anaconda.org/conda-forge/ @@ -38,6 +41,21 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/uv-0.1.39-h0ea3d13_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2 + osx-arm64: + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-h93a5062_5.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ca-certificates-2024.2.2-hf0a4a13_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-17.0.6-h5f092b4_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libffi-3.4.2-h3422bc3_5.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsqlite-3.45.3-h091b4b1_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libzlib-1.2.13-hfb2fe0b_6.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ncurses-6.5-hb89a1cb_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.3.0-hfb2fe0b_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.10.14-h2469fbe_0_cpython.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/readline-8.2-h92ec313_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tk-8.6.13-h5083fa2_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/uv-0.1.45-hc069d6b_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/xz-5.2.6-h57fd34a_0.tar.bz2 py311: channels: - url: https://conda.anaconda.org/conda-forge/ @@ -66,6 +84,22 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/uv-0.1.39-h0ea3d13_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2 + osx-arm64: + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-h93a5062_5.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ca-certificates-2024.2.2-hf0a4a13_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-17.0.6-h5f092b4_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libexpat-2.6.2-hebf3989_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libffi-3.4.2-h3422bc3_5.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsqlite-3.45.3-h091b4b1_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libzlib-1.2.13-hfb2fe0b_6.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ncurses-6.5-hb89a1cb_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.3.0-hfb2fe0b_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.11.9-h932a869_0_cpython.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/readline-8.2-h92ec313_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tk-8.6.13-h5083fa2_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/uv-0.1.45-hc069d6b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/xz-5.2.6-h57fd34a_0.tar.bz2 py39: channels: - url: https://conda.anaconda.org/conda-forge/ @@ -93,6 +127,21 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/uv-0.1.39-h0ea3d13_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2 + osx-arm64: + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-h93a5062_5.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ca-certificates-2024.2.2-hf0a4a13_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-17.0.6-h5f092b4_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libffi-3.4.2-h3422bc3_5.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsqlite-3.45.3-h091b4b1_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libzlib-1.2.13-hfb2fe0b_6.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ncurses-6.5-hb89a1cb_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.3.0-hfb2fe0b_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.9.19-hd7ebdb9_0_cpython.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/readline-8.2-h92ec313_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tk-8.6.13-h5083fa2_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/uv-0.1.45-hc069d6b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/xz-5.2.6-h57fd34a_0.tar.bz2 packages: - kind: conda name: _libgcc_mutex @@ -123,6 +172,19 @@ packages: license_family: BSD size: 
23621 timestamp: 1650670423406 +- kind: conda + name: bzip2 + version: 1.0.8 + build: h93a5062_5 + build_number: 5 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-h93a5062_5.conda + sha256: bfa84296a638bea78a8bb29abc493ee95f2a0218775642474a840411b950fe5f + md5: 1bbc659ca658bfd49a481b5ef7a0f40f + license: bzip2-1.0.6 + license_family: BSD + size: 122325 + timestamp: 1699280294368 - kind: conda name: bzip2 version: 1.0.8 @@ -149,6 +211,17 @@ packages: license: ISC size: 155432 timestamp: 1706843687645 +- kind: conda + name: ca-certificates + version: 2024.2.2 + build: hf0a4a13_0 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/ca-certificates-2024.2.2-hf0a4a13_0.conda + sha256: 49bc3439816ac72d0c0e0f144b8cc870fdcc4adec2e861407ec818d8116b2204 + md5: fb416a1795f18dcc5a038bc2dc54edf9 + license: ISC + size: 155725 + timestamp: 1706844034242 - kind: conda name: ld_impl_linux-64 version: '2.40' @@ -177,6 +250,20 @@ packages: license_family: GPL size: 713322 timestamp: 1713651222435 +- kind: conda + name: libcxx + version: 17.0.6 + build: h5f092b4_0 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-17.0.6-h5f092b4_0.conda + sha256: 119d3d9306f537d4c89dc99ed99b94c396d262f0b06f7833243646f68884f2c2 + md5: a96fd5dda8ce56c86a971e0fa02751d0 + depends: + - __osx >=11.0 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + size: 1248885 + timestamp: 1715020154867 - kind: conda name: libexpat version: 2.6.2 @@ -193,6 +280,33 @@ packages: license_family: MIT size: 73730 timestamp: 1710362120304 +- kind: conda + name: libexpat + version: 2.6.2 + build: hebf3989_0 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/libexpat-2.6.2-hebf3989_0.conda + sha256: ba7173ac30064ea901a4c9fb5a51846dcc25512ceb565759be7d18cbf3e5415e + md5: e3cde7cfa87f82f7cb13d482d5e0ad09 + constrains: + - expat 2.6.2.* + license: MIT + license_family: MIT + size: 63655 + timestamp: 1710362424980 +- kind: conda + name: libffi + version: 3.4.2 + build: h3422bc3_5 + build_number: 5 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/libffi-3.4.2-h3422bc3_5.tar.bz2 + sha256: 41b3d13efb775e340e4dba549ab5c029611ea6918703096b2eaa9c015c0750ca + md5: 086914b672be056eb70fd4285b6783b6 + license: MIT + license_family: MIT + size: 39020 + timestamp: 1636488587153 - kind: conda name: libffi version: 3.4.2 @@ -288,6 +402,19 @@ packages: license_family: GPL size: 33408 timestamp: 1697359010159 +- kind: conda + name: libsqlite + version: 3.45.3 + build: h091b4b1_0 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/libsqlite-3.45.3-h091b4b1_0.conda + sha256: 4337f466eb55bbdc74e168b52ec8c38f598e3664244ec7a2536009036e2066cc + md5: c8c1186c7f3351f6ffddb97b1f54fc58 + depends: + - libzlib >=1.2.13,<2.0.0a0 + license: Unlicense + size: 824794 + timestamp: 1713367748819 - kind: conda name: libsqlite version: 3.45.3 @@ -360,6 +487,23 @@ packages: license_family: Other size: 61588 timestamp: 1686575217516 +- kind: conda + name: libzlib + version: 1.2.13 + build: hfb2fe0b_6 + build_number: 6 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/libzlib-1.2.13-hfb2fe0b_6.conda + sha256: 8b29a2386d99b8f58178951dcf19117b532cd9c4aa07623bf1667eae99755d32 + md5: 9c4e121cd926cab631bd1c4a61d18b17 + depends: + - __osx >=11.0 + constrains: + - zlib 1.2.13 *_6 + license: Zlib + license_family: Other + size: 46768 + timestamp: 1716874151980 - kind: conda name: ncurses 
version: 6.4.20240210 @@ -373,6 +517,17 @@ packages: license: X11 AND BSD-3-Clause size: 895669 timestamp: 1710866638986 +- kind: conda + name: ncurses + version: '6.5' + build: hb89a1cb_0 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/ncurses-6.5-hb89a1cb_0.conda + sha256: 87d7cf716d9d930dab682cb57b3b8d3a61940b47d6703f3529a155c938a6990a + md5: b13ad5724ac9ae98b6b4fd87e4500ba4 + license: X11 AND BSD-3-Clause + size: 795131 + timestamp: 1715194898402 - kind: conda name: openssl version: 3.2.1 @@ -408,6 +563,24 @@ packages: license_family: Apache size: 2895187 timestamp: 1714466138265 +- kind: conda + name: openssl + version: 3.3.0 + build: hfb2fe0b_3 + build_number: 3 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.3.0-hfb2fe0b_3.conda + sha256: 6f41c163ab57e7499dff092be4498614651f0f6432e12c2b9f06859a8bc39b75 + md5: 730f618b008b3c13c1e3f973408ddd67 + depends: + - __osx >=11.0 + - ca-certificates + constrains: + - pyopenssl >=22.1 + license: Apache-2.0 + license_family: Apache + size: 2893954 + timestamp: 1716468329572 - kind: conda name: python version: 3.9.19 @@ -437,6 +610,54 @@ packages: license: Python-2.0 size: 23800555 timestamp: 1710940120866 +- kind: conda + name: python + version: 3.9.19 + build: hd7ebdb9_0_cpython + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.9.19-hd7ebdb9_0_cpython.conda + sha256: 3b93f7a405f334043758dfa8aaca050429a954a37721a6462ebd20e94ef7c5a0 + md5: 45c4d173b12154f746be3b49b1190634 + depends: + - bzip2 >=1.0.8,<2.0a0 + - libffi >=3.4,<4.0a0 + - libsqlite >=3.45.2,<4.0a0 + - libzlib >=1.2.13,<2.0.0a0 + - ncurses >=6.4.20240210,<7.0a0 + - openssl >=3.2.1,<4.0a0 + - readline >=8.2,<9.0a0 + - tk >=8.6.13,<8.7.0a0 + - tzdata + - xz >=5.2.6,<6.0a0 + constrains: + - python_abi 3.9.* *_cp39 + license: Python-2.0 + size: 11847835 + timestamp: 1710939779164 +- kind: conda + name: python + version: 3.10.14 + build: h2469fbe_0_cpython + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.10.14-h2469fbe_0_cpython.conda + sha256: 454d609fe25daedce9e886efcbfcadad103ed0362e7cb6d2bcddec90b1ecd3ee + md5: 4ae999c8227c6d8c7623d32d51d25ea9 + depends: + - bzip2 >=1.0.8,<2.0a0 + - libffi >=3.4,<4.0a0 + - libsqlite >=3.45.2,<4.0a0 + - libzlib >=1.2.13,<2.0.0a0 + - ncurses >=6.4.20240210,<7.0a0 + - openssl >=3.2.1,<4.0a0 + - readline >=8.2,<9.0a0 + - tk >=8.6.13,<8.7.0a0 + - tzdata + - xz >=5.2.6,<6.0a0 + constrains: + - python_abi 3.10.* *_cp310 + license: Python-2.0 + size: 12336005 + timestamp: 1710939659384 - kind: conda name: python version: 3.10.14 @@ -466,6 +687,32 @@ packages: license: Python-2.0 size: 25517742 timestamp: 1710939725109 +- kind: conda + name: python + version: 3.11.9 + build: h932a869_0_cpython + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.11.9-h932a869_0_cpython.conda + sha256: a436ceabde1f056a0ac3e347dadc780ee2a135a421ddb6e9a469370769829e3c + md5: 293e0713ae804b5527a673e7605c04fc + depends: + - __osx >=11.0 + - bzip2 >=1.0.8,<2.0a0 + - libexpat >=2.6.2,<3.0a0 + - libffi >=3.4,<4.0a0 + - libsqlite >=3.45.3,<4.0a0 + - libzlib >=1.2.13,<2.0.0a0 + - ncurses >=6.4.20240210,<7.0a0 + - openssl >=3.2.1,<4.0a0 + - readline >=8.2,<9.0a0 + - tk >=8.6.13,<8.7.0a0 + - tzdata + - xz >=5.2.6,<6.0a0 + constrains: + - python_abi 3.11.* *_cp311 + license: Python-2.0 + size: 14644189 + timestamp: 1713552154779 - kind: conda name: python version: 3.11.9 @@ -512,6 +759,36 @@ packages: license_family: GPL 
size: 281456 timestamp: 1679532220005 +- kind: conda + name: readline + version: '8.2' + build: h92ec313_1 + build_number: 1 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/readline-8.2-h92ec313_1.conda + sha256: a1dfa679ac3f6007362386576a704ad2d0d7a02e98f5d0b115f207a2da63e884 + md5: 8cbb776a2f641b943d413b3e19df71f4 + depends: + - ncurses >=6.3,<7.0a0 + license: GPL-3.0-only + license_family: GPL + size: 250351 + timestamp: 1679532511311 +- kind: conda + name: tk + version: 8.6.13 + build: h5083fa2_1 + build_number: 1 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/tk-8.6.13-h5083fa2_1.conda + sha256: 72457ad031b4c048e5891f3f6cb27a53cb479db68a52d965f796910e71a403a8 + md5: b50a57ba89c32b62428b71a875291c9b + depends: + - libzlib >=1.2.13,<1.3.0a0 + license: TCL + license_family: BSD + size: 3145523 + timestamp: 1699202432999 - kind: conda name: tk version: 8.6.13 @@ -554,6 +831,22 @@ packages: license: Apache-2.0 OR MIT size: 11891252 timestamp: 1714233659570 +- kind: conda + name: uv + version: 0.1.45 + build: hc069d6b_0 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/uv-0.1.45-hc069d6b_0.conda + sha256: 80dfc19f2ef473e86e718361847d1d598e95ffd0c0f5de7d07cda35d25f6aef5 + md5: 9192238a60bc6da9c41092990c31eb41 + depends: + - __osx >=11.0 + - libcxx >=16 + constrains: + - __osx >=11.0 + license: Apache-2.0 OR MIT + size: 9231858 + timestamp: 1716265232676 - kind: conda name: xz version: 5.2.6 @@ -567,3 +860,14 @@ packages: license: LGPL-2.1 and GPL-2.0 size: 418368 timestamp: 1660346797927 +- kind: conda + name: xz + version: 5.2.6 + build: h57fd34a_0 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/xz-5.2.6-h57fd34a_0.tar.bz2 + sha256: 59d78af0c3e071021cfe82dc40134c19dab8cdf804324b62940f5c8cd71803ec + md5: 39c6b54e94014701dd157f4f576ed211 + license: LGPL-2.1 and GPL-2.0 + size: 235693 + timestamp: 1660346961024 diff --git a/infra/scripts/pixi/pixi.toml b/infra/scripts/pixi/pixi.toml index f0d360fff3d..10179339f70 100644 --- a/infra/scripts/pixi/pixi.toml +++ b/infra/scripts/pixi/pixi.toml @@ -1,7 +1,7 @@ [project] name = "pixi-feast" channels = ["conda-forge"] -platforms = ["linux-64"] +platforms = ["linux-64", "osx-arm64"] [tasks] diff --git a/infra/scripts/release/bump_file_versions.py b/infra/scripts/release/bump_file_versions.py index e17463c2c7b..c913e9f43f7 100644 --- a/infra/scripts/release/bump_file_versions.py +++ b/infra/scripts/release/bump_file_versions.py @@ -1,5 +1,6 @@ # This script will bump the versions found in files (charts, pom.xml) during the Feast release process. 
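The hunk below replaces a raw string match with a parsed semantic version. As a rough sketch of the parsing idea (the regex mirrors the one added in this hunk; the sample inputs are illustrative, not taken from the repository):

```python
import re

# Pattern added below: tolerate an optional leading "v" and capture MAJOR.MINOR.PATCH.
SEMVER_PATTERN = r"\bv?(\d+\.\d+\.\d+)\b"


def extract_semver(line: str) -> str:
    """Return the first semantic version found in a line of a chart or pom file."""
    match = re.search(SEMVER_PATTERN, line)
    assert match, f"No semantic version found in: {line!r}"
    return match.group(1)


# Illustrative inputs: a Helm chart version line and a "v"-prefixed tag.
assert extract_semver("version: 0.37.1") == "0.37.1"
assert extract_semver("tag: v0.37.1") == "0.37.1"
```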
+import re
 import pathlib
 import sys
 
@@ -45,7 +46,9 @@ def main() -> None:
         with open(repo_root.joinpath(file_path), "r") as f:
             file_contents = f.readlines()
         for line in lines:
-            file_contents[int(line) - 1] = file_contents[int(line) - 1].replace(current_version, new_version)
+            # Note: the version on this line was already validated above.
+            current_parsed_version = _get_semantic_version(file_contents[int(line) - 1])
+            file_contents[int(line) - 1] = file_contents[int(line) - 1].replace(current_parsed_version, new_version)
         with open(repo_root.joinpath(file_path), "w") as f:
             f.write(''.join(file_contents))
 
@@ -73,11 +76,20 @@ def validate_files_to_bump(current_version, files_to_bump, repo_root):
         with open(repo_root.joinpath(file_path), "r") as f:
             file_contents = f.readlines()
         for line in lines:
-            assert current_version in file_contents[int(line) - 1], (
+            found_version = _get_semantic_version(file_contents[int(line) - 1])
+            current_major_minor_version = '.'.join(current_version.split(".")[0:2])
+            assert current_version in found_version or current_major_minor_version in found_version, (
                 f"File `{file_path}` line `{line}` didn't contain version {current_version}. "
                 f"Contents: {file_contents[int(line) - 1]}"
             )
 
+
+def _get_semantic_version(input_string: str) -> str:
+    semver_pattern = r'\bv?(\d+\.\d+\.\d+)\b'
+    match = re.search(semver_pattern, input_string)
+    assert match, f"No semantic version found in: {input_string!r}"
+    return match.group(1)
+
+
 if __name__ == "__main__":
     main()
diff --git a/java/pom.xml b/java/pom.xml
index 8ba8ed4ac53..6aabb87d0cc 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -35,7 +35,7 @@
-        <revision>0.37.1</revision>
+        <revision>0.38.0</revision>
         https://github.com/feast-dev/feast
         UTF-8
diff --git a/java/serving-client/src/main/java/dev/feast/FeastClient.java b/java/serving-client/src/main/java/dev/feast/FeastClient.java
index c10a76ecf81..c14d3be5867 100644
--- a/java/serving-client/src/main/java/dev/feast/FeastClient.java
+++ b/java/serving-client/src/main/java/dev/feast/FeastClient.java
@@ -49,6 +49,7 @@ public class FeastClient implements AutoCloseable {
 
   private final ManagedChannel channel;
   private final ServingServiceBlockingStub stub;
+  private final long requestTimeout;
 
   /**
    * Create a client to access Feast Serving.
@@ -63,7 +64,22 @@ public static FeastClient create(String host, int port) {
   }
 
   /**
-   * Create a authenticated client that can access Feast serving with authentication enabled.
+   * Create a client to access Feast Serving.
+   *
+   * @param host hostname or ip address of Feast serving GRPC server
+   * @param port port number of Feast serving GRPC server
+   * @param requestTimeout maximum duration for online retrievals from the GRPC server in
+   *     milliseconds, use 0 for no timeout
+   * @return {@link FeastClient}
+   */
+  public static FeastClient create(String host, int port, long requestTimeout) {
+    // Configure the client with no security config.
+    return FeastClient.createSecure(
+        host, port, SecurityConfig.newBuilder().build(), requestTimeout);
+  }
+
+  /**
+   * Create an authenticated client that can access Feast serving with authentication enabled.
+ * + * @param host hostname or ip address of Feast serving GRPC server + * @param port port number of Feast serving GRPC server + * @param securityConfig security options to configure the Feast client. See {@link + * SecurityConfig} for options. + * @param requestTimeout maximum duration for online retrievals from the GRPC server in + * milliseconds + * @return {@link FeastClient} + */ + public static FeastClient createSecure( + String host, int port, SecurityConfig securityConfig, long requestTimeout) { + + if (requestTimeout < 0) { + throw new IllegalArgumentException("Request timeout can't be negative"); + } + // Configure client TLS ManagedChannel channel = null; if (securityConfig.isTLSEnabled()) { @@ -98,7 +135,7 @@ public static FeastClient createSecure(String host, int port, SecurityConfig sec channel = ManagedChannelBuilder.forAddress(host, port).usePlaintext().build(); } - return new FeastClient(channel, securityConfig.getCredentials()); + return new FeastClient(channel, securityConfig.getCredentials(), requestTimeout); } /** @@ -129,7 +166,10 @@ public List getOnlineFeatures(List featureRefs, List entities) requestBuilder.putAllEntities(getEntityValuesMap(entities)); - GetOnlineFeaturesResponse response = stub.getOnlineFeatures(requestBuilder.build()); + ServingServiceGrpc.ServingServiceBlockingStub timedStub = + requestTimeout != 0 ? stub.withDeadlineAfter(requestTimeout, TimeUnit.MILLISECONDS) : stub; + + GetOnlineFeaturesResponse response = timedStub.getOnlineFeatures(requestBuilder.build()); List results = Lists.newArrayList(); if (response.getResultsCount() == 0) { @@ -202,7 +242,13 @@ public List getOnlineFeatures(List featureRefs, List rows, Str } protected FeastClient(ManagedChannel channel, Optional credentials) { + this(channel, credentials, 0); + } + + protected FeastClient( + ManagedChannel channel, Optional credentials, long requestTimeout) { this.channel = channel; + this.requestTimeout = requestTimeout; TracingClientInterceptor tracingInterceptor = TracingClientInterceptor.newBuilder().withTracer(GlobalTracer.get()).build(); diff --git a/java/serving-client/src/test/java/dev/feast/FeastClientTest.java b/java/serving-client/src/test/java/dev/feast/FeastClientTest.java index 1dfb9989c95..cbd4b0016e5 100644 --- a/java/serving-client/src/test/java/dev/feast/FeastClientTest.java +++ b/java/serving-client/src/test/java/dev/feast/FeastClientTest.java @@ -45,6 +45,7 @@ public class FeastClientTest { private final String AUTH_TOKEN = "test token"; + private final long TIMEOUT_MILLIS = 300; @Rule public GrpcCleanupRule grpcRule; private AtomicBoolean isAuthenticated; @@ -86,7 +87,7 @@ public void setup() throws Exception { ManagedChannel channel = this.grpcRule.register( InProcessChannelBuilder.forName(serverName).directExecutor().build()); - this.client = new FeastClient(channel, Optional.empty()); + this.client = new FeastClient(channel, Optional.empty(), TIMEOUT_MILLIS); } @Test diff --git a/java/serving/pom.xml b/java/serving/pom.xml index 6929d65d934..93e4f81efef 100644 --- a/java/serving/pom.xml +++ b/java/serving/pom.xml @@ -131,7 +131,7 @@ com.azure azure-identity - 1.11.3 + 1.12.2 diff --git a/protos/feast/core/Transformation.proto b/protos/feast/core/Transformation.proto index 5cb53e690fa..7033f553f16 100644 --- a/protos/feast/core/Transformation.proto +++ b/protos/feast/core/Transformation.proto @@ -5,8 +5,6 @@ option go_package = "github.com/feast-dev/feast/go/protos/feast/core"; option java_outer_classname = "FeatureTransformationProto"; option java_package = 
"feast.proto.core"; -import "google/protobuf/duration.proto"; - // Serialized representation of python function. message UserDefinedFunctionV2 { // The function name diff --git a/protos/feast/registry/RegistryServer.proto b/protos/feast/registry/RegistryServer.proto index e99987eb2da..3ca7398fdc1 100644 --- a/protos/feast/registry/RegistryServer.proto +++ b/protos/feast/registry/RegistryServer.proto @@ -3,6 +3,7 @@ syntax = "proto3"; package feast.registry; import "google/protobuf/empty.proto"; +import "google/protobuf/timestamp.proto"; import "feast/core/Registry.proto"; import "feast/core/Entity.proto"; import "feast/core/DataSource.proto"; @@ -16,16 +17,22 @@ import "feast/core/InfraObject.proto"; service RegistryServer{ // Entity RPCs + rpc ApplyEntity (ApplyEntityRequest) returns (google.protobuf.Empty) {} rpc GetEntity (GetEntityRequest) returns (feast.core.Entity) {} rpc ListEntities (ListEntitiesRequest) returns (ListEntitiesResponse) {} + rpc DeleteEntity (DeleteEntityRequest) returns (google.protobuf.Empty) {} // DataSource RPCs + rpc ApplyDataSource (ApplyDataSourceRequest) returns (google.protobuf.Empty) {} rpc GetDataSource (GetDataSourceRequest) returns (feast.core.DataSource) {} rpc ListDataSources (ListDataSourcesRequest) returns (ListDataSourcesResponse) {} + rpc DeleteDataSource (DeleteDataSourceRequest) returns (google.protobuf.Empty) {} // FeatureView RPCs + rpc ApplyFeatureView (ApplyFeatureViewRequest) returns (google.protobuf.Empty) {} rpc GetFeatureView (GetFeatureViewRequest) returns (feast.core.FeatureView) {} rpc ListFeatureViews (ListFeatureViewsRequest) returns (ListFeatureViewsResponse) {} + rpc DeleteFeatureView (DeleteFeatureViewRequest) returns (google.protobuf.Empty) {} // StreamFeatureView RPCs rpc GetStreamFeatureView (GetStreamFeatureViewRequest) returns (feast.core.StreamFeatureView) {} @@ -36,19 +43,28 @@ service RegistryServer{ rpc ListOnDemandFeatureViews (ListOnDemandFeatureViewsRequest) returns (ListOnDemandFeatureViewsResponse) {} // FeatureService RPCs + rpc ApplyFeatureService (ApplyFeatureServiceRequest) returns (google.protobuf.Empty) {} rpc GetFeatureService (GetFeatureServiceRequest) returns (feast.core.FeatureService) {} rpc ListFeatureServices (ListFeatureServicesRequest) returns (ListFeatureServicesResponse) {} + rpc DeleteFeatureService (DeleteFeatureServiceRequest) returns (google.protobuf.Empty) {} // SavedDataset RPCs + rpc ApplySavedDataset (ApplySavedDatasetRequest) returns (google.protobuf.Empty) {} rpc GetSavedDataset (GetSavedDatasetRequest) returns (feast.core.SavedDataset) {} rpc ListSavedDatasets (ListSavedDatasetsRequest) returns (ListSavedDatasetsResponse) {} + rpc DeleteSavedDataset (DeleteSavedDatasetRequest) returns (google.protobuf.Empty) {} // ValidationReference RPCs + rpc ApplyValidationReference (ApplyValidationReferenceRequest) returns (google.protobuf.Empty) {} rpc GetValidationReference (GetValidationReferenceRequest) returns (feast.core.ValidationReference) {} rpc ListValidationReferences (ListValidationReferencesRequest) returns (ListValidationReferencesResponse) {} - + rpc DeleteValidationReference (DeleteValidationReferenceRequest) returns (google.protobuf.Empty) {} + + rpc ApplyMaterialization (ApplyMaterializationRequest) returns (google.protobuf.Empty) {} rpc ListProjectMetadata (ListProjectMetadataRequest) returns (ListProjectMetadataResponse) {} + rpc UpdateInfra (UpdateInfraRequest) returns (google.protobuf.Empty) {} rpc GetInfra (GetInfraRequest) returns (feast.core.Infra) {} + rpc Commit 
(google.protobuf.Empty) returns (google.protobuf.Empty) {} rpc Refresh (RefreshRequest) returns (google.protobuf.Empty) {} rpc Proto (google.protobuf.Empty) returns (feast.core.Registry) {} @@ -58,6 +74,12 @@ message RefreshRequest { string project = 1; } +message UpdateInfraRequest { + feast.core.Infra infra = 1; + string project = 2; + bool commit = 3; +} + message GetInfraRequest { string project = 1; bool allow_cache = 2; @@ -72,6 +94,20 @@ message ListProjectMetadataResponse { repeated feast.core.ProjectMetadata project_metadata = 1; } +message ApplyMaterializationRequest { + feast.core.FeatureView feature_view = 1; + string project = 2; + google.protobuf.Timestamp start_date = 3; + google.protobuf.Timestamp end_date = 4; + bool commit = 5; +} + +message ApplyEntityRequest { + feast.core.Entity entity = 1; + string project = 2; + bool commit = 3; +} + message GetEntityRequest { string name = 1; string project = 2; @@ -87,8 +123,20 @@ message ListEntitiesResponse { repeated feast.core.Entity entities = 1; } +message DeleteEntityRequest { + string name = 1; + string project = 2; + bool commit = 3; +} + // DataSources +message ApplyDataSourceRequest { + feast.core.DataSource data_source = 1; + string project = 2; + bool commit = 3; +} + message GetDataSourceRequest { string name = 1; string project = 2; @@ -104,8 +152,24 @@ message ListDataSourcesResponse { repeated feast.core.DataSource data_sources = 1; } +message DeleteDataSourceRequest { + string name = 1; + string project = 2; + bool commit = 3; +} + // FeatureViews +message ApplyFeatureViewRequest { + oneof base_feature_view { + feast.core.FeatureView feature_view = 1; + feast.core.OnDemandFeatureView on_demand_feature_view = 2; + feast.core.StreamFeatureView stream_feature_view = 3; + } + string project = 4; + bool commit = 5; +} + message GetFeatureViewRequest { string name = 1; string project = 2; @@ -121,6 +185,12 @@ message ListFeatureViewsResponse { repeated feast.core.FeatureView feature_views = 1; } +message DeleteFeatureViewRequest { + string name = 1; + string project = 2; + bool commit = 3; +} + // StreamFeatureView message GetStreamFeatureViewRequest { @@ -157,6 +227,12 @@ message ListOnDemandFeatureViewsResponse { // FeatureServices +message ApplyFeatureServiceRequest { + feast.core.FeatureService feature_service = 1; + string project = 2; + bool commit = 3; +} + message GetFeatureServiceRequest { string name = 1; string project = 2; @@ -172,8 +248,20 @@ message ListFeatureServicesResponse { repeated feast.core.FeatureService feature_services = 1; } +message DeleteFeatureServiceRequest { + string name = 1; + string project = 2; + bool commit = 3; +} + // SavedDataset +message ApplySavedDatasetRequest { + feast.core.SavedDataset saved_dataset = 1; + string project = 2; + bool commit = 3; +} + message GetSavedDatasetRequest { string name = 1; string project = 2; @@ -189,8 +277,20 @@ message ListSavedDatasetsResponse { repeated feast.core.SavedDataset saved_datasets = 1; } +message DeleteSavedDatasetRequest { + string name = 1; + string project = 2; + bool commit = 3; +} + // ValidationReference +message ApplyValidationReferenceRequest { + feast.core.ValidationReference validation_reference = 1; + string project = 2; + bool commit = 3; +} + message GetValidationReferenceRequest { string name = 1; string project = 2; @@ -205,3 +305,9 @@ message ListValidationReferencesRequest { message ListValidationReferencesResponse { repeated feast.core.ValidationReference validation_references = 1; } + +message 
DeleteValidationReferenceRequest {
+  string name = 1;
+  string project = 2;
+  bool commit = 3;
+}
\ No newline at end of file
diff --git a/sdk/python/docs/index.rst b/sdk/python/docs/index.rst
index 4cedffb1fc0..1ef6bd16c80 100644
--- a/sdk/python/docs/index.rst
+++ b/sdk/python/docs/index.rst
@@ -182,12 +182,6 @@ S3 Registry Store
 .. autoclass:: feast.infra.registry.s3.S3RegistryStore
     :members:
 
-PostgreSQL Registry Store
--------------------------
-
-.. autoclass:: feast.infra.registry.contrib.postgres.postgres_registry_store.PostgreSQLRegistryStore
-    :members:
-
 Provider
 ==================
diff --git a/sdk/python/docs/source/index.rst b/sdk/python/docs/source/index.rst
index 4cedffb1fc0..1ef6bd16c80 100644
--- a/sdk/python/docs/source/index.rst
+++ b/sdk/python/docs/source/index.rst
@@ -182,12 +182,6 @@ S3 Registry Store
 .. autoclass:: feast.infra.registry.s3.S3RegistryStore
     :members:
 
-PostgreSQL Registry Store
--------------------------
-
-.. autoclass:: feast.infra.registry.contrib.postgres.postgres_registry_store.PostgreSQLRegistryStore
-    :members:
-
 Provider
 ==================
diff --git a/sdk/python/feast/cli.py b/sdk/python/feast/cli.py
index f239c2dfad5..eeffc29fab0 100644
--- a/sdk/python/feast/cli.py
+++ b/sdk/python/feast/cli.py
@@ -27,6 +27,7 @@
 from feast import utils
 from feast.constants import (
     DEFAULT_FEATURE_TRANSFORMATION_SERVER_PORT,
+    DEFAULT_OFFLINE_SERVER_PORT,
     DEFAULT_REGISTRY_SERVER_PORT,
 )
 from feast.errors import FeastObjectNotFoundException, FeastProviderLoginError
@@ -644,12 +645,6 @@ def init_command(project_directory, minimal: bool, template: str):
     show_default=True,
     help="Disable the Uvicorn access log",
 )
-@click.option(
-    "--no-feature-log",
-    is_flag=True,
-    show_default=True,
-    help="Disable logging served features",
-)
 @click.option(
     "--workers",
     "-w",
@@ -680,7 +675,6 @@ def serve_command(
     port: int,
     type_: str,
     no_access_log: bool,
-    no_feature_log: bool,
     workers: int,
     keep_alive_timeout: int,
     registry_ttl_sec: int = 5,
@@ -693,7 +687,6 @@ def serve_command(
         port=port,
         type_=type_,
         no_access_log=no_access_log,
-        no_feature_log=no_feature_log,
         workers=workers,
         keep_alive_timeout=keep_alive_timeout,
         registry_ttl_sec=registry_ttl_sec,
@@ -773,6 +766,34 @@ def serve_registry_command(ctx: click.Context, port: int):
     store.serve_registry(port)
 
 
+@cli.command("serve_offline")
+@click.option(
+    "--host",
+    "-h",
+    type=click.STRING,
+    default="127.0.0.1",
+    show_default=True,
+    help="Specify a host for the server",
+)
+@click.option(
+    "--port",
+    "-p",
+    type=click.INT,
+    default=DEFAULT_OFFLINE_SERVER_PORT,
+    help="Specify a port for the server",
+)
+@click.pass_context
+def serve_offline_command(
+    ctx: click.Context,
+    host: str,
+    port: int,
+):
+    """Start an offline feature server locally on a given host and port."""
+    store = create_feature_store(ctx)
+
+    store.serve_offline(host, port)
+
+
 @cli.command("validate")
 @click.option(
     "--feature-service",
diff --git a/sdk/python/feast/constants.py b/sdk/python/feast/constants.py
index 6aad3e60bbf..fa8674d91d2 100644
--- a/sdk/python/feast/constants.py
+++ b/sdk/python/feast/constants.py
@@ -41,6 +41,9 @@
 # Default registry server port
 DEFAULT_REGISTRY_SERVER_PORT = 6570
 
+# Default offline server port
+DEFAULT_OFFLINE_SERVER_PORT = 8815
+
 # Environment variable for feature server docker image tag
 DOCKER_IMAGE_TAG_ENV_NAME: str = "FEAST_SERVER_DOCKER_IMAGE_TAG"
 
diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py
index 343aa04d604..716e706ebe5 100644
--- a/sdk/python/feast/feature_store.py
+++ 
b/sdk/python/feast/feature_store.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 import copy
 import itertools
+import logging
 import os
 import warnings
 from collections import Counter, defaultdict
@@ -247,6 +248,20 @@ def list_feature_services(self) -> List[FeatureService]:
         """
         return self._registry.list_feature_services(self.project)
 
+    def list_all_feature_views(
+        self, allow_cache: bool = False
+    ) -> List[Union[FeatureView, StreamFeatureView, OnDemandFeatureView]]:
+        """
+        Retrieves the list of feature views from the registry.
+
+        Args:
+            allow_cache: Whether to allow returning feature views from a cached registry.
+
+        Returns:
+            A list of feature views.
+        """
+        return self._list_all_feature_views(allow_cache)
+
     def list_feature_views(self, allow_cache: bool = False) -> List[FeatureView]:
         """
         Retrieves the list of feature views from the registry.
@@ -257,12 +272,50 @@ def list_feature_views(self, allow_cache: bool = False) -> List[FeatureView]:
         Returns:
             A list of feature views.
         """
+        logging.warning(
+            "The behavior of list_feature_views will change in a future release to match "
+            "list_all_feature_views. Please use list_batch_feature_views to keep the current behavior."
+        )
         return self._list_feature_views(allow_cache)
 
+    def _list_all_feature_views(
+        self,
+        allow_cache: bool = False,
+    ) -> List[Union[FeatureView, StreamFeatureView, OnDemandFeatureView]]:
+        all_feature_views = (
+            self._list_feature_views(allow_cache)
+            + self._list_stream_feature_views(allow_cache)
+            + self.list_on_demand_feature_views(allow_cache)
+        )
+        return all_feature_views
+
     def _list_feature_views(
         self,
         allow_cache: bool = False,
         hide_dummy_entity: bool = True,
+    ) -> List[FeatureView]:
+        logging.warning(
+            "The behavior of _list_feature_views will change in a future release to match "
+            "_list_all_feature_views. Please use _list_batch_feature_views to keep the current behavior."
+        )
+        feature_views = []
+        for fv in self._registry.list_feature_views(
+            self.project, allow_cache=allow_cache
+        ):
+            if (
+                hide_dummy_entity
+                and fv.entities
+                and fv.entities[0] == DUMMY_ENTITY_NAME
+            ):
+                fv.entities = []
+                fv.entity_columns = []
+            feature_views.append(fv)
+        return feature_views
+
+    def _list_batch_feature_views(
+        self,
+        allow_cache: bool = False,
+        hide_dummy_entity: bool = True,
     ) -> List[FeatureView]:
         feature_views = []
         for fv in self._registry.list_feature_views(
@@ -1881,18 +1934,30 @@ def _retrieve_online_documents(
                 "Using embedding functionality is not supported for document retrieval. Please embed the query before calling retrieve_online_documents."
             )
         (
-            requested_feature_views,
+            available_feature_views,
             _,
         ) = self._get_feature_views_to_use(
             features=[feature], allow_cache=True, hide_dummy_entity=False
         )
+        requested_feature_view_name = (
+            feature.split(":")[0] if isinstance(feature, str) else feature
+        )
+        requested_feature_view = None
+        for feature_view in available_feature_views:
+            if feature_view.name == requested_feature_view_name:
+                requested_feature_view = feature_view
+                break
+        if not requested_feature_view:
+            raise ValueError(
+                f"Feature view {requested_feature_view_name} not found in the registry."
+ ) requested_feature = ( feature.split(":")[1] if isinstance(feature, str) else feature ) provider = self._get_provider() document_features = self._retrieve_from_online_store( provider, - requested_feature_views[0], + requested_feature_view, requested_feature, query, top_k, @@ -2448,12 +2511,11 @@ def serve( self, host: str, port: int, - type_: str, - no_access_log: bool, - no_feature_log: bool, - workers: int, - keep_alive_timeout: int, - registry_ttl_sec: int, + type_: str = "http", + no_access_log: bool = True, + workers: int = 1, + keep_alive_timeout: int = 30, + registry_ttl_sec: int = 2, ) -> None: """Start the feature consumption server locally on a given port.""" type_ = type_.lower() @@ -2507,6 +2569,12 @@ def serve_registry(self, port: int) -> None: registry_server.start_server(self, port) + def serve_offline(self, host: str, port: int) -> None: + """Start offline server locally on a given port.""" + from feast import offline_server + + offline_server.start_server(self, host, port) + def serve_transformations(self, port: int) -> None: """Start the feature transformation server locally on a given port.""" warnings.warn( diff --git a/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile b/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile index 8a441479184..c272f4ed66d 100644 --- a/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile +++ b/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile @@ -16,4 +16,6 @@ RUN wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | RUN apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb RUN apt update RUN apt -y install libarrow-dev -RUN mkdir -m 775 /.cache \ No newline at end of file +# modify permissions to support running with a random uid +RUN mkdir -m 775 /.cache +RUN chmod g+w $(python -c "import feast.ui as _; print(_.__path__)" | tr -d "[']")/build/projects-list.json diff --git a/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile.dev b/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile.dev index 948e3569a64..858a5ae7d1a 100644 --- a/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile.dev +++ b/sdk/python/feast/infra/feature_servers/multicloud/Dockerfile.dev @@ -16,4 +16,7 @@ RUN apt install -y -V ca-certificates lsb-release wget RUN wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb RUN apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb RUN apt update -RUN apt -y install libarrow-dev \ No newline at end of file +RUN apt -y install libarrow-dev +# modify permissions to support running with a random uid +RUN mkdir -m 775 /.cache +RUN chmod g+w $(python -c "import feast.ui as _; print(_.__path__)" | tr -d "[']")/build/projects-list.json diff --git a/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py index f95a750fd14..b43c874ddc3 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py @@ -47,7 +47,6 @@ def create_data_source( self, df: pd.DataFrame, destination_name: str, - event_timestamp_column="ts", created_timestamp_column="created_ts", field_mapping: Optional[Dict[str, str]] = None, timestamp_field: 
Optional[str] = "ts", diff --git a/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/tests/data_source.py index bf892e9d969..ccf826c068f 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/tests/data_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/tests/data_source.py @@ -64,7 +64,6 @@ def create_data_source( self, df: pd.DataFrame, destination_name: str, - event_timestamp_column="ts", created_timestamp_column="created_ts", field_mapping: Optional[Dict[str, str]] = None, timestamp_field: Optional[str] = "ts", diff --git a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py index a23d90e1868..c94b04329e0 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py @@ -91,7 +91,6 @@ def create_data_source( self, df: pd.DataFrame, destination_name: str, - event_timestamp_column="ts", created_timestamp_column="created_ts", field_mapping: Optional[Dict[str, str]] = None, timestamp_field: Optional[str] = "ts", @@ -115,7 +114,7 @@ def create_offline_store_config(self) -> PostgreSQLOfflineStoreConfig: def get_prefixed_table_name(self, suffix: str) -> str: return f"{self.project_name}_{suffix}" - def create_online_store(self) -> PostgreSQLOnlineStoreConfig: + def create_online_store(self) -> PostgreSQLOnlineStoreConfig: # type: ignore assert self.container return PostgreSQLOnlineStoreConfig( type="postgres", diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/tests/data_source.py index b9785218857..7093e40b99e 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/tests/data_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/tests/data_source.py @@ -69,7 +69,6 @@ def create_data_source( self, df: pd.DataFrame, destination_name: str, - event_timestamp_column="ts", created_timestamp_column="created_ts", field_mapping: Optional[Dict[str, str]] = None, timestamp_field: Optional[str] = "ts", diff --git a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/tests/data_source.py index 0dee517eb37..c8fc15a6350 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/tests/data_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/tests/data_source.py @@ -81,7 +81,6 @@ def create_data_source( self, df: pd.DataFrame, destination_name: str, - event_timestamp_column="ts", created_timestamp_column="created_ts", field_mapping: Optional[Dict[str, str]] = None, timestamp_field: Optional[str] = "ts", diff --git a/sdk/python/feast/infra/offline_stores/remote.py b/sdk/python/feast/infra/offline_stores/remote.py new file mode 100644 index 00000000000..dc657017d9b --- /dev/null +++ b/sdk/python/feast/infra/offline_stores/remote.py @@ -0,0 +1,407 @@ +import json +import logging +import uuid +from datetime import datetime +from pathlib import Path +from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union + +import numpy as np +import pandas as pd +import 
pyarrow as pa
+import pyarrow.flight as fl
+import pyarrow.parquet
+from pydantic import StrictInt, StrictStr
+
+from feast import OnDemandFeatureView
+from feast.data_source import DataSource
+from feast.feature_logging import (
+    FeatureServiceLoggingSource,
+    LoggingConfig,
+    LoggingSource,
+)
+from feast.feature_view import FeatureView
+from feast.infra.offline_stores import offline_utils
+from feast.infra.offline_stores.offline_store import (
+    OfflineStore,
+    RetrievalJob,
+    RetrievalMetadata,
+)
+from feast.infra.registry.base_registry import BaseRegistry
+from feast.repo_config import FeastConfigBaseModel, RepoConfig
+from feast.saved_dataset import SavedDatasetStorage
+
+logger = logging.getLogger(__name__)
+
+
+class RemoteOfflineStoreConfig(FeastConfigBaseModel):
+    type: Literal["remote"] = "remote"
+    host: StrictStr
+    """ str: remote offline store server host, e.g. the host or URL of the Arrow Flight server. """
+
+    port: Optional[StrictInt] = None
+    """ int: remote offline store server port. """
+
+
+class RemoteRetrievalJob(RetrievalJob):
+    def __init__(
+        self,
+        client: fl.FlightClient,
+        api: str,
+        api_parameters: Dict[str, Any],
+        entity_df: Optional[Union[pd.DataFrame, str]] = None,
+        table: Optional[pa.Table] = None,
+        metadata: Optional[RetrievalMetadata] = None,
+    ):
+        # Initialize the client connection
+        self.client = client
+        self.api = api
+        self.api_parameters = api_parameters
+        self.entity_df = entity_df
+        self.table = table
+        self._metadata = metadata
+
+    # Invoked to realize the Pandas DataFrame
+    def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame:
+        # We use the Arrow format because it gives better control of the table schema
+        return self._to_arrow_internal().to_pandas()
+
+    # Invoked to synchronously execute the underlying query and return the result as an Arrow table.
+    # This is where the do_get service is invoked.
+    def _to_arrow_internal(self, timeout: Optional[int] = None) -> pa.Table:
+        return _send_retrieve_remote(
+            self.api, self.api_parameters, self.entity_df, self.table, self.client
+        )
+
+    @property
+    def on_demand_feature_views(self) -> List[OnDemandFeatureView]:
+        return []
+
+    @property
+    def metadata(self) -> Optional[RetrievalMetadata]:
+        return self._metadata
+
+    @property
+    def full_feature_names(self) -> bool:
+        return self.api_parameters["full_feature_names"]
+
+    def persist(
+        self,
+        storage: SavedDatasetStorage,
+        allow_overwrite: bool = False,
+        timeout: Optional[int] = None,
+    ):
+        """
+        An Arrow Flight action is used to perform the persist operation remotely.
+        """
+
+        api_parameters = {
+            "data_source_name": storage.to_data_source().name,
+            "allow_overwrite": allow_overwrite,
+            "timeout": timeout,
+        }
+
+        # Add api parameters to command
+        for key, value in self.api_parameters.items():
+            api_parameters[key] = value
+
+        api_parameters["retrieve_func"] = self.api
+
+        _call_put(
+            api=RemoteRetrievalJob.persist.__name__,
+            api_parameters=api_parameters,
+            client=self.client,
+            table=self.table,
+            entity_df=self.entity_df,
+        )
+
+
+class RemoteOfflineStore(OfflineStore):
+    @staticmethod
+    def get_historical_features(
+        config: RepoConfig,
+        feature_views: List[FeatureView],
+        feature_refs: List[str],
+        entity_df: Union[pd.DataFrame, str],
+        registry: BaseRegistry,
+        project: str,
+        full_feature_names: bool = False,
+    ) -> RemoteRetrievalJob:
+        assert isinstance(config.offline_store, RemoteOfflineStoreConfig)
+
+        # Initialize the client connection
+        client = RemoteOfflineStore.init_client(config)
+
+        feature_view_names = [fv.name for
fv in feature_views] + name_aliases = [fv.projection.name_alias for fv in feature_views] + + api_parameters = { + "feature_view_names": feature_view_names, + "feature_refs": feature_refs, + "project": project, + "full_feature_names": full_feature_names, + "name_aliases": name_aliases, + } + + return RemoteRetrievalJob( + client=client, + api=OfflineStore.get_historical_features.__name__, + api_parameters=api_parameters, + entity_df=entity_df, + metadata=_create_retrieval_metadata(feature_refs, entity_df), + ) + + @staticmethod + def pull_all_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + timestamp_field: str, + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + assert isinstance(config.offline_store, RemoteOfflineStoreConfig) + + # Initialize the client connection + client = RemoteOfflineStore.init_client(config) + + api_parameters = { + "data_source_name": data_source.name, + "join_key_columns": join_key_columns, + "feature_name_columns": feature_name_columns, + "timestamp_field": timestamp_field, + "start_date": start_date.isoformat(), + "end_date": end_date.isoformat(), + } + + return RemoteRetrievalJob( + client=client, + api=OfflineStore.pull_all_from_table_or_query.__name__, + api_parameters=api_parameters, + ) + + @staticmethod + def pull_latest_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + timestamp_field: str, + created_timestamp_column: Optional[str], + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + assert isinstance(config.offline_store, RemoteOfflineStoreConfig) + + # Initialize the client connection + client = RemoteOfflineStore.init_client(config) + + api_parameters = { + "data_source_name": data_source.name, + "join_key_columns": join_key_columns, + "feature_name_columns": feature_name_columns, + "timestamp_field": timestamp_field, + "created_timestamp_column": created_timestamp_column, + "start_date": start_date.isoformat(), + "end_date": end_date.isoformat(), + } + + return RemoteRetrievalJob( + client=client, + api=OfflineStore.pull_latest_from_table_or_query.__name__, + api_parameters=api_parameters, + ) + + @staticmethod + def write_logged_features( + config: RepoConfig, + data: Union[pyarrow.Table, Path], + source: LoggingSource, + logging_config: LoggingConfig, + registry: BaseRegistry, + ): + assert isinstance(config.offline_store, RemoteOfflineStoreConfig) + assert isinstance(source, FeatureServiceLoggingSource) + + if isinstance(data, Path): + data = pyarrow.parquet.read_table(data, use_threads=False, pre_buffer=False) + + # Initialize the client connection + client = RemoteOfflineStore.init_client(config) + + api_parameters = { + "feature_service_name": source._feature_service.name, + } + + _call_put( + api=OfflineStore.write_logged_features.__name__, + api_parameters=api_parameters, + client=client, + table=data, + entity_df=None, + ) + + @staticmethod + def offline_write_batch( + config: RepoConfig, + feature_view: FeatureView, + table: pyarrow.Table, + progress: Optional[Callable[[int], Any]], + ): + assert isinstance(config.offline_store, RemoteOfflineStoreConfig) + + # Initialize the client connection + client = RemoteOfflineStore.init_client(config) + + feature_view_names = [feature_view.name] + name_aliases = [feature_view.projection.name_alias] + + api_parameters = { + "feature_view_names": feature_view_names, + "progress": progress, + 
"name_aliases": name_aliases, + } + + _call_put( + api=OfflineStore.offline_write_batch.__name__, + api_parameters=api_parameters, + client=client, + table=table, + entity_df=None, + ) + + @staticmethod + def init_client(config): + location = f"grpc://{config.offline_store.host}:{config.offline_store.port}" + client = fl.connect(location=location) + logger.info(f"Connecting FlightClient at {location}") + return client + + +def _create_retrieval_metadata(feature_refs: List[str], entity_df: pd.DataFrame): + entity_schema = _get_entity_schema( + entity_df=entity_df, + ) + + event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( + entity_schema=entity_schema, + ) + + timestamp_range = _get_entity_df_event_timestamp_range( + entity_df, event_timestamp_col + ) + + return RetrievalMetadata( + features=feature_refs, + keys=list(set(entity_df.columns) - {event_timestamp_col}), + min_event_timestamp=timestamp_range[0], + max_event_timestamp=timestamp_range[1], + ) + + +def _get_entity_schema(entity_df: pd.DataFrame) -> Dict[str, np.dtype]: + return dict(zip(entity_df.columns, entity_df.dtypes)) + + +def _get_entity_df_event_timestamp_range( + entity_df: Union[pd.DataFrame, str], + entity_df_event_timestamp_col: str, +) -> Tuple[datetime, datetime]: + if not isinstance(entity_df, pd.DataFrame): + raise ValueError( + f"Please provide an entity_df of type {type(pd.DataFrame)} instead of type {type(entity_df)}" + ) + + entity_df_event_timestamp = entity_df.loc[ + :, entity_df_event_timestamp_col + ].infer_objects() + if pd.api.types.is_string_dtype(entity_df_event_timestamp): + entity_df_event_timestamp = pd.to_datetime(entity_df_event_timestamp, utc=True) + + return ( + entity_df_event_timestamp.min().to_pydatetime(), + entity_df_event_timestamp.max().to_pydatetime(), + ) + + +def _send_retrieve_remote( + api: str, + api_parameters: Dict[str, Any], + entity_df: Union[pd.DataFrame, str], + table: pa.Table, + client: fl.FlightClient, +): + command_descriptor = _call_put(api, api_parameters, client, entity_df, table) + return _call_get(client, command_descriptor) + + +def _call_get(client: fl.FlightClient, command_descriptor: fl.FlightDescriptor): + flight = client.get_flight_info(command_descriptor) + ticket = flight.endpoints[0].ticket + reader = client.do_get(ticket) + return reader.read_all() + + +def _call_put( + api: str, + api_parameters: Dict[str, Any], + client: fl.FlightClient, + entity_df: Union[pd.DataFrame, str], + table: pa.Table, +): + # Generate unique command identifier + command_id = str(uuid.uuid4()) + command = { + "command_id": command_id, + "api": api, + } + # Add api parameters to command + for key, value in api_parameters.items(): + command[key] = value + + command_descriptor = fl.FlightDescriptor.for_command( + json.dumps( + command, + ) + ) + + _put_parameters(command_descriptor, entity_df, table, client) + return command_descriptor + + +def _put_parameters( + command_descriptor: fl.FlightDescriptor, + entity_df: Union[pd.DataFrame, str], + table: pa.Table, + client: fl.FlightClient, +): + updatedTable: pa.Table + + if entity_df is not None: + updatedTable = pa.Table.from_pandas(entity_df) + elif table is not None: + updatedTable = table + else: + updatedTable = _create_empty_table() + + writer, _ = client.do_put( + command_descriptor, + updatedTable.schema, + ) + + writer.write_table(updatedTable) + writer.close() + + +def _create_empty_table(): + schema = pa.schema( + { + "key": pa.string(), + } + ) + + keys = ["mock_key"] + + table = 
pa.Table.from_pydict(dict(zip(schema.names, [keys])), schema=schema)
+
+    return table
diff --git a/sdk/python/feast/infra/online_stores/dynamodb.py b/sdk/python/feast/infra/online_stores/dynamodb.py
index 0ee9af185d3..b2488543b02 100644
--- a/sdk/python/feast/infra/online_stores/dynamodb.py
+++ b/sdk/python/feast/infra/online_stores/dynamodb.py
@@ -33,6 +33,8 @@
 try:
     import boto3
+    from aiobotocore import session
+    from boto3.dynamodb.types import TypeDeserializer
     from botocore.config import Config
     from botocore.exceptions import ClientError
 except ImportError as e:
@@ -80,6 +82,7 @@ class DynamoDBOnlineStore(OnlineStore):
 
     _dynamodb_client = None
     _dynamodb_resource = None
+    _aioboto_session = None
 
     def update(
@@ -223,6 +226,7 @@ def online_read(
         """
         online_config = config.online_store
         assert isinstance(online_config, DynamoDBOnlineStoreConfig)
+
         dynamodb_resource = self._get_dynamodb_resource(
             online_config.region, online_config.endpoint_url
         )
@@ -230,62 +234,95 @@ def online_read(
             _get_table_name(online_config, config, table)
         )
 
-        result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []
-        entity_ids = [
-            compute_entity_id(
-                entity_key,
-                entity_key_serialization_version=config.entity_key_serialization_version,
-            )
-            for entity_key in entity_keys
-        ]
         batch_size = online_config.batch_size
+        entity_ids = self._to_entity_ids(config, entity_keys)
         entity_ids_iter = iter(entity_ids)
+        result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []
+
         while True:
             batch = list(itertools.islice(entity_ids_iter, batch_size))
-            batch_result: List[
-                Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]
-            ] = []
+
+            # No more items to insert
             if len(batch) == 0:
                 break
-            batch_entity_ids = {
-                table_instance.name: {
-                    "Keys": [{"entity_id": entity_id} for entity_id in batch],
-                    "ConsistentRead": online_config.consistent_reads,
-                }
-            }
+            batch_entity_ids = self._to_resource_batch_get_payload(
+                online_config, table_instance.name, batch
+            )
             response = dynamodb_resource.batch_get_item(
                 RequestItems=batch_entity_ids,
             )
-            response = response.get("Responses")
-            table_responses = response.get(table_instance.name)
-            if table_responses:
-                table_responses = self._sort_dynamodb_response(
-                    table_responses, entity_ids
-                )
-                entity_idx = 0
-                for tbl_res in table_responses:
-                    entity_id = tbl_res["entity_id"]
-                    while entity_id != batch[entity_idx]:
-                        batch_result.append((None, None))
-                        entity_idx += 1
-                    res = {}
-                    for feature_name, value_bin in tbl_res["values"].items():
-                        val = ValueProto()
-                        val.ParseFromString(value_bin.value)
-                        res[feature_name] = val
-                    batch_result.append(
-                        (datetime.fromisoformat(tbl_res["event_ts"]), res)
-                    )
-                    entity_idx += 1
-
-            # Not all entities in a batch may have responses
-            # Pad with remaining values in batch that were not found
-            batch_size_nones = ((None, None),) * (len(batch) - len(batch_result))
-            batch_result.extend(batch_size_nones)
+            batch_result = self._process_batch_get_response(
+                table_instance.name, response, entity_ids, batch
+            )
             result.extend(batch_result)
         return result
 
+    async def online_read_async(
+        self,
+        config: RepoConfig,
+        table: FeatureView,
+        entity_keys: List[EntityKeyProto],
+        requested_features: Optional[List[str]] = None,
+    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
+        """
+        Reads feature values for the given entity keys asynchronously.
+
+        Args:
+            config: The config for the current feature store.
+            table: The feature view whose feature values should be read.
+ entity_keys: The list of entity keys for which feature values should be read. + requested_features: The list of features that should be read. + + Returns: + A list of the same length as entity_keys. Each item in the list is a tuple where the first + item is the event timestamp for the row, and the second item is a dict mapping feature names + to values, which are returned in proto format. + """ + online_config = config.online_store + assert isinstance(online_config, DynamoDBOnlineStoreConfig) + + batch_size = online_config.batch_size + entity_ids = self._to_entity_ids(config, entity_keys) + entity_ids_iter = iter(entity_ids) + result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = [] + table_name = _get_table_name(online_config, config, table) + + deserialize = TypeDeserializer().deserialize + + def to_tbl_resp(raw_client_response): + return { + "entity_id": deserialize(raw_client_response["entity_id"]), + "event_ts": deserialize(raw_client_response["event_ts"]), + "values": deserialize(raw_client_response["values"]), + } + + async with self._get_aiodynamodb_client(online_config.region) as client: + while True: + batch = list(itertools.islice(entity_ids_iter, batch_size)) + + # No more items to insert + if len(batch) == 0: + break + batch_entity_ids = self._to_client_batch_get_payload( + online_config, table_name, batch + ) + response = await client.batch_get_item( + RequestItems=batch_entity_ids, + ) + batch_result = self._process_batch_get_response( + table_name, response, entity_ids, batch, to_tbl_response=to_tbl_resp + ) + result.extend(batch_result) + return result + + def _get_aioboto_session(self): + if self._aioboto_session is None: + self._aioboto_session = session.get_session() + return self._aioboto_session + + def _get_aiodynamodb_client(self, region: str): + return self._get_aioboto_session().create_client("dynamodb", region_name=region) + def _get_dynamodb_client(self, region: str, endpoint_url: Optional[str] = None): if self._dynamodb_client is None: self._dynamodb_client = _initialize_dynamodb_client(region, endpoint_url) @@ -298,13 +335,19 @@ def _get_dynamodb_resource(self, region: str, endpoint_url: Optional[str] = None ) return self._dynamodb_resource - def _sort_dynamodb_response(self, responses: list, order: list) -> Any: + def _sort_dynamodb_response( + self, + responses: list, + order: list, + to_tbl_response: Callable = lambda raw_dict: raw_dict, + ) -> Any: """DynamoDB Batch Get Item doesn't return items in a particular order.""" # Assign an index to order order_with_index = {value: idx for idx, value in enumerate(order)} # Sort table responses by index table_responses_ordered: Any = [ - (order_with_index[tbl_res["entity_id"]], tbl_res) for tbl_res in responses + (order_with_index[tbl_res["entity_id"]], tbl_res) + for tbl_res in map(to_tbl_response, responses) ] table_responses_ordered = sorted( table_responses_ordered, key=lambda tup: tup[0] @@ -341,6 +384,64 @@ def _write_batch_non_duplicates( if progress: progress(1) + def _process_batch_get_response( + self, table_name, response, entity_ids, batch, **sort_kwargs + ): + response = response.get("Responses") + table_responses = response.get(table_name) + + batch_result = [] + if table_responses: + table_responses = self._sort_dynamodb_response( + table_responses, entity_ids, **sort_kwargs + ) + entity_idx = 0 + for tbl_res in table_responses: + entity_id = tbl_res["entity_id"] + while entity_id != batch[entity_idx]: + batch_result.append((None, None)) + entity_idx += 1 + res = {} + for 
feature_name, value_bin in tbl_res["values"].items(): + val = ValueProto() + val.ParseFromString(value_bin.value) + res[feature_name] = val + batch_result.append((datetime.fromisoformat(tbl_res["event_ts"]), res)) + entity_idx += 1 + # Not all entities in a batch may have responses + # Pad with remaining values in batch that were not found + batch_size_nones = ((None, None),) * (len(batch) - len(batch_result)) + batch_result.extend(batch_size_nones) + return batch_result + + @staticmethod + def _to_entity_ids(config: RepoConfig, entity_keys: List[EntityKeyProto]): + return [ + compute_entity_id( + entity_key, + entity_key_serialization_version=config.entity_key_serialization_version, + ) + for entity_key in entity_keys + ] + + @staticmethod + def _to_resource_batch_get_payload(online_config, table_name, batch): + return { + table_name: { + "Keys": [{"entity_id": entity_id} for entity_id in batch], + "ConsistentRead": online_config.consistent_reads, + } + } + + @staticmethod + def _to_client_batch_get_payload(online_config, table_name, batch): + return { + table_name: { + "Keys": [{"entity_id": {"S": entity_id}} for entity_id in batch], + "ConsistentRead": online_config.consistent_reads, + } + } + def _initialize_dynamodb_client(region: str, endpoint_url: Optional[str] = None): return boto3.client( diff --git a/sdk/python/feast/infra/online_stores/redis.py b/sdk/python/feast/infra/online_stores/redis.py index 7428eb8bea4..5f0156f6204 100644 --- a/sdk/python/feast/infra/online_stores/redis.py +++ b/sdk/python/feast/infra/online_stores/redis.py @@ -77,6 +77,9 @@ class RedisOnlineStoreConfig(FeastConfigBaseModel): key_ttl_seconds: Optional[int] = None """(Optional) redis key bin ttl (in seconds) for expiring entities""" + full_scan_for_deletion: Optional[bool] = True + """(Optional) whether to scan for deletion of features""" + class RedisOnlineStore(OnlineStore): """ @@ -162,9 +165,13 @@ def update( entities_to_keep: Entities to keep partial: Whether to do a partial update """ + online_store_config = config.online_store + + assert isinstance(online_store_config, RedisOnlineStoreConfig) - for table in tables_to_delete: - self.delete_table(config, table) + if online_store_config.full_scan_for_deletion: + for table in tables_to_delete: + self.delete_table(config, table) def teardown( self, diff --git a/sdk/python/feast/infra/online_stores/remote.py b/sdk/python/feast/infra/online_stores/remote.py new file mode 100644 index 00000000000..19e1b7d5159 --- /dev/null +++ b/sdk/python/feast/infra/online_stores/remote.py @@ -0,0 +1,167 @@ +# Copyright 2021 The Feast Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
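The new remote online store module that follows proxies online reads to a running feature server over HTTP rather than talking to a database directly. For orientation, here is a minimal sketch of the request it builds for POST {path}/get-online-features (the server URL, feature view, feature, and entity names are illustrative assumptions, not part of this patch):

```python
import json

import requests

# Assumed endpoint: a Feast feature server reachable at this base URL.
FEATURE_SERVER = "http://localhost:6566"

# Features are "<feature_view>:<feature>" references; entities map a single
# join key to a list of entity values, one per requested row.
req_body = json.dumps(
    {
        "features": ["driver_hourly_stats:conv_rate"],
        "entities": {"driver_id": [1001, 1002]},
    }
)

response = requests.post(f"{FEATURE_SERVER}/get-online-features", data=req_body)
print(response.status_code, response.text)
```

The implementation below builds exactly this payload in `_construct_online_read_api_json_request` and converts the server's column-oriented response back into per-row (timestamp, feature dict) tuples.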
+import json
+import logging
+from datetime import datetime
+from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple
+
+import requests
+from pydantic import StrictStr
+
+from feast import Entity, FeatureView, RepoConfig
+from feast.infra.online_stores.online_store import OnlineStore
+from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
+from feast.protos.feast.types.Value_pb2 import Value as ValueProto
+from feast.repo_config import FeastConfigBaseModel
+from feast.type_map import python_values_to_proto_values
+from feast.value_type import ValueType
+
+logger = logging.getLogger(__name__)
+
+
+class RemoteOnlineStoreConfig(FeastConfigBaseModel):
+    """Configuration for the remote online store."""
+
+    type: Literal["remote"] = "remote"
+    """Online store type selector"""
+
+    path: StrictStr = "http://localhost:6566"
+    """ str: base URL of the remote feature server.
+    If type is 'remote', online reads are served by this feature server's /get-online-features endpoint. """
+
+
+class RemoteOnlineStore(OnlineStore):
+    """
+    Remote online store implementation that forwards online reads to a Feast feature server.
+    """
+
+    def online_write_batch(
+        self,
+        config: RepoConfig,
+        table: FeatureView,
+        data: List[
+            Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
+        ],
+        progress: Optional[Callable[[int], Any]],
+    ) -> None:
+        raise NotImplementedError
+
+    def online_read(
+        self,
+        config: RepoConfig,
+        table: FeatureView,
+        entity_keys: List[EntityKeyProto],
+        requested_features: Optional[List[str]] = None,
+    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
+        assert isinstance(config.online_store, RemoteOnlineStoreConfig)
+        config.online_store.__class__ = RemoteOnlineStoreConfig
+
+        req_body = self._construct_online_read_api_json_request(
+            entity_keys, table, requested_features
+        )
+        response = requests.post(
+            f"{config.online_store.path}/get-online-features", data=req_body
+        )
+        if response.status_code == 200:
+            logger.debug("Retrieved the online features from the feature server.")
+            response_json = json.loads(response.text)
+            event_ts = self._get_event_ts(response_json)
+            # Iterate over the results and convert the API's column format to row format.
+            result_tuples: List[
+                Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]
+            ] = []
+            for feature_value_index in range(len(entity_keys)):
+                feature_values_dict: Dict[str, ValueProto] = dict()
+                for index, feature_name in enumerate(
+                    response_json["metadata"]["feature_names"]
+                ):
+                    if (
+                        requested_features is not None
+                        and feature_name in requested_features
+                    ):
+                        if (
+                            response_json["results"][index]["statuses"][
+                                feature_value_index
+                            ]
+                            == "PRESENT"
+                        ):
+                            message = python_values_to_proto_values(
+                                [
+                                    response_json["results"][index]["values"][
+                                        feature_value_index
+                                    ]
+                                ],
+                                ValueType.UNKNOWN,
+                            )
+                            feature_values_dict[feature_name] = message[0]
+                        else:
+                            feature_values_dict[feature_name] = ValueProto()
+                result_tuples.append((event_ts, feature_values_dict))
+            return result_tuples
+        else:
+            error_msg = f"Unable to retrieve the online store data using feature server API. Error_code={response.status_code}, error_message={response.reason}"
+            logger.error(error_msg)
+            raise RuntimeError(error_msg)
+
+    def _construct_online_read_api_json_request(
+        self,
+        entity_keys: List[EntityKeyProto],
+        table: FeatureView,
+        requested_features: Optional[List[str]] = None,
+    ) -> str:
+        api_requested_features = []
+        if requested_features is not None:
+            for requested_feature in requested_features:
+                api_requested_features.append(f"{table.name}:{requested_feature}")
+
+        entity_values = []
+        entity_key = ""
+        for row in entity_keys:
+            entity_key = row.join_keys[0]
+            entity_values.append(
+                getattr(row.entity_values[0], row.entity_values[0].WhichOneof("val"))
+            )
+
+        req_body = json.dumps(
+            {
+                "features": api_requested_features,
+                "entities": {entity_key: entity_values},
+            }
+        )
+        return req_body
+
+    def _get_event_ts(self, response_json) -> datetime:
+        event_ts = ""
+        if len(response_json["results"]) > 1:
+            event_ts = response_json["results"][1]["event_timestamps"][0]
+        return datetime.fromisoformat(event_ts.replace("Z", "+00:00"))
+
+    def update(
+        self,
+        config: RepoConfig,
+        tables_to_delete: Sequence[FeatureView],
+        tables_to_keep: Sequence[FeatureView],
+        entities_to_delete: Sequence[Entity],
+        entities_to_keep: Sequence[Entity],
+        partial: bool,
+    ):
+        pass
+
+    def teardown(
+        self,
+        config: RepoConfig,
+        tables: Sequence[FeatureView],
+        entities: Sequence[Entity],
+    ):
+        pass
diff --git a/sdk/python/feast/infra/online_stores/sqlite.py b/sdk/python/feast/infra/online_stores/sqlite.py
index 63d3ef03f51..41af14aaf16 100644
--- a/sdk/python/feast/infra/online_stores/sqlite.py
+++ b/sdk/python/feast/infra/online_stores/sqlite.py
@@ -14,10 +14,14 @@
 import itertools
 import os
 import sqlite3
+import struct
+import sys
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple
+from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple, Union
 
+import sqlite_vec
+from google.protobuf.internal.containers import RepeatedScalarFieldContainer
 from pydantic import StrictStr
 
 from feast import Entity
@@ -29,6 +33,7 @@
 from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto
 from feast.protos.feast.core.SqliteTable_pb2 import SqliteTable as SqliteTableProto
 from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
+from feast.protos.feast.types.Value_pb2 import FloatList as FloatListProto
 from feast.protos.feast.types.Value_pb2 import Value as ValueProto
 from feast.repo_config import FeastConfigBaseModel, RepoConfig
 from feast.utils import to_naive_utc
@@ -45,6 +50,12 @@ class SqliteOnlineStoreConfig(FeastConfigBaseModel):
     path: StrictStr = "data/online.db"
     """ (optional) Path to sqlite db """
 
+    vec_enabled: Optional[bool] = False
+    """ (optional) Enable or disable sqlite-vec for vector search """
+
+    vector_len: Optional[int] = 512
+    """ (optional) Length of the vector to be stored in the database """
+
 
 class SqliteOnlineStore(OnlineStore):
     """
@@ -73,6 +84,10 @@ def _get_conn(self, config: RepoConfig):
         if not self._conn:
             db_path = self._get_db_path(config)
             self._conn = _initialize_conn(db_path)
+            if sys.version_info[0:2] == (3, 10):
+                self._conn.enable_load_extension(True)  # type: ignore
+                sqlite_vec.load(self._conn)
+
         return self._conn
 
     def online_write_batch(
@@ -80,7 +95,12 @@ def online_write_batch(
         config: RepoConfig,
         table: FeatureView,
         data: List[
-            Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
+            Tuple[
EntityKeyProto, + Dict[str, ValueProto], + datetime, + Optional[datetime], + ] ], progress: Optional[Callable[[int], Any]], ) -> None: @@ -98,36 +118,74 @@ def online_write_batch( if created_ts is not None: created_ts = to_naive_utc(created_ts) + table_name = _table_id(project, table) for feature_name, val in values.items(): - conn.execute( - f""" - UPDATE {_table_id(project, table)} - SET value = ?, event_ts = ?, created_ts = ? - WHERE (entity_key = ? AND feature_name = ?) - """, - ( - # SET - val.SerializeToString(), - timestamp, - created_ts, - # WHERE - entity_key_bin, - feature_name, - ), - ) - - conn.execute( - f"""INSERT OR IGNORE INTO {_table_id(project, table)} - (entity_key, feature_name, value, event_ts, created_ts) - VALUES (?, ?, ?, ?, ?)""", - ( - entity_key_bin, - feature_name, - val.SerializeToString(), - timestamp, - created_ts, - ), - ) + if config.online_store.vec_enabled: + vector_bin = serialize_f32( + val.float_list_val.val, config.online_store.vector_len + ) # type: ignore + conn.execute( + f""" + UPDATE {table_name} + SET value = ?, vector_value = ?, event_ts = ?, created_ts = ? + WHERE (entity_key = ? AND feature_name = ?) + """, + ( + # SET + val.SerializeToString(), + vector_bin, + timestamp, + created_ts, + # WHERE + entity_key_bin, + feature_name, + ), + ) + + conn.execute( + f"""INSERT OR IGNORE INTO {table_name} + (entity_key, feature_name, value, vector_value, event_ts, created_ts) + VALUES (?, ?, ?, ?, ?, ?)""", + ( + entity_key_bin, + feature_name, + val.SerializeToString(), + vector_bin, + timestamp, + created_ts, + ), + ) + + else: + conn.execute( + f""" + UPDATE {table_name} + SET value = ?, event_ts = ?, created_ts = ? + WHERE (entity_key = ? AND feature_name = ?) + """, + ( + # SET + val.SerializeToString(), + timestamp, + created_ts, + # WHERE + entity_key_bin, + feature_name, + ), + ) + + conn.execute( + f"""INSERT OR IGNORE INTO {table_name} + (entity_key, feature_name, value, event_ts, created_ts) + VALUES (?, ?, ?, ?, ?)""", + ( + entity_key_bin, + feature_name, + val.SerializeToString(), + timestamp, + created_ts, + ), + ) if progress: progress(1) @@ -195,7 +253,7 @@ def update( for table in tables_to_keep: conn.execute( - f"CREATE TABLE IF NOT EXISTS {_table_id(project, table)} (entity_key BLOB, feature_name TEXT, value BLOB, event_ts timestamp, created_ts timestamp, PRIMARY KEY(entity_key, feature_name))" + f"CREATE TABLE IF NOT EXISTS {_table_id(project, table)} (entity_key BLOB, feature_name TEXT, value BLOB, vector_value BLOB, event_ts timestamp, created_ts timestamp, PRIMARY KEY(entity_key, feature_name))" ) conn.execute( f"CREATE INDEX IF NOT EXISTS {_table_id(project, table)}_ek ON {_table_id(project, table)} (entity_key);" @@ -232,6 +290,124 @@ def teardown( except FileNotFoundError: pass + def retrieve_online_documents( + self, + config: RepoConfig, + table: FeatureView, + requested_feature: str, + embedding: List[float], + top_k: int, + distance_metric: Optional[str] = None, + ) -> List[ + Tuple[ + Optional[datetime], + Optional[ValueProto], + Optional[ValueProto], + Optional[ValueProto], + ] + ]: + """ + + Args: + config: Feast configuration object + table: FeatureView object as the table to search + requested_feature: The requested feature as the column to search + embedding: The query embedding to search for + top_k: The number of items to return + Returns: + List of tuples containing the event timestamp, the document feature, the vector value, and the distance + """ + project = config.project + + if not 
config.online_store.vec_enabled: + raise ValueError("sqlite-vss is not enabled in the online store config") + + conn = self._get_conn(config) + cur = conn.cursor() + + # Convert the embedding to a binary format instead of using SerializeToString() + query_embedding_bin = serialize_f32(embedding, config.online_store.vector_len) + table_name = _table_id(project, table) + + cur.execute( + f""" + CREATE VIRTUAL TABLE vec_example using vec0( + vector_value float[{config.online_store.vector_len}] + ); + """ + ) + + # Currently I can only insert the embedding value without crashing SQLite, will report a bug + cur.execute( + f""" + INSERT INTO vec_example(rowid, vector_value) + select rowid, vector_value from {table_name} + """ + ) + cur.execute( + """ + INSERT INTO vec_example(rowid, vector_value) + VALUES (?, ?) + """, + (0, query_embedding_bin), + ) + + # Have to join this with the {table_name} to get the feature name and entity_key + # Also the `top_k` doesn't appear to be working for some reason + cur.execute( + f""" + select + fv.entity_key, + f.vector_value, + fv.value, + f.distance, + fv.event_ts + from ( + select + rowid, + vector_value, + distance + from vec_example + where vector_value match ? + order by distance + limit ? + ) f + left join {table_name} fv + on f.rowid = fv.rowid + """, + (query_embedding_bin, top_k), + ) + + rows = cur.fetchall() + + result: List[ + Tuple[ + Optional[datetime], + Optional[ValueProto], + Optional[ValueProto], + Optional[ValueProto], + ] + ] = [] + + for entity_key, _, string_value, distance, event_ts in rows: + feature_value_proto = ValueProto() + feature_value_proto.ParseFromString(string_value if string_value else b"") + vector_value_proto = ValueProto( + float_list_val=FloatListProto(val=embedding) + ) + distance_value_proto = ValueProto(float_val=distance) + + result.append( + ( + event_ts, + feature_value_proto, + vector_value_proto, + distance_value_proto, + ) + ) + + return result + def _initialize_conn(db_path: str): Path(db_path).parent.mkdir(exist_ok=True) @@ -246,6 +422,19 @@ def _table_id(project: str, table: FeatureView) -> str: return f"{project}_{table.name}" +def serialize_f32( + vector: Union[RepeatedScalarFieldContainer[float], List[float]], vector_length: int +) -> bytes: + """serializes a list of floats into a compact "raw bytes" format""" + return struct.pack(f"{vector_length}f", *vector) + + +def deserialize_f32(byte_vector: bytes, vector_length: int) -> List[float]: + """deserializes a list of floats from a compact "raw bytes" format""" + num_floats = vector_length // 4 # 4 bytes per float + return list(struct.unpack(f"{num_floats}f", byte_vector)) + + class SqliteTable(InfraObject): """ A Sqlite table managed by Feast. 
@@ -292,8 +481,11 @@ def from_proto(sqlite_table_proto: SqliteTableProto) -> Any: ) def update(self): + if sys.version_info[0:2] == (3, 10): + self.conn.enable_load_extension(True) + sqlite_vec.load(self.conn) self.conn.execute( - f"CREATE TABLE IF NOT EXISTS {self.name} (entity_key BLOB, feature_name TEXT, value BLOB, event_ts timestamp, created_ts timestamp, PRIMARY KEY(entity_key, feature_name))" + f"CREATE TABLE IF NOT EXISTS {self.name} (entity_key BLOB, feature_name TEXT, value BLOB, vector_value BLOB, event_ts timestamp, created_ts timestamp, PRIMARY KEY(entity_key, feature_name))" ) self.conn.execute( f"CREATE INDEX IF NOT EXISTS {self.name}_ek ON {self.name} (entity_key);" diff --git a/sdk/python/feast/infra/registry/base_registry.py b/sdk/python/feast/infra/registry/base_registry.py index ed1fc3ab879..b52749a9b2f 100644 --- a/sdk/python/feast/infra/registry/base_registry.py +++ b/sdk/python/feast/infra/registry/base_registry.py @@ -406,18 +406,14 @@ def get_saved_dataset( """ raise NotImplementedError - def delete_saved_dataset(self, name: str, project: str, allow_cache: bool = False): + def delete_saved_dataset(self, name: str, project: str, commit: bool = True): """ Delete a saved dataset. Args: name: Name of dataset project: Feast project that this dataset belongs to - allow_cache: Whether to allow returning this dataset from a cached registry - - Returns: - Returns either the specified SavedDataset, or raises an exception if - none is found + commit: Whether the change should be persisted immediately """ raise NotImplementedError diff --git a/sdk/python/feast/infra/registry/contrib/postgres/postgres_registry_store.py b/sdk/python/feast/infra/registry/contrib/postgres/postgres_registry_store.py deleted file mode 100644 index 877e0a018a8..00000000000 --- a/sdk/python/feast/infra/registry/contrib/postgres/postgres_registry_store.py +++ /dev/null @@ -1,128 +0,0 @@ -import warnings -from typing import Optional - -import psycopg2 -from psycopg2 import sql - -from feast.infra.registry.registry_store import RegistryStore -from feast.infra.utils.postgres.connection_utils import _get_conn -from feast.infra.utils.postgres.postgres_config import PostgreSQLConfig -from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto -from feast.repo_config import RegistryConfig - - -class PostgresRegistryConfig(RegistryConfig): - host: str - port: int - database: str - db_schema: str - user: str - password: str - sslmode: Optional[str] - sslkey_path: Optional[str] - sslcert_path: Optional[str] - sslrootcert_path: Optional[str] - - -class PostgreSQLRegistryStore(RegistryStore): - def __init__(self, config: PostgresRegistryConfig, registry_path: str): - self.db_config = PostgreSQLConfig( - host=config.host, - port=config.port, - database=config.database, - db_schema=config.db_schema, - user=config.user, - password=config.password, - sslmode=getattr(config, "sslmode", None), - sslkey_path=getattr(config, "sslkey_path", None), - sslcert_path=getattr(config, "sslcert_path", None), - sslrootcert_path=getattr(config, "sslrootcert_path", None), - ) - warnings.warn( - "PostgreSQLRegistryStore is deprecated and will be removed in the future releases. 
Please use SqlRegistry instead.", - DeprecationWarning, - ) - - self.table_name = config.path - self.cache_ttl_seconds = config.cache_ttl_seconds - - def get_registry_proto(self) -> RegistryProto: - registry_proto = RegistryProto() - try: - with _get_conn(self.db_config) as conn, conn.cursor() as cur: - cur.execute( - sql.SQL( - """ - SELECT registry - FROM {} - WHERE version = (SELECT max(version) FROM {}) - """ - ).format( - sql.Identifier(self.table_name), - sql.Identifier(self.table_name), - ) - ) - row = cur.fetchone() - if row: - registry_proto = registry_proto.FromString(row[0]) - except psycopg2.errors.UndefinedTable: - pass - return registry_proto - - def update_registry_proto(self, registry_proto: RegistryProto): - """ - Overwrites the current registry proto with the proto passed in. This method - writes to the registry path. - - Args: - registry_proto: the new RegistryProto - """ - schema_name = self.db_config.db_schema or self.db_config.user - with _get_conn(self.db_config) as conn, conn.cursor() as cur: - cur.execute( - """ - SELECT schema_name - FROM information_schema.schemata - WHERE schema_name = %s - """, - (schema_name,), - ) - schema_exists = cur.fetchone() - if not schema_exists: - cur.execute( - sql.SQL("CREATE SCHEMA IF NOT EXISTS {} AUTHORIZATION {}").format( - sql.Identifier(schema_name), - sql.Identifier(self.db_config.user), - ), - ) - - cur.execute( - sql.SQL( - """ - CREATE TABLE IF NOT EXISTS {} ( - version BIGSERIAL PRIMARY KEY, - registry BYTEA NOT NULL - ); - """ - ).format(sql.Identifier(self.table_name)), - ) - # Do we want to keep track of the history or just keep the latest? - cur.execute( - sql.SQL( - """ - INSERT INTO {} (registry) - VALUES (%s); - """ - ).format(sql.Identifier(self.table_name)), - [registry_proto.SerializeToString()], - ) - - def teardown(self): - with _get_conn(self.db_config) as conn, conn.cursor() as cur: - cur.execute( - sql.SQL( - """ - DROP TABLE IF EXISTS {}; - """ - ).format(sql.Identifier(self.table_name)) - ) diff --git a/sdk/python/feast/infra/registry/registry.py b/sdk/python/feast/infra/registry/registry.py index b1efbb2c7c3..df1a419ccf7 100644 --- a/sdk/python/feast/infra/registry/registry.py +++ b/sdk/python/feast/infra/registry/registry.py @@ -54,7 +54,6 @@ "GCSRegistryStore": "feast.infra.registry.gcs.GCSRegistryStore", "S3RegistryStore": "feast.infra.registry.s3.S3RegistryStore", "FileRegistryStore": "feast.infra.registry.file.FileRegistryStore", - "PostgreSQLRegistryStore": "feast.infra.registry.contrib.postgres.postgres_registry_store.PostgreSQLRegistryStore", "AzureRegistryStore": "feast.infra.registry.contrib.azure.azure_registry_store.AzBlobRegistryStore", } diff --git a/sdk/python/feast/infra/registry/remote.py b/sdk/python/feast/infra/registry/remote.py index f93e1ab1c03..4336db232fb 100644 --- a/sdk/python/feast/infra/registry/remote.py +++ b/sdk/python/feast/infra/registry/remote.py @@ -4,12 +4,12 @@ import grpc from google.protobuf.empty_pb2 import Empty +from google.protobuf.timestamp_pb2 import Timestamp from pydantic import StrictStr from feast.base_feature_view import BaseFeatureView from feast.data_source import DataSource from feast.entity import Entity -from feast.errors import ReadOnlyRegistryException from feast.feature_service import FeatureService from feast.feature_view import FeatureView from feast.infra.infra_object import Infra @@ -43,10 +43,18 @@ def __init__( self.stub = RegistryServer_pb2_grpc.RegistryServerStub(self.channel) def apply_entity(self, entity: Entity, project: str, commit: 
bool = True): - raise ReadOnlyRegistryException() + request = RegistryServer_pb2.ApplyEntityRequest( + entity=entity.to_proto(), project=project, commit=commit + ) + + self.stub.ApplyEntity(request) def delete_entity(self, name: str, project: str, commit: bool = True): - raise ReadOnlyRegistryException() + request = RegistryServer_pb2.DeleteEntityRequest( + name=name, project=project, commit=commit + ) + + self.stub.DeleteEntity(request) def get_entity(self, name: str, project: str, allow_cache: bool = False) -> Entity: request = RegistryServer_pb2.GetEntityRequest( @@ -69,10 +77,18 @@ def list_entities(self, project: str, allow_cache: bool = False) -> List[Entity] def apply_data_source( self, data_source: DataSource, project: str, commit: bool = True ): - raise ReadOnlyRegistryException() + request = RegistryServer_pb2.ApplyDataSourceRequest( + data_source=data_source.to_proto(), project=project, commit=commit + ) + + self.stub.ApplyDataSource(request) def delete_data_source(self, name: str, project: str, commit: bool = True): - raise ReadOnlyRegistryException() + request = RegistryServer_pb2.DeleteDataSourceRequest( + name=name, project=project, commit=commit + ) + + self.stub.DeleteDataSource(request) def get_data_source( self, name: str, project: str, allow_cache: bool = False @@ -101,10 +117,18 @@ def list_data_sources( def apply_feature_service( self, feature_service: FeatureService, project: str, commit: bool = True ): - raise ReadOnlyRegistryException() + request = RegistryServer_pb2.ApplyFeatureServiceRequest( + feature_service=feature_service.to_proto(), project=project, commit=commit + ) + + self.stub.ApplyFeatureService(request) def delete_feature_service(self, name: str, project: str, commit: bool = True): - raise ReadOnlyRegistryException() + request = RegistryServer_pb2.DeleteFeatureServiceRequest( + name=name, project=project, commit=commit + ) + + self.stub.DeleteFeatureService(request) def get_feature_service( self, name: str, project: str, allow_cache: bool = False @@ -134,10 +158,35 @@ def list_feature_services( def apply_feature_view( self, feature_view: BaseFeatureView, project: str, commit: bool = True ): - raise ReadOnlyRegistryException() + if isinstance(feature_view, StreamFeatureView): + arg_name = "stream_feature_view" + elif isinstance(feature_view, FeatureView): + arg_name = "feature_view" + elif isinstance(feature_view, OnDemandFeatureView): + arg_name = "on_demand_feature_view" + + request = RegistryServer_pb2.ApplyFeatureViewRequest( + feature_view=feature_view.to_proto() + if arg_name == "feature_view" + else None, + stream_feature_view=feature_view.to_proto() + if arg_name == "stream_feature_view" + else None, + on_demand_feature_view=feature_view.to_proto() + if arg_name == "on_demand_feature_view" + else None, + project=project, + commit=commit, + ) + + self.stub.ApplyFeatureView(request) def delete_feature_view(self, name: str, project: str, commit: bool = True): - raise ReadOnlyRegistryException() + request = RegistryServer_pb2.DeleteFeatureViewRequest( + name=name, project=project, commit=commit + ) + + self.stub.DeleteFeatureView(request) def get_stream_feature_view( self, name: str, project: str, allow_cache: bool = False @@ -222,7 +271,20 @@ def apply_materialization( end_date: datetime, commit: bool = True, ): - raise ReadOnlyRegistryException() + start_date_timestamp = Timestamp() + end_date_timestamp = Timestamp() + start_date_timestamp.FromDatetime(start_date) + end_date_timestamp.FromDatetime(end_date) + + request = 
RegistryServer_pb2.ApplyMaterializationRequest(
+            feature_view=feature_view.to_proto(),
+            project=project,
+            start_date=start_date_timestamp,
+            end_date=end_date_timestamp,
+            commit=commit,
+        )
+
+        self.stub.ApplyMaterialization(request)

     def apply_saved_dataset(
         self,
@@ -230,10 +292,18 @@ def apply_saved_dataset(
         project: str,
         commit: bool = True,
     ):
-        raise ReadOnlyRegistryException()
+        request = RegistryServer_pb2.ApplySavedDatasetRequest(
+            saved_dataset=saved_dataset.to_proto(), project=project, commit=commit
+        )

-    def delete_saved_dataset(self, name: str, project: str, allow_cache: bool = False):
-        raise ReadOnlyRegistryException()
+        self.stub.ApplySavedDataset(request)
+
+    def delete_saved_dataset(self, name: str, project: str, commit: bool = True):
+        request = RegistryServer_pb2.DeleteSavedDatasetRequest(
+            name=name, project=project, commit=commit
+        )
+
+        self.stub.DeleteSavedDataset(request)

     def get_saved_dataset(
         self, name: str, project: str, allow_cache: bool = False
@@ -266,10 +336,20 @@ def apply_validation_reference(
         project: str,
         commit: bool = True,
     ):
-        raise ReadOnlyRegistryException()
+        request = RegistryServer_pb2.ApplyValidationReferenceRequest(
+            validation_reference=validation_reference.to_proto(),
+            project=project,
+            commit=commit,
+        )
+
+        self.stub.ApplyValidationReference(request)

     def delete_validation_reference(self, name: str, project: str, commit: bool = True):
-        raise ReadOnlyRegistryException()
+        request = RegistryServer_pb2.DeleteValidationReferenceRequest(
+            name=name, project=project, commit=commit
+        )
+
+        self.stub.DeleteValidationReference(request)

     def get_validation_reference(
         self, name: str, project: str, allow_cache: bool = False
@@ -308,7 +388,11 @@ def list_project_metadata(
         return [ProjectMetadata.from_proto(pm) for pm in response.project_metadata]

     def update_infra(self, infra: Infra, project: str, commit: bool = True):
-        raise ReadOnlyRegistryException()
+        request = RegistryServer_pb2.UpdateInfraRequest(
+            infra=infra.to_proto(), project=project, commit=commit
+        )
+
+        self.stub.UpdateInfra(request)

     def get_infra(self, project: str, allow_cache: bool = False) -> Infra:
         request = RegistryServer_pb2.GetInfraRequest(
@@ -336,9 +420,12 @@ def proto(self) -> RegistryProto:
         return self.stub.Proto(Empty())

     def commit(self):
-        raise ReadOnlyRegistryException()
+        self.stub.Commit(Empty())

     def refresh(self, project: Optional[str] = None):
         request = RegistryServer_pb2.RefreshRequest(project=str(project))

         self.stub.Refresh(request)
+
+    def teardown(self):
+        pass
diff --git a/sdk/python/feast/offline_server.py b/sdk/python/feast/offline_server.py
new file mode 100644
index 00000000000..718da1b109f
--- /dev/null
+++ b/sdk/python/feast/offline_server.py
@@ -0,0 +1,332 @@
+import ast
+import json
+import logging
+import traceback
+from datetime import datetime
+from typing import Any, Dict, List
+
+import pyarrow as pa
+import pyarrow.flight as fl
+
+from feast import FeatureStore, FeatureView, utils
+from feast.feature_logging import FeatureServiceLoggingSource
+from feast.feature_view import DUMMY_ENTITY_NAME
+from feast.infra.offline_stores.offline_utils import get_offline_store_from_config
+from feast.saved_dataset import SavedDatasetStorage
+
+logger = logging.getLogger(__name__)
+
+
+class OfflineServer(fl.FlightServerBase):
+    def __init__(self, store: FeatureStore, location: str, **kwargs):
+        super(OfflineServer, self).__init__(location, **kwargs)
+        self._location = location
+        # A dictionary of configured flights, e.g.
API calls received and not yet served
+        self.flights: Dict[str, Any] = {}
+        self.store = store
+        self.offline_store = get_offline_store_from_config(store.config.offline_store)
+
+    @classmethod
+    def descriptor_to_key(cls, descriptor: fl.FlightDescriptor):
+        return (
+            descriptor.descriptor_type.value,
+            descriptor.command,
+            tuple(descriptor.path or tuple()),
+        )
+
+    def _make_flight_info(self, key: Any, descriptor: fl.FlightDescriptor):
+        endpoints = [fl.FlightEndpoint(repr(key), [self._location])]
+        # TODO calculate actual schema from the given features
+        schema = pa.schema([])
+
+        return fl.FlightInfo(schema, descriptor, endpoints, -1, -1)
+
+    def get_flight_info(
+        self, context: fl.ServerCallContext, descriptor: fl.FlightDescriptor
+    ):
+        key = OfflineServer.descriptor_to_key(descriptor)
+        if key in self.flights:
+            return self._make_flight_info(key, descriptor)
+        raise KeyError("Flight not found.")
+
+    def list_flights(self, context: fl.ServerCallContext, criteria: bytes):
+        for key, table in self.flights.items():
+            if key[1] is not None:
+                descriptor = fl.FlightDescriptor.for_command(key[1])
+            else:
+                descriptor = fl.FlightDescriptor.for_path(*key[2])
+
+            yield self._make_flight_info(key, descriptor)
+
+    # Receives the request parameters and stores them in the flights dictionary,
+    # indexed by the unique command
+    def do_put(
+        self,
+        context: fl.ServerCallContext,
+        descriptor: fl.FlightDescriptor,
+        reader: fl.MetadataRecordBatchReader,
+        writer: fl.FlightMetadataWriter,
+    ):
+        key = OfflineServer.descriptor_to_key(descriptor)
+        command = json.loads(key[1])
+        if "api" in command:
+            data = reader.read_all()
+            logger.debug(f"do_put: command is {command}, data is {data}")
+            self.flights[key] = data
+
+            self._call_api(command, key)
+        else:
+            logger.warning(f"No 'api' field in command: {command}")
+
+    def _call_api(self, command: dict, key: str):
+        remove_data = False
+        try:
+            api = command["api"]
+            if api == OfflineServer.offline_write_batch.__name__:
+                self.offline_write_batch(command, key)
+                remove_data = True
+            elif api == OfflineServer.write_logged_features.__name__:
+                self.write_logged_features(command, key)
+                remove_data = True
+            elif api == OfflineServer.persist.__name__:
+                self.persist(command["retrieve_func"], command, key)
+                remove_data = True
+        except Exception as e:
+            remove_data = True
+            logger.exception(e)
+            traceback.print_exc()
+            raise e
+        finally:
+            if remove_data:
+                # The command has been consumed, so clear the corresponding flight and data
+                del self.flights[key]
+
+    def get_feature_view_by_name(
+        self, fv_name: str, name_alias: str, project: str
+    ) -> FeatureView:
+        """
+        Retrieves a feature view by name, including all subclasses of FeatureView.
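+        The lookup first tries a regular FeatureView in the registry, applying a
+        matching FeatureService projection when `name_alias` is set; if that
+        fails, it falls back to a StreamFeatureView of the same name.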
+ + Args: + fv_name: Name of feature view + name_alias: Alias to be applied to the projection of the registered view + project: Feast project that this feature view belongs to + + Returns: + Returns either the specified feature view, or raises an exception if + none is found + """ + try: + fv = self.store.registry.get_feature_view(name=fv_name, project=project) + if name_alias is not None: + for fs in self.store.registry.list_feature_services(project=project): + for p in fs.feature_view_projections: + if p.name_alias == name_alias: + logger.debug( + f"Found matching FeatureService {fs.name} with projection {p}" + ) + fv = fv.with_projection(p) + return fv + except Exception: + try: + return self.store.registry.get_stream_feature_view( + name=fv_name, project=project + ) + except Exception as e: + logger.error( + f"Cannot find any FeatureView by name {fv_name} in project {project}" + ) + raise e + + def list_feature_views_by_name( + self, feature_view_names: List[str], name_aliases: List[str], project: str + ) -> List[FeatureView]: + return [ + remove_dummies( + self.get_feature_view_by_name( + fv_name=fv_name, name_alias=name_aliases[index], project=project + ) + ) + for index, fv_name in enumerate(feature_view_names) + ] + + # Extracts the API parameters from the flights dictionary, delegates the execution to the FeatureStore instance + # and returns the stream of data + def do_get(self, context: fl.ServerCallContext, ticket: fl.Ticket): + key = ast.literal_eval(ticket.ticket.decode()) + if key not in self.flights: + logger.error(f"Unknown key {key}") + return None + + command = json.loads(key[1]) + api = command["api"] + logger.debug(f"get command is {command}") + logger.debug(f"requested api is {api}") + try: + if api == OfflineServer.get_historical_features.__name__: + table = self.get_historical_features(command, key).to_arrow() + elif api == OfflineServer.pull_all_from_table_or_query.__name__: + table = self.pull_all_from_table_or_query(command).to_arrow() + elif api == OfflineServer.pull_latest_from_table_or_query.__name__: + table = self.pull_latest_from_table_or_query(command).to_arrow() + else: + raise NotImplementedError + except Exception as e: + logger.exception(e) + traceback.print_exc() + raise e + + # Get service is consumed, so we clear the corresponding flight and data + del self.flights[key] + return fl.RecordBatchStream(table) + + def offline_write_batch(self, command: dict, key: str): + feature_view_names = command["feature_view_names"] + assert ( + len(feature_view_names) == 1 + ), "feature_view_names list should only have one item" + name_aliases = command["name_aliases"] + assert len(name_aliases) == 1, "name_aliases list should only have one item" + project = self.store.config.project + feature_views = self.list_feature_views_by_name( + feature_view_names=feature_view_names, + name_aliases=name_aliases, + project=project, + ) + + assert len(feature_views) == 1 + table = self.flights[key] + self.offline_store.offline_write_batch( + self.store.config, feature_views[0], table, command["progress"] + ) + + def write_logged_features(self, command: dict, key: str): + table = self.flights[key] + feature_service = self.store.get_feature_service( + command["feature_service_name"] + ) + + assert feature_service.logging_config is not None + + self.offline_store.write_logged_features( + config=self.store.config, + data=table, + source=FeatureServiceLoggingSource( + feature_service, self.store.config.project + ), + logging_config=feature_service.logging_config, + 
registry=self.store.registry,
+        )
+
+    def pull_all_from_table_or_query(self, command: dict):
+        return self.offline_store.pull_all_from_table_or_query(
+            self.store.config,
+            self.store.get_data_source(command["data_source_name"]),
+            command["join_key_columns"],
+            command["feature_name_columns"],
+            command["timestamp_field"],
+            utils.make_tzaware(datetime.fromisoformat(command["start_date"])),
+            utils.make_tzaware(datetime.fromisoformat(command["end_date"])),
+        )
+
+    def pull_latest_from_table_or_query(self, command: dict):
+        return self.offline_store.pull_latest_from_table_or_query(
+            self.store.config,
+            self.store.get_data_source(command["data_source_name"]),
+            command["join_key_columns"],
+            command["feature_name_columns"],
+            command["timestamp_field"],
+            command["created_timestamp_column"],
+            utils.make_tzaware(datetime.fromisoformat(command["start_date"])),
+            utils.make_tzaware(datetime.fromisoformat(command["end_date"])),
+        )
+
+    def list_actions(self, context):
+        return [
+            (
+                OfflineServer.offline_write_batch.__name__,
+                "Writes the specified arrow table to the data source underlying the specified feature view.",
+            ),
+            (
+                OfflineServer.write_logged_features.__name__,
+                "Writes logged features to a specified destination in the offline store.",
+            ),
+            (
+                OfflineServer.persist.__name__,
+                "Synchronously executes the underlying query and persists the result in the same offline store at the "
+                "specified destination.",
+            ),
+        ]
+
+    def get_historical_features(self, command: dict, key: str):
+        # Extract parameters from the internal flights dictionary
+        entity_df_value = self.flights[key]
+        entity_df = pa.Table.to_pandas(entity_df_value)
+        feature_view_names = command["feature_view_names"]
+        name_aliases = command["name_aliases"]
+        feature_refs = command["feature_refs"]
+        project = command["project"]
+        full_feature_names = command["full_feature_names"]
+        feature_views = self.list_feature_views_by_name(
+            feature_view_names=feature_view_names,
+            name_aliases=name_aliases,
+            project=project,
+        )
+        ret_job = self.offline_store.get_historical_features(
+            config=self.store.config,
+            feature_views=feature_views,
+            feature_refs=feature_refs,
+            entity_df=entity_df,
+            registry=self.store.registry,
+            project=project,
+            full_feature_names=full_feature_names,
+        )
+        return ret_job
+
+    def persist(self, retrieve_func: str, command: dict, key: str):
+        try:
+            if retrieve_func == OfflineServer.get_historical_features.__name__:
+                ret_job = self.get_historical_features(command, key)
+            elif (
+                retrieve_func == OfflineServer.pull_latest_from_table_or_query.__name__
+            ):
+                ret_job = self.pull_latest_from_table_or_query(command)
+            elif retrieve_func == OfflineServer.pull_all_from_table_or_query.__name__:
+                ret_job = self.pull_all_from_table_or_query(command)
+            else:
+                raise NotImplementedError
+
+            data_source = self.store.get_data_source(command["data_source_name"])
+            storage = SavedDatasetStorage.from_data_source(data_source)
+            ret_job.persist(storage, command["allow_overwrite"], command["timeout"])
+        except Exception as e:
+            logger.exception(e)
+            traceback.print_exc()
+            raise e
+
+    def do_action(self, context: fl.ServerCallContext, action: fl.Action):
+        pass
+
+    def do_drop_dataset(self, dataset):
+        pass
+
+
+def remove_dummies(fv: FeatureView) -> FeatureView:
+    """
+    Removes dummy IDs from FeatureView instances created with FeatureView.from_proto
+    """
+    if DUMMY_ENTITY_NAME in fv.entities:
+        fv.entities = []
+        fv.entity_columns = []
+    return fv
+
+
+def start_server(
+    store: FeatureStore,
+    host: str,
+ port: int, +): + location = "grpc+tcp://{}:{}".format(host, port) + server = OfflineServer(store, location) + logger.info(f"Offline store server serving on {location}") + server.serve() diff --git a/sdk/python/feast/registry_server.py b/sdk/python/feast/registry_server.py index 7de0cc43e14..85038ad6ff3 100644 --- a/sdk/python/feast/registry_server.py +++ b/sdk/python/feast/registry_server.py @@ -1,16 +1,34 @@ from concurrent import futures +from datetime import datetime import grpc from google.protobuf.empty_pb2 import Empty from feast import FeatureStore +from feast.data_source import DataSource +from feast.entity import Entity +from feast.feature_service import FeatureService +from feast.feature_view import FeatureView +from feast.infra.infra_object import Infra +from feast.infra.registry.base_registry import BaseRegistry +from feast.on_demand_feature_view import OnDemandFeatureView from feast.protos.feast.registry import RegistryServer_pb2, RegistryServer_pb2_grpc +from feast.saved_dataset import SavedDataset, ValidationReference +from feast.stream_feature_view import StreamFeatureView class RegistryServer(RegistryServer_pb2_grpc.RegistryServerServicer): - def __init__(self, store: FeatureStore) -> None: + def __init__(self, registry: BaseRegistry) -> None: super().__init__() - self.proxied_registry = store.registry + self.proxied_registry = registry + + def ApplyEntity(self, request: RegistryServer_pb2.ApplyEntityRequest, context): + self.proxied_registry.apply_entity( + entity=Entity.from_proto(request.entity), + project=request.project, + commit=request.commit, + ) + return Empty() def GetEntity(self, request: RegistryServer_pb2.GetEntityRequest, context): return self.proxied_registry.get_entity( @@ -27,6 +45,22 @@ def ListEntities(self, request, context): ] ) + def DeleteEntity(self, request: RegistryServer_pb2.DeleteEntityRequest, context): + self.proxied_registry.delete_entity( + name=request.name, project=request.project, commit=request.commit + ) + return Empty() + + def ApplyDataSource( + self, request: RegistryServer_pb2.ApplyDataSourceRequest, context + ): + self.proxied_registry.apply_data_source( + data_source=DataSource.from_proto(request.data_source), + project=request.project, + commit=request.commit, + ) + return Empty() + def GetDataSource(self, request: RegistryServer_pb2.GetDataSourceRequest, context): return self.proxied_registry.get_data_source( name=request.name, project=request.project, allow_cache=request.allow_cache @@ -42,6 +76,14 @@ def ListDataSources(self, request, context): ] ) + def DeleteDataSource( + self, request: RegistryServer_pb2.DeleteDataSourceRequest, context + ): + self.proxied_registry.delete_data_source( + name=request.name, project=request.project, commit=request.commit + ) + return Empty() + def GetFeatureView( self, request: RegistryServer_pb2.GetFeatureViewRequest, context ): @@ -49,6 +91,24 @@ def GetFeatureView( name=request.name, project=request.project, allow_cache=request.allow_cache ).to_proto() + def ApplyFeatureView( + self, request: RegistryServer_pb2.ApplyFeatureViewRequest, context + ): + feature_view_type = request.WhichOneof("base_feature_view") + if feature_view_type == "feature_view": + feature_view = FeatureView.from_proto(request.feature_view) + elif feature_view_type == "on_demand_feature_view": + feature_view = OnDemandFeatureView.from_proto( + request.on_demand_feature_view + ) + elif feature_view_type == "stream_feature_view": + feature_view = StreamFeatureView.from_proto(request.stream_feature_view) + + 
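+        # Assumes the request carries exactly one of the three oneof fields;
+        # if none is set, `feature_view` would be unbound and the call below
+        # would fail.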
self.proxied_registry.apply_feature_view( + feature_view=feature_view, project=request.project, commit=request.commit + ) + return Empty() + def ListFeatureViews(self, request, context): return RegistryServer_pb2.ListFeatureViewsResponse( feature_views=[ @@ -59,6 +119,14 @@ def ListFeatureViews(self, request, context): ] ) + def DeleteFeatureView( + self, request: RegistryServer_pb2.DeleteFeatureViewRequest, context + ): + self.proxied_registry.delete_feature_view( + name=request.name, project=request.project, commit=request.commit + ) + return Empty() + def GetStreamFeatureView( self, request: RegistryServer_pb2.GetStreamFeatureViewRequest, context ): @@ -93,6 +161,16 @@ def ListOnDemandFeatureViews(self, request, context): ] ) + def ApplyFeatureService( + self, request: RegistryServer_pb2.ApplyFeatureServiceRequest, context + ): + self.proxied_registry.apply_feature_service( + feature_service=FeatureService.from_proto(request.feature_service), + project=request.project, + commit=request.commit, + ) + return Empty() + def GetFeatureService( self, request: RegistryServer_pb2.GetFeatureServiceRequest, context ): @@ -112,6 +190,24 @@ def ListFeatureServices( ] ) + def DeleteFeatureService( + self, request: RegistryServer_pb2.DeleteFeatureServiceRequest, context + ): + self.proxied_registry.delete_feature_service( + name=request.name, project=request.project, commit=request.commit + ) + return Empty() + + def ApplySavedDataset( + self, request: RegistryServer_pb2.ApplySavedDatasetRequest, context + ): + self.proxied_registry.apply_saved_dataset( + saved_dataset=SavedDataset.from_proto(request.saved_dataset), + project=request.project, + commit=request.commit, + ) + return Empty() + def GetSavedDataset( self, request: RegistryServer_pb2.GetSavedDatasetRequest, context ): @@ -131,6 +227,26 @@ def ListSavedDatasets( ] ) + def DeleteSavedDataset( + self, request: RegistryServer_pb2.DeleteSavedDatasetRequest, context + ): + self.proxied_registry.delete_saved_dataset( + name=request.name, project=request.project, commit=request.commit + ) + return Empty() + + def ApplyValidationReference( + self, request: RegistryServer_pb2.ApplyValidationReferenceRequest, context + ): + self.proxied_registry.apply_validation_reference( + validation_reference=ValidationReference.from_proto( + request.validation_reference + ), + project=request.project, + commit=request.commit, + ) + return Empty() + def GetValidationReference( self, request: RegistryServer_pb2.GetValidationReferenceRequest, context ): @@ -150,6 +266,14 @@ def ListValidationReferences( ] ) + def DeleteValidationReference( + self, request: RegistryServer_pb2.DeleteValidationReferenceRequest, context + ): + self.proxied_registry.delete_validation_reference( + name=request.name, project=request.project, commit=request.commit + ) + return Empty() + def ListProjectMetadata( self, request: RegistryServer_pb2.ListProjectMetadataRequest, context ): @@ -162,11 +286,39 @@ def ListProjectMetadata( ] ) + def ApplyMaterialization( + self, request: RegistryServer_pb2.ApplyMaterializationRequest, context + ): + self.proxied_registry.apply_materialization( + feature_view=FeatureView.from_proto(request.feature_view), + project=request.project, + start_date=datetime.fromtimestamp( + request.start_date.seconds + request.start_date.nanos / 1e9 + ), + end_date=datetime.fromtimestamp( + request.end_date.seconds + request.end_date.nanos / 1e9 + ), + commit=request.commit, + ) + return Empty() + + def UpdateInfra(self, request: RegistryServer_pb2.UpdateInfraRequest, 
context): + self.proxied_registry.update_infra( + infra=Infra.from_proto(request.infra), + project=request.project, + commit=request.commit, + ) + return Empty() + def GetInfra(self, request: RegistryServer_pb2.GetInfraRequest, context): return self.proxied_registry.get_infra( project=request.project, allow_cache=request.allow_cache ).to_proto() + def Commit(self, request, context): + self.proxied_registry.commit() + return Empty() + def Refresh(self, request, context): self.proxied_registry.refresh(request.project) return Empty() @@ -178,7 +330,7 @@ def Proto(self, request, context): def start_server(store: FeatureStore, port: int): server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) RegistryServer_pb2_grpc.add_RegistryServerServicer_to_server( - RegistryServer(store), server + RegistryServer(store.registry), server ) server.add_insecure_port(f"[::]:{port}") server.start() diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index 6ef81794bf8..d5b3160b566 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -64,6 +64,7 @@ "hazelcast": "feast.infra.online_stores.contrib.hazelcast_online_store.hazelcast_online_store.HazelcastOnlineStore", "ikv": "feast.infra.online_stores.contrib.ikv_online_store.ikv.IKVOnlineStore", "elasticsearch": "feast.infra.online_stores.contrib.elasticsearch.ElasticSearchOnlineStore", + "remote": "feast.infra.online_stores.remote.RemoteOnlineStore", } OFFLINE_STORE_CLASS_FOR_TYPE = { @@ -77,6 +78,7 @@ "athena": "feast.infra.offline_stores.contrib.athena_offline_store.athena.AthenaOfflineStore", "mssql": "feast.infra.offline_stores.contrib.mssql_offline_store.mssql.MsSqlServerOfflineStore", "duckdb": "feast.infra.offline_stores.duckdb.DuckDBOfflineStore", + "remote": "feast.infra.offline_stores.remote.RemoteOfflineStore", } FEATURE_SERVER_CONFIG_CLASS_FOR_TYPE = { diff --git a/sdk/python/feast/templates/local/bootstrap.py b/sdk/python/feast/templates/local/bootstrap.py index 125eb7c2e72..ee2847c19c8 100644 --- a/sdk/python/feast/templates/local/bootstrap.py +++ b/sdk/python/feast/templates/local/bootstrap.py @@ -24,6 +24,7 @@ def bootstrap(): example_py_file = repo_path / "example_repo.py" replace_str_in_file(example_py_file, "%PARQUET_PATH%", str(driver_stats_path)) + replace_str_in_file(example_py_file, "%LOGGING_PATH%", str(data_path)) if __name__ == "__main__": diff --git a/sdk/python/feast/templates/local/feature_repo/example_repo.py b/sdk/python/feast/templates/local/feature_repo/example_repo.py index 5aed3371b14..debe9d45e92 100644 --- a/sdk/python/feast/templates/local/feature_repo/example_repo.py +++ b/sdk/python/feast/templates/local/feature_repo/example_repo.py @@ -13,6 +13,8 @@ PushSource, RequestSource, ) +from feast.feature_logging import LoggingConfig +from feast.infra.offline_stores.file_source import FileLoggingDestination from feast.on_demand_feature_view import on_demand_feature_view from feast.types import Float32, Float64, Int64 @@ -88,6 +90,9 @@ def transformed_conv_rate(inputs: pd.DataFrame) -> pd.DataFrame: driver_stats_fv[["conv_rate"]], # Sub-selects a feature from a feature view transformed_conv_rate, # Selects all features from the feature view ], + logging_config=LoggingConfig( + destination=FileLoggingDestination(path="%LOGGING_PATH%") + ), ) driver_activity_v2 = FeatureService( name="driver_activity_v2", features=[driver_stats_fv, transformed_conv_rate] diff --git a/sdk/python/feast/templates/postgres/feature_repo/feature_store.yaml 
b/sdk/python/feast/templates/postgres/feature_repo/feature_store.yaml index 0ccd4a6d499..f14510f820e 100644 --- a/sdk/python/feast/templates/postgres/feature_repo/feature_store.yaml +++ b/sdk/python/feast/templates/postgres/feature_repo/feature_store.yaml @@ -1,14 +1,12 @@ project: my_project provider: local registry: - registry_store_type: PostgreSQLRegistryStore - path: feast_registry - host: DB_HOST - port: DB_PORT - database: DB_NAME - db_schema: DB_SCHEMA - user: DB_USERNAME - password: DB_PASSWORD + registry_type: sql + path: postgresql://postgres:mysecretpassword@127.0.0.1:55001/feast + cache_ttl_seconds: 60 + sqlalchemy_config_kwargs: + echo: false + pool_pre_ping: true online_store: type: postgres host: DB_HOST diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index e7fdf971209..a0859f2f7ad 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -13,6 +13,7 @@ # limitations under the License. import json +import logging from collections import defaultdict from datetime import datetime, timezone from typing import ( @@ -53,6 +54,8 @@ # null timestamps get converted to -9223372036854775808 NULL_TIMESTAMP_INT_VALUE: int = np.datetime64("NaT").astype(int) +logger = logging.getLogger(__name__) + def feast_value_type_to_python_type(field_value_proto: ProtoValue) -> Any: """ @@ -77,9 +80,11 @@ def feast_value_type_to_python_type(field_value_proto: ProtoValue) -> Any: # Convert UNIX_TIMESTAMP values to `datetime` if val_attr == "unix_timestamp_list_val": val = [ - datetime.fromtimestamp(v, tz=timezone.utc) - if v != NULL_TIMESTAMP_INT_VALUE - else None + ( + datetime.fromtimestamp(v, tz=timezone.utc) + if v != NULL_TIMESTAMP_INT_VALUE + else None + ) for v in val ] elif val_attr == "unix_timestamp_val": @@ -295,9 +300,11 @@ def _type_err(item, dtype): ValueType.INT32: ("int32_val", lambda x: int(x), None), ValueType.INT64: ( "int64_val", - lambda x: int(x.timestamp()) - if isinstance(x, pd._libs.tslibs.timestamps.Timestamp) - else int(x), + lambda x: ( + int(x.timestamp()) + if isinstance(x, pd._libs.tslibs.timestamps.Timestamp) + else int(x) + ), None, ), ValueType.FLOAT: ("float_val", lambda x: float(x), None), @@ -373,10 +380,18 @@ def _python_value_to_proto_value( if sample is not None and not all( type(item) in valid_types for item in sample ): - first_invalid = next( - item for item in sample if type(item) not in valid_types - ) - raise _type_err(first_invalid, valid_types[0]) + # to_numpy() in utils._convert_arrow_to_proto() upcasts values of type Array of INT32 or INT64 with NULL values to Float64 automatically. + for item in sample: + if type(item) not in valid_types: + if feast_value_type in [ + ValueType.INT32_LIST, + ValueType.INT64_LIST, + ]: + if not any(np.isnan(item) for item in sample): + logger.error( + "Array of Int32 or Int64 type has NULL values. to_numpy() upcasts to Float64 automatically." + ) + raise _type_err(item, valid_types[0]) if feast_value_type == ValueType.UNIX_TIMESTAMP_LIST: int_timestamps_lists = ( @@ -390,15 +405,21 @@ def _python_value_to_proto_value( if feast_value_type == ValueType.BOOL_LIST: # ProtoValue does not support conversion of np.bool_ so we need to convert it to support np.bool_. 
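        # (np.bool_ values originate from NumPy/pandas arrays; the comprehension
        # below casts each element with bool() so the proto receives native bools.)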
return [ - ProtoValue(**{field_name: proto_type(val=[bool(e) for e in value])}) # type: ignore - if value is not None - else ProtoValue() + ( + ProtoValue( + **{field_name: proto_type(val=[bool(e) for e in value])} # type: ignore + ) + if value is not None + else ProtoValue() + ) for value in values ] return [ - ProtoValue(**{field_name: proto_type(val=value)}) # type: ignore - if value is not None - else ProtoValue() + ( + ProtoValue(**{field_name: proto_type(val=value)}) # type: ignore + if value is not None + else ProtoValue() + ) for value in values ] @@ -433,15 +454,17 @@ def _python_value_to_proto_value( if feast_value_type == ValueType.BOOL: # ProtoValue does not support conversion of np.bool_ so we need to convert it to support np.bool_. return [ - ProtoValue( - **{ - field_name: func( - bool(value) if type(value) is np.bool_ else value # type: ignore - ) - } + ( + ProtoValue( + **{ + field_name: func( + bool(value) if type(value) is np.bool_ else value # type: ignore + ) + } + ) + if not pd.isnull(value) + else ProtoValue() ) - if not pd.isnull(value) - else ProtoValue() for value in values ] if feast_value_type in PYTHON_SCALAR_VALUE_TYPE_TO_PROTO_VALUE: diff --git a/sdk/python/feast/ui/package.json b/sdk/python/feast/ui/package.json index 61c89f648f2..d4b5decaac1 100644 --- a/sdk/python/feast/ui/package.json +++ b/sdk/python/feast/ui/package.json @@ -6,7 +6,7 @@ "@elastic/datemath": "^5.0.3", "@elastic/eui": "^55.0.1", "@emotion/react": "^11.9.0", - "@feast-dev/feast-ui": "0.37.1", + "@feast-dev/feast-ui": "0.38.0", "@testing-library/jest-dom": "^5.16.4", "@testing-library/react": "^13.2.0", "@testing-library/user-event": "^13.5.0", diff --git a/sdk/python/feast/ui/yarn.lock b/sdk/python/feast/ui/yarn.lock index 91197e5219d..cb1e3154049 100644 --- a/sdk/python/feast/ui/yarn.lock +++ b/sdk/python/feast/ui/yarn.lock @@ -1451,10 +1451,10 @@ minimatch "^3.1.2" strip-json-comments "^3.1.1" -"@feast-dev/feast-ui@0.37.1": - version "0.37.1" - resolved "https://registry.yarnpkg.com/@feast-dev/feast-ui/-/feast-ui-0.37.1.tgz#b84618d1fd2e1dbc463ab2889964006b555d9ec4" - integrity sha512-xhHK3hWvW58ukB+kx04ut+7OIT+zuITw6eYKjuJmjzAZ2S8uVcqDso4T9Ma88qX+qhn4NWzNBUyM2Gz1xOhzKQ== +"@feast-dev/feast-ui@0.38.0": + version "0.38.0" + resolved "https://registry.yarnpkg.com/@feast-dev/feast-ui/-/feast-ui-0.38.0.tgz#3a2b8325b15a1e789741523bd5113b54a80b4325" + integrity sha512-i2F4yMwbaWOOPE+FOyDxrqAsb1GETDUsZ/AYJQJiQYyWgXtVFBZpShrJcOQkOwBvV5eX/2jtj9o7SaFQpUcM8A== dependencies: "@elastic/datemath" "^5.0.3" "@elastic/eui" "^55.0.1" diff --git a/sdk/python/feast/ui_server.py b/sdk/python/feast/ui_server.py index 1e0d87a64e3..35b51a8021a 100644 --- a/sdk/python/feast/ui_server.py +++ b/sdk/python/feast/ui_server.py @@ -51,7 +51,7 @@ def shutdown_event(): async_refresh() - ui_dir_ref = importlib_resources.files(__name__) / "ui/build/" + ui_dir_ref = importlib_resources.files(__spec__.parent) / "ui/build/" # type: ignore[name-defined] with importlib_resources.as_file(ui_dir_ref) as ui_dir: # Initialize with the projects-list.json file with ui_dir.joinpath("projects-list.json").open(mode="w") as f: diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index e7ca9ca35b6..3a9a11953fd 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -1,5 +1,13 @@ # This file was autogenerated by uv via the following command: # uv pip compile --system --no-strip-extras setup.py --extra ci 
--output-file sdk/python/requirements/py3.10-ci-requirements.txt +aiobotocore==2.13.0 + # via feast (setup.py) +aiohttp==3.9.5 + # via aiobotocore +aioitertools==0.11.0 + # via aiobotocore +aiosignal==1.3.1 + # via aiohttp alabaster==0.7.16 # via sphinx altair==4.2.2 @@ -12,6 +20,8 @@ anyio==4.3.0 # jupyter-server # starlette # watchfiles +appnope==0.1.4 + # via ipykernel argon2-cffi==23.1.0 # via jupyter-server argon2-cffi-bindings==21.2.0 @@ -21,16 +31,20 @@ arrow==1.3.0 asn1crypto==1.5.1 # via snowflake-connector-python assertpy==1.1 + # via feast (setup.py) asttokens==2.4.1 # via stack-data async-lru==2.0.4 # via jupyterlab async-timeout==4.0.3 - # via redis + # via + # aiohttp + # redis atpublic==4.1.0 # via ibis-framework attrs==23.2.0 # via + # aiohttp # jsonschema # referencing azure-core==1.30.1 @@ -38,7 +52,9 @@ azure-core==1.30.1 # azure-identity # azure-storage-blob azure-identity==1.16.0 -azure-storage-blob==12.19.1 + # via feast (setup.py) +azure-storage-blob==12.20.0 + # via feast (setup.py) babel==2.15.0 # via # jupyterlab-server @@ -50,21 +66,28 @@ bidict==0.23.1 bleach==6.1.0 # via nbconvert boto3==1.34.99 - # via moto + # via + # feast (setup.py) + # moto botocore==1.34.99 # via + # aiobotocore # boto3 # moto # s3transfer build==1.2.1 - # via pip-tools + # via + # feast (setup.py) + # pip-tools cachecontrol==0.14.0 # via firebase-admin cachetools==5.3.3 # via google-auth cassandra-driver==3.29.1 + # via feast (setup.py) certifi==2024.2.2 # via + # elastic-transport # httpcore # httpx # kubernetes @@ -84,6 +107,7 @@ charset-normalizer==3.3.2 # snowflake-connector-python click==8.1.7 # via + # feast (setup.py) # dask # geomet # great-expectations @@ -93,15 +117,18 @@ click==8.1.7 cloudpickle==3.0.0 # via dask colorama==0.4.6 - # via great-expectations + # via + # feast (setup.py) + # great-expectations comm==0.2.2 # via # ipykernel # ipywidgets -coverage[toml]==7.5.1 +coverage[toml]==7.5.3 # via pytest-cov cryptography==42.0.7 # via + # feast (setup.py) # azure-identity # azure-storage-blob # great-expectations @@ -113,7 +140,9 @@ cryptography==42.0.7 # types-pyopenssl # types-redis dask[dataframe]==2024.5.0 - # via dask-expr + # via + # feast (setup.py) + # dask-expr dask-expr==1.1.0 # via dask db-dtypes==1.2.0 @@ -125,21 +154,29 @@ decorator==5.1.1 defusedxml==0.7.1 # via nbconvert deltalake==0.17.3 + # via feast (setup.py) dill==0.3.8 + # via feast (setup.py) distlib==0.3.8 # via virtualenv dnspython==2.6.1 # via email-validator -docker==7.0.0 - # via testcontainers +docker==7.1.0 + # via + # feast (setup.py) + # testcontainers docutils==0.19 # via sphinx -duckdb==0.10.2 +duckdb==0.10.3 # via # duckdb-engine # ibis-framework -duckdb-engine==0.12.0 +duckdb-engine==0.12.1 # via ibis-framework +elastic-transport==8.13.1 + # via elasticsearch +elasticsearch==8.13.2 + # via feast (setup.py) email-validator==2.1.1 # via fastapi entrypoints==0.4 @@ -154,7 +191,9 @@ execnet==2.1.1 executing==2.0.1 # via stack-data fastapi==0.111.0 - # via fastapi-cli + # via + # feast (setup.py) + # fastapi-cli fastapi-cli==0.0.2 # via fastapi fastjsonschema==2.19.1 @@ -164,16 +203,24 @@ filelock==3.14.0 # snowflake-connector-python # virtualenv firebase-admin==5.4.0 + # via feast (setup.py) fqdn==1.5.1 # via jsonschema +frozenlist==1.4.1 + # via + # aiohttp + # aiosignal fsspec==2023.12.2 - # via dask + # via + # feast (setup.py) + # dask geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver google-api-core[grpc]==2.19.0 # via + # feast (setup.py) # firebase-admin # 
google-api-python-client # google-cloud-bigquery @@ -183,7 +230,7 @@ google-api-core[grpc]==2.19.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.128.0 +google-api-python-client==2.131.0 # via firebase-admin google-auth==2.29.0 # via @@ -198,8 +245,11 @@ google-auth==2.29.0 google-auth-httplib2==0.2.0 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 + # via feast (setup.py) google-cloud-bigquery-storage==2.25.0 + # via feast (setup.py) google-cloud-bigtable==2.23.1 + # via feast (setup.py) google-cloud-core==2.4.1 # via # google-cloud-bigquery @@ -208,10 +258,13 @@ google-cloud-core==2.4.1 # google-cloud-firestore # google-cloud-storage google-cloud-datastore==2.19.0 + # via feast (setup.py) google-cloud-firestore==2.16.0 # via firebase-admin google-cloud-storage==2.16.0 - # via firebase-admin + # via + # feast (setup.py) + # firebase-admin google-crc32c==1.5.0 # via # google-cloud-storage @@ -222,16 +275,17 @@ google-resumable-media==2.7.0 # google-cloud-storage googleapis-common-protos[grpc]==1.63.0 # via + # feast (setup.py) # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.18.13 -greenlet==3.0.3 - # via sqlalchemy +great-expectations==0.18.15 + # via feast (setup.py) grpc-google-iam-v1==0.13.0 # via google-cloud-bigtable -grpcio==1.63.0 +grpcio==1.64.0 # via + # feast (setup.py) # google-api-core # google-cloud-bigquery # googleapis-common-protos @@ -242,19 +296,27 @@ grpcio==1.63.0 # grpcio-testing # grpcio-tools grpcio-health-checking==1.62.2 + # via feast (setup.py) grpcio-reflection==1.62.2 + # via feast (setup.py) grpcio-status==1.62.2 # via google-api-core grpcio-testing==1.62.2 + # via feast (setup.py) grpcio-tools==1.62.2 + # via feast (setup.py) gunicorn==22.0.0 + # via feast (setup.py) h11==0.14.0 # via # httpcore # uvicorn happybase==1.2.0 -hazelcast-python-client==5.3.0 + # via feast (setup.py) +hazelcast-python-client==5.4.0 + # via feast (setup.py) hiredis==2.3.2 + # via feast (setup.py) httpcore==1.0.5 # via httpx httplib2==0.22.0 @@ -265,11 +327,15 @@ httptools==0.6.1 # via uvicorn httpx==0.27.0 # via + # feast (setup.py) # fastapi # jupyterlab ibis-framework[duckdb]==8.0.0 - # via ibis-substrait + # via + # feast (setup.py) + # ibis-substrait ibis-substrait==3.2.0 + # via feast (setup.py) identify==2.5.36 # via pre-commit idna==3.7 @@ -280,6 +346,7 @@ idna==3.7 # jsonschema # requests # snowflake-connector-python + # yarl imagesize==1.4.1 # via sphinx importlib-metadata==7.1.0 @@ -288,12 +355,12 @@ iniconfig==2.0.0 # via pytest ipykernel==6.29.4 # via jupyterlab -ipython==8.24.0 +ipython==8.25.0 # via # great-expectations # ipykernel # ipywidgets -ipywidgets==8.1.2 +ipywidgets==8.1.3 # via great-expectations isodate==0.6.1 # via azure-storage-blob @@ -303,6 +370,7 @@ jedi==0.19.1 # via ipython jinja2==3.1.4 # via + # feast (setup.py) # altair # fastapi # great-expectations @@ -326,6 +394,7 @@ jsonpointer==2.4 # jsonschema jsonschema[format-nongpl]==4.22.0 # via + # feast (setup.py) # altair # great-expectations # jupyter-events @@ -333,7 +402,7 @@ jsonschema[format-nongpl]==4.22.0 # nbformat jsonschema-specifications==2023.12.1 # via jsonschema -jupyter-client==8.6.1 +jupyter-client==8.6.2 # via # ipykernel # jupyter-server @@ -351,7 +420,7 @@ jupyter-events==0.10.0 # via jupyter-server jupyter-lsp==2.2.5 # via jupyterlab -jupyter-server==2.14.0 +jupyter-server==2.14.1 # via # jupyter-lsp # jupyterlab @@ -360,17 +429,18 @@ jupyter-server==2.14.0 # notebook-shim 
jupyter-server-terminals==0.5.3 # via jupyter-server -jupyterlab==4.1.8 +jupyterlab==4.2.1 # via notebook jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-server==2.27.1 +jupyterlab-server==2.27.2 # via # jupyterlab # notebook -jupyterlab-widgets==3.0.10 +jupyterlab-widgets==3.0.11 # via ipywidgets kubernetes==20.13.0 + # via feast (setup.py) locket==1.0.0 # via partd makefun==1.15.2 @@ -391,13 +461,17 @@ matplotlib-inline==0.1.7 mdurl==0.1.2 # via markdown-it-py minio==7.1.0 + # via feast (setup.py) mistune==3.0.2 # via # great-expectations # nbconvert mmh3==4.1.0 + # via feast (setup.py) mock==2.0.0 + # via feast (setup.py) moto==4.2.14 + # via feast (setup.py) msal==1.28.0 # via # azure-identity @@ -406,13 +480,20 @@ msal-extensions==1.1.0 # via azure-identity msgpack==1.0.8 # via cachecontrol +multidict==6.0.5 + # via + # aiohttp + # yarl multipledispatch==1.0.0 # via ibis-framework mypy==1.10.0 - # via sqlalchemy + # via + # feast (setup.py) + # sqlalchemy mypy-extensions==1.0.0 # via mypy mypy-protobuf==3.3.0 + # via feast (setup.py) nbclient==0.10.0 # via nbconvert nbconvert==7.16.4 @@ -425,9 +506,9 @@ nbformat==5.10.4 # nbconvert nest-asyncio==1.6.0 # via ipykernel -nodeenv==1.8.0 +nodeenv==1.9.0 # via pre-commit -notebook==7.1.3 +notebook==7.2.0 # via great-expectations notebook-shim==0.2.4 # via @@ -435,6 +516,7 @@ notebook-shim==0.2.4 # notebook numpy==1.26.4 # via + # feast (setup.py) # altair # dask # db-dtypes @@ -454,7 +536,6 @@ packaging==24.0 # build # dask # db-dtypes - # docker # duckdb-engine # google-cloud-bigquery # great-expectations @@ -472,6 +553,7 @@ packaging==24.0 # sphinx pandas==2.2.2 # via + # feast (setup.py) # altair # dask # dask-expr @@ -495,6 +577,7 @@ pexpect==4.9.0 pip==24.0 # via pip-tools pip-tools==7.4.1 + # via feast (setup.py) platformdirs==3.11.0 # via # jupyter-core @@ -507,9 +590,10 @@ ply==3.11 portalocker==2.8.2 # via msal-extensions pre-commit==3.3.1 + # via feast (setup.py) prometheus-client==0.20.0 # via jupyter-server -prompt-toolkit==3.0.43 +prompt-toolkit==3.0.45 # via ipython proto-plus==1.23.0 # via @@ -521,6 +605,7 @@ proto-plus==1.23.0 # google-cloud-firestore protobuf==4.25.3 # via + # feast (setup.py) # google-api-core # google-cloud-bigquery # google-cloud-bigquery-storage @@ -538,8 +623,11 @@ protobuf==4.25.3 # proto-plus # substrait psutil==5.9.0 - # via ipykernel + # via + # feast (setup.py) + # ipykernel psycopg2-binary==2.9.9 + # via feast (setup.py) ptyprocess==0.7.0 # via # pexpect @@ -547,12 +635,14 @@ ptyprocess==0.7.0 pure-eval==0.2.2 # via stack-data py==1.11.0 + # via feast (setup.py) py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 # via pyspark pyarrow==15.0.2 # via + # feast (setup.py) # dask-expr # db-dtypes # deltalake @@ -570,16 +660,19 @@ pyasn1==0.6.0 pyasn1-modules==0.4.0 # via google-auth pybindgen==0.22.1 + # via feast (setup.py) pycparser==2.22 # via cffi pydantic==2.7.1 # via + # feast (setup.py) # fastapi # great-expectations pydantic-core==2.18.2 # via pydantic pygments==2.18.0 # via + # feast (setup.py) # ipython # nbconvert # rich @@ -589,8 +682,11 @@ pyjwt[crypto]==2.8.0 # msal # snowflake-connector-python pymssql==2.3.0 -pymysql==1.1.0 + # via feast (setup.py) +pymysql==1.1.1 + # via feast (setup.py) pyodbc==5.1.0 + # via feast (setup.py) pyopenssl==24.1.0 # via snowflake-connector-python pyparsing==3.1.2 @@ -602,8 +698,10 @@ pyproject-hooks==1.1.0 # build # pip-tools pyspark==3.5.1 + # via feast (setup.py) pytest==7.4.4 # via + # feast (setup.py) # pytest-benchmark # pytest-cov # 
pytest-env @@ -613,13 +711,21 @@ pytest==7.4.4 # pytest-timeout # pytest-xdist pytest-benchmark==3.4.1 + # via feast (setup.py) pytest-cov==5.0.0 + # via feast (setup.py) pytest-env==1.1.3 + # via feast (setup.py) pytest-lazy-fixture==0.6.3 + # via feast (setup.py) pytest-mock==1.10.4 + # via feast (setup.py) pytest-ordering==0.6 + # via feast (setup.py) pytest-timeout==1.4.2 + # via feast (setup.py) pytest-xdist==3.6.1 + # via feast (setup.py) python-dateutil==2.9.0.post0 # via # arrow @@ -648,6 +754,7 @@ pytz==2024.1 # trino pyyaml==6.0.1 # via + # feast (setup.py) # dask # ibis-substrait # jupyter-events @@ -661,14 +768,17 @@ pyzmq==26.0.3 # jupyter-client # jupyter-server redis==4.6.0 + # via feast (setup.py) referencing==0.35.1 # via # jsonschema # jsonschema-specifications # jupyter-events regex==2024.4.28 + # via feast (setup.py) requests==2.31.0 # via + # feast (setup.py) # azure-core # cachecontrol # docker @@ -702,6 +812,7 @@ rich==13.7.1 # ibis-framework # typer rockset==2.1.2 + # via feast (setup.py) rpds-py==0.18.1 # via # jsonschema @@ -710,18 +821,18 @@ rsa==4.9 # via google-auth ruamel-yaml==0.17.17 # via great-expectations -ruff==0.4.3 +ruff==0.4.6 + # via feast (setup.py) s3transfer==0.10.1 # via boto3 -scipy==1.13.0 +scipy==1.13.1 # via great-expectations send2trash==1.8.3 # via jupyter-server -setuptools==69.5.1 +setuptools==70.0.0 # via # grpcio-tools # kubernetes - # nodeenv # pip-tools shellingham==1.5.4 # via typer @@ -744,12 +855,14 @@ sniffio==1.3.1 # httpx snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.10.0 +snowflake-connector-python[pandas]==3.10.1 + # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python soupsieve==2.5 # via beautifulsoup4 sphinx==6.2.1 + # via feast (setup.py) sphinxcontrib-applehelp==1.0.8 # via sphinx sphinxcontrib-devhelp==1.0.6 @@ -764,6 +877,7 @@ sphinxcontrib-serializinghtml==1.1.10 # via sphinx sqlalchemy[mypy]==2.0.30 # via + # feast (setup.py) # duckdb-engine # ibis-framework # sqlalchemy-views @@ -771,24 +885,30 @@ sqlalchemy-views==0.3.2 # via ibis-framework sqlglot==20.11.0 # via ibis-framework +sqlite-vec==0.0.1a10 + # via feast (setup.py) stack-data==0.6.3 # via ipython starlette==0.37.2 # via fastapi -substrait==0.17.0 +substrait==0.19.0 # via ibis-substrait tabulate==0.9.0 + # via feast (setup.py) tenacity==8.3.0 + # via feast (setup.py) terminado==0.18.1 # via # jupyter-server # jupyter-server-terminals testcontainers==4.4.0 + # via feast (setup.py) thriftpy2==0.5.0 # via happybase tinycss2==1.3.0 # via nbconvert toml==0.10.2 + # via feast (setup.py) tomli==2.0.1 # via # build @@ -798,7 +918,7 @@ tomli==2.0.1 # pip-tools # pytest # pytest-env -tomlkit==0.12.4 +tomlkit==0.12.5 # via snowflake-connector-python toolz==0.12.1 # via @@ -806,7 +926,7 @@ toolz==0.12.1 # dask # ibis-framework # partd -tornado==6.4 +tornado==6.4.1 # via # ipykernel # jupyter-client @@ -815,7 +935,9 @@ tornado==6.4 # notebook # terminado tqdm==4.66.4 - # via great-expectations + # via + # feast (setup.py) + # great-expectations traitlets==5.14.3 # via # comm @@ -832,25 +954,39 @@ traitlets==5.14.3 # nbconvert # nbformat trino==0.328.0 + # via feast (setup.py) typeguard==4.2.1 + # via feast (setup.py) typer==0.12.3 # via fastapi-cli types-cffi==1.16.0.20240331 # via types-pyopenssl types-protobuf==3.19.22 - # via mypy-protobuf + # via + # feast (setup.py) + # mypy-protobuf types-pymysql==1.1.0.20240425 + # via feast (setup.py) types-pyopenssl==24.1.0.20240425 # via types-redis 
types-python-dateutil==2.9.0.20240316 - # via arrow + # via + # feast (setup.py) + # arrow types-pytz==2024.1.0.20240417 + # via feast (setup.py) types-pyyaml==6.0.12.20240311 + # via feast (setup.py) types-redis==4.6.0.20240425 + # via feast (setup.py) types-requests==2.30.0.0 -types-setuptools==69.5.0.20240423 - # via types-cffi + # via feast (setup.py) +types-setuptools==70.0.0.20240524 + # via + # feast (setup.py) + # types-cffi types-tabulate==0.9.0.20240106 + # via feast (setup.py) types-urllib3==1.26.25.14 # via types-requests typing-extensions==4.11.0 @@ -886,8 +1022,10 @@ uritemplate==4.1.1 # via google-api-python-client urllib3==1.26.18 # via + # feast (setup.py) # botocore # docker + # elastic-transport # great-expectations # kubernetes # minio @@ -897,12 +1035,15 @@ urllib3==1.26.18 # testcontainers uvicorn[standard]==0.29.0 # via + # feast (setup.py) # fastapi # fastapi-cli uvloop==0.19.0 # via uvicorn virtualenv==20.23.0 - # via pre-commit + # via + # feast (setup.py) + # pre-commit watchfiles==0.21.0 # via uvicorn wcwidth==0.2.13 @@ -923,11 +1064,15 @@ werkzeug==3.0.3 # via moto wheel==0.43.0 # via pip-tools -widgetsnbextension==4.0.10 +widgetsnbextension==4.0.11 # via ipywidgets wrapt==1.16.0 - # via testcontainers + # via + # aiobotocore + # testcontainers xmltodict==0.13.0 # via moto +yarl==1.9.4 + # via aiohttp zipp==3.18.1 # via importlib-metadata diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 56a8259ab43..23bd94feb5c 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -20,17 +20,22 @@ charset-normalizer==3.3.2 # via requests click==8.1.7 # via + # feast (setup.py) # dask # typer # uvicorn cloudpickle==3.0.0 # via dask colorama==0.4.6 + # via feast (setup.py) dask[dataframe]==2024.5.0 - # via dask-expr + # via + # feast (setup.py) + # dask-expr dask-expr==1.1.0 # via dask dill==0.3.8 + # via feast (setup.py) dnspython==2.6.1 # via email-validator email-validator==2.1.1 @@ -38,14 +43,15 @@ email-validator==2.1.1 exceptiongroup==1.2.1 # via anyio fastapi==0.111.0 - # via fastapi-cli + # via + # feast (setup.py) + # fastapi-cli fastapi-cli==0.0.2 # via fastapi fsspec==2024.3.1 # via dask -greenlet==3.0.3 - # via sqlalchemy gunicorn==22.0.0 + # via feast (setup.py) h11==0.14.0 # via # httpcore @@ -65,8 +71,11 @@ idna==3.7 importlib-metadata==7.1.0 # via dask jinja2==3.1.4 - # via fastapi + # via + # feast (setup.py) + # fastapi jsonschema==4.22.0 + # via feast (setup.py) jsonschema-specifications==2023.12.1 # via jsonschema locket==1.0.0 @@ -78,13 +87,16 @@ markupsafe==2.1.5 mdurl==0.1.2 # via markdown-it-py mmh3==4.1.0 + # via feast (setup.py) mypy==1.10.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy mypy-protobuf==3.6.0 + # via feast (setup.py) numpy==1.26.4 # via + # feast (setup.py) # dask # pandas # pyarrow @@ -96,20 +108,29 @@ packaging==24.0 # gunicorn pandas==2.2.2 # via + # feast (setup.py) # dask # dask-expr partd==1.4.2 # via dask protobuf==4.25.3 - # via mypy-protobuf + # via + # feast (setup.py) + # mypy-protobuf pyarrow==16.0.0 - # via dask-expr + # via + # feast (setup.py) + # dask-expr pydantic==2.7.1 - # via fastapi + # via + # feast (setup.py) + # fastapi pydantic-core==2.18.2 # via pydantic pygments==2.18.0 - # via rich + # via + # feast (setup.py) + # rich python-dateutil==2.9.0.post0 # via pandas python-dotenv==1.0.1 @@ -120,6 +141,7 @@ pytz==2024.1 # via pandas pyyaml==6.0.1 # via + # feast (setup.py) # dask # 
uvicorn referencing==0.35.1 @@ -127,6 +149,7 @@ referencing==0.35.1 # jsonschema # jsonschema-specifications requests==2.31.0 + # via feast (setup.py) rich==13.7.1 # via typer rpds-py==0.18.1 @@ -142,11 +165,17 @@ sniffio==1.3.1 # anyio # httpx sqlalchemy[mypy]==2.0.30 + # via feast (setup.py) +sqlite-vec==0.0.1a10 + # via feast (setup.py) starlette==0.37.2 # via fastapi tabulate==0.9.0 + # via feast (setup.py) tenacity==8.3.0 + # via feast (setup.py) toml==0.10.2 + # via feast (setup.py) tomli==2.0.1 # via mypy toolz==0.12.1 @@ -154,7 +183,9 @@ toolz==0.12.1 # dask # partd tqdm==4.66.4 + # via feast (setup.py) typeguard==4.2.1 + # via feast (setup.py) typer==0.12.3 # via fastapi-cli types-protobuf==5.26.0.20240422 @@ -178,6 +209,7 @@ urllib3==2.2.1 # via requests uvicorn[standard]==0.29.0 # via + # feast (setup.py) # fastapi # fastapi-cli uvloop==0.19.0 @@ -187,4 +219,4 @@ watchfiles==0.21.0 websockets==12.0 # via uvicorn zipp==3.18.1 - # via importlib-metadata \ No newline at end of file + # via importlib-metadata diff --git a/sdk/python/requirements/py3.11-ci-requirements.txt b/sdk/python/requirements/py3.11-ci-requirements.txt index 3b76237f599..718af8a2c70 100644 --- a/sdk/python/requirements/py3.11-ci-requirements.txt +++ b/sdk/python/requirements/py3.11-ci-requirements.txt @@ -1,5 +1,13 @@ # This file was autogenerated by uv via the following command: # uv pip compile --system --no-strip-extras setup.py --extra ci --output-file sdk/python/requirements/py3.11-ci-requirements.txt +aiobotocore==2.13.0 + # via feast (setup.py) +aiohttp==3.9.5 + # via aiobotocore +aioitertools==0.11.0 + # via aiobotocore +aiosignal==1.3.1 + # via aiohttp alabaster==0.7.16 # via sphinx altair==4.2.2 @@ -12,6 +20,8 @@ anyio==4.3.0 # jupyter-server # starlette # watchfiles +appnope==0.1.4 + # via ipykernel argon2-cffi==23.1.0 # via jupyter-server argon2-cffi-bindings==21.2.0 @@ -21,6 +31,7 @@ arrow==1.3.0 asn1crypto==1.5.1 # via snowflake-connector-python assertpy==1.1 + # via feast (setup.py) asttokens==2.4.1 # via stack-data async-lru==2.0.4 @@ -29,6 +40,7 @@ atpublic==4.1.0 # via ibis-framework attrs==23.2.0 # via + # aiohttp # jsonschema # referencing azure-core==1.30.1 @@ -36,7 +48,9 @@ azure-core==1.30.1 # azure-identity # azure-storage-blob azure-identity==1.16.0 -azure-storage-blob==12.19.1 + # via feast (setup.py) +azure-storage-blob==12.20.0 + # via feast (setup.py) babel==2.15.0 # via # jupyterlab-server @@ -48,21 +62,28 @@ bidict==0.23.1 bleach==6.1.0 # via nbconvert boto3==1.34.99 - # via moto + # via + # feast (setup.py) + # moto botocore==1.34.99 # via + # aiobotocore # boto3 # moto # s3transfer build==1.2.1 - # via pip-tools + # via + # feast (setup.py) + # pip-tools cachecontrol==0.14.0 # via firebase-admin cachetools==5.3.3 # via google-auth cassandra-driver==3.29.1 + # via feast (setup.py) certifi==2024.2.2 # via + # elastic-transport # httpcore # httpx # kubernetes @@ -82,6 +103,7 @@ charset-normalizer==3.3.2 # snowflake-connector-python click==8.1.7 # via + # feast (setup.py) # dask # geomet # great-expectations @@ -91,15 +113,18 @@ click==8.1.7 cloudpickle==3.0.0 # via dask colorama==0.4.6 - # via great-expectations + # via + # feast (setup.py) + # great-expectations comm==0.2.2 # via # ipykernel # ipywidgets -coverage[toml]==7.5.1 +coverage[toml]==7.5.3 # via pytest-cov cryptography==42.0.7 # via + # feast (setup.py) # azure-identity # azure-storage-blob # great-expectations @@ -111,7 +136,9 @@ cryptography==42.0.7 # types-pyopenssl # types-redis dask[dataframe]==2024.5.0 - # via 
dask-expr + # via + # feast (setup.py) + # dask-expr dask-expr==1.1.0 # via dask db-dtypes==1.2.0 @@ -122,22 +149,30 @@ decorator==5.1.1 # via ipython defusedxml==0.7.1 # via nbconvert -deltalake==0.17.3 +deltalake==0.17.4 + # via feast (setup.py) dill==0.3.8 + # via feast (setup.py) distlib==0.3.8 # via virtualenv dnspython==2.6.1 # via email-validator -docker==7.0.0 - # via testcontainers +docker==7.1.0 + # via + # feast (setup.py) + # testcontainers docutils==0.19 # via sphinx -duckdb==0.10.2 +duckdb==0.10.3 # via # duckdb-engine # ibis-framework -duckdb-engine==0.12.0 +duckdb-engine==0.12.1 # via ibis-framework +elastic-transport==8.13.1 + # via elasticsearch +elasticsearch==8.13.2 + # via feast (setup.py) email-validator==2.1.1 # via fastapi entrypoints==0.4 @@ -147,7 +182,9 @@ execnet==2.1.1 executing==2.0.1 # via stack-data fastapi==0.111.0 - # via fastapi-cli + # via + # feast (setup.py) + # fastapi-cli fastapi-cli==0.0.2 # via fastapi fastjsonschema==2.19.1 @@ -157,16 +194,24 @@ filelock==3.14.0 # snowflake-connector-python # virtualenv firebase-admin==5.4.0 + # via feast (setup.py) fqdn==1.5.1 # via jsonschema +frozenlist==1.4.1 + # via + # aiohttp + # aiosignal fsspec==2023.12.2 - # via dask + # via + # feast (setup.py) + # dask geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver google-api-core[grpc]==2.19.0 # via + # feast (setup.py) # firebase-admin # google-api-python-client # google-cloud-bigquery @@ -176,7 +221,7 @@ google-api-core[grpc]==2.19.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.128.0 +google-api-python-client==2.131.0 # via firebase-admin google-auth==2.29.0 # via @@ -191,8 +236,11 @@ google-auth==2.29.0 google-auth-httplib2==0.2.0 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 + # via feast (setup.py) google-cloud-bigquery-storage==2.25.0 + # via feast (setup.py) google-cloud-bigtable==2.23.1 + # via feast (setup.py) google-cloud-core==2.4.1 # via # google-cloud-bigquery @@ -201,10 +249,13 @@ google-cloud-core==2.4.1 # google-cloud-firestore # google-cloud-storage google-cloud-datastore==2.19.0 + # via feast (setup.py) google-cloud-firestore==2.16.0 # via firebase-admin google-cloud-storage==2.16.0 - # via firebase-admin + # via + # feast (setup.py) + # firebase-admin google-crc32c==1.5.0 # via # google-cloud-storage @@ -215,16 +266,17 @@ google-resumable-media==2.7.0 # google-cloud-storage googleapis-common-protos[grpc]==1.63.0 # via + # feast (setup.py) # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.18.13 -greenlet==3.0.3 - # via sqlalchemy +great-expectations==0.18.15 + # via feast (setup.py) grpc-google-iam-v1==0.13.0 # via google-cloud-bigtable -grpcio==1.63.0 +grpcio==1.64.0 # via + # feast (setup.py) # google-api-core # google-cloud-bigquery # googleapis-common-protos @@ -235,19 +287,27 @@ grpcio==1.63.0 # grpcio-testing # grpcio-tools grpcio-health-checking==1.62.2 + # via feast (setup.py) grpcio-reflection==1.62.2 + # via feast (setup.py) grpcio-status==1.62.2 # via google-api-core grpcio-testing==1.62.2 + # via feast (setup.py) grpcio-tools==1.62.2 + # via feast (setup.py) gunicorn==22.0.0 + # via feast (setup.py) h11==0.14.0 # via # httpcore # uvicorn happybase==1.2.0 -hazelcast-python-client==5.3.0 + # via feast (setup.py) +hazelcast-python-client==5.4.0 + # via feast (setup.py) hiredis==2.3.2 + # via feast (setup.py) httpcore==1.0.5 # via httpx httplib2==0.22.0 @@ -258,11 +318,15 @@ httptools==0.6.1 # via uvicorn 
httpx==0.27.0 # via + # feast (setup.py) # fastapi # jupyterlab ibis-framework[duckdb]==8.0.0 - # via ibis-substrait + # via + # feast (setup.py) + # ibis-substrait ibis-substrait==3.2.0 + # via feast (setup.py) identify==2.5.36 # via pre-commit idna==3.7 @@ -273,6 +337,7 @@ idna==3.7 # jsonschema # requests # snowflake-connector-python + # yarl imagesize==1.4.1 # via sphinx importlib-metadata==7.1.0 @@ -281,12 +346,12 @@ iniconfig==2.0.0 # via pytest ipykernel==6.29.4 # via jupyterlab -ipython==8.24.0 +ipython==8.25.0 # via # great-expectations # ipykernel # ipywidgets -ipywidgets==8.1.2 +ipywidgets==8.1.3 # via great-expectations isodate==0.6.1 # via azure-storage-blob @@ -296,6 +361,7 @@ jedi==0.19.1 # via ipython jinja2==3.1.4 # via + # feast (setup.py) # altair # fastapi # great-expectations @@ -319,6 +385,7 @@ jsonpointer==2.4 # jsonschema jsonschema[format-nongpl]==4.22.0 # via + # feast (setup.py) # altair # great-expectations # jupyter-events @@ -326,7 +393,7 @@ jsonschema[format-nongpl]==4.22.0 # nbformat jsonschema-specifications==2023.12.1 # via jsonschema -jupyter-client==8.6.1 +jupyter-client==8.6.2 # via # ipykernel # jupyter-server @@ -344,7 +411,7 @@ jupyter-events==0.10.0 # via jupyter-server jupyter-lsp==2.2.5 # via jupyterlab -jupyter-server==2.14.0 +jupyter-server==2.14.1 # via # jupyter-lsp # jupyterlab @@ -353,17 +420,18 @@ jupyter-server==2.14.0 # notebook-shim jupyter-server-terminals==0.5.3 # via jupyter-server -jupyterlab==4.1.8 +jupyterlab==4.2.1 # via notebook jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-server==2.27.1 +jupyterlab-server==2.27.2 # via # jupyterlab # notebook -jupyterlab-widgets==3.0.10 +jupyterlab-widgets==3.0.11 # via ipywidgets kubernetes==20.13.0 + # via feast (setup.py) locket==1.0.0 # via partd makefun==1.15.2 @@ -384,13 +452,17 @@ matplotlib-inline==0.1.7 mdurl==0.1.2 # via markdown-it-py minio==7.1.0 + # via feast (setup.py) mistune==3.0.2 # via # great-expectations # nbconvert mmh3==4.1.0 + # via feast (setup.py) mock==2.0.0 + # via feast (setup.py) moto==4.2.14 + # via feast (setup.py) msal==1.28.0 # via # azure-identity @@ -399,13 +471,20 @@ msal-extensions==1.1.0 # via azure-identity msgpack==1.0.8 # via cachecontrol +multidict==6.0.5 + # via + # aiohttp + # yarl multipledispatch==1.0.0 # via ibis-framework mypy==1.10.0 - # via sqlalchemy + # via + # feast (setup.py) + # sqlalchemy mypy-extensions==1.0.0 # via mypy mypy-protobuf==3.3.0 + # via feast (setup.py) nbclient==0.10.0 # via nbconvert nbconvert==7.16.4 @@ -418,9 +497,9 @@ nbformat==5.10.4 # nbconvert nest-asyncio==1.6.0 # via ipykernel -nodeenv==1.8.0 +nodeenv==1.9.0 # via pre-commit -notebook==7.1.3 +notebook==7.2.0 # via great-expectations notebook-shim==0.2.4 # via @@ -428,6 +507,7 @@ notebook-shim==0.2.4 # notebook numpy==1.26.4 # via + # feast (setup.py) # altair # dask # db-dtypes @@ -447,7 +527,6 @@ packaging==24.0 # build # dask # db-dtypes - # docker # duckdb-engine # google-cloud-bigquery # great-expectations @@ -465,6 +544,7 @@ packaging==24.0 # sphinx pandas==2.2.2 # via + # feast (setup.py) # altair # dask # dask-expr @@ -488,6 +568,7 @@ pexpect==4.9.0 pip==24.0 # via pip-tools pip-tools==7.4.1 + # via feast (setup.py) platformdirs==3.11.0 # via # jupyter-core @@ -500,9 +581,10 @@ ply==3.11 portalocker==2.8.2 # via msal-extensions pre-commit==3.3.1 + # via feast (setup.py) prometheus-client==0.20.0 # via jupyter-server -prompt-toolkit==3.0.43 +prompt-toolkit==3.0.45 # via ipython proto-plus==1.23.0 # via @@ -514,6 +596,7 @@ proto-plus==1.23.0 # 
google-cloud-firestore protobuf==4.25.3 # via + # feast (setup.py) # google-api-core # google-cloud-bigquery # google-cloud-bigquery-storage @@ -531,8 +614,11 @@ protobuf==4.25.3 # proto-plus # substrait psutil==5.9.0 - # via ipykernel + # via + # feast (setup.py) + # ipykernel psycopg2-binary==2.9.9 + # via feast (setup.py) ptyprocess==0.7.0 # via # pexpect @@ -540,12 +626,14 @@ ptyprocess==0.7.0 pure-eval==0.2.2 # via stack-data py==1.11.0 + # via feast (setup.py) py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 # via pyspark pyarrow==15.0.2 # via + # feast (setup.py) # dask-expr # db-dtypes # deltalake @@ -563,16 +651,19 @@ pyasn1==0.6.0 pyasn1-modules==0.4.0 # via google-auth pybindgen==0.22.1 + # via feast (setup.py) pycparser==2.22 # via cffi pydantic==2.7.1 # via + # feast (setup.py) # fastapi # great-expectations pydantic-core==2.18.2 # via pydantic pygments==2.18.0 # via + # feast (setup.py) # ipython # nbconvert # rich @@ -582,8 +673,11 @@ pyjwt[crypto]==2.8.0 # msal # snowflake-connector-python pymssql==2.3.0 -pymysql==1.1.0 + # via feast (setup.py) +pymysql==1.1.1 + # via feast (setup.py) pyodbc==5.1.0 + # via feast (setup.py) pyopenssl==24.1.0 # via snowflake-connector-python pyparsing==3.1.2 @@ -595,8 +689,10 @@ pyproject-hooks==1.1.0 # build # pip-tools pyspark==3.5.1 + # via feast (setup.py) pytest==7.4.4 # via + # feast (setup.py) # pytest-benchmark # pytest-cov # pytest-env @@ -606,13 +702,21 @@ pytest==7.4.4 # pytest-timeout # pytest-xdist pytest-benchmark==3.4.1 + # via feast (setup.py) pytest-cov==5.0.0 + # via feast (setup.py) pytest-env==1.1.3 + # via feast (setup.py) pytest-lazy-fixture==0.6.3 + # via feast (setup.py) pytest-mock==1.10.4 + # via feast (setup.py) pytest-ordering==0.6 + # via feast (setup.py) pytest-timeout==1.4.2 + # via feast (setup.py) pytest-xdist==3.6.1 + # via feast (setup.py) python-dateutil==2.9.0.post0 # via # arrow @@ -641,6 +745,7 @@ pytz==2024.1 # trino pyyaml==6.0.1 # via + # feast (setup.py) # dask # ibis-substrait # jupyter-events @@ -654,14 +759,17 @@ pyzmq==26.0.3 # jupyter-client # jupyter-server redis==4.6.0 + # via feast (setup.py) referencing==0.35.1 # via # jsonschema # jsonschema-specifications # jupyter-events -regex==2024.4.28 +regex==2024.5.15 + # via feast (setup.py) requests==2.31.0 # via + # feast (setup.py) # azure-core # cachecontrol # docker @@ -695,6 +803,7 @@ rich==13.7.1 # ibis-framework # typer rockset==2.1.2 + # via feast (setup.py) rpds-py==0.18.1 # via # jsonschema @@ -703,18 +812,18 @@ rsa==4.9 # via google-auth ruamel-yaml==0.17.17 # via great-expectations -ruff==0.4.3 +ruff==0.4.6 + # via feast (setup.py) s3transfer==0.10.1 # via boto3 -scipy==1.13.0 +scipy==1.13.1 # via great-expectations send2trash==1.8.3 # via jupyter-server -setuptools==69.5.1 +setuptools==70.0.0 # via # grpcio-tools # kubernetes - # nodeenv # pip-tools shellingham==1.5.4 # via typer @@ -737,12 +846,14 @@ sniffio==1.3.1 # httpx snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.10.0 +snowflake-connector-python[pandas]==3.10.1 + # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python soupsieve==2.5 # via beautifulsoup4 sphinx==6.2.1 + # via feast (setup.py) sphinxcontrib-applehelp==1.0.8 # via sphinx sphinxcontrib-devhelp==1.0.6 @@ -757,6 +868,7 @@ sphinxcontrib-serializinghtml==1.1.10 # via sphinx sqlalchemy[mypy]==2.0.30 # via + # feast (setup.py) # duckdb-engine # ibis-framework # sqlalchemy-views @@ -764,25 +876,31 @@ sqlalchemy-views==0.3.2 # via ibis-framework sqlglot==20.11.0 # 
via ibis-framework +sqlite-vec==0.0.1a10 + # via feast (setup.py) stack-data==0.6.3 # via ipython starlette==0.37.2 # via fastapi -substrait==0.17.0 +substrait==0.19.0 # via ibis-substrait tabulate==0.9.0 + # via feast (setup.py) tenacity==8.3.0 + # via feast (setup.py) terminado==0.18.1 # via # jupyter-server # jupyter-server-terminals testcontainers==4.4.0 + # via feast (setup.py) thriftpy2==0.5.0 # via happybase tinycss2==1.3.0 # via nbconvert toml==0.10.2 -tomlkit==0.12.4 + # via feast (setup.py) +tomlkit==0.12.5 # via snowflake-connector-python toolz==0.12.1 # via @@ -790,7 +908,7 @@ toolz==0.12.1 # dask # ibis-framework # partd -tornado==6.4 +tornado==6.4.1 # via # ipykernel # jupyter-client @@ -799,7 +917,9 @@ tornado==6.4 # notebook # terminado tqdm==4.66.4 - # via great-expectations + # via + # feast (setup.py) + # great-expectations traitlets==5.14.3 # via # comm @@ -816,25 +936,39 @@ traitlets==5.14.3 # nbconvert # nbformat trino==0.328.0 + # via feast (setup.py) typeguard==4.2.1 + # via feast (setup.py) typer==0.12.3 # via fastapi-cli types-cffi==1.16.0.20240331 # via types-pyopenssl types-protobuf==3.19.22 - # via mypy-protobuf + # via + # feast (setup.py) + # mypy-protobuf types-pymysql==1.1.0.20240425 + # via feast (setup.py) types-pyopenssl==24.1.0.20240425 # via types-redis types-python-dateutil==2.9.0.20240316 - # via arrow + # via + # feast (setup.py) + # arrow types-pytz==2024.1.0.20240417 + # via feast (setup.py) types-pyyaml==6.0.12.20240311 + # via feast (setup.py) types-redis==4.6.0.20240425 + # via feast (setup.py) types-requests==2.30.0.0 -types-setuptools==69.5.0.20240423 - # via types-cffi + # via feast (setup.py) +types-setuptools==70.0.0.20240524 + # via + # feast (setup.py) + # types-cffi types-tabulate==0.9.0.20240106 + # via feast (setup.py) types-urllib3==1.26.25.14 # via types-requests typing-extensions==4.11.0 @@ -867,8 +1001,10 @@ uritemplate==4.1.1 # via google-api-python-client urllib3==1.26.18 # via + # feast (setup.py) # botocore # docker + # elastic-transport # great-expectations # kubernetes # minio @@ -878,12 +1014,15 @@ urllib3==1.26.18 # testcontainers uvicorn[standard]==0.29.0 # via + # feast (setup.py) # fastapi # fastapi-cli uvloop==0.19.0 # via uvicorn virtualenv==20.23.0 - # via pre-commit + # via + # feast (setup.py) + # pre-commit watchfiles==0.21.0 # via uvicorn wcwidth==0.2.13 @@ -904,11 +1043,15 @@ werkzeug==3.0.3 # via moto wheel==0.43.0 # via pip-tools -widgetsnbextension==4.0.10 +widgetsnbextension==4.0.11 # via ipywidgets wrapt==1.16.0 - # via testcontainers + # via + # aiobotocore + # testcontainers xmltodict==0.13.0 # via moto +yarl==1.9.4 + # via aiohttp zipp==3.18.1 # via importlib-metadata diff --git a/sdk/python/requirements/py3.11-requirements.txt b/sdk/python/requirements/py3.11-requirements.txt index c34b610d14c..9698eea6dff 100644 --- a/sdk/python/requirements/py3.11-requirements.txt +++ b/sdk/python/requirements/py3.11-requirements.txt @@ -20,30 +20,36 @@ charset-normalizer==3.3.2 # via requests click==8.1.7 # via + # feast (setup.py) # dask # typer # uvicorn cloudpickle==3.0.0 # via dask colorama==0.4.6 + # via feast (setup.py) dask[dataframe]==2024.5.0 - # via dask-expr + # via + # feast (setup.py) + # dask-expr dask-expr==1.1.0 # via dask dill==0.3.8 + # via feast (setup.py) dnspython==2.6.1 # via email-validator email-validator==2.1.1 # via fastapi fastapi==0.111.0 - # via fastapi-cli + # via + # feast (setup.py) + # fastapi-cli fastapi-cli==0.0.2 # via fastapi fsspec==2024.3.1 # via dask -greenlet==3.0.3 - # via 
sqlalchemy gunicorn==22.0.0 + # via feast (setup.py) h11==0.14.0 # via # httpcore @@ -63,8 +69,11 @@ idna==3.7 importlib-metadata==7.1.0 # via dask jinja2==3.1.4 - # via fastapi + # via + # feast (setup.py) + # fastapi jsonschema==4.22.0 + # via feast (setup.py) jsonschema-specifications==2023.12.1 # via jsonschema locket==1.0.0 @@ -76,13 +85,16 @@ markupsafe==2.1.5 mdurl==0.1.2 # via markdown-it-py mmh3==4.1.0 + # via feast (setup.py) mypy==1.10.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy mypy-protobuf==3.6.0 + # via feast (setup.py) numpy==1.26.4 # via + # feast (setup.py) # dask # pandas # pyarrow @@ -94,20 +106,29 @@ packaging==24.0 # gunicorn pandas==2.2.2 # via + # feast (setup.py) # dask # dask-expr partd==1.4.2 # via dask protobuf==4.25.3 - # via mypy-protobuf + # via + # feast (setup.py) + # mypy-protobuf pyarrow==16.0.0 - # via dask-expr + # via + # feast (setup.py) + # dask-expr pydantic==2.7.1 - # via fastapi + # via + # feast (setup.py) + # fastapi pydantic-core==2.18.2 # via pydantic pygments==2.18.0 - # via rich + # via + # feast (setup.py) + # rich python-dateutil==2.9.0.post0 # via pandas python-dotenv==1.0.1 @@ -118,6 +139,7 @@ pytz==2024.1 # via pandas pyyaml==6.0.1 # via + # feast (setup.py) # dask # uvicorn referencing==0.35.1 @@ -125,6 +147,7 @@ referencing==0.35.1 # jsonschema # jsonschema-specifications requests==2.31.0 + # via feast (setup.py) rich==13.7.1 # via typer rpds-py==0.18.1 @@ -140,17 +163,25 @@ sniffio==1.3.1 # anyio # httpx sqlalchemy[mypy]==2.0.30 + # via feast (setup.py) +sqlite-vec==0.0.1a10 + # via feast (setup.py) starlette==0.37.2 # via fastapi tabulate==0.9.0 + # via feast (setup.py) tenacity==8.3.0 + # via feast (setup.py) toml==0.10.2 + # via feast (setup.py) toolz==0.12.1 # via # dask # partd tqdm==4.66.4 + # via feast (setup.py) typeguard==4.2.1 + # via feast (setup.py) typer==0.12.3 # via fastapi-cli types-protobuf==5.26.0.20240422 @@ -172,6 +203,7 @@ urllib3==2.2.1 # via requests uvicorn[standard]==0.29.0 # via + # feast (setup.py) # fastapi # fastapi-cli uvloop==0.19.0 diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index a628f0823db..2a2d33faf34 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -1,5 +1,13 @@ # This file was autogenerated by uv via the following command: # uv pip compile --system --no-strip-extras setup.py --extra ci --output-file sdk/python/requirements/py3.9-ci-requirements.txt +aiobotocore==2.13.0 + # via feast (setup.py) +aiohttp==3.9.5 + # via aiobotocore +aioitertools==0.11.0 + # via aiobotocore +aiosignal==1.3.1 + # via aiohttp alabaster==0.7.16 # via sphinx altair==4.2.2 @@ -12,6 +20,8 @@ anyio==4.3.0 # jupyter-server # starlette # watchfiles +appnope==0.1.4 + # via ipykernel argon2-cffi==23.1.0 # via jupyter-server argon2-cffi-bindings==21.2.0 @@ -21,16 +31,20 @@ arrow==1.3.0 asn1crypto==1.5.1 # via snowflake-connector-python assertpy==1.1 + # via feast (setup.py) asttokens==2.4.1 # via stack-data async-lru==2.0.4 # via jupyterlab async-timeout==4.0.3 - # via redis + # via + # aiohttp + # redis atpublic==4.1.0 # via ibis-framework attrs==23.2.0 # via + # aiohttp # jsonschema # referencing azure-core==1.30.1 @@ -38,7 +52,9 @@ azure-core==1.30.1 # azure-identity # azure-storage-blob azure-identity==1.16.0 -azure-storage-blob==12.19.1 + # via feast (setup.py) +azure-storage-blob==12.20.0 + # via feast (setup.py) babel==2.15.0 # via # jupyterlab-server @@ -50,21 +66,28 @@ 
bidict==0.23.1 bleach==6.1.0 # via nbconvert boto3==1.34.99 - # via moto + # via + # feast (setup.py) + # moto botocore==1.34.99 # via + # aiobotocore # boto3 # moto # s3transfer build==1.2.1 - # via pip-tools + # via + # feast (setup.py) + # pip-tools cachecontrol==0.14.0 # via firebase-admin cachetools==5.3.3 # via google-auth cassandra-driver==3.29.1 + # via feast (setup.py) certifi==2024.2.2 # via + # elastic-transport # httpcore # httpx # kubernetes @@ -84,6 +107,7 @@ charset-normalizer==3.3.2 # snowflake-connector-python click==8.1.7 # via + # feast (setup.py) # dask # geomet # great-expectations @@ -93,15 +117,18 @@ click==8.1.7 cloudpickle==3.0.0 # via dask colorama==0.4.6 - # via great-expectations + # via + # feast (setup.py) + # great-expectations comm==0.2.2 # via # ipykernel # ipywidgets -coverage[toml]==7.5.1 +coverage[toml]==7.5.3 # via pytest-cov cryptography==42.0.7 # via + # feast (setup.py) # azure-identity # azure-storage-blob # great-expectations @@ -113,7 +140,9 @@ cryptography==42.0.7 # types-pyopenssl # types-redis dask[dataframe]==2024.5.0 - # via dask-expr + # via + # feast (setup.py) + # dask-expr dask-expr==1.1.0 # via dask db-dtypes==1.2.0 @@ -124,22 +153,30 @@ decorator==5.1.1 # via ipython defusedxml==0.7.1 # via nbconvert -deltalake==0.17.3 +deltalake==0.17.4 + # via feast (setup.py) dill==0.3.8 + # via feast (setup.py) distlib==0.3.8 # via virtualenv dnspython==2.6.1 # via email-validator -docker==7.0.0 - # via testcontainers +docker==7.1.0 + # via + # feast (setup.py) + # testcontainers docutils==0.19 # via sphinx -duckdb==0.10.2 +duckdb==0.10.3 # via # duckdb-engine # ibis-framework -duckdb-engine==0.12.0 +duckdb-engine==0.12.1 # via ibis-framework +elastic-transport==8.13.1 + # via elasticsearch +elasticsearch==8.13.2 + # via feast (setup.py) email-validator==2.1.1 # via fastapi entrypoints==0.4 @@ -154,7 +191,9 @@ execnet==2.1.1 executing==2.0.1 # via stack-data fastapi==0.111.0 - # via fastapi-cli + # via + # feast (setup.py) + # fastapi-cli fastapi-cli==0.0.2 # via fastapi fastjsonschema==2.19.1 @@ -164,16 +203,24 @@ filelock==3.14.0 # snowflake-connector-python # virtualenv firebase-admin==5.4.0 + # via feast (setup.py) fqdn==1.5.1 # via jsonschema +frozenlist==1.4.1 + # via + # aiohttp + # aiosignal fsspec==2023.12.2 - # via dask + # via + # feast (setup.py) + # dask geojson==2.5.0 # via rockset geomet==0.2.1.post1 # via cassandra-driver google-api-core[grpc]==2.19.0 # via + # feast (setup.py) # firebase-admin # google-api-python-client # google-cloud-bigquery @@ -183,7 +230,7 @@ google-api-core[grpc]==2.19.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.128.0 +google-api-python-client==2.131.0 # via firebase-admin google-auth==2.29.0 # via @@ -198,8 +245,11 @@ google-auth==2.29.0 google-auth-httplib2==0.2.0 # via google-api-python-client google-cloud-bigquery[pandas]==3.12.0 + # via feast (setup.py) google-cloud-bigquery-storage==2.25.0 + # via feast (setup.py) google-cloud-bigtable==2.23.1 + # via feast (setup.py) google-cloud-core==2.4.1 # via # google-cloud-bigquery @@ -208,10 +258,13 @@ google-cloud-core==2.4.1 # google-cloud-firestore # google-cloud-storage google-cloud-datastore==2.19.0 + # via feast (setup.py) google-cloud-firestore==2.16.0 # via firebase-admin google-cloud-storage==2.16.0 - # via firebase-admin + # via + # feast (setup.py) + # firebase-admin google-crc32c==1.5.0 # via # google-cloud-storage @@ -222,16 +275,17 @@ google-resumable-media==2.7.0 # google-cloud-storage 
googleapis-common-protos[grpc]==1.63.0 # via + # feast (setup.py) # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.18.13 -greenlet==3.0.3 - # via sqlalchemy +great-expectations==0.18.15 + # via feast (setup.py) grpc-google-iam-v1==0.13.0 # via google-cloud-bigtable -grpcio==1.63.0 +grpcio==1.64.0 # via + # feast (setup.py) # google-api-core # google-cloud-bigquery # googleapis-common-protos @@ -242,19 +296,27 @@ grpcio==1.63.0 # grpcio-testing # grpcio-tools grpcio-health-checking==1.62.2 + # via feast (setup.py) grpcio-reflection==1.62.2 + # via feast (setup.py) grpcio-status==1.62.2 # via google-api-core grpcio-testing==1.62.2 + # via feast (setup.py) grpcio-tools==1.62.2 + # via feast (setup.py) gunicorn==22.0.0 + # via feast (setup.py) h11==0.14.0 # via # httpcore # uvicorn happybase==1.2.0 -hazelcast-python-client==5.3.0 + # via feast (setup.py) +hazelcast-python-client==5.4.0 + # via feast (setup.py) hiredis==2.3.2 + # via feast (setup.py) httpcore==1.0.5 # via httpx httplib2==0.22.0 @@ -265,11 +327,15 @@ httptools==0.6.1 # via uvicorn httpx==0.27.0 # via + # feast (setup.py) # fastapi # jupyterlab ibis-framework[duckdb]==8.0.0 - # via ibis-substrait + # via + # feast (setup.py) + # ibis-substrait ibis-substrait==3.2.0 + # via feast (setup.py) identify==2.5.36 # via pre-commit idna==3.7 @@ -280,6 +346,7 @@ idna==3.7 # jsonschema # requests # snowflake-connector-python + # yarl imagesize==1.4.1 # via sphinx importlib-metadata==7.1.0 @@ -302,7 +369,7 @@ ipython==8.18.1 # great-expectations # ipykernel # ipywidgets -ipywidgets==8.1.2 +ipywidgets==8.1.3 # via great-expectations isodate==0.6.1 # via azure-storage-blob @@ -312,6 +379,7 @@ jedi==0.19.1 # via ipython jinja2==3.1.4 # via + # feast (setup.py) # altair # fastapi # great-expectations @@ -335,6 +403,7 @@ jsonpointer==2.4 # jsonschema jsonschema[format-nongpl]==4.22.0 # via + # feast (setup.py) # altair # great-expectations # jupyter-events @@ -342,7 +411,7 @@ jsonschema[format-nongpl]==4.22.0 # nbformat jsonschema-specifications==2023.12.1 # via jsonschema -jupyter-client==8.6.1 +jupyter-client==8.6.2 # via # ipykernel # jupyter-server @@ -360,7 +429,7 @@ jupyter-events==0.10.0 # via jupyter-server jupyter-lsp==2.2.5 # via jupyterlab -jupyter-server==2.14.0 +jupyter-server==2.14.1 # via # jupyter-lsp # jupyterlab @@ -369,17 +438,18 @@ jupyter-server==2.14.0 # notebook-shim jupyter-server-terminals==0.5.3 # via jupyter-server -jupyterlab==4.1.8 +jupyterlab==4.2.1 # via notebook jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-server==2.27.1 +jupyterlab-server==2.27.2 # via # jupyterlab # notebook -jupyterlab-widgets==3.0.10 +jupyterlab-widgets==3.0.11 # via ipywidgets kubernetes==20.13.0 + # via feast (setup.py) locket==1.0.0 # via partd makefun==1.15.2 @@ -400,13 +470,17 @@ matplotlib-inline==0.1.7 mdurl==0.1.2 # via markdown-it-py minio==7.1.0 + # via feast (setup.py) mistune==3.0.2 # via # great-expectations # nbconvert mmh3==4.1.0 + # via feast (setup.py) mock==2.0.0 + # via feast (setup.py) moto==4.2.14 + # via feast (setup.py) msal==1.28.0 # via # azure-identity @@ -415,13 +489,20 @@ msal-extensions==1.1.0 # via azure-identity msgpack==1.0.8 # via cachecontrol +multidict==6.0.5 + # via + # aiohttp + # yarl multipledispatch==1.0.0 # via ibis-framework mypy==1.10.0 - # via sqlalchemy + # via + # feast (setup.py) + # sqlalchemy mypy-extensions==1.0.0 # via mypy mypy-protobuf==3.3.0 + # via feast (setup.py) nbclient==0.10.0 # via nbconvert nbconvert==7.16.4 @@ -434,9 +515,9 @@ nbformat==5.10.4 
# nbconvert nest-asyncio==1.6.0 # via ipykernel -nodeenv==1.8.0 +nodeenv==1.9.0 # via pre-commit -notebook==7.1.3 +notebook==7.2.0 # via great-expectations notebook-shim==0.2.4 # via @@ -444,6 +525,7 @@ notebook-shim==0.2.4 # notebook numpy==1.26.4 # via + # feast (setup.py) # altair # dask # db-dtypes @@ -463,7 +545,6 @@ packaging==24.0 # build # dask # db-dtypes - # docker # duckdb-engine # google-cloud-bigquery # great-expectations @@ -481,6 +562,7 @@ packaging==24.0 # sphinx pandas==2.2.2 # via + # feast (setup.py) # altair # dask # dask-expr @@ -504,6 +586,7 @@ pexpect==4.9.0 pip==24.0 # via pip-tools pip-tools==7.4.1 + # via feast (setup.py) platformdirs==3.11.0 # via # jupyter-core @@ -516,9 +599,10 @@ ply==3.11 portalocker==2.8.2 # via msal-extensions pre-commit==3.3.1 + # via feast (setup.py) prometheus-client==0.20.0 # via jupyter-server -prompt-toolkit==3.0.43 +prompt-toolkit==3.0.45 # via ipython proto-plus==1.23.0 # via @@ -530,6 +614,7 @@ proto-plus==1.23.0 # google-cloud-firestore protobuf==4.25.3 # via + # feast (setup.py) # google-api-core # google-cloud-bigquery # google-cloud-bigquery-storage @@ -547,8 +632,11 @@ protobuf==4.25.3 # proto-plus # substrait psutil==5.9.0 - # via ipykernel + # via + # feast (setup.py) + # ipykernel psycopg2-binary==2.9.9 + # via feast (setup.py) ptyprocess==0.7.0 # via # pexpect @@ -556,12 +644,14 @@ ptyprocess==0.7.0 pure-eval==0.2.2 # via stack-data py==1.11.0 + # via feast (setup.py) py-cpuinfo==9.0.0 # via pytest-benchmark py4j==0.10.9.7 # via pyspark pyarrow==15.0.2 # via + # feast (setup.py) # dask-expr # db-dtypes # deltalake @@ -579,16 +669,19 @@ pyasn1==0.6.0 pyasn1-modules==0.4.0 # via google-auth pybindgen==0.22.1 + # via feast (setup.py) pycparser==2.22 # via cffi pydantic==2.7.1 # via + # feast (setup.py) # fastapi # great-expectations pydantic-core==2.18.2 # via pydantic pygments==2.18.0 # via + # feast (setup.py) # ipython # nbconvert # rich @@ -598,8 +691,11 @@ pyjwt[crypto]==2.8.0 # msal # snowflake-connector-python pymssql==2.3.0 -pymysql==1.1.0 + # via feast (setup.py) +pymysql==1.1.1 + # via feast (setup.py) pyodbc==5.1.0 + # via feast (setup.py) pyopenssl==24.1.0 # via snowflake-connector-python pyparsing==3.1.2 @@ -611,8 +707,10 @@ pyproject-hooks==1.1.0 # build # pip-tools pyspark==3.5.1 + # via feast (setup.py) pytest==7.4.4 # via + # feast (setup.py) # pytest-benchmark # pytest-cov # pytest-env @@ -622,13 +720,21 @@ pytest==7.4.4 # pytest-timeout # pytest-xdist pytest-benchmark==3.4.1 + # via feast (setup.py) pytest-cov==5.0.0 + # via feast (setup.py) pytest-env==1.1.3 + # via feast (setup.py) pytest-lazy-fixture==0.6.3 + # via feast (setup.py) pytest-mock==1.10.4 + # via feast (setup.py) pytest-ordering==0.6 + # via feast (setup.py) pytest-timeout==1.4.2 + # via feast (setup.py) pytest-xdist==3.6.1 + # via feast (setup.py) python-dateutil==2.9.0.post0 # via # arrow @@ -657,6 +763,7 @@ pytz==2024.1 # trino pyyaml==6.0.1 # via + # feast (setup.py) # dask # ibis-substrait # jupyter-events @@ -670,14 +777,17 @@ pyzmq==26.0.3 # jupyter-client # jupyter-server redis==4.6.0 + # via feast (setup.py) referencing==0.35.1 # via # jsonschema # jsonschema-specifications # jupyter-events -regex==2024.4.28 +regex==2024.5.15 + # via feast (setup.py) requests==2.31.0 # via + # feast (setup.py) # azure-core # cachecontrol # docker @@ -711,6 +821,7 @@ rich==13.7.1 # ibis-framework # typer rockset==2.1.2 + # via feast (setup.py) rpds-py==0.18.1 # via # jsonschema @@ -721,18 +832,18 @@ ruamel-yaml==0.17.17 # via great-expectations 
ruamel-yaml-clib==0.2.8 # via ruamel-yaml -ruff==0.4.3 +ruff==0.4.6 + # via feast (setup.py) s3transfer==0.10.1 # via boto3 -scipy==1.13.0 +scipy==1.13.1 # via great-expectations send2trash==1.8.3 # via jupyter-server -setuptools==69.5.1 +setuptools==70.0.0 # via # grpcio-tools # kubernetes - # nodeenv # pip-tools shellingham==1.5.4 # via typer @@ -755,12 +866,14 @@ sniffio==1.3.1 # httpx snowballstemmer==2.2.0 # via sphinx -snowflake-connector-python[pandas]==3.10.0 +snowflake-connector-python[pandas]==3.10.1 + # via feast (setup.py) sortedcontainers==2.4.0 # via snowflake-connector-python soupsieve==2.5 # via beautifulsoup4 sphinx==6.2.1 + # via feast (setup.py) sphinxcontrib-applehelp==1.0.8 # via sphinx sphinxcontrib-devhelp==1.0.6 @@ -775,6 +888,7 @@ sphinxcontrib-serializinghtml==1.1.10 # via sphinx sqlalchemy[mypy]==2.0.30 # via + # feast (setup.py) # duckdb-engine # ibis-framework # sqlalchemy-views @@ -782,24 +896,30 @@ sqlalchemy-views==0.3.2 # via ibis-framework sqlglot==20.11.0 # via ibis-framework +sqlite-vec==0.0.1a10 + # via feast (setup.py) stack-data==0.6.3 # via ipython starlette==0.37.2 # via fastapi -substrait==0.17.0 +substrait==0.19.0 # via ibis-substrait tabulate==0.9.0 + # via feast (setup.py) tenacity==8.3.0 + # via feast (setup.py) terminado==0.18.1 # via # jupyter-server # jupyter-server-terminals testcontainers==4.4.0 + # via feast (setup.py) thriftpy2==0.5.0 # via happybase tinycss2==1.3.0 # via nbconvert toml==0.10.2 + # via feast (setup.py) tomli==2.0.1 # via # build @@ -809,7 +929,7 @@ tomli==2.0.1 # pip-tools # pytest # pytest-env -tomlkit==0.12.4 +tomlkit==0.12.5 # via snowflake-connector-python toolz==0.12.1 # via @@ -817,7 +937,7 @@ toolz==0.12.1 # dask # ibis-framework # partd -tornado==6.4 +tornado==6.4.1 # via # ipykernel # jupyter-client @@ -826,7 +946,9 @@ tornado==6.4 # notebook # terminado tqdm==4.66.4 - # via great-expectations + # via + # feast (setup.py) + # great-expectations traitlets==5.14.3 # via # comm @@ -843,29 +965,44 @@ traitlets==5.14.3 # nbconvert # nbformat trino==0.328.0 + # via feast (setup.py) typeguard==4.2.1 + # via feast (setup.py) typer==0.12.3 # via fastapi-cli types-cffi==1.16.0.20240331 # via types-pyopenssl types-protobuf==3.19.22 - # via mypy-protobuf -types-pymysql==1.1.0.20240425 + # via + # feast (setup.py) + # mypy-protobuf +types-pymysql==1.1.0.20240524 + # via feast (setup.py) types-pyopenssl==24.1.0.20240425 # via types-redis types-python-dateutil==2.9.0.20240316 - # via arrow + # via + # feast (setup.py) + # arrow types-pytz==2024.1.0.20240417 + # via feast (setup.py) types-pyyaml==6.0.12.20240311 + # via feast (setup.py) types-redis==4.6.0.20240425 + # via feast (setup.py) types-requests==2.30.0.0 -types-setuptools==69.5.0.20240423 - # via types-cffi + # via feast (setup.py) +types-setuptools==70.0.0.20240524 + # via + # feast (setup.py) + # types-cffi types-tabulate==0.9.0.20240106 + # via feast (setup.py) types-urllib3==1.26.25.14 # via types-requests typing-extensions==4.11.0 # via + # aioitertools # anyio # async-lru # azure-core @@ -898,8 +1035,10 @@ uritemplate==4.1.1 # via google-api-python-client urllib3==1.26.18 # via + # feast (setup.py) # botocore # docker + # elastic-transport # great-expectations # kubernetes # minio @@ -910,12 +1049,15 @@ urllib3==1.26.18 # testcontainers uvicorn[standard]==0.29.0 # via + # feast (setup.py) # fastapi # fastapi-cli uvloop==0.19.0 # via uvicorn virtualenv==20.23.0 - # via pre-commit + # via + # feast (setup.py) + # pre-commit watchfiles==0.21.0 # via uvicorn 
wcwidth==0.2.13 @@ -936,11 +1078,15 @@ werkzeug==3.0.3 # via moto wheel==0.43.0 # via pip-tools -widgetsnbextension==4.0.10 +widgetsnbextension==4.0.11 # via ipywidgets wrapt==1.16.0 - # via testcontainers + # via + # aiobotocore + # testcontainers xmltodict==0.13.0 # via moto +yarl==1.9.4 + # via aiohttp zipp==3.18.1 # via importlib-metadata diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 1092aac9d09..579f39135e3 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -20,17 +20,22 @@ charset-normalizer==3.3.2 # via requests click==8.1.7 # via + # feast (setup.py) # dask # typer # uvicorn cloudpickle==3.0.0 # via dask colorama==0.4.6 + # via feast (setup.py) dask[dataframe]==2024.5.0 - # via dask-expr + # via + # feast (setup.py) + # dask-expr dask-expr==1.1.0 # via dask dill==0.3.8 + # via feast (setup.py) dnspython==2.6.1 # via email-validator email-validator==2.1.1 @@ -38,14 +43,15 @@ email-validator==2.1.1 exceptiongroup==1.2.1 # via anyio fastapi==0.111.0 - # via fastapi-cli + # via + # feast (setup.py) + # fastapi-cli fastapi-cli==0.0.2 # via fastapi fsspec==2024.3.1 # via dask -greenlet==3.0.3 - # via sqlalchemy gunicorn==22.0.0 + # via feast (setup.py) h11==0.14.0 # via # httpcore @@ -67,8 +73,11 @@ importlib-metadata==7.1.0 # dask # typeguard jinja2==3.1.4 - # via fastapi + # via + # feast (setup.py) + # fastapi jsonschema==4.22.0 + # via feast (setup.py) jsonschema-specifications==2023.12.1 # via jsonschema locket==1.0.0 @@ -80,13 +89,16 @@ markupsafe==2.1.5 mdurl==0.1.2 # via markdown-it-py mmh3==4.1.0 + # via feast (setup.py) mypy==1.10.0 # via sqlalchemy mypy-extensions==1.0.0 # via mypy mypy-protobuf==3.6.0 + # via feast (setup.py) numpy==1.26.4 # via + # feast (setup.py) # dask # pandas # pyarrow @@ -98,20 +110,29 @@ packaging==24.0 # gunicorn pandas==2.2.2 # via + # feast (setup.py) # dask # dask-expr partd==1.4.2 # via dask protobuf==4.25.3 - # via mypy-protobuf + # via + # feast (setup.py) + # mypy-protobuf pyarrow==16.0.0 - # via dask-expr + # via + # feast (setup.py) + # dask-expr pydantic==2.7.1 - # via fastapi + # via + # feast (setup.py) + # fastapi pydantic-core==2.18.2 # via pydantic pygments==2.18.0 - # via rich + # via + # feast (setup.py) + # rich python-dateutil==2.9.0.post0 # via pandas python-dotenv==1.0.1 @@ -122,6 +143,7 @@ pytz==2024.1 # via pandas pyyaml==6.0.1 # via + # feast (setup.py) # dask # uvicorn referencing==0.35.1 @@ -129,6 +151,7 @@ referencing==0.35.1 # jsonschema # jsonschema-specifications requests==2.31.0 + # via feast (setup.py) rich==13.7.1 # via typer rpds-py==0.18.1 @@ -144,11 +167,17 @@ sniffio==1.3.1 # anyio # httpx sqlalchemy[mypy]==2.0.30 + # via feast (setup.py) +sqlite-vec==0.0.1a10 + # via feast (setup.py) starlette==0.37.2 # via fastapi tabulate==0.9.0 + # via feast (setup.py) tenacity==8.3.0 + # via feast (setup.py) toml==0.10.2 + # via feast (setup.py) tomli==2.0.1 # via mypy toolz==0.12.1 @@ -156,7 +185,9 @@ toolz==0.12.1 # dask # partd tqdm==4.66.4 + # via feast (setup.py) typeguard==4.2.1 + # via feast (setup.py) typer==0.12.3 # via fastapi-cli types-protobuf==5.26.0.20240422 @@ -181,6 +212,7 @@ urllib3==2.2.1 # via requests uvicorn[standard]==0.29.0 # via + # feast (setup.py) # fastapi # fastapi-cli uvloop==0.19.0 @@ -190,4 +222,4 @@ watchfiles==0.21.0 websockets==12.0 # via uvicorn zipp==3.18.1 - # via importlib-metadata \ No newline at end of file + # via importlib-metadata diff --git 
a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py index 7c875fc9bde..6f103828dc5 100644 --- a/sdk/python/tests/conftest.py +++ b/sdk/python/tests/conftest.py @@ -32,8 +32,8 @@ create_basic_driver_dataset, create_document_dataset, ) -from tests.integration.feature_repos.integration_test_repo_config import ( # noqa: E402 - IntegrationTestRepoConfig, +from tests.integration.feature_repos.integration_test_repo_config import ( + IntegrationTestRepoConfig, # noqa: E402 ) from tests.integration.feature_repos.repo_configuration import ( # noqa: E402 AVAILABLE_OFFLINE_STORES, @@ -45,8 +45,8 @@ construct_universal_feature_views, construct_universal_test_data, ) -from tests.integration.feature_repos.universal.data_sources.file import ( # noqa: E402 - FileDataSourceCreator, +from tests.integration.feature_repos.universal.data_sources.file import ( + FileDataSourceCreator, # noqa: E402 ) from tests.integration.feature_repos.universal.entities import ( # noqa: E402 customer, @@ -173,7 +173,7 @@ def simple_dataset_2() -> pd.DataFrame: def start_test_local_server(repo_path: str, port: int): fs = FeatureStore(repo_path) - fs.serve("localhost", port, no_access_log=True) + fs.serve(host="localhost", port=port) @pytest.fixture diff --git a/sdk/python/tests/example_repos/example_feature_repo_1.py b/sdk/python/tests/example_repos/example_feature_repo_1.py index fbf1fbb9b07..20a8ad7bd86 100644 --- a/sdk/python/tests/example_repos/example_feature_repo_1.py +++ b/sdk/python/tests/example_repos/example_feature_repo_1.py @@ -4,7 +4,7 @@ from feast import Entity, FeatureService, FeatureView, Field, FileSource, PushSource from feast.on_demand_feature_view import on_demand_feature_view -from feast.types import Float32, Int64, String +from feast.types import Array, Float32, Int64, String # Note that file source paths are not validated, so there doesn't actually need to be any data # at the paths for these file sources. Since these paths are effectively fake, this example @@ -32,6 +32,12 @@ batch_source=driver_locations_source, ) +rag_documents_source = FileSource( + name="rag_documents_source", + path="data/rag_documents.parquet", + timestamp_field="event_timestamp", +) + driver = Entity( name="driver", # The name is derived from this argument, not object name. join_keys=["driver_id"], @@ -43,6 +49,10 @@ join_keys=["customer_id"], ) +item = Entity( + name="item_id", # The name is derived from this argument, not object name. 
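+    # Entity for the RAG document embeddings; the document_embeddings
+    # feature view defined below joins on this "item_id" key.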
+ join_keys=["item_id"], +) driver_locations = FeatureView( name="driver_locations", @@ -101,6 +111,17 @@ tags={}, ) +document_embeddings = FeatureView( + name="document_embeddings", + entities=[item], + schema=[ + Field(name="Embeddings", dtype=Array(Float32)), + Field(name="item_id", dtype=String), + ], + source=rag_documents_source, + ttl=timedelta(hours=24), +) + @on_demand_feature_view( sources=[customer_profile], diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index 2f260e87a60..7123bd0fc15 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -34,8 +34,8 @@ from tests.integration.feature_repos.universal.data_sources.file import ( DuckDBDataSourceCreator, DuckDBDeltaDataSourceCreator, - DuckDBDeltaS3DataSourceCreator, FileDataSourceCreator, + RemoteOfflineStoreDataSourceCreator, ) from tests.integration.feature_repos.universal.data_sources.redshift import ( RedshiftDataSourceCreator, @@ -122,21 +122,21 @@ ("local", FileDataSourceCreator), ("local", DuckDBDataSourceCreator), ("local", DuckDBDeltaDataSourceCreator), + ("local", RemoteOfflineStoreDataSourceCreator), ] if os.getenv("FEAST_IS_LOCAL_TEST", "False") == "True": AVAILABLE_OFFLINE_STORES.extend( [ - ("local", DuckDBDeltaS3DataSourceCreator), + # todo: @tokoko to reenable + # ("local", DuckDBDeltaS3DataSourceCreator), ] ) AVAILABLE_ONLINE_STORES: Dict[ str, Tuple[Union[str, Dict[Any, Any]], Optional[Type[OnlineStoreCreator]]] -] = { - "sqlite": ({"type": "sqlite"}, None), -} +] = {"sqlite": ({"type": "sqlite"}, None)} # Only configure Cloud DWH if running full integration tests if os.getenv("FEAST_IS_LOCAL_TEST", "False") != "True": @@ -153,7 +153,6 @@ AVAILABLE_ONLINE_STORES["datastore"] = ("datastore", None) AVAILABLE_ONLINE_STORES["snowflake"] = (SNOWFLAKE_CONFIG, None) AVAILABLE_ONLINE_STORES["bigtable"] = (BIGTABLE_CONFIG, None) - # Uncomment to test using private Rockset account. Currently not enabled as # there is no dedicated Rockset instance for CI testing and there is no # containerized version of Rockset. @@ -487,7 +486,6 @@ def construct_test_environment( "arn:aws:iam::402087665549:role/lambda_execution_role", ), ) - else: feature_server = LocalFeatureServerConfig( feature_logging=FeatureLoggingConfig(enabled=True) @@ -500,9 +498,7 @@ def construct_test_environment( aws_registry_path = os.getenv( "AWS_REGISTRY_PATH", "s3://feast-int-bucket/registries" ) - registry: Union[str, RegistryConfig] = ( - f"{aws_registry_path}/{project}/registry.db" - ) + registry = RegistryConfig(path=f"{aws_registry_path}/{project}/registry.db") else: registry = RegistryConfig( path=str(Path(repo_dir_name) / "registry.db"), diff --git a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py index 62d458d6f4a..f1cab214299 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py @@ -18,7 +18,6 @@ def create_data_source( self, df: pd.DataFrame, destination_name: str, - event_timestamp_column="ts", created_timestamp_column="created_ts", field_mapping: Optional[Dict[str, str]] = None, timestamp_field: Optional[str] = None, @@ -32,7 +31,6 @@ def create_data_source( df: The dataframe to be used to create the data source. 
destination_name: This str is used by the implementing classes to isolate the multiple dataframes from each other. - event_timestamp_column: (Deprecated) Pass through for the underlying data source. created_timestamp_column: Pass through for the underlying data source. field_mapping: Pass through for the underlying data source. timestamp_field: Pass through for the underlying data source. diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py index 6f0ac02a003..f7ab55d868a 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py @@ -1,18 +1,22 @@ +import logging import os.path import shutil +import subprocess import tempfile import uuid +from pathlib import Path from typing import Any, Dict, List, Optional import pandas as pd import pyarrow as pa import pyarrow.parquet as pq +import yaml from minio import Minio from testcontainers.core.generic import DockerContainer from testcontainers.core.waiting_utils import wait_for_logs from testcontainers.minio import MinioContainer -from feast import FileSource +from feast import FileSource, RepoConfig from feast.data_format import DeltaFormat, ParquetFormat from feast.data_source import DataSource from feast.feature_logging import LoggingDestination @@ -22,10 +26,15 @@ FileLoggingDestination, SavedDatasetFileStorage, ) -from feast.repo_config import FeastConfigBaseModel +from feast.infra.offline_stores.remote import RemoteOfflineStoreConfig +from feast.repo_config import FeastConfigBaseModel, RegistryConfig +from feast.wait import wait_retry_backoff # noqa: E402 from tests.integration.feature_repos.universal.data_source_creator import ( DataSourceCreator, ) +from tests.utils.http_server import check_port_open, free_port # noqa: E402 + +logger = logging.getLogger(__name__) class FileDataSourceCreator(DataSourceCreator): @@ -141,7 +150,8 @@ def __init__(self, project_name: str, *args, **kwargs): self.minio = MinioContainer() self.minio.start() client = self.minio.get_client() - client.make_bucket("test") + if not client.bucket_exists("test"): + client.make_bucket("test") host_ip = self.minio.get_container_host_ip() exposed_port = self.minio.get_exposed_port(self.minio.port) self.endpoint_url = f"http://{host_ip}:{exposed_port}" @@ -351,3 +361,69 @@ def create_offline_store_config(self): staging_location_endpoint_override=self.endpoint_url, ) return self.duckdb_offline_store_config + + +class RemoteOfflineStoreDataSourceCreator(FileDataSourceCreator): + def __init__(self, project_name: str, *args, **kwargs): + super().__init__(project_name) + self.server_port: int = 0 + self.proc = None + + def setup(self, registry: RegistryConfig): + parent_offline_config = super().create_offline_store_config() + config = RepoConfig( + project=self.project_name, + provider="local", + offline_store=parent_offline_config, + registry=registry.path, + entity_key_serialization_version=2, + ) + + repo_path = Path(tempfile.mkdtemp()) + with open(repo_path / "feature_store.yaml", "w") as outfile: + yaml.dump(config.dict(by_alias=True), outfile) + repo_path = str(repo_path.resolve()) + + self.server_port = free_port() + host = "0.0.0.0" + cmd = [ + "feast", + "-c" + repo_path, + "serve_offline", + "--host", + host, + "--port", + str(self.server_port), + ] + self.proc = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL + ) + + 
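+        # The offline server runs as a separate "feast serve_offline"
+        # process, so poll its TCP port (wait_retry_backoff retries with
+        # backoff) rather than sleeping a fixed interval before returning
+        # the gRPC endpoint.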
+        _time_out_sec: int = 60
+        # Wait for server to start
+        wait_retry_backoff(
+            lambda: (None, check_port_open(host, self.server_port)),
+            timeout_secs=_time_out_sec,
+            timeout_msg=f"Unable to start the feast remote offline server in {_time_out_sec} seconds at port={self.server_port}",
+        )
+        return "grpc+tcp://{}:{}".format(host, self.server_port)
+
+    def create_offline_store_config(self) -> FeastConfigBaseModel:
+        self.remote_offline_store_config = RemoteOfflineStoreConfig(
+            type="remote", host="0.0.0.0", port=self.server_port
+        )
+        return self.remote_offline_store_config
+
+    def teardown(self):
+        super().teardown()
+        if self.proc is not None:
+            self.proc.kill()
+
+            # wait for the server to free the port
+            wait_retry_backoff(
+                lambda: (
+                    None,
+                    not check_port_open("localhost", self.server_port),
+                ),
+                timeout_secs=30,
+            )
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py
index 8fe933fbba7..91d1a74f071 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py
@@ -49,7 +49,6 @@ def create_data_source(
         self,
         df: pd.DataFrame,
         destination_name: str,
-        event_timestamp_column="ts",
         created_timestamp_column="created_ts",
         field_mapping: Optional[Dict[str, str]] = None,
         timestamp_field: Optional[str] = "ts",
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py
index 237be2ac016..e9c4ad21a31 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py
@@ -47,7 +47,6 @@ def create_data_source(
         self,
         df: pd.DataFrame,
         destination_name: str,
-        event_timestamp_column="ts",
         created_timestamp_column="created_ts",
         field_mapping: Optional[Dict[str, str]] = None,
         timestamp_field: Optional[str] = "ts",
diff --git a/sdk/python/tests/integration/feature_repos/universal/feature_views.py b/sdk/python/tests/integration/feature_repos/universal/feature_views.py
index 2a0a9d1bd01..32649fe5bf0 100644
--- a/sdk/python/tests/integration/feature_repos/universal/feature_views.py
+++ b/sdk/python/tests/integration/feature_repos/universal/feature_views.py
@@ -85,7 +85,8 @@ def conv_rate_plus_100_feature_view(
         schema=[] if infer_features else _features,
         sources=sources,
         feature_transformation=PandasTransformation(
-            udf=conv_rate_plus_100, udf_string="raw udf source"
+            udf=conv_rate_plus_100,
+            udf_string="raw udf source",  # type: ignore
         )
         if not use_substrait_odfv
         else SubstraitTransformation.from_ibis(conv_rate_plus_100_ibis, sources),
@@ -124,10 +125,11 @@ def similarity_feature_view(
 
     return OnDemandFeatureView(
         name=similarity.__name__,
-        sources=sources,
+        sources=sources,  # type: ignore
         schema=[] if infer_features else _fields,
         feature_transformation=PandasTransformation(
-            udf=similarity, udf_string="similarity raw udf"
+            udf=similarity,
+            udf_string="similarity raw udf",  # type: ignore
         ),
     )
diff --git a/sdk/python/tests/integration/feature_repos/universal/online_store/elasticsearch.py b/sdk/python/tests/integration/feature_repos/universal/online_store/elasticsearch.py
index c62a9009caf..cfbc7611a1f 100644
--- a/sdk/python/tests/integration/feature_repos/universal/online_store/elasticsearch.py
+++
b/sdk/python/tests/integration/feature_repos/universal/online_store/elasticsearch.py @@ -1,4 +1,4 @@ -from typing import Dict +from typing import Any, Dict from testcontainers.elasticsearch import ElasticSearchContainer @@ -14,7 +14,7 @@ def __init__(self, project_name: str, **kwargs): "elasticsearch:8.3.3", ).with_exposed_ports(9200) - def create_online_store(self) -> Dict[str, str]: + def create_online_store(self) -> Dict[str, Any]: self.container.start() return { "host": "localhost", diff --git a/sdk/python/tests/integration/feature_repos/universal/online_store/postgres.py b/sdk/python/tests/integration/feature_repos/universal/online_store/postgres.py index 7b4156fffe0..e4098626411 100644 --- a/sdk/python/tests/integration/feature_repos/universal/online_store/postgres.py +++ b/sdk/python/tests/integration/feature_repos/universal/online_store/postgres.py @@ -1,5 +1,5 @@ import os -from typing import Dict +from typing import Any, Dict from testcontainers.core.container import DockerContainer from testcontainers.core.waiting_utils import wait_for_logs @@ -51,7 +51,7 @@ def __init__(self, project_name: str, **kwargs): ) ) - def create_online_store(self) -> Dict[str, str]: + def create_online_store(self) -> Dict[str, Any]: self.container.start() log_string_to_wait_for = "database system is ready to accept connections" wait_for_logs( diff --git a/sdk/python/tests/integration/feature_repos/universal/online_store_creator.py b/sdk/python/tests/integration/feature_repos/universal/online_store_creator.py index 4932001e76f..0963a1cd1e3 100644 --- a/sdk/python/tests/integration/feature_repos/universal/online_store_creator.py +++ b/sdk/python/tests/integration/feature_repos/universal/online_store_creator.py @@ -1,13 +1,14 @@ from abc import ABC, abstractmethod +from typing import Any -from feast.repo_config import FeastConfigBaseModel +# from feast.repo_config import FeastConfigBaseModel class OnlineStoreCreator(ABC): def __init__(self, project_name: str, **kwargs): self.project_name = project_name - def create_online_store(self) -> FeastConfigBaseModel: + def create_online_store(self) -> dict[str, Any]: raise NotImplementedError @abstractmethod diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/materialization/test_universal_e2e.py similarity index 100% rename from sdk/python/tests/integration/e2e/test_universal_e2e.py rename to sdk/python/tests/integration/materialization/test_universal_e2e.py diff --git a/sdk/python/tests/integration/offline_store/test_feature_logging.py b/sdk/python/tests/integration/offline_store/test_feature_logging.py index eba994544da..32f506f90b2 100644 --- a/sdk/python/tests/integration/offline_store/test_feature_logging.py +++ b/sdk/python/tests/integration/offline_store/test_feature_logging.py @@ -34,8 +34,6 @@ def test_feature_service_logging(environment, universal_data_sources, pass_as_pa (_, datasets, data_sources) = universal_data_sources feature_views = construct_universal_feature_views(data_sources) - store.apply([customer(), driver(), location(), *feature_views.values()]) - feature_service = FeatureService( name="test_service", features=[ @@ -49,6 +47,17 @@ def test_feature_service_logging(environment, universal_data_sources, pass_as_pa ), ) + store.apply( + [customer(), driver(), location(), *feature_views.values()], feature_service + ) + + # Added to handle the case that the offline store is remote + store.registry.apply_feature_service(feature_service, store.config.project) + store.registry.apply_data_source( + 
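+        # Register the logging destination as a data source explicitly so a
+        # remote offline server can resolve it from the shared registry.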
feature_service.logging_config.destination.to_data_source(), + store.config.project, + ) + driver_df = datasets.driver_df driver_df["val_to_add"] = 50 driver_df = driver_df.join(conv_rate_plus_100(driver_df)) diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py index a6db7f2535c..bfb8a56200a 100644 --- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py @@ -19,6 +19,9 @@ construct_universal_feature_views, table_name_from_data_source, ) +from tests.integration.feature_repos.universal.data_sources.file import ( + RemoteOfflineStoreDataSourceCreator, +) from tests.integration.feature_repos.universal.data_sources.snowflake import ( SnowflakeDataSourceCreator, ) @@ -157,22 +160,25 @@ def test_historical_features_main( timestamp_precision=timedelta(milliseconds=1), ) - assert_feature_service_correctness( - store, - feature_service, - full_feature_names, - entity_df_with_request_data, - expected_df, - event_timestamp, - ) - assert_feature_service_entity_mapping_correctness( - store, - feature_service_entity_mapping, - full_feature_names, - entity_df_with_request_data, - full_expected_df, - event_timestamp, - ) + if not isinstance( + environment.data_source_creator, RemoteOfflineStoreDataSourceCreator + ): + assert_feature_service_correctness( + store, + feature_service, + full_feature_names, + entity_df_with_request_data, + expected_df, + event_timestamp, + ) + assert_feature_service_entity_mapping_correctness( + store, + feature_service_entity_mapping, + full_feature_names, + entity_df_with_request_data, + full_expected_df, + event_timestamp, + ) table_from_df_entities: pd.DataFrame = job_from_df.to_arrow().to_pandas() validate_dataframes( @@ -375,8 +381,13 @@ def test_historical_features_persisting( (entities, datasets, data_sources) = universal_data_sources feature_views = construct_universal_feature_views(data_sources) + storage = environment.data_source_creator.create_saved_dataset_destination() + store.apply([driver(), customer(), location(), *feature_views.values()]) + # Added to handle the case that the offline store is remote + store.registry.apply_data_source(storage.to_data_source(), store.config.project) + entity_df = datasets.entity_df.drop( columns=["order_id", "origin_id", "destination_id"] ) @@ -398,7 +409,7 @@ def test_historical_features_persisting( saved_dataset = store.create_saved_dataset( from_=job, name="saved_dataset", - storage=environment.data_source_creator.create_saved_dataset_destination(), + storage=storage, tags={"env": "test"}, allow_overwrite=True, ) diff --git a/sdk/python/tests/integration/e2e/test_validation.py b/sdk/python/tests/integration/offline_store/test_validation.py similarity index 93% rename from sdk/python/tests/integration/e2e/test_validation.py rename to sdk/python/tests/integration/offline_store/test_validation.py index fdf182be573..1731f823c89 100644 --- a/sdk/python/tests/integration/e2e/test_validation.py +++ b/sdk/python/tests/integration/offline_store/test_validation.py @@ -45,8 +45,13 @@ def test_historical_retrieval_with_validation(environment, universal_data_source store = environment.feature_store (entities, datasets, data_sources) = universal_data_sources feature_views = construct_universal_feature_views(data_sources) + storage = environment.data_source_creator.create_saved_dataset_destination() 
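+    # The destination is created before apply so its data source can be
+    # registered below; a remote offline server only sees sources that
+    # exist in the shared registry.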
+ store.apply([driver(), customer(), location(), *feature_views.values()]) + # Added to handle the case that the offline store is remote + store.registry.apply_data_source(storage.to_data_source(), store.config.project) + # Create two identical retrieval jobs entity_df = datasets.entity_df.drop( columns=["order_id", "origin_id", "destination_id"] @@ -64,7 +69,7 @@ def test_historical_retrieval_with_validation(environment, universal_data_source store.create_saved_dataset( from_=reference_job, name="my_training_dataset", - storage=environment.data_source_creator.create_saved_dataset_destination(), + storage=storage, allow_overwrite=True, ) saved_dataset = store.get_saved_dataset("my_training_dataset") @@ -80,9 +85,13 @@ def test_historical_retrieval_fails_on_validation(environment, universal_data_so (entities, datasets, data_sources) = universal_data_sources feature_views = construct_universal_feature_views(data_sources) + storage = environment.data_source_creator.create_saved_dataset_destination() store.apply([driver(), customer(), location(), *feature_views.values()]) + # Added to handle the case that the offline store is remote + store.registry.apply_data_source(storage.to_data_source(), store.config.project) + entity_df = datasets.entity_df.drop( columns=["order_id", "origin_id", "destination_id"] ) @@ -95,7 +104,7 @@ def test_historical_retrieval_fails_on_validation(environment, universal_data_so store.create_saved_dataset( from_=reference_job, name="my_other_dataset", - storage=environment.data_source_creator.create_saved_dataset_destination(), + storage=storage, allow_overwrite=True, ) @@ -149,10 +158,19 @@ def test_logged_features_validation(environment, universal_data_sources): ), ) + storage = environment.data_source_creator.create_saved_dataset_destination() + store.apply( [driver(), customer(), location(), feature_service, *feature_views.values()] ) + # Added to handle the case that the offline store is remote + store.registry.apply_data_source( + feature_service.logging_config.destination.to_data_source(), + store.config.project, + ) + store.registry.apply_data_source(storage.to_data_source(), store.config.project) + entity_df = datasets.entity_df.drop( columns=["order_id", "origin_id", "destination_id"] ) @@ -180,7 +198,7 @@ def test_logged_features_validation(environment, universal_data_sources): entity_df=entity_df, features=store_fs, full_feature_names=True ), name="reference_for_validating_logged_features", - storage=environment.data_source_creator.create_saved_dataset_destination(), + storage=storage, allow_overwrite=True, ) diff --git a/sdk/python/tests/integration/e2e/test_python_feature_server.py b/sdk/python/tests/integration/online_store/test_python_feature_server.py similarity index 100% rename from sdk/python/tests/integration/e2e/test_python_feature_server.py rename to sdk/python/tests/integration/online_store/test_python_feature_server.py diff --git a/sdk/python/tests/integration/online_store/test_remote_online_store.py b/sdk/python/tests/integration/online_store/test_remote_online_store.py new file mode 100644 index 00000000000..759a9c7a87b --- /dev/null +++ b/sdk/python/tests/integration/online_store/test_remote_online_store.py @@ -0,0 +1,233 @@ +import os +import subprocess +import tempfile +from datetime import datetime +from textwrap import dedent + +import pytest + +from feast.feature_store import FeatureStore +from feast.wait import wait_retry_backoff +from tests.utils.cli_repo_creator import CliRunner +from tests.utils.http_server import check_port_open, 
free_port + + +@pytest.mark.integration +def test_remote_online_store_read(): + with tempfile.TemporaryDirectory() as remote_server_tmp_dir, tempfile.TemporaryDirectory() as remote_client_tmp_dir: + server_store, server_url, registry_path = ( + _create_server_store_spin_feature_server(temp_dir=remote_server_tmp_dir) + ) + assert None not in (server_store, server_url, registry_path) + client_store = _create_remote_client_feature_store( + temp_dir=remote_client_tmp_dir, + server_registry_path=str(registry_path), + feature_server_url=server_url, + ) + assert client_store is not None + _assert_non_existing_entity_feature_views_entity( + client_store=client_store, server_store=server_store + ) + _assert_existing_feature_views_entity( + client_store=client_store, server_store=server_store + ) + _assert_non_existing_feature_views( + client_store=client_store, server_store=server_store + ) + + +def _assert_non_existing_entity_feature_views_entity( + client_store: FeatureStore, server_store: FeatureStore +): + features = [ + "driver_hourly_stats:conv_rate", + "driver_hourly_stats:acc_rate", + "driver_hourly_stats:avg_daily_trips", + ] + + entity_rows = [{"driver_id": 1234}] + _assert_client_server_online_stores_are_matching( + client_store=client_store, + server_store=server_store, + features=features, + entity_rows=entity_rows, + ) + + +def _assert_non_existing_feature_views( + client_store: FeatureStore, server_store: FeatureStore +): + features = [ + "driver_hourly_stats1:conv_rate", + "driver_hourly_stats1:acc_rate", + "driver_hourly_stats:avg_daily_trips", + ] + + entity_rows = [{"driver_id": 1001}, {"driver_id": 1002}] + + with pytest.raises( + Exception, match="Feature view driver_hourly_stats1 does not exist" + ): + client_store.get_online_features( + features=features, entity_rows=entity_rows + ).to_dict() + + with pytest.raises( + Exception, match="Feature view driver_hourly_stats1 does not exist" + ): + server_store.get_online_features( + features=features, entity_rows=entity_rows + ).to_dict() + + +def _assert_existing_feature_views_entity( + client_store: FeatureStore, server_store: FeatureStore +): + features = [ + "driver_hourly_stats:conv_rate", + "driver_hourly_stats:acc_rate", + "driver_hourly_stats:avg_daily_trips", + ] + + entity_rows = [{"driver_id": 1001}, {"driver_id": 1002}] + _assert_client_server_online_stores_are_matching( + client_store=client_store, + server_store=server_store, + features=features, + entity_rows=entity_rows, + ) + + features = ["driver_hourly_stats:conv_rate"] + _assert_client_server_online_stores_are_matching( + client_store=client_store, + server_store=server_store, + features=features, + entity_rows=entity_rows, + ) + + +def _assert_client_server_online_stores_are_matching( + client_store: FeatureStore, + server_store: FeatureStore, + features: list[str], + entity_rows: list, +): + online_features_from_client = client_store.get_online_features( + features=features, entity_rows=entity_rows + ).to_dict() + + assert online_features_from_client is not None + + online_features_from_server = server_store.get_online_features( + features=features, entity_rows=entity_rows + ).to_dict() + + assert online_features_from_server is not None + assert online_features_from_client is not None + assert online_features_from_client == online_features_from_server + + +def _create_server_store_spin_feature_server(temp_dir): + feast_server_port = free_port() + store = _default_store(str(temp_dir), "REMOTE_ONLINE_SERVER_PROJECT") + server_url = next( + _start_feature_server( 
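+            # _start_feature_server is a generator: next() runs it up to its
+            # yield, i.e. until the port probe succeeds, and returns the
+            # server URL while the subprocess keeps serving for the test.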
+            repo_path=str(store.repo_path), server_port=feast_server_port
+        )
+    )
+    print(f"Server started successfully at {server_url}")
+    return store, server_url, os.path.join(store.repo_path, "data", "registry.db")
+
+
+def _default_store(temp_dir, project_name) -> FeatureStore:
+    runner = CliRunner()
+    result = runner.run(["init", project_name], cwd=temp_dir)
+    repo_path = os.path.join(temp_dir, project_name, "feature_repo")
+    assert result.returncode == 0
+
+    result = runner.run(["--chdir", repo_path, "apply"], cwd=temp_dir)
+    assert result.returncode == 0
+
+    fs = FeatureStore(repo_path=repo_path)
+    fs.materialize_incremental(
+        end_date=datetime.utcnow(), feature_views=["driver_hourly_stats"]
+    )
+    return fs
+
+
+def _create_remote_client_feature_store(
+    temp_dir, server_registry_path: str, feature_server_url: str
+) -> FeatureStore:
+    project_name = "REMOTE_ONLINE_CLIENT_PROJECT"
+    runner = CliRunner()
+    result = runner.run(["init", project_name], cwd=temp_dir)
+    assert result.returncode == 0
+    repo_path = os.path.join(temp_dir, project_name, "feature_repo")
+    _overwrite_remote_client_feature_store_yaml(
+        repo_path=str(repo_path),
+        registry_path=server_registry_path,
+        feature_server_url=feature_server_url,
+    )
+
+    result = runner.run(["--chdir", repo_path, "apply"], cwd=temp_dir)
+    assert result.returncode == 0
+
+    return FeatureStore(repo_path=repo_path)
+
+
+def _overwrite_remote_client_feature_store_yaml(
+    repo_path: str, registry_path: str, feature_server_url: str
+):
+    repo_config = os.path.join(repo_path, "feature_store.yaml")
+    with open(repo_config, "w") as repo_config:
+        repo_config.write(
+            dedent(
+                f"""
+            project: REMOTE_ONLINE_CLIENT_PROJECT
+            registry: {registry_path}
+            provider: local
+            online_store:
+                path: {feature_server_url}
+                type: remote
+            entity_key_serialization_version: 2
+            """
+            )
+        )
+
+
+def _start_feature_server(repo_path: str, server_port: int):
+    host = "0.0.0.0"
+    cmd = [
+        "feast",
+        "-c" + repo_path,
+        "serve",
+        "--host",
+        host,
+        "--port",
+        str(server_port),
+    ]
+    feast_server_process = subprocess.Popen(
+        cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
+    )
+    _time_out_sec: int = 60
+    # Wait for server to start
+    wait_retry_backoff(
+        lambda: (None, check_port_open(host, server_port)),
+        timeout_secs=_time_out_sec,
+        timeout_msg=f"Unable to start the feast server in {_time_out_sec} seconds for remote online store type, port={server_port}",
+    )
+
+    yield f"http://localhost:{server_port}"
+
+    if feast_server_process is not None:
+        feast_server_process.kill()
+
+    # wait for the server to free the port
+    wait_retry_backoff(
+        lambda: (
+            None,
+            not check_port_open("localhost", server_port),
+        ),
+        timeout_msg=f"Unable to stop the feast server in {_time_out_sec} seconds for remote online store type, port={server_port}",
+        timeout_secs=_time_out_sec,
+    )
diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py
index 4822a8d4f71..4cb474d2f1a 100644
--- a/sdk/python/tests/integration/online_store/test_universal_online.py
+++ b/sdk/python/tests/integration/online_store/test_universal_online.py
@@ -476,7 +476,7 @@ def test_online_retrieval_with_event_timestamps(environment, universal_data_sour
 
 
 @pytest.mark.integration
-@pytest.mark.universal_online_stores(only=["redis"])
+@pytest.mark.universal_online_stores(only=["redis", "dynamodb"])
 def test_async_online_retrieval_with_event_timestamps(
     environment, universal_data_sources
 ):
diff --git
a/sdk/python/tests/integration/registration/test_feature_store.py b/sdk/python/tests/integration/registration/test_feature_store.py index bf0c2fb61fd..d7ffb83059b 100644 --- a/sdk/python/tests/integration/registration/test_feature_store.py +++ b/sdk/python/tests/integration/registration/test_feature_store.py @@ -11,68 +11,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import os -import time from datetime import timedelta from tempfile import mkstemp import pytest from pytest_lazyfixture import lazy_fixture -from feast import FileSource -from feast.data_format import ParquetFormat from feast.entity import Entity from feast.feature_store import FeatureStore from feast.feature_view import FeatureView -from feast.field import Field -from feast.infra.offline_stores.file import FileOfflineStoreConfig -from feast.infra.online_stores.dynamodb import DynamoDBOnlineStoreConfig from feast.infra.online_stores.sqlite import SqliteOnlineStoreConfig from feast.repo_config import RepoConfig -from feast.types import Array, Bytes, Float64, Int64, String +from feast.types import Float64, Int64, String from tests.utils.data_source_test_creator import prep_file_source -@pytest.mark.integration -@pytest.mark.parametrize( - "test_feature_store", - [ - lazy_fixture("feature_store_with_gcs_registry"), - lazy_fixture("feature_store_with_s3_registry"), - ], -) -def test_apply_entity_integration(test_feature_store): - entity = Entity( - name="driver_car_id", - description="Car driver id", - tags={"team": "matchmaking"}, - ) - - # Register Entity - test_feature_store.apply([entity]) - - entities = test_feature_store.list_entities() - - entity = entities[0] - assert ( - len(entities) == 1 - and entity.name == "driver_car_id" - and entity.description == "Car driver id" - and "team" in entity.tags - and entity.tags["team"] == "matchmaking" - ) - - entity = test_feature_store.get_entity("driver_car_id") - assert ( - entity.name == "driver_car_id" - and entity.description == "Car driver id" - and "team" in entity.tags - and entity.tags["team"] == "matchmaking" - ) - - test_feature_store.teardown() - - @pytest.mark.integration @pytest.mark.parametrize( "test_feature_store", @@ -109,81 +62,6 @@ def test_feature_view_inference_success(test_feature_store, dataframe_source): test_feature_store.teardown() -@pytest.mark.integration -@pytest.mark.parametrize( - "test_feature_store", - [ - lazy_fixture("feature_store_with_gcs_registry"), - lazy_fixture("feature_store_with_s3_registry"), - ], -) -def test_apply_feature_view_integration(test_feature_store): - # Create Feature Views - batch_source = FileSource( - file_format=ParquetFormat(), - path="file://feast/*", - timestamp_field="ts_col", - created_timestamp_column="timestamp", - ) - - entity = Entity(name="fs1_my_entity_1", join_keys=["test"]) - - fv1 = FeatureView( - name="my_feature_view_1", - schema=[ - Field(name="fs1_my_feature_1", dtype=Int64), - Field(name="fs1_my_feature_2", dtype=String), - Field(name="fs1_my_feature_3", dtype=Array(String)), - Field(name="fs1_my_feature_4", dtype=Array(Bytes)), - Field(name="test", dtype=Int64), - ], - entities=[entity], - tags={"team": "matchmaking"}, - source=batch_source, - ttl=timedelta(minutes=5), - ) - - # Register Feature View - test_feature_store.apply([fv1, entity]) - - feature_views = test_feature_store.list_feature_views() - - # List Feature Views - assert ( - len(feature_views) == 1 - and 
feature_views[0].name == "my_feature_view_1" - and feature_views[0].features[0].name == "fs1_my_feature_1" - and feature_views[0].features[0].dtype == Int64 - and feature_views[0].features[1].name == "fs1_my_feature_2" - and feature_views[0].features[1].dtype == String - and feature_views[0].features[2].name == "fs1_my_feature_3" - and feature_views[0].features[2].dtype == Array(String) - and feature_views[0].features[3].name == "fs1_my_feature_4" - and feature_views[0].features[3].dtype == Array(Bytes) - and feature_views[0].entities[0] == "fs1_my_entity_1" - ) - - feature_view = test_feature_store.get_feature_view("my_feature_view_1") - assert ( - feature_view.name == "my_feature_view_1" - and feature_view.features[0].name == "fs1_my_feature_1" - and feature_view.features[0].dtype == Int64 - and feature_view.features[1].name == "fs1_my_feature_2" - and feature_view.features[1].dtype == String - and feature_view.features[2].name == "fs1_my_feature_3" - and feature_view.features[2].dtype == Array(String) - and feature_view.features[3].name == "fs1_my_feature_4" - and feature_view.features[3].dtype == Array(Bytes) - and feature_view.entities[0] == "fs1_my_entity_1" - ) - - test_feature_store.delete_feature_view("my_feature_view_1") - feature_views = test_feature_store.list_feature_views() - assert len(feature_views) == 0 - - test_feature_store.teardown() - - @pytest.fixture def feature_store_with_local_registry(): fd, registry_path = mkstemp() @@ -197,46 +75,3 @@ def feature_store_with_local_registry(): entity_key_serialization_version=2, ) ) - - -@pytest.fixture -def feature_store_with_gcs_registry(): - from google.cloud import storage - - storage_client = storage.Client() - bucket_name = f"feast-registry-test-{int(time.time() * 1000)}" - bucket = storage_client.bucket(bucket_name) - bucket = storage_client.create_bucket(bucket) - bucket.add_lifecycle_delete_rule( - age=14 - ) # delete buckets automatically after 14 days - bucket.patch() - bucket.blob("registry.db") - - return FeatureStore( - config=RepoConfig( - registry=f"gs://{bucket_name}/registry.db", - project="default", - provider="gcp", - entity_key_serialization_version=2, - ) - ) - - -@pytest.fixture -def feature_store_with_s3_registry(): - aws_registry_path = os.getenv( - "AWS_REGISTRY_PATH", "s3://feast-int-bucket/registries" - ) - return FeatureStore( - config=RepoConfig( - registry=f"{aws_registry_path}/{int(time.time() * 1000)}/registry.db", - project="default", - provider="aws", - online_store=DynamoDBOnlineStoreConfig( - region=os.getenv("AWS_REGION", "us-west-2") - ), - offline_store=FileOfflineStoreConfig(), - entity_key_serialization_version=2, - ) - ) diff --git a/sdk/python/tests/integration/registration/test_registry.py b/sdk/python/tests/integration/registration/test_registry.py deleted file mode 100644 index 9ad1a98a050..00000000000 --- a/sdk/python/tests/integration/registration/test_registry.py +++ /dev/null @@ -1,232 +0,0 @@ -# Copyright 2021 The Feast Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import time -from datetime import timedelta -from unittest import mock - -import pytest -from pytest_lazyfixture import lazy_fixture -from testcontainers.minio import MinioContainer - -from feast import FileSource -from feast.data_format import ParquetFormat -from feast.entity import Entity -from feast.feature_view import FeatureView -from feast.field import Field -from feast.infra.registry.registry import Registry -from feast.repo_config import RegistryConfig -from feast.types import Array, Bytes, Int64, String -from tests.utils.e2e_test_validation import validate_registry_data_source_apply - - -@pytest.fixture -def gcs_registry() -> Registry: - from google.cloud import storage - - storage_client = storage.Client() - bucket_name = f"feast-registry-test-{int(time.time() * 1000)}" - bucket = storage_client.bucket(bucket_name) - bucket = storage_client.create_bucket(bucket) - bucket.add_lifecycle_delete_rule( - age=14 - ) # delete buckets automatically after 14 days - bucket.patch() - bucket.blob("registry.db") - registry_config = RegistryConfig( - path=f"gs://{bucket_name}/registry.db", cache_ttl_seconds=600 - ) - return Registry("project", registry_config, None) - - -@pytest.fixture -def s3_registry() -> Registry: - aws_registry_path = os.getenv( - "AWS_REGISTRY_PATH", "s3://feast-int-bucket/registries" - ) - registry_config = RegistryConfig( - path=f"{aws_registry_path}/{int(time.time() * 1000)}/registry.db", - cache_ttl_seconds=600, - ) - return Registry("project", registry_config, None) - - -@pytest.fixture -def minio_registry() -> Registry: - bucket_name = "test-bucket" - - container = MinioContainer() - container.start() - client = container.get_client() - client.make_bucket(bucket_name) - - container_host = container.get_container_host_ip() - exposed_port = container.get_exposed_port(container.port) - - registry_config = RegistryConfig( - path=f"s3://{bucket_name}/registry.db", cache_ttl_seconds=600 - ) - - mock_environ = { - "FEAST_S3_ENDPOINT_URL": f"http://{container_host}:{exposed_port}", - "AWS_ACCESS_KEY_ID": container.access_key, - "AWS_SECRET_ACCESS_KEY": container.secret_key, - "AWS_SESSION_TOKEN": "", - } - - with mock.patch.dict(os.environ, mock_environ): - yield Registry("project", registry_config, None) - - container.stop() - - -@pytest.mark.integration -@pytest.mark.parametrize( - "test_registry", - [ - lazy_fixture("gcs_registry"), - lazy_fixture("s3_registry"), - lazy_fixture("minio_registry"), - ], -) -def test_apply_entity_integration(test_registry): - entity = Entity( - name="driver_car_id", - description="Car driver id", - tags={"team": "matchmaking"}, - ) - - project = "project" - - # Register Entity - test_registry.apply_entity(entity, project) - - entities = test_registry.list_entities(project) - - entity = entities[0] - assert ( - len(entities) == 1 - and entity.name == "driver_car_id" - and entity.description == "Car driver id" - and "team" in entity.tags - and entity.tags["team"] == "matchmaking" - ) - - entity = test_registry.get_entity("driver_car_id", project) - assert ( - entity.name == "driver_car_id" - and entity.description == "Car driver id" - and "team" in entity.tags - and entity.tags["team"] == "matchmaking" - ) - - test_registry.teardown() - - # Will try to reload registry, which will fail because the file has been deleted - with pytest.raises(FileNotFoundError): - test_registry._get_registry_proto(project=project) - - -@pytest.mark.integration -@pytest.mark.parametrize( - "test_registry", - [ - lazy_fixture("gcs_registry"), - 
lazy_fixture("s3_registry"), - lazy_fixture("minio_registry"), - ], -) -def test_apply_feature_view_integration(test_registry): - # Create Feature Views - batch_source = FileSource( - file_format=ParquetFormat(), - path="file://feast/*", - timestamp_field="ts_col", - created_timestamp_column="timestamp", - ) - - entity = Entity(name="fs1_my_entity_1", join_keys=["test"]) - - fv1 = FeatureView( - name="my_feature_view_1", - schema=[ - Field(name="fs1_my_feature_1", dtype=Int64), - Field(name="fs1_my_feature_2", dtype=String), - Field(name="fs1_my_feature_3", dtype=Array(String)), - Field(name="fs1_my_feature_4", dtype=Array(Bytes)), - ], - entities=[entity], - tags={"team": "matchmaking"}, - source=batch_source, - ttl=timedelta(minutes=5), - ) - - project = "project" - - # Register Feature View - test_registry.apply_feature_view(fv1, project) - - feature_views = test_registry.list_feature_views(project) - - # List Feature Views - assert ( - len(feature_views) == 1 - and feature_views[0].name == "my_feature_view_1" - and feature_views[0].features[0].name == "fs1_my_feature_1" - and feature_views[0].features[0].dtype == Int64 - and feature_views[0].features[1].name == "fs1_my_feature_2" - and feature_views[0].features[1].dtype == String - and feature_views[0].features[2].name == "fs1_my_feature_3" - and feature_views[0].features[2].dtype == Array(String) - and feature_views[0].features[3].name == "fs1_my_feature_4" - and feature_views[0].features[3].dtype == Array(Bytes) - and feature_views[0].entities[0] == "fs1_my_entity_1" - ) - - feature_view = test_registry.get_feature_view("my_feature_view_1", project) - assert ( - feature_view.name == "my_feature_view_1" - and feature_view.features[0].name == "fs1_my_feature_1" - and feature_view.features[0].dtype == Int64 - and feature_view.features[1].name == "fs1_my_feature_2" - and feature_view.features[1].dtype == String - and feature_view.features[2].name == "fs1_my_feature_3" - and feature_view.features[2].dtype == Array(String) - and feature_view.features[3].name == "fs1_my_feature_4" - and feature_view.features[3].dtype == Array(Bytes) - and feature_view.entities[0] == "fs1_my_entity_1" - ) - - test_registry.delete_feature_view("my_feature_view_1", project) - feature_views = test_registry.list_feature_views(project) - assert len(feature_views) == 0 - - test_registry.teardown() - - # Will try to reload registry, which will fail because the file has been deleted - with pytest.raises(FileNotFoundError): - test_registry._get_registry_proto(project=project) - - -@pytest.mark.integration -@pytest.mark.parametrize( - "test_registry", - [ - lazy_fixture("gcs_registry"), - lazy_fixture("s3_registry"), - lazy_fixture("minio_registry"), - ], -) -def test_apply_data_source_integration(test_registry: Registry): - validate_registry_data_source_apply(test_registry) diff --git a/sdk/python/tests/integration/registration/test_universal_cli.py b/sdk/python/tests/integration/registration/test_universal_cli.py index e7331a07894..fc90108d787 100644 --- a/sdk/python/tests/integration/registration/test_universal_cli.py +++ b/sdk/python/tests/integration/registration/test_universal_cli.py @@ -7,7 +7,9 @@ from assertpy import assertpy from feast.feature_store import FeatureStore -from tests.integration.feature_repos.repo_configuration import Environment +from tests.integration.feature_repos.universal.data_sources.file import ( + FileDataSourceCreator, +) from tests.utils.basic_read_write_test import basic_rw_test from tests.utils.cli_repo_creator import CliRunner, 
get_example_repo from tests.utils.e2e_test_validation import ( @@ -17,8 +19,7 @@ @pytest.mark.integration -@pytest.mark.universal_offline_stores -def test_universal_cli(environment: Environment): +def test_universal_cli(): project = f"test_universal_cli_{str(uuid.uuid4()).replace('-', '')[:8]}" runner = CliRunner() @@ -28,9 +29,9 @@ def test_universal_cli(environment: Environment): feature_store_yaml = make_feature_store_yaml( project, repo_path, - environment.data_source_creator, - environment.provider, - environment.online_store, + FileDataSourceCreator("project"), + "local", + {"type": "sqlite"}, ) repo_config = repo_path / "feature_store.yaml" @@ -73,13 +74,13 @@ def test_universal_cli(environment: Environment): cwd=repo_path, ) assertpy.assert_that(result.returncode).is_equal_to(0) - assertpy.assert_that(fs.list_feature_views()).is_length(4) + assertpy.assert_that(fs.list_feature_views()).is_length(5) result = runner.run( ["data-sources", "describe", "customer_profile_source"], cwd=repo_path, ) assertpy.assert_that(result.returncode).is_equal_to(0) - assertpy.assert_that(fs.list_data_sources()).is_length(4) + assertpy.assert_that(fs.list_data_sources()).is_length(5) # entity & feature view describe commands should fail when objects don't exist result = runner.run(["entities", "describe", "foo"], cwd=repo_path) @@ -115,8 +116,7 @@ def test_universal_cli(environment: Environment): @pytest.mark.integration -@pytest.mark.universal_offline_stores -def test_odfv_apply(environment) -> None: +def test_odfv_apply() -> None: project = f"test_odfv_apply{str(uuid.uuid4()).replace('-', '')[:8]}" runner = CliRunner() @@ -126,9 +126,9 @@ def test_odfv_apply(environment) -> None: feature_store_yaml = make_feature_store_yaml( project, repo_path, - environment.data_source_creator, - environment.provider, - environment.online_store, + FileDataSourceCreator("project"), + "local", + {"type": "sqlite"}, ) repo_config = repo_path / "feature_store.yaml" diff --git a/sdk/python/tests/unit/infra/test_local_registry.py b/sdk/python/tests/integration/registration/test_universal_registry.py similarity index 53% rename from sdk/python/tests/unit/infra/test_local_registry.py rename to sdk/python/tests/integration/registration/test_universal_registry.py index 73f5cd91a5d..65d07aca45c 100644 --- a/sdk/python/tests/unit/infra/test_local_registry.py +++ b/sdk/python/tests/integration/registration/test_universal_registry.py @@ -1,4 +1,4 @@ -# Copyright 2022 The Feast Authors +# Copyright 2021 The Feast Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,28 +11,42 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import logging +import os +import time from datetime import timedelta from tempfile import mkstemp +from unittest import mock +import grpc_testing import pandas as pd import pytest from pytest_lazyfixture import lazy_fixture +from testcontainers.core.container import DockerContainer +from testcontainers.core.waiting_utils import wait_for_logs +from testcontainers.minio import MinioContainer +from testcontainers.mysql import MySqlContainer -from feast import FileSource -from feast.aggregation import Aggregation +from feast import FileSource, RequestSource from feast.data_format import AvroFormat, ParquetFormat from feast.data_source import KafkaSource from feast.entity import Entity +from feast.errors import FeatureViewNotFoundException from feast.feature_view import FeatureView from feast.field import Field +from feast.infra.infra_object import Infra +from feast.infra.online_stores.sqlite import SqliteTable from feast.infra.registry.registry import Registry -from feast.on_demand_feature_view import RequestSource, on_demand_feature_view +from feast.infra.registry.remote import RemoteRegistry, RemoteRegistryConfig +from feast.infra.registry.sql import SqlRegistry +from feast.on_demand_feature_view import on_demand_feature_view +from feast.protos.feast.registry import RegistryServer_pb2, RegistryServer_pb2_grpc +from feast.registry_server import RegistryServer from feast.repo_config import RegistryConfig -from feast.stream_feature_view import StreamFeatureView +from feast.stream_feature_view import Aggregation, StreamFeatureView from feast.types import Array, Bytes, Float32, Int32, Int64, String from feast.value_type import ValueType from tests.integration.feature_repos.universal.entities import driver -from tests.utils.e2e_test_validation import validate_registry_data_source_apply @pytest.fixture @@ -42,10 +56,218 @@ def local_registry() -> Registry: return Registry("project", registry_config, None) -@pytest.mark.parametrize( - "test_registry", - [lazy_fixture("local_registry")], -) +@pytest.fixture +def gcs_registry() -> Registry: + from google.cloud import storage + + storage_client = storage.Client() + bucket_name = f"feast-registry-test-{int(time.time() * 1000)}" + bucket = storage_client.bucket(bucket_name) + bucket = storage_client.create_bucket(bucket) + bucket.add_lifecycle_delete_rule( + age=14 + ) # delete buckets automatically after 14 days + bucket.patch() + bucket.blob("registry.db") + registry_config = RegistryConfig( + path=f"gs://{bucket_name}/registry.db", cache_ttl_seconds=600 + ) + return Registry("project", registry_config, None) + + +@pytest.fixture +def s3_registry() -> Registry: + aws_registry_path = os.getenv( + "AWS_REGISTRY_PATH", "s3://feast-int-bucket/registries" + ) + registry_config = RegistryConfig( + path=f"{aws_registry_path}/{int(time.time() * 1000)}/registry.db", + cache_ttl_seconds=600, + ) + return Registry("project", registry_config, None) + + +@pytest.fixture(scope="session") +def minio_registry() -> Registry: + bucket_name = "test-bucket" + + container = MinioContainer() + container.start() + client = container.get_client() + client.make_bucket(bucket_name) + + container_host = container.get_container_host_ip() + exposed_port = container.get_exposed_port(container.port) + + registry_config = RegistryConfig( + path=f"s3://{bucket_name}/registry.db", cache_ttl_seconds=600 + ) + + mock_environ = { + "FEAST_S3_ENDPOINT_URL": f"http://{container_host}:{exposed_port}", + "AWS_ACCESS_KEY_ID": container.access_key, + "AWS_SECRET_ACCESS_KEY": 
container.secret_key,
+        "AWS_SESSION_TOKEN": "",
+    }
+
+    with mock.patch.dict(os.environ, mock_environ):
+        yield Registry("project", registry_config, None)
+
+    container.stop()
+
+
+POSTGRES_USER = "test"
+POSTGRES_PASSWORD = "test"
+POSTGRES_DB = "test"
+
+logger = logging.getLogger(__name__)
+
+
+@pytest.fixture(scope="session")
+def pg_registry():
+    container = (
+        DockerContainer("postgres:latest")
+        .with_exposed_ports(5432)
+        .with_env("POSTGRES_USER", POSTGRES_USER)
+        .with_env("POSTGRES_PASSWORD", POSTGRES_PASSWORD)
+        .with_env("POSTGRES_DB", POSTGRES_DB)
+    )
+
+    container.start()
+
+    log_string_to_wait_for = "database system is ready to accept connections"
+    waited = wait_for_logs(
+        container=container,
+        predicate=log_string_to_wait_for,
+        timeout=30,
+        interval=10,
+    )
+    logger.info("Waited for %s seconds until postgres container was up", waited)
+    container_port = container.get_exposed_port(5432)
+    container_host = container.get_container_host_ip()
+
+    registry_config = RegistryConfig(
+        registry_type="sql",
+        path=f"postgresql://{POSTGRES_USER}:{POSTGRES_PASSWORD}@{container_host}:{container_port}/{POSTGRES_DB}",
+        sqlalchemy_config_kwargs={"echo": False, "pool_pre_ping": True},
+    )
+
+    yield SqlRegistry(registry_config, "project", None)
+
+    container.stop()
+
+
+@pytest.fixture(scope="session")
+def mysql_registry():
+    container = MySqlContainer("mysql:latest")
+    container.start()
+
+    # Verify that the database exists and is ready to accept connections before testing starts.
+    import sqlalchemy
+
+    engine = sqlalchemy.create_engine(
+        container.get_connection_url(), pool_pre_ping=True
+    )
+    engine.connect()
+
+    registry_config = RegistryConfig(
+        registry_type="sql",
+        path=container.get_connection_url(),
+        sqlalchemy_config_kwargs={"echo": False, "pool_pre_ping": True},
+    )
+
+    yield SqlRegistry(registry_config, "project", None)
+
+    container.stop()
+
+
+@pytest.fixture(scope="session")
+def sqlite_registry():
+    registry_config = RegistryConfig(
+        registry_type="sql",
+        path="sqlite://",
+    )
+
+    yield SqlRegistry(registry_config, "project", None)
+
+
+class GrpcMockChannel:
+    def __init__(self, service, servicer):
+        self.service = service
+        self.test_server = grpc_testing.server_from_dictionary(
+            {service: servicer},
+            grpc_testing.strict_real_time(),
+        )
+
+    def unary_unary(
+        self, method: str, request_serializer=None, response_deserializer=None
+    ):
+        method_name = method.split("/")[-1]
+        method_descriptor = self.service.methods_by_name[method_name]
+
+        def handler(request):
+            rpc = self.test_server.invoke_unary_unary(
+                method_descriptor, (), request, None
+            )
+
+            response, trailing_metadata, code, details = rpc.termination()
+            return response
+
+        return handler
+
+
+@pytest.fixture
+def mock_remote_registry():
+    fd, registry_path = mkstemp()
+    registry_config = RegistryConfig(path=registry_path, cache_ttl_seconds=600)
+    proxied_registry = Registry("project", registry_config, None)
+
+    registry = RemoteRegistry(
+        registry_config=RemoteRegistryConfig(path=""), project=None, repo_path=None
+    )
+    mock_channel = GrpcMockChannel(
+        RegistryServer_pb2.DESCRIPTOR.services_by_name["RegistryServer"],
+        RegistryServer(registry=proxied_registry),
+    )
+    registry.stub = RegistryServer_pb2_grpc.RegistryServerStub(mock_channel)
+    yield registry
+
+
+if os.getenv("FEAST_IS_LOCAL_TEST", "False") == "False":
+    all_fixtures = [lazy_fixture("s3_registry"), lazy_fixture("gcs_registry")]
+else:
+    all_fixtures = [
+        lazy_fixture("local_registry"),
+        pytest.param(
+            lazy_fixture("minio_registry"),
+
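+            # xdist_group pins every test that uses this containerized
+            # registry to one pytest-xdist worker, so parallel runs don't
+            # spin up duplicate containers.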
marks=pytest.mark.xdist_group(name="minio_registry"), + ), + pytest.param( + lazy_fixture("pg_registry"), + marks=pytest.mark.xdist_group(name="pg_registry"), + ), + pytest.param( + lazy_fixture("mysql_registry"), + marks=pytest.mark.xdist_group(name="mysql_registry"), + ), + lazy_fixture("sqlite_registry"), + lazy_fixture("mock_remote_registry"), + ] + +sql_fixtures = [ + pytest.param( + lazy_fixture("pg_registry"), marks=pytest.mark.xdist_group(name="pg_registry") + ), + pytest.param( + lazy_fixture("mysql_registry"), + marks=pytest.mark.xdist_group(name="mysql_registry"), + ), + lazy_fixture("sqlite_registry"), +] + + +@pytest.mark.integration +@pytest.mark.parametrize("test_registry", all_fixtures) def test_apply_entity_success(test_registry): entity = Entity( name="driver_car_id", @@ -57,8 +279,14 @@ def test_apply_entity_success(test_registry): # Register Entity test_registry.apply_entity(entity, project) + project_metadata = test_registry.list_project_metadata(project=project) + assert len(project_metadata) == 1 + project_uuid = project_metadata[0].project_uuid + assert len(project_metadata[0].project_uuid) == 36 + assert_project_uuid(project, project_uuid, test_registry) entities = test_registry.list_entities(project) + assert_project_uuid(project, project_uuid, test_registry) entity = entities[0] assert ( @@ -77,20 +305,28 @@ def test_apply_entity_success(test_registry): and entity.tags["team"] == "matchmaking" ) + # After the first apply, the created_timestamp should be the same as the last_update_timestamp. + assert entity.created_timestamp == entity.last_updated_timestamp + test_registry.delete_entity("driver_car_id", project) + assert_project_uuid(project, project_uuid, test_registry) entities = test_registry.list_entities(project) + assert_project_uuid(project, project_uuid, test_registry) assert len(entities) == 0 test_registry.teardown() - # Will try to reload registry, which will fail because the file has been deleted - with pytest.raises(FileNotFoundError): - test_registry._get_registry_proto(project=project) + +def assert_project_uuid(project, project_uuid, test_registry): + project_metadata = test_registry.list_project_metadata(project=project) + assert len(project_metadata) == 1 + assert project_metadata[0].project_uuid == project_uuid +@pytest.mark.integration @pytest.mark.parametrize( "test_registry", - [lazy_fixture("local_registry")], + all_fixtures, ) def test_apply_feature_view_success(test_registry): # Create Feature Views @@ -153,21 +389,31 @@ def test_apply_feature_view_success(test_registry): and feature_view.features[3].dtype == Array(Bytes) and feature_view.entities[0] == "fs1_my_entity_1" ) + assert feature_view.ttl == timedelta(minutes=5) + + # After the first apply, the created_timestamp should be the same as the last_update_timestamp. 
+ assert feature_view.created_timestamp == feature_view.last_updated_timestamp + + # Modify the feature view and apply again to test if diffing the online store table works + fv1.ttl = timedelta(minutes=6) + test_registry.apply_feature_view(fv1, project) + feature_views = test_registry.list_feature_views(project) + assert len(feature_views) == 1 + feature_view = test_registry.get_feature_view("my_feature_view_1", project) + assert feature_view.ttl == timedelta(minutes=6) + # Delete feature view test_registry.delete_feature_view("my_feature_view_1", project) feature_views = test_registry.list_feature_views(project) assert len(feature_views) == 0 test_registry.teardown() - # Will try to reload registry, which will fail because the file has been deleted - with pytest.raises(FileNotFoundError): - test_registry._get_registry_proto(project=project) - +@pytest.mark.integration @pytest.mark.parametrize( "test_registry", - [lazy_fixture("local_registry")], + sql_fixtures, ) def test_apply_on_demand_feature_view_success(test_registry): # Create Feature Views @@ -185,6 +431,7 @@ def test_apply_on_demand_feature_view_success(test_registry): entities=[driver()], ttl=timedelta(seconds=8640000000), schema=[ + Field(name="driver_id", dtype=Int64), Field(name="daily_miles_driven", dtype=Float32), Field(name="lat", dtype=Float32), Field(name="lon", dtype=Float32), @@ -207,9 +454,18 @@ def location_features_from_push(inputs: pd.DataFrame) -> pd.DataFrame: project = "project" + with pytest.raises(FeatureViewNotFoundException): + test_registry.get_user_metadata(project, location_features_from_push) + # Register Feature View test_registry.apply_feature_view(location_features_from_push, project) + assert not test_registry.get_user_metadata(project, location_features_from_push) + + b = "metadata".encode("utf-8") + test_registry.apply_user_metadata(project, location_features_from_push, b) + assert test_registry.get_user_metadata(project, location_features_from_push) == b + feature_views = test_registry.list_on_demand_feature_views(project) # List Feature Views @@ -235,84 +491,74 @@ def location_features_from_push(inputs: pd.DataFrame) -> pd.DataFrame: test_registry.teardown() - # Will try to reload registry, which will fail because the file has been deleted - with pytest.raises(FileNotFoundError): - test_registry._get_registry_proto(project=project) - +@pytest.mark.integration @pytest.mark.parametrize( "test_registry", - [lazy_fixture("local_registry")], + all_fixtures, ) -def test_apply_stream_feature_view_success(test_registry): +def test_apply_data_source(test_registry): # Create Feature Views - def simple_udf(x: int): - return x + 3 - - entity = Entity(name="driver_entity", join_keys=["test_key"]) - - stream_source = KafkaSource( - name="kafka", - timestamp_field="event_timestamp", - kafka_bootstrap_servers="", - message_format=AvroFormat(""), - topic="topic", - batch_source=FileSource(path="some path"), - watermark_delay_threshold=timedelta(days=1), + batch_source = FileSource( + name="test_source", + file_format=ParquetFormat(), + path="file://feast/*", + timestamp_field="ts_col", + created_timestamp_column="timestamp", ) - sfv = StreamFeatureView( - name="test kafka stream feature view", - entities=[entity], - ttl=timedelta(days=30), - owner="test@example.com", - online=True, - schema=[Field(name="dummy_field", dtype=Float32)], - description="desc", - aggregations=[ - Aggregation( - column="dummy_field", - function="max", - time_window=timedelta(days=1), - ), - Aggregation( - column="dummy_field2", - 
function="count", - time_window=timedelta(days=24), - ), + entity = Entity(name="fs1_my_entity_1", join_keys=["test"]) + + fv1 = FeatureView( + name="my_feature_view_1", + schema=[ + Field(name="test", dtype=Int64), + Field(name="fs1_my_feature_1", dtype=Int64), + Field(name="fs1_my_feature_2", dtype=String), + Field(name="fs1_my_feature_3", dtype=Array(String)), + Field(name="fs1_my_feature_4", dtype=Array(Bytes)), ], - timestamp_field="event_timestamp", - mode="spark", - source=stream_source, - udf=simple_udf, - tags={}, + entities=[entity], + tags={"team": "matchmaking"}, + source=batch_source, + ttl=timedelta(minutes=5), ) project = "project" - # Register Feature View - test_registry.apply_feature_view(sfv, project) - - stream_feature_views = test_registry.list_stream_feature_views(project) - - # List Feature Views - assert len(stream_feature_views) == 1 - assert stream_feature_views[0] == sfv - - test_registry.delete_feature_view("test kafka stream feature view", project) - stream_feature_views = test_registry.list_stream_feature_views(project) - assert len(stream_feature_views) == 0 + # Register data source and feature view + test_registry.apply_data_source(batch_source, project, commit=False) + test_registry.apply_feature_view(fv1, project, commit=True) + + registry_feature_views = test_registry.list_feature_views(project) + registry_data_sources = test_registry.list_data_sources(project) + assert len(registry_feature_views) == 1 + assert len(registry_data_sources) == 1 + registry_feature_view = registry_feature_views[0] + assert registry_feature_view.batch_source == batch_source + registry_data_source = registry_data_sources[0] + assert registry_data_source == batch_source + + # Check that change to batch source propagates + batch_source.timestamp_field = "new_ts_col" + test_registry.apply_data_source(batch_source, project, commit=False) + test_registry.apply_feature_view(fv1, project, commit=True) + registry_feature_views = test_registry.list_feature_views(project) + registry_data_sources = test_registry.list_data_sources(project) + assert len(registry_feature_views) == 1 + assert len(registry_data_sources) == 1 + registry_feature_view = registry_feature_views[0] + assert registry_feature_view.batch_source == batch_source + registry_batch_source = test_registry.list_data_sources(project)[0] + assert registry_batch_source == batch_source test_registry.teardown() - # Will try to reload registry, which will fail because the file has been deleted - with pytest.raises(FileNotFoundError): - test_registry._get_registry_proto(project=project) - +@pytest.mark.integration @pytest.mark.parametrize( "test_registry", - [lazy_fixture("local_registry")], + all_fixtures, ) def test_modify_feature_views_success(test_registry): # Create Feature Views @@ -431,19 +677,170 @@ def odfv1(feature_df: pd.DataFrame) -> pd.DataFrame: test_registry.teardown() - # Will try to reload registry, which will fail because the file has been deleted - with pytest.raises(FileNotFoundError): - test_registry._get_registry_proto(project=project) + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_registry", + sql_fixtures, +) +def test_update_infra(test_registry): + # Create infra object + project = "project" + infra = test_registry.get_infra(project=project) + + assert len(infra.infra_objects) == 0 + + # Should run update infra successfully + test_registry.update_infra(infra, project) + + # Should run update infra successfully when adding + new_infra = Infra() + new_infra.infra_objects.append( + SqliteTable( 
+ path="/tmp/my_path.db", + name="my_table", + ) + ) + test_registry.update_infra(new_infra, project) + infra = test_registry.get_infra(project=project) + assert len(infra.infra_objects) == 1 + + # Try again since second time, infra should be not-empty + test_registry.teardown() +@pytest.mark.integration @pytest.mark.parametrize( "test_registry", - [lazy_fixture("local_registry")], + sql_fixtures, ) -def test_apply_data_source(test_registry: Registry): - validate_registry_data_source_apply(test_registry) +def test_registry_cache(test_registry): + # Create Feature Views + batch_source = FileSource( + name="test_source", + file_format=ParquetFormat(), + path="file://feast/*", + timestamp_field="ts_col", + created_timestamp_column="timestamp", + ) + + entity = Entity(name="fs1_my_entity_1", join_keys=["test"]) + + fv1 = FeatureView( + name="my_feature_view_1", + schema=[ + Field(name="test", dtype=Int64), + Field(name="fs1_my_feature_1", dtype=Int64), + Field(name="fs1_my_feature_2", dtype=String), + Field(name="fs1_my_feature_3", dtype=Array(String)), + Field(name="fs1_my_feature_4", dtype=Array(Bytes)), + ], + entities=[entity], + tags={"team": "matchmaking"}, + source=batch_source, + ttl=timedelta(minutes=5), + ) + + project = "project" + + # Register data source and feature view + test_registry.apply_data_source(batch_source, project) + test_registry.apply_feature_view(fv1, project) + registry_feature_views_cached = test_registry.list_feature_views( + project, allow_cache=True + ) + registry_data_sources_cached = test_registry.list_data_sources( + project, allow_cache=True + ) + # Not refreshed cache, so cache miss + assert len(registry_feature_views_cached) == 0 + assert len(registry_data_sources_cached) == 0 + test_registry.refresh(project) + # Now objects exist + registry_feature_views_cached = test_registry.list_feature_views( + project, allow_cache=True + ) + registry_data_sources_cached = test_registry.list_data_sources( + project, allow_cache=True + ) + assert len(registry_feature_views_cached) == 1 + assert len(registry_data_sources_cached) == 1 + registry_feature_view = registry_feature_views_cached[0] + assert registry_feature_view.batch_source == batch_source + registry_data_source = registry_data_sources_cached[0] + assert registry_data_source == batch_source + + test_registry.teardown() + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_registry", + all_fixtures, +) +def test_apply_stream_feature_view_success(test_registry): + # Create Feature Views + def simple_udf(x: int): + return x + 3 + + entity = Entity(name="driver_entity", join_keys=["test_key"]) + + stream_source = KafkaSource( + name="kafka", + timestamp_field="event_timestamp", + kafka_bootstrap_servers="", + message_format=AvroFormat(""), + topic="topic", + batch_source=FileSource(path="some path"), + watermark_delay_threshold=timedelta(days=1), + ) + + sfv = StreamFeatureView( + name="test kafka stream feature view", + entities=[entity], + ttl=timedelta(days=30), + owner="test@example.com", + online=True, + schema=[Field(name="dummy_field", dtype=Float32)], + description="desc", + aggregations=[ + Aggregation( + column="dummy_field", + function="max", + time_window=timedelta(days=1), + ), + Aggregation( + column="dummy_field2", + function="count", + time_window=timedelta(days=24), + ), + ], + timestamp_field="event_timestamp", + mode="spark", + source=stream_source, + udf=simple_udf, + tags={}, + ) + + project = "project" + + # Register Feature View + test_registry.apply_feature_view(sfv, project) + 
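+    # Listing right after apply verifies that the stream feature view,
+    # including its aggregations and UDF, survives a registry round trip.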
+ stream_feature_views = test_registry.list_stream_feature_views(project) + + # List Feature Views + assert len(stream_feature_views) == 1 + assert stream_feature_views[0] == sfv + + test_registry.delete_feature_view("test kafka stream feature view", project) + stream_feature_views = test_registry.list_stream_feature_views(project) + assert len(stream_feature_views) == 0 + + test_registry.teardown() +@pytest.mark.integration def test_commit(): fd, registry_path = mkstemp() registry_config = RegistryConfig(path=registry_path, cache_ttl_seconds=600) diff --git a/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py b/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py index 79a3a27b67a..fd50d376322 100644 --- a/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py +++ b/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py @@ -29,6 +29,10 @@ RedshiftOfflineStoreConfig, RedshiftRetrievalJob, ) +from feast.infra.offline_stores.remote import ( + RemoteOfflineStoreConfig, + RemoteRetrievalJob, +) from feast.infra.offline_stores.snowflake import ( SnowflakeOfflineStoreConfig, SnowflakeRetrievalJob, @@ -104,6 +108,7 @@ def metadata(self) -> Optional[RetrievalMetadata]: PostgreSQLRetrievalJob, SparkRetrievalJob, TrinoRetrievalJob, + RemoteRetrievalJob, ] ) def retrieval_job(request, environment): @@ -203,6 +208,35 @@ def retrieval_job(request, environment): config=environment.config, full_feature_names=False, ) + elif request.param is RemoteRetrievalJob: + offline_store_config = RemoteOfflineStoreConfig( + type="remote", + host="localhost", + port=0, + ) + environment.config._offline_store = offline_store_config + + entity_df = pd.DataFrame.from_dict( + { + "id": [1], + "event_timestamp": ["datetime"], + "val_to_add": [1], + } + ) + + return RemoteRetrievalJob( + client=MagicMock(), + api_parameters={ + "str": "str", + }, + api="api", + table=pyarrow.Table.from_pandas(entity_df), + entity_df=entity_df, + metadata=RetrievalMetadata( + features=["1", "2", "3", "4"], + keys=["1", "2", "3", "4"], + ), + ) else: return request.param() diff --git a/sdk/python/tests/unit/infra/registry/test_remote.py b/sdk/python/tests/unit/infra/registry/test_remote.py deleted file mode 100644 index 16c6f0abfb0..00000000000 --- a/sdk/python/tests/unit/infra/registry/test_remote.py +++ /dev/null @@ -1,69 +0,0 @@ -import assertpy -import grpc_testing -import pytest - -from feast import Entity, FeatureStore -from feast.infra.registry.remote import RemoteRegistry, RemoteRegistryConfig -from feast.protos.feast.registry import RegistryServer_pb2, RegistryServer_pb2_grpc -from feast.registry_server import RegistryServer - - -class GrpcMockChannel: - def __init__(self, service, servicer): - self.service = service - self.test_server = grpc_testing.server_from_dictionary( - {service: servicer}, - grpc_testing.strict_real_time(), - ) - - def unary_unary( - self, method: str, request_serializer=None, response_deserializer=None - ): - method_name = method.split("/")[-1] - method_descriptor = self.service.methods_by_name[method_name] - - def handler(request): - rpc = self.test_server.invoke_unary_unary( - method_descriptor, (), request, None - ) - - response, trailing_metadata, code, details = rpc.termination() - return response - - return handler - - -@pytest.fixture -def mock_remote_registry(environment): - store: FeatureStore = environment.feature_store - registry = RemoteRegistry( - registry_config=RemoteRegistryConfig(path=""), project=None, repo_path=None - ) - mock_channel = 
GrpcMockChannel( - RegistryServer_pb2.DESCRIPTOR.services_by_name["RegistryServer"], - RegistryServer(store=store), - ) - registry.stub = RegistryServer_pb2_grpc.RegistryServerStub(mock_channel) - return registry - - -def test_registry_server_get_entity(environment, mock_remote_registry): - store: FeatureStore = environment.feature_store - entity = Entity(name="driver", join_keys=["driver_id"]) - store.apply(entity) - - expected = store.get_entity(entity.name) - response_entity = mock_remote_registry.get_entity(entity.name, store.project) - - assertpy.assert_that(response_entity).is_equal_to(expected) - - -def test_registry_server_proto(environment, mock_remote_registry): - store: FeatureStore = environment.feature_store - entity = Entity(name="driver", join_keys=["driver_id"]) - store.apply(entity) - - expected = store.registry.proto() - response = mock_remote_registry.proto() - - assertpy.assert_that(response).is_equal_to(expected) diff --git a/sdk/python/tests/unit/online_store/__init__.py b/sdk/python/tests/unit/online_store/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/tests/unit/online_store/test_online_retrieval.py b/sdk/python/tests/unit/online_store/test_online_retrieval.py index 5368b1e11cd..13b220fbb97 100644 --- a/sdk/python/tests/unit/online_store/test_online_retrieval.py +++ b/sdk/python/tests/unit/online_store/test_online_retrieval.py @@ -1,20 +1,26 @@ import os +import platform +import sqlite3 +import sys import time from datetime import datetime +import numpy as np import pandas as pd import pytest +import sqlite_vec from pandas.testing import assert_frame_equal from feast import FeatureStore, RepoConfig from feast.errors import FeatureViewNotFoundException from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto +from feast.protos.feast.types.Value_pb2 import FloatList as FloatListProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.repo_config import RegistryConfig from tests.utils.cli_repo_creator import CliRunner, get_example_repo -def test_online() -> None: +def test_get_online_features() -> None: """ Test reading from the online store in local mode. """ @@ -415,3 +421,140 @@ def test_online_to_df(): ] expected_df = pd.DataFrame({k: reversed(v) for (k, v) in df_dict.items()}) assert_frame_equal(result_df[ordered_column], expected_df) + + +@pytest.mark.skipif( + sys.version_info[0:2] != (3, 10) or platform.system() != "Darwin", + reason="Only works on Python 3.10 and MacOS", +) +def test_sqlite_get_online_documents() -> None: + """ + Test retrieving documents from the online store in local mode. 
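+    Writes embeddings through both the low-level provider API and
+    write_to_online_store, then runs a top-k vector similarity query.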
+ """ + n = 10 # number of samples - note: we'll actually double it + vector_length = 8 + runner = CliRunner() + with runner.local_repo( + get_example_repo("example_feature_repo_1.py"), "file" + ) as store: + store.config.online_store.vec_enabled = True + store.config.online_store.vector_len = vector_length + # Write some data to two tables + document_embeddings_fv = store.get_feature_view(name="document_embeddings") + + provider = store._get_provider() + + item_keys = [ + EntityKeyProto( + join_keys=["item_id"], entity_values=[ValueProto(int64_val=i)] + ) + for i in range(n) + ] + data = [] + for item_key in item_keys: + data.append( + ( + item_key, + { + "Embeddings": ValueProto( + float_list_val=FloatListProto( + val=np.random.random( + vector_length, + ) + ) + ) + }, + datetime.utcnow(), + datetime.utcnow(), + ) + ) + + provider.online_write_batch( + config=store.config, + table=document_embeddings_fv, + data=data, + progress=None, + ) + documents_df = pd.DataFrame( + { + "item_id": [str(i) for i in range(n)], + "Embeddings": [ + np.random.random( + vector_length, + ) + for i in range(n) + ], + "event_timestamp": [datetime.utcnow() for _ in range(n)], + } + ) + + store.write_to_online_store( + feature_view_name="document_embeddings", + df=documents_df, + ) + + document_table = store._provider._online_store._conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' and name like '%_document_embeddings';" + ).fetchall() + assert len(document_table) == 1 + document_table_name = document_table[0][0] + record_count = len( + store._provider._online_store._conn.execute( + f"select * from {document_table_name}" + ).fetchall() + ) + assert record_count == len(data) + documents_df.shape[0] + + query_embedding = np.random.random( + vector_length, + ) + result = store.retrieve_online_documents( + feature="document_embeddings:Embeddings", query=query_embedding, top_k=3 + ).to_dict() + + assert "Embeddings" in result + assert "distance" in result + assert len(result["distance"]) == 3 + + +@pytest.mark.skipif( + sys.version_info[0:2] != (3, 10) or platform.system() != "Darwin", + reason="Only works on Python 3.10 and MacOS", +) +def test_sqlite_vec_import() -> None: + db = sqlite3.connect(":memory:") + db.enable_load_extension(True) + sqlite_vec.load(db) + + db.execute(""" + create virtual table vec_examples using vec0( + sample_embedding float[8] + ); + """) + + db.execute(""" + insert into vec_examples(rowid, sample_embedding) + values + (1, '[-0.200, 0.250, 0.341, -0.211, 0.645, 0.935, -0.316, -0.924]'), + (2, '[0.443, -0.501, 0.355, -0.771, 0.707, -0.708, -0.185, 0.362]'), + (3, '[0.716, -0.927, 0.134, 0.052, -0.669, 0.793, -0.634, -0.162]'), + (4, '[-0.710, 0.330, 0.656, 0.041, -0.990, 0.726, 0.385, -0.958]'); + """) + + sqlite_version, vec_version = db.execute( + "select sqlite_version(), vec_version()" + ).fetchone() + assert vec_version == "v0.0.1-alpha.10" + print(f"sqlite_version={sqlite_version}, vec_version={vec_version}") + + result = db.execute(""" + select + rowid, + distance + from vec_examples + where sample_embedding match '[0.890, 0.544, 0.825, 0.961, 0.358, 0.0196, 0.521, 0.175]' + order by distance + limit 2; + """).fetchall() + result = [(rowid, round(distance, 2)) for rowid, distance in result] + assert result == [(2, 2.39), (1, 2.39)] diff --git a/sdk/python/tests/unit/test_offline_server.py b/sdk/python/tests/unit/test_offline_server.py new file mode 100644 index 00000000000..5991e7450d1 --- /dev/null +++ b/sdk/python/tests/unit/test_offline_server.py @@ -0,0 +1,250 
@@
+import os
+import tempfile
+from datetime import datetime, timedelta
+
+import assertpy
+import pandas as pd
+import pyarrow as pa
+import pyarrow.flight as flight
+import pytest
+
+from feast import FeatureStore
+from feast.feature_logging import FeatureServiceLoggingSource
+from feast.infra.offline_stores.remote import (
+    RemoteOfflineStore,
+    RemoteOfflineStoreConfig,
+)
+from feast.offline_server import OfflineServer
+from feast.repo_config import RepoConfig
+from tests.utils.cli_repo_creator import CliRunner
+
+PROJECT_NAME = "test_remote_offline"
+
+
+@pytest.fixture
+def empty_offline_server(environment):
+    store = environment.feature_store
+
+    location = "grpc+tcp://localhost:0"
+    return OfflineServer(store=store, location=location)
+
+
+@pytest.fixture
+def arrow_client(empty_offline_server):
+    return flight.FlightClient(f"grpc://localhost:{empty_offline_server.port}")
+
+
+def test_offline_server_is_alive(environment, empty_offline_server, arrow_client):
+    server = empty_offline_server
+    client = arrow_client
+
+    assertpy.assert_that(server).is_not_none()
+    assertpy.assert_that(server.port).is_not_equal_to(0)
+
+    actions = list(client.list_actions())
+    flights = list(client.list_flights())
+
+    assertpy.assert_that(actions).is_equal_to(
+        [
+            (
+                "offline_write_batch",
+                "Writes the specified arrow table to the data source underlying the specified feature view.",
+            ),
+            (
+                "write_logged_features",
+                "Writes logged features to a specified destination in the offline store.",
+            ),
+            (
+                "persist",
+                "Synchronously executes the underlying query and persists the result in the same offline store at the "
+                "specified destination.",
+            ),
+        ]
+    )
+    assertpy.assert_that(flights).is_empty()
+
+
+def default_store(temp_dir):
+    runner = CliRunner()
+    result = runner.run(["init", PROJECT_NAME], cwd=temp_dir)
+    repo_path = os.path.join(temp_dir, PROJECT_NAME, "feature_repo")
+    assert result.returncode == 0
+
+    result = runner.run(["--chdir", repo_path, "apply"], cwd=temp_dir)
+    assert result.returncode == 0
+
+    fs = FeatureStore(repo_path=repo_path)
+    return fs
+
+
+def remote_feature_store(offline_server):
+    offline_config = RemoteOfflineStoreConfig(
+        type="remote", host="0.0.0.0", port=offline_server.port
+    )
+
+    registry_path = os.path.join(
+        str(offline_server.store.repo_path),
+        offline_server.store.config.registry.path,
+    )
+    store = FeatureStore(
+        config=RepoConfig(
+            project=PROJECT_NAME,
+            registry=registry_path,
+            provider="local",
+            offline_store=offline_config,
+            entity_key_serialization_version=2,
+        )
+    )
+    return store
+
+
+def test_remote_offline_store_apis():
+    with tempfile.TemporaryDirectory() as temp_dir:
+        store = default_store(str(temp_dir))
+        location = "grpc+tcp://localhost:0"
+        server = OfflineServer(store=store, location=location)
+
+        assertpy.assert_that(server).is_not_none()
+        assertpy.assert_that(server.port).is_not_equal_to(0)
+
+        fs = remote_feature_store(server)
+
+        _test_get_historical_features_returns_data(fs)
+        _test_get_historical_features_returns_nan(fs)
+        _test_offline_write_batch(str(temp_dir), fs)
+        _test_write_logged_features(str(temp_dir), fs)
+        _test_pull_latest_from_table_or_query(str(temp_dir), fs)
+        _test_pull_all_from_table_or_query(str(temp_dir), fs)
+
+
+def _test_get_historical_features_returns_data(fs: FeatureStore):
+    entity_df = pd.DataFrame.from_dict(
+        {
+            "driver_id": [1001, 1002, 1003],
+            "event_timestamp": [
+                datetime(2021, 4, 12, 10, 59, 42),
+                datetime(2021, 4, 12, 8, 12, 10),
+                datetime(2021, 4, 12, 16, 40, 26),
+            ],
+
"label_driver_reported_satisfaction": [1, 5, 3], + "val_to_add": [1, 2, 3], + "val_to_add_2": [10, 20, 30], + } + ) + + features = [ + "driver_hourly_stats:conv_rate", + "driver_hourly_stats:acc_rate", + "driver_hourly_stats:avg_daily_trips", + "transformed_conv_rate:conv_rate_plus_val1", + "transformed_conv_rate:conv_rate_plus_val2", + ] + + training_df = fs.get_historical_features(entity_df, features).to_df() + + assertpy.assert_that(training_df).is_not_none() + assertpy.assert_that(len(training_df)).is_equal_to(3) + + for index, driver_id in enumerate(entity_df["driver_id"]): + assertpy.assert_that(training_df["driver_id"][index]).is_equal_to(driver_id) + for feature in features: + column_id = feature.split(":")[1] + value = training_df[column_id][index] + assertpy.assert_that(value).is_not_nan() + + +def _test_get_historical_features_returns_nan(fs: FeatureStore): + entity_df = pd.DataFrame.from_dict( + { + "driver_id": [1, 2, 3], + "event_timestamp": [ + datetime(2021, 4, 12, 10, 59, 42), + datetime(2021, 4, 12, 8, 12, 10), + datetime(2021, 4, 12, 16, 40, 26), + ], + "label_driver_reported_satisfaction": [1, 5, 3], + "val_to_add": [1, 2, 3], + "val_to_add_2": [10, 20, 30], + } + ) + + features = [ + "driver_hourly_stats:conv_rate", + "driver_hourly_stats:acc_rate", + "driver_hourly_stats:avg_daily_trips", + "transformed_conv_rate:conv_rate_plus_val1", + "transformed_conv_rate:conv_rate_plus_val2", + ] + + training_df = fs.get_historical_features(entity_df, features).to_df() + + assertpy.assert_that(training_df).is_not_none() + assertpy.assert_that(len(training_df)).is_equal_to(3) + + for index, driver_id in enumerate(entity_df["driver_id"]): + assertpy.assert_that(training_df["driver_id"][index]).is_equal_to(driver_id) + for feature in features: + column_id = feature.split(":")[1] + value = training_df[column_id][index] + assertpy.assert_that(value).is_nan() + + +def _test_offline_write_batch(temp_dir, fs: FeatureStore): + data_file = os.path.join( + temp_dir, fs.project, "feature_repo/data/driver_stats.parquet" + ) + data_df = pd.read_parquet(data_file) + feature_view = fs.get_feature_view("driver_hourly_stats") + + RemoteOfflineStore.offline_write_batch( + fs.config, feature_view, pa.Table.from_pandas(data_df), progress=None + ) + + +def _test_write_logged_features(temp_dir, fs: FeatureStore): + data_file = os.path.join( + temp_dir, fs.project, "feature_repo/data/driver_stats.parquet" + ) + data_df = pd.read_parquet(data_file) + feature_service = fs.get_feature_service("driver_activity_v1") + + RemoteOfflineStore.write_logged_features( + config=fs.config, + data=pa.Table.from_pandas(data_df), + source=FeatureServiceLoggingSource(feature_service, fs.config.project), + logging_config=feature_service.logging_config, + registry=fs.registry, + ) + + +def _test_pull_latest_from_table_or_query(temp_dir, fs: FeatureStore): + data_source = fs.get_data_source("driver_hourly_stats_source") + + end_date = datetime.now().replace(microsecond=0, second=0, minute=0) + start_date = end_date - timedelta(days=15) + RemoteOfflineStore.pull_latest_from_table_or_query( + config=fs.config, + data_source=data_source, + join_key_columns=[], + feature_name_columns=[], + timestamp_field="event_timestamp", + created_timestamp_column="created", + start_date=start_date, + end_date=end_date, + ).to_df() + + +def _test_pull_all_from_table_or_query(temp_dir, fs: FeatureStore): + data_source = fs.get_data_source("driver_hourly_stats_source") + + end_date = datetime.now().replace(microsecond=0, second=0, minute=0) + 
start_date = end_date - timedelta(days=15) + RemoteOfflineStore.pull_all_from_table_or_query( + config=fs.config, + data_source=data_source, + join_key_columns=[], + feature_name_columns=[], + timestamp_field="event_timestamp", + start_date=start_date, + end_date=end_date, + ).to_df() diff --git a/sdk/python/tests/unit/test_on_demand_python_transformation.py b/sdk/python/tests/unit/test_on_demand_python_transformation.py index ebe797ffdbf..72e9b53a101 100644 --- a/sdk/python/tests/unit/test_on_demand_python_transformation.py +++ b/sdk/python/tests/unit/test_on_demand_python_transformation.py @@ -159,6 +159,10 @@ def python_singleton_view(inputs: dict[str, Any]) -> dict[str, Any]: self.store.write_to_online_store( feature_view_name="driver_hourly_stats", df=driver_df ) + assert len(self.store.list_all_feature_views()) == 4 + assert len(self.store.list_feature_views()) == 1 + assert len(self.store.list_on_demand_feature_views()) == 3 + assert len(self.store.list_stream_feature_views()) == 0 def test_python_pandas_parity(self): entity_rows = [ diff --git a/sdk/python/tests/unit/test_registry_server.py b/sdk/python/tests/unit/test_registry_server.py deleted file mode 100644 index 734bbfe19b8..00000000000 --- a/sdk/python/tests/unit/test_registry_server.py +++ /dev/null @@ -1,60 +0,0 @@ -import assertpy -import grpc_testing -import pytest -from google.protobuf.empty_pb2 import Empty - -from feast import Entity, FeatureStore -from feast.protos.feast.registry import RegistryServer_pb2 -from feast.registry_server import RegistryServer - - -def call_registry_server(server, method: str, request=None): - service = RegistryServer_pb2.DESCRIPTOR.services_by_name["RegistryServer"] - rpc = server.invoke_unary_unary( - service.methods_by_name[method], (), request if request else Empty(), None - ) - - return rpc.termination() - - -@pytest.fixture -def registry_server(environment): - store: FeatureStore = environment.feature_store - - servicer = RegistryServer(store=store) - - return grpc_testing.server_from_dictionary( - {RegistryServer_pb2.DESCRIPTOR.services_by_name["RegistryServer"]: servicer}, - grpc_testing.strict_real_time(), - ) - - -def test_registry_server_get_entity(environment, registry_server): - store: FeatureStore = environment.feature_store - entity = Entity(name="driver", join_keys=["driver_id"]) - store.apply(entity) - - expected = store.get_entity(entity.name) - - get_entity_request = RegistryServer_pb2.GetEntityRequest( - name=entity.name, project=store.project, allow_cache=False - ) - response, trailing_metadata, code, details = call_registry_server( - registry_server, "GetEntity", get_entity_request - ) - response_entity = Entity.from_proto(response) - - assertpy.assert_that(response_entity).is_equal_to(expected) - - -def test_registry_server_proto(environment, registry_server): - store: FeatureStore = environment.feature_store - entity = Entity(name="driver", join_keys=["driver_id"]) - store.apply(entity) - - expected = store.registry.proto() - response, trailing_metadata, code, details = call_registry_server( - registry_server, "Proto" - ) - - assertpy.assert_that(response).is_equal_to(expected) diff --git a/sdk/python/tests/unit/test_sql_registry.py b/sdk/python/tests/unit/test_sql_registry.py deleted file mode 100644 index a1460663aeb..00000000000 --- a/sdk/python/tests/unit/test_sql_registry.py +++ /dev/null @@ -1,672 +0,0 @@ -# Copyright 2021 The Feast Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance 
with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import logging -import os -import sys -from datetime import timedelta - -import pandas as pd -import pytest -from pytest_lazyfixture import lazy_fixture -from testcontainers.core.container import DockerContainer -from testcontainers.core.waiting_utils import wait_for_logs -from testcontainers.mysql import MySqlContainer - -from feast import FileSource, RequestSource -from feast.data_format import ParquetFormat -from feast.entity import Entity -from feast.errors import FeatureViewNotFoundException -from feast.feature_view import FeatureView -from feast.field import Field -from feast.infra.infra_object import Infra -from feast.infra.online_stores.sqlite import SqliteTable -from feast.infra.registry.sql import SqlRegistry -from feast.on_demand_feature_view import on_demand_feature_view -from feast.repo_config import RegistryConfig -from feast.types import Array, Bytes, Float32, Int32, Int64, String -from feast.value_type import ValueType -from tests.integration.feature_repos.universal.entities import driver - -POSTGRES_USER = "test" -POSTGRES_PASSWORD = "test" -POSTGRES_DB = "test" - -logger = logging.getLogger(__name__) - - -@pytest.fixture(scope="session") -def pg_registry(): - container = ( - DockerContainer("postgres:latest") - .with_exposed_ports(5432) - .with_env("POSTGRES_USER", POSTGRES_USER) - .with_env("POSTGRES_PASSWORD", POSTGRES_PASSWORD) - .with_env("POSTGRES_DB", POSTGRES_DB) - ) - - container.start() - - log_string_to_wait_for = "database system is ready to accept connections" - waited = wait_for_logs( - container=container, - predicate=log_string_to_wait_for, - timeout=30, - interval=10, - ) - logger.info("Waited for %s seconds until postgres container was up", waited) - container_port = container.get_exposed_port(5432) - container_host = container.get_container_host_ip() - - registry_config = RegistryConfig( - registry_type="sql", - path=f"postgresql://{POSTGRES_USER}:{POSTGRES_PASSWORD}@{container_host}:{container_port}/{POSTGRES_DB}", - sqlalchemy_config_kwargs={"echo": False, "pool_pre_ping": True}, - ) - - yield SqlRegistry(registry_config, "project", None) - - container.stop() - - -@pytest.fixture(scope="session") -def mysql_registry(): - container = MySqlContainer("mysql:latest") - container.start() - - # testing for the database to exist and ready to connect and start testing. 
- import sqlalchemy - - engine = sqlalchemy.create_engine( - container.get_connection_url(), pool_pre_ping=True - ) - engine.connect() - - registry_config = RegistryConfig( - registry_type="sql", - path=container.get_connection_url(), - sqlalchemy_config_kwargs={"echo": False, "pool_pre_ping": True}, - ) - - yield SqlRegistry(registry_config, "project", None) - - container.stop() - - -@pytest.fixture(scope="session") -def sqlite_registry(): - registry_config = RegistryConfig( - registry_type="sql", - path="sqlite://", - ) - - yield SqlRegistry(registry_config, "project", None) - - -@pytest.mark.skipif( - sys.platform == "darwin" and "GITHUB_REF" in os.environ, - reason="does not run on mac github actions", -) -@pytest.mark.parametrize( - "sql_registry", - [ - lazy_fixture("mysql_registry"), - lazy_fixture("pg_registry"), - lazy_fixture("sqlite_registry"), - ], -) -def test_apply_entity_success(sql_registry): - entity = Entity( - name="driver_car_id", - description="Car driver id", - tags={"team": "matchmaking"}, - ) - - project = "project" - - # Register Entity - sql_registry.apply_entity(entity, project) - project_metadata = sql_registry.list_project_metadata(project=project) - assert len(project_metadata) == 1 - project_uuid = project_metadata[0].project_uuid - assert len(project_metadata[0].project_uuid) == 36 - assert_project_uuid(project, project_uuid, sql_registry) - - entities = sql_registry.list_entities(project) - assert_project_uuid(project, project_uuid, sql_registry) - - entity = entities[0] - assert ( - len(entities) == 1 - and entity.name == "driver_car_id" - and entity.description == "Car driver id" - and "team" in entity.tags - and entity.tags["team"] == "matchmaking" - ) - - entity = sql_registry.get_entity("driver_car_id", project) - assert ( - entity.name == "driver_car_id" - and entity.description == "Car driver id" - and "team" in entity.tags - and entity.tags["team"] == "matchmaking" - ) - - # After the first apply, the created_timestamp should be the same as the last_update_timestamp. 
- assert entity.created_timestamp == entity.last_updated_timestamp - - sql_registry.delete_entity("driver_car_id", project) - assert_project_uuid(project, project_uuid, sql_registry) - entities = sql_registry.list_entities(project) - assert_project_uuid(project, project_uuid, sql_registry) - assert len(entities) == 0 - - sql_registry.teardown() - - -def assert_project_uuid(project, project_uuid, sql_registry): - project_metadata = sql_registry.list_project_metadata(project=project) - assert len(project_metadata) == 1 - assert project_metadata[0].project_uuid == project_uuid - - -@pytest.mark.skipif( - sys.platform == "darwin" and "GITHUB_REF" in os.environ, - reason="does not run on mac github actions", -) -@pytest.mark.parametrize( - "sql_registry", - [ - lazy_fixture("mysql_registry"), - lazy_fixture("pg_registry"), - lazy_fixture("sqlite_registry"), - ], -) -def test_apply_feature_view_success(sql_registry): - # Create Feature Views - batch_source = FileSource( - file_format=ParquetFormat(), - path="file://feast/*", - timestamp_field="ts_col", - created_timestamp_column="timestamp", - ) - - entity = Entity(name="fs1_my_entity_1", join_keys=["test"]) - - fv1 = FeatureView( - name="my_feature_view_1", - schema=[ - Field(name="test", dtype=Int64), - Field(name="fs1_my_feature_1", dtype=Int64), - Field(name="fs1_my_feature_2", dtype=String), - Field(name="fs1_my_feature_3", dtype=Array(String)), - Field(name="fs1_my_feature_4", dtype=Array(Bytes)), - ], - entities=[entity], - tags={"team": "matchmaking"}, - source=batch_source, - ttl=timedelta(minutes=5), - ) - - project = "project" - - # Register Feature View - sql_registry.apply_feature_view(fv1, project) - - feature_views = sql_registry.list_feature_views(project) - - # List Feature Views - assert ( - len(feature_views) == 1 - and feature_views[0].name == "my_feature_view_1" - and feature_views[0].features[0].name == "fs1_my_feature_1" - and feature_views[0].features[0].dtype == Int64 - and feature_views[0].features[1].name == "fs1_my_feature_2" - and feature_views[0].features[1].dtype == String - and feature_views[0].features[2].name == "fs1_my_feature_3" - and feature_views[0].features[2].dtype == Array(String) - and feature_views[0].features[3].name == "fs1_my_feature_4" - and feature_views[0].features[3].dtype == Array(Bytes) - and feature_views[0].entities[0] == "fs1_my_entity_1" - ) - - feature_view = sql_registry.get_feature_view("my_feature_view_1", project) - assert ( - feature_view.name == "my_feature_view_1" - and feature_view.features[0].name == "fs1_my_feature_1" - and feature_view.features[0].dtype == Int64 - and feature_view.features[1].name == "fs1_my_feature_2" - and feature_view.features[1].dtype == String - and feature_view.features[2].name == "fs1_my_feature_3" - and feature_view.features[2].dtype == Array(String) - and feature_view.features[3].name == "fs1_my_feature_4" - and feature_view.features[3].dtype == Array(Bytes) - and feature_view.entities[0] == "fs1_my_entity_1" - ) - assert feature_view.ttl == timedelta(minutes=5) - - # After the first apply, the created_timestamp should be the same as the last_update_timestamp. 
- assert feature_view.created_timestamp == feature_view.last_updated_timestamp - - # Modify the feature view and apply again to test if diffing the online store table works - fv1.ttl = timedelta(minutes=6) - sql_registry.apply_feature_view(fv1, project) - feature_views = sql_registry.list_feature_views(project) - assert len(feature_views) == 1 - feature_view = sql_registry.get_feature_view("my_feature_view_1", project) - assert feature_view.ttl == timedelta(minutes=6) - - # Delete feature view - sql_registry.delete_feature_view("my_feature_view_1", project) - feature_views = sql_registry.list_feature_views(project) - assert len(feature_views) == 0 - - sql_registry.teardown() - - -@pytest.mark.skipif( - sys.platform == "darwin" and "GITHUB_REF" in os.environ, - reason="does not run on mac github actions", -) -@pytest.mark.parametrize( - "sql_registry", - [ - lazy_fixture("mysql_registry"), - lazy_fixture("pg_registry"), - lazy_fixture("sqlite_registry"), - ], -) -def test_apply_on_demand_feature_view_success(sql_registry): - # Create Feature Views - driver_stats = FileSource( - name="driver_stats_source", - path="data/driver_stats_lat_lon.parquet", - timestamp_field="event_timestamp", - created_timestamp_column="created", - description="A table describing the stats of a driver based on hourly logs", - owner="test2@gmail.com", - ) - - driver_daily_features_view = FeatureView( - name="driver_daily_features", - entities=[driver()], - ttl=timedelta(seconds=8640000000), - schema=[ - Field(name="driver_id", dtype=Int64), - Field(name="daily_miles_driven", dtype=Float32), - Field(name="lat", dtype=Float32), - Field(name="lon", dtype=Float32), - Field(name="string_feature", dtype=String), - ], - online=True, - source=driver_stats, - tags={"production": "True"}, - owner="test2@gmail.com", - ) - - @on_demand_feature_view( - sources=[driver_daily_features_view], - schema=[Field(name="first_char", dtype=String)], - ) - def location_features_from_push(inputs: pd.DataFrame) -> pd.DataFrame: - df = pd.DataFrame() - df["first_char"] = inputs["string_feature"].str[:1].astype("string") - return df - - project = "project" - - with pytest.raises(FeatureViewNotFoundException): - sql_registry.get_user_metadata(project, location_features_from_push) - - # Register Feature View - sql_registry.apply_feature_view(location_features_from_push, project) - - assert not sql_registry.get_user_metadata(project, location_features_from_push) - - b = "metadata".encode("utf-8") - sql_registry.apply_user_metadata(project, location_features_from_push, b) - assert sql_registry.get_user_metadata(project, location_features_from_push) == b - - feature_views = sql_registry.list_on_demand_feature_views(project) - - # List Feature Views - assert ( - len(feature_views) == 1 - and feature_views[0].name == "location_features_from_push" - and feature_views[0].features[0].name == "first_char" - and feature_views[0].features[0].dtype == String - ) - - feature_view = sql_registry.get_on_demand_feature_view( - "location_features_from_push", project - ) - assert ( - feature_view.name == "location_features_from_push" - and feature_view.features[0].name == "first_char" - and feature_view.features[0].dtype == String - ) - - sql_registry.delete_feature_view("location_features_from_push", project) - feature_views = sql_registry.list_on_demand_feature_views(project) - assert len(feature_views) == 0 - - sql_registry.teardown() - - -@pytest.mark.skipif( - sys.platform == "darwin" and "GITHUB_REF" in os.environ, - reason="does not run on mac github 
actions", -) -@pytest.mark.parametrize( - "sql_registry", - [ - lazy_fixture("mysql_registry"), - lazy_fixture("pg_registry"), - lazy_fixture("sqlite_registry"), - ], -) -def test_modify_feature_views_success(sql_registry): - # Create Feature Views - batch_source = FileSource( - file_format=ParquetFormat(), - path="file://feast/*", - timestamp_field="ts_col", - created_timestamp_column="timestamp", - ) - - request_source = RequestSource( - name="request_source", - schema=[Field(name="my_input_1", dtype=Int32)], - ) - - entity = Entity(name="fs1_my_entity_1", join_keys=["test"]) - - fv1 = FeatureView( - name="my_feature_view_1", - schema=[ - Field(name="test", dtype=Int64), - Field(name="fs1_my_feature_1", dtype=Int64), - ], - entities=[entity], - tags={"team": "matchmaking"}, - source=batch_source, - ttl=timedelta(minutes=5), - ) - - @on_demand_feature_view( - schema=[ - Field(name="odfv1_my_feature_1", dtype=String), - Field(name="odfv1_my_feature_2", dtype=Int32), - ], - sources=[request_source], - ) - def odfv1(feature_df: pd.DataFrame) -> pd.DataFrame: - data = pd.DataFrame() - data["odfv1_my_feature_1"] = feature_df["my_input_1"].astype("category") - data["odfv1_my_feature_2"] = feature_df["my_input_1"].astype("int32") - return data - - project = "project" - - # Register Feature Views - sql_registry.apply_feature_view(odfv1, project) - sql_registry.apply_feature_view(fv1, project) - - # Modify odfv by changing a single feature dtype - @on_demand_feature_view( - schema=[ - Field(name="odfv1_my_feature_1", dtype=Float32), - Field(name="odfv1_my_feature_2", dtype=Int32), - ], - sources=[request_source], - ) - def odfv1(feature_df: pd.DataFrame) -> pd.DataFrame: - data = pd.DataFrame() - data["odfv1_my_feature_1"] = feature_df["my_input_1"].astype("float") - data["odfv1_my_feature_2"] = feature_df["my_input_1"].astype("int32") - return data - - # Apply the modified odfv - sql_registry.apply_feature_view(odfv1, project) - - # Check odfv - on_demand_feature_views = sql_registry.list_on_demand_feature_views(project) - - assert ( - len(on_demand_feature_views) == 1 - and on_demand_feature_views[0].name == "odfv1" - and on_demand_feature_views[0].features[0].name == "odfv1_my_feature_1" - and on_demand_feature_views[0].features[0].dtype == Float32 - and on_demand_feature_views[0].features[1].name == "odfv1_my_feature_2" - and on_demand_feature_views[0].features[1].dtype == Int32 - ) - request_schema = on_demand_feature_views[0].get_request_data_schema() - assert ( - list(request_schema.keys())[0] == "my_input_1" - and list(request_schema.values())[0] == ValueType.INT32 - ) - - feature_view = sql_registry.get_on_demand_feature_view("odfv1", project) - assert ( - feature_view.name == "odfv1" - and feature_view.features[0].name == "odfv1_my_feature_1" - and feature_view.features[0].dtype == Float32 - and feature_view.features[1].name == "odfv1_my_feature_2" - and feature_view.features[1].dtype == Int32 - ) - request_schema = feature_view.get_request_data_schema() - assert ( - list(request_schema.keys())[0] == "my_input_1" - and list(request_schema.values())[0] == ValueType.INT32 - ) - - # Make sure fv1 is untouched - feature_views = sql_registry.list_feature_views(project) - - # List Feature Views - assert ( - len(feature_views) == 1 - and feature_views[0].name == "my_feature_view_1" - and feature_views[0].features[0].name == "fs1_my_feature_1" - and feature_views[0].features[0].dtype == Int64 - and feature_views[0].entities[0] == "fs1_my_entity_1" - ) - - feature_view = 
sql_registry.get_feature_view("my_feature_view_1", project) - assert ( - feature_view.name == "my_feature_view_1" - and feature_view.features[0].name == "fs1_my_feature_1" - and feature_view.features[0].dtype == Int64 - and feature_view.entities[0] == "fs1_my_entity_1" - ) - - sql_registry.teardown() - - -@pytest.mark.skipif( - sys.platform == "darwin" and "GITHUB_REF" in os.environ, - reason="does not run on mac github actions", -) -@pytest.mark.parametrize( - "sql_registry", - [ - lazy_fixture("mysql_registry"), - lazy_fixture("pg_registry"), - lazy_fixture("sqlite_registry"), - ], -) -def test_apply_data_source(sql_registry): - # Create Feature Views - batch_source = FileSource( - name="test_source", - file_format=ParquetFormat(), - path="file://feast/*", - timestamp_field="ts_col", - created_timestamp_column="timestamp", - ) - - entity = Entity(name="fs1_my_entity_1", join_keys=["test"]) - - fv1 = FeatureView( - name="my_feature_view_1", - schema=[ - Field(name="test", dtype=Int64), - Field(name="fs1_my_feature_1", dtype=Int64), - Field(name="fs1_my_feature_2", dtype=String), - Field(name="fs1_my_feature_3", dtype=Array(String)), - Field(name="fs1_my_feature_4", dtype=Array(Bytes)), - ], - entities=[entity], - tags={"team": "matchmaking"}, - source=batch_source, - ttl=timedelta(minutes=5), - ) - - project = "project" - - # Register data source and feature view - sql_registry.apply_data_source(batch_source, project, commit=False) - sql_registry.apply_feature_view(fv1, project, commit=True) - - registry_feature_views = sql_registry.list_feature_views(project) - registry_data_sources = sql_registry.list_data_sources(project) - assert len(registry_feature_views) == 1 - assert len(registry_data_sources) == 1 - registry_feature_view = registry_feature_views[0] - assert registry_feature_view.batch_source == batch_source - registry_data_source = registry_data_sources[0] - assert registry_data_source == batch_source - - # Check that change to batch source propagates - batch_source.timestamp_field = "new_ts_col" - sql_registry.apply_data_source(batch_source, project, commit=False) - sql_registry.apply_feature_view(fv1, project, commit=True) - registry_feature_views = sql_registry.list_feature_views(project) - registry_data_sources = sql_registry.list_data_sources(project) - assert len(registry_feature_views) == 1 - assert len(registry_data_sources) == 1 - registry_feature_view = registry_feature_views[0] - assert registry_feature_view.batch_source == batch_source - registry_batch_source = sql_registry.list_data_sources(project)[0] - assert registry_batch_source == batch_source - - sql_registry.teardown() - - -@pytest.mark.skipif( - sys.platform == "darwin" and "GITHUB_REF" in os.environ, - reason="does not run on mac github actions", -) -@pytest.mark.parametrize( - "sql_registry", - [ - lazy_fixture("mysql_registry"), - lazy_fixture("pg_registry"), - lazy_fixture("sqlite_registry"), - ], -) -def test_registry_cache(sql_registry): - # Create Feature Views - batch_source = FileSource( - name="test_source", - file_format=ParquetFormat(), - path="file://feast/*", - timestamp_field="ts_col", - created_timestamp_column="timestamp", - ) - - entity = Entity(name="fs1_my_entity_1", join_keys=["test"]) - - fv1 = FeatureView( - name="my_feature_view_1", - schema=[ - Field(name="test", dtype=Int64), - Field(name="fs1_my_feature_1", dtype=Int64), - Field(name="fs1_my_feature_2", dtype=String), - Field(name="fs1_my_feature_3", dtype=Array(String)), - Field(name="fs1_my_feature_4", dtype=Array(Bytes)), - ], - 
entities=[entity], - tags={"team": "matchmaking"}, - source=batch_source, - ttl=timedelta(minutes=5), - ) - - project = "project" - - # Register data source and feature view - sql_registry.apply_data_source(batch_source, project) - sql_registry.apply_feature_view(fv1, project) - registry_feature_views_cached = sql_registry.list_feature_views( - project, allow_cache=True - ) - registry_data_sources_cached = sql_registry.list_data_sources( - project, allow_cache=True - ) - # Not refreshed cache, so cache miss - assert len(registry_feature_views_cached) == 0 - assert len(registry_data_sources_cached) == 0 - sql_registry.refresh(project) - # Now objects exist - registry_feature_views_cached = sql_registry.list_feature_views( - project, allow_cache=True - ) - registry_data_sources_cached = sql_registry.list_data_sources( - project, allow_cache=True - ) - assert len(registry_feature_views_cached) == 1 - assert len(registry_data_sources_cached) == 1 - registry_feature_view = registry_feature_views_cached[0] - assert registry_feature_view.batch_source == batch_source - registry_data_source = registry_data_sources_cached[0] - assert registry_data_source == batch_source - - sql_registry.teardown() - - -@pytest.mark.skipif( - sys.platform == "darwin" and "GITHUB_REF" in os.environ, - reason="does not run on mac github actions", -) -@pytest.mark.parametrize( - "sql_registry", - [ - lazy_fixture("mysql_registry"), - lazy_fixture("pg_registry"), - lazy_fixture("sqlite_registry"), - ], -) -def test_update_infra(sql_registry): - # Create infra object - project = "project" - infra = sql_registry.get_infra(project=project) - - assert len(infra.infra_objects) == 0 - - # Should run update infra successfully - sql_registry.update_infra(infra, project) - - # Should run update infra successfully when adding - new_infra = Infra() - new_infra.infra_objects.append( - SqliteTable( - path="/tmp/my_path.db", - name="my_table", - ) - ) - sql_registry.update_infra(new_infra, project) - infra = sql_registry.get_infra(project=project) - assert len(infra.infra_objects) == 1 - - # Try again since second time, infra should be not-empty - sql_registry.teardown() diff --git a/sdk/python/tests/unit/test_type_map.py b/sdk/python/tests/unit/test_type_map.py index 87e5ef0548c..39e3e7dafa5 100644 --- a/sdk/python/tests/unit/test_type_map.py +++ b/sdk/python/tests/unit/test_type_map.py @@ -1,4 +1,5 @@ import numpy as np +import pandas as pd import pytest from feast.type_map import ( @@ -79,3 +80,10 @@ def test_python_values_to_proto_values_bytes_to_list(values, value_type, expecte def test_python_values_to_proto_values_bytes_to_list_not_supported(): with pytest.raises(TypeError): _ = python_values_to_proto_values([b"[]"], ValueType.BYTES_LIST) + + +def test_python_values_to_proto_values_int_list_with_null_not_supported(): + df = pd.DataFrame({"column": [1, 2, None]}) + arr = df["column"].to_numpy() + with pytest.raises(TypeError): + _ = python_values_to_proto_values(arr, ValueType.INT32_LIST) diff --git a/sdk/python/tests/utils/e2e_test_validation.py b/sdk/python/tests/utils/e2e_test_validation.py index 37e57558678..885798db109 100644 --- a/sdk/python/tests/utils/e2e_test_validation.py +++ b/sdk/python/tests/utils/e2e_test_validation.py @@ -6,16 +6,10 @@ from typing import Dict, List, Optional, Union import pandas as pd -import pytest import yaml from pytz import utc -from feast import FeatureStore, FeatureView, FileSource, RepoConfig -from feast.data_format import ParquetFormat -from feast.entity import Entity -from feast.field 
import Field -from feast.infra.registry.registry import Registry -from feast.types import Array, Bytes, Int64, String +from feast import FeatureStore, FeatureView, RepoConfig from tests.integration.feature_repos.integration_test_repo_config import ( IntegrationTestRepoConfig, ) @@ -182,7 +176,6 @@ def make_feature_store_yaml( online_store: Optional[Union[str, Dict]], ): offline_store_config = offline_creator.create_offline_store_config() - online_store = online_store config = RepoConfig( registry=str(Path(repo_dir_name) / "registry.db"), @@ -235,65 +228,3 @@ def make_feature_store_yaml( ), ] ) - - -def validate_registry_data_source_apply(test_registry: Registry): - # Create Feature Views - batch_source = FileSource( - name="test_source", - file_format=ParquetFormat(), - path="file://feast/*", - timestamp_field="ts_col", - created_timestamp_column="timestamp", - ) - - entity = Entity(name="fs1_my_entity_1", join_keys=["test"]) - - fv1 = FeatureView( - name="my_feature_view_1", - schema=[ - Field(name="test", dtype=Int64), - Field(name="fs1_my_feature_1", dtype=Int64), - Field(name="fs1_my_feature_2", dtype=String), - Field(name="fs1_my_feature_3", dtype=Array(String)), - Field(name="fs1_my_feature_4", dtype=Array(Bytes)), - ], - entities=[entity], - tags={"team": "matchmaking"}, - source=batch_source, - ttl=timedelta(minutes=5), - ) - - project = "project" - - # Register data source and feature view - test_registry.apply_data_source(batch_source, project, commit=False) - test_registry.apply_feature_view(fv1, project, commit=True) - - registry_feature_views = test_registry.list_feature_views(project) - registry_data_sources = test_registry.list_data_sources(project) - assert len(registry_feature_views) == 1 - assert len(registry_data_sources) == 1 - registry_feature_view = registry_feature_views[0] - assert registry_feature_view.batch_source == batch_source - registry_data_source = registry_data_sources[0] - assert registry_data_source == batch_source - - # Check that change to batch source propagates - batch_source.timestamp_field = "new_ts_col" - test_registry.apply_data_source(batch_source, project, commit=False) - test_registry.apply_feature_view(fv1, project, commit=True) - registry_feature_views = test_registry.list_feature_views(project) - registry_data_sources = test_registry.list_data_sources(project) - assert len(registry_feature_views) == 1 - assert len(registry_data_sources) == 1 - registry_feature_view = registry_feature_views[0] - assert registry_feature_view.batch_source == batch_source - registry_batch_source = test_registry.list_data_sources(project)[0] - assert registry_batch_source == batch_source - - test_registry.teardown() - - # Will try to reload registry, which will fail because the file has been deleted - with pytest.raises(FileNotFoundError): - test_registry._get_registry_proto(project=project) diff --git a/sdk/python/tests/utils/http_server.py b/sdk/python/tests/utils/http_server.py index 47c6cb8ac17..5bb6255d72e 100644 --- a/sdk/python/tests/utils/http_server.py +++ b/sdk/python/tests/utils/http_server.py @@ -3,9 +3,9 @@ def free_port(): - sock = socket.socket() - sock.bind(("", 0)) - return sock.getsockname()[1] + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: + sock.bind(("", 0)) + return sock.getsockname()[1] def check_port_open(host, port) -> bool: diff --git a/setup.py b/setup.py index cdab69b6848..9b3d0e55e62 100644 --- a/setup.py +++ b/setup.py @@ -84,7 +84,7 @@ "hiredis>=2.0.0,<3", ] -AWS_REQUIRED = ["boto3>=1.17.0,<2", 
"docker>=5.0.2", "fsspec<=2024.1.0"] +AWS_REQUIRED = ["boto3>=1.17.0,<2", "docker>=5.0.2", "fsspec<=2024.1.0", "aiobotocore>2,<3"] KUBERNETES_REQUIRED = ["kubernetes<=20.13.0"] @@ -96,6 +96,9 @@ "pyspark>=3.0.0,<4", ] +SQLITE_VEC_REQUIRED = [ + "sqlite-vec==v0.0.1-alpha.10", +] TRINO_REQUIRED = ["trino>=0.305.0,<0.400.0", "regex"] POSTGRES_REQUIRED = [ @@ -214,6 +217,7 @@ + DUCKDB_REQUIRED + DELTA_REQUIRED + ELASTICSEARCH_REQUIRED + + SQLITE_VEC_REQUIRED ) DOCS_REQUIRED = CI_REQUIRED @@ -381,6 +385,7 @@ def run(self): "ikv": IKV_REQUIRED, "delta": DELTA_REQUIRED, "elasticsearch": ELASTICSEARCH_REQUIRED, + "sqlite_vec": SQLITE_VEC_REQUIRED, }, include_package_data=True, license="Apache", diff --git a/ui/package.json b/ui/package.json index ea69e571fb5..ec00624a823 100644 --- a/ui/package.json +++ b/ui/package.json @@ -1,6 +1,6 @@ { "name": "@feast-dev/feast-ui", - "version": "0.37.1", + "version": "0.38.0", "private": false, "files": [ "dist" diff --git a/ui/yarn.lock b/ui/yarn.lock index 9a4338a319b..89107de0b89 100644 --- a/ui/yarn.lock +++ b/ui/yarn.lock @@ -3604,11 +3604,11 @@ brace-expansion@^2.0.1: balanced-match "^1.0.0" braces@^3.0.1, braces@~3.0.2: - version "3.0.2" - resolved "https://registry.yarnpkg.com/braces/-/braces-3.0.2.tgz#3454e1a462ee8d599e236df336cd9ea4f8afe107" - integrity sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A== + version "3.0.3" + resolved "https://registry.yarnpkg.com/braces/-/braces-3.0.3.tgz#490332f40919452272d55a8480adc0c441358789" + integrity sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA== dependencies: - fill-range "^7.0.1" + fill-range "^7.1.1" broadcast-channel@^3.4.1: version "3.7.0" @@ -5616,10 +5616,10 @@ filesize@^8.0.6: resolved "https://registry.yarnpkg.com/filesize/-/filesize-8.0.7.tgz#695e70d80f4e47012c132d57a059e80c6b580bd8" integrity sha512-pjmC+bkIF8XI7fWaH8KxHcZL3DPybs1roSKP4rKDvy20tAWwIObE4+JIseG2byfGKhud5ZnM4YSGKBz7Sh0ndQ== -fill-range@^7.0.1: - version "7.0.1" - resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-7.0.1.tgz#1919a6a7c75fe38b2c7c77e5198535da9acdda40" - integrity sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ== +fill-range@^7.1.1: + version "7.1.1" + resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-7.1.1.tgz#44265d3cac07e3ea7dc247516380643754a05292" + integrity sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg== dependencies: to-regex-range "^5.0.1" From 0aeb0a684e50ee449b5b0c0dba90e4cc2e545540 Mon Sep 17 00:00:00 2001 From: Lokesh Rangineni Date: Thu, 13 Jun 2024 19:36:11 -0400 Subject: [PATCH 2/2] squashing last 61 commits. 
Merge remote-tracking branch 'fork/feature/adding-remote-onlinestore-rebase' into feature/adding-remote-onlinestore-rebase

Signed-off-by: Lokesh Rangineni
---
 docs/SUMMARY.md                               |   1 +
 docs/reference/online-stores/README.md        |   4 +
 docs/reference/online-stores/remote.md        |  21 ++
 .../feast/infra/online_stores/remote.py       | 167 +++++++++++++
 sdk/python/feast/repo_config.py               |   1 +
 sdk/python/tests/conftest.py                  |  10 +-
 .../feature_repos/repo_configuration.py       |   6 +-
 .../online_store/test_remote_online_store.py  | 233 ++++++++++++++++++
 .../tests/unit/online_store/__init__.py       |   0
 9 files changed, 433 insertions(+), 10 deletions(-)
 create mode 100644 docs/reference/online-stores/remote.md
 create mode 100644 sdk/python/feast/infra/online_stores/remote.py
 create mode 100644 sdk/python/tests/integration/online_store/test_remote_online_store.py
 create mode 100644 sdk/python/tests/unit/online_store/__init__.py

diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index af6362da3ed..06c5edcc8b0 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -96,6 +96,7 @@
 * [Datastore](reference/online-stores/datastore.md)
 * [DynamoDB](reference/online-stores/dynamodb.md)
 * [Bigtable](reference/online-stores/bigtable.md)
+ * [Remote](reference/online-stores/remote.md)
 * [PostgreSQL (contrib)](reference/online-stores/postgres.md)
 * [Cassandra + Astra DB (contrib)](reference/online-stores/cassandra.md)
 * [MySQL (contrib)](reference/online-stores/mysql.md)
diff --git a/docs/reference/online-stores/README.md b/docs/reference/online-stores/README.md
index 686e820f4e7..b5f4eb8de89 100644
--- a/docs/reference/online-stores/README.md
+++ b/docs/reference/online-stores/README.md
@@ -61,3 +61,7 @@ Please see [Online Store](../../getting-started/architecture-and-components/onli
 {% content-ref url="scylladb.md" %}
 [scylladb.md](scylladb.md)
 {% endcontent-ref %}
+
+{% content-ref url="remote.md" %}
+[remote.md](remote.md)
+{% endcontent-ref %}
diff --git a/docs/reference/online-stores/remote.md b/docs/reference/online-stores/remote.md
new file mode 100644
index 00000000000..c560fa6f223
--- /dev/null
+++ b/docs/reference/online-stores/remote.md
@@ -0,0 +1,21 @@
+# Remote online store
+
+## Description
+
+The remote online store lets a Feast client read features from a remote feature server. At the moment only read operations are supported: you retrieve online features with `store.get_online_features`, and the request is served by the remote feature server.
+
+## Examples
+
+The `registry` below points to the registry of the remote feature store. If that registry is not directly accessible from the client, the client should be configured to use a remote registry instead. Once configured, `store.get_online_features` is called exactly as it would be against a local online store.
+
+{% code title="feature_store.yaml" %}
+```yaml
+project: my-local-project
+registry: /remote/data/registry.db
+provider: local
+online_store:
+    path: http://localhost:6566
+    type: remote
+entity_key_serialization_version: 2
+```
+{% endcode %}
\ No newline at end of file
diff --git a/sdk/python/feast/infra/online_stores/remote.py b/sdk/python/feast/infra/online_stores/remote.py
new file mode 100644
index 00000000000..19e1b7d5159
--- /dev/null
+++ b/sdk/python/feast/infra/online_stores/remote.py
@@ -0,0 +1,167 @@
+# Copyright 2021 The Feast Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+import logging
+from datetime import datetime
+from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple
+
+import requests
+from pydantic import StrictStr
+
+from feast import Entity, FeatureView, RepoConfig
+from feast.infra.online_stores.online_store import OnlineStore
+from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
+from feast.protos.feast.types.Value_pb2 import Value as ValueProto
+from feast.repo_config import FeastConfigBaseModel
+from feast.type_map import python_values_to_proto_values
+from feast.value_type import ValueType
+
+logger = logging.getLogger(__name__)
+
+
+class RemoteOnlineStoreConfig(FeastConfigBaseModel):
+    """Configuration for the remote online store."""
+
+    type: Literal["remote"] = "remote"
+    """Online store type selector"""
+
+    path: StrictStr = "http://localhost:6566"
+    """ str: URL of the remote feature server.
+    All online reads issued through this store are forwarded to this address. """
+
+
+class RemoteOnlineStore(OnlineStore):
+    """
+    Remote online store implementation that forwards reads to a Feast feature server.
+    """
+
+    def online_write_batch(
+        self,
+        config: RepoConfig,
+        table: FeatureView,
+        data: List[
+            Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
+        ],
+        progress: Optional[Callable[[int], Any]],
+    ) -> None:
+        # The remote online store currently supports only reads; writes must be
+        # performed on the server side.
+        raise NotImplementedError
+
+    def online_read(
+        self,
+        config: RepoConfig,
+        table: FeatureView,
+        entity_keys: List[EntityKeyProto],
+        requested_features: Optional[List[str]] = None,
+    ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
+        assert isinstance(config.online_store, RemoteOnlineStoreConfig)
+        config.online_store.__class__ = RemoteOnlineStoreConfig
+
+        req_body = self._construct_online_read_api_json_request(
+            entity_keys, table, requested_features
+        )
+        response = requests.post(
+            f"{config.online_store.path}/get-online-features", data=req_body
+        )
+        if response.status_code == 200:
+            logger.debug("Able to retrieve the online features from feature server.")
+            response_json = json.loads(response.text)
+            event_ts = self._get_event_ts(response_json)
+            # Iterate over the results, converting the column-oriented API
+            # response into one (event timestamp, feature dict) row per entity key.
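+            # Illustrative response shape, inferred from the fields accessed
+            # below (names and values are placeholders, not a documented schema):
+            #   {"metadata": {"feature_names": ["driver_id", "conv_rate"]},
+            #    "results": [{"values": [...], "statuses": ["PRESENT"],
+            #                 "event_timestamps": ["2024-01-01T00:00:00Z"]}, ...]}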
+ result_tuples: List[ + Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]] + ] = [] + for feature_value_index in range(len(entity_keys)): + feature_values_dict: Dict[str, ValueProto] = dict() + for index, feature_name in enumerate( + response_json["metadata"]["feature_names"] + ): + if ( + requested_features is not None + and feature_name in requested_features + ): + if ( + response_json["results"][index]["statuses"][ + feature_value_index + ] + == "PRESENT" + ): + message = python_values_to_proto_values( + [ + response_json["results"][index]["values"][ + feature_value_index + ] + ], + ValueType.UNKNOWN, + ) + feature_values_dict[feature_name] = message[0] + else: + feature_values_dict[feature_name] = ValueProto() + result_tuples.append((event_ts, feature_values_dict)) + return result_tuples + else: + error_msg = f"Unable to retrieve the online store data using feature server API. Error_code={response.status_code}, error_message={response.reason}" + logger.error(error_msg) + raise RuntimeError(error_msg) + + def _construct_online_read_api_json_request( + self, + entity_keys: List[EntityKeyProto], + table: FeatureView, + requested_features: Optional[List[str]] = None, + ) -> str: + api_requested_features = [] + if requested_features is not None: + for requested_feature in requested_features: + api_requested_features.append(f"{table.name}:{requested_feature}") + + entity_values = [] + entity_key = "" + for row in entity_keys: + entity_key = row.join_keys[0] + entity_values.append( + getattr(row.entity_values[0], row.entity_values[0].WhichOneof("val")) + ) + + req_body = json.dumps( + { + "features": api_requested_features, + "entities": {entity_key: entity_values}, + } + ) + return req_body + + def _get_event_ts(self, response_json) -> datetime: + event_ts = "" + if len(response_json["results"]) > 1: + event_ts = response_json["results"][1]["event_timestamps"][0] + return datetime.fromisoformat(event_ts.replace("Z", "+00:00")) + + def update( + self, + config: RepoConfig, + tables_to_delete: Sequence[FeatureView], + tables_to_keep: Sequence[FeatureView], + entities_to_delete: Sequence[Entity], + entities_to_keep: Sequence[Entity], + partial: bool, + ): + pass + + def teardown( + self, + config: RepoConfig, + tables: Sequence[FeatureView], + entities: Sequence[Entity], + ): + pass diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index b7c7b0a9d0d..d5b3160b566 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -64,6 +64,7 @@ "hazelcast": "feast.infra.online_stores.contrib.hazelcast_online_store.hazelcast_online_store.HazelcastOnlineStore", "ikv": "feast.infra.online_stores.contrib.ikv_online_store.ikv.IKVOnlineStore", "elasticsearch": "feast.infra.online_stores.contrib.elasticsearch.ElasticSearchOnlineStore", + "remote": "feast.infra.online_stores.remote.RemoteOnlineStore", } OFFLINE_STORE_CLASS_FOR_TYPE = { diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py index 775db8c388d..48f482f5428 100644 --- a/sdk/python/tests/conftest.py +++ b/sdk/python/tests/conftest.py @@ -32,8 +32,8 @@ create_basic_driver_dataset, create_document_dataset, ) -from tests.integration.feature_repos.integration_test_repo_config import ( # noqa: E402 - IntegrationTestRepoConfig, +from tests.integration.feature_repos.integration_test_repo_config import ( + IntegrationTestRepoConfig, # noqa: E402 ) from tests.integration.feature_repos.repo_configuration import ( # noqa: E402 AVAILABLE_OFFLINE_STORES, @@ -45,8 +45,8 @@ 
     construct_universal_feature_views,
     construct_universal_test_data,
 )
-from tests.integration.feature_repos.universal.data_sources.file import (  # noqa: E402
-    FileDataSourceCreator,
+from tests.integration.feature_repos.universal.data_sources.file import (
+    FileDataSourceCreator,  # noqa: E402
 )
 from tests.integration.feature_repos.universal.entities import (  # noqa: E402
     customer,
@@ -173,7 +173,7 @@ def simple_dataset_2() -> pd.DataFrame:
 def start_test_local_server(repo_path: str, port: int):
     fs = FeatureStore(repo_path)
-    fs.serve("localhost", port, no_access_log=True)
+    fs.serve(host="localhost", port=port)
 
 
 @pytest.fixture
diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py
index be01a1e1ac6..7123bd0fc15 100644
--- a/sdk/python/tests/integration/feature_repos/repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py
@@ -136,9 +136,7 @@
 AVAILABLE_ONLINE_STORES: Dict[
     str, Tuple[Union[str, Dict[Any, Any]], Optional[Type[OnlineStoreCreator]]]
-] = {
-    "sqlite": ({"type": "sqlite"}, None),
-}
+] = {"sqlite": ({"type": "sqlite"}, None)}
 
 # Only configure Cloud DWH if running full integration tests
 if os.getenv("FEAST_IS_LOCAL_TEST", "False") != "True":
@@ -155,7 +153,6 @@
     AVAILABLE_ONLINE_STORES["datastore"] = ("datastore", None)
     AVAILABLE_ONLINE_STORES["snowflake"] = (SNOWFLAKE_CONFIG, None)
     AVAILABLE_ONLINE_STORES["bigtable"] = (BIGTABLE_CONFIG, None)
-
 # Uncomment to test using private Rockset account. Currently not enabled as
 # there is no dedicated Rockset instance for CI testing and there is no
 # containerized version of Rockset.
@@ -489,7 +486,6 @@
                 "arn:aws:iam::402087665549:role/lambda_execution_role",
             ),
         )
-
     else:
         feature_server = LocalFeatureServerConfig(
             feature_logging=FeatureLoggingConfig(enabled=True)
diff --git a/sdk/python/tests/integration/online_store/test_remote_online_store.py b/sdk/python/tests/integration/online_store/test_remote_online_store.py
new file mode 100644
index 00000000000..759a9c7a87b
--- /dev/null
+++ b/sdk/python/tests/integration/online_store/test_remote_online_store.py
@@ -0,0 +1,233 @@
+import os
+import subprocess
+import tempfile
+from datetime import datetime
+from textwrap import dedent
+
+import pytest
+
+from feast.feature_store import FeatureStore
+from feast.wait import wait_retry_backoff
+from tests.utils.cli_repo_creator import CliRunner
+from tests.utils.http_server import check_port_open, free_port
+
+
+@pytest.mark.integration
+def test_remote_online_store_read():
+    with tempfile.TemporaryDirectory() as remote_server_tmp_dir, tempfile.TemporaryDirectory() as remote_client_tmp_dir:
+        server_store, server_url, registry_path = (
+            _create_server_store_spin_feature_server(temp_dir=remote_server_tmp_dir)
+        )
+        assert None not in (server_store, server_url, registry_path)
+        client_store = _create_remote_client_feature_store(
+            temp_dir=remote_client_tmp_dir,
+            server_registry_path=str(registry_path),
+            feature_server_url=server_url,
+        )
+        assert client_store is not None
+        _assert_non_existing_entity_feature_views_entity(
+            client_store=client_store, server_store=server_store
+        )
+        _assert_existing_feature_views_entity(
+            client_store=client_store, server_store=server_store
+        )
+        _assert_non_existing_feature_views(
+            client_store=client_store, server_store=server_store
+        )
+
+
+def _assert_non_existing_entity_feature_views_entity(
+    client_store: FeatureStore, server_store: FeatureStore
+):
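+    # Assumption: driver_id 1234 is not part of the demo data materialized by
+    # `feast init`, so both stores should return the same payload (statuses
+    # and empty values) for an unknown entity.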
+    features = [
+        "driver_hourly_stats:conv_rate",
+        "driver_hourly_stats:acc_rate",
+        "driver_hourly_stats:avg_daily_trips",
+    ]
+
+    entity_rows = [{"driver_id": 1234}]
+    _assert_client_server_online_stores_are_matching(
+        client_store=client_store,
+        server_store=server_store,
+        features=features,
+        entity_rows=entity_rows,
+    )
+
+
+def _assert_non_existing_feature_views(
+    client_store: FeatureStore, server_store: FeatureStore
+):
+    features = [
+        "driver_hourly_stats1:conv_rate",
+        "driver_hourly_stats1:acc_rate",
+        "driver_hourly_stats:avg_daily_trips",
+    ]
+
+    entity_rows = [{"driver_id": 1001}, {"driver_id": 1002}]
+
+    with pytest.raises(
+        Exception, match="Feature view driver_hourly_stats1 does not exist"
+    ):
+        client_store.get_online_features(
+            features=features, entity_rows=entity_rows
+        ).to_dict()
+
+    with pytest.raises(
+        Exception, match="Feature view driver_hourly_stats1 does not exist"
+    ):
+        server_store.get_online_features(
+            features=features, entity_rows=entity_rows
+        ).to_dict()
+
+
+def _assert_existing_feature_views_entity(
+    client_store: FeatureStore, server_store: FeatureStore
+):
+    features = [
+        "driver_hourly_stats:conv_rate",
+        "driver_hourly_stats:acc_rate",
+        "driver_hourly_stats:avg_daily_trips",
+    ]
+
+    entity_rows = [{"driver_id": 1001}, {"driver_id": 1002}]
+    _assert_client_server_online_stores_are_matching(
+        client_store=client_store,
+        server_store=server_store,
+        features=features,
+        entity_rows=entity_rows,
+    )
+
+    features = ["driver_hourly_stats:conv_rate"]
+    _assert_client_server_online_stores_are_matching(
+        client_store=client_store,
+        server_store=server_store,
+        features=features,
+        entity_rows=entity_rows,
+    )
+
+
+def _assert_client_server_online_stores_are_matching(
+    client_store: FeatureStore,
+    server_store: FeatureStore,
+    features: list[str],
+    entity_rows: list,
+):
+    online_features_from_client = client_store.get_online_features(
+        features=features, entity_rows=entity_rows
+    ).to_dict()
+    assert online_features_from_client is not None
+
+    online_features_from_server = server_store.get_online_features(
+        features=features, entity_rows=entity_rows
+    ).to_dict()
+    assert online_features_from_server is not None
+
+    assert online_features_from_client == online_features_from_server
+
+
+def _create_server_store_spin_feature_server(temp_dir):
+    feast_server_port = free_port()
+    store = _default_store(str(temp_dir), "REMOTE_ONLINE_SERVER_PROJECT")
+    server_url = next(
+        _start_feature_server(
+            repo_path=str(store.repo_path), server_port=feast_server_port
+        )
+    )
+    print(f"Server started successfully: {server_url}")
+    return store, server_url, os.path.join(store.repo_path, "data", "registry.db")
+
+
+def _default_store(temp_dir, project_name) -> FeatureStore:
+    runner = CliRunner()
+    result = runner.run(["init", project_name], cwd=temp_dir)
+    repo_path = os.path.join(temp_dir, project_name, "feature_repo")
+    assert result.returncode == 0
+
+    result = runner.run(["--chdir", repo_path, "apply"], cwd=temp_dir)
+    assert result.returncode == 0
+
+    fs = FeatureStore(repo_path=repo_path)
+    fs.materialize_incremental(
+        end_date=datetime.utcnow(), feature_views=["driver_hourly_stats"]
+    )
+    return fs
+
+
+def _create_remote_client_feature_store(
+    temp_dir, server_registry_path: str, feature_server_url: str
+) -> FeatureStore:
+    project_name = "REMOTE_ONLINE_CLIENT_PROJECT"
+    runner = CliRunner()
+    result = runner.run(["init", project_name], cwd=temp_dir)
+    assert result.returncode == 0
+
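+    # The client repo starts as a stock `feast init` project; its
+    # feature_store.yaml is then rewritten below to reuse the server's
+    # registry file and to read through the remote online store instead of a
+    # local one.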
+    repo_path = os.path.join(temp_dir, project_name, "feature_repo")
+    _overwrite_remote_client_feature_store_yaml(
+        repo_path=str(repo_path),
+        registry_path=server_registry_path,
+        feature_server_url=feature_server_url,
+    )
+
+    result = runner.run(["--chdir", repo_path, "apply"], cwd=temp_dir)
+    assert result.returncode == 0
+
+    return FeatureStore(repo_path=repo_path)
+
+
+def _overwrite_remote_client_feature_store_yaml(
+    repo_path: str, registry_path: str, feature_server_url: str
+):
+    repo_config = os.path.join(repo_path, "feature_store.yaml")
+    with open(repo_config, "w") as repo_config_file:
+        repo_config_file.write(
+            dedent(
+                f"""
+                project: REMOTE_ONLINE_CLIENT_PROJECT
+                registry: {registry_path}
+                provider: local
+                online_store:
+                    path: {feature_server_url}
+                    type: remote
+                entity_key_serialization_version: 2
+                """
+            )
+        )
+
+
+def _start_feature_server(repo_path: str, server_port: int):
+    host = "0.0.0.0"
+    cmd = [
+        "feast",
+        "-c",
+        repo_path,
+        "serve",
+        "--host",
+        host,
+        "--port",
+        str(server_port),
+    ]
+    feast_server_process = subprocess.Popen(
+        cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
+    )
+    _time_out_sec: int = 60
+    # Wait for the server to start
+    wait_retry_backoff(
+        lambda: (None, check_port_open(host, server_port)),
+        timeout_secs=_time_out_sec,
+        timeout_msg=f"Unable to start the feast server in {_time_out_sec} seconds for remote online store type, port={server_port}",
+    )
+
+    yield f"http://localhost:{server_port}"
+
+    # The code below the yield runs when the generator is closed (e.g. when it
+    # is garbage collected) and stops the server.
+    if feast_server_process is not None:
+        feast_server_process.kill()
+
+    # Wait for the server to free the port
+    wait_retry_backoff(
+        lambda: (
+            None,
+            not check_port_open("localhost", server_port),
+        ),
+        timeout_msg=f"Unable to stop the feast server in {_time_out_sec} seconds for remote online store type, port={server_port}",
+        timeout_secs=_time_out_sec,
+    )
diff --git a/sdk/python/tests/unit/online_store/__init__.py b/sdk/python/tests/unit/online_store/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d