From e3aa27448395d42778cfd519458f3641ac897f76 Mon Sep 17 00:00:00 2001 From: iamcodingcat Date: Sat, 21 Jun 2025 18:24:10 +0900 Subject: [PATCH 01/13] feat: integrate dynamodb as online store in Golang api server Signed-off-by: iamcodingcat --- .../feast/onlinestore/dynamodbonlinestore.go | 240 ++++++++++++++++++ go/internal/feast/onlinestore/onlinestore.go | 3 + 2 files changed, 243 insertions(+) create mode 100644 go/internal/feast/onlinestore/dynamodbonlinestore.go diff --git a/go/internal/feast/onlinestore/dynamodbonlinestore.go b/go/internal/feast/onlinestore/dynamodbonlinestore.go new file mode 100644 index 00000000000..f01f9467f91 --- /dev/null +++ b/go/internal/feast/onlinestore/dynamodbonlinestore.go @@ -0,0 +1,240 @@ +package onlinestore + +import ( + "context" + "encoding/hex" + "fmt" + awsConfig "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/dynamodb" + dtypes "github.com/aws/aws-sdk-go-v2/service/dynamodb/types" + "github.com/feast-dev/feast/go/internal/feast/registry" + "github.com/feast-dev/feast/go/protos/feast/serving" + "github.com/feast-dev/feast/go/protos/feast/types" + "github.com/roberson-io/mmh3" + "golang.org/x/sync/errgroup" + "golang.org/x/sync/semaphore" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/timestamppb" + "runtime" + "sync" + "time" +) + +type batchResult struct { + index int + response *dynamodb.BatchGetItemOutput + err error +} + +type DynamodbOnlineStore struct { + // Feast project name + // TODO: Should we remove project as state that is tracked at the store level? 
+ project string + + client *dynamodb.Client + + config *registry.RepoConfig + + // dynamodb configuration + consistentRead *bool + batchSize *int +} + +func NewDynamodbOnlineStore(project string, config *registry.RepoConfig, onlineStoreConfig map[string]interface{}) (*DynamodbOnlineStore, error) { + store := DynamodbOnlineStore{ + project: project, + config: config, + } + + // aws configuration + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + cfg, err := awsConfig.LoadDefaultConfig(ctx) + if err != nil { + panic(err) + } + store.client = dynamodb.NewFromConfig(cfg) + + // dynamodb configuration + consistentRead, ok := onlineStoreConfig["consistent_reads"].(bool) + if !ok { + consistentRead = false + } + store.consistentRead = &consistentRead + + var batchSize int + if batchSizeFloat, ok := onlineStoreConfig["batch_size"].(float64); ok { + batchSize = int(batchSizeFloat) + } else { + batchSize = 40 + } + store.batchSize = &batchSize + + return &store, nil +} + +func (d *DynamodbOnlineStore) OnlineRead(ctx context.Context, entityKeys []*types.EntityKey, featureViewNames []string, featureNames []string) ([][]FeatureData, error) { + // prevent resource waste in case context is canceled earlier + if ctx.Err() != nil { + return nil, ctx.Err() + } + + results := make([][]FeatureData, len(entityKeys)) + + // serialize entity key into entity hash id + entityIndexMap := make(map[string]int) + entityIds := make([]string, 0, len(entityKeys)) + unprocessedEntityIds := make(map[string]bool) + for i, entityKey := range entityKeys { + serKey, err := serializeEntityKey(entityKey, d.config.EntityKeySerializationVersion) + if err != nil { + return nil, err + } + entityId := hex.EncodeToString(mmh3.Hashx64_128(*serKey, 0)) + entityIds = append(entityIds, entityId) + entityIndexMap[entityId] = i + unprocessedEntityIds[entityId] = true + } + + // metadata from feature views, feature names + featureMap, featureNamesIndex, err := 
makeFeatureMeta(featureViewNames, featureNames) + if err != nil { + return nil, err + } + + // initialize `FeatureData` slice + featureCount := len(featureNamesIndex) + for i := 0; i < len(results); i++ { + results[i] = make([]FeatureData, featureCount) + } + + // controls the maximum number of concurrent goroutines sending requests to DynamoDB using a semaphore + cpuCount := runtime.NumCPU() + sem := semaphore.NewWeighted(int64(cpuCount * 2)) + + var mu sync.Mutex + for featureViewName, featureNames := range featureMap { + tableName := fmt.Sprintf("%s.%s", d.project, featureViewName) + + var batchGetItemInputs []*dynamodb.BatchGetItemInput + batchSize := *d.batchSize + for i := 0; i < len(entityIds); i += batchSize { + end := i + batchSize + if end > len(entityIds) { + end = len(entityIds) + } + batchEntityIds := entityIds[i:end] + entityIdBatch := make([]map[string]dtypes.AttributeValue, len(batchEntityIds)) + for i, entityId := range batchEntityIds { + entityIdBatch[i] = map[string]dtypes.AttributeValue{ + "entity_id": &dtypes.AttributeValueMemberS{Value: entityId}, + } + } + batchGetItemInput := &dynamodb.BatchGetItemInput{ + RequestItems: map[string]dtypes.KeysAndAttributes{ + tableName: { + Keys: entityIdBatch, + ConsistentRead: d.consistentRead, + }, + }, + } + batchGetItemInputs = append(batchGetItemInputs, batchGetItemInput) + } + + // goroutines sending requests to DynamoDB + errGroup, ctx := errgroup.WithContext(ctx) + for i, batchGetItemInput := range batchGetItemInputs { + _, batchGetItemInput := i, batchGetItemInput + errGroup.Go(func() error { + if err := sem.Acquire(ctx, 1); err != nil { + return err + } + defer sem.Release(1) + + resp, err := d.client.BatchGetItem(ctx, batchGetItemInput) + if err != nil { + return err + } + + // in case there is no entity id of a feature view in dynamodb + batchSize := len(resp.Responses[tableName]) + if batchSize == 0 { + return nil + } + + // process response from dynamodb + for j := 0; j < batchSize; j++ { + 
entityId := resp.Responses[tableName][j]["entity_id"].(*dtypes.AttributeValueMemberS).Value + timestampString := resp.Responses[tableName][j]["event_ts"].(*dtypes.AttributeValueMemberS).Value + t, err := time.Parse("2006-01-02 15:04:05-07:00", timestampString) + if err != nil { + return err + } + timeStamp := timestamppb.New(t) + + featureValues := resp.Responses[tableName][j]["values"].(*dtypes.AttributeValueMemberM).Value + entityIndex := entityIndexMap[entityId] + + for _, featureName := range featureNames { + featureValue := featureValues[featureName].(*dtypes.AttributeValueMemberB).Value + var value types.Value + if err := proto.Unmarshal(featureValue, &value); err != nil { + return err + } + featureIndex := featureNamesIndex[featureName] + + mu.Lock() + results[entityIndex][featureIndex] = FeatureData{Reference: serving.FeatureReferenceV2{FeatureViewName: featureViewName, FeatureName: featureName}, + Timestamp: timestamppb.Timestamp{Seconds: timeStamp.Seconds, Nanos: timeStamp.Nanos}, + Value: types.Value{Val: value.Val}, + } + mu.Unlock() + } + + mu.Lock() + delete(unprocessedEntityIds, entityId) + mu.Unlock() + } + return nil + }) + } + if err := errGroup.Wait(); err != nil { + return nil, err + } + + // process null imputation for entity ids that don't exist in dynamodb + currentTime := timestamppb.Now() // TODO: should use a different timestamp? 
+ for entityId, _ := range unprocessedEntityIds { + entityIndex := entityIndexMap[entityId] + for _, featureName := range featureNames { + featureIndex := featureNamesIndex[featureName] + results[entityIndex][featureIndex] = FeatureData{Reference: serving.FeatureReferenceV2{FeatureViewName: featureViewName, FeatureName: featureName}, + Timestamp: timestamppb.Timestamp{Seconds: currentTime.Seconds, Nanos: currentTime.Nanos}, + Value: types.Value{Val: &types.Value_NullVal{NullVal: types.Null_NULL}}, + } + } + } + } + + return results, nil +} + +func (d *DynamodbOnlineStore) Destruct() { + +} + +func makeFeatureMeta(featureViewNames []string, featureNames []string) (map[string][]string, map[string]int, error) { + if len(featureViewNames) != len(featureNames) { + return nil, nil, fmt.Errorf("the lengths of featureViewNames and featureNames must be the same. got=%d, %d", len(featureViewNames), len(featureNames)) + } + featureMap := make(map[string][]string) + featureNamesIndex := make(map[string]int) + for i := 0; i < len(featureViewNames); i++ { + featureViewName := featureViewNames[i] + featureName := featureNames[i] + + featureMap[featureViewName] = append(featureMap[featureViewName], featureName) + featureNamesIndex[featureName] = i + } + return featureMap, featureNamesIndex, nil +} diff --git a/go/internal/feast/onlinestore/onlinestore.go b/go/internal/feast/onlinestore/onlinestore.go index 2f30e16d674..d6a3f893aaa 100644 --- a/go/internal/feast/onlinestore/onlinestore.go +++ b/go/internal/feast/onlinestore/onlinestore.go @@ -61,6 +61,9 @@ func NewOnlineStore(config *registry.RepoConfig) (OnlineStore, error) { } else if onlineStoreType == "redis" { onlineStore, err := NewRedisOnlineStore(config.Project, config, config.OnlineStore) return onlineStore, err + } else if onlineStoreType == "dynamodb" { + onlineStore, err := NewDynamodbOnlineStore(config.Project, config, config.OnlineStore) + return onlineStore, err } else { return nil, fmt.Errorf("%s online store type 
is currently not supported; only redis and sqlite are supported", onlineStoreType) } From 72e03c20d042f9f036e12841f0a5a0df1ff06601 Mon Sep 17 00:00:00 2001 From: iamcodingcat Date: Sat, 21 Jun 2025 18:24:28 +0900 Subject: [PATCH 02/13] chore: update mod and sum files Signed-off-by: iamcodingcat --- go.mod | 15 +++++++++------ go.sum | 12 ++++++++++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 18c459373b6..7a65f5b744b 100644 --- a/go.mod +++ b/go.mod @@ -6,15 +6,21 @@ toolchain go1.22.5 require ( github.com/apache/arrow/go/v17 v17.0.0 + github.com/aws/aws-sdk-go-v2 v1.36.4 + github.com/aws/aws-sdk-go-v2/config v1.29.14 + github.com/aws/aws-sdk-go-v2/service/dynamodb v1.43.3 + github.com/aws/aws-sdk-go-v2/service/s3 v1.79.3 github.com/ghodss/yaml v1.0.0 github.com/golang/protobuf v1.5.4 github.com/google/uuid v1.6.0 github.com/mattn/go-sqlite3 v1.14.23 github.com/pkg/errors v0.9.1 github.com/redis/go-redis/v9 v9.6.1 + github.com/roberson-io/mmh3 v0.0.0-20190729202758-fdfce3ba6225 github.com/rs/zerolog v1.33.0 github.com/spaolacci/murmur3 v1.1.0 github.com/stretchr/testify v1.9.0 + google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 google.golang.org/grpc v1.67.0 google.golang.org/protobuf v1.34.2 ) @@ -23,20 +29,18 @@ require ( github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect github.com/andybalholm/brotli v1.1.0 // indirect github.com/apache/thrift v0.21.0 // indirect - github.com/aws/aws-sdk-go-v2 v1.36.3 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 // indirect - github.com/aws/aws-sdk-go-v2/config v1.29.14 // indirect github.com/aws/aws-sdk-go-v2/credentials v1.17.67 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 // indirect + 
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.35 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.35 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.34 // indirect github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 // indirect github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.1 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.10.16 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 // indirect github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.15 // indirect - github.com/aws/aws-sdk-go-v2/service/s3 v1.79.3 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.25.3 // indirect github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.33.19 // indirect @@ -66,7 +70,6 @@ require ( golang.org/x/text v0.18.0 // indirect golang.org/x/tools v0.25.0 // indirect golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 9aefbf0aa35..7778a906ecd 100644 --- a/go.sum +++ b/go.sum @@ -8,6 +8,8 @@ github.com/apache/thrift v0.21.0 h1:tdPmh/ptjE1IJnhbhrcl2++TauVjy242rkV/UzJChnE= github.com/apache/thrift v0.21.0/go.mod h1:W1H8aR/QRtYNvrPeFXBtobyRkd0/YVhTc6i07XIAgDw= github.com/aws/aws-sdk-go-v2 v1.36.3 h1:mJoei2CxPutQVxaATCzDUjcZEjVRdpsiiXi2o38yqWM= github.com/aws/aws-sdk-go-v2 v1.36.3/go.mod h1:LLXuLpgzEbD766Z5ECcRmi8AzSwfZItDtmABVkRLGzg= +github.com/aws/aws-sdk-go-v2 v1.36.4 h1:GySzjhVvx0ERP6eyfAbAuAXLtAda5TEy19E5q5W8I9E= +github.com/aws/aws-sdk-go-v2 v1.36.4/go.mod h1:LLXuLpgzEbD766Z5ECcRmi8AzSwfZItDtmABVkRLGzg= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 
h1:zAybnyUQXIZ5mok5Jqwlf58/TFE7uvd3IAsa1aF9cXs= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10/go.mod h1:qqvMj6gHLR/EXWZw4ZbqlPbQUyenf4h82UQUlKc+l14= github.com/aws/aws-sdk-go-v2/config v1.29.14 h1:f+eEi/2cKCg9pqKBoAIwRGzVb70MRKqWX4dg1BDcSJM= @@ -18,16 +20,24 @@ github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 h1:x793wxmUWVDhshP8WW2mln github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30/go.mod h1:Jpne2tDnYiFascUEs2AWHJL9Yp7A5ZVy3TNyxaAjD6M= github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 h1:ZK5jHhnrioRkUNOc+hOgQKlUL5JeC3S6JgLxtQ+Rm0Q= github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34/go.mod h1:p4VfIceZokChbA9FzMbRGz5OV+lekcVtHlPKEO0gSZY= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.35 h1:o1v1VFfPcDVlK3ll1L5xHsaQAFdNtZ5GXnNR7SwueC4= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.35/go.mod h1:rZUQNYMNG+8uZxz9FOerQJ+FceCiodXvixpeRtdESrU= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 h1:SZwFm17ZUNNg5Np0ioo/gq8Mn6u9w19Mri8DnJ15Jf0= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34/go.mod h1:dFZsC0BLo346mvKQLWmoJxT+Sjp+qcVR1tRVHQGOH9Q= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.35 h1:R5b82ubO2NntENm3SAm0ADME+H630HomNJdgv+yZ3xw= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.35/go.mod h1:FuA+nmgMRfkzVKYDNEqQadvEMxtxl9+RLT9ribCwEMs= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d2KyU5X/BZxjOkRo= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo= github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.34 h1:ZNTqv4nIdE/DiBfUUfXcLZ/Spcuz+RjeziUtNJackkM= github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.34/go.mod h1:zf7Vcd1ViW7cPqYWEHLHJkS50X0JS2IKz9Cgaj6ugrs= +github.com/aws/aws-sdk-go-v2/service/dynamodb v1.43.3 h1:2FCJAT5wyPs5JjAFoLgaEB0MIiWvXiJ0T6PZiKDkJoo= +github.com/aws/aws-sdk-go-v2/service/dynamodb v1.43.3/go.mod h1:rUOhTo9+gtTYTMnGD+xiiks/2Z8vssPP+uSMNhJBbmI= 
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 h1:eAh2A4b5IzM/lum78bZ590jy36+d/aFLgKF/4Vd1xPE= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3/go.mod h1:0yKJC/kb8sAnmlYa6Zs3QVYqaC8ug2AbnNChv5Ox3uA= github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.1 h1:4nm2G6A4pV9rdlWzGMPv4BNtQp22v1hg3yrtkYpeLl8= github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.1/go.mod h1:iu6FSzgt+M2/x3Dk8zhycdIcHjEFb36IS8HVUVFoMg0= +github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.10.16 h1:TLsOzHW9zlJoMgjcKQI/7bolyv/DL0796y4NigWgaw8= +github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.10.16/go.mod h1:mNoiR5qsO9TxXZ6psjjQ3M+Zz7hURFTumXHF+UKjyAU= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 h1:dM9/92u2F1JbDaGooxTq18wmmFzbJRfXfVfy96/1CXM= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15/go.mod h1:SwFBy2vjtA0vZbjjaFtfN045boopadnoVPhu4Fv66vY= github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.15 h1:moLQUoVq91LiqT1nbvzDukyqAlCv89ZmwaHw/ZFlFZg= @@ -94,6 +104,8 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/redis/go-redis/v9 v9.6.1 h1:HHDteefn6ZkTtY5fGUE8tj8uy85AHk6zP7CpzIAM0y4= github.com/redis/go-redis/v9 v9.6.1/go.mod h1:0C0c6ycQsdpVNQpxb1njEQIqkx5UcsM8FJCQLgE9+RA= +github.com/roberson-io/mmh3 v0.0.0-20190729202758-fdfce3ba6225 h1:ZMsPCp7oYgjoIFt1c+sM2qojxZXotSYcMF8Ur9/LJlM= +github.com/roberson-io/mmh3 v0.0.0-20190729202758-fdfce3ba6225/go.mod h1:XEESr+X1SY8ZSuc3jqsTlb3clCkqQJ4DcF3Qxv1N3PM= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8= github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= From 7d15f837c962d058d46a9a5dd578c59a47a3f0ff Mon 
Sep 17 00:00:00 2001 From: iamcodingcat Date: Tue, 24 Jun 2025 22:04:34 +0900 Subject: [PATCH 03/13] refact: regard serializing entity-key as common method Signed-off-by: iamcodingcat --- go/internal/feast/onlinestore/onlinestore.go | 64 ++++++++++++++++++ .../feast/onlinestore/redisonlinestore.go | 65 +------------------ 2 files changed, 65 insertions(+), 64 deletions(-) diff --git a/go/internal/feast/onlinestore/onlinestore.go b/go/internal/feast/onlinestore/onlinestore.go index d6a3f893aaa..5fd23c52ee9 100644 --- a/go/internal/feast/onlinestore/onlinestore.go +++ b/go/internal/feast/onlinestore/onlinestore.go @@ -2,7 +2,9 @@ package onlinestore import ( "context" + "encoding/binary" "fmt" + "sort" "github.com/feast-dev/feast/go/internal/feast/registry" "github.com/feast-dev/feast/go/protos/feast/serving" @@ -68,3 +70,65 @@ func NewOnlineStore(config *registry.RepoConfig) (OnlineStore, error) { return nil, fmt.Errorf("%s online store type is currently not supported; only redis and sqlite are supported", onlineStoreType) } } + +func serializeEntityKey(entityKey *types.EntityKey, entityKeySerializationVersion int64) (*[]byte, error) { + // Serialize entity key to a bytestring so that it can be used as a lookup key in a hash table. 
+ + // Ensure that we have the right amount of join keys and entity values + if len(entityKey.JoinKeys) != len(entityKey.EntityValues) { + return nil, fmt.Errorf("the amount of join key names and entity values don't match: %s vs %s", entityKey.JoinKeys, entityKey.EntityValues) + } + + // Make sure that join keys are sorted so that we have consistent key building + m := make(map[string]*types.Value) + + for i := 0; i < len(entityKey.JoinKeys); i++ { + m[entityKey.JoinKeys[i]] = entityKey.EntityValues[i] + } + + keys := make([]string, 0, len(m)) + for k := range entityKey.JoinKeys { + keys = append(keys, entityKey.JoinKeys[k]) + } + sort.Strings(keys) + + // Build the key + length := 5 * len(keys) + bufferList := make([][]byte, length) + + for i := 0; i < len(keys); i++ { + offset := i * 2 + byteBuffer := make([]byte, 4) + binary.LittleEndian.PutUint32(byteBuffer, uint32(types.ValueType_Enum_value["STRING"])) + bufferList[offset] = byteBuffer + bufferList[offset+1] = []byte(keys[i]) + } + + for i := 0; i < len(keys); i++ { + offset := (2 * len(keys)) + (i * 3) + value := m[keys[i]].GetVal() + + valueBytes, valueTypeBytes, err := serializeValue(value, entityKeySerializationVersion) + if err != nil { + return valueBytes, err + } + + typeBuffer := make([]byte, 4) + binary.LittleEndian.PutUint32(typeBuffer, uint32(valueTypeBytes)) + + lenBuffer := make([]byte, 4) + binary.LittleEndian.PutUint32(lenBuffer, uint32(len(*valueBytes))) + + bufferList[offset+0] = typeBuffer + bufferList[offset+1] = lenBuffer + bufferList[offset+2] = *valueBytes + } + + // Convert from an array of byte arrays to a single byte array + var entityKeyBuffer []byte + for i := 0; i < len(bufferList); i++ { + entityKeyBuffer = append(entityKeyBuffer, bufferList[i]...) 
+ } + + return &entityKeyBuffer, nil +} diff --git a/go/internal/feast/onlinestore/redisonlinestore.go b/go/internal/feast/onlinestore/redisonlinestore.go index df47deceecf..6c1d3591e01 100644 --- a/go/internal/feast/onlinestore/redisonlinestore.go +++ b/go/internal/feast/onlinestore/redisonlinestore.go @@ -6,8 +6,7 @@ import ( "encoding/binary" "errors" "fmt" - //"os" - "sort" + "strconv" "strings" @@ -340,68 +339,6 @@ func buildRedisKey(project string, entityKey *types.EntityKey, entityKeySerializ return &fullKey, nil } -func serializeEntityKey(entityKey *types.EntityKey, entityKeySerializationVersion int64) (*[]byte, error) { - // Serialize entity key to a bytestring so that it can be used as a lookup key in a hash table. - - // Ensure that we have the right amount of join keys and entity values - if len(entityKey.JoinKeys) != len(entityKey.EntityValues) { - return nil, fmt.Errorf("the amount of join key names and entity values don't match: %s vs %s", entityKey.JoinKeys, entityKey.EntityValues) - } - - // Make sure that join keys are sorted so that we have consistent key building - m := make(map[string]*types.Value) - - for i := 0; i < len(entityKey.JoinKeys); i++ { - m[entityKey.JoinKeys[i]] = entityKey.EntityValues[i] - } - - keys := make([]string, 0, len(m)) - for k := range entityKey.JoinKeys { - keys = append(keys, entityKey.JoinKeys[k]) - } - sort.Strings(keys) - - // Build the key - length := 5 * len(keys) - bufferList := make([][]byte, length) - - for i := 0; i < len(keys); i++ { - offset := i * 2 - byteBuffer := make([]byte, 4) - binary.LittleEndian.PutUint32(byteBuffer, uint32(types.ValueType_Enum_value["STRING"])) - bufferList[offset] = byteBuffer - bufferList[offset+1] = []byte(keys[i]) - } - - for i := 0; i < len(keys); i++ { - offset := (2 * len(keys)) + (i * 3) - value := m[keys[i]].GetVal() - - valueBytes, valueTypeBytes, err := serializeValue(value, entityKeySerializationVersion) - if err != nil { - return valueBytes, err - } - - typeBuffer := 
make([]byte, 4) - binary.LittleEndian.PutUint32(typeBuffer, uint32(valueTypeBytes)) - - lenBuffer := make([]byte, 4) - binary.LittleEndian.PutUint32(lenBuffer, uint32(len(*valueBytes))) - - bufferList[offset+0] = typeBuffer - bufferList[offset+1] = lenBuffer - bufferList[offset+2] = *valueBytes - } - - // Convert from an array of byte arrays to a single byte array - var entityKeyBuffer []byte - for i := 0; i < len(bufferList); i++ { - entityKeyBuffer = append(entityKeyBuffer, bufferList[i]...) - } - - return &entityKeyBuffer, nil -} - func serializeValue(value interface{}, entityKeySerializationVersion int64) (*[]byte, types.ValueType_Enum, error) { // TODO: Implement support for other types (at least the major types like ints, strings, bytes) switch x := (value).(type) { From b1abd414a23900f66da1768959141350be4335fb Mon Sep 17 00:00:00 2001 From: iamcodingcat Date: Tue, 24 Jun 2025 22:04:54 +0900 Subject: [PATCH 04/13] test: add dynamodb online store Signed-off-by: iamcodingcat --- .../onlinestore/dynamodbonlinestore_test.go | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 go/internal/feast/onlinestore/dynamodbonlinestore_test.go diff --git a/go/internal/feast/onlinestore/dynamodbonlinestore_test.go b/go/internal/feast/onlinestore/dynamodbonlinestore_test.go new file mode 100644 index 00000000000..32c2c78dfe8 --- /dev/null +++ b/go/internal/feast/onlinestore/dynamodbonlinestore_test.go @@ -0,0 +1,24 @@ +package onlinestore + +import ( + "testing" + + "github.com/feast-dev/feast/go/internal/feast/registry" + + "github.com/stretchr/testify/assert" +) + +func TestNewDynamodbOnlineStore(t *testing.T) { + var config = map[string]interface{}{ + "batch_size": 40, + "region": "us-east-1", + "max_pool_connections": 4, + "consistent_reads": "true", + } + rc := ®istry.RepoConfig{ + OnlineStore: config, + EntityKeySerializationVersion: 2, + } + _, err := NewDynamodbOnlineStore("test", rc, config) + assert.Nil(t, err) +} From 
9d62354e2ee9b7ee8918d8e273665467b73da5d1 Mon Sep 17 00:00:00 2001 From: iamcodingcat Date: Tue, 24 Jun 2025 22:10:14 +0900 Subject: [PATCH 05/13] refact: change to false in unprocessed entity set Signed-off-by: iamcodingcat --- go/internal/feast/onlinestore/dynamodbonlinestore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/internal/feast/onlinestore/dynamodbonlinestore.go b/go/internal/feast/onlinestore/dynamodbonlinestore.go index f01f9467f91..36aed52cddf 100644 --- a/go/internal/feast/onlinestore/dynamodbonlinestore.go +++ b/go/internal/feast/onlinestore/dynamodbonlinestore.go @@ -93,7 +93,7 @@ func (d *DynamodbOnlineStore) OnlineRead(ctx context.Context, entityKeys []*type entityId := hex.EncodeToString(mmh3.Hashx64_128(*serKey, 0)) entityIds = append(entityIds, entityId) entityIndexMap[entityId] = i - unprocessedEntityIds[entityId] = true + unprocessedEntityIds[entityId] = false } // metadata from feature views, feature names From e8ed80a2409515ab7b35225cb1649828a992ad98 Mon Sep 17 00:00:00 2001 From: Jitendra Yejare Date: Mon, 16 Jun 2025 17:53:05 +0530 Subject: [PATCH 06/13] feat: Remote Write to Online Store completes client / server architecture (#5422) * Remote Write to Online Store Signed-off-by: jyejare * Comments Resolved and code shortened Signed-off-by: jyejare --------- Signed-off-by: jyejare Signed-off-by: iamcodingcat --- .../feast/infra/online_stores/remote.py | 68 +++++++- .../online_store/test_remote_online_store.py | 152 +++++++++++++++++- 2 files changed, 216 insertions(+), 4 deletions(-) diff --git a/sdk/python/feast/infra/online_stores/remote.py b/sdk/python/feast/infra/online_stores/remote.py index 8cc75ade445..ea09362299d 100644 --- a/sdk/python/feast/infra/online_stores/remote.py +++ b/sdk/python/feast/infra/online_stores/remote.py @@ -13,6 +13,7 @@ # limitations under the License. 
import json import logging +from collections import defaultdict from datetime import datetime from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple @@ -20,12 +21,16 @@ from pydantic import StrictStr from feast import Entity, FeatureView, RepoConfig +from feast.infra.online_stores.helpers import _to_naive_utc from feast.infra.online_stores.online_store import OnlineStore from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.repo_config import FeastConfigBaseModel from feast.rest_error_handler import rest_error_handling_decorator -from feast.type_map import python_values_to_proto_values +from feast.type_map import ( + feast_value_type_to_python_type, + python_values_to_proto_values, +) from feast.value_type import ValueType logger = logging.getLogger(__name__) @@ -60,7 +65,55 @@ def online_write_batch( ], progress: Optional[Callable[[int], Any]], ) -> None: - raise NotImplementedError + """ + Writes a batch of feature rows to the remote online store via the remote API. 
+ """ + assert isinstance(config.online_store, RemoteOnlineStoreConfig) + config.online_store.__class__ = RemoteOnlineStoreConfig + + columnar_data: Dict[str, List[Any]] = defaultdict(list) + + # Iterate through each row to populate columnar data directly + for entity_key_proto, feature_values_proto, event_ts, created_ts in data: + # Populate entity key values + for join_key, entity_value_proto in zip( + entity_key_proto.join_keys, entity_key_proto.entity_values + ): + columnar_data[join_key].append( + feast_value_type_to_python_type(entity_value_proto) + ) + + # Populate feature values + for feature_name, feature_value_proto in feature_values_proto.items(): + columnar_data[feature_name].append( + feast_value_type_to_python_type(feature_value_proto) + ) + + # Populate timestamps + columnar_data["event_timestamp"].append(_to_naive_utc(event_ts).isoformat()) + columnar_data["created"].append( + _to_naive_utc(created_ts).isoformat() if created_ts else None + ) + + req_body = { + "feature_view_name": table.name, + "df": columnar_data, + "allow_registry_cache": False, + } + + response = post_remote_online_write(config=config, req_body=req_body) + + if response.status_code != 200: + error_msg = f"Unable to write online store data using feature server API. Error_code={response.status_code}, error_message={response.text}" + logger.error(error_msg) + raise RuntimeError(error_msg) + + if progress: + data_length = len(data) + logger.info( + f"Writing {data_length} rows to the remote store for feature view {table.name}." 
+ ) + progress(data_length) def online_read( self, @@ -184,3 +237,14 @@ def get_remote_online_features( return session.post( f"{config.online_store.path}/get-online-features", data=req_body ) + + +@rest_error_handling_decorator +def post_remote_online_write( + session: requests.Session, config: RepoConfig, req_body: dict +) -> requests.Response: + url = f"{config.online_store.path}/write-to-online-store" + if config.online_store.cert: + return session.post(url, json=req_body, verify=config.online_store.cert) + else: + return session.post(url, json=req_body) diff --git a/sdk/python/tests/integration/online_store/test_remote_online_store.py b/sdk/python/tests/integration/online_store/test_remote_online_store.py index eb03fd0c3c5..3b5b707dcb7 100644 --- a/sdk/python/tests/integration/online_store/test_remote_online_store.py +++ b/sdk/python/tests/integration/online_store/test_remote_online_store.py @@ -1,15 +1,28 @@ import logging import os import tempfile +from datetime import timedelta from textwrap import dedent +import pandas as pd import pytest -from feast import FeatureView, OnDemandFeatureView, StreamFeatureView +from feast import ( + Entity, + FeatureView, + Field, + FileSource, + OnDemandFeatureView, + PushSource, + StreamFeatureView, +) +from feast.data_source import PushMode from feast.feature_store import FeatureStore from feast.permissions.action import AuthzedAction from feast.permissions.permission import Permission from feast.permissions.policy import RoleBasedPolicy +from feast.types import Float32, Int64 +from feast.utils import _utc_now from tests.utils.auth_permissions_util import ( PROJECT_NAME, default_store, @@ -235,7 +248,6 @@ def _create_remote_client_feature_store( if is_tls_mode and ca_trust_store_path: # configure trust store path only when is_tls_mode and ca_trust_store_path exists. 
os.environ["FEAST_CA_CERT_FILE_PATH"] = ca_trust_store_path - return FeatureStore(repo_path=repo_path) @@ -265,3 +277,139 @@ def _overwrite_remote_client_feature_store_yaml( with open(repo_config, "w") as repo_config_file: repo_config_file.write(config_content) + + +@pytest.mark.integration +@pytest.mark.rbac_remote_integration_test +@pytest.mark.parametrize( + "tls_mode", [("True", "True"), ("True", "False"), ("False", "")], indirect=True +) +def test_remote_online_store_read_write(auth_config, tls_mode): + with ( + tempfile.TemporaryDirectory() as remote_server_tmp_dir, + tempfile.TemporaryDirectory() as remote_client_tmp_dir, + ): + permissions_list = [ + Permission( + name="online_list_fv_perm", + types=FeatureView, + policy=RoleBasedPolicy(roles=["reader"]), + actions=[AuthzedAction.READ_ONLINE], + ), + Permission( + name="online_list_odfv_perm", + types=OnDemandFeatureView, + policy=RoleBasedPolicy(roles=["reader"]), + actions=[AuthzedAction.READ_ONLINE], + ), + Permission( + name="online_list_sfv_perm", + types=StreamFeatureView, + policy=RoleBasedPolicy(roles=["reader"]), + actions=[AuthzedAction.READ_ONLINE], + ), + Permission( + name="online_write_fv_perm", + types=FeatureView, + policy=RoleBasedPolicy(roles=["writer"]), + actions=[AuthzedAction.WRITE_ONLINE], + ), + Permission( + name="online_write_odfv_perm", + types=OnDemandFeatureView, + policy=RoleBasedPolicy(roles=["writer"]), + actions=[AuthzedAction.WRITE_ONLINE], + ), + Permission( + name="online_write_sfv_perm", + types=StreamFeatureView, + policy=RoleBasedPolicy(roles=["writer"]), + actions=[AuthzedAction.WRITE_ONLINE], + ), + ] + server_store, server_url, registry_path = ( + _create_server_store_spin_feature_server( + temp_dir=remote_server_tmp_dir, + auth_config=auth_config, + permissions_list=permissions_list, + tls_mode=tls_mode, + ) + ) + assert None not in (server_store, server_url, registry_path) + + client_store = _create_remote_client_feature_store( + temp_dir=remote_client_tmp_dir, + 
server_registry_path=str(registry_path), + feature_server_url=server_url, + auth_config=auth_config, + tls_mode=tls_mode, + ) + assert client_store is not None + + # Define a simple FeatureView for testing write operations + driver = Entity(name="driver_id", description="Drivers id") + + driver_hourly_stats_source = FileSource( + path="data/driver_stats.parquet", # Path is not used for online writes in this context + timestamp_field="event_timestamp", + created_timestamp_column="created", + ) + + PushSource( + name="driver_stats_push_source", + batch_source=driver_hourly_stats_source, + ) + + driver_hourly_stats_fv = FeatureView( + name="driver_hourly_stats", + entities=[driver], + ttl=timedelta(days=1), + schema=[ + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int64), + ], + source=driver_hourly_stats_source, + tags={}, + ) + + # Apply the feature view to the client store + client_store.apply([driver, driver_hourly_stats_fv]) + event_df = pd.DataFrame( + { + "driver_id": [1000, 1001], + "conv_rate": [0.56, 0.74], + "acc_rate": [0.95, 0.93], + "avg_daily_trips": [50, 45], + "event_timestamp": [pd.Timestamp(_utc_now()).round("ms")] * 2, + "created": [pd.Timestamp(_utc_now()).round("ms")] * 2, + } + ) + + # Perform the online write + client_store.push( + push_source_name="driver_stats_push_source", df=event_df, to=PushMode.ONLINE + ) + + # Verify the data by reading it back + # read_entity_keys = [entity_key_1, entity_key_2] + read_features = [ + "driver_hourly_stats_fresh:conv_rate", + "driver_hourly_stats_fresh:acc_rate", + "driver_hourly_stats_fresh:avg_daily_trips", + ] + online_features = client_store.get_online_features( + features=read_features, + entity_rows=[{"driver_id": 1000}, {"driver_id": 1001}], + ).to_dict() + + # Assertions for read data + assert online_features is not None + assert len(online_features["driver_id"]) == 2 + assert online_features["driver_id"] == [1000, 1001] + 
assert [round(val, 2) for val in online_features["conv_rate"]] == [0.56, 0.74] + assert [round(val, 2) for val in online_features["acc_rate"]] == [0.95, 0.93] + assert online_features["avg_daily_trips"] == [50, 45] + + # Clean up the applied feature view from the server store to avoid interference with other tests + server_store.teardown() From 341cd590d6e4b1cba014c3195ae1c59a391a17d5 Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Mon, 16 Jun 2025 08:37:05 -0400 Subject: [PATCH 07/13] chore: Update development-guide.md Signed-off-by: iamcodingcat --- docs/project/development-guide.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/project/development-guide.md b/docs/project/development-guide.md index 5e28ca4daf6..6912fd2c091 100644 --- a/docs/project/development-guide.md +++ b/docs/project/development-guide.md @@ -74,6 +74,7 @@ A quick list of things to keep in mind as you're making changes: - Ensure you leave a release note for any user facing changes in the PR. There is a field automatically generated in the PR request. You can write `NONE` in that field if there are no user facing changes. - Please run tests locally before submitting a PR (e.g. for Python, the [local integration tests](#local-integration-tests)) - Try to keep PRs smaller. This makes them easier to review. + - Please make sure to update any useful documentation under `docs/` that impacts the area you are contributing to. 
### Good practices to keep in mind * Fill in the description based on the default template configured when you first open the PR From 8d7c4c38373c1ed065848aa959600f4ccd7754e2 Mon Sep 17 00:00:00 2001 From: Sneh Pillai Date: Tue, 17 Jun 2025 11:53:45 -0500 Subject: [PATCH 08/13] feat: Make batch_source optional in PushSource (#5440) (#5454) * Make batch_source optional in PushSource (#5440) Signed-off-by: snehsuresh * docs: make batch_source optional in PushSource Signed-off-by: snehsuresh * refactor: remove comment Signed-off-by: snehsuresh * fix: ensure batch_source is valid DataSource after null check Signed-off-by: snehsuresh --------- Signed-off-by: snehsuresh Signed-off-by: iamcodingcat --- docs/reference/data-sources/push.md | 9 ++++++++- sdk/python/feast/data_source.py | 19 ++++++++++--------- sdk/python/feast/inference.py | 5 ++++- sdk/python/tests/unit/test_data_sources.py | 16 ++++++++++++++++ 4 files changed, 38 insertions(+), 11 deletions(-) diff --git a/docs/reference/data-sources/push.md b/docs/reference/data-sources/push.md index 7a7ef96c7ca..ddb531dcca2 100644 --- a/docs/reference/data-sources/push.md +++ b/docs/reference/data-sources/push.md @@ -6,9 +6,12 @@ Push sources allow feature values to be pushed to the online store and offline s Push sources can be used by multiple feature views. When data is pushed to a push source, Feast propagates the feature values to all the consuming feature views. -Push sources must have a batch source specified. The batch source will be used for retrieving historical features. Thus users are also responsible for pushing data to a batch data source such as a data warehouse table. When using a push source as a stream source in the definition of a feature view, a batch source doesn't need to be specified in the feature view definition explicitly. +Push sources can optionally have a batch_source specified. 
If provided, it enables retrieval of historical features and supports materialization from the offline store to the online store. However, if your features are generated post-training or are only needed online (e.g., embeddings), you can omit the batch_source. + +When a batch_source is used, users are responsible for ensuring that data is also pushed to a batch data source, such as a data warehouse. Note that when a push source is used as a stream source in a feature view definition, a batch_source does not need to be explicitly specified in the feature view itself. ## Stream sources + Streaming data sources are important sources of feature values. A typical setup with streaming data looks like: 1. Raw events come in (stream 1) @@ -20,7 +23,9 @@ Streaming data sources are important sources of feature values. A typical setup Feast allows users to push features previously registered in a feature view to the online store for fresher features. It also allows users to push batches of stream data to the offline store by specifying that the push be directed to the offline store. This will push the data to the offline store declared in the repository configuration used to initialize the feature store. ## Example (basic) + ### Defining a push source + Note that the push schema needs to also include the entity. ```python @@ -43,7 +48,9 @@ fv = FeatureView( ``` ### Pushing data + Note that the `to` parameter is optional and defaults to online but we can specify these options: `PushMode.ONLINE`, `PushMode.OFFLINE`, or `PushMode.ONLINE_AND_OFFLINE`. 
+ ```python from feast import FeatureStore import pandas as pd diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index 25475fcb4c3..fea3034dd0d 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -764,13 +764,13 @@ class PushSource(DataSource): # TODO(adchia): consider adding schema here in case where Feast manages pushing events to the offline store # TODO(adchia): consider a "mode" to support pushing raw vs transformed events - batch_source: DataSource + batch_source: Optional[DataSource] = None def __init__( self, *, name: str, - batch_source: DataSource, + batch_source: Optional[DataSource] = None, description: Optional[str] = "", tags: Optional[Dict[str, str]] = None, owner: Optional[str] = "", @@ -815,8 +815,11 @@ def get_table_column_names_and_types( @staticmethod def from_proto(data_source: DataSourceProto): - assert data_source.HasField("batch_source") - batch_source = DataSource.from_proto(data_source.batch_source) + batch_source = ( + DataSource.from_proto(data_source.batch_source) + if data_source.HasField("batch_source") + else None + ) return PushSource( name=data_source.name, @@ -827,19 +830,17 @@ def from_proto(data_source: DataSourceProto): ) def to_proto(self) -> DataSourceProto: - batch_source_proto = None - if self.batch_source: - batch_source_proto = self.batch_source.to_proto() - data_source_proto = DataSourceProto( name=self.name, type=DataSourceProto.PUSH_SOURCE, description=self.description, tags=self.tags, owner=self.owner, - batch_source=batch_source_proto, ) + if self.batch_source: + data_source_proto.batch_source.MergeFrom(self.batch_source.to_proto()) + return data_source_proto def get_table_query_string(self) -> str: diff --git a/sdk/python/feast/inference.py b/sdk/python/feast/inference.py index f2a2ee637fd..dd43d1f5bdb 100644 --- a/sdk/python/feast/inference.py +++ b/sdk/python/feast/inference.py @@ -29,7 +29,10 @@ def 
update_data_sources_with_inferred_event_timestamp_col( if isinstance(data_source, RequestSource): continue if isinstance(data_source, PushSource): - data_source = data_source.batch_source + if not isinstance(data_source.batch_source, DataSource): + continue + else: + data_source = data_source.batch_source if data_source.timestamp_field is None or data_source.timestamp_field == "": # prepare right match pattern for data source ts_column_type_regex_pattern: str diff --git a/sdk/python/tests/unit/test_data_sources.py b/sdk/python/tests/unit/test_data_sources.py index 990c5d3b698..8a2d0f44001 100644 --- a/sdk/python/tests/unit/test_data_sources.py +++ b/sdk/python/tests/unit/test_data_sources.py @@ -30,6 +30,22 @@ def test_push_with_batch(): assert push_source.batch_source.name == push_source_unproto.batch_source.name +def test_push_source_without_batch_source(): + # Create PushSource with no batch_source + push_source = PushSource(name="test_push_source") + + # Convert to proto + push_source_proto = push_source.to_proto() + + # Assert batch_source is not present in proto + assert not push_source_proto.HasField("batch_source") + + # Deserialize and check again + push_source_unproto = PushSource.from_proto(push_source_proto) + assert push_source_unproto.batch_source is None + assert push_source_unproto.name == "test_push_source" + + def test_request_source_primitive_type_to_proto(): schema = [ Field(name="f1", dtype=Float32), From 9e7096ba36274b1f3038d205430dce680edec553 Mon Sep 17 00:00:00 2001 From: Srihari Venkataramaiah Date: Wed, 18 Jun 2025 18:16:09 +0530 Subject: [PATCH 09/13] test: Add Feast Milvus Jupyter Notebook Execution for downstream testing (#5446) Signed-off-by: Srihari Signed-off-by: iamcodingcat --- .../test/e2e_rhoai/e2e_suite_test.go | 32 ++ .../test/e2e_rhoai/feast_wb_test.go | 151 ++++++ .../test/e2e_rhoai/resources/custom-nb.yaml | 157 ++++++ .../test/e2e_rhoai/resources/feast-test.ipynb | 494 ++++++++++++++++++ 
.../resources/feature_repo/__init__.py | 0 .../resources/feature_repo/example_repo.py | 42 ++ .../resources/feature_repo/feature_store.yaml | 16 + .../test/e2e_rhoai/resources/pvc.yaml | 10 + .../test/utils/notebook_util.go | 218 ++++++++ 9 files changed, 1120 insertions(+) create mode 100644 infra/feast-operator/test/e2e_rhoai/e2e_suite_test.go create mode 100644 infra/feast-operator/test/e2e_rhoai/feast_wb_test.go create mode 100644 infra/feast-operator/test/e2e_rhoai/resources/custom-nb.yaml create mode 100755 infra/feast-operator/test/e2e_rhoai/resources/feast-test.ipynb create mode 100644 infra/feast-operator/test/e2e_rhoai/resources/feature_repo/__init__.py create mode 100755 infra/feast-operator/test/e2e_rhoai/resources/feature_repo/example_repo.py create mode 100755 infra/feast-operator/test/e2e_rhoai/resources/feature_repo/feature_store.yaml create mode 100644 infra/feast-operator/test/e2e_rhoai/resources/pvc.yaml create mode 100644 infra/feast-operator/test/utils/notebook_util.go diff --git a/infra/feast-operator/test/e2e_rhoai/e2e_suite_test.go b/infra/feast-operator/test/e2e_rhoai/e2e_suite_test.go new file mode 100644 index 00000000000..86750f36e4f --- /dev/null +++ b/infra/feast-operator/test/e2e_rhoai/e2e_suite_test.go @@ -0,0 +1,32 @@ +/* +Copyright 2025 Feast Community. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2erhoai + +import ( + "fmt" + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +// Run e2e feast Notebook tests using the Ginkgo runner. +func TestNotebookRunE2E(t *testing.T) { + RegisterFailHandler(Fail) + _, _ = fmt.Fprintf(GinkgoWriter, "Feast Jupyter Notebook Test suite\n") + RunSpecs(t, "e2erhoai Feast Notebook test suite") +} diff --git a/infra/feast-operator/test/e2e_rhoai/feast_wb_test.go b/infra/feast-operator/test/e2e_rhoai/feast_wb_test.go new file mode 100644 index 00000000000..64bb1f2dea4 --- /dev/null +++ b/infra/feast-operator/test/e2e_rhoai/feast_wb_test.go @@ -0,0 +1,151 @@ +/* +Copyright 2025 Feast Community. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package e2erhoai provides end-to-end (E2E) test coverage for Feast integration with +// Red Hat OpenShift AI (RHOAI) environments. This specific test validates the functionality +// of executing a Feast Jupyter notebook within a fully configured OpenShift namespace +package e2erhoai + +import ( + "fmt" + "os" + "os/exec" + "strings" + + utils "github.com/feast-dev/feast/infra/feast-operator/test/utils" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("Feast Jupyter Notebook Testing", Ordered, func() { + const ( + namespace = "test-ns-feast-wb" + configMapName = "feast-wb-cm" + rolebindingName = "rb-feast-test" + notebookFile = "test/e2e_rhoai/resources/feast-test.ipynb" + pvcFile = "test/e2e_rhoai/resources/pvc.yaml" + notebookPVC = "jupyterhub-nb-kube-3aadmin-pvc" + testDir = "/test/e2e_rhoai" + notebookName = "feast-test.ipynb" + feastMilvusTest = "TestFeastMilvusNotebook" + ) + + BeforeAll(func() { + By(fmt.Sprintf("Creating test namespace: %s", namespace)) + Expect(utils.CreateNamespace(namespace, testDir)).To(Succeed()) + fmt.Printf("Namespace %s created successfully\n", namespace) + }) + + AfterAll(func() { + By(fmt.Sprintf("Deleting test namespace: %s", namespace)) + Expect(utils.DeleteNamespace(namespace, testDir)).To(Succeed()) + fmt.Printf("Namespace %s deleted successfully\n", namespace) + }) + + runNotebookTest := func() { + env := func(key string) string { + val, _ := os.LookupEnv(key) + return val + } + + username := utils.GetOCUser(testDir) + + // set namespace context + By(fmt.Sprintf("Setting namespace context to : %s", namespace)) + cmd := exec.Command("kubectl", "config", "set-context", "--current", "--namespace", namespace) + output, err := utils.Run(cmd, "/test/e2e_rhoai") + Expect(err).ToNot(HaveOccurred(), fmt.Sprintf( + "Failed to set namespace context to %s.\nError: %v\nOutput: %s\n", + namespace, err, output, + )) + fmt.Printf("Successfully set namespace context to: %s\n", namespace) + + // create config map + By(fmt.Sprintf("Creating Config map: %s", configMapName)) + cmd = exec.Command("kubectl", "create", "configmap", configMapName, "--from-file="+notebookFile, "--from-file=test/e2e_rhoai/resources/feature_repo") + output, err = utils.Run(cmd, "/test/e2e_rhoai") + Expect(err).ToNot(HaveOccurred(), fmt.Sprintf( + "Failed to create ConfigMap %s.\nError: %v\nOutput: %s\n", + configMapName, err, output, + )) + fmt.Printf("ConfigMap %s 
created successfully\n", configMapName) + + // create pvc + By(fmt.Sprintf("Creating Persistent volume claim: %s", notebookPVC)) + cmd = exec.Command("kubectl", "apply", "-f", "test/e2e_rhoai/resources/pvc.yaml") + _, err = utils.Run(cmd, "/test/e2e_rhoai") + ExpectWithOffset(1, err).NotTo(HaveOccurred()) + fmt.Printf("Persistent Volume Claim %s created successfully", notebookPVC) + + // create rolebinding + By(fmt.Sprintf("Creating rolebinding %s for the user", rolebindingName)) + cmd = exec.Command("kubectl", "create", "rolebinding", rolebindingName, "-n", namespace, "--role=admin", "--user="+username) + _, err = utils.Run(cmd, "/test/e2e_rhoai") + ExpectWithOffset(1, err).NotTo(HaveOccurred()) + fmt.Printf("Created rolebinding %s successfully\n", rolebindingName) + + // configure papermill notebook command execution + command := []string{ + "/bin/sh", + "-c", + fmt.Sprintf( + "pip install papermill && "+ + "mkdir -p /opt/app-root/src/feature_repo && "+ + "cp -rL /opt/app-root/notebooks/* /opt/app-root/src/feature_repo/ && "+ + "oc login --token=%s --server=%s --insecure-skip-tls-verify=true && "+ + "(papermill /opt/app-root/notebooks/%s /opt/app-root/src/output.ipynb --kernel python3 && "+ + "echo '✅ Notebook executed successfully' || "+ + "(echo '❌ Notebook execution failed' && "+ + "cp /opt/app-root/src/output.ipynb /opt/app-root/src/failed_output.ipynb && "+ + "echo '📄 Copied failed notebook to failed_output.ipynb')) && "+ + "jupyter nbconvert --to notebook --stdout /opt/app-root/src/output.ipynb || echo '⚠️ nbconvert failed' && "+ + "sleep 100; exit 0", + utils.GetOCToken("test/e2e_rhoai"), + utils.GetOCServer("test/e2e_rhoai"), + "feast-test.ipynb", + ), + } + + // Defining notebook parameters + nbParams := utils.NotebookTemplateParams{ + Namespace: namespace, + IngressDomain: utils.GetIngressDomain(testDir), + OpenDataHubNamespace: env("APPLICATIONS_NAMESPACE"), + NotebookImage: env("NOTEBOOK_IMAGE"), + NotebookConfigMapName: configMapName, + NotebookPVC: 
notebookPVC, + Username: username, + OC_TOKEN: utils.GetOCToken(testDir), + OC_SERVER: utils.GetOCServer(testDir), + NotebookFile: notebookName, + Command: "[\"" + strings.Join(command, "\",\"") + "\"]", + PipIndexUrl: env("PIP_INDEX_URL"), + PipTrustedHost: env("PIP_TRUSTED_HOST"), + FeastVerison: env("FEAST_VERSION"), + OpenAIAPIKey: env("OPENAI_API_KEY"), + } + + By("Creating Jupyter Notebook") + Expect(utils.CreateNotebook(nbParams)).To(Succeed(), "Failed to create notebook") + + By("Monitoring notebook logs") + Expect(utils.MonitorNotebookPod(namespace, "jupyter-nb-", notebookName)).To(Succeed(), "Notebook execution failed") + } + + Context("Feast Jupyter Notebook Test", func() { + It("Should create and run a "+feastMilvusTest+" successfully", runNotebookTest) + }) +}) diff --git a/infra/feast-operator/test/e2e_rhoai/resources/custom-nb.yaml b/infra/feast-operator/test/e2e_rhoai/resources/custom-nb.yaml new file mode 100644 index 00000000000..8c91cdc5f34 --- /dev/null +++ b/infra/feast-operator/test/e2e_rhoai/resources/custom-nb.yaml @@ -0,0 +1,157 @@ +# This template maybe used to spin up a custom notebook image +# i.e.: sed s/{{.IngressDomain}}/$(oc get ingresses.config/cluster -o jsonpath={.spec.domain})/g tests/resources/custom-nb.template | oc apply -f - +# resources generated: +# pod/jupyter-nb-kube-3aadmin-0 +# service/jupyter-nb-kube-3aadmin +# route.route.openshift.io/jupyter-nb-kube-3aadmin (jupyter-nb-kube-3aadmin-opendatahub.apps.tedbig412.cp.fyre.ibm.com) +# service/jupyter-nb-kube-3aadmin-tls +apiVersion: kubeflow.org/v1 +kind: Notebook +metadata: + annotations: + notebooks.opendatahub.io/inject-oauth: "true" + notebooks.opendatahub.io/last-size-selection: Small + notebooks.opendatahub.io/oauth-logout-url: https://odh-dashboard-{{.OpenDataHubNamespace}}.{{.IngressDomain}}/notebookController/kube-3aadmin/home + opendatahub.io/link: https://jupyter-nb-kube-3aadmin-{{.Namespace}}.{{.IngressDomain}}/notebook/{{.Namespace}}/jupyter-nb-kube-3aadmin + 
opendatahub.io/username: {{.Username}} + generation: 1 + labels: + app: jupyter-nb-kube-3aadmin + opendatahub.io/dashboard: "true" + opendatahub.io/odh-managed: "true" + opendatahub.io/user: {{.Username}} + name: jupyter-nb-kube-3aadmin + namespace: {{.Namespace}} +spec: + template: + spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - preference: + matchExpressions: + - key: nvidia.com/gpu.present + operator: NotIn + values: + - "true" + weight: 1 + containers: + - env: + - name: NOTEBOOK_ARGS + value: |- + --ServerApp.port=8888 + --ServerApp.token='' + --ServerApp.password='' + --ServerApp.base_url=/notebook/test-feast-wb/jupyter-nb-kube-3aadmin + --ServerApp.quit_button=False + --ServerApp.tornado_settings={"user":"{{.Username}}","hub_host":"https://odh-dashboard-{{.OpenDataHubNamespace}}.{{.IngressDomain}}","hub_prefix":"/notebookController/{{.Username}}"} + - name: JUPYTER_IMAGE + value: {{.NotebookImage}} + - name: JUPYTER_NOTEBOOK_PORT + value: "8888" + - name: PIP_INDEX_URL + value: {{.PipIndexUrl}} + - name: PIP_TRUSTED_HOST + value: {{.PipTrustedHost}} + - name: FEAST_VERSION + value: {{.FeastVerison}} + - name: OPENAI_API_KEY + value: {{.OpenAIAPIKey}} + image: {{.NotebookImage}} + command: {{.Command}} + imagePullPolicy: Always + name: jupyter-nb-kube-3aadmin + ports: + - containerPort: 8888 + name: notebook-port + protocol: TCP + resources: + limits: + cpu: "2" + memory: 3Gi + requests: + cpu: "1" + memory: 3Gi + volumeMounts: + - mountPath: /opt/app-root/src + name: jupyterhub-nb-kube-3aadmin-pvc + - mountPath: /opt/app-root/notebooks + name: {{.NotebookConfigMapName}} + workingDir: /opt/app-root/src + - args: + - --provider=openshift + - --https-address=:8443 + - --http-address= + - --openshift-service-account=jupyter-nb-kube-3aadmin + - --cookie-secret-file=/etc/oauth/config/cookie_secret + - --cookie-expire=24h0m0s + - --tls-cert=/etc/tls/private/tls.crt + - --tls-key=/etc/tls/private/tls.key + - 
--upstream=http://localhost:8888 + - --upstream-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + - --skip-auth-regex=^(?:/notebook/test-feast-wb/jupyter-nb-kube-3aadmin)?/api$ + - --email-domain=* + - --skip-provider-button + - --openshift-sar={"verb":"get","resource":"notebooks","resourceAPIGroup":"kubeflow.org","resourceName":"jupyter-nb-kube-3aadmin","namespace":$(NAMESPACE)} + - --logout-url=https://odh-dashboard-{{.OpenDataHubNamespace}}.{{.IngressDomain}}/notebookController/kube-3aadmin/home + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + image: registry.redhat.io/openshift4/ose-oauth-proxy:v4.10 + imagePullPolicy: Always + livenessProbe: + failureThreshold: 3 + httpGet: + path: /oauth/healthz + port: oauth-proxy + scheme: HTTPS + initialDelaySeconds: 30 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 1 + name: oauth-proxy + ports: + - containerPort: 8443 + name: oauth-proxy + protocol: TCP + readinessProbe: + failureThreshold: 3 + httpGet: + path: /oauth/healthz + port: oauth-proxy + scheme: HTTPS + initialDelaySeconds: 5 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 1 + resources: + limits: + cpu: 100m + memory: 64Mi + requests: + cpu: 100m + memory: 64Mi + volumeMounts: + - mountPath: /etc/oauth/config + name: oauth-config + - mountPath: /etc/tls/private + name: tls-certificates + enableServiceLinks: false + serviceAccountName: jupyter-nb-kube-3aadmin + volumes: + - name: jupyterhub-nb-kube-3aadmin-pvc + persistentVolumeClaim: + claimName: {{.NotebookPVC}} + - name: oauth-config + secret: + defaultMode: 420 + secretName: jupyter-nb-kube-3aadmin-oauth-config + - name: tls-certificates + secret: + defaultMode: 420 + secretName: jupyter-nb-kube-3aadmin-tls + - name: {{.NotebookConfigMapName}} + configMap: + name: {{.NotebookConfigMapName}} diff --git a/infra/feast-operator/test/e2e_rhoai/resources/feast-test.ipynb b/infra/feast-operator/test/e2e_rhoai/resources/feast-test.ipynb new file 
mode 100755 index 00000000000..d3fb72eb57b --- /dev/null +++ b/infra/feast-operator/test/e2e_rhoai/resources/feast-test.ipynb @@ -0,0 +1,494 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import subprocess\n", + "\n", + "feast_version = os.environ.get(\"FEAST_VERSION\")\n", + "subprocess.run([\"pip\", \"install\", f\"feast=={feast_version}\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import feast\n", + "\n", + "actual_version = feast.__version__\n", + "assert actual_version == os.environ.get(\"FEAST_VERSION\"), (\n", + " f\"❌ Feast version mismatch. Expected: {os.environ.get('FEAST_VERSION')}, Found: {actual_version}\"\n", + ")\n", + "print(f\"✅ Successfully installed Feast version: {actual_version}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%cd /opt/app-root/src/feature_repo\n", + "!ls -l" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!cat /opt/app-root/src/feature_repo/feature_store.yaml" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!mkdir -p data\n", + "!wget -O data/city_wikipedia_summaries_with_embeddings.parquet https://raw.githubusercontent.com/opendatahub-io/feast/master/examples/rag/feature_repo/data/city_wikipedia_summaries_with_embeddings.parquet" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd \n", + "\n", + "df = pd.read_parquet(\"./data/city_wikipedia_summaries_with_embeddings.parquet\")\n", + "df['vector'] = df['vector'].apply(lambda x: x.tolist())\n", + "embedding_length = len(df['vector'][0])\n", + "assert embedding_length == 384, f\"❌ Expected vector length 384, but 
got {embedding_length}\"\n", + "print(f'embedding length = {embedding_length}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import display\n", + "\n", + "display(df.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -q pymilvus transformers torch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import subprocess\n", + "\n", + "# Run `feast apply` and capture output\n", + "result = subprocess.run([\"feast\", \"apply\"], capture_output=True, text=True)\n", + "\n", + "# Combine stdout and stderr in case important info is in either\n", + "output = result.stdout + result.stderr\n", + "\n", + "# Print full output for debugging (optional)\n", + "print(output)\n", + "\n", + "# Expected substrings to validate\n", + "expected_messages = [\n", + " \"Applying changes for project rag\",\n", + " \"Connecting to Milvus in local mode\",\n", + " \"Deploying infrastructure for city_embeddings\"\n", + "]\n", + "\n", + "# Validate all expected messages are in output\n", + "for msg in expected_messages:\n", + " assert msg in output, f\"❌ Expected message not found: '{msg}'\"\n", + "\n", + "print(\"✅ All expected messages were found in the output.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "from feast import FeatureStore\n", + "\n", + "store = FeatureStore(repo_path=\".\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import io\n", + "import sys\n", + "\n", + "# Capture stdout\n", + "captured_output = io.StringIO()\n", + "sys_stdout_backup = sys.stdout\n", + "sys.stdout = captured_output\n", + "\n", + "# Call the function\n", + 
"store.write_to_online_store(feature_view_name='city_embeddings', df=df)\n", + "\n", + "# Restore stdout\n", + "sys.stdout = sys_stdout_backup\n", + "\n", + "# Get the output\n", + "output_str = captured_output.getvalue()\n", + "\n", + "# Expected message\n", + "expected_msg = \"Connecting to Milvus in local mode using data/online_store.db\"\n", + "\n", + "# Validate\n", + "assert expected_msg in output_str, f\"❌ Expected message not found.\\nExpected: {expected_msg}\\nActual Output:\\n{output_str}\"\n", + "\n", + "print(\"✅ Output message validated successfully.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# List batch feature views\n", + "batch_fvs = store.list_batch_feature_views()\n", + "\n", + "# Print the number of batch feature views\n", + "print(\"Number of batch feature views:\", len(batch_fvs))\n", + "\n", + "# Assert that the result is an integer and non-negative\n", + "assert isinstance(len(batch_fvs), int), \"Result is not an integer\"\n", + "assert len(batch_fvs) >= 0, \"Feature view count is negative\"\n", + "\n", + "print(\"Feature views listed correctly ✅\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from feast import FeatureStore\n", + "\n", + "# Initialize store (if not already)\n", + "store = FeatureStore(repo_path=\".\") # Adjust path if necessary\n", + "\n", + "# Retrieve the feature view\n", + "fv = store.get_feature_view(\"city_embeddings\")\n", + "\n", + "# Assert name\n", + "assert fv.name == \"city_embeddings\", \"Feature view name mismatch\"\n", + "\n", + "# Assert entities\n", + "assert fv.entities == [\"item_id\"], f\"Expected entities ['item_id'], got {fv.entities}\"\n", + "\n", + "# Assert feature names and vector index settings\n", + "feature_names = [f.name for f in fv.features]\n", + "assert \"vector\" in feature_names, \"Missing 'vector' feature\"\n", + "assert \"state\" in 
feature_names, \"Missing 'state' feature\"\n", + "assert \"sentence_chunks\" in feature_names, \"Missing 'sentence_chunks' feature\"\n", + "assert \"wiki_summary\" in feature_names, \"Missing 'wiki_summary' feature\"\n", + "\n", + "# Assert 'vector' feature is a vector index with COSINE metric\n", + "vector_feature = next(f for f in fv.features if f.name == \"vector\")\n", + "assert vector_feature.vector_index, \"'vector' feature is not indexed\"\n", + "assert vector_feature.vector_search_metric == \"COSINE\", \"Expected COSINE search metric for 'vector'\"\n", + "\n", + "print(\"All assertions passed ✅\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from feast.entity import Entity\n", + "from feast.types import ValueType\n", + "entity = Entity(\n", + " name=\"item_id1\",\n", + " value_type=ValueType.INT64,\n", + " description=\"test id\",\n", + " tags={\"team\": \"feast\"},\n", + ")\n", + "store.apply(entity)\n", + "assert any(e.name == \"item_id1\" for e in store.list_entities())\n", + "print(\"Entity added ✅\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "entity_to_delete = store.get_entity(\"item_id1\")\n", + "\n", + "store.apply(\n", + " objects=[],\n", + " objects_to_delete=[entity_to_delete],\n", + " partial=False\n", + ")\n", + "\n", + "# Validation after deletion\n", + "assert not any(e.name == \"item_id1\" for e in store.list_entities())\n", + "print(\"Entity deleted ✅\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# List batch feature views\n", + "batch_fvs = store.list_batch_feature_views()\n", + "assert len(batch_fvs) == 1\n", + "\n", + "# Print count\n", + "print(f\"Found {len(batch_fvs)} batch feature view(s) ✅\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"pymilvus_client = store._provider._online_store._connect(store.config)\n", + "COLLECTION_NAME = pymilvus_client.list_collections()[0]\n", + "\n", + "milvus_query_result = pymilvus_client.query(\n", + " collection_name=COLLECTION_NAME,\n", + " filter=\"item_id == '0'\",\n", + ")\n", + "pd.DataFrame(milvus_query_result[0]).head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn.functional as F\n", + "from feast import FeatureStore\n", + "from pymilvus import MilvusClient, DataType, FieldSchema\n", + "from transformers import AutoTokenizer, AutoModel\n", + "from example_repo import city_embeddings_feature_view, item\n", + "\n", + "TOKENIZER = \"sentence-transformers/all-MiniLM-L6-v2\"\n", + "MODEL = \"sentence-transformers/all-MiniLM-L6-v2\"\n", + "\n", + "def mean_pooling(model_output, attention_mask):\n", + " token_embeddings = model_output[\n", + " 0\n", + " ] # First element of model_output contains all token embeddings\n", + " input_mask_expanded = (\n", + " attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()\n", + " )\n", + " return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(\n", + " input_mask_expanded.sum(1), min=1e-9\n", + " )\n", + "\n", + "def run_model(sentences, tokenizer, model):\n", + " encoded_input = tokenizer(\n", + " sentences, padding=True, truncation=True, return_tensors=\"pt\"\n", + " )\n", + " # Compute token embeddings\n", + " with torch.no_grad():\n", + " model_output = model(**encoded_input)\n", + "\n", + " sentence_embeddings = mean_pooling(model_output, encoded_input[\"attention_mask\"])\n", + " sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)\n", + " return sentence_embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "question = \"Which city has the largest population in New York?\"\n", + "\n", + "tokenizer 
= AutoTokenizer.from_pretrained(TOKENIZER)\n", + "model = AutoModel.from_pretrained(MODEL)\n", + "query_embedding = run_model(question, tokenizer, model)\n", + "query = query_embedding.detach().cpu().numpy().tolist()[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import display\n", + "\n", + "# Retrieve top k documents\n", + "context_data = store.retrieve_online_documents_v2(\n", + " features=[\n", + " \"city_embeddings:vector\",\n", + " \"city_embeddings:item_id\",\n", + " \"city_embeddings:state\",\n", + " \"city_embeddings:sentence_chunks\",\n", + " \"city_embeddings:wiki_summary\",\n", + " ],\n", + " query=query,\n", + " top_k=3,\n", + " distance_metric='COSINE',\n", + ").to_df()\n", + "display(context_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def format_documents(context_df):\n", + " output_context = \"\"\n", + " unique_documents = context_df.drop_duplicates().apply(\n", + " lambda x: \"City & State = {\" + x['state'] +\"}\\nSummary = {\" + x['wiki_summary'].strip()+\"}\",\n", + " axis=1,\n", + " )\n", + " for i, document_text in enumerate(unique_documents):\n", + " output_context+= f\"****START DOCUMENT {i}****\\n{document_text.strip()}\\n****END DOCUMENT {i}****\"\n", + " return output_context" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "RAG_CONTEXT = format_documents(context_data[['state', 'wiki_summary']])\n", + "print(RAG_CONTEXT)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "FULL_PROMPT = f\"\"\"\n", + "You are an assistant for answering questions about states. You will be provided documentation from Wikipedia. 
Provide a conversational answer.\n", + "If you don't know the answer, just say \"I do not know.\" Don't make up an answer.\n", + "\n", + "Here are document(s) you should use when answer the users question:\n", + "{RAG_CONTEXT}\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install openai" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from openai import OpenAI\n", + "\n", + "client = OpenAI(\n", + " api_key=os.environ.get(\"OPENAI_API_KEY\"),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response = client.chat.completions.create(\n", + " model=\"gpt-4o-mini\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": FULL_PROMPT},\n", + " {\"role\": \"user\", \"content\": question}\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The expected output\n", + "expected_output = (\n", + " \"New York City\"\n", + ")\n", + "\n", + "# Actual output from response\n", + "actual_output = '\\n'.join([c.message.content.strip() for c in response.choices])\n", + "\n", + "# Validate\n", + "assert expected_output in actual_output, f\"❌ Output mismatch:\\nExpected: {expected_output}\\nActual: {actual_output}\"\n", + "\n", + "print(\"✅ Output matches expected response.\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git 
a/infra/feast-operator/test/e2e_rhoai/resources/feature_repo/__init__.py b/infra/feast-operator/test/e2e_rhoai/resources/feature_repo/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/infra/feast-operator/test/e2e_rhoai/resources/feature_repo/example_repo.py b/infra/feast-operator/test/e2e_rhoai/resources/feature_repo/example_repo.py new file mode 100755 index 00000000000..7a37d99d495 --- /dev/null +++ b/infra/feast-operator/test/e2e_rhoai/resources/feature_repo/example_repo.py @@ -0,0 +1,42 @@ +from datetime import timedelta + +from feast import ( + FeatureView, + Field, + FileSource, +) +from feast.data_format import ParquetFormat +from feast.types import Float32, Array, String, ValueType +from feast import Entity + +item = Entity( + name="item_id", + description="Item ID", + value_type=ValueType.INT64, +) + +parquet_file_path = "./data/city_wikipedia_summaries_with_embeddings.parquet" + +source = FileSource( + file_format=ParquetFormat(), + path=parquet_file_path, + timestamp_field="event_timestamp", +) + +city_embeddings_feature_view = FeatureView( + name="city_embeddings", + entities=[item], + schema=[ + Field( + name="vector", + dtype=Array(Float32), + vector_index=True, + vector_search_metric="COSINE", + ), + Field(name="state", dtype=String), + Field(name="sentence_chunks", dtype=String), + Field(name="wiki_summary", dtype=String), + ], + source=source, + ttl=timedelta(hours=2), +) diff --git a/infra/feast-operator/test/e2e_rhoai/resources/feature_repo/feature_store.yaml b/infra/feast-operator/test/e2e_rhoai/resources/feature_repo/feature_store.yaml new file mode 100755 index 00000000000..f8f9cc293dc --- /dev/null +++ b/infra/feast-operator/test/e2e_rhoai/resources/feature_repo/feature_store.yaml @@ -0,0 +1,16 @@ +project: rag +provider: local +registry: data/registry.db +online_store: + type: milvus + path: data/online_store.db + vector_enabled: true + embedding_dim: 384 + index_type: "FLAT" + metric_type: "COSINE" 
+offline_store: + type: file +entity_key_serialization_version: 3 +auth: + type: no_auth + diff --git a/infra/feast-operator/test/e2e_rhoai/resources/pvc.yaml b/infra/feast-operator/test/e2e_rhoai/resources/pvc.yaml new file mode 100644 index 00000000000..a9e8c1be299 --- /dev/null +++ b/infra/feast-operator/test/e2e_rhoai/resources/pvc.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: jupyterhub-nb-kube-3aadmin-pvc +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi diff --git a/infra/feast-operator/test/utils/notebook_util.go b/infra/feast-operator/test/utils/notebook_util.go new file mode 100644 index 00000000000..28bf64a67eb --- /dev/null +++ b/infra/feast-operator/test/utils/notebook_util.go @@ -0,0 +1,218 @@ +package utils + +import ( + "bytes" + "fmt" + "os" + "os/exec" + "strings" + "text/template" + "time" + + . "github.com/onsi/gomega" +) + +type NotebookTemplateParams struct { + Namespace string + IngressDomain string + OpenDataHubNamespace string + NotebookImage string + NotebookConfigMapName string + NotebookPVC string + Username string + OC_TOKEN string + OC_SERVER string + NotebookFile string + Command string + PipIndexUrl string + PipTrustedHost string + FeastVerison string + OpenAIAPIKey string +} + +// CreateNotebook renders a notebook manifest from a template and applies it using kubectl. 
+func CreateNotebook(params NotebookTemplateParams) error { + content, err := os.ReadFile("test/e2e_rhoai/resources/custom-nb.yaml") + if err != nil { + return fmt.Errorf("failed to read template file: %w", err) + } + + tmpl, err := template.New("notebook").Parse(string(content)) + if err != nil { + return fmt.Errorf("failed to parse template: %w", err) + } + + var rendered bytes.Buffer + if err := tmpl.Execute(&rendered, params); err != nil { + return fmt.Errorf("failed to substitute template: %w", err) + } + + tmpFile, err := os.CreateTemp("", "notebook-*.yaml") + if err != nil { + return fmt.Errorf("failed to create temp file: %w", err) + } + + // Defer cleanup of temp file + defer func() { + if err := os.Remove(tmpFile.Name()); err != nil { + fmt.Printf("warning: failed to remove temp file %s: %v", tmpFile.Name(), err) + } + }() + + if _, err := tmpFile.Write(rendered.Bytes()); err != nil { + return fmt.Errorf("failed to write to temp file: %w", err) + } + + if err := tmpFile.Close(); err != nil { + return fmt.Errorf("failed to close temp file: %w", err) + } + + // fmt.Println("Notebook manifest applied successfully") + cmd := exec.Command("kubectl", "apply", "-f", tmpFile.Name(), "-n", params.Namespace) + output, err := Run(cmd, "/test/e2e_rhoai") + Expect(err).ToNot(HaveOccurred(), fmt.Sprintf( + "Failed to create Notebook %s.\nError: %v\nOutput: %s\n", + tmpFile.Name(), err, output, + )) + fmt.Printf("Notebook %s created successfully\n", tmpFile.Name()) + return nil +} + +// MonitorNotebookPod waits for a notebook pod to reach Running state and verifies execution logs. 
+func MonitorNotebookPod(namespace, podPrefix string, notebookName string) error { + const successMarker = "Notebook executed successfully" + const failureMarker = "Notebook execution failed" + const pollInterval = 5 * time.Second + var pod *PodInfo + + fmt.Println("🔄 Waiting for notebook pod to reach Running & Ready state...") + + foundRunningReady := false + for i := 0; i < 36; i++ { + var err error + pod, err = getPodByPrefix(namespace, podPrefix) + if err != nil { + fmt.Printf("⏳ Pod not created yet: %v\n", err) + time.Sleep(pollInterval) + continue + } + if pod.Status == "Running" { + fmt.Printf("✅ Pod %s is Running and Ready.\n", pod.Name) + foundRunningReady = true + break + } + fmt.Printf("⏳ Pod %s not ready yet. Phase: %s\n", pod.Name, pod.Status) + time.Sleep(pollInterval) + } + + if !foundRunningReady { + return fmt.Errorf("❌ Pod %s did not reach Running & Ready state within 3 minutes", podPrefix) + } + + // Start monitoring notebook logs + fmt.Printf("⏳ Monitoring Notebook pod %s Logs for Jupyter Notebook %s execution status\n", pod.Name, notebookName) + + for i := 0; i < 60; i++ { + logs, err := getPodLogs(namespace, pod.Name) + if err != nil { + fmt.Printf("⏳ Failed to get logs for pod %s: %v\n", pod.Name, err) + time.Sleep(pollInterval) + continue + } + + if strings.Contains(logs, successMarker) { + Expect(logs).To(ContainSubstring(successMarker)) + fmt.Printf("✅ Jupyter Notebook pod %s executed successfully.\n", pod.Name) + return nil + } + + if strings.Contains(logs, failureMarker) { + fmt.Printf("❌ Notebook pod %s failed: failure marker found.\n", pod.Name) + return fmt.Errorf("Notebook failed in execution. Logs:\n%s", logs) + } + + time.Sleep(pollInterval) + } + + return fmt.Errorf("❌ Timed out waiting for notebook pod %s to complete", podPrefix) +} + +type PodInfo struct { + Name string + Status string +} + +// returns the first pod matching a name prefix in the given namespace. 
+func getPodByPrefix(namespace, prefix string) (*PodInfo, error) { + cmd := exec.Command( + "kubectl", "get", "pods", "-n", namespace, + "-o", "jsonpath={range .items[*]}{.metadata.name} {.status.phase}{\"\\n\"}{end}", + ) + output, err := Run(cmd, "/test/e2e_rhoai") + if err != nil { + return nil, fmt.Errorf("failed to get pods: %w", err) + } + + lines := strings.Split(strings.TrimSpace(string(output)), "\n") + for _, line := range lines { + parts := strings.Fields(line) + if len(parts) < 2 { + continue + } + name := parts[0] + status := parts[1] + + if strings.HasPrefix(name, prefix) { + return &PodInfo{ + Name: name, + Status: status, + }, nil + } + } + + return nil, fmt.Errorf("no pod found with prefix %q in namespace %q", prefix, namespace) +} + +// retrieves the logs of a specified pod in the given namespace. +func getPodLogs(namespace, podName string) (string, error) { + cmd := exec.Command("kubectl", "logs", "-n", namespace, podName) + var out bytes.Buffer + var stderr bytes.Buffer + cmd.Stdout = &out + cmd.Stderr = &stderr + + err := cmd.Run() + if err != nil { + return "", fmt.Errorf("error getting pod logs: %v - %s", err, stderr.String()) + } + + return out.String(), nil +} + +// returns the OpenShift cluster ingress domain. +func GetIngressDomain(testDir string) string { + cmd := exec.Command("oc", "get", "ingresses.config.openshift.io", "cluster", "-o", "jsonpath={.spec.domain}") + output, _ := Run(cmd, testDir) + return string(output) +} + +// returns the current OpenShift user authentication token. +func GetOCToken(testDir string) string { + cmd := exec.Command("oc", "whoami", "--show-token") + output, _ := Run(cmd, testDir) + return string(output) +} + +// returns the OpenShift API server URL for the current user. 
+func GetOCServer(testDir string) string { + cmd := exec.Command("oc", "whoami", "--show-server") + output, _ := Run(cmd, testDir) + return string(output) +} + +// returns the OpenShift cluster logged in Username +func GetOCUser(testDir string) string { + cmd := exec.Command("oc", "whoami") + output, _ := Run(cmd, testDir) + return strings.TrimSpace(string(output)) +} From cc0a0ed66288e5b3d9a6ea809b129f7f1c2dd651 Mon Sep 17 00:00:00 2001 From: iamcodingcat Date: Wed, 25 Jun 2025 09:21:13 +0900 Subject: [PATCH 10/13] feat: resolve conflicts --- docs/getting-started/quickstart.md | 2 +- ...entity-reserialization-of-from-v2-to-v3.md | 8 ++-- .../running-feast-in-production.md | 2 +- .../starting-feast-servers-tls-mode.md | 6 +-- docs/reference/batch-materialization/spark.md | 2 +- docs/reference/feast-cli-commands.md | 2 +- docs/reference/online-stores/couchbase.md | 2 +- docs/reference/online-stores/remote.md | 2 +- .../02_Deploying_the_Feature_Store.ipynb | 22 +++++++---- .../03_Credit_Risk_Model_Training.ipynb | 2 +- examples/java-demo/README.md | 5 +-- .../feature_repo/application-override.yaml | 2 +- .../java-demo/feature_repo/feature_store.yaml | 2 +- .../kind-quickstart/client/feature_store.yaml | 2 +- .../feature_repo/feature_store.yaml | 2 +- examples/python-helm-demo/README.md | 5 +-- .../feature_repo/feature_store.yaml.template | 2 +- .../python-helm-demo/test/feature_store.yaml | 2 +- examples/rbac-local/client/feature_store.yaml | 2 +- .../oidc/feature_repo/feature_store.yaml | 2 +- .../server/feature_repo/feature_store.yaml | 2 +- .../server/k8s/feature_store_offline.yaml | 2 +- .../server/k8s/feature_store_online.yaml | 2 +- .../server/k8s/feature_store_registry.yaml | 2 +- .../server/oidc/feature_store_offline.yaml | 2 +- .../server/oidc/feature_store_online.yaml | 2 +- .../server/oidc/feature_store_registry.yaml | 2 +- .../offline_client/feature_store.yaml | 2 +- .../feature_repo/feature_store.yaml | 2 +- .../feast-demo-quickstart.ipynb | 38 
+++++++++---------- .../feast/onlinestore/redisonlinestore.go | 13 ++----- .../onlinestore/redisonlinestore_test.go | 10 ++--- infra/charts/feast/README.md | 2 +- infra/charts/feast/README.md.gotmpl | 2 +- java/serving/README.md | 4 +- .../retriever/EntityKeySerializerV2.java | 5 +-- .../test/java/feast/serving/it/TestUtils.java | 2 +- .../docker-compose/feast10/feature_store.yaml | 2 +- sdk/python/feast/infra/key_encoding_utils.py | 32 ++++++++++++---- .../couchbase_online_store/README.md | 2 +- .../hazelcast_online_store.py | 4 +- .../feast/infra/online_stores/helpers.py | 4 +- .../online_stores/mysql_online_store/mysql.py | 4 +- .../singlestore_online_store/singlestore.py | 4 +- .../feast/infra/online_stores/sqlite.py | 1 - sdk/python/feast/infra/utils/hbase_utils.py | 6 +-- .../snowflake/snowpark/snowflake_udfs.py | 2 +- sdk/python/feast/repo_config.py | 14 +++---- .../athena/feature_repo/feature_store.yaml | 2 +- .../aws/feature_repo/feature_store.yaml | 2 +- .../cassandra/feature_repo/feature_store.yaml | 2 +- .../couchbase/feature_repo/feature_store.yaml | 2 +- .../gcp/feature_repo/feature_store.yaml | 2 +- .../hazelcast/feature_repo/feature_store.yaml | 2 +- .../hbase/feature_repo/feature_store.yaml | 2 +- .../local/feature_repo/feature_store.yaml | 2 +- .../minimal/feature_repo/feature_store.yaml | 2 +- .../postgres/feature_repo/feature_store.yaml | 2 +- .../snowflake/feature_repo/feature_store.yaml | 2 +- .../spark/feature_repo/feature_store.yaml | 2 +- .../feast/templates/spark/feature_store.yaml | 2 +- .../compute_engines/spark/test_compute.py | 2 +- .../feature_repos/repo_configuration.py | 10 +---- .../universal/data_sources/file.py | 6 +-- .../test_spark_materialization_engine.py | 2 +- .../materialization/test_lambda.py | 3 +- .../online_store/test_remote_online_store.py | 2 +- .../registration/test_feature_store.py | 2 +- .../registration/test_inference.py | 6 +-- sdk/python/tests/unit/cli/test_cli.py | 6 +-- 
.../infra/offline_stores/test_redshift.py | 2 +- .../infra/offline_stores/test_snowflake.py | 2 +- .../test_dynamodb_online_store.py | 2 +- .../unit/infra/online_store/test_redis.py | 4 +- .../infra/scaffolding/test_repo_config.py | 26 ++++++------- .../unit/infra/test_inference_unit_tests.py | 14 +++---- .../unit/infra/test_key_encoding_utils.py | 30 ++------------- .../test_local_feature_store.py | 2 +- .../online_store/test_online_retrieval.py | 4 +- .../unit/online_store/test_online_writes.py | 4 +- sdk/python/tests/unit/test_offline_server.py | 2 +- .../test_on_demand_pandas_transformation.py | 6 +-- .../test_on_demand_python_transformation.py | 8 ++-- .../tests/unit/test_serialization_version.py | 4 +- .../unit/test_substrait_transformation.py | 2 +- .../tests/utils/auth_permissions_util.py | 2 +- sdk/python/tests/utils/cli_repo_creator.py | 2 +- .../tests/utils/dynamo_table_creator.py | 2 +- sdk/python/tests/utils/e2e_test_validation.py | 2 +- ui/feature_repo/feature_store.yaml | 2 +- 90 files changed, 204 insertions(+), 228 deletions(-) diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index a83897005fd..184167ebe29 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -113,7 +113,7 @@ provider: local online_store: type: sqlite path: data/online_store.db -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 ``` {% endtab %} diff --git a/docs/how-to-guides/entity-reserialization-of-from-v2-to-v3.md b/docs/how-to-guides/entity-reserialization-of-from-v2-to-v3.md index dc43e481a96..8135c350fd7 100644 --- a/docs/how-to-guides/entity-reserialization-of-from-v2-to-v3.md +++ b/docs/how-to-guides/entity-reserialization-of-from-v2-to-v3.md @@ -1,8 +1,8 @@ -# Entity Key Re-Serialization from Version 2 to 3 +# Entity Key Serialization from Version 2 to Version 3 -Entity Key Serialization version 2 will soon be deprecated, hence we need to shift the serialization and 
deserilization to version 3. +Entity Key Serialization version 2 is now deprecated. All new and existing Feast deployments should shift to using serialization version 3. -But here comes the challegnge where existing FeatuteViews on stores has written features with version 2. A version 2 serialized entity key cant be retrived using version 3 deserilization algorithm. +However, a challenge arises when existing FeatureViews in online or offline stores have written features with version 2. A version 2 serialized entity key cannot be retrieved using the version 3 deserialization algorithm. ## Reserialize the Feature Views entity Keys to version 3 @@ -10,7 +10,7 @@ The solution is to reserialize the entity keys from version 2 to version 3. Follow the following procedures to reserialize the entity key to version 3 in feature View in an offline / online store. -In hosrt, you need to iterate through all the feature views in your Feast repository, retrieve their serialized entity keys (if stored in version 2), reserialize them to version 3, and then update the online/offline store or wherever the serialized keys are stored. +In short, you need to iterate through all the feature views in your Feast repository, retrieve their serialized entity keys (if stored in version 2), reserialize them to version 3, and then update the online/offline store or wherever the serialized keys are stored. ### 1. 
Initialize the Feature Store diff --git a/docs/how-to-guides/running-feast-in-production.md b/docs/how-to-guides/running-feast-in-production.md index 7aeb9e96650..65ab2c82d9d 100644 --- a/docs/how-to-guides/running-feast-in-production.md +++ b/docs/how-to-guides/running-feast-in-production.md @@ -88,7 +88,7 @@ def materialize(data_interval_start=None, data_interval_end=None): provider="aws", offline_store="file", online_store=DynamoDBOnlineStoreConfig(region="us-west-2"), - entity_key_serialization_version=2 + entity_key_serialization_version=3 ) store = FeatureStore(config=repo_config) # Option 1: materialize just one feature view diff --git a/docs/how-to-guides/starting-feast-servers-tls-mode.md b/docs/how-to-guides/starting-feast-servers-tls-mode.md index a868e17cf96..ffc7e5d9e90 100644 --- a/docs/how-to-guides/starting-feast-servers-tls-mode.md +++ b/docs/how-to-guides/starting-feast-servers-tls-mode.md @@ -83,7 +83,7 @@ online_store: path: http://localhost:6566 type: remote cert: /path/to/cert.pem -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 auth: type: no_auth ``` @@ -121,7 +121,7 @@ online_store: path: http://localhost:6566 type: remote cert: /path/to/cert.pem -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 auth: type: no_auth ``` @@ -161,7 +161,7 @@ online_store: path: http://localhost:6566 type: remote cert: /path/to/cert.pem -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 offline_store: type: remote host: localhost diff --git a/docs/reference/batch-materialization/spark.md b/docs/reference/batch-materialization/spark.md index 27a1388c48e..7a61c0ccb6d 100644 --- a/docs/reference/batch-materialization/spark.md +++ b/docs/reference/batch-materialization/spark.md @@ -47,7 +47,7 @@ repo_config = RepoConfig( "partitions": 10 }, online_store=DynamoDBOnlineStoreConfig(region="us-west-1"), - entity_key_serialization_version=2 + entity_key_serialization_version=3 ) store = 
FeatureStore(config=repo_config) diff --git a/docs/reference/feast-cli-commands.md b/docs/reference/feast-cli-commands.md index 712df18a6b6..71e0ab3c76d 100644 --- a/docs/reference/feast-cli-commands.md +++ b/docs/reference/feast-cli-commands.md @@ -79,7 +79,7 @@ online_store: path: data/online_store.db offline_store: type: dask -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 auth: type: no_auth ``` diff --git a/docs/reference/online-stores/couchbase.md b/docs/reference/online-stores/couchbase.md index 2878deb97ee..99bdd710793 100644 --- a/docs/reference/online-stores/couchbase.md +++ b/docs/reference/online-stores/couchbase.md @@ -44,7 +44,7 @@ online_store: password: password # Couchbase password from access credentials bucket_name: feast # Couchbase bucket name, defaults to feast kv_port: 11210 # Couchbase key-value port, defaults to 11210. Required if custom ports are used. -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 ``` {% endcode %} diff --git a/docs/reference/online-stores/remote.md b/docs/reference/online-stores/remote.md index aa97a495baa..b6734ccc1ec 100644 --- a/docs/reference/online-stores/remote.md +++ b/docs/reference/online-stores/remote.md @@ -17,7 +17,7 @@ online_store: path: http://localhost:6566 type: remote cert: /path/to/cert.pem -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 auth: type: no_auth ``` diff --git a/examples/credit-risk-end-to-end/02_Deploying_the_Feature_Store.ipynb b/examples/credit-risk-end-to-end/02_Deploying_the_Feature_Store.ipynb index f736cdaed93..6379ba46421 100644 --- a/examples/credit-risk-end-to-end/02_Deploying_the_Feature_Store.ipynb +++ b/examples/credit-risk-end-to-end/02_Deploying_the_Feature_Store.ipynb @@ -62,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "8bd21689-4a8e-4b0c-937d-0911df9db1d3", "metadata": {}, "outputs": [], @@ -102,7 +102,7 @@ }, { "cell_type": "code", - 
"execution_count": 2, + "execution_count": 6, "id": "b3757221-2037-49eb-867f-b9529fec06e2", "metadata": {}, "outputs": [ @@ -110,7 +110,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Writing Feature_Store/feature_store.yaml\n" + "Overwriting Feature_Store/feature_store.yaml\n" ] } ], @@ -125,7 +125,7 @@ "online_store:\n", " type: sqlite\n", " path: data/online_store.db\n", - "entity_key_serialization_version: 2" + "entity_key_serialization_version: 3" ] }, { @@ -383,10 +383,18 @@ "id": "c764a60a-b911-41a8-ba8f-7ef0a0bc7257", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jyejare/gitrepos/feast/.venv/lib/python3.11/site-packages/pydantic/_internal/_fields.py:192: UserWarning: Field name \"vector_enabled\" in \"SqliteOnlineStoreConfig\" shadows an attribute in parent \"VectorStoreConfig\"\n", + " warnings.warn(\n" + ] + }, { "data": { "text/plain": [ - "RepoConfig(project='loan_applications', provider='local', registry_config='data/registry.db', online_config={'type': 'sqlite', 'path': 'data/online_store.db'}, offline_config={'type': 'dask'}, batch_engine_config='local', feature_server=None, flags=None, repo_path=PosixPath('Feature_Store'), entity_key_serialization_version=2, coerce_tz_aware=True)" + "RepoConfig(project='loan_applications', provider='local', registry_config='data/registry.db', online_config={'type': 'sqlite', 'path': 'data/online_store.db'}, auth={'type': 'no_auth'}, offline_config={'type': 'dask'}, batch_engine_config='local', feature_server=None, flags=None, repo_path=PosixPath('Feature_Store'), entity_key_serialization_version=3, coerce_tz_aware=True)" ] }, "execution_count": 7, @@ -779,7 +787,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -793,7 +801,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": 
"3.11.13" } }, "nbformat": 4, diff --git a/examples/credit-risk-end-to-end/03_Credit_Risk_Model_Training.ipynb b/examples/credit-risk-end-to-end/03_Credit_Risk_Model_Training.ipynb index ca0d0e29d95..c7148b89c30 100644 --- a/examples/credit-risk-end-to-end/03_Credit_Risk_Model_Training.ipynb +++ b/examples/credit-risk-end-to-end/03_Credit_Risk_Model_Training.ipynb @@ -256,7 +256,7 @@ " \"host\": \"localhost\",\n", " \"port\": 8815\n", " },\n", - " entity_key_serialization_version=2\n", + " entity_key_serialization_version=3\n", "))" ] }, diff --git a/examples/java-demo/README.md b/examples/java-demo/README.md index 0ae085e0a7a..a0bde89f2ba 100644 --- a/examples/java-demo/README.md +++ b/examples/java-demo/README.md @@ -1,4 +1,3 @@ - # Running Feast Java Server with Redis & calling with python (with registry in GCP) For this tutorial, we setup Feast with Redis, using the Feast CLI to register and materialize features, and then retrieving via a Feast Java server deployed in Kubernetes via a gRPC call. @@ -40,7 +39,7 @@ For this tutorial, we setup Feast with Redis, using the Feast CLI to register an connection_string: localhost:6379,password=[YOUR PASSWORD] offline_store: type: file - entity_key_serialization_version: 2 + entity_key_serialization_version: 3 ``` 4. Run `feast apply` to apply your local features to the remote registry - Note: you may need to authenticate to gcloud first with `gcloud auth login` @@ -86,7 +85,7 @@ For this tutorial, we setup Feast with Redis, using the Feast CLI to register an >2. `make build-java-docker-dev` >3. In the `application-override.yaml`, uncomment the two `image: tag: dev` blocks >4. `helm install feast-release ../../../infra/charts/feast --values application-override.yaml` -5. (Optional): check logs of the server to make sure it’s working +5. 
(Optional): check logs of the server to make sure it's working ```bash kubectl logs svc/feast-release-feature-server ``` diff --git a/examples/java-demo/feature_repo/application-override.yaml b/examples/java-demo/feature_repo/application-override.yaml index caaa5411e2f..0db26a9dd39 100644 --- a/examples/java-demo/feature_repo/application-override.yaml +++ b/examples/java-demo/feature_repo/application-override.yaml @@ -10,7 +10,7 @@ feature-server: host: my-redis-master port: 6379 password: [YOUR PASSWORD] - entityKeySerializationVersion: 2 + entityKeySerializationVersion: 3 # Uncomment below for dev # image: # tag: dev diff --git a/examples/java-demo/feature_repo/feature_store.yaml b/examples/java-demo/feature_repo/feature_store.yaml index 16d426fc5a8..0928f41e07a 100644 --- a/examples/java-demo/feature_repo/feature_store.yaml +++ b/examples/java-demo/feature_repo/feature_store.yaml @@ -7,4 +7,4 @@ online_store: connection_string: localhost:6379,password=[YOUR PASSWORD] offline_store: type: file -entity_key_serialization_version: 2 \ No newline at end of file +entity_key_serialization_version: 3 \ No newline at end of file diff --git a/examples/kind-quickstart/client/feature_store.yaml b/examples/kind-quickstart/client/feature_store.yaml index 62acd3ead66..2671f525e4f 100644 --- a/examples/kind-quickstart/client/feature_store.yaml +++ b/examples/kind-quickstart/client/feature_store.yaml @@ -9,6 +9,6 @@ offline_store: online_store: path: http://localhost:8003 type: remote -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 auth: type: no_auth diff --git a/examples/podman_local/feature_repo/feature_store.yaml b/examples/podman_local/feature_repo/feature_store.yaml index 3e6a3603162..5ad0c0b77e8 100644 --- a/examples/podman_local/feature_repo/feature_store.yaml +++ b/examples/podman_local/feature_repo/feature_store.yaml @@ -6,4 +6,4 @@ provider: local online_store: type: sqlite path: data/online_store.db -entity_key_serialization_version: 2 
+entity_key_serialization_version: 3 diff --git a/examples/python-helm-demo/README.md b/examples/python-helm-demo/README.md index 078550ae392..031355f3f49 100644 --- a/examples/python-helm-demo/README.md +++ b/examples/python-helm-demo/README.md @@ -1,4 +1,3 @@ - # Running Feast Python / Go Feature Server with Redis on Kubernetes For this tutorial, we set up Feast with Redis. @@ -64,7 +63,7 @@ Manifests have been taken from [Deploy Minio in your project](https://ai-on-open connection_string: localhost:6379,password=**** offline_store: type: file - entity_key_serialization_version: 2 + entity_key_serialization_version: 3 ``` 1. To run `feast apply` from the current machine we need to define the AWS credentials to connect the MinIO S3 store, which are defined in [minio.env](./minio.env): @@ -142,7 +141,7 @@ are defined in [minio.env](./minio.env): ]' kubectl wait --for=condition=available deployment/online-server --timeout=2m ``` -1. (Optional): check logs of the server to make sure it’s working +1. 
(Optional): check logs of the server to make sure it's working ```bash kubectl logs svc/online-server ``` diff --git a/examples/python-helm-demo/feature_repo/feature_store.yaml.template b/examples/python-helm-demo/feature_repo/feature_store.yaml.template index 585ba23e63b..81f0fa4083d 100644 --- a/examples/python-helm-demo/feature_repo/feature_store.yaml.template +++ b/examples/python-helm-demo/feature_repo/feature_store.yaml.template @@ -6,4 +6,4 @@ online_store: connection_string: localhost:6379,password=_REDIS_PASSWORD_ offline_store: type: file -entity_key_serialization_version: 2 \ No newline at end of file +entity_key_serialization_version: 3 \ No newline at end of file diff --git a/examples/python-helm-demo/test/feature_store.yaml b/examples/python-helm-demo/test/feature_store.yaml index 13e99873ee7..a81c66f1f7f 100644 --- a/examples/python-helm-demo/test/feature_store.yaml +++ b/examples/python-helm-demo/test/feature_store.yaml @@ -4,4 +4,4 @@ provider: local online_store: path: http://localhost:6566 type: remote -entity_key_serialization_version: 2 \ No newline at end of file +entity_key_serialization_version: 3 \ No newline at end of file diff --git a/examples/rbac-local/client/feature_store.yaml b/examples/rbac-local/client/feature_store.yaml index d428adf6712..971b7098af9 100644 --- a/examples/rbac-local/client/feature_store.yaml +++ b/examples/rbac-local/client/feature_store.yaml @@ -1,4 +1,3 @@ -entity_key_serialization_version: 2 offline_store: host: localhost port: 8815 @@ -10,3 +9,4 @@ project: rbac registry: path: localhost:6570 registry_type: remote +entity_key_serialization_version: 3 \ No newline at end of file diff --git a/examples/rbac-remote/client/oidc/feature_repo/feature_store.yaml b/examples/rbac-remote/client/oidc/feature_repo/feature_store.yaml index 1454e16df92..d36762ce95e 100644 --- a/examples/rbac-remote/client/oidc/feature_repo/feature_store.yaml +++ b/examples/rbac-remote/client/oidc/feature_repo/feature_store.yaml @@ -16,4 +16,4 
@@ auth: client_secret: update-this-value username: ${FEAST_USERNAME} password: ${FEAST_PASSWORD} -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/examples/rbac-remote/server/feature_repo/feature_store.yaml b/examples/rbac-remote/server/feature_repo/feature_store.yaml index 78b13c660bf..25267f1bece 100644 --- a/examples/rbac-remote/server/feature_repo/feature_store.yaml +++ b/examples/rbac-remote/server/feature_repo/feature_store.yaml @@ -23,4 +23,4 @@ offline_store: db_schema: public user: feast password: feast -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/examples/rbac-remote/server/k8s/feature_store_offline.yaml b/examples/rbac-remote/server/k8s/feature_store_offline.yaml index 4fc01508bd1..1e68df239f7 100644 --- a/examples/rbac-remote/server/k8s/feature_store_offline.yaml +++ b/examples/rbac-remote/server/k8s/feature_store_offline.yaml @@ -13,4 +13,4 @@ offline_store: password: feast auth: type: kubernetes -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/examples/rbac-remote/server/k8s/feature_store_online.yaml b/examples/rbac-remote/server/k8s/feature_store_online.yaml index aa167731b2e..bd5c8cf85fe 100644 --- a/examples/rbac-remote/server/k8s/feature_store_online.yaml +++ b/examples/rbac-remote/server/k8s/feature_store_online.yaml @@ -17,4 +17,4 @@ offline_store: port: 80 auth: type: kubernetes -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/examples/rbac-remote/server/k8s/feature_store_registry.yaml b/examples/rbac-remote/server/k8s/feature_store_registry.yaml index 579141fb010..6da7ef5d573 100644 --- a/examples/rbac-remote/server/k8s/feature_store_registry.yaml +++ b/examples/rbac-remote/server/k8s/feature_store_registry.yaml @@ -9,4 +9,4 @@ registry: pool_pre_ping: true auth: type: kubernetes -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git 
a/examples/rbac-remote/server/oidc/feature_store_offline.yaml b/examples/rbac-remote/server/oidc/feature_store_offline.yaml index 8ed4cc1ff3c..b6fba1a7d49 100644 --- a/examples/rbac-remote/server/oidc/feature_store_offline.yaml +++ b/examples/rbac-remote/server/oidc/feature_store_offline.yaml @@ -15,4 +15,4 @@ auth: type: oidc auth_discovery_url: https://keycloak-feast-dev.apps.com/realms/feast-rbac/.well-known/openid-configuration client_id: feast-client -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/examples/rbac-remote/server/oidc/feature_store_online.yaml b/examples/rbac-remote/server/oidc/feature_store_online.yaml index c47c3a0662c..e9e051d7e7b 100644 --- a/examples/rbac-remote/server/oidc/feature_store_online.yaml +++ b/examples/rbac-remote/server/oidc/feature_store_online.yaml @@ -19,4 +19,4 @@ auth: type: oidc auth_discovery_url: https://keycloak-feast-dev.apps.com/realms/feast-rbac/.well-known/openid-configuration client_id: feast-client -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/examples/rbac-remote/server/oidc/feature_store_registry.yaml b/examples/rbac-remote/server/oidc/feature_store_registry.yaml index a661d9dc566..1ffe9a09225 100644 --- a/examples/rbac-remote/server/oidc/feature_store_registry.yaml +++ b/examples/rbac-remote/server/oidc/feature_store_registry.yaml @@ -11,4 +11,4 @@ auth: type: oidc auth_discovery_url: https://keycloak-feast-dev.apps.com/realms/feast-rbac/.well-known/openid-configuration client_id: feast-client -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/examples/remote-offline-store/offline_client/feature_store.yaml b/examples/remote-offline-store/offline_client/feature_store.yaml index 24ee5d70426..5e916432b7f 100644 --- a/examples/remote-offline-store/offline_client/feature_store.yaml +++ b/examples/remote-offline-store/offline_client/feature_store.yaml @@ -7,4 +7,4 @@ offline_store: type: remote host: 
localhost port: 8815 -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/examples/remote-offline-store/offline_server/feature_repo/feature_store.yaml b/examples/remote-offline-store/offline_server/feature_repo/feature_store.yaml index a751706d07a..758cf6f460f 100644 --- a/examples/remote-offline-store/offline_server/feature_repo/feature_store.yaml +++ b/examples/remote-offline-store/offline_server/feature_repo/feature_store.yaml @@ -6,4 +6,4 @@ provider: local online_store: type: sqlite path: data/online_store.db -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/examples/rhoai-quickstart/feast-demo-quickstart.ipynb b/examples/rhoai-quickstart/feast-demo-quickstart.ipynb index 18874e462ec..8777798e49f 100644 --- a/examples/rhoai-quickstart/feast-demo-quickstart.ipynb +++ b/examples/rhoai-quickstart/feast-demo-quickstart.ipynb @@ -22,8 +22,8 @@ "output_type": "stream", "text": [ "\n", - "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip available: \u001B[0m\u001B[31;49m22.2.2\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m24.2\u001B[0m\n", - "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n", "feast package is installed\n" ] @@ -82,7 +82,7 @@ "output_type": "stream", "text": [ "\n", - "Creating a new Feast repository in 
\u001B[1m\u001B[32m/opt/app-root/src/feast/examples/rhoai-quickstart/my_feast_project\u001B[0m.\n", + "Creating a new Feast repository in \u001b[1m\u001b[32m/opt/app-root/src/feast/examples/rhoai-quickstart/my_feast_project\u001b[0m.\n", "\n" ] } @@ -463,19 +463,19 @@ "09/24/2024 06:01:41 PM root WARNING: Cannot use sqlite_vec for vector search\n", "09/24/2024 06:01:41 PM root WARNING: Cannot use sqlite_vec for vector search\n", "09/24/2024 06:01:41 PM root WARNING: Cannot use sqlite_vec for vector search\n", - "Created entity \u001B[1m\u001B[32mdriver\u001B[0m\n", - "Created feature view \u001B[1m\u001B[32mdriver_hourly_stats\u001B[0m\n", - "Created feature view \u001B[1m\u001B[32mdriver_hourly_stats_fresh\u001B[0m\n", - "Created on demand feature view \u001B[1m\u001B[32mtransformed_conv_rate\u001B[0m\n", - "Created on demand feature view \u001B[1m\u001B[32mtransformed_conv_rate_fresh\u001B[0m\n", - "Created feature service \u001B[1m\u001B[32mdriver_activity_v2\u001B[0m\n", - "Created feature service \u001B[1m\u001B[32mdriver_activity_v1\u001B[0m\n", - "Created feature service \u001B[1m\u001B[32mdriver_activity_v3\u001B[0m\n", + "Created entity \u001b[1m\u001b[32mdriver\u001b[0m\n", + "Created feature view \u001b[1m\u001b[32mdriver_hourly_stats\u001b[0m\n", + "Created feature view \u001b[1m\u001b[32mdriver_hourly_stats_fresh\u001b[0m\n", + "Created on demand feature view \u001b[1m\u001b[32mtransformed_conv_rate\u001b[0m\n", + "Created on demand feature view \u001b[1m\u001b[32mtransformed_conv_rate_fresh\u001b[0m\n", + "Created feature service \u001b[1m\u001b[32mdriver_activity_v2\u001b[0m\n", + "Created feature service \u001b[1m\u001b[32mdriver_activity_v1\u001b[0m\n", + "Created feature service \u001b[1m\u001b[32mdriver_activity_v3\u001b[0m\n", "\n", "09/24/2024 06:01:41 PM root WARNING: Cannot use sqlite_vec for vector search\n", "09/24/2024 06:01:41 PM root WARNING: Cannot use sqlite_vec for vector search\n", - "Created sqlite table 
\u001B[1m\u001B[32mmy_feast_project_driver_hourly_stats_fresh\u001B[0m\n", - "Created sqlite table \u001B[1m\u001B[32mmy_feast_project_driver_hourly_stats\u001B[0m\n", + "Created sqlite table \u001b[1m\u001b[32mmy_feast_project_driver_hourly_stats_fresh\u001b[0m\n", + "Created sqlite table \u001b[1m\u001b[32mmy_feast_project_driver_hourly_stats\u001b[0m\n", "\n" ] } @@ -706,12 +706,12 @@ "output_type": "stream", "text": [ "09/24/2024 06:02:09 PM root WARNING: _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.\n", - "Materializing \u001B[1m\u001B[32m2\u001B[0m feature views to \u001B[1m\u001B[32m2024-09-24 18:02:06+00:00\u001B[0m into the \u001B[1m\u001B[32msqlite\u001B[0m online store.\n", + "Materializing \u001b[1m\u001b[32m2\u001b[0m feature views to \u001b[1m\u001b[32m2024-09-24 18:02:06+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", "\n", - "\u001B[1m\u001B[32mdriver_hourly_stats\u001B[0m from \u001B[1m\u001B[32m2024-09-23 18:02:09+00:00\u001B[0m to \u001B[1m\u001B[32m2024-09-24 18:02:06+00:00\u001B[0m:\n", + "\u001b[1m\u001b[32mdriver_hourly_stats\u001b[0m from \u001b[1m\u001b[32m2024-09-23 18:02:09+00:00\u001b[0m to \u001b[1m\u001b[32m2024-09-24 18:02:06+00:00\u001b[0m:\n", " 0%| | 0/5 [00:00 Tuple[bytes, int]: if value_type == "string_val": return v.string_val.encode("utf8"), ValueType.STRING @@ -41,7 +42,9 @@ def _deserialize_value(value_type, value_bytes) -> ValueProto: raise ValueError(f"Unsupported value type: {value_type}") -def serialize_entity_key_prefix(entity_keys: List[str]) -> bytes: +def serialize_entity_key_prefix( + entity_keys: List[str], entity_key_serialization_version: int = 3 +) -> bytes: """ Serialize keys to a bytestring, so it can be used to prefix-scan through items stored in the online store using serialize_entity_key. 
@@ -51,8 +54,12 @@ def serialize_entity_key_prefix(entity_keys: List[str]) -> bytes: """ sorted_keys = sorted(entity_keys) output: List[bytes] = [] + if entity_key_serialization_version > 2: + output.append(struct.pack(" 2: + output.append(struct.pack(" bytes: """ Serialize entity key to a bytestring so it can be used as a lookup key in a hash table. @@ -117,13 +124,19 @@ def serialize_entity_key( Args: entity_key_serialization_version: version of the entity key serialization - version 1: int64 values are serialized as 4 bytes - version 2: int64 values are serialized as 8 bytes + Versions: version 3: entity_key size is added to the serialization for deserialization purposes entity_key: EntityKeyProto Returns: bytes of the serialized entity key """ + if entity_key_serialization_version < 3: + # Not raising the error, keeping it in warning state for reserialization purpose + # We should remove this after few releases + warnings.warn( + "Serialization of entity key with version < 3 is removed. Please use version 3 by setting entity_key_serialization_version=3." + "To reserializa your online store featrues refer - https://github.com/feast-dev/feast/blob/master/docs/how-to-guides/entity-reserialization-of-from-v2-to-v3.md" + ) sorted_keys, sorted_values = zip( *sorted(zip(entity_key.join_keys, entity_key.entity_values)) ) @@ -163,9 +176,12 @@ def deserialize_entity_key( Returns: EntityKeyProto """ - if entity_key_serialization_version <= 2: - raise ValueError( - "Deserialization of entity key with version <= 2 is not supported. Please use version > 2 by setting entity_key_serialization_version=3" + if entity_key_serialization_version < 3: + # Not raising the error, keeping it in warning state for reserialization purpose + # We should remove this after few releases + warnings.warn( + "Deserialization of entity key with version < 3 is removed. Please use version 3 by setting entity_key_serialization_version=3." 
+ "To reserializa your online store featrues refer - https://github.com/feast-dev/feast/blob/master/docs/how-to-guides/entity-reserialization-of-from-v2-to-v3.md" ) offset = 0 keys = [] diff --git a/sdk/python/feast/infra/online_stores/couchbase_online_store/README.md b/sdk/python/feast/infra/online_stores/couchbase_online_store/README.md index 8f95884fe03..885250a66af 100644 --- a/sdk/python/feast/infra/online_stores/couchbase_online_store/README.md +++ b/sdk/python/feast/infra/online_stores/couchbase_online_store/README.md @@ -41,7 +41,7 @@ online_store: password: password # Couchbase password from access credentials bucket_name: feast # Couchbase bucket name, defaults to feast kv_port: 11210 # Couchbase key-value port, defaults to 11210. Required if custom ports are used. -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 ``` #### Apply the feature definitions in [`example.py`](https://github.com/feast-dev/feast/blob/master/go/internal/test/feature_repo/example.py) diff --git a/sdk/python/feast/infra/online_stores/hazelcast_online_store/hazelcast_online_store.py b/sdk/python/feast/infra/online_stores/hazelcast_online_store/hazelcast_online_store.py index c56d394c21a..21359b45bca 100644 --- a/sdk/python/feast/infra/online_stores/hazelcast_online_store/hazelcast_online_store.py +++ b/sdk/python/feast/infra/online_stores/hazelcast_online_store/hazelcast_online_store.py @@ -163,7 +163,7 @@ def online_write_batch( entity_key_str = base64.b64encode( serialize_entity_key( entity_key, - entity_key_serialization_version=2, + entity_key_serialization_version=3, ) ).decode("utf-8") event_ts_utc = event_ts.astimezone(tz=timezone.utc).timestamp() @@ -214,7 +214,7 @@ def online_read( entity_key_str = base64.b64encode( serialize_entity_key( entity_key, - entity_key_serialization_version=2, + entity_key_serialization_version=3, ) ).decode("utf-8") if requested_features: diff --git a/sdk/python/feast/infra/online_stores/helpers.py 
b/sdk/python/feast/infra/online_stores/helpers.py index 409a1eb2126..b657bd44d00 100644 --- a/sdk/python/feast/infra/online_stores/helpers.py +++ b/sdk/python/feast/infra/online_stores/helpers.py @@ -23,7 +23,7 @@ def get_online_store_from_config(online_store_config: Any) -> OnlineStore: def _redis_key( - project: str, entity_key: EntityKeyProto, entity_key_serialization_version=1 + project: str, entity_key: EntityKeyProto, entity_key_serialization_version=3 ) -> bytes: key: List[bytes] = [ serialize_entity_key( @@ -50,7 +50,7 @@ def _mmh3(key: str): def compute_entity_id( - entity_key: EntityKeyProto, entity_key_serialization_version=1 + entity_key: EntityKeyProto, entity_key_serialization_version=3 ) -> str: """ Compute Entity id given Feast Entity Key for online stores. diff --git a/sdk/python/feast/infra/online_stores/mysql_online_store/mysql.py b/sdk/python/feast/infra/online_stores/mysql_online_store/mysql.py index 64111ca42c1..d44eddfbd0b 100644 --- a/sdk/python/feast/infra/online_stores/mysql_online_store/mysql.py +++ b/sdk/python/feast/infra/online_stores/mysql_online_store/mysql.py @@ -72,7 +72,7 @@ def online_write_batch( for entity_key, values, timestamp, created_ts in data: entity_key_bin = serialize_entity_key( entity_key, - entity_key_serialization_version=2, + entity_key_serialization_version=3, ).hex() timestamp = to_naive_utc(timestamp) if created_ts is not None: @@ -137,7 +137,7 @@ def online_read( for entity_key in entity_keys: entity_key_bin = serialize_entity_key( entity_key, - entity_key_serialization_version=2, + entity_key_serialization_version=3, ).hex() cur.execute( diff --git a/sdk/python/feast/infra/online_stores/singlestore_online_store/singlestore.py b/sdk/python/feast/infra/online_stores/singlestore_online_store/singlestore.py index a1535589542..eb598ec5e7a 100644 --- a/sdk/python/feast/infra/online_stores/singlestore_online_store/singlestore.py +++ b/sdk/python/feast/infra/online_stores/singlestore_online_store/singlestore.py @@ 
-80,7 +80,7 @@ def online_write_batch( for entity_key, values, timestamp, created_ts in data: entity_key_bin = serialize_entity_key( entity_key, - entity_key_serialization_version=2, + entity_key_serialization_version=3, ).hex() timestamp = _to_naive_utc(timestamp) if created_ts is not None: @@ -130,7 +130,7 @@ def online_read( keys.append( serialize_entity_key( entity_key, - entity_key_serialization_version=2, + entity_key_serialization_version=3, ).hex() ) diff --git a/sdk/python/feast/infra/online_stores/sqlite.py b/sdk/python/feast/infra/online_stores/sqlite.py index 07180fe75ed..461277631f5 100644 --- a/sdk/python/feast/infra/online_stores/sqlite.py +++ b/sdk/python/feast/infra/online_stores/sqlite.py @@ -111,7 +111,6 @@ class SqliteOnlineStoreConfig(FeastConfigBaseModel, VectorStoreConfig): path: StrictStr = "data/online.db" """ (optional) Path to sqlite db """ - vector_enabled: bool = False text_search_enabled: bool = False diff --git a/sdk/python/feast/infra/utils/hbase_utils.py b/sdk/python/feast/infra/utils/hbase_utils.py index 72afda2ef3d..d26587cc6a8 100644 --- a/sdk/python/feast/infra/utils/hbase_utils.py +++ b/sdk/python/feast/infra/utils/hbase_utils.py @@ -193,19 +193,19 @@ def main(): EntityKey( join_keys=["driver_id"], entity_values=[Value(int64_val=1004)] ), - entity_key_serialization_version=2, + entity_key_serialization_version=3, ).hex(), serialize_entity_key( EntityKey( join_keys=["driver_id"], entity_values=[Value(int64_val=1005)] ), - entity_key_serialization_version=2, + entity_key_serialization_version=3, ).hex(), serialize_entity_key( EntityKey( join_keys=["driver_id"], entity_values=[Value(int64_val=1024)] ), - entity_key_serialization_version=2, + entity_key_serialization_version=3, ).hex(), ] rows = table.rows(row_keys) diff --git a/sdk/python/feast/infra/utils/snowflake/snowpark/snowflake_udfs.py b/sdk/python/feast/infra/utils/snowflake/snowpark/snowflake_udfs.py index ebba3e9b84e..277d8e18946 100644 --- 
a/sdk/python/feast/infra/utils/snowflake/snowpark/snowflake_udfs.py +++ b/sdk/python/feast/infra/utils/snowflake/snowpark/snowflake_udfs.py @@ -394,7 +394,7 @@ def feast_serialize_entity_keys(df): join_keys=join_keys, entity_values=[proto_values_by_column[k][idx] for k in join_keys], ), - entity_key_serialization_version=2, + entity_key_serialization_version=3, ) for idx in range(df.shape[0]) ] diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index d172b80f076..41d4971ea4e 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -215,16 +215,12 @@ class RepoConfig(FeastBaseModel): repo_path: Optional[Path] = None """When using relative path in FileSource path, this parameter is mandatory""" - entity_key_serialization_version: StrictInt = 1 + entity_key_serialization_version: StrictInt = 3 """ Entity key serialization version: This version is used to control what serialization scheme is used when writing data to the online store. - A value <= 1 uses the serialization scheme used by feast up to Feast 0.22. - A value of 2 uses a newer serialization scheme, supported as of Feast 0.23. A value of 3 uses the latest serialization scheme, supported as of Feast 0.38. - The main difference between the three schema is that - v1: the serialization scheme v1 stored `long` values as `int`s, which would result in errors trying to serialize a range of values. - v2: fixes this error, but v1 is kept around to ensure backwards compatibility - specifically the ability to read - feature values for entities that have already been written into the online store. + + Version Schemas: v3: add entity_key value length to serialized bytes to enable deserialization, which can be used in retrieval of entity_key in document retrieval. 
""" @@ -266,9 +262,9 @@ def __init__(self, **data: Any): self.feature_server["type"] )(**self.feature_server) - if self.entity_key_serialization_version <= 2: + if self.entity_key_serialization_version < 3: warnings.warn( - "The serialization version 2 and below will be deprecated in the next release. " + "The serialization version below 3 are deprecated. " "Specifying `entity_key_serialization_version` to 3 is recommended.", DeprecationWarning, ) diff --git a/sdk/python/feast/templates/athena/feature_repo/feature_store.yaml b/sdk/python/feast/templates/athena/feature_repo/feature_store.yaml index bd12e906d1f..a3e74083f5e 100644 --- a/sdk/python/feast/templates/athena/feature_repo/feature_store.yaml +++ b/sdk/python/feast/templates/athena/feature_repo/feature_store.yaml @@ -11,4 +11,4 @@ offline_store: data_source: AwsDataCatalog s3_staging_location: s3://{S3 bucket to be used by Feast} workgroup: {Workgroup for Athena} -entity_key_serialization_version: 2 \ No newline at end of file +entity_key_serialization_version: 3 \ No newline at end of file diff --git a/sdk/python/feast/templates/aws/feature_repo/feature_store.yaml b/sdk/python/feast/templates/aws/feature_repo/feature_store.yaml index c29496711bb..27fb0d6dfbe 100644 --- a/sdk/python/feast/templates/aws/feature_repo/feature_store.yaml +++ b/sdk/python/feast/templates/aws/feature_repo/feature_store.yaml @@ -25,4 +25,4 @@ offline_store: user: %REDSHIFT_USER% s3_staging_location: %REDSHIFT_S3_STAGING_LOCATION% iam_role: %REDSHIFT_IAM_ROLE% -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/sdk/python/feast/templates/cassandra/feature_repo/feature_store.yaml b/sdk/python/feast/templates/cassandra/feature_repo/feature_store.yaml index ce50275554b..9c6263c7e45 100644 --- a/sdk/python/feast/templates/cassandra/feature_repo/feature_store.yaml +++ b/sdk/python/feast/templates/cassandra/feature_repo/feature_store.yaml @@ -18,4 +18,4 @@ online_store: load_balancing_policy: 
c_load_balancing_policy read_concurrency: c_r_concurrency write_concurrency: c_w_concurrency -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/sdk/python/feast/templates/couchbase/feature_repo/feature_store.yaml b/sdk/python/feast/templates/couchbase/feature_repo/feature_store.yaml index 96f45934eb5..69a2577951f 100644 --- a/sdk/python/feast/templates/couchbase/feature_repo/feature_store.yaml +++ b/sdk/python/feast/templates/couchbase/feature_repo/feature_store.yaml @@ -14,4 +14,4 @@ offline_store: user: COUCHBASE_COLUMNAR_USER # Couchbase cluster access name from Settings > Access Control page in Capella Columnar console password: COUCHBASE_COLUMNAR_PASSWORD # Couchbase password from Settings > Access Control page in Capella Columnar console timeout: COUCHBASE_COLUMNAR_TIMEOUT # Timeout in seconds for Columnar operations, optional -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/sdk/python/feast/templates/gcp/feature_repo/feature_store.yaml b/sdk/python/feast/templates/gcp/feature_repo/feature_store.yaml index e3d9d1d1e61..e24093c9745 100644 --- a/sdk/python/feast/templates/gcp/feature_repo/feature_store.yaml +++ b/sdk/python/feast/templates/gcp/feature_repo/feature_store.yaml @@ -22,4 +22,4 @@ online_store: #online_store: # type: redis # connection_string: "localhost:6379" -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/sdk/python/feast/templates/hazelcast/feature_repo/feature_store.yaml b/sdk/python/feast/templates/hazelcast/feature_repo/feature_store.yaml index e26d1bf7506..a7c687f6455 100644 --- a/sdk/python/feast/templates/hazelcast/feature_repo/feature_store.yaml +++ b/sdk/python/feast/templates/hazelcast/feature_repo/feature_store.yaml @@ -11,4 +11,4 @@ online_store: ssl_keyfile_path: c_key_path ssl_password: ${SSL_PASSWORD} # This value will be read form the `SSL_PASSWORD` environment variable. 
key_ttl_seconds: c_ttl_seconds -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/sdk/python/feast/templates/hbase/feature_repo/feature_store.yaml b/sdk/python/feast/templates/hbase/feature_repo/feature_store.yaml index bde49486ad3..028c48f029b 100644 --- a/sdk/python/feast/templates/hbase/feature_repo/feature_store.yaml +++ b/sdk/python/feast/templates/hbase/feature_repo/feature_store.yaml @@ -7,4 +7,4 @@ online_store: type: hbase host: 127.0.0.1 port: 9090 -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/sdk/python/feast/templates/local/feature_repo/feature_store.yaml b/sdk/python/feast/templates/local/feature_repo/feature_store.yaml index 11b339583e0..356e706c638 100644 --- a/sdk/python/feast/templates/local/feature_repo/feature_store.yaml +++ b/sdk/python/feast/templates/local/feature_repo/feature_store.yaml @@ -6,7 +6,7 @@ provider: local online_store: type: sqlite path: data/online_store.db -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 # By default, no_auth for authentication and authorization, other possible values kubernetes and oidc. Refer the documentation for more details. 
auth: type: no_auth diff --git a/sdk/python/feast/templates/minimal/feature_repo/feature_store.yaml b/sdk/python/feast/templates/minimal/feature_repo/feature_store.yaml index 45a0ce77186..244a3b0f453 100644 --- a/sdk/python/feast/templates/minimal/feature_repo/feature_store.yaml +++ b/sdk/python/feast/templates/minimal/feature_repo/feature_store.yaml @@ -3,4 +3,4 @@ registry: /path/to/registry.db provider: local online_store: path: /path/to/online_store.db -entity_key_serialization_version: 2 \ No newline at end of file +entity_key_serialization_version: 3 \ No newline at end of file diff --git a/sdk/python/feast/templates/postgres/feature_repo/feature_store.yaml b/sdk/python/feast/templates/postgres/feature_repo/feature_store.yaml index f14510f820e..0663ff0ad97 100644 --- a/sdk/python/feast/templates/postgres/feature_repo/feature_store.yaml +++ b/sdk/python/feast/templates/postgres/feature_repo/feature_store.yaml @@ -23,4 +23,4 @@ offline_store: db_schema: DB_SCHEMA user: DB_USERNAME password: DB_PASSWORD -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/sdk/python/feast/templates/snowflake/feature_repo/feature_store.yaml b/sdk/python/feast/templates/snowflake/feature_repo/feature_store.yaml index 104e6394c6b..a7de457a2f2 100644 --- a/sdk/python/feast/templates/snowflake/feature_repo/feature_store.yaml +++ b/sdk/python/feast/templates/snowflake/feature_repo/feature_store.yaml @@ -25,4 +25,4 @@ online_store: role: SNOWFLAKE_ROLE warehouse: SNOWFLAKE_WAREHOUSE database: SNOWFLAKE_DATABASE -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/sdk/python/feast/templates/spark/feature_repo/feature_store.yaml b/sdk/python/feast/templates/spark/feature_repo/feature_store.yaml index 08383a29e13..873ce113f71 100644 --- a/sdk/python/feast/templates/spark/feature_repo/feature_store.yaml +++ b/sdk/python/feast/templates/spark/feature_repo/feature_store.yaml @@ -16,4 +16,4 @@ offline_store: 
spark.sql.execution.arrow.pyspark.enabled: "true" online_store: path: data/online_store.db -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/sdk/python/feast/templates/spark/feature_store.yaml b/sdk/python/feast/templates/spark/feature_store.yaml index 91e3ecf4724..230d2c23d9f 100644 --- a/sdk/python/feast/templates/spark/feature_store.yaml +++ b/sdk/python/feast/templates/spark/feature_store.yaml @@ -12,4 +12,4 @@ offline_store: spark.sql.session.timeZone: "UTC" online_store: path: data/online_store.db -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 diff --git a/sdk/python/tests/integration/compute_engines/spark/test_compute.py b/sdk/python/tests/integration/compute_engines/spark/test_compute.py index 5254db1e690..3d44a130d64 100644 --- a/sdk/python/tests/integration/compute_engines/spark/test_compute.py +++ b/sdk/python/tests/integration/compute_engines/spark/test_compute.py @@ -109,7 +109,7 @@ def create_spark_environment(): batch_engine={"type": "spark.engine", "partitions": 10}, ) spark_environment = construct_test_environment( - spark_config, None, entity_key_serialization_version=2 + spark_config, None, entity_key_serialization_version=3 ) spark_environment.setup() return spark_environment diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index b851261e819..24e611c4f33 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -528,7 +528,7 @@ def construct_test_environment( fixture_request: Optional[pytest.FixtureRequest], test_suite_name: str = "integration_test", worker_id: str = "worker_id", - entity_key_serialization_version: int = 2, + entity_key_serialization_version: int = 3, ) -> Environment: _uuid = str(uuid.uuid4()).replace("-", "")[:6] @@ -565,14 +565,6 @@ def construct_test_environment( 
cache_ttl_seconds=1, ) - online_store = ( - test_repo_config.online_store.get("type") - if isinstance(test_repo_config.online_store, dict) - else test_repo_config.online_store - ) - if online_store in ["milvus", "pgvector", "qdrant", "elasticsearch"]: - entity_key_serialization_version = 3 - environment_params = { "name": project, "provider": test_repo_config.provider, diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py index 6f6e5d68133..7e6334b1b88 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py @@ -385,7 +385,7 @@ def setup(self, registry: RegistryConfig): provider="local", offline_store=parent_offline_config, registry=registry.path, - entity_key_serialization_version=2, + entity_key_serialization_version=3, ) repo_path = Path(tempfile.mkdtemp()) @@ -435,7 +435,7 @@ def setup(self, registry: RegistryConfig): provider="local", offline_store=parent_offline_config, registry=registry.path, - entity_key_serialization_version=2, + entity_key_serialization_version=3, ) certificates_path = tempfile.mkdtemp() @@ -536,7 +536,7 @@ def setup(self, registry: RegistryConfig): provider="local", offline_store=parent_offline_config, registry=registry.path, - entity_key_serialization_version=2, + entity_key_serialization_version=3, ) repo_base_path = Path(tempfile.mkdtemp()) diff --git a/sdk/python/tests/integration/materialization/contrib/spark/test_spark_materialization_engine.py b/sdk/python/tests/integration/materialization/contrib/spark/test_spark_materialization_engine.py index ae0e03c9441..03f942c2f96 100644 --- a/sdk/python/tests/integration/materialization/contrib/spark/test_spark_materialization_engine.py +++ b/sdk/python/tests/integration/materialization/contrib/spark/test_spark_materialization_engine.py @@ -31,7 +31,7 @@ def 
test_spark_materialization_consistency(): batch_engine={"type": "spark.engine", "partitions": 10}, ) spark_environment = construct_test_environment( - spark_config, None, entity_key_serialization_version=2 + spark_config, None, entity_key_serialization_version=3 ) spark_environment.setup() diff --git a/sdk/python/tests/integration/materialization/test_lambda.py b/sdk/python/tests/integration/materialization/test_lambda.py index 07ab93e2e13..f0c1e108694 100644 --- a/sdk/python/tests/integration/materialization/test_lambda.py +++ b/sdk/python/tests/integration/materialization/test_lambda.py @@ -38,9 +38,8 @@ def test_lambda_materialization_consistency(): }, registry_location=RegistryLocation.S3, ) - # TODO(adchia): figure out why entity_key_serialization_version 2 breaks with this test lambda_environment = construct_test_environment( - lambda_config, None, entity_key_serialization_version=1 + lambda_config, None, entity_key_serialization_version=3 ) df = create_basic_driver_dataset() diff --git a/sdk/python/tests/integration/online_store/test_remote_online_store.py b/sdk/python/tests/integration/online_store/test_remote_online_store.py index 3b5b707dcb7..80166abf431 100644 --- a/sdk/python/tests/integration/online_store/test_remote_online_store.py +++ b/sdk/python/tests/integration/online_store/test_remote_online_store.py @@ -260,7 +260,7 @@ def _overwrite_remote_client_feature_store_yaml( ): repo_config = os.path.join(repo_path, "feature_store.yaml") - config_content = "entity_key_serialization_version: 2\n" + auth_config + config_content = "entity_key_serialization_version: 3\n" + auth_config config_content += dedent( f""" project: {PROJECT_NAME} diff --git a/sdk/python/tests/integration/registration/test_feature_store.py b/sdk/python/tests/integration/registration/test_feature_store.py index d7ffb83059b..b59af900190 100644 --- a/sdk/python/tests/integration/registration/test_feature_store.py +++ b/sdk/python/tests/integration/registration/test_feature_store.py @@ 
-72,6 +72,6 @@ def feature_store_with_local_registry(): project="default", provider="local", online_store=SqliteOnlineStoreConfig(path=online_store_path), - entity_key_serialization_version=2, + entity_key_serialization_version=3, ) ) diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index 9f490d7f4e2..7f053c46224 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -23,7 +23,7 @@ def test_update_file_data_source_with_inferred_event_timestamp_col(simple_datase provider="local", project="test", registry="test.pb", - entity_key_serialization_version=2, + entity_key_serialization_version=3, ), ) actual_event_timestamp_cols = [ @@ -41,7 +41,7 @@ def test_update_file_data_source_with_inferred_event_timestamp_col(simple_datase provider="local", project="test", registry="test.pb", - entity_key_serialization_version=2, + entity_key_serialization_version=3, ), ) @@ -62,7 +62,7 @@ def test_update_data_sources_with_inferred_event_timestamp_col(universal_data_so provider="local", project="test", registry="test.pb", - entity_key_serialization_version=2, + entity_key_serialization_version=3, ), ) actual_event_timestamp_cols = [ diff --git a/sdk/python/tests/unit/cli/test_cli.py b/sdk/python/tests/unit/cli/test_cli.py index 0dd51e9ce68..c8649f5cfb5 100644 --- a/sdk/python/tests/unit/cli/test_cli.py +++ b/sdk/python/tests/unit/cli/test_cli.py @@ -121,7 +121,7 @@ def setup_third_party_provider_repo(provider_name: str): type: sqlite offline_store: type: file - entity_key_serialization_version: 2 + entity_key_serialization_version: 3 """ ) ) @@ -158,7 +158,7 @@ def setup_third_party_registry_store_repo( type: sqlite offline_store: type: file - entity_key_serialization_version: 2 + entity_key_serialization_version: 3 """ ) ) @@ -189,4 +189,4 @@ def test_cli_configuration(): 
assertpy.assert_that(output).contains(b"type: sqlite") assertpy.assert_that(output).contains(b"path: data/online_store.db") assertpy.assert_that(output).contains(b"type: file") - assertpy.assert_that(output).contains(b"entity_key_serialization_version: 2") + assertpy.assert_that(output).contains(b"entity_key_serialization_version: 3") diff --git a/sdk/python/tests/unit/infra/offline_stores/test_redshift.py b/sdk/python/tests/unit/infra/offline_stores/test_redshift.py index a9ed4c2b59f..dc4caccc8f9 100644 --- a/sdk/python/tests/unit/infra/offline_stores/test_redshift.py +++ b/sdk/python/tests/unit/infra/offline_stores/test_redshift.py @@ -33,7 +33,7 @@ def test_offline_write_batch( s3_staging_location="s3://bucket/path", workgroup="", ), - entity_key_serialization_version=2, + entity_key_serialization_version=3, ) batch_source = RedshiftSource( diff --git a/sdk/python/tests/unit/infra/offline_stores/test_snowflake.py b/sdk/python/tests/unit/infra/offline_stores/test_snowflake.py index d692d0f957a..0a4c1df9b1c 100644 --- a/sdk/python/tests/unit/infra/offline_stores/test_snowflake.py +++ b/sdk/python/tests/unit/infra/offline_stores/test_snowflake.py @@ -38,7 +38,7 @@ def retrieval_job(request): provider="snowflake.offline", online_store=SqliteOnlineStoreConfig(type="sqlite"), offline_store=offline_store_config, - entity_key_serialization_version=2, + entity_key_serialization_version=3, ), full_feature_names=True, on_demand_feature_views=[], diff --git a/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py b/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py index 527d3f16181..1fd032bbd54 100644 --- a/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py +++ b/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py @@ -49,7 +49,7 @@ def repo_config(): online_store=DynamoDBOnlineStoreConfig(region=REGION), # online_store={"type": "dynamodb", "region": REGION}, offline_store=DaskOfflineStoreConfig(), - 
entity_key_serialization_version=2, + entity_key_serialization_version=3, ) diff --git a/sdk/python/tests/unit/infra/online_store/test_redis.py b/sdk/python/tests/unit/infra/online_store/test_redis.py index c26c2f25c5f..83c8d3d61e4 100644 --- a/sdk/python/tests/unit/infra/online_store/test_redis.py +++ b/sdk/python/tests/unit/infra/online_store/test_redis.py @@ -18,7 +18,7 @@ def repo_config(): return RepoConfig( provider="local", project="test", - entity_key_serialization_version=2, + entity_key_serialization_version=3, registry="dummy_registry.db", ) @@ -49,7 +49,7 @@ def test_generate_entity_redis_keys(redis_online_store: RedisOnlineStore, repo_c repo_config, entity_keys ) expected = [ - b"\x02\x00\x00\x00entity\x03\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00test" + b"\x01\x00\x00\x00\x02\x00\x00\x00\x06\x00\x00\x00entity\x03\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00test" ] assert actual == expected diff --git a/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py b/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py index 9dcf7e4caf6..bd6283e2d7b 100644 --- a/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py +++ b/sdk/python/tests/unit/infra/scaffolding/test_repo_config.py @@ -48,7 +48,7 @@ def test_nullable_online_store_local(): registry: "registry.db" provider: local online_store: null - entity_key_serialization_version: 2 + entity_key_serialization_version: 3 """ ), expect_error=None, @@ -62,7 +62,7 @@ def test_local_config(): project: foo registry: "registry.db" provider: local - entity_key_serialization_version: 2 + entity_key_serialization_version: 3 """ ), expect_error=None, @@ -78,7 +78,7 @@ def test_local_config_with_full_online_class(): provider: local online_store: type: feast.infra.online_stores.sqlite.SqliteOnlineStore - entity_key_serialization_version: 2 + entity_key_serialization_version: 3 """ ), expect_error=None, @@ -94,7 +94,7 @@ def test_local_config_with_full_online_class_directly(): registry: "registry.db" 
provider: local online_store: feast.infra.online_stores.sqlite.SqliteOnlineStore - entity_key_serialization_version: 2 + entity_key_serialization_version: 3 """ ), expect_error=None, @@ -128,7 +128,7 @@ def test_no_online_store_type(): provider: local online_store: path: "blah" - entity_key_serialization_version: 2 + entity_key_serialization_version: 3 """ ), expect_error=None, @@ -158,7 +158,7 @@ def test_no_project(): provider: local online_store: path: foo - entity_key_serialization_version: 2 + entity_key_serialization_version: 3 """ ), expect_error="1 validation error for RepoConfig\nproject\n Field required", @@ -197,7 +197,7 @@ def test_no_provider(): registry: "registry.db" online_store: path: "blah" - entity_key_serialization_version: 2 + entity_key_serialization_version: 3 """ ), expect_error=None, @@ -219,7 +219,7 @@ def test_auth_config(): provider: local online_store: path: foo - entity_key_serialization_version: 2 + entity_key_serialization_version: 3 """ ), expect_error="missing authentication type", @@ -240,7 +240,7 @@ def test_auth_config(): provider: local online_store: path: foo - entity_key_serialization_version: 2 + entity_key_serialization_version: 3 """ ), expect_error="invalid authentication type=not_valid_auth_type", @@ -258,7 +258,7 @@ def test_auth_config(): provider: local online_store: path: foo - entity_key_serialization_version: 2 + entity_key_serialization_version: 3 """ ), expect_error=None, @@ -286,7 +286,7 @@ def test_auth_config(): provider: local online_store: path: foo - entity_key_serialization_version: 2 + entity_key_serialization_version: 3 """ ), expect_error=None, @@ -310,7 +310,7 @@ def test_auth_config(): provider: local online_store: path: foo - entity_key_serialization_version: 2 + entity_key_serialization_version: 3 """ ), expect_error=None, @@ -328,7 +328,7 @@ def test_auth_config(): provider: local online_store: path: foo - entity_key_serialization_version: 2 + entity_key_serialization_version: 3 """ ), 
expect_error=None, diff --git a/sdk/python/tests/unit/infra/test_inference_unit_tests.py b/sdk/python/tests/unit/infra/test_inference_unit_tests.py index 951f7033d23..f1aef20d113 100644 --- a/sdk/python/tests/unit/infra/test_inference_unit_tests.py +++ b/sdk/python/tests/unit/infra/test_inference_unit_tests.py @@ -243,7 +243,7 @@ def test_feature_view_inference_respects_basic_inference(): config = RepoConfig( provider="local", project="test", - entity_key_serialization_version=2, + entity_key_serialization_version=3, registry="dummy_registry.pb", ) provider = get_provider(config) @@ -264,7 +264,7 @@ def test_feature_view_inference_respects_basic_inference(): config = RepoConfig( provider="local", project="test", - entity_key_serialization_version=2, + entity_key_serialization_version=3, registry="dummy_registry.pb", ) provider = get_provider(config) @@ -301,7 +301,7 @@ def test_feature_view_inference_on_entity_value_types(): config = RepoConfig( provider="local", project="test", - entity_key_serialization_version=2, + entity_key_serialization_version=3, registry="dummy_registry.pb", ) provider = get_provider(config) @@ -377,7 +377,7 @@ def test_feature_view_inference_on_entity_columns(simple_dataset_1): config = RepoConfig( provider="local", project="test", - entity_key_serialization_version=2, + entity_key_serialization_version=3, registry="dummy_registry.pb", ) provider = get_provider(config) @@ -418,7 +418,7 @@ def test_feature_view_inference_on_feature_columns(simple_dataset_1): config = RepoConfig( provider="local", project="test", - entity_key_serialization_version=2, + entity_key_serialization_version=3, registry="dummy_registry.pb", ) provider = get_provider(config) @@ -476,7 +476,7 @@ def test_update_feature_services_with_inferred_features(simple_dataset_1): config = RepoConfig( provider="local", project="test", - entity_key_serialization_version=2, + entity_key_serialization_version=3, registry="dummy_registry.pb", ) provider = get_provider(config) @@ 
-539,7 +539,7 @@ def test_update_feature_services_with_specified_features(simple_dataset_1): config = RepoConfig( provider="local", project="test", - entity_key_serialization_version=2, + entity_key_serialization_version=3, registry="dummy_registry.pb", ) provider = get_provider(config) diff --git a/sdk/python/tests/unit/infra/test_key_encoding_utils.py b/sdk/python/tests/unit/infra/test_key_encoding_utils.py index bad94534bdb..4c82945fe88 100644 --- a/sdk/python/tests/unit/infra/test_key_encoding_utils.py +++ b/sdk/python/tests/unit/infra/test_key_encoding_utils.py @@ -1,5 +1,3 @@ -import pytest - from feast.infra.key_encoding_utils import ( _deserialize_value, _serialize_val, @@ -18,24 +16,16 @@ def test_serialize_entity_key(): EntityKeyProto( join_keys=["user"], entity_values=[ValueProto(int64_val=int(2**15))] ), - entity_key_serialization_version=2, + entity_key_serialization_version=3, ) # True int64, but should also be fine. serialize_entity_key( EntityKeyProto( join_keys=["user"], entity_values=[ValueProto(int64_val=int(2**31))] ), - entity_key_serialization_version=2, + entity_key_serialization_version=3, ) - # Old serialization scheme, should fail. 
- with pytest.raises(BaseException): - serialize_entity_key( - EntityKeyProto( - join_keys=["user"], entity_values=[ValueProto(int64_val=int(2**31))] - ), - ) - def test_deserialize_entity_key(): serialized_entity_key = serialize_entity_key( @@ -84,23 +74,9 @@ def test_serialize_value(): assert t == ValueType.INT32 assert v == b"\x01\x00\x00\x00" - # default entity_key_serialization_version is 1, so the result should be 4 bytes + # Default entity_key_serialization_version is 3, so result is of 8 bytes v, t = _serialize_val("int64_val", ValueProto(int64_val=1)) assert t == ValueType.INT64 - assert v == b"\x01\x00\x00\x00" - - # current entity_key_serialization_version is 2, so the result should be 8 bytes - v, t = _serialize_val( - "int64_val", ValueProto(int64_val=1), entity_key_serialization_version=2 - ) - assert t == ValueType.INT64 - assert v == b"\x01\x00\x00\x00\x00\x00\x00\x00" - - # new entity_key_serialization_version is 3, the result should be same as version 2 - v, t = _serialize_val( - "int64_val", ValueProto(int64_val=1), entity_key_serialization_version=3 - ) - assert t == ValueType.INT64 assert v == b"\x01\x00\x00\x00\x00\x00\x00\x00" diff --git a/sdk/python/tests/unit/local_feast_tests/test_local_feature_store.py b/sdk/python/tests/unit/local_feast_tests/test_local_feature_store.py index 931acfb3919..07b6fe093de 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_local_feature_store.py +++ b/sdk/python/tests/unit/local_feast_tests/test_local_feature_store.py @@ -742,6 +742,6 @@ def feature_store_with_local_registry(): project="default", provider="local", online_store=SqliteOnlineStoreConfig(path=online_store_path), - entity_key_serialization_version=2, + entity_key_serialization_version=3, ) ) diff --git a/sdk/python/tests/unit/online_store/test_online_retrieval.py b/sdk/python/tests/unit/online_store/test_online_retrieval.py index 0615a4ca9d1..35f1e0a3238 100644 --- a/sdk/python/tests/unit/online_store/test_online_retrieval.py +++ 
b/sdk/python/tests/unit/online_store/test_online_retrieval.py @@ -224,7 +224,7 @@ def test_get_online_features() -> None: online_store=store.config.online_store, project=store.project, provider=store.config.provider, - entity_key_serialization_version=2, + entity_key_serialization_version=3, ) ) @@ -287,7 +287,7 @@ def test_get_online_features() -> None: online_store=store.config.online_store, project=store.project, provider=store.config.provider, - entity_key_serialization_version=2, + entity_key_serialization_version=3, ) ) diff --git a/sdk/python/tests/unit/online_store/test_online_writes.py b/sdk/python/tests/unit/online_store/test_online_writes.py index 5d573bf6b92..9c6f5c8af10 100644 --- a/sdk/python/tests/unit/online_store/test_online_writes.py +++ b/sdk/python/tests/unit/online_store/test_online_writes.py @@ -43,7 +43,7 @@ def setUp(self): project="test_write_to_online_store", registry=os.path.join(data_dir, "registry.db"), provider="local", - entity_key_serialization_version=2, + entity_key_serialization_version=3, online_store=SqliteOnlineStoreConfig( path=os.path.join(data_dir, "online.db") ), @@ -163,7 +163,7 @@ def test_transform_on_write_pdf(self): project="test_write_to_online_store_with_transform", registry=os.path.join(data_dir, "registry.db"), provider="local", - entity_key_serialization_version=2, + entity_key_serialization_version=3, online_store=SqliteOnlineStoreConfig( path=os.path.join(data_dir, "online.db") ), diff --git a/sdk/python/tests/unit/test_offline_server.py b/sdk/python/tests/unit/test_offline_server.py index 74752f6952d..c5f309eb323 100644 --- a/sdk/python/tests/unit/test_offline_server.py +++ b/sdk/python/tests/unit/test_offline_server.py @@ -95,7 +95,7 @@ def remote_feature_store(offline_server): registry=registry_path, provider="local", offline_store=offline_config, - entity_key_serialization_version=2, + entity_key_serialization_version=3, # repo_config = ) ) diff --git 
a/sdk/python/tests/unit/test_on_demand_pandas_transformation.py b/sdk/python/tests/unit/test_on_demand_pandas_transformation.py index 1a04a466fbc..c5ab657becc 100644 --- a/sdk/python/tests/unit/test_on_demand_pandas_transformation.py +++ b/sdk/python/tests/unit/test_on_demand_pandas_transformation.py @@ -35,7 +35,7 @@ def test_pandas_transformation(): project="test_on_demand_python_transformation", registry=os.path.join(data_dir, "registry.db"), provider="local", - entity_key_serialization_version=2, + entity_key_serialization_version=3, online_store=SqliteOnlineStoreConfig( path=os.path.join(data_dir, "online.db") ), @@ -116,7 +116,7 @@ def test_pandas_transformation_returning_all_data_types(): project="test_on_demand_python_transformation", registry=os.path.join(data_dir, "registry.db"), provider="local", - entity_key_serialization_version=2, + entity_key_serialization_version=3, online_store=SqliteOnlineStoreConfig( path=os.path.join(data_dir, "online.db") ), @@ -312,7 +312,7 @@ def test_invalid_pandas_transformation_raises_type_error_on_apply(): project="test_on_demand_python_transformation", registry=os.path.join(data_dir, "registry.db"), provider="local", - entity_key_serialization_version=2, + entity_key_serialization_version=3, online_store=SqliteOnlineStoreConfig( path=os.path.join(data_dir, "online.db") ), diff --git a/sdk/python/tests/unit/test_on_demand_python_transformation.py b/sdk/python/tests/unit/test_on_demand_python_transformation.py index eb29c645e53..f53e100cf22 100644 --- a/sdk/python/tests/unit/test_on_demand_python_transformation.py +++ b/sdk/python/tests/unit/test_on_demand_python_transformation.py @@ -51,7 +51,7 @@ def setUp(self): project="test_on_demand_python_transformation", registry=os.path.join(data_dir, "registry.db"), provider="local", - entity_key_serialization_version=2, + entity_key_serialization_version=3, online_store=SqliteOnlineStoreConfig( path=os.path.join(data_dir, "online.db") ), @@ -407,7 +407,7 @@ def setUp(self): 
project="test_on_demand_python_transformation", registry=os.path.join(data_dir, "registry.db"), provider="local", - entity_key_serialization_version=2, + entity_key_serialization_version=3, online_store=SqliteOnlineStoreConfig( path=os.path.join(data_dir, "online.db") ), @@ -775,7 +775,7 @@ def test_invalid_python_transformation_raises_type_error_on_apply(): project="test_on_demand_python_transformation", registry=os.path.join(data_dir, "registry.db"), provider="local", - entity_key_serialization_version=2, + entity_key_serialization_version=3, online_store=SqliteOnlineStoreConfig( path=os.path.join(data_dir, "online.db") ), @@ -822,7 +822,7 @@ def test_stored_writes(self): project="test_on_demand_python_transformation", registry=os.path.join(data_dir, "registry.db"), provider="local", - entity_key_serialization_version=2, + entity_key_serialization_version=3, online_store=SqliteOnlineStoreConfig( path=os.path.join(data_dir, "online.db") ), diff --git a/sdk/python/tests/unit/test_serialization_version.py b/sdk/python/tests/unit/test_serialization_version.py index 00562e4000a..474d3de9ebe 100644 --- a/sdk/python/tests/unit/test_serialization_version.py +++ b/sdk/python/tests/unit/test_serialization_version.py @@ -12,6 +12,6 @@ def test_registry_entity_serialization_version(): provider="local", online_store="redis", registry=f"{tmpdir}/registry.db", - entity_key_serialization_version=2, + entity_key_serialization_version=3, ) - assertpy.assert_that(r.entity_key_serialization_version).is_equal_to(2) + assertpy.assert_that(r.entity_key_serialization_version).is_equal_to(3) diff --git a/sdk/python/tests/unit/test_substrait_transformation.py b/sdk/python/tests/unit/test_substrait_transformation.py index 351651cfda7..1e5cd0889ae 100644 --- a/sdk/python/tests/unit/test_substrait_transformation.py +++ b/sdk/python/tests/unit/test_substrait_transformation.py @@ -19,7 +19,7 @@ def test_ibis_pandas_parity(): project="test_on_demand_substrait_transformation", 
registry=os.path.join(data_dir, "registry.db"), provider="local", - entity_key_serialization_version=2, + entity_key_serialization_version=3, online_store=SqliteOnlineStoreConfig( path=os.path.join(data_dir, "online.db") ), diff --git a/sdk/python/tests/utils/auth_permissions_util.py b/sdk/python/tests/utils/auth_permissions_util.py index dcc456e1d82..c332a5ab8d3 100644 --- a/sdk/python/tests/utils/auth_permissions_util.py +++ b/sdk/python/tests/utils/auth_permissions_util.py @@ -158,7 +158,7 @@ def get_remote_registry_store(server_port, feature_store, tls_mode): auth=feature_store.config.auth, registry=registry_config, provider="local", - entity_key_serialization_version=2, + entity_key_serialization_version=3, repo_path=feature_store.repo_path, ) ) diff --git a/sdk/python/tests/utils/cli_repo_creator.py b/sdk/python/tests/utils/cli_repo_creator.py index 4b8f9aad04b..ea1d7fcf10b 100644 --- a/sdk/python/tests/utils/cli_repo_creator.py +++ b/sdk/python/tests/utils/cli_repo_creator.py @@ -84,7 +84,7 @@ def local_repo( path: {data_path / "online_store.db"} offline_store: type: {offline_store} - entity_key_serialization_version: 2 + entity_key_serialization_version: 3 """ ) elif online_store == "milvus": diff --git a/sdk/python/tests/utils/dynamo_table_creator.py b/sdk/python/tests/utils/dynamo_table_creator.py index 0ebc939dc11..ad547358a90 100644 --- a/sdk/python/tests/utils/dynamo_table_creator.py +++ b/sdk/python/tests/utils/dynamo_table_creator.py @@ -44,7 +44,7 @@ def insert_data_test_table(data, project, tbl_name, region): dynamodb_resource = boto3.resource("dynamodb", region_name=region) table_instance = dynamodb_resource.Table(f"{project}.{tbl_name}") for entity_key, features, timestamp, created_ts in data: - entity_id = compute_entity_id(entity_key, entity_key_serialization_version=2) + entity_id = compute_entity_id(entity_key, entity_key_serialization_version=3) with table_instance.batch_writer() as batch: batch.put_item( Item={ diff --git 
a/sdk/python/tests/utils/e2e_test_validation.py b/sdk/python/tests/utils/e2e_test_validation.py index ed66aead87d..6d887b8bb07 100644 --- a/sdk/python/tests/utils/e2e_test_validation.py +++ b/sdk/python/tests/utils/e2e_test_validation.py @@ -195,7 +195,7 @@ def make_feature_store_yaml( offline_store=offline_store_config, online_store=online_store, repo_path=str(Path(repo_dir_name)), - entity_key_serialization_version=2, + entity_key_serialization_version=3, ) config_dict = config.model_dump(by_alias=True) if ( diff --git a/ui/feature_repo/feature_store.yaml b/ui/feature_repo/feature_store.yaml index 60342c96bdb..80fcb7354fa 100644 --- a/ui/feature_repo/feature_store.yaml +++ b/ui/feature_repo/feature_store.yaml @@ -5,4 +5,4 @@ online_store: type: sqlite offline_store: type: file -entity_key_serialization_version: 2 +entity_key_serialization_version: 3 From d4ca14d5f9c57eace4ded577bb89f8c717cae63c Mon Sep 17 00:00:00 2001 From: Srihari Date: Thu, 19 Jun 2025 12:03:30 +0530 Subject: [PATCH 11/13] test: Add RHOAI upgrate test to validate feast apply and materialize functionality Signed-off-by: Srihari Signed-off-by: iamcodingcat --- .../test/e2e_rhoai/feast_postupgrade_test.go | 56 ++++++++++++++ .../test/e2e_rhoai/feast_preupgrade_test.go | 74 +++++++++++++++++++ infra/feast-operator/test/utils/test_util.go | 69 ++++++----------- 3 files changed, 154 insertions(+), 45 deletions(-) create mode 100644 infra/feast-operator/test/e2e_rhoai/feast_postupgrade_test.go create mode 100644 infra/feast-operator/test/e2e_rhoai/feast_preupgrade_test.go diff --git a/infra/feast-operator/test/e2e_rhoai/feast_postupgrade_test.go b/infra/feast-operator/test/e2e_rhoai/feast_postupgrade_test.go new file mode 100644 index 00000000000..d8d091a44b8 --- /dev/null +++ b/infra/feast-operator/test/e2e_rhoai/feast_postupgrade_test.go @@ -0,0 +1,56 @@ +/* +Copyright 2025 Feast Community. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2erhoai + +import ( + "fmt" + + . "github.com/feast-dev/feast/infra/feast-operator/test/utils" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("Feast PostUpgrade scenario Testing", Ordered, func() { + const ( + namespace = "test-ns-feast-upgrade" + testDir = "/test/e2e_rhoai" + feastDeploymentName = FeastPrefix + "credit-scoring" + feastCRName = "credit-scoring" + ) + + AfterAll(func() { + By(fmt.Sprintf("Deleting test namespace: %s", namespace)) + Expect(DeleteNamespace(namespace, testDir)).To(Succeed()) + fmt.Printf("Namespace %s deleted successfully\n", namespace) + }) + runPostUpgradeTest := func() { + By("Verify Feature Store CR is in Ready state") + ValidateFeatureStoreCRStatus(namespace, feastCRName) + + By("Running `feast apply` and `feast materialize-incremental` to validate registry definitions") + VerifyApplyFeatureStoreDefinitions(namespace, feastCRName, feastDeploymentName) + + By("Validating Feast entity, feature, and feature view presence") + VerifyFeastMethods(namespace, feastDeploymentName, testDir) + } + + // This context verifies that a pre-created Feast FeatureStore CR continues to function as expected + // after an upgrade. It validates `feast apply`, registry sync, feature retrieval, and model execution. 
+ Context("Feast post Upgrade Test", func() { + It("Should create and run a feastPostUpgrade test scenario feast apply and materialize functionality successfully", runPostUpgradeTest) + }) +}) diff --git a/infra/feast-operator/test/e2e_rhoai/feast_preupgrade_test.go b/infra/feast-operator/test/e2e_rhoai/feast_preupgrade_test.go new file mode 100644 index 00000000000..925f276c92d --- /dev/null +++ b/infra/feast-operator/test/e2e_rhoai/feast_preupgrade_test.go @@ -0,0 +1,74 @@ +/* +Copyright 2025 Feast Community. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2erhoai + +import ( + "fmt" + + . "github.com/feast-dev/feast/infra/feast-operator/test/utils" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var _ = Describe("Feast PreUpgrade scenario Testing", Ordered, func() { + const ( + namespace = "test-ns-feast-upgrade" + replaceNamespace = "test-ns-feast" + testDir = "/test/e2e_rhoai" + feastDeploymentName = FeastPrefix + "credit-scoring" + feastCRName = "credit-scoring" + ) + + filesToUpdateNamespace := []string{ + "test/testdata/feast_integration_test_crs/postgres.yaml", + "test/testdata/feast_integration_test_crs/redis.yaml", + "test/testdata/feast_integration_test_crs/feast.yaml", + } + + BeforeAll(func() { + By(fmt.Sprintf("Creating test namespace: %s", namespace)) + Expect(CreateNamespace(namespace, testDir)).To(Succeed()) + fmt.Printf("Namespace %s created successfully\n", namespace) + + By("Replacing placeholder namespace in CR YAMLs for test setup") + Expect(ReplaceNamespaceInYamlFilesInPlace(filesToUpdateNamespace, replaceNamespace, namespace)).To(Succeed()) + }) + + AfterAll(func() { + By("Restoring original namespace in CR YAMLs") + Expect(ReplaceNamespaceInYamlFilesInPlace(filesToUpdateNamespace, namespace, replaceNamespace)).To(Succeed()) + + if CurrentSpecReport().Failed() { + By(fmt.Sprintf("Deleting test namespace: %s", namespace)) + Expect(DeleteNamespace(namespace, testDir)).To(Succeed()) + fmt.Printf("Namespace %s deleted successfully\n", namespace) + } + }) + + runPreUpgradeTest := func() { + By("Applying Feast infra manifests and verifying setup") + ApplyFeastInfraManifestsAndVerify(namespace, testDir) + + By("Applying and validating the credit-scoring FeatureStore CR") + ApplyFeastYamlAndVerify(namespace, testDir, feastDeploymentName, feastCRName) + } + + // This context ensures the Feast CR setup is functional prior to any upgrade + Context("Feast Pre Upgrade Test", func() { + It("Should create and run a feastPreUpgrade test scenario feast credit-scoring CR setup successfully", runPreUpgradeTest) + }) +}) diff --git a/infra/feast-operator/test/utils/test_util.go b/infra/feast-operator/test/utils/test_util.go 
index bf27734096a..868ce9066f4 100644 --- a/infra/feast-operator/test/utils/test_util.go +++ b/infra/feast-operator/test/utils/test_util.go @@ -471,19 +471,18 @@ func DeleteNamespace(namespace string, testDir string) error { return nil } -// Test real-time credit scoring demo by applying feature store configs and verifying Feast definitions, materializing data, and executing a training and prediction jobs +// Test real-time credit scoring demo by applying feature store configs and verifying Feast definitions, materializing data. func RunTestApplyAndMaterializeFunc(testDir string, namespace string, feastCRName string, feastDeploymentName string) func() { return func() { - applyFeastInfraManifestsAndVerify(namespace, testDir) - applyFeastYamlAndVerify(namespace, testDir, feastDeploymentName, feastCRName) + ApplyFeastInfraManifestsAndVerify(namespace, testDir) + ApplyFeastYamlAndVerify(namespace, testDir, feastDeploymentName, feastCRName) VerifyApplyFeatureStoreDefinitions(namespace, feastCRName, feastDeploymentName) VerifyFeastMethods(namespace, feastDeploymentName, testDir) - TrainAndTestModel(namespace, feastCRName, feastDeploymentName, testDir) } } // applies the manifests for Redis and Postgres and checks whether the deployments become available -func applyFeastInfraManifestsAndVerify(namespace string, testDir string) { +func ApplyFeastInfraManifestsAndVerify(namespace string, testDir string) { By("Applying postgres.yaml and redis.yaml manifests") cmd := exec.Command("kubectl", "apply", "-n", namespace, "-f", "test/testdata/feast_integration_test_crs/postgres.yaml", "-f", "test/testdata/feast_integration_test_crs/redis.yaml") _, cmdOutputerr := Run(cmd, testDir) @@ -575,43 +574,6 @@ func VerifyOutputContains(output []byte, expectedSubstrings []string) { } } -// patches and validate the FeatureStore CR's CronJob to execute model training and prediction -func TrainAndTestModel(namespace string, feastCRName string, feastDeploymentName string, testDir string) { - 
By("Patching FeatureStore with train/test commands") - patch := `{ - "spec": { - "cronJob": { - "containerConfigs": { - "commands": [ - "pip install jupyter==1.1.1 scikit-learn==1.5.2 matplotlib==3.9.2 seaborn==0.13.2 joblib", - "cd ../ && python run.py" - ] - } - } - } - }` - cmd := exec.Command("kubectl", "patch", "feast/"+feastCRName, "-n", namespace, "--type=merge", "--patch", patch) - _, cmdOutputErr := Run(cmd, testDir) - ExpectWithOffset(1, cmdOutputErr).NotTo(HaveOccurred()) - fmt.Println("Patched FeatureStore with train/test commands") - - By("Validating patch was applied correctly") - - Eventually(func() string { - cmd := exec.Command("kubectl", "get", "feast/"+feastCRName, "-n", namespace, "-o", "jsonpath={.status.applied.cronJob.containerConfigs.commands}") - output, _ := Run(cmd, testDir) - return string(output) - }, "30s", "3s").Should( - And( - ContainSubstring("python run.py"), - ), - ) - fmt.Println("FeatureStore patched correctly with commands") - - By("Creating Job from CronJob") - CreateAndVerifyJobFromCron(namespace, feastDeploymentName, "feast-test-job", testDir, []string{"Loan rejected!"}) -} - // Create a Job and verifies its logs contain expected substrings func CreateAndVerifyJobFromCron(namespace, cronName, jobName, testDir string, expectedLogSubstrings []string) { By(fmt.Sprintf("Creating Job %s from CronJob %s", jobName, cronName)) @@ -649,7 +611,7 @@ func checkDeployment(namespace, name string) { } // validate that the status of the FeatureStore CR is "Ready". 
-func validateFeatureStoreCRStatus(namespace, crName string) { +func ValidateFeatureStoreCRStatus(namespace, crName string) { Eventually(func() string { cmd := exec.Command("kubectl", "get", "feast", crName, "-n", namespace, "-o", "jsonpath={.status.phase}") output, err := cmd.Output() @@ -675,7 +637,7 @@ func validateFeatureStoreYaml(namespace, deployment string) { } // apply and verifies the Feast deployment becomes available, the CR status is "Ready -func applyFeastYamlAndVerify(namespace string, testDir string, feastDeploymentName string, feastCRName string) { +func ApplyFeastYamlAndVerify(namespace string, testDir string, feastDeploymentName string, feastCRName string) { By("Applying Feast yaml for secrets and Feature store CR") cmd := exec.Command("kubectl", "apply", "-n", namespace, "-f", "test/testdata/feast_integration_test_crs/feast.yaml") @@ -684,7 +646,7 @@ func applyFeastYamlAndVerify(namespace string, testDir string, feastDeploymentNa checkDeployment(namespace, feastDeploymentName) By("Verify Feature Store CR is in Ready state") - validateFeatureStoreCRStatus(namespace, feastCRName) + ValidateFeatureStoreCRStatus(namespace, feastCRName) By("Verifying that the Postgres DB contains the expected Feast tables") cmd = exec.Command("kubectl", "exec", "deploy/postgres", "-n", namespace, "--", "psql", "-h", "localhost", "-U", "feast", "feast", "-c", `\dt`) @@ -714,3 +676,20 @@ func applyFeastYamlAndVerify(namespace string, testDir string, feastDeploymentNa By("Verifying client feature_store.yaml for expected store types") validateFeatureStoreYaml(namespace, feastDeploymentName) } + +// ReplaceNamespaceInYaml reads a YAML file, replaces all existingNamespace with the actual namespace +func ReplaceNamespaceInYamlFilesInPlace(filePaths []string, existingNamespace string, actualNamespace string) error { + for _, filePath := range filePaths { + data, err := os.ReadFile(filePath) + if err != nil { + return fmt.Errorf("failed to read YAML file %s: %w", filePath, 
err) + } + updated := strings.ReplaceAll(string(data), existingNamespace, actualNamespace) + + err = os.WriteFile(filePath, []byte(updated), 0644) + if err != nil { + return fmt.Errorf("failed to write updated YAML file %s: %w", filePath, err) + } + } + return nil +} From efd5d35f9d6f3a1834ea93aff6e616ffb93a34d7 Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Mon, 23 Jun 2025 08:36:45 -0600 Subject: [PATCH 12/13] feat: Enable materialization for ODFV Transform on Write (#5459) * feat: Enable materialization for ODFV Transform on Write Signed-off-by: Francisco Javier Arceo Signed-off-by: iamcodingcat --- sdk/python/feast/feature_store.py | 63 ++++-- .../feast/infra/passthrough_provider.py | 9 +- sdk/python/feast/infra/provider.py | 2 +- .../feast/infra/registry/base_registry.py | 4 +- sdk/python/feast/infra/registry/registry.py | 4 +- sdk/python/feast/infra/registry/remote.py | 2 +- sdk/python/feast/infra/registry/snowflake.py | 2 +- sdk/python/feast/infra/registry/sql.py | 2 +- .../test_on_demand_python_transformation.py | 204 ++++++++++++++++++ 9 files changed, 262 insertions(+), 30 deletions(-) diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 5cc232d5fca..8632d619fec 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -656,7 +656,7 @@ def _make_inferences( def _get_feature_views_to_materialize( self, feature_views: Optional[List[str]], - ) -> List[FeatureView]: + ) -> List[Union[FeatureView, OnDemandFeatureView]]: """ Returns the list of feature views that should be materialized. @@ -669,34 +669,53 @@ def _get_feature_views_to_materialize( FeatureViewNotFoundException: One of the specified feature views could not be found. ValueError: One of the specified feature views is not configured for materialization. 
""" - feature_views_to_materialize: List[FeatureView] = [] + feature_views_to_materialize: List[Union[FeatureView, OnDemandFeatureView]] = [] if feature_views is None: - feature_views_to_materialize = utils._list_feature_views( + regular_feature_views = utils._list_feature_views( self._registry, self.project, hide_dummy_entity=False ) - feature_views_to_materialize = [ - fv for fv in feature_views_to_materialize if fv.online - ] + feature_views_to_materialize.extend( + [fv for fv in regular_feature_views if fv.online] + ) stream_feature_views_to_materialize = self._list_stream_feature_views( hide_dummy_entity=False ) - feature_views_to_materialize += [ - sfv for sfv in stream_feature_views_to_materialize if sfv.online - ] + feature_views_to_materialize.extend( + [sfv for sfv in stream_feature_views_to_materialize if sfv.online] + ) + on_demand_feature_views_to_materialize = self.list_on_demand_feature_views() + feature_views_to_materialize.extend( + [ + odfv + for odfv in on_demand_feature_views_to_materialize + if odfv.write_to_online_store + ] + ) else: for name in feature_views: + feature_view: Union[FeatureView, OnDemandFeatureView] try: feature_view = self._get_feature_view(name, hide_dummy_entity=False) except FeatureViewNotFoundException: - feature_view = self._get_stream_feature_view( - name, hide_dummy_entity=False - ) + try: + feature_view = self._get_stream_feature_view( + name, hide_dummy_entity=False + ) + except FeatureViewNotFoundException: + feature_view = self.get_on_demand_feature_view(name) - if not feature_view.online: + if hasattr(feature_view, "online") and not feature_view.online: raise ValueError( f"FeatureView {feature_view.name} is not configured to be served online." ) + elif ( + hasattr(feature_view, "write_to_online_store") + and not feature_view.write_to_online_store + ): + raise ValueError( + f"OnDemandFeatureView {feature_view.name} is not configured for write_to_online_store." 
+ ) feature_views_to_materialize.append(feature_view) return feature_views_to_materialize @@ -1312,6 +1331,8 @@ def materialize_incremental( ) # TODO paging large loads for feature_view in feature_views_to_materialize: + if isinstance(feature_view, OnDemandFeatureView): + continue start_date = feature_view.most_recent_end_time if start_date is None: if feature_view.ttl is None: @@ -1340,7 +1361,7 @@ def tqdm_builder(length): return tqdm(total=length, ncols=100) start_date = utils.make_tzaware(start_date) - end_date = utils.make_tzaware(end_date) + end_date = utils.make_tzaware(end_date) or _utc_now() provider.materialize_single_feature_view( config=self.config, @@ -1351,13 +1372,13 @@ def tqdm_builder(length): project=self.project, tqdm_builder=tqdm_builder, ) - - self._registry.apply_materialization( - feature_view, - self.project, - start_date, - end_date, - ) + if not isinstance(feature_view, OnDemandFeatureView): + self._registry.apply_materialization( + feature_view, + self.project, + start_date, + end_date, + ) def materialize( self, diff --git a/sdk/python/feast/infra/passthrough_provider.py b/sdk/python/feast/infra/passthrough_provider.py index b532ac563d4..d4b586f5c93 100644 --- a/sdk/python/feast/infra/passthrough_provider.py +++ b/sdk/python/feast/infra/passthrough_provider.py @@ -420,17 +420,24 @@ def ingest_df_to_offline_store(self, feature_view: FeatureView, table: pa.Table) def materialize_single_feature_view( self, config: RepoConfig, - feature_view: FeatureView, + feature_view: Union[FeatureView, OnDemandFeatureView], start_date: datetime, end_date: datetime, registry: BaseRegistry, project: str, tqdm_builder: Callable[[int], tqdm], ) -> None: + if isinstance(feature_view, OnDemandFeatureView): + if not feature_view.write_to_online_store: + raise ValueError( + f"OnDemandFeatureView {feature_view.name} does not have write_to_online_store enabled" + ) + return assert ( isinstance(feature_view, BatchFeatureView) or isinstance(feature_view, 
StreamFeatureView) or isinstance(feature_view, FeatureView) + or isinstance(feature_view, OnDemandFeatureView) ), f"Unexpected type for {feature_view.name}: {type(feature_view)}" task = MaterializationTask( project=project, diff --git a/sdk/python/feast/infra/provider.py b/sdk/python/feast/infra/provider.py index 15917420af0..4f7b0d4b5c1 100644 --- a/sdk/python/feast/infra/provider.py +++ b/sdk/python/feast/infra/provider.py @@ -217,7 +217,7 @@ def ingest_df_to_offline_store( def materialize_single_feature_view( self, config: RepoConfig, - feature_view: FeatureView, + feature_view: Union[FeatureView, OnDemandFeatureView], start_date: datetime, end_date: datetime, registry: BaseRegistry, diff --git a/sdk/python/feast/infra/registry/base_registry.py b/sdk/python/feast/infra/registry/base_registry.py index f2374edf1b2..85810f1fbc1 100644 --- a/sdk/python/feast/infra/registry/base_registry.py +++ b/sdk/python/feast/infra/registry/base_registry.py @@ -16,7 +16,7 @@ from abc import ABC, abstractmethod from collections import defaultdict from datetime import datetime -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union from google.protobuf.json_format import MessageToJson from google.protobuf.message import Message @@ -432,7 +432,7 @@ def list_all_feature_views( @abstractmethod def apply_materialization( self, - feature_view: FeatureView, + feature_view: Union[FeatureView, OnDemandFeatureView], project: str, start_date: datetime, end_date: datetime, diff --git a/sdk/python/feast/infra/registry/registry.py b/sdk/python/feast/infra/registry/registry.py index 62a21d5c433..0cfbc77b24e 100644 --- a/sdk/python/feast/infra/registry/registry.py +++ b/sdk/python/feast/infra/registry/registry.py @@ -16,7 +16,7 @@ from enum import Enum from pathlib import Path from threading import Lock -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union from urllib.parse import urlparse from 
google.protobuf.internal.containers import RepeatedCompositeFieldContainer @@ -529,7 +529,7 @@ def get_data_source( def apply_materialization( self, - feature_view: FeatureView, + feature_view: Union[FeatureView, OnDemandFeatureView], project: str, start_date: datetime, end_date: datetime, diff --git a/sdk/python/feast/infra/registry/remote.py b/sdk/python/feast/infra/registry/remote.py index 590c0454b73..4122586046f 100644 --- a/sdk/python/feast/infra/registry/remote.py +++ b/sdk/python/feast/infra/registry/remote.py @@ -356,7 +356,7 @@ def list_feature_views( def apply_materialization( self, - feature_view: FeatureView, + feature_view: Union[FeatureView, OnDemandFeatureView], project: str, start_date: datetime, end_date: datetime, diff --git a/sdk/python/feast/infra/registry/snowflake.py b/sdk/python/feast/infra/registry/snowflake.py index 06403fe9aee..e46231ca7a0 100644 --- a/sdk/python/feast/infra/registry/snowflake.py +++ b/sdk/python/feast/infra/registry/snowflake.py @@ -992,7 +992,7 @@ def list_permissions( def apply_materialization( self, - feature_view: FeatureView, + feature_view: Union[FeatureView, OnDemandFeatureView], project: str, start_date: datetime, end_date: datetime, diff --git a/sdk/python/feast/infra/registry/sql.py b/sdk/python/feast/infra/registry/sql.py index c42e6e8b82b..68dcd893f9d 100644 --- a/sdk/python/feast/infra/registry/sql.py +++ b/sdk/python/feast/infra/registry/sql.py @@ -702,7 +702,7 @@ def apply_validation_reference( def apply_materialization( self, - feature_view: FeatureView, + feature_view: Union[FeatureView, OnDemandFeatureView], project: str, start_date: datetime, end_date: datetime, diff --git a/sdk/python/tests/unit/test_on_demand_python_transformation.py b/sdk/python/tests/unit/test_on_demand_python_transformation.py index f53e100cf22..9a09037d422 100644 --- a/sdk/python/tests/unit/test_on_demand_python_transformation.py +++ b/sdk/python/tests/unit/test_on_demand_python_transformation.py @@ -1117,6 +1117,210 @@ def 
python_stored_writes_feature_view( "current_datetime": [None], } + def test_materialize_with_odfv_writes(self): + with tempfile.TemporaryDirectory() as data_dir: + self.store = FeatureStore( + config=RepoConfig( + project="test_on_demand_python_transformation", + registry=os.path.join(data_dir, "registry.db"), + provider="local", + entity_key_serialization_version=3, + online_store=SqliteOnlineStoreConfig( + path=os.path.join(data_dir, "online.db") + ), + ) + ) + + end_date = datetime.now().replace(microsecond=0, second=0, minute=0) + start_date = end_date - timedelta(days=15) + + driver_entities = [1001, 1002, 1003, 1004, 1005] + driver_df = create_driver_hourly_stats_df( + driver_entities, start_date, end_date + ) + driver_stats_path = os.path.join(data_dir, "driver_stats.parquet") + driver_df.to_parquet( + path=driver_stats_path, allow_truncated_timestamps=True + ) + + driver = Entity(name="driver", join_keys=["driver_id"]) + + driver_stats_source = FileSource( + name="driver_hourly_stats_source", + path=driver_stats_path, + timestamp_field="event_timestamp", + ) + + driver_stats_fv = FeatureView( + name="driver_hourly_stats", + entities=[driver], + ttl=timedelta(days=1), + schema=[ + Field(name="conv_rate", dtype=Float32), + Field(name="acc_rate", dtype=Float32), + Field(name="avg_daily_trips", dtype=Int64), + ], + online=True, + source=driver_stats_source, + tags={}, + ) + + input_request_source = RequestSource( + name="vals_to_add", + schema=[ + Field(name="counter", dtype=Int64), + Field(name="input_datetime", dtype=UnixTimestamp), + ], + ) + + @on_demand_feature_view( + entities=[driver], + sources=[ + driver_stats_fv[["conv_rate", "acc_rate"]], + input_request_source, + ], + schema=[ + Field(name="conv_rate_plus_acc", dtype=Float64), + Field(name="current_datetime", dtype=UnixTimestamp), + Field(name="counter", dtype=Int64), + Field(name="input_datetime", dtype=UnixTimestamp), + Field(name="string_constant", dtype=String), + ], + mode="python", + 
write_to_online_store=True, + ) + def python_stored_writes_feature_view( + inputs: dict[str, Any], + ) -> dict[str, Any]: + output: dict[str, Any] = { + "conv_rate_plus_acc": [ + conv_rate + acc_rate + for conv_rate, acc_rate in zip( + inputs["conv_rate"], inputs["acc_rate"] + ) + ], + "current_datetime": [datetime.now() for _ in inputs["conv_rate"]], + "counter": [c + 1 for c in inputs["counter"]], + "input_datetime": [d for d in inputs["input_datetime"]], + "string_constant": ["test_constant"], + } + return output + + @on_demand_feature_view( + entities=[driver], + sources=[ + driver_stats_fv[["conv_rate", "acc_rate"]], + input_request_source, + ], + schema=[ + Field(name="conv_rate_plus_acc", dtype=Float64), + Field(name="current_datetime", dtype=UnixTimestamp), + Field(name="counter", dtype=Int64), + Field(name="input_datetime", dtype=UnixTimestamp), + Field(name="string_constant", dtype=String), + ], + mode="python", + write_to_online_store=False, + ) + def python_no_writes_feature_view( + inputs: dict[str, Any], + ) -> dict[str, Any]: + output: dict[str, Any] = { + "conv_rate_plus_acc": [ + conv_rate + acc_rate + for conv_rate, acc_rate in zip( + inputs["conv_rate"], inputs["acc_rate"] + ) + ], + "current_datetime": [datetime.now() for _ in inputs["conv_rate"]], + "counter": [c + 1 for c in inputs["counter"]], + "input_datetime": [d for d in inputs["input_datetime"]], + "string_constant": ["test_constant"], + } + return output + + self.store.apply( + [ + driver, + driver_stats_source, + driver_stats_fv, + python_stored_writes_feature_view, + python_no_writes_feature_view, + ] + ) + + feature_views_to_materialize = self.store._get_feature_views_to_materialize( + None + ) + + odfv_names = [ + fv.name + for fv in feature_views_to_materialize + if hasattr(fv, "write_to_online_store") + ] + assert "python_stored_writes_feature_view" in odfv_names + assert "python_no_writes_feature_view" not in odfv_names + + regular_fv_names = [ + fv.name + for fv in 
feature_views_to_materialize + if not hasattr(fv, "write_to_online_store") + ] + assert "driver_hourly_stats" in regular_fv_names + + materialize_end_date = datetime.now().replace( + microsecond=0, second=0, minute=0 + ) + materialize_start_date = materialize_end_date - timedelta(days=1) + + self.store.materialize(materialize_start_date, materialize_end_date) + + specific_feature_views_to_materialize = ( + self.store._get_feature_views_to_materialize( + ["driver_hourly_stats", "python_stored_writes_feature_view"] + ) + ) + assert len(specific_feature_views_to_materialize) == 2 + + # materialize some data into the online store for the python_stored_writes_feature_view + self.store.materialize( + materialize_start_date, + materialize_end_date, + ["python_stored_writes_feature_view"], + ) + # validate data is loaded to online store + online_response = self.store.get_online_features( + entity_rows=[{"driver_id": 1001}], + features=[ + "python_stored_writes_feature_view:conv_rate_plus_acc", + "python_stored_writes_feature_view:current_datetime", + "python_stored_writes_feature_view:counter", + "python_stored_writes_feature_view:input_datetime", + "python_stored_writes_feature_view:string_constant", + ], + ).to_dict() + assert sorted(list(online_response.keys())) == sorted( + [ + "driver_id", + "conv_rate_plus_acc", + "counter", + "current_datetime", + "input_datetime", + "string_constant", + ] + ) + assert online_response["driver_id"] == [1001] + + try: + self.store._get_feature_views_to_materialize( + ["python_no_writes_feature_view"] + ) + assert False, ( + "Should have raised ValueError for ODFV without write_to_online_store" + ) + except ValueError as e: + assert "not configured for write_to_online_store" in str(e) + def test_stored_writes_with_explode(self): with tempfile.TemporaryDirectory() as data_dir: self.store = FeatureStore( From e123a5d99c746ebc74110bf300d32b23ece10378 Mon Sep 17 00:00:00 2001 From: PrasannaKumarARDS Date: Tue, 24 Jun 2025 02:30:24 +0530 
Subject: [PATCH 13/13] feat: Add HybridOnlineStore for multi-backend online store routing (#5423) * feat: Added HybridOnlineStore for multi-backend online store routing - Implements HybridOnlineStore, enabling routing of online feature operations to different backends based on a configurable tag (e.g., tribe, team, or project) on the FeatureView. - Adds support for specifying the routing tag name via the 'routing_tag' field in the online_store config, allowing flexible backend selection. - Supports multi-tenancy and flexible data management by allowing multiple online store backends in a single Feast deployment. - added documentation - fixed linter raised issues Signed-off-by: r0b0fyi * feat: Added HybridOnlineStore for multi-backend online store routing - Implements HybridOnlineStore, enabling routing of online feature operations to different backends based on a configurable tag (e.g., tribe, team, or project) on the FeatureView. - Adds support for specifying the routing tag name via the 'routing_tag' field in the online_store config, allowing flexible backend selection. - Supports multi-tenancy and flexible data management by allowing multiple online store backends in a single Feast deployment. 
- added documentation - fixed linter raised issues Signed-off-by: r0b0fyi --------- Signed-off-by: r0b0fyi Co-authored-by: r0b0fyi Signed-off-by: iamcodingcat --- docs/reference/online-stores/hybrid.md | 111 ++++++ .../hybrid_online_store.py | 329 ++++++++++++++++++ .../hybrid_online_store_repo_configuration.py | 28 ++ .../online_store/hybrid_online_store.py | 25 ++ .../online_store/test_hybrid_online_store.py | 87 +++++ 5 files changed, 580 insertions(+) create mode 100644 docs/reference/online-stores/hybrid.md create mode 100644 sdk/python/feast/infra/online_stores/hybrid_online_store/hybrid_online_store.py create mode 100644 sdk/python/feast/infra/online_stores/hybrid_online_store/hybrid_online_store_repo_configuration.py create mode 100644 sdk/python/tests/integration/feature_repos/universal/online_store/hybrid_online_store.py create mode 100644 sdk/python/tests/integration/online_store/test_hybrid_online_store.py diff --git a/docs/reference/online-stores/hybrid.md b/docs/reference/online-stores/hybrid.md new file mode 100644 index 00000000000..38527d9a66e --- /dev/null +++ b/docs/reference/online-stores/hybrid.md @@ -0,0 +1,111 @@ +# Hybrid online store + +## Description + +The HybridOnlineStore allows routing online feature operations to different online store backends based on a configurable tag (such as `tribe`, `team`, or `project`) on the FeatureView. This enables a single Feast deployment to support multiple online store backends, each configured independently and selected dynamically at runtime. + +## Getting started + +To use the HybridOnlineStore, install Feast with all required online store dependencies (e.g., Bigtable, Cassandra, etc.) for the stores you plan to use. 
For example:
+
+```
+pip install 'feast[gcp,cassandra]'
+```
+
+## Example
+
+{% code title="feature_store.yaml" %}
+```yaml
+project: my_feature_repo
+registry: data/registry.db
+provider: local
+online_store:
+  type: hybrid_online_store.HybridOnlineStore
+  routing_tag: team # or any tag name you want to use in FeatureViews for routing
+  online_stores:
+    - type: bigtable
+      conf:
+        project_id: my_gcp_project
+        instance: my_bigtable_instance
+    - type: cassandra
+      conf:
+        hosts:
+          - cassandra1.example.com
+          - cassandra2.example.com
+        keyspace: feast_keyspace
+        username: feast_user
+        password: feast_password
+```
+{% endcode %}
+
+### Setting the Routing Tag in FeatureView
+
+To enable routing, add a tag to your FeatureView that matches the `routing_tag` specified in your `feature_store.yaml`. For example, if your `routing_tag` is `team`, add a `team` tag to your FeatureView:
+
+```yaml
+tags:
+  team: bigtable # This tag determines which online store is used
+```
+
+The value of this tag (e.g., `bigtable`) should match the type or identifier of the online store you want to use for this FeatureView. The HybridOnlineStore will route all online operations for this FeatureView to the corresponding backend.
+
+### Example FeatureView
+
+{% code title="feature_view" %}
+```yaml
+name: user_features
+entities:
+  - name: user_id
+    join_keys: ["user_id"]
+ttl: null
+schema:
+  - name: age
+    dtype: int64
+  - name: country
+    dtype: string
+online: true
+source:
+  path: data/user_features.parquet
+  event_timestamp_column: event_timestamp
+  created_timestamp_column: created_timestamp
+tags:
+  team: bigtable # This tag determines which online store is used
+```
+{% endcode %}
+
+The `team` tag in the FeatureView's `tags` field determines which online store backend is used for this FeatureView. In this example, all online operations for `user_features` will be routed to the Bigtable online store, as specified by the tag value and the `routing_tag` in your `feature_store.yaml`.
+ +The HybridOnlineStore will route requests to the correct online store based on the value of the tag specified by `routing_tag`. + +The full set of configuration options for each online store is available in their respective documentation: +- [BigtableOnlineStoreConfig](https://rtd.feast.dev/en/latest/#feast.infra.online_stores.bigtable.BigtableOnlineStoreConfig) +- [CassandraOnlineStoreConfig](https://rtd.feast.dev/en/master/#feast.infra.online_stores.cassandra_online_store.cassandra_online_store.CassandraOnlineStoreConfig) + +For a full explanation of configuration options, please refer to the documentation for each online store backend you configure in the `online_stores` list. + +Storage specifications can be found at [docs/specs/online_store_format.md](../../specs/online_store_format.md). + +## Functionality Matrix + +The set of functionality supported by online stores is described in detail [here](overview.md#functionality). Below is a matrix indicating which functionality is supported by the HybridOnlineStore. + +| | HybridOnlineStore | +|-----------------------------------------------------------|-------------------| +| write feature values to the online store | yes | +| read feature values from the online store | yes | +| update infrastructure (e.g. tables) in the online store | yes | +| teardown infrastructure (e.g. 
tables) in the online store | yes | +| generate a plan of infrastructure changes | no | +| support for on-demand transforms | yes | +| readable by Python SDK | yes | +| readable by Java | no | +| readable by Go | no | +| support for entityless feature views | yes | +| support for concurrent writing to the same key | yes | +| support for ttl (time to live) at retrieval | no | +| support for deleting expired data | no | +| collocated by feature view | yes | +| collocated by feature service | no | +| collocated by entity key | yes | + +To compare this set of functionality against other online stores, please see the full [functionality matrix](overview.md#functionality-matrix). diff --git a/sdk/python/feast/infra/online_stores/hybrid_online_store/hybrid_online_store.py b/sdk/python/feast/infra/online_stores/hybrid_online_store/hybrid_online_store.py new file mode 100644 index 00000000000..e929e039411 --- /dev/null +++ b/sdk/python/feast/infra/online_stores/hybrid_online_store/hybrid_online_store.py @@ -0,0 +1,329 @@ +""" +hybrid_online_store.py +---------------------- + +This module provides the HybridOnlineStore, a Feast OnlineStore implementation that enables routing online feature operations +to different online stores based on a configurable tag (e.g., tribe, team, or project) on the FeatureView. This allows a single Feast deployment +to support multiple online store backends, each configured independently and selected dynamically at runtime. + +Features: + - Supports multiple online store backends in a single Feast deployment. + - Routes online reads and writes to the correct backend based on a configurable tag on the FeatureView. + - Enables multi-tenancy and flexible data management strategies. + - Designed for extensibility and compatibility with Feast's OnlineStore interface. + +Usage: + 1. Add a tag (e.g., 'tribe', 'team', or any custom name) to your FeatureView. + 2. Configure multiple online stores in your Feast repo config under 'online_stores'. + 3. 
Set the 'routing_tag' field in your online_store config to specify which tag to use for routing. + 4. The HybridOnlineStore will route reads and writes to the correct backend based on the tag value. + +Example configuration (feature_store.yaml): + + online_store: + type: hybrid_online_store.HybridOnlineStore + routing_tag: team # or any tag name you want to use for routing + online_stores: + - type: feast.infra.online_stores.bigtable.BigtableOnlineStore + conf: + ... # bigtable config + - type: feast.infra.online_stores.contrib.cassandra_online_store.cassandra_online_store.CassandraOnlineStore + conf: + ... # cassandra config + +Example FeatureView: + + tags: + team: bigtable + +The HybridOnlineStore will route requests to the correct online store based on the value of the tag specified by 'routing_tag'. +""" + +from datetime import datetime +from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple + +from pydantic import StrictStr + +from feast import Entity, FeatureView, RepoConfig +from feast.infra.online_stores.helpers import get_online_store_from_config +from feast.infra.online_stores.online_store import OnlineStore +from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto +from feast.protos.feast.types.Value_pb2 import Value as ValueProto +from feast.repo_config import FeastConfigBaseModel, get_online_config_from_type + + +class HybridOnlineStoreConfig(FeastConfigBaseModel): + """ + Configuration for HybridOnlineStore. + + This config allows multiple online stores to be used in a single Feast deployment. Each online store is specified by its type (Python import path) + and a configuration dictionary. The HybridOnlineStore uses this configuration to instantiate and manage the set of online stores. + + Attributes: + type: The type identifier for the HybridOnlineStore. + online_stores: A list of OnlineStoresWithConfig, each specifying the type and config for an online store backend. 
+ """ + + type: Literal["HybridOnlineStore", "hybrid_online_store.HybridOnlineStore"] = ( + "hybrid_online_store.HybridOnlineStore" + ) + + class OnlineStoresWithConfig(FeastConfigBaseModel): + """ + Configuration for a single online store backend. + + Attributes: + type: Python import path to the online store class. + conf: Dictionary of configuration parameters for the online store. + """ + + type: StrictStr # Python import path to the online store class + conf: Dict + + online_stores: Optional[List[OnlineStoresWithConfig]] + routing_tag: StrictStr = ( + "tribe" # Configurable tag name for routing, default is 'tribe' + ) + + +class HybridOnlineStore(OnlineStore): + """ + HybridOnlineStore routes online feature operations to different online store backends + based on a tag (e.g., 'tribe') on the FeatureView. This enables multi-tenancy and flexible + backend selection in a single Feast deployment. + + The backend is selected dynamically at runtime according to the tag value. + """ + + def __init__(self): + """ + Initialize the HybridOnlineStore. Online stores are instantiated lazily on first use. + """ + self.online_stores = {} + self._initialized = False + + def _initialize_online_stores(self, config: RepoConfig): + """ + Lazily instantiate all configured online store backends from the repo config. + + Args: + config: Feast RepoConfig containing the online_stores configuration. 
+ """ + if self._initialized: + return + self.online_stores = {} + online_stores_cfg = getattr(config.online_store, "online_stores", []) + for store_cfg in online_stores_cfg: + config_cls = get_online_config_from_type( + store_cfg.type.split(".")[-1].lower() + ) + config_instance = config_cls(**store_cfg.conf) + online_store_instance = get_online_store_from_config(config_instance) + self.online_stores[store_cfg.type.split(".")[-1].lower()] = ( + online_store_instance + ) + self._initialized = True + + def _get_online_store(self, tribe_tag, config: RepoConfig): + """ + Retrieve the online store backend corresponding to the given tag value. + + Args: + tribe_tag: The tag value (e.g., 'tribe') used to select the backend. + config: Feast RepoConfig. + Returns: + The OnlineStore instance for the given tag, or None if not found. + """ + self._initialize_online_stores(config) + return self.online_stores.get(tribe_tag.lower()) + + def _prepare_repo_conf(self, config: RepoConfig, online_store_type: str): + """ + Prepare a RepoConfig for the selected online store backend. + + Args: + config: The original Feast RepoConfig. + online_store_type: The type of the online store backend to use. + Returns: + A dictionary representing the updated RepoConfig for the selected backend. 
+ """ + rconfig = config + for online_store in config.online_store.online_stores: + if online_store.type.split(".")[-1].lower() == online_store_type.lower(): + rconfig.online_config = online_store.conf + rconfig.online_config["type"] = online_store.type + data = rconfig.__dict__ + data["registry"] = data["registry_config"] + data["offline_store"] = data["offline_config"] + data["online_store"] = data["online_config"] + return data + + def _get_routing_tag_value(self, table: FeatureView, config: RepoConfig): + tag_name = getattr(config.online_store, "routing_tag", "tribe") + return table.tags.get(tag_name) + + def online_write_batch( + self, + config: RepoConfig, + table: FeatureView, + odata: List[ + Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]] + ], + progress: Optional[Callable[[int], Any]], + ) -> None: + """ + Write a batch of feature rows to the appropriate online store based on the FeatureView's tag. + + Args: + config: Feast RepoConfig. + table: FeatureView to write to. Must have a tag (e.g., 'tribe') to select the backend. + odata: List of tuples containing entity key, feature values, event timestamp, and created timestamp. + progress: Optional callback for progress reporting. + Raises: + ValueError: If the FeatureView does not have the required tag. + NotImplementedError: If no online store is found for the tag value. + """ + tribe = self._get_routing_tag_value(table, config) + if not tribe: + tag_name = getattr(config.online_store, "routing_tag", "tribe") + raise ValueError( + f"FeatureView must have a '{tag_name}' tag to use HybridOnlineStore." + ) + online_store = self._get_online_store(tribe, config) + if online_store: + config = RepoConfig(**self._prepare_repo_conf(config, tribe)) + online_store.online_write_batch(config, table, odata, progress) + else: + raise NotImplementedError( + f"No online store found for {getattr(config.online_store, 'routing_tag', 'tribe')} tag '{tribe}'. Please check your configuration." 
+ ) + + @staticmethod + def write_to_table( + created_ts, cur, entity_key_bin, feature_name, project, table, timestamp, val + ): + """ + (Not implemented) Write a single feature value to the online store table. + """ + pass + + def online_read( + self, + config: RepoConfig, + table: FeatureView, + entity_keys: List[EntityKeyProto], + requested_features: Optional[List[str]] = None, + ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: + """ + Read feature rows from the appropriate online store based on the FeatureView's tag. + + Args: + config: Feast RepoConfig. + table: FeatureView to read from. Must have a tag (e.g., 'tribe') to select the backend. + entity_keys: List of entity keys to read. + requested_features: Optional list of feature names to read. + Returns: + List of tuples containing event timestamp and feature values. + Raises: + ValueError: If the FeatureView does not have the required tag. + NotImplementedError: If no online store is found for the tag value. + """ + tribe = self._get_routing_tag_value(table, config) + if not tribe: + tag_name = getattr(config.online_store, "routing_tag", "tribe") + raise ValueError( + f"FeatureView must have a '{tag_name}' tag to use HybridOnlineStore." + ) + online_store = self._get_online_store(tribe, config) + if online_store: + config = RepoConfig(**self._prepare_repo_conf(config, tribe)) + return online_store.online_read( + config, table, entity_keys, requested_features + ) + else: + raise NotImplementedError( + f"No online store found for {getattr(config.online_store, 'routing_tag', 'tribe')} tag '{tribe}'. Please check your configuration." + ) + + def update( + self, + config: RepoConfig, + tables_to_delete: Sequence[FeatureView], + tables_to_keep: Sequence[FeatureView], + entities_to_delete: Sequence[Entity], + entities_to_keep: Sequence[Entity], + partial: bool, + ): + """ + Update the state of the online stores for the given FeatureViews and Entities. + + Args: + config: Feast RepoConfig. 
+            tables_to_delete: Sequence of FeatureViews to delete.
+            tables_to_keep: Sequence of FeatureViews to keep.
+            entities_to_delete: Sequence of Entities to delete.
+            entities_to_keep: Sequence of Entities to keep.
+            partial: Whether to perform a partial update.
+        Raises:
+            ValueError: If a FeatureView does not have the required tag.
+            NotImplementedError: If no online store is found for a tag value.
+        """
+        for table in tables_to_keep:
+            tribe = self._get_routing_tag_value(table, config)
+            if not tribe:
+                tag_name = getattr(config.online_store, "routing_tag", "tribe")
+                raise ValueError(
+                    f"FeatureView must have a '{tag_name}' tag to use HybridOnlineStore."
+                )
+            online_store = self._get_online_store(tribe, config)
+            if online_store:
+                # FIX(review): build the backend-specific RepoConfig under a local
+                # name. The previous code reassigned 'config' here, so from the
+                # second loop iteration on, routing (_get_routing_tag_value /
+                # _get_online_store) ran against a backend config that no longer
+                # carried the hybrid 'online_stores' list.
+                backend_config = RepoConfig(**self._prepare_repo_conf(config, tribe))
+                online_store.update(
+                    backend_config,
+                    tables_to_delete,
+                    tables_to_keep,
+                    entities_to_delete,
+                    entities_to_keep,
+                    partial,
+                )
+            else:
+                raise NotImplementedError(
+                    f"No online store found for {getattr(config.online_store, 'routing_tag', 'tribe')} tag '{tribe}'. Please check your configuration."
+                )
+
+    def teardown(
+        self,
+        config: RepoConfig,
+        tables: Sequence[FeatureView],
+        entities: Sequence[Entity],
+    ):
+        """
+        Teardown all managed online stores for the given FeatureViews and Entities.
+
+        Args:
+            config: Feast RepoConfig.
+            tables: Sequence of FeatureViews to teardown.
+            entities: Sequence of Entities to teardown.
+        """
+        # Use a set of (tribe, store_type, conf_id) to avoid duplicate teardowns for the same instance
+        tribes_seen = set()
+        online_stores_cfg = getattr(config.online_store, "online_stores", [])
+        tag_name = getattr(config.online_store, "routing_tag", "tribe")
+        for table in tables:
+            tribe = table.tags.get(tag_name)
+            if not tribe:
+                continue
+            # Find all store configs matching this tribe (supporting multiple instances of the same type)
+            for store_cfg in online_stores_cfg:
+                store_type = store_cfg.type
+                # Use id(store_cfg.conf) to distinguish different configs of the same type
+                key = (tribe, store_type, id(store_cfg.conf))
+                if key in tribes_seen:
+                    continue
+                tribes_seen.add(key)
+                # Only select the online store if tribe matches the type (or you can add a mapping in config for more flexibility)
+                if tribe.lower() == store_type.split(".")[-1].lower():
+                    online_store = self._get_online_store(tribe, config)
+                    if online_store:
+                        # FIX(review): bind the backend-specific RepoConfig to a local
+                        # name. The previous code reassigned 'config' inside this loop,
+                        # corrupting _get_online_store lookups for every subsequent
+                        # table/backend iteration.
+                        # NOTE(review): the full 'tables' list is passed to each
+                        # backend (original behavior preserved) — confirm backends
+                        # tolerate tables they do not own.
+                        backend_config = RepoConfig(**self._prepare_repo_conf(config, tribe))
+                        online_store.teardown(backend_config, tables, entities)
diff --git a/sdk/python/feast/infra/online_stores/hybrid_online_store/hybrid_online_store_repo_configuration.py b/sdk/python/feast/infra/online_stores/hybrid_online_store/hybrid_online_store_repo_configuration.py
new file mode 100644
index 00000000000..90a65a092d0
--- /dev/null
+++ b/sdk/python/feast/infra/online_stores/hybrid_online_store/hybrid_online_store_repo_configuration.py
@@ -0,0 +1,28 @@
+# Copyright 2025 The Feast Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file provides integration test repo configuration for HybridOnlineStore.
+# It enables running integration tests with multiple online store backends.
+# Update this file if you add more backends or change test setup.
+
+from tests.integration.feature_repos.integration_test_repo_config import (
+    IntegrationTestRepoConfig,
+)
+from tests.integration.feature_repos.universal.online_store.hybrid_online_store import (
+    HybridOnlineStoreCreator,
+)
+
+FULL_REPO_CONFIGS = [
+    IntegrationTestRepoConfig(online_store_creator=HybridOnlineStoreCreator),
+]
diff --git a/sdk/python/tests/integration/feature_repos/universal/online_store/hybrid_online_store.py b/sdk/python/tests/integration/feature_repos/universal/online_store/hybrid_online_store.py
new file mode 100644
index 00000000000..f0efbd11044
--- /dev/null
+++ b/sdk/python/tests/integration/feature_repos/universal/online_store/hybrid_online_store.py
@@ -0,0 +1,25 @@
+# FIX(review): import rooted at 'tests.' rather than 'sdk.python.tests.' — the
+# sibling repo-configuration file in this same patch imports from
+# 'tests.integration...', and the test suite runs with sdk/python as the root,
+# so the 'sdk.python.' prefix would fail to resolve.
+from tests.integration.feature_repos.universal.online_store_creator import (
+    OnlineStoreCreator,
+)
+
+
+class HybridOnlineStoreCreator(OnlineStoreCreator):
+    def create_online_store(self):
+        # Use Redis and SQLite as two backends for demonstration/testing, but mock Redis config for unit tests
+        return {
+            "type": "hybrid_online_store.HybridOnlineStore",
+            "online_stores": [
+                {
+                    "type": "redis",
+                    "conf": {
+                        "redis_type": "redis",
+                        "connection_string": "localhost:6379",
+                    },
+                },
+                {"type": "sqlite", "conf": {"path": "/tmp/feast_hybrid_test.db"}},
+            ],
+        }
+
+    def teardown(self):
+        # Implement any resource cleanup if needed (e.g., remove test DB files)
+        pass
diff --git a/sdk/python/tests/integration/online_store/test_hybrid_online_store.py b/sdk/python/tests/integration/online_store/test_hybrid_online_store.py
new file mode 100644
index 00000000000..4b9dad05ff8
--- /dev/null
+++
b/sdk/python/tests/integration/online_store/test_hybrid_online_store.py
@@ -0,0 +1,87 @@
+# FIX(review): import 'timezone' so the test can build an aware timestamp;
+# datetime.utcnow() is deprecated (Python 3.12+) and returns a naive datetime.
+from datetime import datetime, timezone
+from unittest.mock import patch
+
+import pytest
+
+from feast import Entity, FeatureView, Field, FileSource, RepoConfig, ValueType
+from feast.infra.online_stores.hybrid_online_store.hybrid_online_store import (
+    HybridOnlineStore,
+    HybridOnlineStoreConfig,
+)
+from feast.protos.feast.types.EntityKey_pb2 import EntityKey
+from feast.protos.feast.types.Value_pb2 import Value
+from feast.types import PrimitiveFeastType
+
+
+@pytest.fixture
+def sample_entity():
+    return Entity(name="id", join_keys=["id"], value_type=ValueType.INT64)
+
+
+@pytest.fixture
+def sample_feature_view(sample_entity):
+    file_source = FileSource(
+        path="/tmp/feast_hybrid_test.parquet",
+        event_timestamp_column="event_timestamp",
+    )
+    return FeatureView(
+        name="test_fv",
+        entities=[sample_entity],
+        schema=[Field(name="feature1", dtype=PrimitiveFeastType.INT64)],
+        online=True,
+        tags={"tribe": "redis"},
+        source=file_source,
+    )
+
+
+@pytest.fixture
+def sample_repo_config():
+    # Minimal config for HybridOnlineStore with two backends (mocked for test)
+    return RepoConfig(
+        registry="test-registry.db",
+        project="test_project",
+        provider="local",
+        online_store=HybridOnlineStoreConfig(
+            online_stores=[
+                HybridOnlineStoreConfig.OnlineStoresWithConfig(
+                    type="redis",
+                    conf={"redis_type": "redis", "connection_string": "localhost:6379"},
+                ),
+                HybridOnlineStoreConfig.OnlineStoresWithConfig(
+                    type="sqlite",
+                    conf={"path": "/tmp/feast_hybrid_test.db"},
+                ),
+            ]
+        ),
+        offline_store=None,
+    )
+
+
+@pytest.mark.usefixtures("sample_entity", "sample_feature_view", "sample_repo_config")
+def test_hybrid_online_store_write_and_read(sample_repo_config, sample_feature_view):
+    # Redis backend calls are mocked so the test needs no live Redis; the
+    # FeatureView's tribe="redis" tag routes both operations to that backend.
+    with (
+        patch(
+            "feast.infra.online_stores.redis.RedisOnlineStore.online_write_batch"
+        ) as mock_write,
+        patch(
+            "feast.infra.online_stores.redis.RedisOnlineStore.online_read"
+        ) as mock_read,
+    ):
+        mock_write.return_value = None
+        mock_read.return_value = [(None, {"feature1": Value(int64_val=100)})]
+        store = HybridOnlineStore()
+        entity_key = EntityKey(
+            join_keys=["id"],
+            entity_values=[Value(int64_val=1)],
+        )
+        # FIX(review): timezone-aware replacement for deprecated datetime.utcnow().
+        now = datetime.now(timezone.utc)
+        odata = [(entity_key, {"feature1": Value(int64_val=100)}, now, None)]
+        # Write to the online store (mocked)
+        store.online_write_batch(
+            sample_repo_config, sample_feature_view, odata, progress=None
+        )
+        # Read back (mocked)
+        result = store.online_read(
+            sample_repo_config, sample_feature_view, [entity_key]
+        )
+        assert result[0][1]["feature1"].int64_val == 100