From 5425a9299da9178f07863fd2f25d64de1623abf4 Mon Sep 17 00:00:00 2001 From: soojin Date: Tue, 17 Mar 2026 10:59:21 +0900 Subject: [PATCH 1/9] feat: Support nested collection types (Array/Set of Array/Set) (#5947) Add support for 2-level nested collection types: Array(Array(T)), Array(Set(T)), Set(Array(T)), and Set(Set(T)). - Add 4 generic ValueType enums (LIST_LIST, LIST_SET, SET_LIST, SET_SET) backed by RepeatedValue proto messages - Persist inner type info in Field tags (feast:nested_inner_type), following the existing Struct schema tag pattern - Handle edge cases: empty inner collections, Set dedup at inner level, depth limit enforcement (2 levels max) - Add proto/JSON/remote transport serialization support - Add 25 unit tests covering all combinations and edge cases Signed-off-by: Soojin Lee Co-Authored-By: Claude Opus 4.6 Signed-off-by: soojin --- protos/feast/types/Value.proto | 8 ++ sdk/python/feast/field.py | 37 +++++- .../feast/infra/online_stores/remote.py | 5 + sdk/python/feast/proto_json.py | 6 + .../feast/protos/feast/types/Value_pb2.py | 86 ++++++------- .../feast/protos/feast/types/Value_pb2.pyi | 30 ++++- sdk/python/feast/type_map.py | 86 +++++++++++++ sdk/python/feast/types.py | 69 ++++++++-- sdk/python/feast/value_type.py | 4 + sdk/python/tests/unit/test_type_map.py | 118 +++++++++++++++++ sdk/python/tests/unit/test_types.py | 120 +++++++++++++++++- 11 files changed, 505 insertions(+), 64 deletions(-) diff --git a/protos/feast/types/Value.proto b/protos/feast/types/Value.proto index 69922eb0e8e..21e4de17cf5 100644 --- a/protos/feast/types/Value.proto +++ b/protos/feast/types/Value.proto @@ -63,6 +63,10 @@ message ValueType { TIME_UUID_LIST = 39; UUID_SET = 40; TIME_UUID_SET = 41; + LIST_LIST = 42; + LIST_SET = 43; + SET_LIST = 44; + SET_SET = 45; } } @@ -108,6 +112,10 @@ message Value { StringList time_uuid_list_val = 39; StringSet uuid_set_val = 40; StringSet time_uuid_set_val = 41; + RepeatedValue list_list_val = 42; + RepeatedValue list_set_val = 43; + RepeatedValue set_list_val = 44; + RepeatedValue set_set_val = 45; } } diff --git a/sdk/python/feast/field.py b/sdk/python/feast/field.py index c61ed6a5c5e..adec17db2f8 100644 --- a/sdk/python/feast/field.py +++ b/sdk/python/feast/field.py @@ -23,6 +23,7 @@ from feast.value_type import ValueType STRUCT_SCHEMA_TAG = "feast:struct_schema" +NESTED_COLLECTION_INNER_TYPE_TAG = "feast:nested_inner_type" @typechecked @@ -118,7 +119,7 @@ def __str__(self): def to_proto(self) -> FieldProto: """Converts a Field object to its protobuf representation.""" - from feast.types import Array + from feast.types import Array, Set value_type = self.dtype.to_value_type() vector_search_metric = self.vector_search_metric or "" @@ -128,6 +129,11 @@ def to_proto(self) -> FieldProto: tags[STRUCT_SCHEMA_TAG] = _serialize_struct_schema(self.dtype) elif isinstance(self.dtype, Array) and isinstance(self.dtype.base_type, Struct): tags[STRUCT_SCHEMA_TAG] = _serialize_struct_schema(self.dtype.base_type) + # Persist nested collection type info in tags + if isinstance(self.dtype, (Array, Set)) and isinstance( + self.dtype.base_type, (Array, Set) + ): + tags[NESTED_COLLECTION_INNER_TYPE_TAG] = _feast_type_to_str(self.dtype) return FieldProto( name=self.name, value_type=value_type.value, @@ -155,17 +161,30 @@ def from_proto(cls, field_proto: FieldProto): # Reconstruct Struct type from persisted schema in tags from feast.types import Array + internal_tags = {STRUCT_SCHEMA_TAG, NESTED_COLLECTION_INNER_TYPE_TAG} dtype: FeastType if value_type == ValueType.STRUCT and STRUCT_SCHEMA_TAG in tags: dtype = _deserialize_struct_schema(tags[STRUCT_SCHEMA_TAG]) - user_tags = {k: v for k, v in tags.items() if k != STRUCT_SCHEMA_TAG} + user_tags = {k: v for k, v in tags.items() if k not in internal_tags} elif value_type == ValueType.STRUCT_LIST and STRUCT_SCHEMA_TAG in tags: inner_struct = _deserialize_struct_schema(tags[STRUCT_SCHEMA_TAG]) dtype = Array(inner_struct) - user_tags = {k: v for k, v in tags.items() if k != STRUCT_SCHEMA_TAG} + user_tags = {k: v for k, v in tags.items() if k not in internal_tags} + elif ( + value_type + in ( + ValueType.LIST_LIST, + ValueType.LIST_SET, + ValueType.SET_LIST, + ValueType.SET_SET, + ) + and NESTED_COLLECTION_INNER_TYPE_TAG in tags + ): + dtype = _str_to_feast_type(tags[NESTED_COLLECTION_INNER_TYPE_TAG]) + user_tags = {k: v for k, v in tags.items() if k not in internal_tags} else: dtype = from_value_type(value_type=value_type) - user_tags = tags + user_tags = {k: v for k, v in tags.items() if k not in internal_tags} return cls( name=field_proto.name, @@ -198,6 +217,7 @@ def _feast_type_to_str(feast_type: FeastType) -> str: from feast.types import ( Array, PrimitiveFeastType, + Set, ) if isinstance(feast_type, PrimitiveFeastType): @@ -209,6 +229,8 @@ def _feast_type_to_str(feast_type: FeastType) -> str: return json.dumps({"__struct__": nested}) elif isinstance(feast_type, Array): return f"Array({_feast_type_to_str(feast_type.base_type)})" + elif isinstance(feast_type, Set): + return f"Set({_feast_type_to_str(feast_type.base_type)})" else: return str(feast_type) @@ -218,6 +240,7 @@ def _str_to_feast_type(type_str: str) -> FeastType: from feast.types import ( Array, PrimitiveFeastType, + Set, ) # Check if it's an Array type @@ -226,6 +249,12 @@ def _str_to_feast_type(type_str: str) -> FeastType: base_type = _str_to_feast_type(inner) return Array(base_type) + # Check if it's a Set type + if type_str.startswith("Set(") and type_str.endswith(")"): + inner = type_str[4:-1] + base_type = _str_to_feast_type(inner) + return Set(base_type) + # Check if it's a nested Struct (JSON encoded) if type_str.startswith("{"): try: diff --git a/sdk/python/feast/infra/online_stores/remote.py b/sdk/python/feast/infra/online_stores/remote.py index f8e8dfce483..7597e95f2f4 100644 --- a/sdk/python/feast/infra/online_stores/remote.py +++ b/sdk/python/feast/infra/online_stores/remote.py @@ -106,6 +106,11 @@ def _proto_value_to_transport_value(proto_value: ValueProto) -> Any: if val_attr == "json_list_val": return list(getattr(proto_value, val_attr).val) + # Nested collection types use feast_value_type_to_python_type + # which handles recursive conversion of RepeatedValue protos. + if val_attr in ("list_list_val", "list_set_val", "set_list_val", "set_set_val"): + return feast_value_type_to_python_type(proto_value) + # Map/Struct types are converted to Python dicts by # feast_value_type_to_python_type. Serialise them to JSON strings # so the server-side DataFrame gets VARCHAR columns instead of diff --git a/sdk/python/feast/proto_json.py b/sdk/python/feast/proto_json.py index 487dc4284f3..eacc9c5ec3e 100644 --- a/sdk/python/feast/proto_json.py +++ b/sdk/python/feast/proto_json.py @@ -63,6 +63,12 @@ def to_json_object(printer: _Printer, message: ProtoMessage) -> JsonObject: # to JSON. The parse back result will be different from original message. if which is None or which == "null_val": return None + elif which in ("list_list_val", "list_set_val", "set_list_val", "set_set_val"): + # Nested collection: RepeatedValue containing Values + repeated = getattr(message, which) + value = [ + printer._MessageToJsonObject(inner_val) for inner_val in repeated.val + ] elif "_list_" in which: value = list(getattr(message, which).val) else: diff --git a/sdk/python/feast/protos/feast/types/Value_pb2.py b/sdk/python/feast/protos/feast/types/Value_pb2.py index 16b0a7a961c..c831f8da6c0 100644 --- a/sdk/python/feast/protos/feast/types/Value_pb2.py +++ b/sdk/python/feast/protos/feast/types/Value_pb2.py @@ -14,7 +14,7 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x17\x66\x65\x61st/types/Value.proto\x12\x0b\x66\x65\x61st.types\"\xc3\x04\n\tValueType\"\xb5\x04\n\x04\x45num\x12\x0b\n\x07INVALID\x10\x00\x12\t\n\x05\x42YTES\x10\x01\x12\n\n\x06STRING\x10\x02\x12\t\n\x05INT32\x10\x03\x12\t\n\x05INT64\x10\x04\x12\n\n\x06\x44OUBLE\x10\x05\x12\t\n\x05\x46LOAT\x10\x06\x12\x08\n\x04\x42OOL\x10\x07\x12\x12\n\x0eUNIX_TIMESTAMP\x10\x08\x12\x0e\n\nBYTES_LIST\x10\x0b\x12\x0f\n\x0bSTRING_LIST\x10\x0c\x12\x0e\n\nINT32_LIST\x10\r\x12\x0e\n\nINT64_LIST\x10\x0e\x12\x0f\n\x0b\x44OUBLE_LIST\x10\x0f\x12\x0e\n\nFLOAT_LIST\x10\x10\x12\r\n\tBOOL_LIST\x10\x11\x12\x17\n\x13UNIX_TIMESTAMP_LIST\x10\x12\x12\x08\n\x04NULL\x10\x13\x12\x07\n\x03MAP\x10\x14\x12\x0c\n\x08MAP_LIST\x10\x15\x12\r\n\tBYTES_SET\x10\x16\x12\x0e\n\nSTRING_SET\x10\x17\x12\r\n\tINT32_SET\x10\x18\x12\r\n\tINT64_SET\x10\x19\x12\x0e\n\nDOUBLE_SET\x10\x1a\x12\r\n\tFLOAT_SET\x10\x1b\x12\x0c\n\x08\x42OOL_SET\x10\x1c\x12\x16\n\x12UNIX_TIMESTAMP_SET\x10\x1d\x12\x08\n\x04JSON\x10 \x12\r\n\tJSON_LIST\x10!\x12\n\n\x06STRUCT\x10\"\x12\x0f\n\x0bSTRUCT_LIST\x10#\x12\x08\n\x04UUID\x10$\x12\r\n\tTIME_UUID\x10%\x12\r\n\tUUID_LIST\x10&\x12\x12\n\x0eTIME_UUID_LIST\x10\'\x12\x0c\n\x08UUID_SET\x10(\x12\x11\n\rTIME_UUID_SET\x10)\"\xfa\x0b\n\x05Value\x12\x13\n\tbytes_val\x18\x01 \x01(\x0cH\x00\x12\x14\n\nstring_val\x18\x02 \x01(\tH\x00\x12\x13\n\tint32_val\x18\x03 \x01(\x05H\x00\x12\x13\n\tint64_val\x18\x04 \x01(\x03H\x00\x12\x14\n\ndouble_val\x18\x05 \x01(\x01H\x00\x12\x13\n\tfloat_val\x18\x06 \x01(\x02H\x00\x12\x12\n\x08\x62ool_val\x18\x07 \x01(\x08H\x00\x12\x1c\n\x12unix_timestamp_val\x18\x08 \x01(\x03H\x00\x12\x30\n\x0e\x62ytes_list_val\x18\x0b \x01(\x0b\x32\x16.feast.types.BytesListH\x00\x12\x32\n\x0fstring_list_val\x18\x0c \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12\x30\n\x0eint32_list_val\x18\r \x01(\x0b\x32\x16.feast.types.Int32ListH\x00\x12\x30\n\x0eint64_list_val\x18\x0e \x01(\x0b\x32\x16.feast.types.Int64ListH\x00\x12\x32\n\x0f\x64ouble_list_val\x18\x0f \x01(\x0b\x32\x17.feast.types.DoubleListH\x00\x12\x30\n\x0e\x66loat_list_val\x18\x10 \x01(\x0b\x32\x16.feast.types.FloatListH\x00\x12.\n\rbool_list_val\x18\x11 \x01(\x0b\x32\x15.feast.types.BoolListH\x00\x12\x39\n\x17unix_timestamp_list_val\x18\x12 \x01(\x0b\x32\x16.feast.types.Int64ListH\x00\x12%\n\x08null_val\x18\x13 \x01(\x0e\x32\x11.feast.types.NullH\x00\x12#\n\x07map_val\x18\x14 \x01(\x0b\x32\x10.feast.types.MapH\x00\x12,\n\x0cmap_list_val\x18\x15 \x01(\x0b\x32\x14.feast.types.MapListH\x00\x12.\n\rbytes_set_val\x18\x16 \x01(\x0b\x32\x15.feast.types.BytesSetH\x00\x12\x30\n\x0estring_set_val\x18\x17 \x01(\x0b\x32\x16.feast.types.StringSetH\x00\x12.\n\rint32_set_val\x18\x18 \x01(\x0b\x32\x15.feast.types.Int32SetH\x00\x12.\n\rint64_set_val\x18\x19 \x01(\x0b\x32\x15.feast.types.Int64SetH\x00\x12\x30\n\x0e\x64ouble_set_val\x18\x1a \x01(\x0b\x32\x16.feast.types.DoubleSetH\x00\x12.\n\rfloat_set_val\x18\x1b \x01(\x0b\x32\x15.feast.types.FloatSetH\x00\x12,\n\x0c\x62ool_set_val\x18\x1c \x01(\x0b\x32\x14.feast.types.BoolSetH\x00\x12\x37\n\x16unix_timestamp_set_val\x18\x1d \x01(\x0b\x32\x15.feast.types.Int64SetH\x00\x12\x12\n\x08json_val\x18 \x01(\tH\x00\x12\x30\n\rjson_list_val\x18! \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12&\n\nstruct_val\x18\" \x01(\x0b\x32\x10.feast.types.MapH\x00\x12/\n\x0fstruct_list_val\x18# \x01(\x0b\x32\x14.feast.types.MapListH\x00\x12\x12\n\x08uuid_val\x18$ \x01(\tH\x00\x12\x17\n\rtime_uuid_val\x18% \x01(\tH\x00\x12\x30\n\ruuid_list_val\x18& \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12\x35\n\x12time_uuid_list_val\x18\' \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12.\n\x0cuuid_set_val\x18( \x01(\x0b\x32\x16.feast.types.StringSetH\x00\x12\x33\n\x11time_uuid_set_val\x18) \x01(\x0b\x32\x16.feast.types.StringSetH\x00\x42\x05\n\x03val\"\x18\n\tBytesList\x12\x0b\n\x03val\x18\x01 \x03(\x0c\"\x19\n\nStringList\x12\x0b\n\x03val\x18\x01 \x03(\t\"\x18\n\tInt32List\x12\x0b\n\x03val\x18\x01 \x03(\x05\"\x18\n\tInt64List\x12\x0b\n\x03val\x18\x01 \x03(\x03\"\x19\n\nDoubleList\x12\x0b\n\x03val\x18\x01 \x03(\x01\"\x18\n\tFloatList\x12\x0b\n\x03val\x18\x01 \x03(\x02\"\x17\n\x08\x42oolList\x12\x0b\n\x03val\x18\x01 \x03(\x08\"\x17\n\x08\x42ytesSet\x12\x0b\n\x03val\x18\x01 \x03(\x0c\"\x18\n\tStringSet\x12\x0b\n\x03val\x18\x01 \x03(\t\"\x17\n\x08Int32Set\x12\x0b\n\x03val\x18\x01 \x03(\x05\"\x17\n\x08Int64Set\x12\x0b\n\x03val\x18\x01 \x03(\x03\"\x18\n\tDoubleSet\x12\x0b\n\x03val\x18\x01 \x03(\x01\"\x17\n\x08\x46loatSet\x12\x0b\n\x03val\x18\x01 \x03(\x02\"\x16\n\x07\x42oolSet\x12\x0b\n\x03val\x18\x01 \x03(\x08\"m\n\x03Map\x12&\n\x03val\x18\x01 \x03(\x0b\x32\x19.feast.types.Map.ValEntry\x1a>\n\x08ValEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.feast.types.Value:\x02\x38\x01\"(\n\x07MapList\x12\x1d\n\x03val\x18\x01 \x03(\x0b\x32\x10.feast.types.Map\"0\n\rRepeatedValue\x12\x1f\n\x03val\x18\x01 \x03(\x0b\x32\x12.feast.types.Value*\x10\n\x04Null\x12\x08\n\x04NULL\x10\x00\x42Q\n\x11\x66\x65\x61st.proto.typesB\nValueProtoZ0github.com/feast-dev/feast/go/protos/feast/typesb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x17\x66\x65\x61st/types/Value.proto\x12\x0b\x66\x65\x61st.types\"\xfb\x04\n\tValueType\"\xed\x04\n\x04\x45num\x12\x0b\n\x07INVALID\x10\x00\x12\t\n\x05\x42YTES\x10\x01\x12\n\n\x06STRING\x10\x02\x12\t\n\x05INT32\x10\x03\x12\t\n\x05INT64\x10\x04\x12\n\n\x06\x44OUBLE\x10\x05\x12\t\n\x05\x46LOAT\x10\x06\x12\x08\n\x04\x42OOL\x10\x07\x12\x12\n\x0eUNIX_TIMESTAMP\x10\x08\x12\x0e\n\nBYTES_LIST\x10\x0b\x12\x0f\n\x0bSTRING_LIST\x10\x0c\x12\x0e\n\nINT32_LIST\x10\r\x12\x0e\n\nINT64_LIST\x10\x0e\x12\x0f\n\x0b\x44OUBLE_LIST\x10\x0f\x12\x0e\n\nFLOAT_LIST\x10\x10\x12\r\n\tBOOL_LIST\x10\x11\x12\x17\n\x13UNIX_TIMESTAMP_LIST\x10\x12\x12\x08\n\x04NULL\x10\x13\x12\x07\n\x03MAP\x10\x14\x12\x0c\n\x08MAP_LIST\x10\x15\x12\r\n\tBYTES_SET\x10\x16\x12\x0e\n\nSTRING_SET\x10\x17\x12\r\n\tINT32_SET\x10\x18\x12\r\n\tINT64_SET\x10\x19\x12\x0e\n\nDOUBLE_SET\x10\x1a\x12\r\n\tFLOAT_SET\x10\x1b\x12\x0c\n\x08\x42OOL_SET\x10\x1c\x12\x16\n\x12UNIX_TIMESTAMP_SET\x10\x1d\x12\x08\n\x04JSON\x10 \x12\r\n\tJSON_LIST\x10!\x12\n\n\x06STRUCT\x10\"\x12\x0f\n\x0bSTRUCT_LIST\x10#\x12\x08\n\x04UUID\x10$\x12\r\n\tTIME_UUID\x10%\x12\r\n\tUUID_LIST\x10&\x12\x12\n\x0eTIME_UUID_LIST\x10\'\x12\x0c\n\x08UUID_SET\x10(\x12\x11\n\rTIME_UUID_SET\x10)\x12\r\n\tLIST_LIST\x10*\x12\x0c\n\x08LIST_SET\x10+\x12\x0c\n\x08SET_LIST\x10,\x12\x0b\n\x07SET_SET\x10-\"\xca\r\n\x05Value\x12\x13\n\tbytes_val\x18\x01 \x01(\x0cH\x00\x12\x14\n\nstring_val\x18\x02 \x01(\tH\x00\x12\x13\n\tint32_val\x18\x03 \x01(\x05H\x00\x12\x13\n\tint64_val\x18\x04 \x01(\x03H\x00\x12\x14\n\ndouble_val\x18\x05 \x01(\x01H\x00\x12\x13\n\tfloat_val\x18\x06 \x01(\x02H\x00\x12\x12\n\x08\x62ool_val\x18\x07 \x01(\x08H\x00\x12\x1c\n\x12unix_timestamp_val\x18\x08 \x01(\x03H\x00\x12\x30\n\x0e\x62ytes_list_val\x18\x0b \x01(\x0b\x32\x16.feast.types.BytesListH\x00\x12\x32\n\x0fstring_list_val\x18\x0c \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12\x30\n\x0eint32_list_val\x18\r \x01(\x0b\x32\x16.feast.types.Int32ListH\x00\x12\x30\n\x0eint64_list_val\x18\x0e \x01(\x0b\x32\x16.feast.types.Int64ListH\x00\x12\x32\n\x0f\x64ouble_list_val\x18\x0f \x01(\x0b\x32\x17.feast.types.DoubleListH\x00\x12\x30\n\x0e\x66loat_list_val\x18\x10 \x01(\x0b\x32\x16.feast.types.FloatListH\x00\x12.\n\rbool_list_val\x18\x11 \x01(\x0b\x32\x15.feast.types.BoolListH\x00\x12\x39\n\x17unix_timestamp_list_val\x18\x12 \x01(\x0b\x32\x16.feast.types.Int64ListH\x00\x12%\n\x08null_val\x18\x13 \x01(\x0e\x32\x11.feast.types.NullH\x00\x12#\n\x07map_val\x18\x14 \x01(\x0b\x32\x10.feast.types.MapH\x00\x12,\n\x0cmap_list_val\x18\x15 \x01(\x0b\x32\x14.feast.types.MapListH\x00\x12.\n\rbytes_set_val\x18\x16 \x01(\x0b\x32\x15.feast.types.BytesSetH\x00\x12\x30\n\x0estring_set_val\x18\x17 \x01(\x0b\x32\x16.feast.types.StringSetH\x00\x12.\n\rint32_set_val\x18\x18 \x01(\x0b\x32\x15.feast.types.Int32SetH\x00\x12.\n\rint64_set_val\x18\x19 \x01(\x0b\x32\x15.feast.types.Int64SetH\x00\x12\x30\n\x0e\x64ouble_set_val\x18\x1a \x01(\x0b\x32\x16.feast.types.DoubleSetH\x00\x12.\n\rfloat_set_val\x18\x1b \x01(\x0b\x32\x15.feast.types.FloatSetH\x00\x12,\n\x0c\x62ool_set_val\x18\x1c \x01(\x0b\x32\x14.feast.types.BoolSetH\x00\x12\x37\n\x16unix_timestamp_set_val\x18\x1d \x01(\x0b\x32\x15.feast.types.Int64SetH\x00\x12\x12\n\x08json_val\x18 \x01(\tH\x00\x12\x30\n\rjson_list_val\x18! \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12&\n\nstruct_val\x18\" \x01(\x0b\x32\x10.feast.types.MapH\x00\x12/\n\x0fstruct_list_val\x18# \x01(\x0b\x32\x14.feast.types.MapListH\x00\x12\x12\n\x08uuid_val\x18$ \x01(\tH\x00\x12\x17\n\rtime_uuid_val\x18% \x01(\tH\x00\x12\x30\n\ruuid_list_val\x18& \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12\x35\n\x12time_uuid_list_val\x18\' \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12.\n\x0cuuid_set_val\x18( \x01(\x0b\x32\x16.feast.types.StringSetH\x00\x12\x33\n\x11time_uuid_set_val\x18) \x01(\x0b\x32\x16.feast.types.StringSetH\x00\x12\x33\n\rlist_list_val\x18* \x01(\x0b\x32\x1a.feast.types.RepeatedValueH\x00\x12\x32\n\x0clist_set_val\x18+ \x01(\x0b\x32\x1a.feast.types.RepeatedValueH\x00\x12\x32\n\x0cset_list_val\x18, \x01(\x0b\x32\x1a.feast.types.RepeatedValueH\x00\x12\x31\n\x0bset_set_val\x18- \x01(\x0b\x32\x1a.feast.types.RepeatedValueH\x00\x42\x05\n\x03val\"\x18\n\tBytesList\x12\x0b\n\x03val\x18\x01 \x03(\x0c\"\x19\n\nStringList\x12\x0b\n\x03val\x18\x01 \x03(\t\"\x18\n\tInt32List\x12\x0b\n\x03val\x18\x01 \x03(\x05\"\x18\n\tInt64List\x12\x0b\n\x03val\x18\x01 \x03(\x03\"\x19\n\nDoubleList\x12\x0b\n\x03val\x18\x01 \x03(\x01\"\x18\n\tFloatList\x12\x0b\n\x03val\x18\x01 \x03(\x02\"\x17\n\x08\x42oolList\x12\x0b\n\x03val\x18\x01 \x03(\x08\"\x17\n\x08\x42ytesSet\x12\x0b\n\x03val\x18\x01 \x03(\x0c\"\x18\n\tStringSet\x12\x0b\n\x03val\x18\x01 \x03(\t\"\x17\n\x08Int32Set\x12\x0b\n\x03val\x18\x01 \x03(\x05\"\x17\n\x08Int64Set\x12\x0b\n\x03val\x18\x01 \x03(\x03\"\x18\n\tDoubleSet\x12\x0b\n\x03val\x18\x01 \x03(\x01\"\x17\n\x08\x46loatSet\x12\x0b\n\x03val\x18\x01 \x03(\x02\"\x16\n\x07\x42oolSet\x12\x0b\n\x03val\x18\x01 \x03(\x08\"m\n\x03Map\x12&\n\x03val\x18\x01 \x03(\x0b\x32\x19.feast.types.Map.ValEntry\x1a>\n\x08ValEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.feast.types.Value:\x02\x38\x01\"(\n\x07MapList\x12\x1d\n\x03val\x18\x01 \x03(\x0b\x32\x10.feast.types.Map\"0\n\rRepeatedValue\x12\x1f\n\x03val\x18\x01 \x03(\x0b\x32\x12.feast.types.Value*\x10\n\x04Null\x12\x08\n\x04NULL\x10\x00\x42Q\n\x11\x66\x65\x61st.proto.typesB\nValueProtoZ0github.com/feast-dev/feast/go/protos/feast/typesb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -24,48 +24,48 @@ _globals['DESCRIPTOR']._serialized_options = b'\n\021feast.proto.typesB\nValueProtoZ0github.com/feast-dev/feast/go/protos/feast/types' _globals['_MAP_VALENTRY']._options = None _globals['_MAP_VALENTRY']._serialized_options = b'8\001' - _globals['_NULL']._serialized_start=2717 - _globals['_NULL']._serialized_end=2733 + _globals['_NULL']._serialized_start=2981 + _globals['_NULL']._serialized_end=2997 _globals['_VALUETYPE']._serialized_start=41 - _globals['_VALUETYPE']._serialized_end=620 + _globals['_VALUETYPE']._serialized_end=676 _globals['_VALUETYPE_ENUM']._serialized_start=55 - _globals['_VALUETYPE_ENUM']._serialized_end=620 - _globals['_VALUE']._serialized_start=623 - _globals['_VALUE']._serialized_end=2153 - _globals['_BYTESLIST']._serialized_start=2155 - _globals['_BYTESLIST']._serialized_end=2179 - _globals['_STRINGLIST']._serialized_start=2181 - _globals['_STRINGLIST']._serialized_end=2206 - _globals['_INT32LIST']._serialized_start=2208 - _globals['_INT32LIST']._serialized_end=2232 - _globals['_INT64LIST']._serialized_start=2234 - _globals['_INT64LIST']._serialized_end=2258 - _globals['_DOUBLELIST']._serialized_start=2260 - _globals['_DOUBLELIST']._serialized_end=2285 - _globals['_FLOATLIST']._serialized_start=2287 - _globals['_FLOATLIST']._serialized_end=2311 - _globals['_BOOLLIST']._serialized_start=2313 - _globals['_BOOLLIST']._serialized_end=2336 - _globals['_BYTESSET']._serialized_start=2338 - _globals['_BYTESSET']._serialized_end=2361 - _globals['_STRINGSET']._serialized_start=2363 - _globals['_STRINGSET']._serialized_end=2387 - _globals['_INT32SET']._serialized_start=2389 - _globals['_INT32SET']._serialized_end=2412 - _globals['_INT64SET']._serialized_start=2414 - _globals['_INT64SET']._serialized_end=2437 - _globals['_DOUBLESET']._serialized_start=2439 - _globals['_DOUBLESET']._serialized_end=2463 - _globals['_FLOATSET']._serialized_start=2465 - _globals['_FLOATSET']._serialized_end=2488 - _globals['_BOOLSET']._serialized_start=2490 - _globals['_BOOLSET']._serialized_end=2512 - _globals['_MAP']._serialized_start=2514 - _globals['_MAP']._serialized_end=2623 - _globals['_MAP_VALENTRY']._serialized_start=2561 - _globals['_MAP_VALENTRY']._serialized_end=2623 - _globals['_MAPLIST']._serialized_start=2625 - _globals['_MAPLIST']._serialized_end=2665 - _globals['_REPEATEDVALUE']._serialized_start=2667 - _globals['_REPEATEDVALUE']._serialized_end=2715 + _globals['_VALUETYPE_ENUM']._serialized_end=676 + _globals['_VALUE']._serialized_start=679 + _globals['_VALUE']._serialized_end=2417 + _globals['_BYTESLIST']._serialized_start=2419 + _globals['_BYTESLIST']._serialized_end=2443 + _globals['_STRINGLIST']._serialized_start=2445 + _globals['_STRINGLIST']._serialized_end=2470 + _globals['_INT32LIST']._serialized_start=2472 + _globals['_INT32LIST']._serialized_end=2496 + _globals['_INT64LIST']._serialized_start=2498 + _globals['_INT64LIST']._serialized_end=2522 + _globals['_DOUBLELIST']._serialized_start=2524 + _globals['_DOUBLELIST']._serialized_end=2549 + _globals['_FLOATLIST']._serialized_start=2551 + _globals['_FLOATLIST']._serialized_end=2575 + _globals['_BOOLLIST']._serialized_start=2577 + _globals['_BOOLLIST']._serialized_end=2600 + _globals['_BYTESSET']._serialized_start=2602 + _globals['_BYTESSET']._serialized_end=2625 + _globals['_STRINGSET']._serialized_start=2627 + _globals['_STRINGSET']._serialized_end=2651 + _globals['_INT32SET']._serialized_start=2653 + _globals['_INT32SET']._serialized_end=2676 + _globals['_INT64SET']._serialized_start=2678 + _globals['_INT64SET']._serialized_end=2701 + _globals['_DOUBLESET']._serialized_start=2703 + _globals['_DOUBLESET']._serialized_end=2727 + _globals['_FLOATSET']._serialized_start=2729 + _globals['_FLOATSET']._serialized_end=2752 + _globals['_BOOLSET']._serialized_start=2754 + _globals['_BOOLSET']._serialized_end=2776 + _globals['_MAP']._serialized_start=2778 + _globals['_MAP']._serialized_end=2887 + _globals['_MAP_VALENTRY']._serialized_start=2825 + _globals['_MAP_VALENTRY']._serialized_end=2887 + _globals['_MAPLIST']._serialized_start=2889 + _globals['_MAPLIST']._serialized_end=2929 + _globals['_REPEATEDVALUE']._serialized_start=2931 + _globals['_REPEATEDVALUE']._serialized_end=2979 # @@protoc_insertion_point(module_scope) diff --git a/sdk/python/feast/protos/feast/types/Value_pb2.pyi b/sdk/python/feast/protos/feast/types/Value_pb2.pyi index 75487088939..f83e9818eb4 100644 --- a/sdk/python/feast/protos/feast/types/Value_pb2.pyi +++ b/sdk/python/feast/protos/feast/types/Value_pb2.pyi @@ -92,6 +92,10 @@ class ValueType(google.protobuf.message.Message): TIME_UUID_LIST: ValueType._Enum.ValueType # 39 UUID_SET: ValueType._Enum.ValueType # 40 TIME_UUID_SET: ValueType._Enum.ValueType # 41 + LIST_LIST: ValueType._Enum.ValueType # 42 + LIST_SET: ValueType._Enum.ValueType # 43 + SET_LIST: ValueType._Enum.ValueType # 44 + SET_SET: ValueType._Enum.ValueType # 45 class Enum(_Enum, metaclass=_EnumEnumTypeWrapper): ... INVALID: ValueType.Enum.ValueType # 0 @@ -132,6 +136,10 @@ class ValueType(google.protobuf.message.Message): TIME_UUID_LIST: ValueType.Enum.ValueType # 39 UUID_SET: ValueType.Enum.ValueType # 40 TIME_UUID_SET: ValueType.Enum.ValueType # 41 + LIST_LIST: ValueType.Enum.ValueType # 42 + LIST_SET: ValueType.Enum.ValueType # 43 + SET_LIST: ValueType.Enum.ValueType # 44 + SET_SET: ValueType.Enum.ValueType # 45 def __init__( self, @@ -179,6 +187,10 @@ class Value(google.protobuf.message.Message): TIME_UUID_LIST_VAL_FIELD_NUMBER: builtins.int UUID_SET_VAL_FIELD_NUMBER: builtins.int TIME_UUID_SET_VAL_FIELD_NUMBER: builtins.int + LIST_LIST_VAL_FIELD_NUMBER: builtins.int + LIST_SET_VAL_FIELD_NUMBER: builtins.int + SET_LIST_VAL_FIELD_NUMBER: builtins.int + SET_SET_VAL_FIELD_NUMBER: builtins.int bytes_val: builtins.bytes string_val: builtins.str int32_val: builtins.int @@ -241,6 +253,14 @@ class Value(google.protobuf.message.Message): def uuid_set_val(self) -> global___StringSet: ... @property def time_uuid_set_val(self) -> global___StringSet: ... + @property + def list_list_val(self) -> global___RepeatedValue: ... + @property + def list_set_val(self) -> global___RepeatedValue: ... + @property + def set_list_val(self) -> global___RepeatedValue: ... + @property + def set_set_val(self) -> global___RepeatedValue: ... def __init__( self, *, @@ -281,10 +301,14 @@ class Value(google.protobuf.message.Message): time_uuid_list_val: global___StringList | None = ..., uuid_set_val: global___StringSet | None = ..., time_uuid_set_val: global___StringSet | None = ..., + list_list_val: global___RepeatedValue | None = ..., + list_set_val: global___RepeatedValue | None = ..., + set_list_val: global___RepeatedValue | None = ..., + set_set_val: global___RepeatedValue | None = ..., ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["bool_list_val", b"bool_list_val", "bool_set_val", b"bool_set_val", "bool_val", b"bool_val", "bytes_list_val", b"bytes_list_val", "bytes_set_val", b"bytes_set_val", "bytes_val", b"bytes_val", "double_list_val", b"double_list_val", "double_set_val", b"double_set_val", "double_val", b"double_val", "float_list_val", b"float_list_val", "float_set_val", b"float_set_val", "float_val", b"float_val", "int32_list_val", b"int32_list_val", "int32_set_val", b"int32_set_val", "int32_val", b"int32_val", "int64_list_val", b"int64_list_val", "int64_set_val", b"int64_set_val", "int64_val", b"int64_val", "json_list_val", b"json_list_val", "json_val", b"json_val", "map_list_val", b"map_list_val", "map_val", b"map_val", "null_val", b"null_val", "string_list_val", b"string_list_val", "string_set_val", b"string_set_val", "string_val", b"string_val", "struct_list_val", b"struct_list_val", "struct_val", b"struct_val", "time_uuid_list_val", b"time_uuid_list_val", "time_uuid_set_val", b"time_uuid_set_val", "time_uuid_val", b"time_uuid_val", "unix_timestamp_list_val", b"unix_timestamp_list_val", "unix_timestamp_set_val", b"unix_timestamp_set_val", "unix_timestamp_val", b"unix_timestamp_val", "uuid_list_val", b"uuid_list_val", "uuid_set_val", b"uuid_set_val", "uuid_val", b"uuid_val", "val", b"val"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["bool_list_val", b"bool_list_val", "bool_set_val", b"bool_set_val", "bool_val", b"bool_val", "bytes_list_val", b"bytes_list_val", "bytes_set_val", b"bytes_set_val", "bytes_val", b"bytes_val", "double_list_val", b"double_list_val", "double_set_val", b"double_set_val", "double_val", b"double_val", "float_list_val", b"float_list_val", "float_set_val", b"float_set_val", "float_val", b"float_val", "int32_list_val", b"int32_list_val", "int32_set_val", b"int32_set_val", "int32_val", b"int32_val", "int64_list_val", b"int64_list_val", "int64_set_val", b"int64_set_val", "int64_val", b"int64_val", "json_list_val", b"json_list_val", "json_val", b"json_val", "map_list_val", b"map_list_val", "map_val", b"map_val", "null_val", b"null_val", "string_list_val", b"string_list_val", "string_set_val", b"string_set_val", "string_val", b"string_val", "struct_list_val", b"struct_list_val", "struct_val", b"struct_val", "time_uuid_list_val", b"time_uuid_list_val", "time_uuid_set_val", b"time_uuid_set_val", "time_uuid_val", b"time_uuid_val", "unix_timestamp_list_val", b"unix_timestamp_list_val", "unix_timestamp_set_val", b"unix_timestamp_set_val", "unix_timestamp_val", b"unix_timestamp_val", "uuid_list_val", b"uuid_list_val", "uuid_set_val", b"uuid_set_val", "uuid_val", b"uuid_val", "val", b"val"]) -> None: ... - def WhichOneof(self, oneof_group: typing_extensions.Literal["val", b"val"]) -> typing_extensions.Literal["bytes_val", "string_val", "int32_val", "int64_val", "double_val", "float_val", "bool_val", "unix_timestamp_val", "bytes_list_val", "string_list_val", "int32_list_val", "int64_list_val", "double_list_val", "float_list_val", "bool_list_val", "unix_timestamp_list_val", "null_val", "map_val", "map_list_val", "bytes_set_val", "string_set_val", "int32_set_val", "int64_set_val", "double_set_val", "float_set_val", "bool_set_val", "unix_timestamp_set_val", "json_val", "json_list_val", "struct_val", "struct_list_val", "uuid_val", "time_uuid_val", "uuid_list_val", "time_uuid_list_val", "uuid_set_val", "time_uuid_set_val"] | None: ... + def HasField(self, field_name: typing_extensions.Literal["bool_list_val", b"bool_list_val", "bool_set_val", b"bool_set_val", "bool_val", b"bool_val", "bytes_list_val", b"bytes_list_val", "bytes_set_val", b"bytes_set_val", "bytes_val", b"bytes_val", "double_list_val", b"double_list_val", "double_set_val", b"double_set_val", "double_val", b"double_val", "float_list_val", b"float_list_val", "float_set_val", b"float_set_val", "float_val", b"float_val", "int32_list_val", b"int32_list_val", "int32_set_val", b"int32_set_val", "int32_val", b"int32_val", "int64_list_val", b"int64_list_val", "int64_set_val", b"int64_set_val", "int64_val", b"int64_val", "json_list_val", b"json_list_val", "json_val", b"json_val", "list_list_val", b"list_list_val", "list_set_val", b"list_set_val", "map_list_val", b"map_list_val", "map_val", b"map_val", "null_val", b"null_val", "set_list_val", b"set_list_val", "set_set_val", b"set_set_val", "string_list_val", b"string_list_val", "string_set_val", b"string_set_val", "string_val", b"string_val", "struct_list_val", b"struct_list_val", "struct_val", b"struct_val", "time_uuid_list_val", b"time_uuid_list_val", "time_uuid_set_val", b"time_uuid_set_val", "time_uuid_val", b"time_uuid_val", "unix_timestamp_list_val", b"unix_timestamp_list_val", "unix_timestamp_set_val", b"unix_timestamp_set_val", "unix_timestamp_val", b"unix_timestamp_val", "uuid_list_val", b"uuid_list_val", "uuid_set_val", b"uuid_set_val", "uuid_val", b"uuid_val", "val", b"val"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["bool_list_val", b"bool_list_val", "bool_set_val", b"bool_set_val", "bool_val", b"bool_val", "bytes_list_val", b"bytes_list_val", "bytes_set_val", b"bytes_set_val", "bytes_val", b"bytes_val", "double_list_val", b"double_list_val", "double_set_val", b"double_set_val", "double_val", b"double_val", "float_list_val", b"float_list_val", "float_set_val", b"float_set_val", "float_val", b"float_val", "int32_list_val", b"int32_list_val", "int32_set_val", b"int32_set_val", "int32_val", b"int32_val", "int64_list_val", b"int64_list_val", "int64_set_val", b"int64_set_val", "int64_val", b"int64_val", "json_list_val", b"json_list_val", "json_val", b"json_val", "list_list_val", b"list_list_val", "list_set_val", b"list_set_val", "map_list_val", b"map_list_val", "map_val", b"map_val", "null_val", b"null_val", "set_list_val", b"set_list_val", "set_set_val", b"set_set_val", "string_list_val", b"string_list_val", "string_set_val", b"string_set_val", "string_val", b"string_val", "struct_list_val", b"struct_list_val", "struct_val", b"struct_val", "time_uuid_list_val", b"time_uuid_list_val", "time_uuid_set_val", b"time_uuid_set_val", "time_uuid_val", b"time_uuid_val", "unix_timestamp_list_val", b"unix_timestamp_list_val", "unix_timestamp_set_val", b"unix_timestamp_set_val", "unix_timestamp_val", b"unix_timestamp_val", "uuid_list_val", b"uuid_list_val", "uuid_set_val", b"uuid_set_val", "uuid_val", b"uuid_val", "val", b"val"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["val", b"val"]) -> typing_extensions.Literal["bytes_val", "string_val", "int32_val", "int64_val", "double_val", "float_val", "bool_val", "unix_timestamp_val", "bytes_list_val", "string_list_val", "int32_list_val", "int64_list_val", "double_list_val", "float_list_val", "bool_list_val", "unix_timestamp_list_val", "null_val", "map_val", "map_list_val", "bytes_set_val", "string_set_val", "int32_set_val", "int64_set_val", "double_set_val", "float_set_val", "bool_set_val", "unix_timestamp_set_val", "json_val", "json_list_val", "struct_val", "struct_list_val", "uuid_val", "time_uuid_val", "uuid_list_val", "time_uuid_list_val", "uuid_set_val", "time_uuid_set_val", "list_list_val", "list_set_val", "set_list_val", "set_set_val"] | None: ... global___Value = Value diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 4478445f4e4..7257311ec68 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -53,6 +53,7 @@ Int64Set, Map, MapList, + RepeatedValue, StringList, StringSet, ) @@ -105,6 +106,10 @@ def feast_value_type_to_python_type( result.append(v) return result + # Handle nested collection types (list_list, list_set, set_list, set_set) + if val_attr in ("list_list_val", "list_set_val", "set_list_val", "set_set_val"): + return _handle_nested_collection_value(val) + # Handle Struct types — stored using Map proto, returned as dicts if val_attr == "struct_val": return _handle_map_value(val) @@ -198,6 +203,18 @@ def _handle_map_list_value(map_list_message) -> List[Dict[str, Any]]: return result +def _handle_nested_collection_value(repeated_value) -> List[Any]: + """Handle nested collection proto (RepeatedValue containing Values). + + Each inner Value is itself a list/set proto. We recursively convert + each inner Value to a Python list/set via feast_value_type_to_python_type. + """ + result = [] + for inner_value in repeated_value.val: + result.append(feast_value_type_to_python_type(inner_value)) + return result + + def feast_value_type_to_pandas_type(value_type: ValueType) -> Any: value_type_to_pandas_type: Dict[ValueType, str] = { ValueType.FLOAT: "float", @@ -446,6 +463,10 @@ def _convert_value_type_str_to_value_type(type_str: str) -> ValueType: "TIME_UUID_LIST": ValueType.TIME_UUID_LIST, "UUID_SET": ValueType.UUID_SET, "TIME_UUID_SET": ValueType.TIME_UUID_SET, + "LIST_LIST": ValueType.LIST_LIST, + "LIST_SET": ValueType.LIST_SET, + "SET_LIST": ValueType.SET_LIST, + "SET_SET": ValueType.SET_SET, } return type_map.get(type_str, ValueType.STRING) @@ -916,6 +937,15 @@ def _python_value_to_proto_value( Returns: List of Feast Value Proto """ + # Handle nested collection types (LIST_LIST, LIST_SET, SET_LIST, SET_SET) + if feast_value_type in ( + ValueType.LIST_LIST, + ValueType.LIST_SET, + ValueType.SET_LIST, + ValueType.SET_SET, + ): + return _convert_nested_collection_to_proto(feast_value_type, values) + # Handle Map types if feast_value_type == ValueType.MAP: result = [] @@ -1043,6 +1073,54 @@ def _python_value_to_proto_value( raise Exception(f"Unsupported data type: {feast_value_type}") +def _convert_nested_collection_to_proto( + feast_value_type: ValueType, values: List[Any] +) -> List[ProtoValue]: + """Convert nested collection values (list-of-lists, list-of-sets, etc.) to proto.""" + val_attr_map = { + ValueType.LIST_LIST: "list_list_val", + ValueType.LIST_SET: "list_set_val", + ValueType.SET_LIST: "set_list_val", + ValueType.SET_SET: "set_set_val", + } + val_attr = val_attr_map[feast_value_type] + + # Inner type has Set semantics for LIST_SET and SET_SET + inner_is_set = feast_value_type in (ValueType.LIST_SET, ValueType.SET_SET) + + result = [] + for value in values: + if value is None: + result.append(ProtoValue()) + else: + inner_values = [] + for inner_collection in value: + if inner_collection is None: + inner_values.append(ProtoValue()) + else: + inner_list = list(inner_collection) + # Apply Set semantics: deduplicate inner elements + if inner_is_set: + seen: list = [] + for item in inner_list: + if item not in seen: + seen.append(item) + inner_list = seen + if len(inner_list) == 0: + # Empty inner collection: store as empty ProtoValue + inner_values.append(ProtoValue()) + else: + # Wrap the inner list as a single list-typed Value + proto_vals = python_values_to_proto_values( + [inner_list], ValueType.UNKNOWN + ) + inner_values.append(proto_vals[0]) + repeated = RepeatedValue(val=inner_values) + proto = ProtoValue(**{val_attr: repeated}) + result.append(proto) + return result + + def _python_dict_to_map_proto(python_dict: Dict[str, Any]) -> Map: """Convert a Python dictionary to a Map proto message.""" map_proto = Map() @@ -1135,6 +1213,10 @@ def python_values_to_proto_values( "json_list_val": ValueType.JSON_LIST, "struct_val": ValueType.STRUCT, "struct_list_val": ValueType.STRUCT_LIST, + "list_list_val": ValueType.LIST_LIST, + "list_set_val": ValueType.LIST_SET, + "set_list_val": ValueType.SET_LIST, + "set_set_val": ValueType.SET_SET, "int32_set_val": ValueType.INT32_SET, "int64_set_val": ValueType.INT64_SET, "double_set_val": ValueType.DOUBLE_SET, @@ -1694,6 +1776,10 @@ def feast_value_type_to_pa( ValueType.JSON_LIST: pyarrow.list_(pyarrow.large_string()), ValueType.STRUCT: pyarrow.struct([]), ValueType.STRUCT_LIST: pyarrow.list_(pyarrow.struct([])), + ValueType.LIST_LIST: pyarrow.list_(pyarrow.list_(pyarrow.string())), + ValueType.LIST_SET: pyarrow.list_(pyarrow.list_(pyarrow.string())), + ValueType.SET_LIST: pyarrow.list_(pyarrow.list_(pyarrow.string())), + ValueType.SET_SET: pyarrow.list_(pyarrow.list_(pyarrow.string())), ValueType.NULL: pyarrow.null(), ValueType.UUID: pyarrow.string(), ValueType.TIME_UUID: pyarrow.string(), diff --git a/sdk/python/feast/types.py b/sdk/python/feast/types.py index 6f027f08e0b..4c0935086e4 100644 --- a/sdk/python/feast/types.py +++ b/sdk/python/feast/types.py @@ -176,8 +176,18 @@ class Array(ComplexFeastType): base_type: Union[PrimitiveFeastType, ComplexFeastType] def __init__(self, base_type: Union[PrimitiveFeastType, "ComplexFeastType"]): - # Allow Struct as a base type for Array(Struct(...)) - if not isinstance(base_type, Struct) and base_type not in SUPPORTED_BASE_TYPES: + # Allow Struct, Array, and Set as base types for nested collections + if isinstance(base_type, (Struct, Array, Set)): + # Enforce 2-level depth limit: reject Array(Array(Array(...))) etc. + if isinstance(base_type, (Array, Set)) and isinstance( + base_type.base_type, (Array, Set) + ): + raise ValueError( + f"Nested collection types are limited to 2 levels of nesting. " + f"{type(base_type).__name__}({type(base_type.base_type).__name__}(...)) " + f"is too deeply nested." + ) + elif base_type not in SUPPORTED_BASE_TYPES: raise ValueError( f"Type {type(base_type)} is currently not supported as a base type for Array." ) @@ -187,6 +197,10 @@ def __init__(self, base_type: Union[PrimitiveFeastType, "ComplexFeastType"]): def to_value_type(self) -> ValueType: if isinstance(self.base_type, Struct): return ValueType.STRUCT_LIST + if isinstance(self.base_type, Array): + return ValueType.LIST_LIST + if isinstance(self.base_type, Set): + return ValueType.LIST_SET assert isinstance(self.base_type, PrimitiveFeastType) value_type_name = PRIMITIVE_FEAST_TYPES_TO_VALUE_TYPES[self.base_type.name] value_type_list_name = value_type_name + "_LIST" @@ -207,16 +221,30 @@ class Set(ComplexFeastType): base_type: Union[PrimitiveFeastType, ComplexFeastType] def __init__(self, base_type: Union[PrimitiveFeastType, ComplexFeastType]): - # Sets do not support MAP as a base type - supported_set_types = [t for t in SUPPORTED_BASE_TYPES if t not in (Map,)] - if base_type not in supported_set_types: - raise ValueError( - f"Type {type(base_type)} is currently not supported as a base type for Set." - ) + # Allow Array and Set as base types for nested collections + if isinstance(base_type, (Array, Set)): + # Enforce 2-level depth limit + if isinstance(base_type.base_type, (Array, Set)): + raise ValueError( + f"Nested collection types are limited to 2 levels of nesting. " + f"{type(base_type).__name__}({type(base_type.base_type).__name__}(...)) " + f"is too deeply nested." + ) + else: + # Sets do not support MAP as a base type + supported_set_types = [t for t in SUPPORTED_BASE_TYPES if t not in (Map,)] + if base_type not in supported_set_types: + raise ValueError( + f"Type {type(base_type)} is currently not supported as a base type for Set." + ) self.base_type = base_type def to_value_type(self) -> ValueType: + if isinstance(self.base_type, Array): + return ValueType.SET_LIST + if isinstance(self.base_type, Set): + return ValueType.SET_SET assert isinstance(self.base_type, PrimitiveFeastType) value_type_name = PRIMITIVE_FEAST_TYPES_TO_VALUE_TYPES[self.base_type.name] value_type_set_name = value_type_name + "_SET" @@ -365,6 +393,8 @@ def from_feast_to_pyarrow_type(feast_type: FeastType) -> pyarrow.DataType: base_type = feast_type.base_type if isinstance(base_type, Struct): return pyarrow.list_(base_type.to_pyarrow_type()) + if isinstance(base_type, (Array, Set)): + return pyarrow.list_(from_feast_to_pyarrow_type(base_type)) if isinstance(base_type, PrimitiveFeastType): if base_type == Map: return pyarrow.list_(pyarrow.map_(pyarrow.string(), pyarrow.string())) @@ -372,6 +402,8 @@ def from_feast_to_pyarrow_type(feast_type: FeastType) -> pyarrow.DataType: return pyarrow.list_(FEAST_TYPES_TO_PYARROW_TYPES[base_type]) elif isinstance(feast_type, Set): base_type = feast_type.base_type + if isinstance(base_type, (Array, Set)): + return pyarrow.list_(from_feast_to_pyarrow_type(base_type)) if isinstance(base_type, PrimitiveFeastType): if base_type in FEAST_TYPES_TO_PYARROW_TYPES: return pyarrow.list_(FEAST_TYPES_TO_PYARROW_TYPES[base_type]) @@ -402,6 +434,17 @@ def from_value_type( if value_type == ValueType.STRUCT_LIST: return Array(Struct({"_value": String})) + # Nested collection types use placeholder inner types. + # Real inner type is restored from Field tags during deserialization. + if value_type == ValueType.LIST_LIST: + return Array(Array(String)) + if value_type == ValueType.LIST_SET: + return Array(Set(String)) + if value_type == ValueType.SET_LIST: + return Set(Array(String)) + if value_type == ValueType.SET_SET: + return Set(Set(String)) + raise ValueError(f"Could not convert value type {value_type} to FeastType.") @@ -426,6 +469,16 @@ def from_feast_type( if isinstance(feast_type, Array) and isinstance(feast_type.base_type, Struct): return ValueType.STRUCT_LIST + # Handle nested collection types + if isinstance(feast_type, Array) and isinstance(feast_type.base_type, Array): + return ValueType.LIST_LIST + if isinstance(feast_type, Array) and isinstance(feast_type.base_type, Set): + return ValueType.LIST_SET + if isinstance(feast_type, Set) and isinstance(feast_type.base_type, Array): + return ValueType.SET_LIST + if isinstance(feast_type, Set) and isinstance(feast_type.base_type, Set): + return ValueType.SET_SET + if feast_type in VALUE_TYPES_TO_FEAST_TYPES.values(): return list(VALUE_TYPES_TO_FEAST_TYPES.keys())[ list(VALUE_TYPES_TO_FEAST_TYPES.values()).index(feast_type) diff --git a/sdk/python/feast/value_type.py b/sdk/python/feast/value_type.py index 0575d25a1f1..0fb4071895a 100644 --- a/sdk/python/feast/value_type.py +++ b/sdk/python/feast/value_type.py @@ -77,6 +77,10 @@ class ValueType(enum.Enum): TIME_UUID_LIST = 39 UUID_SET = 40 TIME_UUID_SET = 41 + LIST_LIST = 42 + LIST_SET = 43 + SET_LIST = 44 + SET_SET = 45 ListType = Union[ diff --git a/sdk/python/tests/unit/test_type_map.py b/sdk/python/tests/unit/test_type_map.py index 6715817d3cb..018beb9da6e 100644 --- a/sdk/python/tests/unit/test_type_map.py +++ b/sdk/python/tests/unit/test_type_map.py @@ -1553,3 +1553,121 @@ def test_pg_uuid_type_mapping(self): """PostgreSQL uuid type maps to ValueType.UUID.""" assert pg_type_to_feast_value_type("uuid") == ValueType.UUID assert pg_type_to_feast_value_type("uuid[]") == ValueType.UUID_LIST + + +class TestNestedCollectionTypes: + """Tests for nested collection type proto conversion (LIST_LIST, LIST_SET, SET_LIST, SET_SET).""" + + def test_list_list_proto_roundtrip(self): + """Test python_values_to_proto_values and feast_value_type_to_python_type for LIST_LIST.""" + values = [[[1, 2, 3], [4, 5]]] + protos = python_values_to_proto_values(values, ValueType.LIST_LIST) + assert len(protos) == 1 + assert protos[0].WhichOneof("val") == "list_list_val" + result = feast_value_type_to_python_type(protos[0]) + assert isinstance(result, list) + assert len(result) == 2 + + def test_list_set_proto_roundtrip(self): + """Test LIST_SET proto conversion.""" + values = [[[1, 2], [3, 4, 5]]] + protos = python_values_to_proto_values(values, ValueType.LIST_SET) + assert len(protos) == 1 + assert protos[0].WhichOneof("val") == "list_set_val" + result = feast_value_type_to_python_type(protos[0]) + assert isinstance(result, list) + assert len(result) == 2 + + def test_set_list_proto_roundtrip(self): + """Test SET_LIST proto conversion.""" + values = [[["a", "b"], ["c"]]] + protos = python_values_to_proto_values(values, ValueType.SET_LIST) + assert len(protos) == 1 + assert protos[0].WhichOneof("val") == "set_list_val" + result = feast_value_type_to_python_type(protos[0]) + assert isinstance(result, list) + assert len(result) == 2 + + def test_set_set_proto_roundtrip(self): + """Test SET_SET proto conversion.""" + values = [[["x", "y"], ["z"]]] + protos = python_values_to_proto_values(values, ValueType.SET_SET) + assert len(protos) == 1 + assert protos[0].WhichOneof("val") == "set_set_val" + result = feast_value_type_to_python_type(protos[0]) + assert isinstance(result, list) + + def test_nested_collection_null_handling(self): + """Test that None values are handled correctly.""" + values = [None] + protos = python_values_to_proto_values(values, ValueType.LIST_LIST) + assert len(protos) == 1 + assert protos[0].WhichOneof("val") is None + + def test_convert_value_type_str_nested(self): + """Test _convert_value_type_str_to_value_type for nested types.""" + assert _convert_value_type_str_to_value_type("LIST_LIST") == ValueType.LIST_LIST + assert _convert_value_type_str_to_value_type("LIST_SET") == ValueType.LIST_SET + assert _convert_value_type_str_to_value_type("SET_LIST") == ValueType.SET_LIST + assert _convert_value_type_str_to_value_type("SET_SET") == ValueType.SET_SET + + def test_nested_collection_empty_inner_list(self): + """Test that empty inner collections are handled gracefully.""" + values = [[[], [1, 2]]] + protos = python_values_to_proto_values(values, ValueType.LIST_LIST) + result = feast_value_type_to_python_type(protos[0]) + assert isinstance(result, list) + assert len(result) == 2 + # Empty inner list should round-trip as None (empty ProtoValue) + assert result[0] is None + assert result[1] == [1, 2] + + def test_nested_collection_inner_none(self): + """Test that None inner elements are handled.""" + values = [[[1, 2], None, [3]]] + protos = python_values_to_proto_values(values, ValueType.LIST_LIST) + result = feast_value_type_to_python_type(protos[0]) + assert len(result) == 3 + assert result[0] == [1, 2] + assert result[1] is None + assert result[2] == [3] + + def test_list_set_deduplicates_inner(self): + """Test that LIST_SET deduplicates inner collection elements.""" + values = [[[1, 1, 2, 2, 3], [4, 4]]] + protos = python_values_to_proto_values(values, ValueType.LIST_SET) + result = feast_value_type_to_python_type(protos[0]) + assert result[0] == [1, 2, 3] + assert result[1] == [4] + + def test_set_set_deduplicates_inner(self): + """Test that SET_SET deduplicates inner collection elements.""" + values = [[["a", "a", "b"], ["c", "c"]]] + protos = python_values_to_proto_values(values, ValueType.SET_SET) + result = feast_value_type_to_python_type(protos[0]) + assert result[0] == ["a", "b"] + assert result[1] == ["c"] + + def test_list_list_no_dedup(self): + """Test that LIST_LIST does NOT deduplicate (Array semantics).""" + values = [[[1, 1, 2], [3, 3]]] + protos = python_values_to_proto_values(values, ValueType.LIST_LIST) + result = feast_value_type_to_python_type(protos[0]) + assert result[0] == [1, 1, 2] + assert result[1] == [3, 3] + + def test_set_list_no_dedup_inner(self): + """Test that SET_LIST does NOT deduplicate inner elements (inner is Array).""" + values = [[[1, 1, 2], [3, 3]]] + protos = python_values_to_proto_values(values, ValueType.SET_LIST) + result = feast_value_type_to_python_type(protos[0]) + assert result[0] == [1, 1, 2] + assert result[1] == [3, 3] + + def test_feast_value_type_to_pa_nested(self): + """Test feast_value_type_to_pa for nested collection types.""" + pa_type = feast_value_type_to_pa(ValueType.LIST_LIST) + assert pa_type == pyarrow.list_(pyarrow.list_(pyarrow.string())) + + pa_type = feast_value_type_to_pa(ValueType.SET_SET) + assert pa_type == pyarrow.list_(pyarrow.list_(pyarrow.string())) diff --git a/sdk/python/tests/unit/test_types.py b/sdk/python/tests/unit/test_types.py index ed4b1383879..8ec955d10af 100644 --- a/sdk/python/tests/unit/test_types.py +++ b/sdk/python/tests/unit/test_types.py @@ -1,6 +1,22 @@ +import pyarrow import pytest -from feast.types import Array, Float32, Set, String, TimeUuid, Uuid, from_value_type +from feast.field import Field +from feast.types import ( + Array, + Bool, + Float32, + Float64, + Int32, + Int64, + Set, + String, + TimeUuid, + Uuid, + from_feast_to_pyarrow_type, + from_feast_type, + from_value_type, +) from feast.value_type import ValueType @@ -23,9 +39,6 @@ def test_array_feast_type(): with pytest.raises(ValueError): _ = Array(Array) - with pytest.raises(ValueError): - _ = Array(Array(String)) - def test_set_feast_type(): set_string = Set(String) @@ -39,8 +52,103 @@ def test_set_feast_type(): with pytest.raises(ValueError): _ = Set(Set) - with pytest.raises(ValueError): - _ = Set(Set(String)) + +def test_nested_array_array(): + """Array(Array(T)) should produce LIST_LIST.""" + t = Array(Array(String)) + assert t.to_value_type() == ValueType.LIST_LIST + assert from_feast_type(t) == ValueType.LIST_LIST + + t2 = Array(Array(Int32)) + assert t2.to_value_type() == ValueType.LIST_LIST + + +def test_nested_array_set(): + """Array(Set(T)) should produce LIST_SET.""" + t = Array(Set(String)) + assert t.to_value_type() == ValueType.LIST_SET + assert from_feast_type(t) == ValueType.LIST_SET + + +def test_nested_set_array(): + """Set(Array(T)) should produce SET_LIST.""" + t = Set(Array(String)) + assert t.to_value_type() == ValueType.SET_LIST + assert from_feast_type(t) == ValueType.SET_LIST + + +def test_nested_set_set(): + """Set(Set(T)) should produce SET_SET.""" + t = Set(Set(String)) + assert t.to_value_type() == ValueType.SET_SET + assert from_feast_type(t) == ValueType.SET_SET + + +def test_nested_depth_limit(): + """3 levels of nesting should raise ValueError.""" + with pytest.raises(ValueError, match="too deeply nested"): + Array(Array(Array(String))) + + with pytest.raises(ValueError, match="too deeply nested"): + Array(Set(Array(String))) + + with pytest.raises(ValueError, match="too deeply nested"): + Set(Array(Array(String))) + + with pytest.raises(ValueError, match="too deeply nested"): + Set(Set(Set(String))) + + +def test_nested_from_value_type_roundtrip(): + """from_value_type should return a placeholder for nested types.""" + for vt in ( + ValueType.LIST_LIST, + ValueType.LIST_SET, + ValueType.SET_LIST, + ValueType.SET_SET, + ): + ft = from_value_type(vt) + assert ft.to_value_type() == vt + + +def test_nested_pyarrow_conversion(): + """Nested collection types should convert to pyarrow list(list(...)).""" + # Array(Array(String)) -> list(list(string)) + pa_type = from_feast_to_pyarrow_type(Array(Array(String))) + assert pa_type == pyarrow.list_(pyarrow.list_(pyarrow.string())) + + # Array(Set(Int64)) -> list(list(int64)) + pa_type = from_feast_to_pyarrow_type(Array(Set(Int64))) + assert pa_type == pyarrow.list_(pyarrow.list_(pyarrow.int64())) + + # Set(Array(Float64)) -> list(list(float64)) + pa_type = from_feast_to_pyarrow_type(Set(Array(Float64))) + assert pa_type == pyarrow.list_(pyarrow.list_(pyarrow.float64())) + + # Set(Set(Bool)) -> list(list(bool)) + pa_type = from_feast_to_pyarrow_type(Set(Set(Bool))) + assert pa_type == pyarrow.list_(pyarrow.list_(pyarrow.bool_())) + + +def test_nested_field_roundtrip(): + """Field with nested collection type should survive to_proto -> from_proto.""" + test_cases = [ + ("aa", Array(Array(String))), + ("as", Array(Set(Int32))), + ("sa", Set(Array(Float64))), + ("ss", Set(Set(Bool))), + ] + for name, dtype in test_cases: + field = Field(name=name, dtype=dtype, tags={"user_tag": "value"}) + proto = field.to_proto() + restored = Field.from_proto(proto) + assert restored.name == name, f"Name mismatch for {dtype}" + assert restored.dtype.to_value_type() == dtype.to_value_type(), ( + f"dtype mismatch for {name}: {restored.dtype} vs {dtype}" + ) + assert restored.tags == {"user_tag": "value"}, ( + f"Tags should not contain internal tags for {name}" + ) def test_uuid_feast_type(): From 734aee628a73fd30263cb8a1d9fcdd1aa1928c17 Mon Sep 17 00:00:00 2001 From: soojin Date: Wed, 18 Mar 2026 18:20:14 +0900 Subject: [PATCH 2/9] fix: Fix remote online read for nested collection types and add docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix remote online store read path to use declared feature types from FeatureView instead of ValueType.UNKNOWN, which fails for nested collection types (LIST_LIST, LIST_SET, SET_LIST, SET_SET) - Add Nested Collection Types section to type-system.md with type table, usage examples, and empty-inner-collection→None limitation docs Co-Authored-By: Claude Opus 4.6 Signed-off-by: soojin --- docs/reference/type-system.md | 50 +++++++++++++++++++ .../feast/infra/online_stores/remote.py | 11 +++- 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/docs/reference/type-system.md b/docs/reference/type-system.md index f0739df8cd3..44b88125af0 100644 --- a/docs/reference/type-system.md +++ b/docs/reference/type-system.md @@ -86,6 +86,25 @@ All primitive types (except `Map` and `Json`) have corresponding set types for s - Set types are best suited for **online serving** use cases where feature values are written as Python sets and retrieved via `get_online_features`. {% endhint %} +### Nested Collection Types + +Feast supports 2-level nested collections, combining Array and Set types: + +| Feast Type | Python Type | ValueType | Description | +|------------|-------------|-----------|-------------| +| `Array(Array(T))` | `List[List[T]]` | `LIST_LIST` | List of lists | +| `Array(Set(T))` | `List[List[T]]` | `LIST_SET` | List of sets (inner elements deduplicated) | +| `Set(Array(T))` | `List[List[T]]` | `SET_LIST` | Set of lists | +| `Set(Set(T))` | `List[List[T]]` | `SET_SET` | Set of sets (inner elements deduplicated) | + +Where `T` is any supported primitive type (Int32, Int64, Float32, Float64, String, Bytes, Bool, UnixTimestamp). + +**Notes:** +- Nesting is limited to 2 levels. `Array(Array(Array(T)))` will raise a `ValueError`. +- Inner type information is preserved via Field tags (`feast:nested_inner_type`) and restored during deserialization. +- For `Array(Set(T))` and `Set(Set(T))`, inner collection elements are automatically deduplicated. +- Empty inner collections (`[]`) are stored as empty proto values and round-trip as `None`. For example, `[[1, 2], [], [3]]` becomes `[[1, 2], None, [3]]` after a write-read cycle. + ### Map Types Map types allow storing dictionary-like data structures: @@ -233,6 +252,10 @@ user_features = FeatureView( Field(name="metadata", dtype=Map), Field(name="activity_log", dtype=Array(Map)), + # Nested collection types + Field(name="weekly_scores", dtype=Array(Array(Float64))), + Field(name="unique_tags_per_category", dtype=Array(Set(String))), + # JSON type Field(name="raw_event", dtype=Json), @@ -290,6 +313,33 @@ related_sessions = [uuid.uuid4(), uuid.uuid4(), uuid.uuid4()] unique_devices = {uuid.uuid4(), uuid.uuid4()} ``` +### Nested Collection Type Usage Examples + +Nested collections allow storing multi-dimensional data: + +```python +# List of lists — e.g., weekly score history per user +weekly_scores = [[85.0, 90.5, 78.0], [92.0, 88.5], [95.0, 91.0, 87.5]] + +# List of sets — e.g., unique tags assigned per category +unique_tags_per_category = [["python", "ml"], ["rust", "systems"], ["python", "web"]] +# Inner sets are automatically deduplicated: +# [["python", "ml"], ...] (duplicates within each inner set are removed) + +# Set of lists — e.g., distinct ordered sequences observed +distinct_sequences = [[1, 2, 3], [4, 5], [1, 2, 3]] + +# Set of sets — e.g., distinct groups of unique items +distinct_groups = [["a", "b"], ["c", "d"], ["a", "b"]] +# Inner elements are deduplicated within each set +``` + +**Limitation:** Empty inner collections round-trip as `None`: +```python +# Input: [[1, 2], [], [3]] +# Output: [[1, 2], None, [3]] (empty [] becomes None after write-read cycle) +``` + ### Map Type Usage Examples Maps can store complex nested data structures: diff --git a/sdk/python/feast/infra/online_stores/remote.py b/sdk/python/feast/infra/online_stores/remote.py index 7597e95f2f4..191f5692e7b 100644 --- a/sdk/python/feast/infra/online_stores/remote.py +++ b/sdk/python/feast/infra/online_stores/remote.py @@ -209,6 +209,12 @@ def online_read( logger.debug("Able to retrieve the online features from feature server.") response_json = json.loads(response.text) event_ts = self._get_event_ts(response_json) + # Build feature name -> ValueType mapping so we can reconstruct + # complex types (nested collections, sets, etc.) that cannot be + # inferred from raw JSON values alone. + feature_type_map: Dict[str, ValueType] = { + f.name: f.dtype.to_value_type() for f in table.features + } # Iterating over results and converting the API results in column format to row format. result_tuples: List[ Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]] @@ -228,13 +234,16 @@ def online_read( ] == "PRESENT" ): + feature_value_type = feature_type_map.get( + feature_name, ValueType.UNKNOWN + ) message = python_values_to_proto_values( [ response_json["results"][index]["values"][ feature_value_index ] ], - ValueType.UNKNOWN, + feature_value_type, ) feature_values_dict[feature_name] = message[0] else: From e57ddd23d0670bd8d72eb107a0343662a54d506f Mon Sep 17 00:00:00 2001 From: soojin Date: Thu, 19 Mar 2026 00:27:13 +0900 Subject: [PATCH 3/9] fix: Fix JSON deserialization, schema inference, and silent fallback for nested collection types - Add nested list handling in proto_json from_json_object (list of lists was raising ParseError since no branch matched list-typed elements) - Fix pa_to_feast_value_type to recognize nested list PyArrow types (list>) instead of crashing with KeyError - Replace silent String fallback in _str_to_feast_type with ValueError to surface corrupted tag values instead of silently losing type info - Strengthen test coverage: type str roundtrip, inner value verification, multi-value batch, proto JSON roundtrip, PyArrow nested type inference Co-Authored-By: Claude Opus 4.6 Signed-off-by: soojin --- sdk/python/feast/field.py | 7 +-- sdk/python/feast/proto_json.py | 9 ++++ sdk/python/feast/type_map.py | 6 ++- sdk/python/tests/unit/test_proto_json.py | 27 ++++++++++++ sdk/python/tests/unit/test_type_map.py | 56 +++++++++++++++++++++--- sdk/python/tests/unit/test_types.py | 40 ++++++++++++++++- 6 files changed, 134 insertions(+), 11 deletions(-) diff --git a/sdk/python/feast/field.py b/sdk/python/feast/field.py index adec17db2f8..042f0ad511b 100644 --- a/sdk/python/feast/field.py +++ b/sdk/python/feast/field.py @@ -272,9 +272,10 @@ def _str_to_feast_type(type_str: str) -> FeastType: try: return PrimitiveFeastType[type_str] except KeyError: - from feast.types import String - - return String + raise ValueError( + f"Unknown FeastType: {type_str!r}. " + f"Valid primitive types: {[t.name for t in PrimitiveFeastType]}" + ) def _serialize_struct_schema(struct_type: Struct) -> str: diff --git a/sdk/python/feast/proto_json.py b/sdk/python/feast/proto_json.py index eacc9c5ec3e..4980cd64bf3 100644 --- a/sdk/python/feast/proto_json.py +++ b/sdk/python/feast/proto_json.py @@ -92,6 +92,15 @@ def from_json_object( if len(value) == 0: # Clear will mark the struct as modified so it will be created even if there are no values message.int64_list_val.Clear() + elif isinstance(value[0], list): + # Nested collection (list of lists). + # Default to list_list_val since JSON transport loses the + # outer/inner set distinction. + rv = RepeatedValue() + for inner in value: + inner_val = rv.val.add() + from_json_object(parser, inner, inner_val) + message.list_list_val.CopyFrom(rv) elif isinstance(value[0], bool): message.bool_list_val.val.extend(value) elif isinstance(value[0], str): diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 7257311ec68..1b9cd257ae1 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -1258,7 +1258,11 @@ def pa_to_feast_value_type(pa_type_as_str: str) -> ValueType: is_list = False if pa_type_as_str.startswith("list", "") + inner_str = pa_type_as_str[len("list>") + == ValueType.LIST_LIST + ) + assert ( + pa_to_feast_value_type("list>") + == ValueType.LIST_LIST + ) + assert ( + pa_to_feast_value_type("list>") + == ValueType.LIST_LIST + ) diff --git a/sdk/python/tests/unit/test_types.py b/sdk/python/tests/unit/test_types.py index 8ec955d10af..4e158bf75ad 100644 --- a/sdk/python/tests/unit/test_types.py +++ b/sdk/python/tests/unit/test_types.py @@ -134,7 +134,7 @@ def test_nested_field_roundtrip(): """Field with nested collection type should survive to_proto -> from_proto.""" test_cases = [ ("aa", Array(Array(String))), - ("as", Array(Set(Int32))), + ("as_field", Array(Set(Int32))), ("sa", Set(Array(Float64))), ("ss", Set(Set(Bool))), ] @@ -146,6 +146,10 @@ def test_nested_field_roundtrip(): assert restored.dtype.to_value_type() == dtype.to_value_type(), ( f"dtype mismatch for {name}: {restored.dtype} vs {dtype}" ) + # Verify inner type is preserved (not just ValueType equality) + assert str(restored.dtype) == str(dtype), ( + f"Inner type lost for {name}: got {restored.dtype}, expected {dtype}" + ) assert restored.tags == {"user_tag": "value"}, ( f"Tags should not contain internal tags for {name}" ) @@ -178,6 +182,40 @@ def test_uuid_set_feast_type(): assert from_value_type(set_time_uuid.to_value_type()) == set_time_uuid +def test_feast_type_str_roundtrip(): + """_feast_type_to_str and _str_to_feast_type should roundtrip for nested types.""" + from feast.field import _feast_type_to_str, _str_to_feast_type + + test_cases = [ + Array(Array(String)), + Array(Array(Int32)), + Array(Array(Float64)), + Array(Set(Int64)), + Array(Set(Bool)), + Set(Array(String)), + Set(Array(Float32)), + Set(Set(Int32)), + Set(Set(Float64)), + ] + for dtype in test_cases: + s = _feast_type_to_str(dtype) + restored = _str_to_feast_type(s) + assert str(restored) == str(dtype), ( + f"Roundtrip failed: {dtype} -> '{s}' -> {restored}" + ) + + +def test_str_to_feast_type_invalid(): + """_str_to_feast_type should raise ValueError on unrecognized type names.""" + from feast.field import _str_to_feast_type + + with pytest.raises(ValueError, match="Unknown FeastType"): + _str_to_feast_type("INVALID_TYPE") + + with pytest.raises(ValueError, match="Unknown FeastType"): + _str_to_feast_type("Strig") + + def test_all_value_types(): for value in ValueType: # We do not support the NULL type. From 36fdf4e4be14575717519db2598dd19dbfe6aef6 Mon Sep 17 00:00:00 2001 From: soojin Date: Thu, 19 Mar 2026 23:19:59 +0900 Subject: [PATCH 4/9] fix: Fix mypy type error in nested collection proto construction Use getattr/CopyFrom instead of **dict unpacking for ProtoValue construction to satisfy mypy's strict type checking. Signed-off-by: soojin Co-Authored-By: Claude Opus 4.6 Signed-off-by: soojin --- sdk/python/feast/type_map.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 1b9cd257ae1..0f49cf0313d 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -1116,7 +1116,8 @@ def _convert_nested_collection_to_proto( ) inner_values.append(proto_vals[0]) repeated = RepeatedValue(val=inner_values) - proto = ProtoValue(**{val_attr: repeated}) + proto = ProtoValue() + getattr(proto, val_attr).CopyFrom(repeated) result.append(proto) return result From 3c81d5e9dde4a07779438834dc0d5e473dc1a566 Mon Sep 17 00:00:00 2001 From: soojin Date: Thu, 26 Mar 2026 12:07:50 +0900 Subject: [PATCH 5/9] fix: Fix equality comparison for nested types and JSON deserialization edge case - Add __eq__/__hash__ to Array and Set so inner element types are compared (previously Array(Array(String)) == Array(Array(Int32)) was True) - Fix nested collection detection in proto_json when first element is None by using any() fallback instead of only checking value[0] Co-Authored-By: Claude Opus 4.6 Signed-off-by: soojin --- sdk/python/feast/proto_json.py | 4 +++- sdk/python/feast/types.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/sdk/python/feast/proto_json.py b/sdk/python/feast/proto_json.py index 4980cd64bf3..424d07fd09a 100644 --- a/sdk/python/feast/proto_json.py +++ b/sdk/python/feast/proto_json.py @@ -92,8 +92,10 @@ def from_json_object( if len(value) == 0: # Clear will mark the struct as modified so it will be created even if there are no values message.int64_list_val.Clear() - elif isinstance(value[0], list): + elif isinstance(value[0], list) or any(isinstance(v, list) for v in value): # Nested collection (list of lists). + # Check any() to handle cases where the first element is None + # (empty inner collections round-trip through proto as None). # Default to list_list_val since JSON transport loses the # outer/inner set distinction. rv = RepeatedValue() diff --git a/sdk/python/feast/types.py b/sdk/python/feast/types.py index 4c0935086e4..a98d20c32fc 100644 --- a/sdk/python/feast/types.py +++ b/sdk/python/feast/types.py @@ -206,6 +206,14 @@ def to_value_type(self) -> ValueType: value_type_list_name = value_type_name + "_LIST" return ValueType[value_type_list_name] + def __eq__(self, other): + if isinstance(other, Array): + return self.base_type == other.base_type + return False + + def __hash__(self): + return hash(("Array", hash(self.base_type))) + def __str__(self): return f"Array({self.base_type})" @@ -250,6 +258,14 @@ def to_value_type(self) -> ValueType: value_type_set_name = value_type_name + "_SET" return ValueType[value_type_set_name] + def __eq__(self, other): + if isinstance(other, Set): + return self.base_type == other.base_type + return False + + def __hash__(self): + return hash(("Set", hash(self.base_type))) + def __str__(self): return f"Set({self.base_type})" From 4dabd1b79614f8a40ba720fba87cf22245ecb53a Mon Sep 17 00:00:00 2001 From: soojin Date: Thu, 26 Mar 2026 16:05:47 +0900 Subject: [PATCH 6/9] feat: Remove depth limit for nested collection types and improve test coverage - Remove 2-level depth restriction from Array and Set constructors to support unbounded nesting per maintainer request - Make _convert_nested_collection_to_proto() recursive for 3+ levels - Update error message for nested type inference to guide users toward explicit Field dtype declaration - Add 3+ level tests for Field roundtrip, str roundtrip, and PyArrow conversion Co-Authored-By: Claude Opus 4.6 Signed-off-by: soojin --- sdk/python/feast/type_map.py | 15 +++++++++--- sdk/python/feast/types.py | 25 ++++--------------- sdk/python/tests/unit/test_types.py | 38 +++++++++++++++++++++-------- 3 files changed, 45 insertions(+), 33 deletions(-) diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 0f49cf0313d..0db54b89d7e 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -323,8 +323,9 @@ def python_type_to_feast_value_type( if not recurse: raise ValueError( f"Value type for field {name} is {type(value)} but " - f"recursion is not allowed. Array types can only be one level " - f"deep." + f"recursion is not allowed. Nested collection types cannot be " + f"inferred automatically; use an explicit Field dtype instead " + f"(e.g., dtype=Array(Array(Int32)))." ) # This is the final type which we infer from the list @@ -1109,8 +1110,16 @@ def _convert_nested_collection_to_proto( if len(inner_list) == 0: # Empty inner collection: store as empty ProtoValue inner_values.append(ProtoValue()) + elif any( + isinstance(item, (list, set, tuple)) for item in inner_list + ): + # Deeper nesting (3+ levels): recurse + inner_proto = _convert_nested_collection_to_proto( + feast_value_type, [inner_list] + ) + inner_values.append(inner_proto[0]) else: - # Wrap the inner list as a single list-typed Value + # Leaf level: wrap as a single list-typed Value proto_vals = python_values_to_proto_values( [inner_list], ValueType.UNKNOWN ) diff --git a/sdk/python/feast/types.py b/sdk/python/feast/types.py index a98d20c32fc..a4c96460e98 100644 --- a/sdk/python/feast/types.py +++ b/sdk/python/feast/types.py @@ -177,17 +177,10 @@ class Array(ComplexFeastType): def __init__(self, base_type: Union[PrimitiveFeastType, "ComplexFeastType"]): # Allow Struct, Array, and Set as base types for nested collections - if isinstance(base_type, (Struct, Array, Set)): - # Enforce 2-level depth limit: reject Array(Array(Array(...))) etc. - if isinstance(base_type, (Array, Set)) and isinstance( - base_type.base_type, (Array, Set) - ): - raise ValueError( - f"Nested collection types are limited to 2 levels of nesting. " - f"{type(base_type).__name__}({type(base_type.base_type).__name__}(...)) " - f"is too deeply nested." - ) - elif base_type not in SUPPORTED_BASE_TYPES: + if ( + not isinstance(base_type, (Struct, Array, Set)) + and base_type not in SUPPORTED_BASE_TYPES + ): raise ValueError( f"Type {type(base_type)} is currently not supported as a base type for Array." ) @@ -230,15 +223,7 @@ class Set(ComplexFeastType): def __init__(self, base_type: Union[PrimitiveFeastType, ComplexFeastType]): # Allow Array and Set as base types for nested collections - if isinstance(base_type, (Array, Set)): - # Enforce 2-level depth limit - if isinstance(base_type.base_type, (Array, Set)): - raise ValueError( - f"Nested collection types are limited to 2 levels of nesting. " - f"{type(base_type).__name__}({type(base_type.base_type).__name__}(...)) " - f"is too deeply nested." - ) - else: + if not isinstance(base_type, (Array, Set)): # Sets do not support MAP as a base type supported_set_types = [t for t in SUPPORTED_BASE_TYPES if t not in (Map,)] if base_type not in supported_set_types: diff --git a/sdk/python/tests/unit/test_types.py b/sdk/python/tests/unit/test_types.py index 4e158bf75ad..313a6cdcaa2 100644 --- a/sdk/python/tests/unit/test_types.py +++ b/sdk/python/tests/unit/test_types.py @@ -84,19 +84,24 @@ def test_nested_set_set(): assert from_feast_type(t) == ValueType.SET_SET -def test_nested_depth_limit(): - """3 levels of nesting should raise ValueError.""" - with pytest.raises(ValueError, match="too deeply nested"): - Array(Array(Array(String))) +def test_nested_unbounded_depth(): + """Nesting depth should be unbounded.""" + # 3-level + t3 = Array(Array(Array(String))) + assert t3.to_value_type() == ValueType.LIST_LIST - with pytest.raises(ValueError, match="too deeply nested"): - Array(Set(Array(String))) + t3_mixed = Array(Set(Array(String))) + assert t3_mixed.to_value_type() == ValueType.LIST_SET - with pytest.raises(ValueError, match="too deeply nested"): - Set(Array(Array(String))) + t3_set = Set(Array(Array(String))) + assert t3_set.to_value_type() == ValueType.SET_LIST - with pytest.raises(ValueError, match="too deeply nested"): - Set(Set(Set(String))) + t3_set2 = Set(Set(Set(String))) + assert t3_set2.to_value_type() == ValueType.SET_SET + + # 4-level + t4 = Array(Array(Array(Array(Int32)))) + assert t4.to_value_type() == ValueType.LIST_LIST def test_nested_from_value_type_roundtrip(): @@ -129,6 +134,10 @@ def test_nested_pyarrow_conversion(): pa_type = from_feast_to_pyarrow_type(Set(Set(Bool))) assert pa_type == pyarrow.list_(pyarrow.list_(pyarrow.bool_())) + # 3-level: Array(Array(Array(Int32))) -> list(list(list(int32))) + pa_type = from_feast_to_pyarrow_type(Array(Array(Array(Int32)))) + assert pa_type == pyarrow.list_(pyarrow.list_(pyarrow.list_(pyarrow.int32()))) + def test_nested_field_roundtrip(): """Field with nested collection type should survive to_proto -> from_proto.""" @@ -137,6 +146,11 @@ def test_nested_field_roundtrip(): ("as_field", Array(Set(Int32))), ("sa", Set(Array(Float64))), ("ss", Set(Set(Bool))), + # 3-level nesting + ("aaa", Array(Array(Array(Int32)))), + ("asa", Array(Set(Array(String)))), + # 4-level nesting + ("aaaa", Array(Array(Array(Array(Float64))))), ] for name, dtype in test_cases: field = Field(name=name, dtype=dtype, tags={"user_tag": "value"}) @@ -196,6 +210,10 @@ def test_feast_type_str_roundtrip(): Set(Array(Float32)), Set(Set(Int32)), Set(Set(Float64)), + # 3+ level nesting + Array(Array(Array(String))), + Array(Set(Array(Int32))), + Set(Set(Set(Float64))), ] for dtype in test_cases: s = _feast_type_to_str(dtype) From fd0d745fcab9c78957aacabf8dcb4baf442f041b Mon Sep 17 00:00:00 2001 From: soojin Date: Sat, 28 Mar 2026 15:26:00 +0900 Subject: [PATCH 7/9] refactor: Replace combinatorial nested collection enums with recursive VALUE_LIST/VALUE_SET Replace 4 combinatorial enum values (LIST_LIST=36, LIST_SET=37, SET_LIST=38, SET_SET=39) with 2 recursive enum values (VALUE_LIST=40, VALUE_SET=41) that use RepeatedValue to enable unlimited nesting depth. This is a breaking change for an unreleased feature, as suggested in PR #6132 review. Key changes: - Proto: Remove 4 enum/oneof fields, add VALUE_LIST/VALUE_SET with reserved 36-39 - Python: Update ValueType enum, type system, serialization, field persistence - JSON: Update proto_json encode/decode for new field names - Tests: Rewrite all nested collection tests (204 tests passing) - Docs: Update type-system.md for recursive design Co-Authored-By: Claude Opus 4.6 Signed-off-by: soojin --- docs/reference/type-system.md | 31 +++-- protos/feast/types/Value.proto | 12 +- sdk/python/feast/field.py | 8 +- .../feast/infra/online_stores/remote.py | 2 +- sdk/python/feast/proto_json.py | 6 +- .../feast/protos/feast/types/Value_pb2.py | 86 +++++++------- .../feast/protos/feast/types/Value_pb2.pyi | 38 +++--- sdk/python/feast/type_map.py | 57 +++------ sdk/python/feast/types.py | 32 ++--- sdk/python/feast/value_type.py | 6 +- sdk/python/tests/unit/test_proto_json.py | 10 +- sdk/python/tests/unit/test_type_map.py | 109 +++++------------- sdk/python/tests/unit/test_types.py | 42 ++++--- 13 files changed, 163 insertions(+), 276 deletions(-) diff --git a/docs/reference/type-system.md b/docs/reference/type-system.md index 44b88125af0..eb39b5e7948 100644 --- a/docs/reference/type-system.md +++ b/docs/reference/type-system.md @@ -88,21 +88,21 @@ All primitive types (except `Map` and `Json`) have corresponding set types for s ### Nested Collection Types -Feast supports 2-level nested collections, combining Array and Set types: +Feast supports arbitrarily nested collections using a recursive `VALUE_LIST` / `VALUE_SET` design. The outer container determines the proto enum (`VALUE_LIST` for `Array(…)`, `VALUE_SET` for `Set(…)`), while the full inner type structure is persisted via a mandatory `feast:nested_inner_type` Field tag. | Feast Type | Python Type | ValueType | Description | |------------|-------------|-----------|-------------| -| `Array(Array(T))` | `List[List[T]]` | `LIST_LIST` | List of lists | -| `Array(Set(T))` | `List[List[T]]` | `LIST_SET` | List of sets (inner elements deduplicated) | -| `Set(Array(T))` | `List[List[T]]` | `SET_LIST` | Set of lists | -| `Set(Set(T))` | `List[List[T]]` | `SET_SET` | Set of sets (inner elements deduplicated) | +| `Array(Array(T))` | `List[List[T]]` | `VALUE_LIST` | List of lists | +| `Array(Set(T))` | `List[List[T]]` | `VALUE_LIST` | List of sets | +| `Set(Array(T))` | `List[List[T]]` | `VALUE_SET` | Set of lists | +| `Set(Set(T))` | `List[List[T]]` | `VALUE_SET` | Set of sets | +| `Array(Array(Array(T)))` | `List[List[List[T]]]` | `VALUE_LIST` | 3-level nesting | -Where `T` is any supported primitive type (Int32, Int64, Float32, Float64, String, Bytes, Bool, UnixTimestamp). +Where `T` is any supported primitive type (Int32, Int64, Float32, Float64, String, Bytes, Bool, UnixTimestamp) or another nested collection type. **Notes:** -- Nesting is limited to 2 levels. `Array(Array(Array(T)))` will raise a `ValueError`. -- Inner type information is preserved via Field tags (`feast:nested_inner_type`) and restored during deserialization. -- For `Array(Set(T))` and `Set(Set(T))`, inner collection elements are automatically deduplicated. +- Nesting depth is **unlimited**. `Array(Array(Array(T)))`, `Set(Array(Set(T)))`, etc. are all supported. +- Inner type information is preserved via Field tags (`feast:nested_inner_type`) and restored during deserialization. This tag is mandatory for nested collection types. - Empty inner collections (`[]`) are stored as empty proto values and round-trip as `None`. For example, `[[1, 2], [], [3]]` becomes `[[1, 2], None, [3]]` after a write-read cycle. ### Map Types @@ -315,7 +315,7 @@ unique_devices = {uuid.uuid4(), uuid.uuid4()} ### Nested Collection Type Usage Examples -Nested collections allow storing multi-dimensional data: +Nested collections allow storing multi-dimensional data with unlimited depth: ```python # List of lists — e.g., weekly score history per user @@ -323,15 +323,12 @@ weekly_scores = [[85.0, 90.5, 78.0], [92.0, 88.5], [95.0, 91.0, 87.5]] # List of sets — e.g., unique tags assigned per category unique_tags_per_category = [["python", "ml"], ["rust", "systems"], ["python", "web"]] -# Inner sets are automatically deduplicated: -# [["python", "ml"], ...] (duplicates within each inner set are removed) -# Set of lists — e.g., distinct ordered sequences observed -distinct_sequences = [[1, 2, 3], [4, 5], [1, 2, 3]] +# 3-level nesting — e.g., multi-dimensional matrices +Field(name="tensor", dtype=Array(Array(Array(Float64)))) -# Set of sets — e.g., distinct groups of unique items -distinct_groups = [["a", "b"], ["c", "d"], ["a", "b"]] -# Inner elements are deduplicated within each set +# Mixed nesting +Field(name="grouped_tags", dtype=Array(Set(Array(String)))) ``` **Limitation:** Empty inner collections round-trip as `None`: diff --git a/protos/feast/types/Value.proto b/protos/feast/types/Value.proto index 21e4de17cf5..4194c19bac5 100644 --- a/protos/feast/types/Value.proto +++ b/protos/feast/types/Value.proto @@ -63,10 +63,8 @@ message ValueType { TIME_UUID_LIST = 39; UUID_SET = 40; TIME_UUID_SET = 41; - LIST_LIST = 42; - LIST_SET = 43; - SET_LIST = 44; - SET_SET = 45; + VALUE_LIST = 42; + VALUE_SET = 43; } } @@ -112,10 +110,8 @@ message Value { StringList time_uuid_list_val = 39; StringSet uuid_set_val = 40; StringSet time_uuid_set_val = 41; - RepeatedValue list_list_val = 42; - RepeatedValue list_set_val = 43; - RepeatedValue set_list_val = 44; - RepeatedValue set_set_val = 45; + RepeatedValue list_val = 42; + RepeatedValue set_val = 43; } } diff --git a/sdk/python/feast/field.py b/sdk/python/feast/field.py index 042f0ad511b..e155836467b 100644 --- a/sdk/python/feast/field.py +++ b/sdk/python/feast/field.py @@ -171,13 +171,7 @@ def from_proto(cls, field_proto: FieldProto): dtype = Array(inner_struct) user_tags = {k: v for k, v in tags.items() if k not in internal_tags} elif ( - value_type - in ( - ValueType.LIST_LIST, - ValueType.LIST_SET, - ValueType.SET_LIST, - ValueType.SET_SET, - ) + value_type in (ValueType.VALUE_LIST, ValueType.VALUE_SET) and NESTED_COLLECTION_INNER_TYPE_TAG in tags ): dtype = _str_to_feast_type(tags[NESTED_COLLECTION_INNER_TYPE_TAG]) diff --git a/sdk/python/feast/infra/online_stores/remote.py b/sdk/python/feast/infra/online_stores/remote.py index 191f5692e7b..9bead1fcb9d 100644 --- a/sdk/python/feast/infra/online_stores/remote.py +++ b/sdk/python/feast/infra/online_stores/remote.py @@ -108,7 +108,7 @@ def _proto_value_to_transport_value(proto_value: ValueProto) -> Any: # Nested collection types use feast_value_type_to_python_type # which handles recursive conversion of RepeatedValue protos. - if val_attr in ("list_list_val", "list_set_val", "set_list_val", "set_set_val"): + if val_attr in ("list_val", "set_val"): return feast_value_type_to_python_type(proto_value) # Map/Struct types are converted to Python dicts by diff --git a/sdk/python/feast/proto_json.py b/sdk/python/feast/proto_json.py index 424d07fd09a..82fb4dd0b13 100644 --- a/sdk/python/feast/proto_json.py +++ b/sdk/python/feast/proto_json.py @@ -63,7 +63,7 @@ def to_json_object(printer: _Printer, message: ProtoMessage) -> JsonObject: # to JSON. The parse back result will be different from original message. if which is None or which == "null_val": return None - elif which in ("list_list_val", "list_set_val", "set_list_val", "set_set_val"): + elif which in ("list_val", "set_val"): # Nested collection: RepeatedValue containing Values repeated = getattr(message, which) value = [ @@ -96,13 +96,13 @@ def from_json_object( # Nested collection (list of lists). # Check any() to handle cases where the first element is None # (empty inner collections round-trip through proto as None). - # Default to list_list_val since JSON transport loses the + # Default to list_val since JSON transport loses the # outer/inner set distinction. rv = RepeatedValue() for inner in value: inner_val = rv.val.add() from_json_object(parser, inner, inner_val) - message.list_list_val.CopyFrom(rv) + message.list_val.CopyFrom(rv) elif isinstance(value[0], bool): message.bool_list_val.val.extend(value) elif isinstance(value[0], str): diff --git a/sdk/python/feast/protos/feast/types/Value_pb2.py b/sdk/python/feast/protos/feast/types/Value_pb2.py index c831f8da6c0..44ad6f115c2 100644 --- a/sdk/python/feast/protos/feast/types/Value_pb2.py +++ b/sdk/python/feast/protos/feast/types/Value_pb2.py @@ -14,7 +14,7 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x17\x66\x65\x61st/types/Value.proto\x12\x0b\x66\x65\x61st.types\"\xfb\x04\n\tValueType\"\xed\x04\n\x04\x45num\x12\x0b\n\x07INVALID\x10\x00\x12\t\n\x05\x42YTES\x10\x01\x12\n\n\x06STRING\x10\x02\x12\t\n\x05INT32\x10\x03\x12\t\n\x05INT64\x10\x04\x12\n\n\x06\x44OUBLE\x10\x05\x12\t\n\x05\x46LOAT\x10\x06\x12\x08\n\x04\x42OOL\x10\x07\x12\x12\n\x0eUNIX_TIMESTAMP\x10\x08\x12\x0e\n\nBYTES_LIST\x10\x0b\x12\x0f\n\x0bSTRING_LIST\x10\x0c\x12\x0e\n\nINT32_LIST\x10\r\x12\x0e\n\nINT64_LIST\x10\x0e\x12\x0f\n\x0b\x44OUBLE_LIST\x10\x0f\x12\x0e\n\nFLOAT_LIST\x10\x10\x12\r\n\tBOOL_LIST\x10\x11\x12\x17\n\x13UNIX_TIMESTAMP_LIST\x10\x12\x12\x08\n\x04NULL\x10\x13\x12\x07\n\x03MAP\x10\x14\x12\x0c\n\x08MAP_LIST\x10\x15\x12\r\n\tBYTES_SET\x10\x16\x12\x0e\n\nSTRING_SET\x10\x17\x12\r\n\tINT32_SET\x10\x18\x12\r\n\tINT64_SET\x10\x19\x12\x0e\n\nDOUBLE_SET\x10\x1a\x12\r\n\tFLOAT_SET\x10\x1b\x12\x0c\n\x08\x42OOL_SET\x10\x1c\x12\x16\n\x12UNIX_TIMESTAMP_SET\x10\x1d\x12\x08\n\x04JSON\x10 \x12\r\n\tJSON_LIST\x10!\x12\n\n\x06STRUCT\x10\"\x12\x0f\n\x0bSTRUCT_LIST\x10#\x12\x08\n\x04UUID\x10$\x12\r\n\tTIME_UUID\x10%\x12\r\n\tUUID_LIST\x10&\x12\x12\n\x0eTIME_UUID_LIST\x10\'\x12\x0c\n\x08UUID_SET\x10(\x12\x11\n\rTIME_UUID_SET\x10)\x12\r\n\tLIST_LIST\x10*\x12\x0c\n\x08LIST_SET\x10+\x12\x0c\n\x08SET_LIST\x10,\x12\x0b\n\x07SET_SET\x10-\"\xca\r\n\x05Value\x12\x13\n\tbytes_val\x18\x01 \x01(\x0cH\x00\x12\x14\n\nstring_val\x18\x02 \x01(\tH\x00\x12\x13\n\tint32_val\x18\x03 \x01(\x05H\x00\x12\x13\n\tint64_val\x18\x04 \x01(\x03H\x00\x12\x14\n\ndouble_val\x18\x05 \x01(\x01H\x00\x12\x13\n\tfloat_val\x18\x06 \x01(\x02H\x00\x12\x12\n\x08\x62ool_val\x18\x07 \x01(\x08H\x00\x12\x1c\n\x12unix_timestamp_val\x18\x08 \x01(\x03H\x00\x12\x30\n\x0e\x62ytes_list_val\x18\x0b \x01(\x0b\x32\x16.feast.types.BytesListH\x00\x12\x32\n\x0fstring_list_val\x18\x0c \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12\x30\n\x0eint32_list_val\x18\r \x01(\x0b\x32\x16.feast.types.Int32ListH\x00\x12\x30\n\x0eint64_list_val\x18\x0e \x01(\x0b\x32\x16.feast.types.Int64ListH\x00\x12\x32\n\x0f\x64ouble_list_val\x18\x0f \x01(\x0b\x32\x17.feast.types.DoubleListH\x00\x12\x30\n\x0e\x66loat_list_val\x18\x10 \x01(\x0b\x32\x16.feast.types.FloatListH\x00\x12.\n\rbool_list_val\x18\x11 \x01(\x0b\x32\x15.feast.types.BoolListH\x00\x12\x39\n\x17unix_timestamp_list_val\x18\x12 \x01(\x0b\x32\x16.feast.types.Int64ListH\x00\x12%\n\x08null_val\x18\x13 \x01(\x0e\x32\x11.feast.types.NullH\x00\x12#\n\x07map_val\x18\x14 \x01(\x0b\x32\x10.feast.types.MapH\x00\x12,\n\x0cmap_list_val\x18\x15 \x01(\x0b\x32\x14.feast.types.MapListH\x00\x12.\n\rbytes_set_val\x18\x16 \x01(\x0b\x32\x15.feast.types.BytesSetH\x00\x12\x30\n\x0estring_set_val\x18\x17 \x01(\x0b\x32\x16.feast.types.StringSetH\x00\x12.\n\rint32_set_val\x18\x18 \x01(\x0b\x32\x15.feast.types.Int32SetH\x00\x12.\n\rint64_set_val\x18\x19 \x01(\x0b\x32\x15.feast.types.Int64SetH\x00\x12\x30\n\x0e\x64ouble_set_val\x18\x1a \x01(\x0b\x32\x16.feast.types.DoubleSetH\x00\x12.\n\rfloat_set_val\x18\x1b \x01(\x0b\x32\x15.feast.types.FloatSetH\x00\x12,\n\x0c\x62ool_set_val\x18\x1c \x01(\x0b\x32\x14.feast.types.BoolSetH\x00\x12\x37\n\x16unix_timestamp_set_val\x18\x1d \x01(\x0b\x32\x15.feast.types.Int64SetH\x00\x12\x12\n\x08json_val\x18 \x01(\tH\x00\x12\x30\n\rjson_list_val\x18! \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12&\n\nstruct_val\x18\" \x01(\x0b\x32\x10.feast.types.MapH\x00\x12/\n\x0fstruct_list_val\x18# \x01(\x0b\x32\x14.feast.types.MapListH\x00\x12\x12\n\x08uuid_val\x18$ \x01(\tH\x00\x12\x17\n\rtime_uuid_val\x18% \x01(\tH\x00\x12\x30\n\ruuid_list_val\x18& \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12\x35\n\x12time_uuid_list_val\x18\' \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12.\n\x0cuuid_set_val\x18( \x01(\x0b\x32\x16.feast.types.StringSetH\x00\x12\x33\n\x11time_uuid_set_val\x18) \x01(\x0b\x32\x16.feast.types.StringSetH\x00\x12\x33\n\rlist_list_val\x18* \x01(\x0b\x32\x1a.feast.types.RepeatedValueH\x00\x12\x32\n\x0clist_set_val\x18+ \x01(\x0b\x32\x1a.feast.types.RepeatedValueH\x00\x12\x32\n\x0cset_list_val\x18, \x01(\x0b\x32\x1a.feast.types.RepeatedValueH\x00\x12\x31\n\x0bset_set_val\x18- \x01(\x0b\x32\x1a.feast.types.RepeatedValueH\x00\x42\x05\n\x03val\"\x18\n\tBytesList\x12\x0b\n\x03val\x18\x01 \x03(\x0c\"\x19\n\nStringList\x12\x0b\n\x03val\x18\x01 \x03(\t\"\x18\n\tInt32List\x12\x0b\n\x03val\x18\x01 \x03(\x05\"\x18\n\tInt64List\x12\x0b\n\x03val\x18\x01 \x03(\x03\"\x19\n\nDoubleList\x12\x0b\n\x03val\x18\x01 \x03(\x01\"\x18\n\tFloatList\x12\x0b\n\x03val\x18\x01 \x03(\x02\"\x17\n\x08\x42oolList\x12\x0b\n\x03val\x18\x01 \x03(\x08\"\x17\n\x08\x42ytesSet\x12\x0b\n\x03val\x18\x01 \x03(\x0c\"\x18\n\tStringSet\x12\x0b\n\x03val\x18\x01 \x03(\t\"\x17\n\x08Int32Set\x12\x0b\n\x03val\x18\x01 \x03(\x05\"\x17\n\x08Int64Set\x12\x0b\n\x03val\x18\x01 \x03(\x03\"\x18\n\tDoubleSet\x12\x0b\n\x03val\x18\x01 \x03(\x01\"\x17\n\x08\x46loatSet\x12\x0b\n\x03val\x18\x01 \x03(\x02\"\x16\n\x07\x42oolSet\x12\x0b\n\x03val\x18\x01 \x03(\x08\"m\n\x03Map\x12&\n\x03val\x18\x01 \x03(\x0b\x32\x19.feast.types.Map.ValEntry\x1a>\n\x08ValEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.feast.types.Value:\x02\x38\x01\"(\n\x07MapList\x12\x1d\n\x03val\x18\x01 \x03(\x0b\x32\x10.feast.types.Map\"0\n\rRepeatedValue\x12\x1f\n\x03val\x18\x01 \x03(\x0b\x32\x12.feast.types.Value*\x10\n\x04Null\x12\x08\n\x04NULL\x10\x00\x42Q\n\x11\x66\x65\x61st.proto.typesB\nValueProtoZ0github.com/feast-dev/feast/go/protos/feast/typesb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x17\x66\x65\x61st/types/Value.proto\x12\x0b\x66\x65\x61st.types\"\xe2\x04\n\tValueType\"\xd4\x04\n\x04\x45num\x12\x0b\n\x07INVALID\x10\x00\x12\t\n\x05\x42YTES\x10\x01\x12\n\n\x06STRING\x10\x02\x12\t\n\x05INT32\x10\x03\x12\t\n\x05INT64\x10\x04\x12\n\n\x06\x44OUBLE\x10\x05\x12\t\n\x05\x46LOAT\x10\x06\x12\x08\n\x04\x42OOL\x10\x07\x12\x12\n\x0eUNIX_TIMESTAMP\x10\x08\x12\x0e\n\nBYTES_LIST\x10\x0b\x12\x0f\n\x0bSTRING_LIST\x10\x0c\x12\x0e\n\nINT32_LIST\x10\r\x12\x0e\n\nINT64_LIST\x10\x0e\x12\x0f\n\x0b\x44OUBLE_LIST\x10\x0f\x12\x0e\n\nFLOAT_LIST\x10\x10\x12\r\n\tBOOL_LIST\x10\x11\x12\x17\n\x13UNIX_TIMESTAMP_LIST\x10\x12\x12\x08\n\x04NULL\x10\x13\x12\x07\n\x03MAP\x10\x14\x12\x0c\n\x08MAP_LIST\x10\x15\x12\r\n\tBYTES_SET\x10\x16\x12\x0e\n\nSTRING_SET\x10\x17\x12\r\n\tINT32_SET\x10\x18\x12\r\n\tINT64_SET\x10\x19\x12\x0e\n\nDOUBLE_SET\x10\x1a\x12\r\n\tFLOAT_SET\x10\x1b\x12\x0c\n\x08\x42OOL_SET\x10\x1c\x12\x16\n\x12UNIX_TIMESTAMP_SET\x10\x1d\x12\x08\n\x04JSON\x10 \x12\r\n\tJSON_LIST\x10!\x12\n\n\x06STRUCT\x10\"\x12\x0f\n\x0bSTRUCT_LIST\x10#\x12\x08\n\x04UUID\x10$\x12\r\n\tTIME_UUID\x10%\x12\r\n\tUUID_LIST\x10&\x12\x12\n\x0eTIME_UUID_LIST\x10\'\x12\x0c\n\x08UUID_SET\x10(\x12\x11\n\rTIME_UUID_SET\x10)\x12\x0e\n\nVALUE_LIST\x10*\x12\r\n\tVALUE_SET\x10+\"\xd9\x0c\n\x05Value\x12\x13\n\tbytes_val\x18\x01 \x01(\x0cH\x00\x12\x14\n\nstring_val\x18\x02 \x01(\tH\x00\x12\x13\n\tint32_val\x18\x03 \x01(\x05H\x00\x12\x13\n\tint64_val\x18\x04 \x01(\x03H\x00\x12\x14\n\ndouble_val\x18\x05 \x01(\x01H\x00\x12\x13\n\tfloat_val\x18\x06 \x01(\x02H\x00\x12\x12\n\x08\x62ool_val\x18\x07 \x01(\x08H\x00\x12\x1c\n\x12unix_timestamp_val\x18\x08 \x01(\x03H\x00\x12\x30\n\x0e\x62ytes_list_val\x18\x0b \x01(\x0b\x32\x16.feast.types.BytesListH\x00\x12\x32\n\x0fstring_list_val\x18\x0c \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12\x30\n\x0eint32_list_val\x18\r \x01(\x0b\x32\x16.feast.types.Int32ListH\x00\x12\x30\n\x0eint64_list_val\x18\x0e \x01(\x0b\x32\x16.feast.types.Int64ListH\x00\x12\x32\n\x0f\x64ouble_list_val\x18\x0f \x01(\x0b\x32\x17.feast.types.DoubleListH\x00\x12\x30\n\x0e\x66loat_list_val\x18\x10 \x01(\x0b\x32\x16.feast.types.FloatListH\x00\x12.\n\rbool_list_val\x18\x11 \x01(\x0b\x32\x15.feast.types.BoolListH\x00\x12\x39\n\x17unix_timestamp_list_val\x18\x12 \x01(\x0b\x32\x16.feast.types.Int64ListH\x00\x12%\n\x08null_val\x18\x13 \x01(\x0e\x32\x11.feast.types.NullH\x00\x12#\n\x07map_val\x18\x14 \x01(\x0b\x32\x10.feast.types.MapH\x00\x12,\n\x0cmap_list_val\x18\x15 \x01(\x0b\x32\x14.feast.types.MapListH\x00\x12.\n\rbytes_set_val\x18\x16 \x01(\x0b\x32\x15.feast.types.BytesSetH\x00\x12\x30\n\x0estring_set_val\x18\x17 \x01(\x0b\x32\x16.feast.types.StringSetH\x00\x12.\n\rint32_set_val\x18\x18 \x01(\x0b\x32\x15.feast.types.Int32SetH\x00\x12.\n\rint64_set_val\x18\x19 \x01(\x0b\x32\x15.feast.types.Int64SetH\x00\x12\x30\n\x0e\x64ouble_set_val\x18\x1a \x01(\x0b\x32\x16.feast.types.DoubleSetH\x00\x12.\n\rfloat_set_val\x18\x1b \x01(\x0b\x32\x15.feast.types.FloatSetH\x00\x12,\n\x0c\x62ool_set_val\x18\x1c \x01(\x0b\x32\x14.feast.types.BoolSetH\x00\x12\x37\n\x16unix_timestamp_set_val\x18\x1d \x01(\x0b\x32\x15.feast.types.Int64SetH\x00\x12\x12\n\x08json_val\x18 \x01(\tH\x00\x12\x30\n\rjson_list_val\x18! \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12&\n\nstruct_val\x18\" \x01(\x0b\x32\x10.feast.types.MapH\x00\x12/\n\x0fstruct_list_val\x18# \x01(\x0b\x32\x14.feast.types.MapListH\x00\x12\x12\n\x08uuid_val\x18$ \x01(\tH\x00\x12\x17\n\rtime_uuid_val\x18% \x01(\tH\x00\x12\x30\n\ruuid_list_val\x18& \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12\x35\n\x12time_uuid_list_val\x18\' \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12.\n\x0cuuid_set_val\x18( \x01(\x0b\x32\x16.feast.types.StringSetH\x00\x12\x33\n\x11time_uuid_set_val\x18) \x01(\x0b\x32\x16.feast.types.StringSetH\x00\x12.\n\x08list_val\x18* \x01(\x0b\x32\x1a.feast.types.RepeatedValueH\x00\x12-\n\x07set_val\x18+ \x01(\x0b\x32\x1a.feast.types.RepeatedValueH\x00\x42\x05\n\x03val\"\x18\n\tBytesList\x12\x0b\n\x03val\x18\x01 \x03(\x0c\"\x19\n\nStringList\x12\x0b\n\x03val\x18\x01 \x03(\t\"\x18\n\tInt32List\x12\x0b\n\x03val\x18\x01 \x03(\x05\"\x18\n\tInt64List\x12\x0b\n\x03val\x18\x01 \x03(\x03\"\x19\n\nDoubleList\x12\x0b\n\x03val\x18\x01 \x03(\x01\"\x18\n\tFloatList\x12\x0b\n\x03val\x18\x01 \x03(\x02\"\x17\n\x08\x42oolList\x12\x0b\n\x03val\x18\x01 \x03(\x08\"\x17\n\x08\x42ytesSet\x12\x0b\n\x03val\x18\x01 \x03(\x0c\"\x18\n\tStringSet\x12\x0b\n\x03val\x18\x01 \x03(\t\"\x17\n\x08Int32Set\x12\x0b\n\x03val\x18\x01 \x03(\x05\"\x17\n\x08Int64Set\x12\x0b\n\x03val\x18\x01 \x03(\x03\"\x18\n\tDoubleSet\x12\x0b\n\x03val\x18\x01 \x03(\x01\"\x17\n\x08\x46loatSet\x12\x0b\n\x03val\x18\x01 \x03(\x02\"\x16\n\x07\x42oolSet\x12\x0b\n\x03val\x18\x01 \x03(\x08\"m\n\x03Map\x12&\n\x03val\x18\x01 \x03(\x0b\x32\x19.feast.types.Map.ValEntry\x1a>\n\x08ValEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.feast.types.Value:\x02\x38\x01\"(\n\x07MapList\x12\x1d\n\x03val\x18\x01 \x03(\x0b\x32\x10.feast.types.Map\"0\n\rRepeatedValue\x12\x1f\n\x03val\x18\x01 \x03(\x0b\x32\x12.feast.types.Value*\x10\n\x04Null\x12\x08\n\x04NULL\x10\x00\x42Q\n\x11\x66\x65\x61st.proto.typesB\nValueProtoZ0github.com/feast-dev/feast/go/protos/feast/typesb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -24,48 +24,48 @@ _globals['DESCRIPTOR']._serialized_options = b'\n\021feast.proto.typesB\nValueProtoZ0github.com/feast-dev/feast/go/protos/feast/types' _globals['_MAP_VALENTRY']._options = None _globals['_MAP_VALENTRY']._serialized_options = b'8\001' - _globals['_NULL']._serialized_start=2981 - _globals['_NULL']._serialized_end=2997 + _globals['_NULL']._serialized_start=2843 + _globals['_NULL']._serialized_end=2859 _globals['_VALUETYPE']._serialized_start=41 - _globals['_VALUETYPE']._serialized_end=676 + _globals['_VALUETYPE']._serialized_end=651 _globals['_VALUETYPE_ENUM']._serialized_start=55 - _globals['_VALUETYPE_ENUM']._serialized_end=676 - _globals['_VALUE']._serialized_start=679 - _globals['_VALUE']._serialized_end=2417 - _globals['_BYTESLIST']._serialized_start=2419 - _globals['_BYTESLIST']._serialized_end=2443 - _globals['_STRINGLIST']._serialized_start=2445 - _globals['_STRINGLIST']._serialized_end=2470 - _globals['_INT32LIST']._serialized_start=2472 - _globals['_INT32LIST']._serialized_end=2496 - _globals['_INT64LIST']._serialized_start=2498 - _globals['_INT64LIST']._serialized_end=2522 - _globals['_DOUBLELIST']._serialized_start=2524 - _globals['_DOUBLELIST']._serialized_end=2549 - _globals['_FLOATLIST']._serialized_start=2551 - _globals['_FLOATLIST']._serialized_end=2575 - _globals['_BOOLLIST']._serialized_start=2577 - _globals['_BOOLLIST']._serialized_end=2600 - _globals['_BYTESSET']._serialized_start=2602 - _globals['_BYTESSET']._serialized_end=2625 - _globals['_STRINGSET']._serialized_start=2627 - _globals['_STRINGSET']._serialized_end=2651 - _globals['_INT32SET']._serialized_start=2653 - _globals['_INT32SET']._serialized_end=2676 - _globals['_INT64SET']._serialized_start=2678 - _globals['_INT64SET']._serialized_end=2701 - _globals['_DOUBLESET']._serialized_start=2703 - _globals['_DOUBLESET']._serialized_end=2727 - _globals['_FLOATSET']._serialized_start=2729 - _globals['_FLOATSET']._serialized_end=2752 - _globals['_BOOLSET']._serialized_start=2754 - _globals['_BOOLSET']._serialized_end=2776 - _globals['_MAP']._serialized_start=2778 - _globals['_MAP']._serialized_end=2887 - _globals['_MAP_VALENTRY']._serialized_start=2825 - _globals['_MAP_VALENTRY']._serialized_end=2887 - _globals['_MAPLIST']._serialized_start=2889 - _globals['_MAPLIST']._serialized_end=2929 - _globals['_REPEATEDVALUE']._serialized_start=2931 - _globals['_REPEATEDVALUE']._serialized_end=2979 + _globals['_VALUETYPE_ENUM']._serialized_end=651 + _globals['_VALUE']._serialized_start=654 + _globals['_VALUE']._serialized_end=2279 + _globals['_BYTESLIST']._serialized_start=2281 + _globals['_BYTESLIST']._serialized_end=2305 + _globals['_STRINGLIST']._serialized_start=2307 + _globals['_STRINGLIST']._serialized_end=2332 + _globals['_INT32LIST']._serialized_start=2334 + _globals['_INT32LIST']._serialized_end=2358 + _globals['_INT64LIST']._serialized_start=2360 + _globals['_INT64LIST']._serialized_end=2384 + _globals['_DOUBLELIST']._serialized_start=2386 + _globals['_DOUBLELIST']._serialized_end=2411 + _globals['_FLOATLIST']._serialized_start=2413 + _globals['_FLOATLIST']._serialized_end=2437 + _globals['_BOOLLIST']._serialized_start=2439 + _globals['_BOOLLIST']._serialized_end=2462 + _globals['_BYTESSET']._serialized_start=2464 + _globals['_BYTESSET']._serialized_end=2487 + _globals['_STRINGSET']._serialized_start=2489 + _globals['_STRINGSET']._serialized_end=2513 + _globals['_INT32SET']._serialized_start=2515 + _globals['_INT32SET']._serialized_end=2538 + _globals['_INT64SET']._serialized_start=2540 + _globals['_INT64SET']._serialized_end=2563 + _globals['_DOUBLESET']._serialized_start=2565 + _globals['_DOUBLESET']._serialized_end=2589 + _globals['_FLOATSET']._serialized_start=2591 + _globals['_FLOATSET']._serialized_end=2614 + _globals['_BOOLSET']._serialized_start=2616 + _globals['_BOOLSET']._serialized_end=2638 + _globals['_MAP']._serialized_start=2640 + _globals['_MAP']._serialized_end=2749 + _globals['_MAP_VALENTRY']._serialized_start=2687 + _globals['_MAP_VALENTRY']._serialized_end=2749 + _globals['_MAPLIST']._serialized_start=2751 + _globals['_MAPLIST']._serialized_end=2791 + _globals['_REPEATEDVALUE']._serialized_start=2793 + _globals['_REPEATEDVALUE']._serialized_end=2841 # @@protoc_insertion_point(module_scope) diff --git a/sdk/python/feast/protos/feast/types/Value_pb2.pyi b/sdk/python/feast/protos/feast/types/Value_pb2.pyi index f83e9818eb4..53a7c800f04 100644 --- a/sdk/python/feast/protos/feast/types/Value_pb2.pyi +++ b/sdk/python/feast/protos/feast/types/Value_pb2.pyi @@ -92,10 +92,8 @@ class ValueType(google.protobuf.message.Message): TIME_UUID_LIST: ValueType._Enum.ValueType # 39 UUID_SET: ValueType._Enum.ValueType # 40 TIME_UUID_SET: ValueType._Enum.ValueType # 41 - LIST_LIST: ValueType._Enum.ValueType # 42 - LIST_SET: ValueType._Enum.ValueType # 43 - SET_LIST: ValueType._Enum.ValueType # 44 - SET_SET: ValueType._Enum.ValueType # 45 + VALUE_LIST: ValueType._Enum.ValueType # 42 + VALUE_SET: ValueType._Enum.ValueType # 43 class Enum(_Enum, metaclass=_EnumEnumTypeWrapper): ... INVALID: ValueType.Enum.ValueType # 0 @@ -136,10 +134,8 @@ class ValueType(google.protobuf.message.Message): TIME_UUID_LIST: ValueType.Enum.ValueType # 39 UUID_SET: ValueType.Enum.ValueType # 40 TIME_UUID_SET: ValueType.Enum.ValueType # 41 - LIST_LIST: ValueType.Enum.ValueType # 42 - LIST_SET: ValueType.Enum.ValueType # 43 - SET_LIST: ValueType.Enum.ValueType # 44 - SET_SET: ValueType.Enum.ValueType # 45 + VALUE_LIST: ValueType.Enum.ValueType # 42 + VALUE_SET: ValueType.Enum.ValueType # 43 def __init__( self, @@ -187,10 +183,8 @@ class Value(google.protobuf.message.Message): TIME_UUID_LIST_VAL_FIELD_NUMBER: builtins.int UUID_SET_VAL_FIELD_NUMBER: builtins.int TIME_UUID_SET_VAL_FIELD_NUMBER: builtins.int - LIST_LIST_VAL_FIELD_NUMBER: builtins.int - LIST_SET_VAL_FIELD_NUMBER: builtins.int - SET_LIST_VAL_FIELD_NUMBER: builtins.int - SET_SET_VAL_FIELD_NUMBER: builtins.int + LIST_VAL_FIELD_NUMBER: builtins.int + SET_VAL_FIELD_NUMBER: builtins.int bytes_val: builtins.bytes string_val: builtins.str int32_val: builtins.int @@ -254,13 +248,9 @@ class Value(google.protobuf.message.Message): @property def time_uuid_set_val(self) -> global___StringSet: ... @property - def list_list_val(self) -> global___RepeatedValue: ... + def list_val(self) -> global___RepeatedValue: ... @property - def list_set_val(self) -> global___RepeatedValue: ... - @property - def set_list_val(self) -> global___RepeatedValue: ... - @property - def set_set_val(self) -> global___RepeatedValue: ... + def set_val(self) -> global___RepeatedValue: ... def __init__( self, *, @@ -301,14 +291,12 @@ class Value(google.protobuf.message.Message): time_uuid_list_val: global___StringList | None = ..., uuid_set_val: global___StringSet | None = ..., time_uuid_set_val: global___StringSet | None = ..., - list_list_val: global___RepeatedValue | None = ..., - list_set_val: global___RepeatedValue | None = ..., - set_list_val: global___RepeatedValue | None = ..., - set_set_val: global___RepeatedValue | None = ..., + list_val: global___RepeatedValue | None = ..., + set_val: global___RepeatedValue | None = ..., ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["bool_list_val", b"bool_list_val", "bool_set_val", b"bool_set_val", "bool_val", b"bool_val", "bytes_list_val", b"bytes_list_val", "bytes_set_val", b"bytes_set_val", "bytes_val", b"bytes_val", "double_list_val", b"double_list_val", "double_set_val", b"double_set_val", "double_val", b"double_val", "float_list_val", b"float_list_val", "float_set_val", b"float_set_val", "float_val", b"float_val", "int32_list_val", b"int32_list_val", "int32_set_val", b"int32_set_val", "int32_val", b"int32_val", "int64_list_val", b"int64_list_val", "int64_set_val", b"int64_set_val", "int64_val", b"int64_val", "json_list_val", b"json_list_val", "json_val", b"json_val", "list_list_val", b"list_list_val", "list_set_val", b"list_set_val", "map_list_val", b"map_list_val", "map_val", b"map_val", "null_val", b"null_val", "set_list_val", b"set_list_val", "set_set_val", b"set_set_val", "string_list_val", b"string_list_val", "string_set_val", b"string_set_val", "string_val", b"string_val", "struct_list_val", b"struct_list_val", "struct_val", b"struct_val", "time_uuid_list_val", b"time_uuid_list_val", "time_uuid_set_val", b"time_uuid_set_val", "time_uuid_val", b"time_uuid_val", "unix_timestamp_list_val", b"unix_timestamp_list_val", "unix_timestamp_set_val", b"unix_timestamp_set_val", "unix_timestamp_val", b"unix_timestamp_val", "uuid_list_val", b"uuid_list_val", "uuid_set_val", b"uuid_set_val", "uuid_val", b"uuid_val", "val", b"val"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["bool_list_val", b"bool_list_val", "bool_set_val", b"bool_set_val", "bool_val", b"bool_val", "bytes_list_val", b"bytes_list_val", "bytes_set_val", b"bytes_set_val", "bytes_val", b"bytes_val", "double_list_val", b"double_list_val", "double_set_val", b"double_set_val", "double_val", b"double_val", "float_list_val", b"float_list_val", "float_set_val", b"float_set_val", "float_val", b"float_val", "int32_list_val", b"int32_list_val", "int32_set_val", b"int32_set_val", "int32_val", b"int32_val", "int64_list_val", b"int64_list_val", "int64_set_val", b"int64_set_val", "int64_val", b"int64_val", "json_list_val", b"json_list_val", "json_val", b"json_val", "list_list_val", b"list_list_val", "list_set_val", b"list_set_val", "map_list_val", b"map_list_val", "map_val", b"map_val", "null_val", b"null_val", "set_list_val", b"set_list_val", "set_set_val", b"set_set_val", "string_list_val", b"string_list_val", "string_set_val", b"string_set_val", "string_val", b"string_val", "struct_list_val", b"struct_list_val", "struct_val", b"struct_val", "time_uuid_list_val", b"time_uuid_list_val", "time_uuid_set_val", b"time_uuid_set_val", "time_uuid_val", b"time_uuid_val", "unix_timestamp_list_val", b"unix_timestamp_list_val", "unix_timestamp_set_val", b"unix_timestamp_set_val", "unix_timestamp_val", b"unix_timestamp_val", "uuid_list_val", b"uuid_list_val", "uuid_set_val", b"uuid_set_val", "uuid_val", b"uuid_val", "val", b"val"]) -> None: ... - def WhichOneof(self, oneof_group: typing_extensions.Literal["val", b"val"]) -> typing_extensions.Literal["bytes_val", "string_val", "int32_val", "int64_val", "double_val", "float_val", "bool_val", "unix_timestamp_val", "bytes_list_val", "string_list_val", "int32_list_val", "int64_list_val", "double_list_val", "float_list_val", "bool_list_val", "unix_timestamp_list_val", "null_val", "map_val", "map_list_val", "bytes_set_val", "string_set_val", "int32_set_val", "int64_set_val", "double_set_val", "float_set_val", "bool_set_val", "unix_timestamp_set_val", "json_val", "json_list_val", "struct_val", "struct_list_val", "uuid_val", "time_uuid_val", "uuid_list_val", "time_uuid_list_val", "uuid_set_val", "time_uuid_set_val", "list_list_val", "list_set_val", "set_list_val", "set_set_val"] | None: ... + def HasField(self, field_name: typing_extensions.Literal["bool_list_val", b"bool_list_val", "bool_set_val", b"bool_set_val", "bool_val", b"bool_val", "bytes_list_val", b"bytes_list_val", "bytes_set_val", b"bytes_set_val", "bytes_val", b"bytes_val", "double_list_val", b"double_list_val", "double_set_val", b"double_set_val", "double_val", b"double_val", "float_list_val", b"float_list_val", "float_set_val", b"float_set_val", "float_val", b"float_val", "int32_list_val", b"int32_list_val", "int32_set_val", b"int32_set_val", "int32_val", b"int32_val", "int64_list_val", b"int64_list_val", "int64_set_val", b"int64_set_val", "int64_val", b"int64_val", "json_list_val", b"json_list_val", "json_val", b"json_val", "list_val", b"list_val", "map_list_val", b"map_list_val", "map_val", b"map_val", "null_val", b"null_val", "set_val", b"set_val", "string_list_val", b"string_list_val", "string_set_val", b"string_set_val", "string_val", b"string_val", "struct_list_val", b"struct_list_val", "struct_val", b"struct_val", "time_uuid_list_val", b"time_uuid_list_val", "time_uuid_set_val", b"time_uuid_set_val", "time_uuid_val", b"time_uuid_val", "unix_timestamp_list_val", b"unix_timestamp_list_val", "unix_timestamp_set_val", b"unix_timestamp_set_val", "unix_timestamp_val", b"unix_timestamp_val", "uuid_list_val", b"uuid_list_val", "uuid_set_val", b"uuid_set_val", "uuid_val", b"uuid_val", "val", b"val"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["bool_list_val", b"bool_list_val", "bool_set_val", b"bool_set_val", "bool_val", b"bool_val", "bytes_list_val", b"bytes_list_val", "bytes_set_val", b"bytes_set_val", "bytes_val", b"bytes_val", "double_list_val", b"double_list_val", "double_set_val", b"double_set_val", "double_val", b"double_val", "float_list_val", b"float_list_val", "float_set_val", b"float_set_val", "float_val", b"float_val", "int32_list_val", b"int32_list_val", "int32_set_val", b"int32_set_val", "int32_val", b"int32_val", "int64_list_val", b"int64_list_val", "int64_set_val", b"int64_set_val", "int64_val", b"int64_val", "json_list_val", b"json_list_val", "json_val", b"json_val", "list_val", b"list_val", "map_list_val", b"map_list_val", "map_val", b"map_val", "null_val", b"null_val", "set_val", b"set_val", "string_list_val", b"string_list_val", "string_set_val", b"string_set_val", "string_val", b"string_val", "struct_list_val", b"struct_list_val", "struct_val", b"struct_val", "time_uuid_list_val", b"time_uuid_list_val", "time_uuid_set_val", b"time_uuid_set_val", "time_uuid_val", b"time_uuid_val", "unix_timestamp_list_val", b"unix_timestamp_list_val", "unix_timestamp_set_val", b"unix_timestamp_set_val", "unix_timestamp_val", b"unix_timestamp_val", "uuid_list_val", b"uuid_list_val", "uuid_set_val", b"uuid_set_val", "uuid_val", b"uuid_val", "val", b"val"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["val", b"val"]) -> typing_extensions.Literal["bytes_val", "string_val", "int32_val", "int64_val", "double_val", "float_val", "bool_val", "unix_timestamp_val", "bytes_list_val", "string_list_val", "int32_list_val", "int64_list_val", "double_list_val", "float_list_val", "bool_list_val", "unix_timestamp_list_val", "null_val", "map_val", "map_list_val", "bytes_set_val", "string_set_val", "int32_set_val", "int64_set_val", "double_set_val", "float_set_val", "bool_set_val", "unix_timestamp_set_val", "json_val", "json_list_val", "struct_val", "struct_list_val", "uuid_val", "time_uuid_val", "uuid_list_val", "time_uuid_list_val", "uuid_set_val", "time_uuid_set_val", "list_val", "set_val"] | None: ... global___Value = Value diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 0db54b89d7e..da17ed6bbfe 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -106,8 +106,8 @@ def feast_value_type_to_python_type( result.append(v) return result - # Handle nested collection types (list_list, list_set, set_list, set_set) - if val_attr in ("list_list_val", "list_set_val", "set_list_val", "set_set_val"): + # Handle nested collection types (list_val, set_val) + if val_attr in ("list_val", "set_val"): return _handle_nested_collection_value(val) # Handle Struct types — stored using Map proto, returned as dicts @@ -229,7 +229,7 @@ def feast_value_type_to_pandas_type(value_type: ValueType) -> Any: ValueType.TIME_UUID: "str", } if ( - value_type.name in ("MAP", "JSON", "STRUCT") + value_type.name in ("MAP", "JSON", "STRUCT", "VALUE_LIST", "VALUE_SET") or value_type.name.endswith("_LIST") or value_type.name.endswith("_SET") ): @@ -464,10 +464,8 @@ def _convert_value_type_str_to_value_type(type_str: str) -> ValueType: "TIME_UUID_LIST": ValueType.TIME_UUID_LIST, "UUID_SET": ValueType.UUID_SET, "TIME_UUID_SET": ValueType.TIME_UUID_SET, - "LIST_LIST": ValueType.LIST_LIST, - "LIST_SET": ValueType.LIST_SET, - "SET_LIST": ValueType.SET_LIST, - "SET_SET": ValueType.SET_SET, + "VALUE_LIST": ValueType.VALUE_LIST, + "VALUE_SET": ValueType.VALUE_SET, } return type_map.get(type_str, ValueType.STRING) @@ -938,13 +936,8 @@ def _python_value_to_proto_value( Returns: List of Feast Value Proto """ - # Handle nested collection types (LIST_LIST, LIST_SET, SET_LIST, SET_SET) - if feast_value_type in ( - ValueType.LIST_LIST, - ValueType.LIST_SET, - ValueType.SET_LIST, - ValueType.SET_SET, - ): + # Handle nested collection types (VALUE_LIST, VALUE_SET) + if feast_value_type in (ValueType.VALUE_LIST, ValueType.VALUE_SET): return _convert_nested_collection_to_proto(feast_value_type, values) # Handle Map types @@ -1078,16 +1071,7 @@ def _convert_nested_collection_to_proto( feast_value_type: ValueType, values: List[Any] ) -> List[ProtoValue]: """Convert nested collection values (list-of-lists, list-of-sets, etc.) to proto.""" - val_attr_map = { - ValueType.LIST_LIST: "list_list_val", - ValueType.LIST_SET: "list_set_val", - ValueType.SET_LIST: "set_list_val", - ValueType.SET_SET: "set_set_val", - } - val_attr = val_attr_map[feast_value_type] - - # Inner type has Set semantics for LIST_SET and SET_SET - inner_is_set = feast_value_type in (ValueType.LIST_SET, ValueType.SET_SET) + val_attr = "list_val" if feast_value_type == ValueType.VALUE_LIST else "set_val" result = [] for value in values: @@ -1100,22 +1084,15 @@ def _convert_nested_collection_to_proto( inner_values.append(ProtoValue()) else: inner_list = list(inner_collection) - # Apply Set semantics: deduplicate inner elements - if inner_is_set: - seen: list = [] - for item in inner_list: - if item not in seen: - seen.append(item) - inner_list = seen if len(inner_list) == 0: # Empty inner collection: store as empty ProtoValue inner_values.append(ProtoValue()) elif any( isinstance(item, (list, set, tuple)) for item in inner_list ): - # Deeper nesting (3+ levels): recurse + # Deeper nesting (3+ levels): recurse using VALUE_LIST inner_proto = _convert_nested_collection_to_proto( - feast_value_type, [inner_list] + ValueType.VALUE_LIST, [inner_list] ) inner_values.append(inner_proto[0]) else: @@ -1223,10 +1200,8 @@ def python_values_to_proto_values( "json_list_val": ValueType.JSON_LIST, "struct_val": ValueType.STRUCT, "struct_list_val": ValueType.STRUCT_LIST, - "list_list_val": ValueType.LIST_LIST, - "list_set_val": ValueType.LIST_SET, - "set_list_val": ValueType.SET_LIST, - "set_set_val": ValueType.SET_SET, + "list_val": ValueType.VALUE_LIST, + "set_val": ValueType.VALUE_SET, "int32_set_val": ValueType.INT32_SET, "int64_set_val": ValueType.INT64_SET, "double_set_val": ValueType.DOUBLE_SET, @@ -1271,7 +1246,7 @@ def pa_to_feast_value_type(pa_type_as_str: str) -> ValueType: inner_str = pa_type_as_str[len("list ValueType: if isinstance(self.base_type, Struct): return ValueType.STRUCT_LIST - if isinstance(self.base_type, Array): - return ValueType.LIST_LIST - if isinstance(self.base_type, Set): - return ValueType.LIST_SET + if isinstance(self.base_type, (Array, Set)): + return ValueType.VALUE_LIST assert isinstance(self.base_type, PrimitiveFeastType) value_type_name = PRIMITIVE_FEAST_TYPES_TO_VALUE_TYPES[self.base_type.name] value_type_list_name = value_type_name + "_LIST" @@ -234,10 +232,8 @@ def __init__(self, base_type: Union[PrimitiveFeastType, ComplexFeastType]): self.base_type = base_type def to_value_type(self) -> ValueType: - if isinstance(self.base_type, Array): - return ValueType.SET_LIST - if isinstance(self.base_type, Set): - return ValueType.SET_SET + if isinstance(self.base_type, (Array, Set)): + return ValueType.VALUE_SET assert isinstance(self.base_type, PrimitiveFeastType) value_type_name = PRIMITIVE_FEAST_TYPES_TO_VALUE_TYPES[self.base_type.name] value_type_set_name = value_type_name + "_SET" @@ -437,14 +433,10 @@ def from_value_type( # Nested collection types use placeholder inner types. # Real inner type is restored from Field tags during deserialization. - if value_type == ValueType.LIST_LIST: + if value_type == ValueType.VALUE_LIST: return Array(Array(String)) - if value_type == ValueType.LIST_SET: - return Array(Set(String)) - if value_type == ValueType.SET_LIST: + if value_type == ValueType.VALUE_SET: return Set(Array(String)) - if value_type == ValueType.SET_SET: - return Set(Set(String)) raise ValueError(f"Could not convert value type {value_type} to FeastType.") @@ -471,14 +463,10 @@ def from_feast_type( return ValueType.STRUCT_LIST # Handle nested collection types - if isinstance(feast_type, Array) and isinstance(feast_type.base_type, Array): - return ValueType.LIST_LIST - if isinstance(feast_type, Array) and isinstance(feast_type.base_type, Set): - return ValueType.LIST_SET - if isinstance(feast_type, Set) and isinstance(feast_type.base_type, Array): - return ValueType.SET_LIST - if isinstance(feast_type, Set) and isinstance(feast_type.base_type, Set): - return ValueType.SET_SET + if isinstance(feast_type, Array) and isinstance(feast_type.base_type, (Array, Set)): + return ValueType.VALUE_LIST + if isinstance(feast_type, Set) and isinstance(feast_type.base_type, (Array, Set)): + return ValueType.VALUE_SET if feast_type in VALUE_TYPES_TO_FEAST_TYPES.values(): return list(VALUE_TYPES_TO_FEAST_TYPES.keys())[ diff --git a/sdk/python/feast/value_type.py b/sdk/python/feast/value_type.py index 0fb4071895a..508493de6d8 100644 --- a/sdk/python/feast/value_type.py +++ b/sdk/python/feast/value_type.py @@ -77,10 +77,8 @@ class ValueType(enum.Enum): TIME_UUID_LIST = 39 UUID_SET = 40 TIME_UUID_SET = 41 - LIST_LIST = 42 - LIST_SET = 43 - SET_LIST = 44 - SET_SET = 45 + VALUE_LIST = 42 + VALUE_SET = 43 ListType = Union[ diff --git a/sdk/python/tests/unit/test_proto_json.py b/sdk/python/tests/unit/test_proto_json.py index 40add39c787..6b3c7181ad7 100644 --- a/sdk/python/tests/unit/test_proto_json.py +++ b/sdk/python/tests/unit/test_proto_json.py @@ -107,11 +107,11 @@ def test_nested_collection_json_roundtrip(proto_json_patch): """Nested collection values (list of lists) should survive JSON roundtrip.""" from feast.protos.feast.types.Value_pb2 import Value - # Build a Value with list_list_val containing [[1,2],[3,4,5]] + # Build a Value with list_val containing [[1,2],[3,4,5]] value_proto = Value() - inner1 = value_proto.list_list_val.val.add() + inner1 = value_proto.list_val.val.add() inner1.int64_list_val.val.extend([1, 2]) - inner2 = value_proto.list_list_val.val.add() + inner2 = value_proto.list_val.val.add() inner2.int64_list_val.val.extend([3, 4, 5]) # Serialize to JSON @@ -126,8 +126,8 @@ def test_nested_collection_json_roundtrip(proto_json_patch): feature_vector_proto = FeatureVector() Parse(feature_vector_str, feature_vector_proto) assert len(feature_vector_proto.values) == 1 - assert feature_vector_proto.values[0].WhichOneof("val") == "list_list_val" - assert len(feature_vector_proto.values[0].list_list_val.val) == 2 + assert feature_vector_proto.values[0].WhichOneof("val") == "list_val" + assert len(feature_vector_proto.values[0].list_val.val) == 2 @pytest.fixture(scope="module") diff --git a/sdk/python/tests/unit/test_type_map.py b/sdk/python/tests/unit/test_type_map.py index 8e2a8dccf8c..e135f44ac6e 100644 --- a/sdk/python/tests/unit/test_type_map.py +++ b/sdk/python/tests/unit/test_type_map.py @@ -1556,65 +1556,46 @@ def test_pg_uuid_type_mapping(self): class TestNestedCollectionTypes: - """Tests for nested collection type proto conversion (LIST_LIST, LIST_SET, SET_LIST, SET_SET).""" + """Tests for nested collection type proto conversion (VALUE_LIST, VALUE_SET).""" - def test_list_list_proto_roundtrip(self): - """Test python_values_to_proto_values and feast_value_type_to_python_type for LIST_LIST.""" + def test_value_list_proto_roundtrip(self): + """Test python_values_to_proto_values and feast_value_type_to_python_type for VALUE_LIST.""" values = [[[1, 2, 3], [4, 5]]] - protos = python_values_to_proto_values(values, ValueType.LIST_LIST) + protos = python_values_to_proto_values(values, ValueType.VALUE_LIST) assert len(protos) == 1 - assert protos[0].WhichOneof("val") == "list_list_val" + assert protos[0].WhichOneof("val") == "list_val" result = feast_value_type_to_python_type(protos[0]) assert isinstance(result, list) assert len(result) == 2 - def test_list_set_proto_roundtrip(self): - """Test LIST_SET proto conversion.""" - values = [[[1, 2], [3, 4, 5]]] - protos = python_values_to_proto_values(values, ValueType.LIST_SET) - assert len(protos) == 1 - assert protos[0].WhichOneof("val") == "list_set_val" - result = feast_value_type_to_python_type(protos[0]) - assert isinstance(result, list) - assert len(result) == 2 - - def test_set_list_proto_roundtrip(self): - """Test SET_LIST proto conversion.""" + def test_value_set_proto_roundtrip(self): + """Test VALUE_SET proto conversion.""" values = [[["a", "b"], ["c"]]] - protos = python_values_to_proto_values(values, ValueType.SET_LIST) + protos = python_values_to_proto_values(values, ValueType.VALUE_SET) assert len(protos) == 1 - assert protos[0].WhichOneof("val") == "set_list_val" + assert protos[0].WhichOneof("val") == "set_val" result = feast_value_type_to_python_type(protos[0]) assert isinstance(result, list) assert len(result) == 2 - def test_set_set_proto_roundtrip(self): - """Test SET_SET proto conversion.""" - values = [[["x", "y"], ["z"]]] - protos = python_values_to_proto_values(values, ValueType.SET_SET) - assert len(protos) == 1 - assert protos[0].WhichOneof("val") == "set_set_val" - result = feast_value_type_to_python_type(protos[0]) - assert isinstance(result, list) - def test_nested_collection_null_handling(self): """Test that None values are handled correctly.""" values = [None] - protos = python_values_to_proto_values(values, ValueType.LIST_LIST) + protos = python_values_to_proto_values(values, ValueType.VALUE_LIST) assert len(protos) == 1 assert protos[0].WhichOneof("val") is None def test_convert_value_type_str_nested(self): """Test _convert_value_type_str_to_value_type for nested types.""" - assert _convert_value_type_str_to_value_type("LIST_LIST") == ValueType.LIST_LIST - assert _convert_value_type_str_to_value_type("LIST_SET") == ValueType.LIST_SET - assert _convert_value_type_str_to_value_type("SET_LIST") == ValueType.SET_LIST - assert _convert_value_type_str_to_value_type("SET_SET") == ValueType.SET_SET + assert ( + _convert_value_type_str_to_value_type("VALUE_LIST") == ValueType.VALUE_LIST + ) + assert _convert_value_type_str_to_value_type("VALUE_SET") == ValueType.VALUE_SET def test_nested_collection_empty_inner_list(self): """Test that empty inner collections are handled gracefully.""" values = [[[], [1, 2]]] - protos = python_values_to_proto_values(values, ValueType.LIST_LIST) + protos = python_values_to_proto_values(values, ValueType.VALUE_LIST) result = feast_value_type_to_python_type(protos[0]) assert isinstance(result, list) assert len(result) == 2 @@ -1625,57 +1606,33 @@ def test_nested_collection_empty_inner_list(self): def test_nested_collection_inner_none(self): """Test that None inner elements are handled.""" values = [[[1, 2], None, [3]]] - protos = python_values_to_proto_values(values, ValueType.LIST_LIST) + protos = python_values_to_proto_values(values, ValueType.VALUE_LIST) result = feast_value_type_to_python_type(protos[0]) assert len(result) == 3 assert result[0] == [1, 2] assert result[1] is None assert result[2] == [3] - def test_list_set_deduplicates_inner(self): - """Test that LIST_SET deduplicates inner collection elements.""" - values = [[[1, 1, 2, 2, 3], [4, 4]]] - protos = python_values_to_proto_values(values, ValueType.LIST_SET) - result = feast_value_type_to_python_type(protos[0]) - assert result[0] == [1, 2, 3] - assert result[1] == [4] - - def test_set_set_deduplicates_inner(self): - """Test that SET_SET deduplicates inner collection elements.""" - values = [[["a", "a", "b"], ["c", "c"]]] - protos = python_values_to_proto_values(values, ValueType.SET_SET) - result = feast_value_type_to_python_type(protos[0]) - assert result[0] == ["a", "b"] - assert result[1] == ["c"] - - def test_list_list_no_dedup(self): - """Test that LIST_LIST does NOT deduplicate (Array semantics).""" - values = [[[1, 1, 2], [3, 3]]] - protos = python_values_to_proto_values(values, ValueType.LIST_LIST) - result = feast_value_type_to_python_type(protos[0]) - assert result[0] == [1, 1, 2] - assert result[1] == [3, 3] - - def test_set_list_no_dedup_inner(self): - """Test that SET_LIST does NOT deduplicate inner elements (inner is Array).""" + def test_value_list_no_dedup(self): + """Test that VALUE_LIST does NOT deduplicate (Array semantics).""" values = [[[1, 1, 2], [3, 3]]] - protos = python_values_to_proto_values(values, ValueType.SET_LIST) + protos = python_values_to_proto_values(values, ValueType.VALUE_LIST) result = feast_value_type_to_python_type(protos[0]) assert result[0] == [1, 1, 2] assert result[1] == [3, 3] - def test_list_list_proto_roundtrip_values(self): - """Test that LIST_LIST roundtrip preserves actual inner values.""" + def test_value_list_proto_roundtrip_values(self): + """Test that VALUE_LIST roundtrip preserves actual inner values.""" values = [[[1, 2, 3], [4, 5]]] - protos = python_values_to_proto_values(values, ValueType.LIST_LIST) + protos = python_values_to_proto_values(values, ValueType.VALUE_LIST) result = feast_value_type_to_python_type(protos[0]) assert result[0] == [1, 2, 3] assert result[1] == [4, 5] - def test_set_list_proto_roundtrip_values(self): - """Test that SET_LIST roundtrip preserves actual inner values.""" + def test_value_set_proto_roundtrip_values(self): + """Test that VALUE_SET roundtrip preserves actual inner values.""" values = [[["a", "b"], ["c"]]] - protos = python_values_to_proto_values(values, ValueType.SET_LIST) + protos = python_values_to_proto_values(values, ValueType.VALUE_SET) result = feast_value_type_to_python_type(protos[0]) assert result[0] == ["a", "b"] assert result[1] == ["c"] @@ -1683,7 +1640,7 @@ def test_set_list_proto_roundtrip_values(self): def test_multi_value_batch_nested(self): """Test multiple nested collection values in a single batch.""" values = [[[1, 2], [3]], [[4], [5, 6]]] - protos = python_values_to_proto_values(values, ValueType.LIST_LIST) + protos = python_values_to_proto_values(values, ValueType.VALUE_LIST) assert len(protos) == 2 r0 = feast_value_type_to_python_type(protos[0]) r1 = feast_value_type_to_python_type(protos[1]) @@ -1691,12 +1648,10 @@ def test_multi_value_batch_nested(self): assert r1 == [[4], [5, 6]] def test_feast_value_type_to_pa_nested(self): - """Test feast_value_type_to_pa for all nested collection types.""" + """Test feast_value_type_to_pa for nested collection types.""" for vt in ( - ValueType.LIST_LIST, - ValueType.LIST_SET, - ValueType.SET_LIST, - ValueType.SET_SET, + ValueType.VALUE_LIST, + ValueType.VALUE_SET, ): pa_type = feast_value_type_to_pa(vt) assert pa_type == pyarrow.list_(pyarrow.list_(pyarrow.string())) @@ -1705,13 +1660,13 @@ def test_pa_to_feast_value_type_nested(self): """Test pa_to_feast_value_type recognizes nested list PyArrow types.""" assert ( pa_to_feast_value_type("list>") - == ValueType.LIST_LIST + == ValueType.VALUE_LIST ) assert ( pa_to_feast_value_type("list>") - == ValueType.LIST_LIST + == ValueType.VALUE_LIST ) assert ( pa_to_feast_value_type("list>") - == ValueType.LIST_LIST + == ValueType.VALUE_LIST ) diff --git a/sdk/python/tests/unit/test_types.py b/sdk/python/tests/unit/test_types.py index 313a6cdcaa2..638a9bc7297 100644 --- a/sdk/python/tests/unit/test_types.py +++ b/sdk/python/tests/unit/test_types.py @@ -54,63 +54,61 @@ def test_set_feast_type(): def test_nested_array_array(): - """Array(Array(T)) should produce LIST_LIST.""" + """Array(Array(T)) should produce VALUE_LIST.""" t = Array(Array(String)) - assert t.to_value_type() == ValueType.LIST_LIST - assert from_feast_type(t) == ValueType.LIST_LIST + assert t.to_value_type() == ValueType.VALUE_LIST + assert from_feast_type(t) == ValueType.VALUE_LIST t2 = Array(Array(Int32)) - assert t2.to_value_type() == ValueType.LIST_LIST + assert t2.to_value_type() == ValueType.VALUE_LIST def test_nested_array_set(): - """Array(Set(T)) should produce LIST_SET.""" + """Array(Set(T)) should produce VALUE_LIST.""" t = Array(Set(String)) - assert t.to_value_type() == ValueType.LIST_SET - assert from_feast_type(t) == ValueType.LIST_SET + assert t.to_value_type() == ValueType.VALUE_LIST + assert from_feast_type(t) == ValueType.VALUE_LIST def test_nested_set_array(): - """Set(Array(T)) should produce SET_LIST.""" + """Set(Array(T)) should produce VALUE_SET.""" t = Set(Array(String)) - assert t.to_value_type() == ValueType.SET_LIST - assert from_feast_type(t) == ValueType.SET_LIST + assert t.to_value_type() == ValueType.VALUE_SET + assert from_feast_type(t) == ValueType.VALUE_SET def test_nested_set_set(): - """Set(Set(T)) should produce SET_SET.""" + """Set(Set(T)) should produce VALUE_SET.""" t = Set(Set(String)) - assert t.to_value_type() == ValueType.SET_SET - assert from_feast_type(t) == ValueType.SET_SET + assert t.to_value_type() == ValueType.VALUE_SET + assert from_feast_type(t) == ValueType.VALUE_SET def test_nested_unbounded_depth(): """Nesting depth should be unbounded.""" # 3-level t3 = Array(Array(Array(String))) - assert t3.to_value_type() == ValueType.LIST_LIST + assert t3.to_value_type() == ValueType.VALUE_LIST t3_mixed = Array(Set(Array(String))) - assert t3_mixed.to_value_type() == ValueType.LIST_SET + assert t3_mixed.to_value_type() == ValueType.VALUE_LIST t3_set = Set(Array(Array(String))) - assert t3_set.to_value_type() == ValueType.SET_LIST + assert t3_set.to_value_type() == ValueType.VALUE_SET t3_set2 = Set(Set(Set(String))) - assert t3_set2.to_value_type() == ValueType.SET_SET + assert t3_set2.to_value_type() == ValueType.VALUE_SET # 4-level t4 = Array(Array(Array(Array(Int32)))) - assert t4.to_value_type() == ValueType.LIST_LIST + assert t4.to_value_type() == ValueType.VALUE_LIST def test_nested_from_value_type_roundtrip(): """from_value_type should return a placeholder for nested types.""" for vt in ( - ValueType.LIST_LIST, - ValueType.LIST_SET, - ValueType.SET_LIST, - ValueType.SET_SET, + ValueType.VALUE_LIST, + ValueType.VALUE_SET, ): ft = from_value_type(vt) assert ft.to_value_type() == vt From 89379fc4d1d84579587001e759f1aeb57a096e23 Mon Sep 17 00:00:00 2001 From: soojin Date: Thu, 2 Apr 2026 00:12:25 +0900 Subject: [PATCH 8/9] fix: Preserve inner element types in PyArrow schema inference and optimize JSON nested list detection - Add _parse_pa_type_str() to reconstruct PyArrow types from type strings for VALUE_LIST/VALUE_SET, avoiding lossy round-trip through placeholder - Optimize proto_json nested list detection: only scan with any() when first element is None, avoiding O(n) scan for flat lists - Add warning log for unrecognized PyArrow type strings Co-Authored-By: Claude Opus 4.6 Signed-off-by: soojin --- .../infra/offline_stores/offline_utils.py | 48 +++++++++++++++---- sdk/python/feast/proto_json.py | 4 +- 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/offline_utils.py b/sdk/python/feast/infra/offline_stores/offline_utils.py index 0c478adb2c4..5664e6f45a6 100644 --- a/sdk/python/feast/infra/offline_stores/offline_utils.py +++ b/sdk/python/feast/infra/offline_stores/offline_utils.py @@ -1,3 +1,4 @@ +import logging import uuid from dataclasses import asdict, dataclass from datetime import datetime, timedelta, timezone @@ -21,6 +22,7 @@ from feast.repo_config import RepoConfig from feast.type_map import feast_value_type_to_pa from feast.utils import _get_requested_feature_views_to_features_dict, to_naive_utc +from feast.value_type import ValueType DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL = "event_timestamp" @@ -241,6 +243,37 @@ def get_offline_store_from_config(offline_store_config: Any) -> OfflineStore: return offline_store_class() +_PA_BASIC_TYPES = { + "int32": pa.int32(), + "int64": pa.int64(), + "double": pa.float64(), + "float": pa.float32(), + "string": pa.string(), + "binary": pa.binary(), + "bool": pa.bool_(), + "large_string": pa.large_string(), + "null": pa.null(), +} + + +def _parse_pa_type_str(pa_type_str: str) -> pa.DataType: + """Parse a PyArrow type string to preserve inner element types for nested lists.""" + pa_type_str = pa_type_str.strip() + if pa_type_str.startswith("list"): + inner = pa_type_str[len("list Tuple[pa.Schema, List[str]]: @@ -250,15 +283,12 @@ def get_pyarrow_schema_from_batch_source( pa_schema = [] column_names = [] for column_name, column_type in column_names_and_types: - pa_schema.append( - ( - column_name, - feast_value_type_to_pa( - batch_source.source_datatype_to_feast_value_type()(column_type), - timestamp_unit=timestamp_unit, - ), - ) - ) + value_type = batch_source.source_datatype_to_feast_value_type()(column_type) + if value_type in (ValueType.VALUE_LIST, ValueType.VALUE_SET): + pa_type = _parse_pa_type_str(column_type) + else: + pa_type = feast_value_type_to_pa(value_type, timestamp_unit=timestamp_unit) + pa_schema.append((column_name, pa_type)) column_names.append(column_name) return pa.schema(pa_schema), column_names diff --git a/sdk/python/feast/proto_json.py b/sdk/python/feast/proto_json.py index 82fb4dd0b13..d663e316b03 100644 --- a/sdk/python/feast/proto_json.py +++ b/sdk/python/feast/proto_json.py @@ -92,7 +92,9 @@ def from_json_object( if len(value) == 0: # Clear will mark the struct as modified so it will be created even if there are no values message.int64_list_val.Clear() - elif isinstance(value[0], list) or any(isinstance(v, list) for v in value): + elif isinstance(value[0], list) or ( + value[0] is None and any(isinstance(v, list) for v in value) + ): # Nested collection (list of lists). # Check any() to handle cases where the first element is None # (empty inner collections round-trip through proto as None). From a4dde0f81f79681ad8d41fe5a068b12ea7deb724 Mon Sep 17 00:00:00 2001 From: soojin Date: Thu, 2 Apr 2026 00:28:51 +0900 Subject: [PATCH 9/9] fix: Add np.ndarray support in nested collection proto conversion and clarify placeholder pyarrow type - Add np.ndarray to isinstance check in _convert_nested_collection_to_proto to fix KeyError for 3+ level nesting during materialization (PyArrow produces np.ndarray, not Python list) - Add comment clarifying VALUE_LIST/VALUE_SET placeholder in feast_value_type_to_pa Co-Authored-By: Claude Opus 4.6 Signed-off-by: soojin --- sdk/python/feast/type_map.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index da17ed6bbfe..3c1cc5a9380 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -1088,7 +1088,8 @@ def _convert_nested_collection_to_proto( # Empty inner collection: store as empty ProtoValue inner_values.append(ProtoValue()) elif any( - isinstance(item, (list, set, tuple)) for item in inner_list + isinstance(item, (list, set, tuple, np.ndarray)) + for item in inner_list ): # Deeper nesting (3+ levels): recurse using VALUE_LIST inner_proto = _convert_nested_collection_to_proto( @@ -1765,6 +1766,8 @@ def feast_value_type_to_pa( ValueType.JSON_LIST: pyarrow.list_(pyarrow.large_string()), ValueType.STRUCT: pyarrow.struct([]), ValueType.STRUCT_LIST: pyarrow.list_(pyarrow.struct([])), + # Placeholder: inner type is unknown from ValueType alone. + # Callers needing accurate inner types should use from_feast_to_pyarrow_type() with a FeastType. ValueType.VALUE_LIST: pyarrow.list_(pyarrow.list_(pyarrow.string())), ValueType.VALUE_SET: pyarrow.list_(pyarrow.list_(pyarrow.string())), ValueType.NULL: pyarrow.null(),