diff --git a/docs/reference/type-system.md b/docs/reference/type-system.md index affe394f570..4a1e93734f3 100644 --- a/docs/reference/type-system.md +++ b/docs/reference/type-system.md @@ -4,6 +4,7 @@ Feast uses an internal type system to provide guarantees on training and serving data. Feast currently supports eight primitive types - `INT32`, `INT64`, `FLOAT32`, `FLOAT64`, `STRING`, `BYTES`, `BOOL`, and `UNIX_TIMESTAMP` - and the corresponding array types. +A map type is also supported: keys are of `STRING` type and values may be of any supported Feast type, including nested maps and lists of maps. Null types are not supported, although the `UNIX_TIMESTAMP` type is nullable. The type system is controlled by [`Value.proto`](https://github.com/feast-dev/feast/blob/master/protos/feast/types/Value.proto) in protobuf and by [`types.py`](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/types.py) in Python. Type conversion logic can be found in [`type_map.py`](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/type_map.py). 
diff --git a/protos/feast/types/Value.proto b/protos/feast/types/Value.proto index b273fecfeae..703684c3b52 100644 --- a/protos/feast/types/Value.proto +++ b/protos/feast/types/Value.proto @@ -1,3 +1,4 @@ + /* * Copyright 2018 The Feast Authors * @@ -42,6 +43,8 @@ message ValueType { BOOL_LIST = 17; UNIX_TIMESTAMP_LIST = 18; NULL = 19; + MAP = 20; + MAP_LIST = 21; } } @@ -67,6 +70,8 @@ message Value { BoolList bool_list_val = 17; Int64List unix_timestamp_list_val = 18; Null null_val = 19; + Map map_val = 20; + MapList map_list_val = 21; } } @@ -102,8 +107,16 @@ message BoolList { repeated bool val = 1; } +message Map { + map val = 1; +} + +message MapList { + repeated Map val = 1; +} + // This is to avoid an issue of being unable to specify `repeated value` in oneofs or maps // In JSON "val" field can be omitted message RepeatedValue { repeated Value val = 1; -} \ No newline at end of file +} diff --git a/sdk/python/feast/protos/feast/types/Value_pb2.py b/sdk/python/feast/protos/feast/types/Value_pb2.py index 18ee3311808..87134112a66 100644 --- a/sdk/python/feast/protos/feast/types/Value_pb2.py +++ b/sdk/python/feast/protos/feast/types/Value_pb2.py @@ -14,7 +14,7 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x17\x66\x65\x61st/types/Value.proto\x12\x0b\x66\x65\x61st.types\"\x97\x02\n\tValueType\"\x89\x02\n\x04\x45num\x12\x0b\n\x07INVALID\x10\x00\x12\t\n\x05\x42YTES\x10\x01\x12\n\n\x06STRING\x10\x02\x12\t\n\x05INT32\x10\x03\x12\t\n\x05INT64\x10\x04\x12\n\n\x06\x44OUBLE\x10\x05\x12\t\n\x05\x46LOAT\x10\x06\x12\x08\n\x04\x42OOL\x10\x07\x12\x12\n\x0eUNIX_TIMESTAMP\x10\x08\x12\x0e\n\nBYTES_LIST\x10\x0b\x12\x0f\n\x0bSTRING_LIST\x10\x0c\x12\x0e\n\nINT32_LIST\x10\r\x12\x0e\n\nINT64_LIST\x10\x0e\x12\x0f\n\x0b\x44OUBLE_LIST\x10\x0f\x12\x0e\n\nFLOAT_LIST\x10\x10\x12\r\n\tBOOL_LIST\x10\x11\x12\x17\n\x13UNIX_TIMESTAMP_LIST\x10\x12\x12\x08\n\x04NULL\x10\x13\"\x82\x05\n\x05Value\x12\x13\n\tbytes_val\x18\x01 \x01(\x0cH\x00\x12\x14\n\nstring_val\x18\x02 
\x01(\tH\x00\x12\x13\n\tint32_val\x18\x03 \x01(\x05H\x00\x12\x13\n\tint64_val\x18\x04 \x01(\x03H\x00\x12\x14\n\ndouble_val\x18\x05 \x01(\x01H\x00\x12\x13\n\tfloat_val\x18\x06 \x01(\x02H\x00\x12\x12\n\x08\x62ool_val\x18\x07 \x01(\x08H\x00\x12\x1c\n\x12unix_timestamp_val\x18\x08 \x01(\x03H\x00\x12\x30\n\x0e\x62ytes_list_val\x18\x0b \x01(\x0b\x32\x16.feast.types.BytesListH\x00\x12\x32\n\x0fstring_list_val\x18\x0c \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12\x30\n\x0eint32_list_val\x18\r \x01(\x0b\x32\x16.feast.types.Int32ListH\x00\x12\x30\n\x0eint64_list_val\x18\x0e \x01(\x0b\x32\x16.feast.types.Int64ListH\x00\x12\x32\n\x0f\x64ouble_list_val\x18\x0f \x01(\x0b\x32\x17.feast.types.DoubleListH\x00\x12\x30\n\x0e\x66loat_list_val\x18\x10 \x01(\x0b\x32\x16.feast.types.FloatListH\x00\x12.\n\rbool_list_val\x18\x11 \x01(\x0b\x32\x15.feast.types.BoolListH\x00\x12\x39\n\x17unix_timestamp_list_val\x18\x12 \x01(\x0b\x32\x16.feast.types.Int64ListH\x00\x12%\n\x08null_val\x18\x13 \x01(\x0e\x32\x11.feast.types.NullH\x00\x42\x05\n\x03val\"\x18\n\tBytesList\x12\x0b\n\x03val\x18\x01 \x03(\x0c\"\x19\n\nStringList\x12\x0b\n\x03val\x18\x01 \x03(\t\"\x18\n\tInt32List\x12\x0b\n\x03val\x18\x01 \x03(\x05\"\x18\n\tInt64List\x12\x0b\n\x03val\x18\x01 \x03(\x03\"\x19\n\nDoubleList\x12\x0b\n\x03val\x18\x01 \x03(\x01\"\x18\n\tFloatList\x12\x0b\n\x03val\x18\x01 \x03(\x02\"\x17\n\x08\x42oolList\x12\x0b\n\x03val\x18\x01 \x03(\x08\"0\n\rRepeatedValue\x12\x1f\n\x03val\x18\x01 \x03(\x0b\x32\x12.feast.types.Value*\x10\n\x04Null\x12\x08\n\x04NULL\x10\x00\x42Q\n\x11\x66\x65\x61st.proto.typesB\nValueProtoZ0github.com/feast-dev/feast/go/protos/feast/typesb\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\x17\x66\x65\x61st/types/Value.proto\x12\x0b\x66\x65\x61st.types\"\xae\x02\n\tValueType\"\xa0\x02\n\x04\x45num\x12\x0b\n\x07INVALID\x10\x00\x12\t\n\x05\x42YTES\x10\x01\x12\n\n\x06STRING\x10\x02\x12\t\n\x05INT32\x10\x03\x12\t\n\x05INT64\x10\x04\x12\n\n\x06\x44OUBLE\x10\x05\x12\t\n\x05\x46LOAT\x10\x06\x12\x08\n\x04\x42OOL\x10\x07\x12\x12\n\x0eUNIX_TIMESTAMP\x10\x08\x12\x0e\n\nBYTES_LIST\x10\x0b\x12\x0f\n\x0bSTRING_LIST\x10\x0c\x12\x0e\n\nINT32_LIST\x10\r\x12\x0e\n\nINT64_LIST\x10\x0e\x12\x0f\n\x0b\x44OUBLE_LIST\x10\x0f\x12\x0e\n\nFLOAT_LIST\x10\x10\x12\r\n\tBOOL_LIST\x10\x11\x12\x17\n\x13UNIX_TIMESTAMP_LIST\x10\x12\x12\x08\n\x04NULL\x10\x13\x12\x07\n\x03MAP\x10\x14\x12\x0c\n\x08MAP_LIST\x10\x15\"\xd5\x05\n\x05Value\x12\x13\n\tbytes_val\x18\x01 \x01(\x0cH\x00\x12\x14\n\nstring_val\x18\x02 \x01(\tH\x00\x12\x13\n\tint32_val\x18\x03 \x01(\x05H\x00\x12\x13\n\tint64_val\x18\x04 \x01(\x03H\x00\x12\x14\n\ndouble_val\x18\x05 \x01(\x01H\x00\x12\x13\n\tfloat_val\x18\x06 \x01(\x02H\x00\x12\x12\n\x08\x62ool_val\x18\x07 \x01(\x08H\x00\x12\x1c\n\x12unix_timestamp_val\x18\x08 \x01(\x03H\x00\x12\x30\n\x0e\x62ytes_list_val\x18\x0b \x01(\x0b\x32\x16.feast.types.BytesListH\x00\x12\x32\n\x0fstring_list_val\x18\x0c \x01(\x0b\x32\x17.feast.types.StringListH\x00\x12\x30\n\x0eint32_list_val\x18\r \x01(\x0b\x32\x16.feast.types.Int32ListH\x00\x12\x30\n\x0eint64_list_val\x18\x0e \x01(\x0b\x32\x16.feast.types.Int64ListH\x00\x12\x32\n\x0f\x64ouble_list_val\x18\x0f \x01(\x0b\x32\x17.feast.types.DoubleListH\x00\x12\x30\n\x0e\x66loat_list_val\x18\x10 \x01(\x0b\x32\x16.feast.types.FloatListH\x00\x12.\n\rbool_list_val\x18\x11 \x01(\x0b\x32\x15.feast.types.BoolListH\x00\x12\x39\n\x17unix_timestamp_list_val\x18\x12 \x01(\x0b\x32\x16.feast.types.Int64ListH\x00\x12%\n\x08null_val\x18\x13 \x01(\x0e\x32\x11.feast.types.NullH\x00\x12#\n\x07map_val\x18\x14 \x01(\x0b\x32\x10.feast.types.MapH\x00\x12,\n\x0cmap_list_val\x18\x15 
\x01(\x0b\x32\x14.feast.types.MapListH\x00\x42\x05\n\x03val\"\x18\n\tBytesList\x12\x0b\n\x03val\x18\x01 \x03(\x0c\"\x19\n\nStringList\x12\x0b\n\x03val\x18\x01 \x03(\t\"\x18\n\tInt32List\x12\x0b\n\x03val\x18\x01 \x03(\x05\"\x18\n\tInt64List\x12\x0b\n\x03val\x18\x01 \x03(\x03\"\x19\n\nDoubleList\x12\x0b\n\x03val\x18\x01 \x03(\x01\"\x18\n\tFloatList\x12\x0b\n\x03val\x18\x01 \x03(\x02\"\x17\n\x08\x42oolList\x12\x0b\n\x03val\x18\x01 \x03(\x08\"m\n\x03Map\x12&\n\x03val\x18\x01 \x03(\x0b\x32\x19.feast.types.Map.ValEntry\x1a>\n\x08ValEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.feast.types.Value:\x02\x38\x01\"(\n\x07MapList\x12\x1d\n\x03val\x18\x01 \x03(\x0b\x32\x10.feast.types.Map\"0\n\rRepeatedValue\x12\x1f\n\x03val\x18\x01 \x03(\x0b\x32\x12.feast.types.Value*\x10\n\x04Null\x12\x08\n\x04NULL\x10\x00\x42Q\n\x11\x66\x65\x61st.proto.typesB\nValueProtoZ0github.com/feast-dev/feast/go/protos/feast/typesb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -22,28 +22,36 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['DESCRIPTOR']._options = None _globals['DESCRIPTOR']._serialized_options = b'\n\021feast.proto.typesB\nValueProtoZ0github.com/feast-dev/feast/go/protos/feast/types' - _globals['_NULL']._serialized_start=1200 - _globals['_NULL']._serialized_end=1216 + _globals['_MAP_VALENTRY']._options = None + _globals['_MAP_VALENTRY']._serialized_options = b'8\001' + _globals['_NULL']._serialized_start=1459 + _globals['_NULL']._serialized_end=1475 _globals['_VALUETYPE']._serialized_start=41 - _globals['_VALUETYPE']._serialized_end=320 + _globals['_VALUETYPE']._serialized_end=343 _globals['_VALUETYPE_ENUM']._serialized_start=55 - _globals['_VALUETYPE_ENUM']._serialized_end=320 - _globals['_VALUE']._serialized_start=323 - _globals['_VALUE']._serialized_end=965 - _globals['_BYTESLIST']._serialized_start=967 - _globals['_BYTESLIST']._serialized_end=991 - 
_globals['_STRINGLIST']._serialized_start=993 - _globals['_STRINGLIST']._serialized_end=1018 - _globals['_INT32LIST']._serialized_start=1020 - _globals['_INT32LIST']._serialized_end=1044 - _globals['_INT64LIST']._serialized_start=1046 - _globals['_INT64LIST']._serialized_end=1070 - _globals['_DOUBLELIST']._serialized_start=1072 - _globals['_DOUBLELIST']._serialized_end=1097 - _globals['_FLOATLIST']._serialized_start=1099 - _globals['_FLOATLIST']._serialized_end=1123 - _globals['_BOOLLIST']._serialized_start=1125 - _globals['_BOOLLIST']._serialized_end=1148 - _globals['_REPEATEDVALUE']._serialized_start=1150 - _globals['_REPEATEDVALUE']._serialized_end=1198 + _globals['_VALUETYPE_ENUM']._serialized_end=343 + _globals['_VALUE']._serialized_start=346 + _globals['_VALUE']._serialized_end=1071 + _globals['_BYTESLIST']._serialized_start=1073 + _globals['_BYTESLIST']._serialized_end=1097 + _globals['_STRINGLIST']._serialized_start=1099 + _globals['_STRINGLIST']._serialized_end=1124 + _globals['_INT32LIST']._serialized_start=1126 + _globals['_INT32LIST']._serialized_end=1150 + _globals['_INT64LIST']._serialized_start=1152 + _globals['_INT64LIST']._serialized_end=1176 + _globals['_DOUBLELIST']._serialized_start=1178 + _globals['_DOUBLELIST']._serialized_end=1203 + _globals['_FLOATLIST']._serialized_start=1205 + _globals['_FLOATLIST']._serialized_end=1229 + _globals['_BOOLLIST']._serialized_start=1231 + _globals['_BOOLLIST']._serialized_end=1254 + _globals['_MAP']._serialized_start=1256 + _globals['_MAP']._serialized_end=1365 + _globals['_MAP_VALENTRY']._serialized_start=1303 + _globals['_MAP_VALENTRY']._serialized_end=1365 + _globals['_MAPLIST']._serialized_start=1367 + _globals['_MAPLIST']._serialized_end=1407 + _globals['_REPEATEDVALUE']._serialized_start=1409 + _globals['_REPEATEDVALUE']._serialized_end=1457 # @@protoc_insertion_point(module_scope) diff --git a/sdk/python/feast/protos/feast/types/Value_pb2.pyi b/sdk/python/feast/protos/feast/types/Value_pb2.pyi index 
15e4870e6a1..a1e364ec7e5 100644 --- a/sdk/python/feast/protos/feast/types/Value_pb2.pyi +++ b/sdk/python/feast/protos/feast/types/Value_pb2.pyi @@ -72,6 +72,8 @@ class ValueType(google.protobuf.message.Message): BOOL_LIST: ValueType._Enum.ValueType # 17 UNIX_TIMESTAMP_LIST: ValueType._Enum.ValueType # 18 NULL: ValueType._Enum.ValueType # 19 + MAP: ValueType._Enum.ValueType # 20 + MAP_LIST: ValueType._Enum.ValueType # 21 class Enum(_Enum, metaclass=_EnumEnumTypeWrapper): ... INVALID: ValueType.Enum.ValueType # 0 @@ -92,6 +94,8 @@ class ValueType(google.protobuf.message.Message): BOOL_LIST: ValueType.Enum.ValueType # 17 UNIX_TIMESTAMP_LIST: ValueType.Enum.ValueType # 18 NULL: ValueType.Enum.ValueType # 19 + MAP: ValueType.Enum.ValueType # 20 + MAP_LIST: ValueType.Enum.ValueType # 21 def __init__( self, @@ -119,6 +123,8 @@ class Value(google.protobuf.message.Message): BOOL_LIST_VAL_FIELD_NUMBER: builtins.int UNIX_TIMESTAMP_LIST_VAL_FIELD_NUMBER: builtins.int NULL_VAL_FIELD_NUMBER: builtins.int + MAP_VAL_FIELD_NUMBER: builtins.int + MAP_LIST_VAL_FIELD_NUMBER: builtins.int bytes_val: builtins.bytes string_val: builtins.str int32_val: builtins.int @@ -144,6 +150,10 @@ class Value(google.protobuf.message.Message): @property def unix_timestamp_list_val(self) -> global___Int64List: ... null_val: global___Null.ValueType + @property + def map_val(self) -> global___Map: ... + @property + def map_list_val(self) -> global___MapList: ... def __init__( self, *, @@ -164,10 +174,12 @@ class Value(google.protobuf.message.Message): bool_list_val: global___BoolList | None = ..., unix_timestamp_list_val: global___Int64List | None = ..., null_val: global___Null.ValueType = ..., + map_val: global___Map | None = ..., + map_list_val: global___MapList | None = ..., ) -> None: ... 
- def HasField(self, field_name: typing_extensions.Literal["bool_list_val", b"bool_list_val", "bool_val", b"bool_val", "bytes_list_val", b"bytes_list_val", "bytes_val", b"bytes_val", "double_list_val", b"double_list_val", "double_val", b"double_val", "float_list_val", b"float_list_val", "float_val", b"float_val", "int32_list_val", b"int32_list_val", "int32_val", b"int32_val", "int64_list_val", b"int64_list_val", "int64_val", b"int64_val", "null_val", b"null_val", "string_list_val", b"string_list_val", "string_val", b"string_val", "unix_timestamp_list_val", b"unix_timestamp_list_val", "unix_timestamp_val", b"unix_timestamp_val", "val", b"val"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["bool_list_val", b"bool_list_val", "bool_val", b"bool_val", "bytes_list_val", b"bytes_list_val", "bytes_val", b"bytes_val", "double_list_val", b"double_list_val", "double_val", b"double_val", "float_list_val", b"float_list_val", "float_val", b"float_val", "int32_list_val", b"int32_list_val", "int32_val", b"int32_val", "int64_list_val", b"int64_list_val", "int64_val", b"int64_val", "null_val", b"null_val", "string_list_val", b"string_list_val", "string_val", b"string_val", "unix_timestamp_list_val", b"unix_timestamp_list_val", "unix_timestamp_val", b"unix_timestamp_val", "val", b"val"]) -> None: ... - def WhichOneof(self, oneof_group: typing_extensions.Literal["val", b"val"]) -> typing_extensions.Literal["bytes_val", "string_val", "int32_val", "int64_val", "double_val", "float_val", "bool_val", "unix_timestamp_val", "bytes_list_val", "string_list_val", "int32_list_val", "int64_list_val", "double_list_val", "float_list_val", "bool_list_val", "unix_timestamp_list_val", "null_val"] | None: ... 
+ def HasField(self, field_name: typing_extensions.Literal["bool_list_val", b"bool_list_val", "bool_val", b"bool_val", "bytes_list_val", b"bytes_list_val", "bytes_val", b"bytes_val", "double_list_val", b"double_list_val", "double_val", b"double_val", "float_list_val", b"float_list_val", "float_val", b"float_val", "int32_list_val", b"int32_list_val", "int32_val", b"int32_val", "int64_list_val", b"int64_list_val", "int64_val", b"int64_val", "map_list_val", b"map_list_val", "map_val", b"map_val", "null_val", b"null_val", "string_list_val", b"string_list_val", "string_val", b"string_val", "unix_timestamp_list_val", b"unix_timestamp_list_val", "unix_timestamp_val", b"unix_timestamp_val", "val", b"val"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["bool_list_val", b"bool_list_val", "bool_val", b"bool_val", "bytes_list_val", b"bytes_list_val", "bytes_val", b"bytes_val", "double_list_val", b"double_list_val", "double_val", b"double_val", "float_list_val", b"float_list_val", "float_val", b"float_val", "int32_list_val", b"int32_list_val", "int32_val", b"int32_val", "int64_list_val", b"int64_list_val", "int64_val", b"int64_val", "map_list_val", b"map_list_val", "map_val", b"map_val", "null_val", b"null_val", "string_list_val", b"string_list_val", "string_val", b"string_val", "unix_timestamp_list_val", b"unix_timestamp_list_val", "unix_timestamp_val", b"unix_timestamp_val", "val", b"val"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["val", b"val"]) -> typing_extensions.Literal["bytes_val", "string_val", "int32_val", "int64_val", "double_val", "float_val", "bool_val", "unix_timestamp_val", "bytes_list_val", "string_list_val", "int32_list_val", "int64_list_val", "double_list_val", "float_list_val", "bool_list_val", "unix_timestamp_list_val", "null_val", "map_val", "map_list_val"] | None: ... 
global___Value = Value @@ -276,6 +288,53 @@ class BoolList(google.protobuf.message.Message): global___BoolList = BoolList +class Map(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + class ValEntry(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + KEY_FIELD_NUMBER: builtins.int + VALUE_FIELD_NUMBER: builtins.int + key: builtins.str + @property + def value(self) -> global___Value: ... + def __init__( + self, + *, + key: builtins.str = ..., + value: global___Value | None = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["value", b"value"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"]) -> None: ... + + VAL_FIELD_NUMBER: builtins.int + @property + def val(self) -> google.protobuf.internal.containers.MessageMap[builtins.str, global___Value]: ... + def __init__( + self, + *, + val: collections.abc.Mapping[builtins.str, global___Value] | None = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["val", b"val"]) -> None: ... + +global___Map = Map + +class MapList(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + VAL_FIELD_NUMBER: builtins.int + @property + def val(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___Map]: ... + def __init__( + self, + *, + val: collections.abc.Iterable[global___Map] | None = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["val", b"val"]) -> None: ... 
+ +global___MapList = MapList + class RepeatedValue(google.protobuf.message.Message): """This is to avoid an issue of being unable to specify `repeated value` in oneofs or maps In JSON "val" field can be omitted diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index ebf6f0eae19..10917150794 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -44,6 +44,8 @@ FloatList, Int32List, Int64List, + Map, + MapList, StringList, ) from feast.protos.feast.types.Value_pb2 import Value as ProtoValue @@ -74,6 +76,12 @@ def feast_value_type_to_python_type(field_value_proto: ProtoValue) -> Any: return None val = getattr(field_value_proto, val_attr) + # Handle Map and MapList types FIRST (before generic list processing) + if val_attr == "map_val": + return _handle_map_value(val) + elif val_attr == "map_list_val": + return _handle_map_list_value(val) + # If it's a _LIST type extract the list. if hasattr(val, "val"): val = list(val.val) @@ -98,6 +106,29 @@ def feast_value_type_to_python_type(field_value_proto: ProtoValue) -> Any: return val +def _handle_map_value(map_message) -> Dict[str, Any]: + """Handle Map proto message containing map val.""" + result = {} + + for key, value in map_message.val.items(): + # Recursively handle the Value message + result[key] = feast_value_type_to_python_type(value) + + return result + + +def _handle_map_list_value(map_list_message) -> List[Dict[str, Any]]: + """Handle MapList proto message containing repeated Map val.""" + result = [] + + for map_item in map_list_message.val: + # Handle each Map in the list + processed_map = _handle_map_value(map_item) + result.append(processed_map) + + return result + + def feast_value_type_to_pandas_type(value_type: ValueType) -> Any: value_type_to_pandas_type: Dict[ValueType, str] = { ValueType.FLOAT: "float", @@ -109,7 +140,7 @@ def feast_value_type_to_pandas_type(value_type: ValueType) -> Any: ValueType.BOOL: "bool", ValueType.UNIX_TIMESTAMP: "datetime64[ns]", 
} - if value_type.name.endswith("_LIST"): + if value_type.name == "MAP" or value_type.name.endswith("_LIST"): return "object" if value_type in value_type_to_pandas_type: return value_type_to_pandas_type[value_type] @@ -178,6 +209,9 @@ def python_type_to_feast_value_type( actual_type = type(value).__name__.lower() if actual_type == "str": return ValueType.STRING + # Check if it's a dictionary (could be a Map) + elif actual_type == "dict": + return ValueType.MAP # If it's a different type wrapped in object, try to infer from the value elif actual_type in type_map: return type_map[actual_type] @@ -187,6 +221,9 @@ def python_type_to_feast_value_type( return ValueType[item_type.name + "_LIST"] if isinstance(value, (list, np.ndarray)): + # Check if it's a list of maps + if value and isinstance(value[0], dict): + return ValueType.MAP_LIST # if the value's type is "ndarray" and we couldn't infer from "value.dtype" # this is most probably array of "object", # so we need to iterate over objects and try to infer type of each item @@ -222,6 +259,10 @@ def python_type_to_feast_value_type( return ValueType.UNKNOWN return ValueType[common_item_value_type.name + "_LIST"] + # Check if it's a dictionary (Map type) + if isinstance(value, dict): + return ValueType.MAP + raise ValueError( f"Value with native type {type_name} cannot be converted into Feast value type" ) @@ -372,6 +413,23 @@ def _python_value_to_proto_value( Returns: List of Feast Value Proto """ + # Handle Map and MapList types first + if feast_value_type == ValueType.MAP: + return [ + ProtoValue(map_val=_python_dict_to_map_proto(value)) + if value is not None + else ProtoValue() + for value in values + ] + + if feast_value_type == ValueType.MAP_LIST: + return [ + ProtoValue(map_list_val=_python_list_to_map_list_proto(value)) + if value is not None + else ProtoValue() + for value in values + ] + # ToDo: make a better sample for type checks (more than one element) sample = next(filter(_non_empty_value, values), None) # 
first not empty value @@ -512,6 +570,47 @@ def _python_value_to_proto_value( raise Exception(f"Unsupported data type: ${str(type(values[0]))}") +def _python_dict_to_map_proto(python_dict: Dict[str, Any]) -> Map: + """Convert a Python dictionary to a Map proto message.""" + map_proto = Map() + for key, value in python_dict.items(): + # Handle None values explicitly + if value is None: + map_proto.val[key].CopyFrom( + ProtoValue() + ) # Empty ProtoValue represents None + continue + + if isinstance(value, dict): + # Nested map + nested_map_proto = _python_dict_to_map_proto(value) + map_proto.val[key].CopyFrom(ProtoValue(map_val=nested_map_proto)) + elif isinstance(value, list) and value and isinstance(value[0], dict): + # List of maps (MapList) + map_list_proto = _python_list_to_map_list_proto(value) + map_proto.val[key].CopyFrom(ProtoValue(map_list_val=map_list_proto)) + else: + # Handle scalar values and regular lists + # Let python_values_to_proto_values infer the type + proto_values = python_values_to_proto_values([value], ValueType.UNKNOWN) + map_proto.val[key].CopyFrom(proto_values[0]) + return map_proto + + +def _python_list_to_map_list_proto(python_list: List[Dict[str, Any]]) -> MapList: + """Convert a Python list of dictionaries to a MapList proto message.""" + map_list_proto = MapList() + + for item in python_list: + if isinstance(item, dict): + map_proto = _python_dict_to_map_proto(item) + map_list_proto.val.append(map_proto) + else: + raise ValueError(f"MapList can only contain dictionaries, got {type(item)}") + + return map_list_proto + + def python_values_to_proto_values( values: List[Any], feature_type: ValueType = ValueType.UNKNOWN ) -> List[ProtoValue]: @@ -555,6 +654,8 @@ def python_values_to_proto_values( "string_list_val": ValueType.STRING_LIST, "bytes_list_val": ValueType.BYTES_LIST, "bool_list_val": ValueType.BOOL_LIST, + "map_val": ValueType.MAP, + "map_list_val": ValueType.MAP_LIST, } VALUE_TYPE_TO_PROTO_VALUE_MAP: Dict[ValueType, str] = { diff 
--git a/sdk/python/feast/types.py b/sdk/python/feast/types.py index ddc98115ad1..2dab0ba2b0a 100644 --- a/sdk/python/feast/types.py +++ b/sdk/python/feast/types.py @@ -32,6 +32,7 @@ "FLOAT32": "FLOAT", "BOOL": "BOOL", "UNIX_TIMESTAMP": "UNIX_TIMESTAMP", + "MAP": "MAP", } @@ -83,6 +84,7 @@ class PrimitiveFeastType(Enum): UNIX_TIMESTAMP = 8 PDF_BYTES = 9 IMAGE_BYTES = 10 + MAP = 11 def to_value_type(self) -> ValueType: """ @@ -115,6 +117,7 @@ def __hash__(self): Float32 = PrimitiveFeastType.FLOAT32 Float64 = PrimitiveFeastType.FLOAT64 UnixTimestamp = PrimitiveFeastType.UNIX_TIMESTAMP +Map = PrimitiveFeastType.MAP SUPPORTED_BASE_TYPES = [ Invalid, @@ -128,6 +131,7 @@ def __hash__(self): Float32, Float64, UnixTimestamp, + Map, ] PRIMITIVE_FEAST_TYPES_TO_STRING = { @@ -142,6 +146,7 @@ def __hash__(self): "FLOAT32": "Float32", "FLOAT64": "Float64", "UNIX_TIMESTAMP": "UnixTimestamp", + "MAP": "Map", } @@ -195,6 +200,8 @@ def __str__(self): ValueType.FLOAT_LIST: Array(Float32), ValueType.BOOL_LIST: Array(Bool), ValueType.UNIX_TIMESTAMP_LIST: Array(UnixTimestamp), + ValueType.MAP: Map, + ValueType.MAP_LIST: Array(Map), } FEAST_TYPES_TO_PYARROW_TYPES = { @@ -214,6 +221,7 @@ def __str__(self): ValueType.INT64_LIST, ValueType.FLOAT_LIST, ValueType.BOOL_LIST, + ValueType.MAP_LIST, ] for k in VALUE_TYPES_TO_FEAST_TYPES: if k in FEAST_VECTOR_TYPES: diff --git a/sdk/python/feast/value_type.py b/sdk/python/feast/value_type.py index 8cf748c9392..b3a1b35c248 100644 --- a/sdk/python/feast/value_type.py +++ b/sdk/python/feast/value_type.py @@ -48,8 +48,10 @@ class ValueType(enum.Enum): BOOL_LIST = 17 UNIX_TIMESTAMP_LIST = 18 NULL = 19 - PDF_BYTES = 20 - IMAGE_BYTES = 21 + MAP = 20 + MAP_LIST = 21 + PDF_BYTES = 22 + IMAGE_BYTES = 23 ListType = Union[ diff --git a/sdk/python/tests/unit/test_type_map.py b/sdk/python/tests/unit/test_type_map.py index be8a25c1639..945b9114f96 100644 --- a/sdk/python/tests/unit/test_type_map.py +++ b/sdk/python/tests/unit/test_type_map.py @@ -2,8 +2,12 @@ 
import pandas as pd import pytest +from feast.protos.feast.types.Value_pb2 import Map, MapList from feast.type_map import ( + _python_dict_to_map_proto, + _python_list_to_map_list_proto, feast_value_type_to_python_type, + python_type_to_feast_value_type, python_values_to_proto_values, ) from feast.value_type import ValueType @@ -94,3 +98,284 @@ def test_python_values_to_proto_values_int_list_with_null_not_supported(): arr = df["column"].to_numpy() with pytest.raises(TypeError): _ = python_values_to_proto_values(arr, ValueType.INT32_LIST) + + +class TestMapTypes: + """Test cases for MAP and MAP_LIST value types.""" + + def test_simple_map_conversion(self): + """Test basic MAP type conversion from Python dict to proto and back.""" + test_dict = {"key1": "value1", "key2": "value2", "key3": 123} + + protos = python_values_to_proto_values([test_dict], ValueType.MAP) + converted = feast_value_type_to_python_type(protos[0]) + + assert isinstance(converted, dict) + assert converted["key1"] == "value1" + assert converted["key2"] == "value2" + assert converted["key3"] == 123 + + def test_nested_map_conversion(self): + """Test nested MAP type conversion.""" + test_dict = { + "level1": { + "level2": {"key": "deep_value", "number": 42}, + "simple": "value", + }, + "top_level": "top_value", + } + + protos = python_values_to_proto_values([test_dict], ValueType.MAP) + converted = feast_value_type_to_python_type(protos[0]) + + assert isinstance(converted, dict) + assert converted["level1"]["level2"]["key"] == "deep_value" + assert converted["level1"]["level2"]["number"] == 42 + assert converted["level1"]["simple"] == "value" + assert converted["top_level"] == "top_value" + + def test_map_with_different_value_types(self): + """Test MAP with various value types.""" + test_dict = { + "string_val": "hello", + "int_val": 42, + "float_val": 3.14, + "bool_val": True, + "list_val": [1, 2, 3], + "string_list_val": ["a", "b", "c"], + } + + protos = python_values_to_proto_values([test_dict], 
ValueType.MAP) + converted = feast_value_type_to_python_type(protos[0]) + + assert converted["string_val"] == "hello" + assert converted["int_val"] == 42 + assert converted["float_val"] == 3.14 + assert converted["bool_val"] is True + assert converted["list_val"] == [1, 2, 3] + assert converted["string_list_val"] == ["a", "b", "c"] + + def test_map_with_none_values(self): + """Test MAP with None values.""" + test_dict = {"key1": "value1", "key2": None, "key3": "value3"} + + protos = python_values_to_proto_values([test_dict], ValueType.MAP) + converted = feast_value_type_to_python_type(protos[0]) + + assert converted["key1"] == "value1" + assert converted["key2"] is None + assert converted["key3"] == "value3" + + def test_empty_map(self): + """Test empty MAP conversion.""" + test_dict = {} + + protos = python_values_to_proto_values([test_dict], ValueType.MAP) + converted = feast_value_type_to_python_type(protos[0]) + + assert isinstance(converted, dict) + assert len(converted) == 0 + + def test_null_map(self): + """Test None MAP conversion.""" + protos = python_values_to_proto_values([None], ValueType.MAP) + converted = feast_value_type_to_python_type(protos[0]) + + assert converted is None + + def test_map_list_conversion(self): + """Test basic MAP_LIST type conversion.""" + test_list = [ + {"name": "John", "age": 30}, + {"name": "Jane", "age": 25}, + {"name": "Bob", "score": 85.5}, + ] + + protos = python_values_to_proto_values([test_list], ValueType.MAP_LIST) + converted = feast_value_type_to_python_type(protos[0]) + + assert isinstance(converted, list) + assert len(converted) == 3 + assert converted[0]["name"] == "John" + assert converted[0]["age"] == 30 + assert converted[1]["name"] == "Jane" + assert converted[1]["age"] == 25 + assert converted[2]["name"] == "Bob" + assert converted[2]["score"] == 85.5 + + def test_map_list_with_nested_maps(self): + """Test MAP_LIST with nested maps.""" + test_list = [ + {"user": {"name": "John", "details": {"city": "NYC"}}, 
"score": 100}, + {"user": {"name": "Jane", "details": {"city": "SF"}}, "score": 95}, + ] + + protos = python_values_to_proto_values([test_list], ValueType.MAP_LIST) + converted = feast_value_type_to_python_type(protos[0]) + + assert len(converted) == 2 + assert converted[0]["user"]["name"] == "John" + assert converted[0]["user"]["details"]["city"] == "NYC" + assert converted[1]["user"]["name"] == "Jane" + assert converted[1]["user"]["details"]["city"] == "SF" + + def test_map_list_with_lists_in_maps(self): + """Test MAP_LIST where maps contain lists.""" + test_list = [ + {"name": "John", "hobbies": ["reading", "swimming"]}, + {"name": "Jane", "scores": [95, 87, 92]}, + ] + + protos = python_values_to_proto_values([test_list], ValueType.MAP_LIST) + converted = feast_value_type_to_python_type(protos[0]) + + assert converted[0]["name"] == "John" + assert converted[0]["hobbies"] == ["reading", "swimming"] + assert converted[1]["name"] == "Jane" + assert converted[1]["scores"] == [95, 87, 92] + + def test_empty_map_list(self): + """Test empty MAP_LIST conversion.""" + test_list = [] + + protos = python_values_to_proto_values([test_list], ValueType.MAP_LIST) + converted = feast_value_type_to_python_type(protos[0]) + + assert isinstance(converted, list) + assert len(converted) == 0 + + def test_null_map_list(self): + """Test None MAP_LIST conversion.""" + protos = python_values_to_proto_values([None], ValueType.MAP_LIST) + converted = feast_value_type_to_python_type(protos[0]) + + assert converted is None + + def test_map_list_with_empty_maps(self): + """Test MAP_LIST containing empty maps.""" + test_list = [{}, {"key": "value"}, {}] + + protos = python_values_to_proto_values([test_list], ValueType.MAP_LIST) + converted = feast_value_type_to_python_type(protos[0]) + + assert len(converted) == 3 + assert len(converted[0]) == 0 + assert converted[1]["key"] == "value" + assert len(converted[2]) == 0 + + def test_python_type_inference_for_map(self): + """Test that 
dictionaries are correctly inferred as MAP type.""" + test_dict = {"key1": "value1", "key2": 123} + + inferred_type = python_type_to_feast_value_type("test_field", test_dict) + + assert inferred_type == ValueType.MAP + + def test_python_type_inference_for_map_list(self): + """Test that list of dictionaries is correctly inferred as MAP_LIST type.""" + test_list = [{"key1": "value1"}, {"key2": "value2"}] + + inferred_type = python_type_to_feast_value_type("test_field", test_list) + + assert inferred_type == ValueType.MAP_LIST + + def test_multiple_map_values(self): + """Test conversion of multiple MAP values.""" + test_dicts = [ + {"name": "Alice", "age": 30}, + {"name": "Bob", "age": 25}, + {"name": "Charlie", "city": "NYC"}, + ] + + protos = python_values_to_proto_values(test_dicts, ValueType.MAP) + converted_values = [feast_value_type_to_python_type(proto) for proto in protos] + + assert len(converted_values) == 3 + assert converted_values[0]["name"] == "Alice" + assert converted_values[1]["name"] == "Bob" + assert converted_values[2]["city"] == "NYC" + + def test_multiple_map_list_values(self): + """Test conversion of multiple MAP_LIST values.""" + test_lists = [[{"id": 1}, {"id": 2}], [{"id": 3}, {"id": 4}], []] + + protos = python_values_to_proto_values(test_lists, ValueType.MAP_LIST) + converted_values = [feast_value_type_to_python_type(proto) for proto in protos] + + assert len(converted_values) == 3 + assert len(converted_values[0]) == 2 + assert converted_values[0][0]["id"] == 1 + assert len(converted_values[2]) == 0 + + def test_map_with_map_list_value(self): + """Test MAP containing MAP_LIST as a value.""" + test_dict = { + "metadata": {"version": "1.0"}, + "items": [{"name": "item1", "count": 5}, {"name": "item2", "count": 3}], + } + + protos = python_values_to_proto_values([test_dict], ValueType.MAP) + converted = feast_value_type_to_python_type(protos[0]) + + assert converted["metadata"]["version"] == "1.0" + assert len(converted["items"]) == 2 + 
assert converted["items"][0]["name"] == "item1" + assert converted["items"][1]["count"] == 3 + + @pytest.mark.parametrize( + "invalid_value", + [ + [{"key": "value"}, "not_a_dict", {"another": "dict"}], + ["string1", "string2"], + [1, 2, 3], + ], + ) + def test_map_list_with_invalid_items(self, invalid_value): + """Test that MAP_LIST with non-dict items raises appropriate errors.""" + with pytest.raises((ValueError, TypeError)): + python_values_to_proto_values([invalid_value], ValueType.MAP_LIST) + + def test_direct_proto_construction(self): + """Test direct construction of Map and MapList proto messages.""" + # Test Map proto construction + test_dict = {"key1": "value1", "key2": 42} + map_proto = _python_dict_to_map_proto(test_dict) + + assert isinstance(map_proto, Map) + assert len(map_proto.val) == 2 + + # Test MapList proto construction + test_list = [{"a": 1}, {"b": 2}] + map_list_proto = _python_list_to_map_list_proto(test_list) + + assert isinstance(map_list_proto, MapList) + assert len(map_list_proto.val) == 2 + + def test_roundtrip_conversion_consistency(self): + """Test that roundtrip conversion maintains data integrity.""" + original_map = { + "string": "hello", + "integer": 42, + "float": 3.14159, + "boolean": True, + "nested": {"inner_string": "world", "inner_list": [1, 2, 3]}, + "list_of_maps": [{"item": "first"}, {"item": "second"}], + } + + # Convert to proto and back + protos = python_values_to_proto_values([original_map], ValueType.MAP) + converted = feast_value_type_to_python_type(protos[0]) + + # Verify all data is preserved + assert converted["string"] == original_map["string"] + assert converted["integer"] == original_map["integer"] + assert converted["float"] == original_map["float"] + assert converted["boolean"] == original_map["boolean"] + assert ( + converted["nested"]["inner_string"] + == original_map["nested"]["inner_string"] + ) + assert converted["nested"]["inner_list"] == original_map["nested"]["inner_list"] + assert 
len(converted["list_of_maps"]) == len(original_map["list_of_maps"]) + assert converted["list_of_maps"][0]["item"] == "first" + assert converted["list_of_maps"][1]["item"] == "second"