Skip to content

Commit 407cffc

Browse files
committed
fix: Infer entity value types from dbt column types
When creating entities from dbt models with integer columns, entities were being created with the default ValueType.STRING, causing validation errors:

"Entity X has type ValueType.STRING, which does not match the inferred type Int64"

Solution: added a mapping dict and helper functions for clean type conversion:
- FEAST_TYPE_TO_VALUE_TYPE: maps FeastType to ValueType
- feast_type_to_value_type(): helper function for the conversion
- _infer_entity_value_type(): method in the DbtToFeastMapper class

This replaces verbose if-else chains with a clean dictionary lookup pattern.

Also corrects schema generation to include entity columns, as FeatureView.__init__ expects to extract entity columns from the schema itself (lines 216-234 in feature_view.py).

Changes:
- feast/cli/dbt_import.py: use the mapper's _infer_entity_value_type() method
- feast/dbt/mapper.py: add the type mapping dict and helper method

Signed-off-by: yassinnouh21 <yassinnouh21@gmail.com>
1 parent be2eb9e commit 407cffc

File tree

2 files changed

+45
-5
lines changed

2 files changed

+45
-5
lines changed

sdk/python/feast/cli/dbt_import.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -225,9 +225,12 @@ def import_command(
225225
model_entities: List[Any] = []
226226
for entity_col in entity_cols:
227227
if entity_col not in entities_created:
228+
# Use mapper's internal method for value type inference
229+
entity_value_type = mapper._infer_entity_value_type(model, entity_col)
228230
entity = mapper.create_entity(
229231
name=entity_col,
230232
description="Entity key for dbt models",
233+
value_type=entity_value_type,
231234
)
232235
entities_created[entity_col] = entity
233236
all_objects.append(entity)

sdk/python/feast/dbt/mapper.py

Lines changed: 42 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,24 @@
2626
)
2727
from feast.value_type import ValueType
2828

29+
# Mapping from FeastType to ValueType for entity value inference
30+
FEAST_TYPE_TO_VALUE_TYPE: Dict[FeastType, ValueType] = {
31+
String: ValueType.STRING,
32+
Int32: ValueType.INT64,  # 32-bit ints are widened to the INT64 value type
33+
Int64: ValueType.INT64,
34+
Float32: ValueType.DOUBLE,  # 32-bit floats are widened to the DOUBLE value type
35+
Float64: ValueType.DOUBLE,
36+
Bool: ValueType.BOOL,
37+
Bytes: ValueType.BYTES,
38+
UnixTimestamp: ValueType.UNIX_TIMESTAMP,
39+
}
40+
41+
42+
def feast_type_to_value_type(feast_type: FeastType) -> ValueType:
43+
"""Convert a FeastType to its corresponding ValueType for entities."""
44+
return FEAST_TYPE_TO_VALUE_TYPE.get(feast_type, ValueType.STRING)  # types absent from the mapping fall back to STRING
45+
46+
2947
# Comprehensive mapping from dbt/warehouse types to Feast types
3048
# Covers BigQuery, Snowflake, Redshift, PostgreSQL, and common SQL types
3149
DBT_TO_FEAST_TYPE_MAP: Dict[str, FeastType] = {
@@ -180,6 +198,14 @@ def __init__(
180198
self.timestamp_field = timestamp_field
181199
self.ttl_days = ttl_days
182200

201+
def _infer_entity_value_type(self, model: DbtModel, entity_col: str) -> ValueType:
202+
"""Infer entity ValueType from dbt model column type."""
203+
for column in model.columns:
204+
if column.name == entity_col:  # the first column whose name matches decides the type
205+
feast_type = map_dbt_type_to_feast_type(column.data_type)
206+
return feast_type_to_value_type(feast_type)
207+
return ValueType.UNKNOWN  # no column in model.columns is named entity_col
208+
183209
def create_data_source(
184210
self,
185211
model: DbtModel,
@@ -310,7 +336,9 @@ def create_feature_view(
310336
"""
311337
# Normalize to lists
312338
entity_cols: List[str] = (
313-
[entity_columns] if isinstance(entity_columns, str) else list(entity_columns)
339+
[entity_columns]
340+
if isinstance(entity_columns, str)
341+
else list(entity_columns)
314342
)
315343

316344
entity_objs: List[Entity] = []
@@ -330,12 +358,14 @@ def create_feature_view(
330358
ts_field = timestamp_field or self.timestamp_field
331359
ttl = timedelta(days=ttl_days if ttl_days is not None else self.ttl_days)
332360

333-
# Columns to exclude from features (all entity columns + timestamp)
334-
excluded = set(entity_cols) | {ts_field}
361+
# Columns to exclude from schema (timestamp + any explicitly excluded)
362+
# Note: entity columns should NOT be excluded - FeatureView.__init__
363+
# expects entity columns to be in the schema and will extract them
364+
excluded = {ts_field}
335365
if exclude_columns:
336366
excluded.update(exclude_columns)
337367

338-
# Create schema from model columns
368+
# Create schema from model columns (includes entity columns)
339369
schema: List[Field] = []
340370
for column in model.columns:
341371
if column.name not in excluded:
@@ -352,9 +382,12 @@ def create_feature_view(
352382
if not entity_objs:
353383
entity_objs = []
354384
for entity_col in entity_cols:
385+
# Infer entity value type from model column
386+
entity_value_type = self._infer_entity_value_type(model, entity_col)
355387
ent = self.create_entity(
356388
name=entity_col,
357389
description=f"Entity for {model.name}",
390+
value_type=entity_value_type,
358391
)
359392
entity_objs.append(ent)
360393

@@ -405,16 +438,20 @@ def create_all_from_model(
405438
"""
406439
# Normalize to list
407440
entity_cols: List[str] = (
408-
[entity_columns] if isinstance(entity_columns, str) else list(entity_columns)
441+
[entity_columns]
442+
if isinstance(entity_columns, str)
443+
else list(entity_columns)
409444
)
410445

411446
# Create entities (plural)
412447
entities_list = []
413448
for entity_col in entity_cols:
449+
entity_value_type = self._infer_entity_value_type(model, entity_col)
414450
entity = self.create_entity(
415451
name=entity_col,
416452
description=f"Entity for {model.name}",
417453
tags={"dbt.model": model.name},
454+
value_type=entity_value_type,
418455
)
419456
entities_list.append(entity)
420457

0 commit comments

Comments
 (0)