diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index 0dcb861db7c..87816982abd 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -160,8 +160,10 @@ driver_stats_source = FileSource( # three feature column. Here we define a Feature View that will allow us to serve this # data to our model online. driver_stats_fv = FeatureView( - # The unique name of this feature view. Two feature views in a single - # project cannot have the same name +# The unique name of this feature view. Two feature views in a single +# project cannot have the same name, and names must be unique across +# all feature view types (regular, stream, on-demand) to avoid conflicts +# during `feast apply`. name="driver_hourly_stats", entities=[driver], ttl=timedelta(days=1), diff --git a/sdk/python/feast/errors.py b/sdk/python/feast/errors.py index 8c72422f44e..53895344d3b 100644 --- a/sdk/python/feast/errors.py +++ b/sdk/python/feast/errors.py @@ -411,10 +411,32 @@ def __init__(self, entity_type: type): class ConflictingFeatureViewNames(FeastError): # TODO: print file location of conflicting feature views - def __init__(self, feature_view_name: str): - super().__init__( - f"The feature view name: {feature_view_name} refers to feature views of different types." - ) + def __init__( + self, + feature_view_name: str, + existing_type: Optional[str] = None, + new_type: Optional[str] = None, + ): + if existing_type and new_type: + if existing_type == new_type: + # Same-type duplicate + super().__init__( + f"Multiple {existing_type}s with name '{feature_view_name}' found. " + f"Feature view names must be case-insensitively unique. " + f"It may be necessary to ignore certain files in your feature " + f"repository by using a .feastignore file." + ) + else: + # Cross-type conflict + super().__init__( + f"Feature view name '{feature_view_name}' is already used by a {existing_type}. " + f"Cannot register a {new_type} with the same name. " + f"Feature view names must be unique across FeatureView, StreamFeatureView, and OnDemandFeatureView." + ) + else: + super().__init__( + f"The feature view name: {feature_view_name} refers to feature views of different types." + ) class FeastInvalidInfraObjectType(FeastError): diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 36ff2c9d4eb..72bd93cc522 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -58,6 +58,7 @@ from feast.dqm.errors import ValidationFailed from feast.entity import Entity from feast.errors import ( + ConflictingFeatureViewNames, DataFrameSerializationError, DataSourceRepeatNamesException, FeatureViewNotFoundException, @@ -3255,18 +3256,25 @@ def _print_materialization_log( def _validate_feature_views(feature_views: List[BaseFeatureView]): - """Verify feature views have case-insensitively unique names""" - fv_names = set() + """Verify feature views have case-insensitively unique names across all types. + + This validates that no two feature views (of any type: FeatureView, + StreamFeatureView, OnDemandFeatureView) share the same case-insensitive name. + This is critical because get_online_features uses get_any_feature_view which + resolves names in a fixed order, potentially returning the wrong feature view. + """ + fv_by_name: Dict[str, BaseFeatureView] = {} for fv in feature_views: case_insensitive_fv_name = fv.name.lower() - if case_insensitive_fv_name in fv_names: - raise ValueError( - f"More than one feature view with name {case_insensitive_fv_name} found. " - f"Please ensure that all feature view names are case-insensitively unique. " - f"It may be necessary to ignore certain files in your feature repository by using a .feastignore file." + if case_insensitive_fv_name in fv_by_name: + existing_fv = fv_by_name[case_insensitive_fv_name] + raise ConflictingFeatureViewNames( + fv.name, + existing_type=type(existing_fv).__name__, + new_type=type(fv).__name__, ) else: - fv_names.add(case_insensitive_fv_name) + fv_by_name[case_insensitive_fv_name] = fv def _validate_data_sources(data_sources: List[DataSource]): diff --git a/sdk/python/feast/infra/registry/base_registry.py b/sdk/python/feast/infra/registry/base_registry.py index 24e9f36fbd2..c4bf1f5979c 100644 --- a/sdk/python/feast/infra/registry/base_registry.py +++ b/sdk/python/feast/infra/registry/base_registry.py @@ -24,6 +24,10 @@ from feast.base_feature_view import BaseFeatureView from feast.data_source import DataSource from feast.entity import Entity +from feast.errors import ( + ConflictingFeatureViewNames, + FeatureViewNotFoundException, +) from feast.feature_service import FeatureService from feast.feature_view import FeatureView from feast.infra.infra_object import Infra @@ -263,6 +267,61 @@ def apply_feature_view( """ raise NotImplementedError + def _ensure_feature_view_name_is_unique( + self, + feature_view: BaseFeatureView, + project: str, + allow_cache: bool = False, + ): + """ + Validates that no feature view name conflict exists across feature view types. + Raises ConflictingFeatureViewNames if a different type already uses the name. + + This is a defense-in-depth check for direct apply_feature_view() calls. + The primary validation happens in _validate_all_feature_views() during feast plan/apply. + """ + name = feature_view.name + new_type = type(feature_view).__name__ + + def _check_conflict(getter, not_found_exc, existing_type: str): + try: + getter(name, project, allow_cache=allow_cache) + raise ConflictingFeatureViewNames(name, existing_type, new_type) + except not_found_exc: + pass + + # Check StreamFeatureView before FeatureView since StreamFeatureView is a subclass + # Note: All getters raise FeatureViewNotFoundException (not type-specific exceptions) + if isinstance(feature_view, StreamFeatureView): + _check_conflict( + self.get_feature_view, FeatureViewNotFoundException, "FeatureView" + ) + _check_conflict( + self.get_on_demand_feature_view, + FeatureViewNotFoundException, + "OnDemandFeatureView", + ) + elif isinstance(feature_view, FeatureView): + _check_conflict( + self.get_stream_feature_view, + FeatureViewNotFoundException, + "StreamFeatureView", + ) + _check_conflict( + self.get_on_demand_feature_view, + FeatureViewNotFoundException, + "OnDemandFeatureView", + ) + elif isinstance(feature_view, OnDemandFeatureView): + _check_conflict( + self.get_feature_view, FeatureViewNotFoundException, "FeatureView" + ) + _check_conflict( + self.get_stream_feature_view, + FeatureViewNotFoundException, + "StreamFeatureView", + ) + @abstractmethod def delete_feature_view(self, name: str, project: str, commit: bool = True): """ diff --git a/sdk/python/feast/infra/registry/sql.py b/sdk/python/feast/infra/registry/sql.py index 70bd3c64392..197ca02d57a 100644 --- a/sdk/python/feast/infra/registry/sql.py +++ b/sdk/python/feast/infra/registry/sql.py @@ -578,6 +578,7 @@ def apply_data_source( def apply_feature_view( self, feature_view: BaseFeatureView, project: str, commit: bool = True ): + self._ensure_feature_view_name_is_unique(feature_view, project) fv_table = self._infer_fv_table(feature_view) return self._apply_object( diff --git a/sdk/python/tests/integration/registration/test_feature_store.py b/sdk/python/tests/integration/registration/test_feature_store.py index b59af900190..93334fbda19 100644 --- a/sdk/python/tests/integration/registration/test_feature_store.py +++ b/sdk/python/tests/integration/registration/test_feature_store.py @@ -14,15 +14,23 @@ from datetime import timedelta from tempfile import mkstemp +import pandas as pd import pytest from pytest_lazyfixture import lazy_fixture +from feast import FileSource +from feast.data_format import AvroFormat +from feast.data_source import KafkaSource from feast.entity import Entity -from feast.feature_store import FeatureStore +from feast.errors import ConflictingFeatureViewNames +from feast.feature_store import FeatureStore, _validate_feature_views from feast.feature_view import FeatureView +from feast.field import Field from feast.infra.online_stores.sqlite import SqliteOnlineStoreConfig +from feast.on_demand_feature_view import on_demand_feature_view from feast.repo_config import RepoConfig -from feast.types import Float64, Int64, String +from feast.stream_feature_view import StreamFeatureView +from feast.types import Float32, Float64, Int64, String from tests.utils.data_source_test_creator import prep_file_source @@ -75,3 +83,146 @@ def feature_store_with_local_registry(): entity_key_serialization_version=3, ) ) + + +@pytest.mark.integration +def test_validate_feature_views_cross_type_conflict(): + """ + Test that _validate_feature_views() catches cross-type name conflicts. + + This is a unit test for the validation that happens during feast plan/apply. + The validation must catch conflicts across FeatureView, StreamFeatureView, + and OnDemandFeatureView to prevent silent data correctness bugs in + get_online_features (which uses fixed-order lookup). + + See: https://github.com/feast-dev/feast/issues/5995 + """ + # Create a simple entity + entity = Entity(name="driver_entity", join_keys=["test_key"]) + + # Create a regular FeatureView + file_source = FileSource(name="my_file_source", path="test.parquet") + feature_view = FeatureView( + name="my_feature_view", + entities=[entity], + schema=[Field(name="feature1", dtype=Float32)], + source=file_source, + ) + + # Create a StreamFeatureView with the SAME name + stream_source = KafkaSource( + name="kafka", + timestamp_field="event_timestamp", + kafka_bootstrap_servers="", + message_format=AvroFormat(""), + topic="topic", + batch_source=file_source, + watermark_delay_threshold=timedelta(days=1), + ) + stream_feature_view = StreamFeatureView( + name="my_feature_view", # Same name as FeatureView! + entities=[entity], + ttl=timedelta(days=30), + schema=[Field(name="feature1", dtype=Float32)], + source=stream_source, + ) + + # Validate should raise ConflictingFeatureViewNames + with pytest.raises(ConflictingFeatureViewNames) as exc_info: + _validate_feature_views([feature_view, stream_feature_view]) + + # Verify error message contains type information + error_message = str(exc_info.value) + assert "my_feature_view" in error_message + assert "FeatureView" in error_message + assert "StreamFeatureView" in error_message + + +def test_validate_feature_views_same_type_conflict(): + """ + Test that _validate_feature_views() also catches same-type name conflicts + with a proper error message indicating duplicate FeatureViews. + """ + # Create a simple entity + entity = Entity(name="driver_entity", join_keys=["test_key"]) + + # Create two FeatureViews with the same name + file_source = FileSource(name="my_file_source", path="test.parquet") + fv1 = FeatureView( + name="duplicate_fv", + entities=[entity], + schema=[Field(name="feature1", dtype=Float32)], + source=file_source, + ) + fv2 = FeatureView( + name="duplicate_fv", # Same name! + entities=[entity], + schema=[Field(name="feature2", dtype=Float32)], + source=file_source, + ) + + # Validate should raise ConflictingFeatureViewNames + with pytest.raises(ConflictingFeatureViewNames) as exc_info: + _validate_feature_views([fv1, fv2]) + + # Verify error message indicates same-type duplicate + error_message = str(exc_info.value) + assert "duplicate_fv" in error_message + assert "Multiple FeatureViews" in error_message + assert "case-insensitively unique" in error_message + + +def test_validate_feature_views_case_insensitive(): + """ + Test that _validate_feature_views() catches case-insensitive conflicts. + """ + entity = Entity(name="driver_entity", join_keys=["test_key"]) + file_source = FileSource(name="my_file_source", path="test.parquet") + + fv1 = FeatureView( + name="MyFeatureView", + entities=[entity], + schema=[Field(name="feature1", dtype=Float32)], + source=file_source, + ) + fv2 = FeatureView( + name="myfeatureview", # Same name, different case! + entities=[entity], + schema=[Field(name="feature2", dtype=Float32)], + source=file_source, + ) + + # Validate should raise ConflictingFeatureViewNames (case-insensitive) + with pytest.raises(ConflictingFeatureViewNames): + _validate_feature_views([fv1, fv2]) + + +def test_validate_feature_views_odfv_conflict(): + """ + Test that _validate_feature_views() catches OnDemandFeatureView name conflicts. + """ + entity = Entity(name="driver_entity", join_keys=["test_key"]) + file_source = FileSource(name="my_file_source", path="test.parquet") + + fv = FeatureView( + name="shared_name", + entities=[entity], + schema=[Field(name="feature1", dtype=Float32)], + source=file_source, + ) + + @on_demand_feature_view( + sources=[fv], + schema=[Field(name="output", dtype=Float32)], + ) + def shared_name(inputs: pd.DataFrame) -> pd.DataFrame: + return pd.DataFrame({"output": inputs["feature1"] * 2}) + + # Validate should raise ConflictingFeatureViewNames + with pytest.raises(ConflictingFeatureViewNames) as exc_info: + _validate_feature_views([fv, shared_name]) + + error_message = str(exc_info.value) + assert "shared_name" in error_message + assert "FeatureView" in error_message + assert "OnDemandFeatureView" in error_message diff --git a/sdk/python/tests/integration/registration/test_universal_registry.py b/sdk/python/tests/integration/registration/test_universal_registry.py index 97c767171eb..1c036367895 100644 --- a/sdk/python/tests/integration/registration/test_universal_registry.py +++ b/sdk/python/tests/integration/registration/test_universal_registry.py @@ -35,7 +35,7 @@ from feast.data_format import AvroFormat, ParquetFormat from feast.data_source import KafkaSource from feast.entity import Entity -from feast.errors import FeatureViewNotFoundException +from feast.errors import ConflictingFeatureViewNames, FeatureViewNotFoundException from feast.feature_view import FeatureView from feast.field import Field from feast.infra.infra_object import Infra @@ -2001,3 +2001,119 @@ def test_commit_for_read_only_user(): assert len(entities) == 1 write_registry.teardown() + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_registry", + # mock_remote_registry excluded: the mock gRPC channel does not propagate + # server-side errors, so ConflictingFeatureViewNames is not raised client-side. + [f for f in all_fixtures if "mock_remote" not in str(f)], +) +def test_cross_type_feature_view_name_conflict(test_registry: BaseRegistry): + """ + Test that feature view names must be unique across all feature view types. + + This validates the fix for feast-dev/feast#5995: If a FeatureView and + StreamFeatureView share the same name, get_online_features would silently + return the wrong one (fixed order lookup). This test ensures such conflicts + are caught during registration. + """ + project = "project" + + # Create a simple entity + entity = Entity(name="driver_entity", join_keys=["test_key"]) + + # Create a regular FeatureView + file_source = FileSource(name="my_file_source", path="test.parquet") + feature_view = FeatureView( + name="shared_feature_view_name", + entities=[entity], + schema=[Field(name="feature1", dtype=Float32)], + source=file_source, + ) + + # Create a StreamFeatureView with the SAME name + stream_source = KafkaSource( + name="kafka", + timestamp_field="event_timestamp", + kafka_bootstrap_servers="", + message_format=AvroFormat(""), + topic="topic", + batch_source=FileSource(path="some path"), + watermark_delay_threshold=timedelta(days=1), + ) + + def simple_udf(x: int): + return x + 3 + + stream_feature_view = StreamFeatureView( + name="shared_feature_view_name", # Same name as FeatureView! + entities=[entity], + ttl=timedelta(days=30), + schema=[Field(name="feature1", dtype=Float32)], + source=stream_source, + udf=simple_udf, + ) + + # Register the regular FeatureView first - should succeed + test_registry.apply_feature_view(feature_view, project) + + # Attempt to register StreamFeatureView with same name - should fail + with pytest.raises(ConflictingFeatureViewNames) as exc_info: + test_registry.apply_feature_view(stream_feature_view, project) + + # Verify error message contains the conflicting types + error_message = str(exc_info.value) + assert "shared_feature_view_name" in error_message + + # Cleanup + test_registry.delete_feature_view("shared_feature_view_name", project) + test_registry.teardown() + + +@pytest.mark.integration +@pytest.mark.parametrize( + "test_registry", + [f for f in all_fixtures if "mock_remote" not in str(f)], +) +def test_cross_type_feature_view_odfv_conflict(test_registry: BaseRegistry): + """ + Test that OnDemandFeatureView names must be unique across all feature view types. + """ + project = "project" + + # Create a simple entity + entity = Entity(name="driver_entity", join_keys=["test_key"]) + + # Create a regular FeatureView + file_source = FileSource(name="my_file_source", path="test.parquet") + feature_view = FeatureView( + name="shared_odfv_name", + entities=[entity], + schema=[Field(name="feature1", dtype=Float32)], + source=file_source, + ) + + # Create an OnDemandFeatureView with the SAME name + @on_demand_feature_view( + sources=[feature_view], + schema=[Field(name="output", dtype=Float32)], + ) + def shared_odfv_name(inputs: pd.DataFrame) -> pd.DataFrame: + return pd.DataFrame({"output": inputs["feature1"] * 2}) + + # Register the regular FeatureView first - should succeed + test_registry.apply_feature_view(feature_view, project) + + # Attempt to register OnDemandFeatureView with same name - should fail + with pytest.raises(ConflictingFeatureViewNames) as exc_info: + test_registry.apply_feature_view(shared_odfv_name, project) + + # Verify error message contains the conflicting types + error_message = str(exc_info.value) + assert "shared_odfv_name" in error_message + + # Cleanup + test_registry.delete_feature_view("shared_odfv_name", project) + test_registry.teardown() diff --git a/sdk/python/tests/unit/cli/test_cli_apply_duplicates.py b/sdk/python/tests/unit/cli/test_cli_apply_duplicates.py index b3e350fe73c..cf5b64dbd20 100644 --- a/sdk/python/tests/unit/cli/test_cli_apply_duplicates.py +++ b/sdk/python/tests/unit/cli/test_cli_apply_duplicates.py @@ -8,7 +8,7 @@ def test_cli_apply_duplicated_featureview_names() -> None: run_simple_apply_test( example_repo_file_name="example_feature_repo_with_duplicated_featureview_names.py", - expected_error=b"Please ensure that all feature view names are case-insensitively unique", + expected_error=b"Feature view names must be case-insensitively unique", ) @@ -152,9 +152,7 @@ def test_cli_apply_imported_featureview_with_duplication() -> None: rc, output = runner.run_with_output(["apply"], cwd=repo_path) assert rc != 0 - assert ( - b"More than one feature view with name driver_hourly_stats found." in output - ) + assert b"Multiple FeatureViews with name 'driver_hourly_stats' found." in output def test_cli_apply_duplicated_featureview_names_multiple_py_files() -> None: @@ -195,6 +193,5 @@ def test_cli_apply_duplicated_featureview_names_multiple_py_files() -> None: assert ( rc != 0 - and b"Please ensure that all feature view names are case-insensitively unique" - in output + and b"Feature view names must be case-insensitively unique" in output ) diff --git a/sdk/python/tests/unit/infra/registry/test_sql_registry.py b/sdk/python/tests/unit/infra/registry/test_sql_registry.py index 8e5154da47b..5f144adbaf4 100644 --- a/sdk/python/tests/unit/infra/registry/test_sql_registry.py +++ b/sdk/python/tests/unit/infra/registry/test_sql_registry.py @@ -13,11 +13,20 @@ # limitations under the License. import tempfile +from datetime import timedelta import pytest +from feast import Field +from feast.data_source import PushSource from feast.entity import Entity +from feast.errors import ConflictingFeatureViewNames +from feast.feature_view import FeatureView +from feast.infra.offline_stores.file_source import FileSource from feast.infra.registry.sql import SqlRegistry, SqlRegistryConfig +from feast.stream_feature_view import StreamFeatureView +from feast.types import Float32 +from feast.value_type import ValueType @pytest.fixture @@ -56,3 +65,43 @@ def test_sql_registry(sqlite_registry): sqlite_registry.delete_entity("test_entity", "test_project") with pytest.raises(Exception): sqlite_registry.get_entity("test_entity", "test_project") + + +def _build_feature_view(name: str, entity: Entity, source: FileSource) -> FeatureView: + return FeatureView( + name=name, + entities=[entity], + ttl=timedelta(days=1), + schema=[Field(name="conv_rate", dtype=Float32)], + source=source, + ) + + +def test_feature_view_name_conflict_between_stream_and_batch(sqlite_registry): + entity = Entity( + name="driver", + value_type=ValueType.STRING, + join_keys=["driver_id"], + ) + sqlite_registry.apply_entity(entity, "test_project") + + file_source = FileSource( + path="driver_stats.parquet", + timestamp_field="event_timestamp", + created_timestamp_column="created", + ) + + batch_view = _build_feature_view("driver_activity", entity, file_source) + sqlite_registry.apply_feature_view(batch_view, "test_project") + + push_source = PushSource(name="driver_push", batch_source=file_source) + stream_view = StreamFeatureView( + name="driver_activity", + source=push_source, + entities=[entity], + schema=[Field(name="conv_rate", dtype=Float32)], + timestamp_field="event_timestamp", + ) + + with pytest.raises(ConflictingFeatureViewNames): + sqlite_registry.apply_feature_view(stream_view, "test_project") diff --git a/sdk/python/tests/unit/local_feast_tests/test_local_feature_store.py b/sdk/python/tests/unit/local_feast_tests/test_local_feature_store.py index b9062336bc2..9b7660bf692 100644 --- a/sdk/python/tests/unit/local_feast_tests/test_local_feature_store.py +++ b/sdk/python/tests/unit/local_feast_tests/test_local_feature_store.py @@ -11,6 +11,7 @@ from feast.data_format import AvroFormat, ParquetFormat from feast.data_source import KafkaSource from feast.entity import Entity +from feast.errors import ConflictingFeatureViewNames from feast.feast_object import ALL_RESOURCE_TYPES from feast.feature_store import FeatureStore from feast.feature_view import DUMMY_ENTITY_ID, DUMMY_ENTITY_NAME, FeatureView @@ -534,16 +535,10 @@ def test_apply_conflicting_feature_view_names(feature_store_with_local_registry) source=FileSource(path="customer_stats.parquet"), tags={}, ) - try: + with pytest.raises(ConflictingFeatureViewNames) as exc_info: feature_store_with_local_registry.apply([driver_stats, customer_stats]) - error = None - except ValueError as e: - error = e - assert ( - isinstance(error, ValueError) - and "Please ensure that all feature view names are case-insensitively unique" - in error.args[0] - ) + + assert "Feature view names must be case-insensitively unique" in str(exc_info.value) feature_store_with_local_registry.teardown()