Skip to content

Commit 7ebcf03

Browse files
authored
fix: Auto-detect GCS/S3 registry store when registry is passed as string (#6260)
fix: auto-detect GCS/S3 registry store when registry is passed as string Signed-off-by: jiwidi <fhjaime96@gmail.com>
1 parent d41becf commit 7ebcf03

File tree

2 files changed

+142
-4
lines changed

2 files changed

+142
-4
lines changed

sdk/python/feast/repo_config.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -404,10 +404,11 @@ def registry(self):
404404
# This may be a custom registry store, which does not need a 'registry_type'
405405
self._registry = RegistryConfig(**self.registry_config)
406406
elif isinstance(self.registry_config, str):
407-
# User passed in just a path to file registry
408-
self._registry = get_registry_config_from_type("file")(
409-
path=self.registry_config
410-
)
407+
# Let Registry.__init__ auto-detect the correct store class
408+
# from the URI scheme (e.g. gs:// -> GCSRegistryStore).
409+
# Previously this hardcoded "file" type, which broke gs:// and
410+
# s3:// paths because FileRegistryStore uses pathlib.Path.
411+
self._registry = RegistryConfig(path=self.registry_config)
411412
elif self.registry_config:
412413
self._registry = self.registry_config
413414
return self._registry
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
"""Tests for string-based registry configuration in RepoConfig.
2+
3+
Verifies that passing registry as a string (e.g. "gs://bucket/registry.pb")
4+
correctly allows auto-detection of the registry store class from the URI
5+
scheme, rather than hardcoding FileRegistryStore.
6+
7+
Regression test for: RepoConfig.registry property hardcodes
8+
get_registry_config_from_type("file") when registry_config is a string,
9+
ignoring URI scheme and breaking gs:// and s3:// paths.
10+
"""
11+
12+
from pathlib import Path
13+
14+
import pytest
15+
16+
from feast.infra.registry.registry import (
17+
REGISTRY_STORE_CLASS_FOR_SCHEME,
18+
get_registry_store_class_from_scheme,
19+
)
20+
from feast.repo_config import RegistryConfig, RepoConfig
21+
22+
23+
def _make_repo_config(registry):
24+
"""Helper to create a minimal RepoConfig with the given registry value."""
25+
return RepoConfig(
26+
project="test",
27+
provider="gcp",
28+
registry=registry,
29+
online_store={"type": "redis", "connection_string": "localhost:6379"},
30+
entity_key_serialization_version=3,
31+
)
32+
33+
34+
class TestStringRegistryAutoDetection:
35+
"""When registry is passed as a string, RepoConfig must produce a
36+
RegistryConfig that allows Registry.__init__ to auto-detect the correct
37+
store class from the URI scheme."""
38+
39+
def test_gcs_string_registry_produces_correct_config(self):
40+
"""gs:// string -> RegistryConfig with registry_store_type=None
41+
so Registry.__init__ auto-detects GCSRegistryStore."""
42+
config = _make_repo_config("gs://my-bucket/feast/registry.pb")
43+
reg = config.registry
44+
45+
assert isinstance(reg, RegistryConfig)
46+
assert reg.path == "gs://my-bucket/feast/registry.pb"
47+
assert reg.registry_store_type is None
48+
49+
def test_s3_string_registry_produces_correct_config(self):
50+
"""s3:// string -> RegistryConfig with registry_store_type=None
51+
so Registry.__init__ auto-detects S3RegistryStore."""
52+
config = _make_repo_config("s3://my-bucket/feast/registry.pb")
53+
reg = config.registry
54+
55+
assert isinstance(reg, RegistryConfig)
56+
assert reg.path == "s3://my-bucket/feast/registry.pb"
57+
assert reg.registry_store_type is None
58+
59+
def test_local_string_registry_still_works(self):
60+
"""A local file path string must still produce a valid RegistryConfig
61+
(no regression for the common case)."""
62+
config = _make_repo_config("/tmp/feast/registry.db")
63+
reg = config.registry
64+
65+
assert isinstance(reg, RegistryConfig)
66+
assert reg.path == "/tmp/feast/registry.db"
67+
# registry_type defaults to "file", which is correct for local paths
68+
assert reg.registry_type == "file"
69+
70+
def test_dict_registry_still_works(self):
71+
"""Dict-based registry config must continue to work as before."""
72+
config = _make_repo_config({"path": "gs://my-bucket/feast/registry.pb"})
73+
reg = config.registry
74+
75+
assert isinstance(reg, RegistryConfig)
76+
assert reg.path == "gs://my-bucket/feast/registry.pb"
77+
assert reg.registry_store_type is None
78+
79+
def test_dict_registry_with_explicit_registry_type(self):
80+
"""Dict with explicit registry_type must route through
81+
get_registry_config_from_type (no change in behavior)."""
82+
config = _make_repo_config(
83+
{"registry_type": "file", "path": "/tmp/registry.db"}
84+
)
85+
reg = config.registry
86+
87+
assert isinstance(reg, RegistryConfig)
88+
assert reg.path == "/tmp/registry.db"
89+
assert reg.registry_type == "file"
90+
91+
92+
class TestRegistryStoreSchemeDetection:
93+
"""Verify that REGISTRY_STORE_CLASS_FOR_SCHEME and
94+
get_registry_store_class_from_scheme correctly map URI schemes
95+
to their store classes."""
96+
97+
def test_gcs_scheme_selects_gcs_registry_store(self):
98+
assert "gs" in REGISTRY_STORE_CLASS_FOR_SCHEME
99+
cls = get_registry_store_class_from_scheme("gs://bucket/registry.pb")
100+
assert cls.__name__ == "GCSRegistryStore"
101+
102+
def test_s3_scheme_selects_s3_registry_store(self):
103+
assert "s3" in REGISTRY_STORE_CLASS_FOR_SCHEME
104+
cls = get_registry_store_class_from_scheme("s3://bucket/registry.pb")
105+
assert cls.__name__ == "S3RegistryStore"
106+
107+
def test_file_scheme_selects_file_registry_store(self):
108+
assert "file" in REGISTRY_STORE_CLASS_FOR_SCHEME
109+
cls = get_registry_store_class_from_scheme("file:///tmp/registry.db")
110+
assert cls.__name__ == "FileRegistryStore"
111+
112+
def test_unknown_scheme_raises(self):
113+
with pytest.raises(Exception, match="unsupported scheme"):
114+
get_registry_store_class_from_scheme("ftp://host/registry.pb")
115+
116+
117+
class TestFileRegistryStorePathHandling:
118+
"""Demonstrate that FileRegistryStore cannot handle cloud URIs —
119+
this is the root cause of the IsADirectoryError in production when
120+
the bug is present."""
121+
122+
def test_pathlib_does_not_treat_gcs_as_absolute(self):
123+
"""pathlib.Path('gs://...') is NOT absolute, so FileRegistryStore
124+
joins it with repo_path producing nonsense like /app/gs://..."""
125+
gcs_path = Path("gs://my-bucket/feast/registry.pb")
126+
assert not gcs_path.is_absolute()
127+
128+
joined = Path("/app").joinpath(gcs_path)
129+
assert str(joined).startswith("/app/gs:")
130+
131+
def test_pathlib_does_not_treat_s3_as_absolute(self):
132+
"""Same issue for s3:// paths."""
133+
s3_path = Path("s3://my-bucket/feast/registry.pb")
134+
assert not s3_path.is_absolute()
135+
136+
joined = Path("/app").joinpath(s3_path)
137+
assert str(joined).startswith("/app/s3:")

0 commit comments

Comments
 (0)