diff --git a/sdk/python/feast/api/registry/rest/rest_utils.py b/sdk/python/feast/api/registry/rest/rest_utils.py index 765e1bbeb8b..dac6b7ccf7b 100644 --- a/sdk/python/feast/api/registry/rest/rest_utils.py +++ b/sdk/python/feast/api/registry/rest/rest_utils.py @@ -558,13 +558,39 @@ def filter_search_results_and_match_score( # Search in tags tags = result.get("tags", {}) tag_match = False + matched_tag = None + best_fuzzy_score = 0.0 + best_fuzzy_tag = None + for key, value in tags.items(): - if query_lower in key.lower() or query_lower in str(value).lower(): + key_lower = key.lower() + value_str = str(value).lower() + + # Exact match in key or value + if query_lower in key_lower or query_lower in value_str: tag_match = True + # Store the matched tag as a dictionary + matched_tag = {key: value} break + # Fuzzy match for tags (on combined "key:value" string) + tag_combined = f"{key_lower}={value_str}" + tag_fuzzy_score = fuzzy_match(query_lower, tag_combined) + + if tag_fuzzy_score > best_fuzzy_score: + best_fuzzy_score = tag_fuzzy_score + best_fuzzy_tag = {key: value} + if tag_match: result["match_score"] = MATCH_SCORE_TAGS + result["matched_tag"] = matched_tag + filtered_results.append(result) + continue + + # Fuzzy tag match + if best_fuzzy_score >= MATCH_SCORE_DEFAULT_THRESHOLD: + result["match_score"] = best_fuzzy_score * 100 + result["matched_tag"] = best_fuzzy_tag filtered_results.append(result) continue diff --git a/sdk/python/tests/unit/api/test_search_api.py b/sdk/python/tests/unit/api/test_search_api.py index 48e422c5fe1..9116db1c59b 100644 --- a/sdk/python/tests/unit/api/test_search_api.py +++ b/sdk/python/tests/unit/api/test_search_api.py @@ -734,6 +734,103 @@ def test_search_by_tags(self, shared_search_responses): f"Expected to find some of {expected_resources} but found none in {found_resources}" ) + def test_search_matched_tag_exact_match(self, search_test_app): + """Test that matched_tag field is present when a tag matches exactly""" + # Search for "data" which should match tag key "team" with value "data" + response = search_test_app.get("/search?query=data") + assert response.status_code == 200 + + data = response.json() + results = data["results"] + + # Find results that matched via tags (match_score = 60) + tag_matched_results = [ + r for r in results if r.get("match_score") == 60 and "matched_tag" in r + ] + + assert len(tag_matched_results) > 0, ( + "Expected to find at least one result with matched_tag from tag matching" + ) + + # Verify matched_tag is present and has a valid dictionary value + for result in tag_matched_results: + matched_tag = result.get("matched_tag") + assert matched_tag is not None, ( + f"matched_tag should not be None for result {result['name']}" + ) + assert isinstance(matched_tag, dict), ( + f"matched_tag should be a dictionary, got {type(matched_tag)}" + ) + # matched_tag should be a dictionary with key:value format + assert len(matched_tag) > 0, "matched_tag should not be empty" + assert len(matched_tag) == 1, ( + f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}" + ) + + logger.debug( + f"Found {len(tag_matched_results)} results with matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) for r in tag_matched_results]}" + ) + + def test_search_matched_tag_fuzzy_match(self, search_test_app): + """Test that matched_tag field is present when a tag matches via fuzzy matching""" + # Search for "te" which should fuzzy match tag key "team" + # "te" vs "team": overlap={'t','e'}/union={'t','e','a','m'} = 2/4 = 50% (below threshold) + # Try "tea" which should fuzzy match "team" better + # "tea" vs "team": overlap={'t','e','a'}/union={'t','e','a','m'} = 3/4 = 75% (above threshold) + response = search_test_app.get("/search?query=tea") + assert response.status_code == 200 + + data = response.json() + results = data["results"] + + # Find results that matched via fuzzy tag matching (match_score < 60 but >= 40) + fuzzy_tag_matched_results = [ + r + for r in results + if r.get("match_score", 0) >= 40 + and r.get("match_score", 0) < 60 + and "matched_tag" in r + ] + + # If we don't find fuzzy matches, try a different query that's more likely to match + if len(fuzzy_tag_matched_results) == 0: + # Try "dat" which should fuzzy match tag value "data" + # "dat" vs "data": overlap={'d','a','t'}/union={'d','a','t','a'} = 3/4 = 75% (above threshold) + response = search_test_app.get("/search?query=dat") + assert response.status_code == 200 + data = response.json() + results = data["results"] + fuzzy_tag_matched_results = [ + r + for r in results + if r.get("match_score", 0) >= 40 + and r.get("match_score", 0) < 60 + and "matched_tag" in r + ] + + if len(fuzzy_tag_matched_results) > 0: + # Verify matched_tag is present for fuzzy matches + for result in fuzzy_tag_matched_results: + matched_tag = result.get("matched_tag") + assert matched_tag is not None, ( + f"matched_tag should not be None for fuzzy-matched result {result['name']}" + ) + assert isinstance(matched_tag, dict), ( + f"matched_tag should be a dictionary, got {type(matched_tag)}" + ) + assert len(matched_tag) > 0, "matched_tag should not be empty" + assert len(matched_tag) == 1, ( + f"matched_tag should contain exactly one key-value pair, got {len(matched_tag)}" + ) + # Verify the match_score is in the fuzzy range + assert 40 <= result.get("match_score", 0) < 60, ( + f"Fuzzy tag match should have score in [40, 60), got {result.get('match_score')}" + ) + + logger.debug( + f"Found {len(fuzzy_tag_matched_results)} results with fuzzy matched_tag: {[r['name'] + ' -> ' + str(r.get('matched_tag', 'N/A')) + ' (score: ' + str(r.get('match_score', 'N/A')) + ')' for r in fuzzy_tag_matched_results]}" + ) + def test_search_sorting_functionality(self, shared_search_responses): """Test search results sorting using pre-computed responses""" # Test match_score descending sort