From 994a6323104057dd3f37d3fb3ed3c2e131a7d13b Mon Sep 17 00:00:00 2001 From: yassinnouh21 Date: Sun, 4 Jan 2026 13:51:35 +0200 Subject: [PATCH 1/3] fix: Add get_table_query_string_with_alias() for PostgreSQL subquery aliasing PostgreSQL requires all subqueries in FROM clauses to have aliases. This adds a new method get_table_query_string_with_alias() to PostgreSQLSource that automatically adds an alias when the source is query-based. The existing get_table_query_string() is preserved for backward compatibility and updated with documentation explaining when to use the new method. Fixes #5605 Signed-off-by: Yassin Nouh Signed-off-by: yassinnouh21 --- .../postgres_offline_store/postgres_source.py | 33 +++++ .../postgres_offline_store/test_postgres.py | 135 ++++++++++++++++++ 2 files changed, 168 insertions(+) diff --git a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres_source.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres_source.py index 0d3045e4aa7..33bc634c94e 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres_source.py @@ -145,11 +145,44 @@ def get_table_column_names_and_types( ) def get_table_query_string(self) -> str: + """Returns a string that can be used to reference this table in SQL. + + For query-based sources, returns the query wrapped in parentheses. + + Note: + When using the returned string directly in a FROM clause with PostgreSQL, + you may need to add an alias if this is a query-based source. PostgreSQL + requires all subqueries in FROM clauses to have aliases. Consider using + get_table_query_string_with_alias() for automatic aliasing. + """ if self._postgres_options._table: return f"{self._postgres_options._table}" else: return f"({self._postgres_options._query})" + def get_table_query_string_with_alias(self, alias: str = "subquery") -> str: + """Returns a string for use in FROM clause with alias for PostgreSQL compatibility. + + PostgreSQL requires all subqueries in FROM clauses to have aliases. This method + automatically adds an alias when the source is query-based. + + Args: + alias: The alias to use for query-based sources. Defaults to "subquery". + + Returns: + For table-based sources: the table name (no alias needed). + For query-based sources: "(query) AS alias". + + Example: + >>> source = PostgreSQLSource(query="SELECT * FROM my_table", ...) + >>> entity_sql = f"SELECT id, ts FROM {source.get_table_query_string_with_alias()}" + # Results in: "SELECT id, ts FROM (SELECT * FROM my_table) AS subquery" + """ + if self._postgres_options._table: + return f"{self._postgres_options._table}" + else: + return f"({self._postgres_options._query}) AS {alias}" + class PostgreSQLOptions: def __init__( diff --git a/sdk/python/tests/unit/infra/offline_stores/contrib/postgres_offline_store/test_postgres.py b/sdk/python/tests/unit/infra/offline_stores/contrib/postgres_offline_store/test_postgres.py index ce98315eef4..3afca8d3f9b 100644 --- a/sdk/python/tests/unit/infra/offline_stores/contrib/postgres_offline_store/test_postgres.py +++ b/sdk/python/tests/unit/infra/offline_stores/contrib/postgres_offline_store/test_postgres.py @@ -950,3 +950,138 @@ def test_cli_date_combinations(self): # Should not fail on parameter validation stderr_output = result.stderr.decode() assert "must be provided" not in stderr_output + + +class TestPostgreSQLSourceQueryStringAlias: + """Test suite for get_table_query_string_with_alias() method. + + This addresses GitHub issue #5605: PostgreSQL requires all subqueries + in FROM clauses to have aliases. + """ + + def test_table_source_get_table_query_string(self): + """Test get_table_query_string() with table-based source""" + source = PostgreSQLSource( + name="test_source", + table="my_schema.my_table", + timestamp_field="event_timestamp", + ) + result = source.get_table_query_string() + assert result == "my_schema.my_table" + + def test_query_source_get_table_query_string(self): + """Test get_table_query_string() with query-based source""" + source = PostgreSQLSource( + name="test_source", + query="SELECT * FROM my_table WHERE active = true", + timestamp_field="event_timestamp", + ) + result = source.get_table_query_string() + assert result == "(SELECT * FROM my_table WHERE active = true)" + + def test_table_source_with_alias(self): + """Test get_table_query_string_with_alias() with table-based source returns table without alias""" + source = PostgreSQLSource( + name="test_source", + table="my_schema.my_table", + timestamp_field="event_timestamp", + ) + result = source.get_table_query_string_with_alias() + # Table sources don't need aliases + assert result == "my_schema.my_table" + + def test_query_source_with_default_alias(self): + """Test get_table_query_string_with_alias() with query-based source uses default alias""" + source = PostgreSQLSource( + name="test_source", + query="SELECT * FROM my_table WHERE active = true", + timestamp_field="event_timestamp", + ) + result = source.get_table_query_string_with_alias() + assert result == "(SELECT * FROM my_table WHERE active = true) AS subquery" + + def test_query_source_with_custom_alias(self): + """Test get_table_query_string_with_alias() with custom alias""" + source = PostgreSQLSource( + name="test_source", + query="SELECT id, name FROM users", + timestamp_field="event_timestamp", + ) + result = source.get_table_query_string_with_alias(alias="user_data") + assert result == "(SELECT id, name FROM users) AS user_data" + + def test_table_source_with_custom_alias_ignored(self): + """Test get_table_query_string_with_alias() ignores alias for table-based sources""" + source = PostgreSQLSource( + name="test_source", + table="events", + timestamp_field="event_timestamp", + ) + result = source.get_table_query_string_with_alias(alias="ignored_alias") + # Alias should be ignored for table sources + assert result == "events" + + def test_sql_query_with_alias_is_valid(self): + """Test that SQL using get_table_query_string_with_alias() is syntactically valid""" + source = PostgreSQLSource( + name="test_source", + query="SELECT id, ts FROM raw_data", + timestamp_field="ts", + ) + + # Construct a SQL query using the new method + entity_sql = f"SELECT id, ts FROM {source.get_table_query_string_with_alias()}" + + # Verify SQL is valid using sqlglot + parsed = sqlglot.parse(entity_sql, dialect="postgres") + assert len(parsed) == 1 + assert parsed[0] is not None + + def test_sql_query_without_alias_fails_in_postgres(self): + """Test that SQL using get_table_query_string() for query source produces invalid PostgreSQL + + This demonstrates the issue that get_table_query_string_with_alias() fixes: + PostgreSQL requires all subqueries in FROM clauses to have aliases. + """ + source = PostgreSQLSource( + name="test_source", + query="SELECT id, ts FROM raw_data", + timestamp_field="ts", + ) + + # Using the old method (without alias) for query-based source + entity_sql_without_alias = ( + f"SELECT id, ts FROM {source.get_table_query_string()}" + ) + + # This produces: SELECT id, ts FROM (SELECT id, ts FROM raw_data) + # which is invalid in PostgreSQL (subquery needs alias) + # sqlglot is lenient and may parse it, but PostgreSQL would reject it + assert ( + "AS" not in entity_sql_without_alias + ), "get_table_query_string() should not add alias" + + # Using the new method (with alias) produces valid SQL + entity_sql_with_alias = ( + f"SELECT id, ts FROM {source.get_table_query_string_with_alias()}" + ) + assert "AS subquery" in entity_sql_with_alias + + def test_complex_query_with_alias(self): + """Test get_table_query_string_with_alias() with complex nested query""" + complex_query = """ + SELECT u.id, u.name, o.total + FROM users u + JOIN orders o ON u.id = o.user_id + WHERE o.created_at > '2023-01-01' + """ + source = PostgreSQLSource( + name="test_source", + query=complex_query, + timestamp_field="created_at", + ) + + result = source.get_table_query_string_with_alias(alias="user_orders") + assert result.startswith("(") + assert result.endswith(") AS user_orders") + assert "SELECT u.id" in result From 8f81119c489d2b38fd16a98383261d911d43463d Mon Sep 17 00:00:00 2001 From: yassinnouh21 Date: Sun, 4 Jan 2026 14:31:51 +0200 Subject: [PATCH 2/3] style: Fix ruff 0.14.10 formatting for test file Signed-off-by: yassinnouh21 --- .../contrib/postgres_offline_store/test_postgres.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdk/python/tests/unit/infra/offline_stores/contrib/postgres_offline_store/test_postgres.py b/sdk/python/tests/unit/infra/offline_stores/contrib/postgres_offline_store/test_postgres.py index 3afca8d3f9b..fad837e4c16 100644 --- a/sdk/python/tests/unit/infra/offline_stores/contrib/postgres_offline_store/test_postgres.py +++ b/sdk/python/tests/unit/infra/offline_stores/contrib/postgres_offline_store/test_postgres.py @@ -1057,9 +1057,9 @@ def test_sql_query_without_alias_fails_in_postgres(self): # This produces: SELECT id, ts FROM (SELECT id, ts FROM raw_data) # which is invalid in PostgreSQL (subquery needs alias) # sqlglot is lenient and may parse it, but PostgreSQL would reject it - assert ( - "AS" not in entity_sql_without_alias - ), "get_table_query_string() should not add alias" + assert "AS" not in entity_sql_without_alias, ( + "get_table_query_string() should not add alias" + ) # Using the new method (with alias) produces valid SQL entity_sql_with_alias = ( From f3eb357c78aefdcc7b2cd702149fac630392c436 Mon Sep 17 00:00:00 2001 From: yassinnouh21 Date: Sun, 4 Jan 2026 14:50:12 +0200 Subject: [PATCH 3/3] fix: Remove doctest markers from example to avoid test failures Signed-off-by: yassinnouh21 --- .../contrib/postgres_offline_store/postgres_source.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres_source.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres_source.py index 33bc634c94e..272b8ad0474 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres_source.py @@ -173,9 +173,10 @@ def get_table_query_string_with_alias(self, alias: str = "subquery") -> str: For table-based sources: the table name (no alias needed). For query-based sources: "(query) AS alias". - Example: - >>> source = PostgreSQLSource(query="SELECT * FROM my_table", ...) - >>> entity_sql = f"SELECT id, ts FROM {source.get_table_query_string_with_alias()}" + Example:: + + source = PostgreSQLSource(query="SELECT * FROM my_table", ...) + entity_sql = f"SELECT id, ts FROM {source.get_table_query_string_with_alias()}" # Results in: "SELECT id, ts FROM (SELECT * FROM my_table) AS subquery" """ if self._postgres_options._table: