From 81e27b3d81da9b1684eae0b7f0b9abfd7badcc4f Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Mon, 5 Jan 2026 14:26:04 -0800 Subject: [PATCH 01/28] feat: Configure Biome for Consistent Code Style (#2364) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR adds `biome.json` to enforce consistent 2-space indentation and single quotes for JavaScript and CSS files. This aligns with project style guides, automates formatting via pre-commit hooks, and prevents manual style changes from being overwritten. Currently, our Biome setup relies on default settings, which violates Google coding style guidelines. Specifically: * **Quotation Marks:** Biome's defaults do not enforce single quotes for JavaScript (as per `js_style.txt`: `screen/8NaEr3SswN6qx6D`) and CSS property values (as per `css_style.txt`: `screen/9KCh8ZVQ4ByPeDR`). * **Indentation:** Biome's defaults do not consistently enforce 2 spaces for indentation across HTML (`screen/97ZZstQc4iH4Dsb`) and JavaScript (`screen/3crxm5peLvRE2kj`) files. This configuration explicitly sets these rules to ensure full compliance. **Benefits:** * Consistent and readable JS/CSS code. * Automated style enforcement. * Reduced developer friction. **To Test:** Make a style-violating change in a `.js` or `.css` file and attempt to commit. Biome should automatically reformat it. 
Fixes # 🦕 --- biome.json | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 biome.json diff --git a/biome.json b/biome.json new file mode 100644 index 0000000000..d30c8687a4 --- /dev/null +++ b/biome.json @@ -0,0 +1,16 @@ +{ + "formatter": { + "indentStyle": "space", + "indentWidth": 2 + }, + "javascript": { + "formatter": { + "quoteStyle": "single" + } + }, + "css": { + "formatter": { + "quoteStyle": "single" + } + } +} From b478b0c46e88283111a0e940106634a63788a692 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 5 Jan 2026 18:42:50 -0800 Subject: [PATCH 02/28] refactor: fix and, or and xor ops on NULL scalar (#2361) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change can resolve `test_series_bool_bool_operators_scalar` and `test_series_int_int_operators_scalar` presubmit failures in #2248. Fixes internal issue 417774347🦕 --- .../compile/sqlglot/expressions/bool_ops.py | 46 +++++++++++++++++-- .../test_bool_ops/test_and_op/out.sql | 23 +++++++--- .../test_bool_ops/test_or_op/out.sql | 23 +++++++--- .../test_bool_ops/test_xor_op/out.sql | 34 +++++++++++--- .../sqlglot/expressions/test_bool_ops.py | 4 ++ 5 files changed, 107 insertions(+), 23 deletions(-) diff --git a/bigframes/core/compile/sqlglot/expressions/bool_ops.py b/bigframes/core/compile/sqlglot/expressions/bool_ops.py index 26653d720c..6fee3f4278 100644 --- a/bigframes/core/compile/sqlglot/expressions/bool_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/bool_ops.py @@ -26,6 +26,16 @@ @register_binary_op(ops.and_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + # For AND, when we encounter a NULL value, we only know when the result is FALSE, + # otherwise the result is unknown (NULL). 
See: truth table at + # https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic#AND,_OR + if left.expr == sge.null(): + condition = sge.EQ(this=right.expr, expression=sge.convert(False)) + return sge.If(this=condition, true=right.expr, false=sge.null()) + if right.expr == sge.null(): + condition = sge.EQ(this=left.expr, expression=sge.convert(False)) + return sge.If(this=condition, true=left.expr, false=sge.null()) + if left.dtype == dtypes.BOOL_DTYPE and right.dtype == dtypes.BOOL_DTYPE: return sge.And(this=left.expr, expression=right.expr) return sge.BitwiseAnd(this=left.expr, expression=right.expr) @@ -33,6 +43,16 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: @register_binary_op(ops.or_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + # For OR, when we encounter a NULL value, we only know when the result is TRUE, + # otherwise the result is unknown (NULL). See: truth table at + # https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic#AND,_OR + if left.expr == sge.null(): + condition = sge.EQ(this=right.expr, expression=sge.convert(True)) + return sge.If(this=condition, true=right.expr, false=sge.null()) + if right.expr == sge.null(): + condition = sge.EQ(this=left.expr, expression=sge.convert(True)) + return sge.If(this=condition, true=left.expr, false=sge.null()) + if left.dtype == dtypes.BOOL_DTYPE and right.dtype == dtypes.BOOL_DTYPE: return sge.Or(this=left.expr, expression=right.expr) return sge.BitwiseOr(this=left.expr, expression=right.expr) @@ -40,8 +60,26 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: @register_binary_op(ops.xor_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: - if left.dtype == dtypes.BOOL_DTYPE and right.dtype == dtypes.BOOL_DTYPE: - left_expr = sge.And(this=left.expr, expression=sge.Not(this=right.expr)) - right_expr = sge.And(this=sge.Not(this=left.expr), expression=right.expr) - return 
sge.Or(this=left_expr, expression=right_expr) + # For XOR, cast NULL operands to BOOLEAN to ensure the resulting expression + # maintains the boolean data type. + left_expr = left.expr + left_dtype = left.dtype + if left_expr == sge.null(): + left_expr = sge.Cast(this=sge.convert(None), to="BOOLEAN") + left_dtype = dtypes.BOOL_DTYPE + right_expr = right.expr + right_dtype = right.dtype + if right_expr == sge.null(): + right_expr = sge.Cast(this=sge.convert(None), to="BOOLEAN") + right_dtype = dtypes.BOOL_DTYPE + + if left_dtype == dtypes.BOOL_DTYPE and right_dtype == dtypes.BOOL_DTYPE: + return sge.Or( + this=sge.paren( + sge.And(this=left_expr, expression=sge.Not(this=right_expr)) + ), + expression=sge.paren( + sge.And(this=sge.Not(this=left_expr), expression=right_expr) + ), + ) return sge.BitwiseXor(this=left.expr, expression=right.expr) diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_and_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_and_op/out.sql index 634a936a0e..7e46e10708 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_and_op/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_and_op/out.sql @@ -21,11 +21,22 @@ WITH `bfcte_0` AS ( `bfcol_9` AS `bfcol_17`, `bfcol_7` AND `bfcol_7` AS `bfcol_18` FROM `bfcte_1` +), `bfcte_3` AS ( + SELECT + *, + `bfcol_14` AS `bfcol_24`, + `bfcol_15` AS `bfcol_25`, + `bfcol_16` AS `bfcol_26`, + `bfcol_17` AS `bfcol_27`, + `bfcol_18` AS `bfcol_28`, + IF(`bfcol_15` = FALSE, `bfcol_15`, NULL) AS `bfcol_29` + FROM `bfcte_2` ) SELECT - `bfcol_14` AS `rowindex`, - `bfcol_15` AS `bool_col`, - `bfcol_16` AS `int64_col`, - `bfcol_17` AS `int_and_int`, - `bfcol_18` AS `bool_and_bool` -FROM `bfcte_2` \ No newline at end of file + `bfcol_24` AS `rowindex`, + `bfcol_25` AS `bool_col`, + `bfcol_26` AS `int64_col`, + `bfcol_27` AS `int_and_int`, + `bfcol_28` AS `bool_and_bool`, + `bfcol_29` AS 
`bool_and_null` +FROM `bfcte_3` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_or_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_or_op/out.sql index 0069b07d8f..c8e9cf65a9 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_or_op/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_or_op/out.sql @@ -21,11 +21,22 @@ WITH `bfcte_0` AS ( `bfcol_9` AS `bfcol_17`, `bfcol_7` OR `bfcol_7` AS `bfcol_18` FROM `bfcte_1` +), `bfcte_3` AS ( + SELECT + *, + `bfcol_14` AS `bfcol_24`, + `bfcol_15` AS `bfcol_25`, + `bfcol_16` AS `bfcol_26`, + `bfcol_17` AS `bfcol_27`, + `bfcol_18` AS `bfcol_28`, + IF(`bfcol_15` = TRUE, `bfcol_15`, NULL) AS `bfcol_29` + FROM `bfcte_2` ) SELECT - `bfcol_14` AS `rowindex`, - `bfcol_15` AS `bool_col`, - `bfcol_16` AS `int64_col`, - `bfcol_17` AS `int_and_int`, - `bfcol_18` AS `bool_and_bool` -FROM `bfcte_2` \ No newline at end of file + `bfcol_24` AS `rowindex`, + `bfcol_25` AS `bool_col`, + `bfcol_26` AS `int64_col`, + `bfcol_27` AS `int_and_int`, + `bfcol_28` AS `bool_and_bool`, + `bfcol_29` AS `bool_and_null` +FROM `bfcte_3` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_xor_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_xor_op/out.sql index e4c87ed720..d6a081cbbd 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_xor_op/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_xor_op/out.sql @@ -19,13 +19,33 @@ WITH `bfcte_0` AS ( `bfcol_7` AS `bfcol_15`, `bfcol_8` AS `bfcol_16`, `bfcol_9` AS `bfcol_17`, - `bfcol_7` AND NOT `bfcol_7` OR NOT `bfcol_7` AND `bfcol_7` AS `bfcol_18` + ( + `bfcol_7` AND NOT `bfcol_7` + ) OR ( + NOT `bfcol_7` AND `bfcol_7` + ) AS `bfcol_18` FROM `bfcte_1` +), `bfcte_3` AS ( + SELECT 
+ *, + `bfcol_14` AS `bfcol_24`, + `bfcol_15` AS `bfcol_25`, + `bfcol_16` AS `bfcol_26`, + `bfcol_17` AS `bfcol_27`, + `bfcol_18` AS `bfcol_28`, + ( + `bfcol_15` AND NOT CAST(NULL AS BOOLEAN) + ) + OR ( + NOT `bfcol_15` AND CAST(NULL AS BOOLEAN) + ) AS `bfcol_29` + FROM `bfcte_2` ) SELECT - `bfcol_14` AS `rowindex`, - `bfcol_15` AS `bool_col`, - `bfcol_16` AS `int64_col`, - `bfcol_17` AS `int_and_int`, - `bfcol_18` AS `bool_and_bool` -FROM `bfcte_2` \ No newline at end of file + `bfcol_24` AS `rowindex`, + `bfcol_25` AS `bool_col`, + `bfcol_26` AS `int64_col`, + `bfcol_27` AS `int_and_int`, + `bfcol_28` AS `bool_and_bool`, + `bfcol_29` AS `bool_and_null` +FROM `bfcte_3` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_bool_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_bool_ops.py index 08b60d6ddf..601fd86e4e 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_bool_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_bool_ops.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import pandas as pd import pytest import bigframes.pandas as bpd @@ -24,6 +25,7 @@ def test_and_op(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_and_int"] = bf_df["int64_col"] & bf_df["int64_col"] bf_df["bool_and_bool"] = bf_df["bool_col"] & bf_df["bool_col"] + bf_df["bool_and_null"] = bf_df["bool_col"] & pd.NA # type: ignore snapshot.assert_match(bf_df.sql, "out.sql") @@ -32,6 +34,7 @@ def test_or_op(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_and_int"] = bf_df["int64_col"] | bf_df["int64_col"] bf_df["bool_and_bool"] = bf_df["bool_col"] | bf_df["bool_col"] + bf_df["bool_and_null"] = bf_df["bool_col"] | pd.NA # type: ignore snapshot.assert_match(bf_df.sql, "out.sql") @@ -40,4 +43,5 @@ def test_xor_op(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_and_int"] = bf_df["int64_col"] ^ bf_df["int64_col"] bf_df["bool_and_bool"] = bf_df["bool_col"] ^ bf_df["bool_col"] + bf_df["bool_and_null"] = bf_df["bool_col"] ^ pd.NA # type: ignore snapshot.assert_match(bf_df.sql, "out.sql") From cd3df332ce483484d86ee735d84f7f9ac9e412e5 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 6 Jan 2026 17:21:24 -0800 Subject: [PATCH 03/28] chore: remove error message for sqlglot version (#2367) This change is removing the error message from vendor sqlglot codes. See screenshot/6pJ3X25XNE8uurW --- third_party/bigframes_vendored/sqlglot/__init__.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/third_party/bigframes_vendored/sqlglot/__init__.py b/third_party/bigframes_vendored/sqlglot/__init__.py index 41c98569ce..f3679caf8d 100644 --- a/third_party/bigframes_vendored/sqlglot/__init__.py +++ b/third_party/bigframes_vendored/sqlglot/__init__.py @@ -74,17 +74,6 @@ logger = logging.getLogger("sqlglot") -try: - from bigframes_vendored.sqlglot._version import ( # noqa: F401 - __version__, - __version_tuple__, - ) -except ImportError: - logger.error( - "Unable to set __version__, run `pip install -e .` or `python setup.py develop` first." 
- ) - - pretty = False """Whether to format generated SQL by default.""" From e1e1141733320ecdcc6cf9f1bb42e0a73b6e5751 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 6 Jan 2026 17:21:50 -0800 Subject: [PATCH 04/28] refactor: fix isin, explode and geo ops for sqlglot compiler (#2366) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change can resolve the following bugs in #2248: - fix `test_isin_*` tests failures. - fix `test_*explode*` tests failures. - fix `test_geo_x_non_point` tests failure. Fixes internal issue 417774347🦕 --- .../sqlglot/expressions/comparison_ops.py | 12 ++++-- .../compile/sqlglot/expressions/geo_ops.py | 4 +- bigframes/core/compile/sqlglot/sqlglot_ir.py | 8 ++-- .../test_comparison_ops/test_is_in/out.sql | 37 ++++++++++--------- .../snapshots/test_geo_ops/test_geo_x/out.sql | 2 +- .../snapshots/test_geo_ops/test_geo_y/out.sql | 2 +- .../expressions/test_comparison_ops.py | 10 ++++- .../test_compile_explode_dataframe/out.sql | 2 +- .../test_compile_explode_series/out.sql | 2 +- 9 files changed, 47 insertions(+), 32 deletions(-) diff --git a/bigframes/core/compile/sqlglot/expressions/comparison_ops.py b/bigframes/core/compile/sqlglot/expressions/comparison_ops.py index d64c7b1d3f..81bc9e0f56 100644 --- a/bigframes/core/compile/sqlglot/expressions/comparison_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/comparison_ops.py @@ -31,17 +31,23 @@ @register_unary_op(ops.IsInOp, pass_op=True) def _(expr: TypedExpr, op: ops.IsInOp) -> sge.Expression: values = [] - is_numeric_expr = dtypes.is_numeric(expr.dtype) + is_numeric_expr = dtypes.is_numeric(expr.dtype, include_bool=False) for value in op.values: - if value is None: + if _is_null(value): continue dtype = dtypes.bigframes_type(type(value)) - if expr.dtype == dtype or is_numeric_expr and dtypes.is_numeric(dtype): + if ( + expr.dtype == dtype + or is_numeric_expr + and dtypes.is_numeric(dtype, include_bool=False) + ): 
values.append(sge.convert(value)) if op.match_nulls: contains_nulls = any(_is_null(value) for value in op.values) if contains_nulls: + if len(values) == 0: + return sge.Is(this=expr.expr, expression=sge.Null()) return sge.Is(this=expr.expr, expression=sge.Null()) | sge.In( this=expr.expr, expressions=values ) diff --git a/bigframes/core/compile/sqlglot/expressions/geo_ops.py b/bigframes/core/compile/sqlglot/expressions/geo_ops.py index a57b4bc931..9c6ba33ea5 100644 --- a/bigframes/core/compile/sqlglot/expressions/geo_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/geo_ops.py @@ -108,12 +108,12 @@ def _(expr: TypedExpr, op: ops.GeoStSimplifyOp) -> sge.Expression: @register_unary_op(ops.geo_x_op) def _(expr: TypedExpr) -> sge.Expression: - return sge.func("SAFE.ST_X", expr.expr) + return sge.func("ST_X", expr.expr) @register_unary_op(ops.geo_y_op) def _(expr: TypedExpr) -> sge.Expression: - return sge.func("SAFE.ST_Y", expr.expr) + return sge.func("ST_Y", expr.expr) @register_binary_op(ops.GeoStDistanceOp, pass_op=True) diff --git a/bigframes/core/compile/sqlglot/sqlglot_ir.py b/bigframes/core/compile/sqlglot/sqlglot_ir.py index 04176014b0..9445b65e99 100644 --- a/bigframes/core/compile/sqlglot/sqlglot_ir.py +++ b/bigframes/core/compile/sqlglot/sqlglot_ir.py @@ -558,16 +558,15 @@ def _explode_single_column( ) selection = sge.Star(replace=[unnested_column_alias.as_(column)]) - # TODO: "CROSS" if not keep_empty else "LEFT" - # TODO: overlaps_with_parent to replace existing column. new_expr = _select_to_cte( self.expr, sge.to_identifier( next(self.uid_gen.get_uid_stream("bfcte_")), quoted=self.quoted ), ) + # Use LEFT JOIN to preserve rows when unnesting empty arrays. 
new_expr = new_expr.select(selection, append=False).join( - unnest_expr, join_type="CROSS" + unnest_expr, join_type="LEFT" ) return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen) @@ -621,8 +620,9 @@ def _explode_multiple_columns( next(self.uid_gen.get_uid_stream("bfcte_")), quoted=self.quoted ), ) + # Use LEFT JOIN to preserve rows when unnesting empty arrays. new_expr = new_expr.select(selection, append=False).join( - unnest_expr, join_type="CROSS" + unnest_expr, join_type="LEFT" ) return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen) diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_is_in/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_is_in/out.sql index 197ed279fa..ec85f060da 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_is_in/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_is_in/out.sql @@ -1,32 +1,35 @@ WITH `bfcte_0` AS ( SELECT + `bool_col`, `float64_col`, `int64_col` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` ), `bfcte_1` AS ( SELECT *, - COALESCE(`int64_col` IN (1, 2, 3), FALSE) AS `bfcol_2`, - ( - `int64_col` IS NULL - ) OR `int64_col` IN (123456) AS `bfcol_3`, - COALESCE(`int64_col` IN (1.0, 2.0, 3.0), FALSE) AS `bfcol_4`, - FALSE AS `bfcol_5`, - COALESCE(`int64_col` IN (2.5, 3), FALSE) AS `bfcol_6`, + COALESCE(`bool_col` IN (TRUE, FALSE), FALSE) AS `bfcol_3`, + COALESCE(`int64_col` IN (1, 2, 3), FALSE) AS `bfcol_4`, + `int64_col` IS NULL AS `bfcol_5`, + COALESCE(`int64_col` IN (1.0, 2.0, 3.0), FALSE) AS `bfcol_6`, FALSE AS `bfcol_7`, - COALESCE(`int64_col` IN (123456), FALSE) AS `bfcol_8`, + COALESCE(`int64_col` IN (2.5, 3), FALSE) AS `bfcol_8`, + FALSE AS `bfcol_9`, + FALSE AS `bfcol_10`, + COALESCE(`int64_col` IN (123456), FALSE) AS `bfcol_11`, ( `float64_col` IS NULL - ) OR `float64_col` IN (1, 2, 3) AS `bfcol_9` + ) OR `float64_col` IN (1, 2, 3) AS `bfcol_12` FROM 
`bfcte_0` ) SELECT - `bfcol_2` AS `ints`, - `bfcol_3` AS `ints_w_null`, - `bfcol_4` AS `floats`, - `bfcol_5` AS `strings`, - `bfcol_6` AS `mixed`, - `bfcol_7` AS `empty`, - `bfcol_8` AS `ints_wo_match_nulls`, - `bfcol_9` AS `float_in_ints` + `bfcol_3` AS `bools`, + `bfcol_4` AS `ints`, + `bfcol_5` AS `ints_w_null`, + `bfcol_6` AS `floats`, + `bfcol_7` AS `strings`, + `bfcol_8` AS `mixed`, + `bfcol_9` AS `empty`, + `bfcol_10` AS `empty_wo_match_nulls`, + `bfcol_11` AS `ints_wo_match_nulls`, + `bfcol_12` AS `float_in_ints` FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_x/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_x/out.sql index 09211270d1..826eb9f209 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_x/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_x/out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - SAFE.ST_X(`geography_col`) AS `bfcol_1` + ST_X(`geography_col`) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_y/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_y/out.sql index 625613ae2a..dd411820b2 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_y/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_y/out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - SAFE.ST_Y(`geography_col`) AS `bfcol_1` + ST_Y(`geography_col`) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py index 20dd6c5ca6..ea94bcae56 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py +++ 
b/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pandas as pd import pytest from bigframes import operations as ops @@ -22,18 +23,23 @@ def test_is_in(scalar_types_df: bpd.DataFrame, snapshot): + bool_col = "bool_col" int_col = "int64_col" float_col = "float64_col" - bf_df = scalar_types_df[[int_col, float_col]] + bf_df = scalar_types_df[[bool_col, int_col, float_col]] ops_map = { + "bools": ops.IsInOp(values=(True, False)).as_expr(bool_col), "ints": ops.IsInOp(values=(1, 2, 3)).as_expr(int_col), - "ints_w_null": ops.IsInOp(values=(None, 123456)).as_expr(int_col), + "ints_w_null": ops.IsInOp(values=(None, pd.NA)).as_expr(int_col), "floats": ops.IsInOp(values=(1.0, 2.0, 3.0), match_nulls=False).as_expr( int_col ), "strings": ops.IsInOp(values=("1.0", "2.0")).as_expr(int_col), "mixed": ops.IsInOp(values=("1.0", 2.5, 3)).as_expr(int_col), "empty": ops.IsInOp(values=()).as_expr(int_col), + "empty_wo_match_nulls": ops.IsInOp(values=(), match_nulls=False).as_expr( + int_col + ), "ints_wo_match_nulls": ops.IsInOp( values=(None, 123456), match_nulls=False ).as_expr(int_col), diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_dataframe/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_dataframe/out.sql index e594b67669..5d9019439f 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_dataframe/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_dataframe/out.sql @@ -9,7 +9,7 @@ WITH `bfcte_0` AS ( * REPLACE (`int_list_col`[SAFE_OFFSET(`bfcol_13`)] AS `int_list_col`, `string_list_col`[SAFE_OFFSET(`bfcol_13`)] AS `string_list_col`) FROM `bfcte_0` - CROSS JOIN UNNEST(GENERATE_ARRAY(0, LEAST(ARRAY_LENGTH(`int_list_col`) - 1, ARRAY_LENGTH(`string_list_col`) 
- 1))) AS `bfcol_13` WITH OFFSET AS `bfcol_7` + LEFT JOIN UNNEST(GENERATE_ARRAY(0, LEAST(ARRAY_LENGTH(`int_list_col`) - 1, ARRAY_LENGTH(`string_list_col`) - 1))) AS `bfcol_13` WITH OFFSET AS `bfcol_7` ) SELECT `rowindex`, diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_series/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_series/out.sql index 5af0aa0092..8ba4559da8 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_series/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_series/out.sql @@ -8,7 +8,7 @@ WITH `bfcte_0` AS ( * REPLACE (`bfcol_8` AS `int_list_col`) FROM `bfcte_0` - CROSS JOIN UNNEST(`int_list_col`) AS `bfcol_8` WITH OFFSET AS `bfcol_4` + LEFT JOIN UNNEST(`int_list_col`) AS `bfcol_8` WITH OFFSET AS `bfcol_4` ) SELECT `rowindex`, From 0f593c27bfee89fe1bdfc880504f9ab0ac28a24e Mon Sep 17 00:00:00 2001 From: jialuoo Date: Wed, 7 Jan 2026 22:26:05 +0000 Subject: [PATCH 05/28] fix: implement retry logic for cloud function endpoint fetching (#2369) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR addresses intermittent failures when creating remote functions where the Cloud Function endpoint is not immediately available after the creation operation completes. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes b/473903255 🦕 --- bigframes/functions/_function_client.py | 31 ++++++++++++++++++++----- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/bigframes/functions/_function_client.py b/bigframes/functions/_function_client.py index 8a88a14040..a82217da03 100644 --- a/bigframes/functions/_function_client.py +++ b/bigframes/functions/_function_client.py @@ -375,6 +375,20 @@ def generate_cloud_function_code( ) return entry_point + @google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type(ValueError), + initial=1.0, + maximum=10.0, + multiplier=2.0, + deadline=300.0, # Wait up to 5 minutes for propagation + ) + def _get_cloud_function_endpoint_with_retry(self, name): + endpoint = self.get_cloud_function_endpoint(name) + if not endpoint: + # Raising ValueError triggers the retry predicate + raise ValueError(f"Endpoint for {name} not yet available.") + return endpoint + def create_cloud_function( self, def_, @@ -516,11 +530,14 @@ def create_cloud_function( create_function_request.function = function # Create the cloud function and wait for it to be ready to use + endpoint = None try: operation = self._cloud_functions_client.create_function( request=create_function_request ) - operation.result() + # operation.result() returns the Function object upon completion + function_obj = operation.result() + endpoint = function_obj.service_config.uri # Cleanup os.remove(archive_path) @@ -535,12 +552,14 @@ def create_cloud_function( # we created it. This error is safe to ignore. 
pass - # Fetch the endpoint of the just created function - endpoint = self.get_cloud_function_endpoint(random_name) + # Fetch the endpoint with retries if it wasn't returned by the operation if not endpoint: - raise bf_formatting.create_exception_with_feedback_link( - ValueError, "Couldn't fetch the http endpoint." - ) + try: + endpoint = self._get_cloud_function_endpoint_with_retry(random_name) + except Exception as e: + raise bf_formatting.create_exception_with_feedback_link( + ValueError, f"Couldn't fetch the http endpoint: {e}" + ) logger.info( f"Successfully created cloud function {random_name} with uri ({endpoint})" From 2763b41d4b86939e389f76789f5b2acd44f18169 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 7 Jan 2026 15:09:16 -0800 Subject: [PATCH 06/28] feat: Add dark mode to anywidget mode (#2365) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The table now automatically adapts to dark and light themes in environments like Colab and VS Code for anywidget mode. Verified at: 1. vs code notebook: - light mode: screen/5ne2xKyq6JGR5VQ - dark mode: screen/8cq6y3ugbs5wN3T 2. colab notebook: - light mode: screen/92W8xCdjGtfkQgo - dark mode: screen/4uYykxKA7wTuhot Fixes #<460861328> 🦕 --- bigframes/display/table_widget.css | 132 +++++- bigframes/display/table_widget.js | 558 ++++++++++++------------- tests/js/table_widget.test.js | 600 ++++++++++++++------------- tests/unit/display/test_anywidget.py | 26 ++ 4 files changed, 713 insertions(+), 603 deletions(-) diff --git a/bigframes/display/table_widget.css b/bigframes/display/table_widget.css index 34134b043d..b02caa004e 100644 --- a/bigframes/display/table_widget.css +++ b/bigframes/display/table_widget.css @@ -14,24 +14,83 @@ * limitations under the License. 
*/ -.bigframes-widget { +/* Increase specificity to override framework styles without !important */ +.bigframes-widget.bigframes-widget { + /* Default Light Mode Variables */ + --bf-bg: white; + --bf-border-color: #ccc; + --bf-error-bg: #fbe; + --bf-error-border: red; + --bf-error-fg: black; + --bf-fg: black; + --bf-header-bg: #f5f5f5; + --bf-null-fg: gray; + --bf-row-even-bg: #f5f5f5; + --bf-row-odd-bg: white; + + background-color: var(--bf-bg); + box-sizing: border-box; + color: var(--bf-fg); display: flex; flex-direction: column; + font-family: + '-apple-system', 'BlinkMacSystemFont', 'Segoe UI', 'Roboto', sans-serif; + margin: 0; + padding: 0; +} + +.bigframes-widget * { + box-sizing: border-box; +} + +/* Dark Mode Overrides: + * 1. @media (prefers-color-scheme: dark) - System-wide dark mode + * 2. .bigframes-dark-mode - Explicit class for VSCode theme detection + * 3. html[theme="dark"], body[data-theme="dark"] - Colab/Pantheon manual override + */ +@media (prefers-color-scheme: dark) { + .bigframes-widget.bigframes-widget { + --bf-bg: var(--vscode-editor-background, #202124); + --bf-border-color: #444; + --bf-error-bg: #511; + --bf-error-border: #f88; + --bf-error-fg: #fcc; + --bf-fg: white; + --bf-header-bg: var(--vscode-editor-background, black); + --bf-null-fg: #aaa; + --bf-row-even-bg: #202124; + --bf-row-odd-bg: #383838; + } +} + +.bigframes-widget.bigframes-dark-mode.bigframes-dark-mode, +html[theme='dark'] .bigframes-widget.bigframes-widget, +body[data-theme='dark'] .bigframes-widget.bigframes-widget { + --bf-bg: var(--vscode-editor-background, #202124); + --bf-border-color: #444; + --bf-error-bg: #511; + --bf-error-border: #f88; + --bf-error-fg: #fcc; + --bf-fg: white; + --bf-header-bg: var(--vscode-editor-background, black); + --bf-null-fg: #aaa; + --bf-row-even-bg: #202124; + --bf-row-odd-bg: #383838; } .bigframes-widget .table-container { + background-color: var(--bf-bg); + margin: 0; max-height: 620px; overflow: auto; + padding: 0; } 
.bigframes-widget .footer { align-items: center; - /* TODO(b/460861328): We will support dark mode in a media selector once we - * determine how to override the background colors as well. */ - color: black; + background-color: var(--bf-bg); + color: var(--bf-fg); display: flex; - font-family: - "-apple-system", "BlinkMacSystemFont", "Segoe UI", "Roboto", sans-serif; font-size: 0.8rem; justify-content: space-between; padding: 8px; @@ -70,16 +129,28 @@ margin-right: 8px; } -.bigframes-widget table { +.bigframes-widget table.bigframes-widget-table, +.bigframes-widget table.dataframe { + background-color: var(--bf-bg); + border: 1px solid var(--bf-border-color); border-collapse: collapse; - /* TODO(b/460861328): We will support dark mode in a media selector once we - * determine how to override the background colors as well. */ - color: black; + border-spacing: 0; + box-shadow: none; + color: var(--bf-fg); + margin: 0; + outline: none; text-align: left; + width: auto; /* Fix stretching */ +} + +.bigframes-widget tr { + border: none; } .bigframes-widget th { - background-color: var(--colab-primary-surface-color, var(--jp-layout-color0)); + background-color: var(--bf-header-bg); + border: 1px solid var(--bf-border-color); + color: var(--bf-fg); padding: 0; position: sticky; text-align: left; @@ -87,6 +158,22 @@ z-index: 1; } +.bigframes-widget td { + border: 1px solid var(--bf-border-color); + color: var(--bf-fg); + padding: 0.5em; +} + +.bigframes-widget table tbody tr:nth-child(odd), +.bigframes-widget table tbody tr:nth-child(odd) td { + background-color: var(--bf-row-odd-bg); +} + +.bigframes-widget table tbody tr:nth-child(even), +.bigframes-widget table tbody tr:nth-child(even) td { + background-color: var(--bf-row-even-bg); +} + .bigframes-widget .bf-header-content { box-sizing: border-box; height: 100%; @@ -106,8 +193,13 @@ } .bigframes-widget button { + background-color: transparent; + border: 1px solid currentColor; + border-radius: 4px; + color: inherit; 
cursor: pointer; display: inline-block; + padding: 2px 8px; text-align: center; text-decoration: none; user-select: none; @@ -120,11 +212,10 @@ } .bigframes-widget .bigframes-error-message { - background-color: #fbe; - border: 1px solid red; + background-color: var(--bf-error-bg); + border: 1px solid var(--bf-error-border); border-radius: 4px; - font-family: - "-apple-system", "BlinkMacSystemFont", "Segoe UI", "Roboto", sans-serif; + color: var(--bf-error-fg); font-size: 14px; margin-bottom: 8px; padding: 8px; @@ -139,14 +230,9 @@ } .bigframes-widget .null-value { - color: gray; -} - -.bigframes-widget td { - padding: 0.5em; + color: var(--bf-null-fg); } -.bigframes-widget tr:hover td, -.bigframes-widget td.row-hover { - background-color: var(--colab-hover-surface-color, var(--jp-layout-color2)); +.bigframes-widget .debug-info { + border-top: 1px solid var(--bf-border-color); } diff --git a/bigframes/display/table_widget.js b/bigframes/display/table_widget.js index 3944f48da7..40a027a8bc 100644 --- a/bigframes/display/table_widget.js +++ b/bigframes/display/table_widget.js @@ -15,19 +15,19 @@ */ const ModelProperty = { - ERROR_MESSAGE: "error_message", - ORDERABLE_COLUMNS: "orderable_columns", - PAGE: "page", - PAGE_SIZE: "page_size", - ROW_COUNT: "row_count", - SORT_CONTEXT: "sort_context", - TABLE_HTML: "table_html", + ERROR_MESSAGE: 'error_message', + ORDERABLE_COLUMNS: 'orderable_columns', + PAGE: 'page', + PAGE_SIZE: 'page_size', + ROW_COUNT: 'row_count', + SORT_CONTEXT: 'sort_context', + TABLE_HTML: 'table_html', }; const Event = { - CHANGE: "change", - CHANGE_TABLE_HTML: "change:table_html", - CLICK: "click", + CHANGE: 'change', + CHANGE_TABLE_HTML: 'change:table_html', + CLICK: 'click', }; /** @@ -35,297 +35,253 @@ const Event = { * @param {{ model: any, el: !HTMLElement }} props - The widget properties. 
*/ function render({ model, el }) { - // Main container with a unique class for CSS scoping - el.classList.add("bigframes-widget"); - - // Add error message container at the top - const errorContainer = document.createElement("div"); - errorContainer.classList.add("error-message"); - - const tableContainer = document.createElement("div"); - tableContainer.classList.add("table-container"); - const footer = document.createElement("footer"); - footer.classList.add("footer"); - - // Pagination controls - const paginationContainer = document.createElement("div"); - paginationContainer.classList.add("pagination"); - const prevPage = document.createElement("button"); - const pageIndicator = document.createElement("span"); - pageIndicator.classList.add("page-indicator"); - const nextPage = document.createElement("button"); - const rowCountLabel = document.createElement("span"); - rowCountLabel.classList.add("row-count"); - - // Page size controls - const pageSizeContainer = document.createElement("div"); - pageSizeContainer.classList.add("page-size"); - const pageSizeLabel = document.createElement("label"); - const pageSizeInput = document.createElement("select"); - - prevPage.textContent = "<"; - nextPage.textContent = ">"; - pageSizeLabel.textContent = "Page size:"; - - // Page size options - const pageSizes = [10, 25, 50, 100]; - for (const size of pageSizes) { - const option = document.createElement("option"); - option.value = size; - option.textContent = size; - if (size === model.get(ModelProperty.PAGE_SIZE)) { - option.selected = true; - } - pageSizeInput.appendChild(option); - } - - /** Updates the footer states and page label based on the model. 
*/ - function updateButtonStates() { - const currentPage = model.get(ModelProperty.PAGE); - const pageSize = model.get(ModelProperty.PAGE_SIZE); - const rowCount = model.get(ModelProperty.ROW_COUNT); - - if (rowCount === null) { - // Unknown total rows - rowCountLabel.textContent = "Total rows unknown"; - pageIndicator.textContent = `Page ${( - currentPage + 1 - ).toLocaleString()} of many`; - prevPage.disabled = currentPage === 0; - nextPage.disabled = false; // Allow navigation until we hit the end - } else if (rowCount === 0) { - // Empty dataset - rowCountLabel.textContent = "0 total rows"; - pageIndicator.textContent = "Page 1 of 1"; - prevPage.disabled = true; - nextPage.disabled = true; - } else { - // Known total rows - const totalPages = Math.ceil(rowCount / pageSize); - rowCountLabel.textContent = `${rowCount.toLocaleString()} total rows`; - pageIndicator.textContent = `Page ${( - currentPage + 1 - ).toLocaleString()} of ${totalPages.toLocaleString()}`; - prevPage.disabled = currentPage === 0; - nextPage.disabled = currentPage >= totalPages - 1; - } - pageSizeInput.value = pageSize; - } - - /** - * Handles page navigation. - * @param {number} direction - The direction to navigate (-1 for previous, 1 for next). - */ - function handlePageChange(direction) { - const currentPage = model.get(ModelProperty.PAGE); - model.set(ModelProperty.PAGE, currentPage + direction); - model.save_changes(); - } - - /** - * Handles page size changes. - * @param {number} newSize - The new page size. - */ - function handlePageSizeChange(newSize) { - model.set(ModelProperty.PAGE_SIZE, newSize); - model.set(ModelProperty.PAGE, 0); // Reset to first page - model.save_changes(); - } - - /** Updates the HTML in the table container and refreshes button states. */ - function handleTableHTMLChange() { - // Note: Using innerHTML is safe here because the content is generated - // by a trusted backend (DataFrame.to_html). 
- tableContainer.innerHTML = model.get(ModelProperty.TABLE_HTML); - - // Get sortable columns from backend - const sortableColumns = model.get(ModelProperty.ORDERABLE_COLUMNS); - const currentSortContext = model.get(ModelProperty.SORT_CONTEXT) || []; - - const getSortIndex = (colName) => - currentSortContext.findIndex((item) => item.column === colName); - - // Add click handlers to column headers for sorting - const headers = tableContainer.querySelectorAll("th"); - headers.forEach((header) => { - const headerDiv = header.querySelector("div"); - const columnName = headerDiv.textContent.trim(); - - // Only add sorting UI for sortable columns - if (columnName && sortableColumns.includes(columnName)) { - header.style.cursor = "pointer"; - - // Create a span for the indicator - const indicatorSpan = document.createElement("span"); - indicatorSpan.classList.add("sort-indicator"); - indicatorSpan.style.paddingLeft = "5px"; - - // Determine sort indicator and initial visibility - let indicator = "●"; // Default: unsorted (dot) - const sortIndex = getSortIndex(columnName); - - if (sortIndex !== -1) { - const isAscending = currentSortContext[sortIndex].ascending; - indicator = isAscending ? 
"▲" : "▼"; - indicatorSpan.style.visibility = "visible"; // Sorted arrows always visible - } else { - indicatorSpan.style.visibility = "hidden"; // Unsorted dot hidden by default - } - indicatorSpan.textContent = indicator; - - // Add indicator to the header, replacing the old one if it exists - const existingIndicator = headerDiv.querySelector(".sort-indicator"); - if (existingIndicator) { - headerDiv.removeChild(existingIndicator); - } - headerDiv.appendChild(indicatorSpan); - - // Add hover effects for unsorted columns only - header.addEventListener("mouseover", () => { - if (getSortIndex(columnName) === -1) { - indicatorSpan.style.visibility = "visible"; - } - }); - header.addEventListener("mouseout", () => { - if (getSortIndex(columnName) === -1) { - indicatorSpan.style.visibility = "hidden"; - } - }); - - // Add click handler for three-state toggle - header.addEventListener(Event.CLICK, (event) => { - const sortIndex = getSortIndex(columnName); - let newContext = [...currentSortContext]; - - if (event.shiftKey) { - if (sortIndex !== -1) { - // Already sorted. Toggle or Remove. - if (newContext[sortIndex].ascending) { - // Asc -> Desc - // Clone object to avoid mutation issues - newContext[sortIndex] = { - ...newContext[sortIndex], - ascending: false, - }; - } else { - // Desc -> Remove - newContext.splice(sortIndex, 1); - } - } else { - // Not sorted -> Append Asc - newContext.push({ column: columnName, ascending: true }); - } - } else { - // No shift key. Single column mode. - if (sortIndex !== -1 && newContext.length === 1) { - // Already only this column. Toggle or Remove. 
- if (newContext[sortIndex].ascending) { - newContext[sortIndex] = { - ...newContext[sortIndex], - ascending: false, - }; - } else { - newContext = []; - } - } else { - // Start fresh with this column - newContext = [{ column: columnName, ascending: true }]; - } - } - - model.set(ModelProperty.SORT_CONTEXT, newContext); - model.save_changes(); - }); - } - }); - - const table = tableContainer.querySelector("table"); - if (table) { - const tableBody = table.querySelector("tbody"); - - /** - * Handles row hover events. - * @param {!Event} event - The mouse event. - * @param {boolean} isHovering - True to add hover class, false to remove. - */ - function handleRowHover(event, isHovering) { - const cell = event.target.closest("td"); - if (cell) { - const row = cell.closest("tr"); - const origRowId = row.dataset.origRow; - if (origRowId) { - const allCellsInGroup = tableBody.querySelectorAll( - `tr[data-orig-row="${origRowId}"] td`, - ); - allCellsInGroup.forEach((c) => { - c.classList.toggle("row-hover", isHovering); - }); - } - } - } - - if (tableBody) { - tableBody.addEventListener("mouseover", (event) => - handleRowHover(event, true), - ); - tableBody.addEventListener("mouseout", (event) => - handleRowHover(event, false), - ); - } - } - - updateButtonStates(); - } - - // Add error message handler - function handleErrorMessageChange() { - const errorMsg = model.get(ModelProperty.ERROR_MESSAGE); - if (errorMsg) { - errorContainer.textContent = errorMsg; - errorContainer.style.display = "block"; - } else { - errorContainer.style.display = "none"; - } - } - - // Add event listeners - prevPage.addEventListener(Event.CLICK, () => handlePageChange(-1)); - nextPage.addEventListener(Event.CLICK, () => handlePageChange(1)); - pageSizeInput.addEventListener(Event.CHANGE, (e) => { - const newSize = Number(e.target.value); - if (newSize) { - handlePageSizeChange(newSize); - } - }); - model.on(Event.CHANGE_TABLE_HTML, handleTableHTMLChange); - 
model.on(`change:${ModelProperty.ROW_COUNT}`, updateButtonStates); - model.on(`change:${ModelProperty.ERROR_MESSAGE}`, handleErrorMessageChange); - model.on(`change:_initial_load_complete`, (val) => { - if (val) { - updateButtonStates(); - } - }); - model.on(`change:${ModelProperty.PAGE}`, updateButtonStates); - - // Assemble the DOM - paginationContainer.appendChild(prevPage); - paginationContainer.appendChild(pageIndicator); - paginationContainer.appendChild(nextPage); - - pageSizeContainer.appendChild(pageSizeLabel); - pageSizeContainer.appendChild(pageSizeInput); - - footer.appendChild(rowCountLabel); - footer.appendChild(paginationContainer); - footer.appendChild(pageSizeContainer); - - el.appendChild(errorContainer); - el.appendChild(tableContainer); - el.appendChild(footer); - - // Initial render - handleTableHTMLChange(); - handleErrorMessageChange(); + el.classList.add('bigframes-widget'); + + const errorContainer = document.createElement('div'); + errorContainer.classList.add('error-message'); + + const tableContainer = document.createElement('div'); + tableContainer.classList.add('table-container'); + const footer = document.createElement('footer'); + footer.classList.add('footer'); + + /** Detects theme and applies necessary style overrides. */ + function updateTheme() { + const body = document.body; + const isDark = + body.classList.contains('vscode-dark') || + body.classList.contains('theme-dark') || + body.dataset.theme === 'dark' || + body.getAttribute('data-vscode-theme-kind') === 'vscode-dark'; + + if (isDark) { + el.classList.add('bigframes-dark-mode'); + } else { + el.classList.remove('bigframes-dark-mode'); + } + } + + updateTheme(); + // Re-check after mount to ensure parent styling is applied. 
+ setTimeout(updateTheme, 300); + + const observer = new MutationObserver(updateTheme); + observer.observe(document.body, { + attributes: true, + attributeFilter: ['class', 'data-theme', 'data-vscode-theme-kind'], + }); + + // Pagination controls + const paginationContainer = document.createElement('div'); + paginationContainer.classList.add('pagination'); + const prevPage = document.createElement('button'); + const pageIndicator = document.createElement('span'); + pageIndicator.classList.add('page-indicator'); + const nextPage = document.createElement('button'); + const rowCountLabel = document.createElement('span'); + rowCountLabel.classList.add('row-count'); + + // Page size controls + const pageSizeContainer = document.createElement('div'); + pageSizeContainer.classList.add('page-size'); + const pageSizeLabel = document.createElement('label'); + const pageSizeInput = document.createElement('select'); + + prevPage.textContent = '<'; + nextPage.textContent = '>'; + pageSizeLabel.textContent = 'Page size:'; + + const pageSizes = [10, 25, 50, 100]; + for (const size of pageSizes) { + const option = document.createElement('option'); + option.value = size; + option.textContent = size; + if (size === model.get(ModelProperty.PAGE_SIZE)) { + option.selected = true; + } + pageSizeInput.appendChild(option); + } + + function updateButtonStates() { + const currentPage = model.get(ModelProperty.PAGE); + const pageSize = model.get(ModelProperty.PAGE_SIZE); + const rowCount = model.get(ModelProperty.ROW_COUNT); + + if (rowCount === null) { + rowCountLabel.textContent = 'Total rows unknown'; + pageIndicator.textContent = `Page ${(currentPage + 1).toLocaleString()} of many`; + prevPage.disabled = currentPage === 0; + nextPage.disabled = false; + } else if (rowCount === 0) { + rowCountLabel.textContent = '0 total rows'; + pageIndicator.textContent = 'Page 1 of 1'; + prevPage.disabled = true; + nextPage.disabled = true; + } else { + const totalPages = Math.ceil(rowCount / 
pageSize); + rowCountLabel.textContent = `${rowCount.toLocaleString()} total rows`; + pageIndicator.textContent = `Page ${(currentPage + 1).toLocaleString()} of ${totalPages.toLocaleString()}`; + prevPage.disabled = currentPage === 0; + nextPage.disabled = currentPage >= totalPages - 1; + } + pageSizeInput.value = pageSize; + } + + function handlePageChange(direction) { + const currentPage = model.get(ModelProperty.PAGE); + model.set(ModelProperty.PAGE, currentPage + direction); + model.save_changes(); + } + + function handlePageSizeChange(newSize) { + model.set(ModelProperty.PAGE_SIZE, newSize); + model.set(ModelProperty.PAGE, 0); + model.save_changes(); + } + + function handleTableHTMLChange() { + tableContainer.innerHTML = model.get(ModelProperty.TABLE_HTML); + + const sortableColumns = model.get(ModelProperty.ORDERABLE_COLUMNS); + const currentSortContext = model.get(ModelProperty.SORT_CONTEXT) || []; + + const getSortIndex = (colName) => + currentSortContext.findIndex((item) => item.column === colName); + + const headers = tableContainer.querySelectorAll('th'); + headers.forEach((header) => { + const headerDiv = header.querySelector('div'); + const columnName = headerDiv.textContent.trim(); + + if (columnName && sortableColumns.includes(columnName)) { + header.style.cursor = 'pointer'; + + const indicatorSpan = document.createElement('span'); + indicatorSpan.classList.add('sort-indicator'); + indicatorSpan.style.paddingLeft = '5px'; + + // Determine sort indicator and initial visibility + let indicator = '●'; // Default: unsorted (dot) + const sortIndex = getSortIndex(columnName); + + if (sortIndex !== -1) { + const isAscending = currentSortContext[sortIndex].ascending; + indicator = isAscending ? 
'▲' : '▼'; + indicatorSpan.style.visibility = 'visible'; // Sorted arrows always visible + } else { + indicatorSpan.style.visibility = 'hidden'; + } + indicatorSpan.textContent = indicator; + + const existingIndicator = headerDiv.querySelector('.sort-indicator'); + if (existingIndicator) { + headerDiv.removeChild(existingIndicator); + } + headerDiv.appendChild(indicatorSpan); + + header.addEventListener('mouseover', () => { + if (getSortIndex(columnName) === -1) { + indicatorSpan.style.visibility = 'visible'; + } + }); + header.addEventListener('mouseout', () => { + if (getSortIndex(columnName) === -1) { + indicatorSpan.style.visibility = 'hidden'; + } + }); + + // Add click handler for three-state toggle + header.addEventListener(Event.CLICK, (event) => { + const sortIndex = getSortIndex(columnName); + let newContext = [...currentSortContext]; + + if (event.shiftKey) { + if (sortIndex !== -1) { + // Already sorted. Toggle or Remove. + if (newContext[sortIndex].ascending) { + // Asc -> Desc + // Clone object to avoid mutation issues + newContext[sortIndex] = { + ...newContext[sortIndex], + ascending: false, + }; + } else { + // Desc -> Remove + newContext.splice(sortIndex, 1); + } + } else { + // Not sorted -> Append Asc + newContext.push({ column: columnName, ascending: true }); + } + } else { + // No shift key. Single column mode. + if (sortIndex !== -1 && newContext.length === 1) { + // Already only this column. Toggle or Remove. 
+ if (newContext[sortIndex].ascending) { + newContext[sortIndex] = { + ...newContext[sortIndex], + ascending: false, + }; + } else { + newContext = []; + } + } else { + // Start fresh with this column + newContext = [{ column: columnName, ascending: true }]; + } + } + + model.set(ModelProperty.SORT_CONTEXT, newContext); + model.save_changes(); + }); + } + }); + + updateButtonStates(); + } + + function handleErrorMessageChange() { + const errorMsg = model.get(ModelProperty.ERROR_MESSAGE); + if (errorMsg) { + errorContainer.textContent = errorMsg; + errorContainer.style.display = 'block'; + } else { + errorContainer.style.display = 'none'; + } + } + + prevPage.addEventListener(Event.CLICK, () => handlePageChange(-1)); + nextPage.addEventListener(Event.CLICK, () => handlePageChange(1)); + pageSizeInput.addEventListener(Event.CHANGE, (e) => { + const newSize = Number(e.target.value); + if (newSize) { + handlePageSizeChange(newSize); + } + }); + + model.on(Event.CHANGE_TABLE_HTML, handleTableHTMLChange); + model.on(`change:${ModelProperty.ROW_COUNT}`, updateButtonStates); + model.on(`change:${ModelProperty.ERROR_MESSAGE}`, handleErrorMessageChange); + model.on(`change:_initial_load_complete`, (val) => { + if (val) updateButtonStates(); + }); + model.on(`change:${ModelProperty.PAGE}`, updateButtonStates); + + paginationContainer.appendChild(prevPage); + paginationContainer.appendChild(pageIndicator); + paginationContainer.appendChild(nextPage); + pageSizeContainer.appendChild(pageSizeLabel); + pageSizeContainer.appendChild(pageSizeInput); + footer.appendChild(rowCountLabel); + footer.appendChild(paginationContainer); + footer.appendChild(pageSizeContainer); + + el.appendChild(errorContainer); + el.appendChild(tableContainer); + el.appendChild(footer); + + handleTableHTMLChange(); + handleErrorMessageChange(); } export default { render }; diff --git a/tests/js/table_widget.test.js b/tests/js/table_widget.test.js index b3796905e5..5843694617 100644 --- 
a/tests/js/table_widget.test.js +++ b/tests/js/table_widget.test.js @@ -14,283 +14,325 @@ * limitations under the License. */ -import { jest } from "@jest/globals"; -import { JSDOM } from "jsdom"; - -describe("TableWidget", () => { - let model; - let el; - let render; - - beforeEach(async () => { - jest.resetModules(); - document.body.innerHTML = "
"; - el = document.body.querySelector("div"); - - const tableWidget = ( - await import("../../bigframes/display/table_widget.js") - ).default; - render = tableWidget.render; - - model = { - get: jest.fn(), - set: jest.fn(), - save_changes: jest.fn(), - on: jest.fn(), - }; - }); - - it("should have a render function", () => { - expect(render).toBeDefined(); - }); - - describe("render", () => { - it("should create the basic structure", () => { - // Mock the initial state - model.get.mockImplementation((property) => { - if (property === "table_html") { - return ""; - } - if (property === "row_count") { - return 100; - } - if (property === "error_message") { - return null; - } - if (property === "page_size") { - return 10; - } - if (property === "page") { - return 0; - } - return null; - }); - - render({ model, el }); - - expect(el.classList.contains("bigframes-widget")).toBe(true); - expect(el.querySelector(".error-message")).not.toBeNull(); - expect(el.querySelector("div")).not.toBeNull(); - expect(el.querySelector("div:nth-child(3)")).not.toBeNull(); - }); - - it("should sort when a sortable column is clicked", () => { - // Mock the initial state - model.get.mockImplementation((property) => { - if (property === "table_html") { - return "
col1
"; - } - if (property === "orderable_columns") { - return ["col1"]; - } - if (property === "sort_context") { - return []; - } - return null; - }); - - render({ model, el }); - - // Manually trigger the table_html change handler - const tableHtmlChangeHandler = model.on.mock.calls.find( - (call) => call[0] === "change:table_html", - )[1]; - tableHtmlChangeHandler(); - - const header = el.querySelector("th"); - header.click(); - - expect(model.set).toHaveBeenCalledWith("sort_context", [ - { column: "col1", ascending: true }, - ]); - expect(model.save_changes).toHaveBeenCalled(); - }); - - it("should reverse sort direction when a sorted column is clicked", () => { - // Mock the initial state - model.get.mockImplementation((property) => { - if (property === "table_html") { - return "
col1
"; - } - if (property === "orderable_columns") { - return ["col1"]; - } - if (property === "sort_context") { - return [{ column: "col1", ascending: true }]; - } - return null; - }); - - render({ model, el }); - - // Manually trigger the table_html change handler - const tableHtmlChangeHandler = model.on.mock.calls.find( - (call) => call[0] === "change:table_html", - )[1]; - tableHtmlChangeHandler(); - - const header = el.querySelector("th"); - header.click(); - - expect(model.set).toHaveBeenCalledWith("sort_context", [ - { column: "col1", ascending: false }, - ]); - expect(model.save_changes).toHaveBeenCalled(); - }); - - it("should clear sort when a descending sorted column is clicked", () => { - // Mock the initial state - model.get.mockImplementation((property) => { - if (property === "table_html") { - return "
col1
"; - } - if (property === "orderable_columns") { - return ["col1"]; - } - if (property === "sort_context") { - return [{ column: "col1", ascending: false }]; - } - return null; - }); - - render({ model, el }); - - // Manually trigger the table_html change handler - const tableHtmlChangeHandler = model.on.mock.calls.find( - (call) => call[0] === "change:table_html", - )[1]; - tableHtmlChangeHandler(); - - const header = el.querySelector("th"); - header.click(); - - expect(model.set).toHaveBeenCalledWith("sort_context", []); - expect(model.save_changes).toHaveBeenCalled(); - }); - - it("should display the correct sort indicator", () => { - // Mock the initial state - model.get.mockImplementation((property) => { - if (property === "table_html") { - return "
col1
col2
"; - } - if (property === "orderable_columns") { - return ["col1", "col2"]; - } - if (property === "sort_context") { - return [{ column: "col1", ascending: true }]; - } - return null; - }); - - render({ model, el }); - - // Manually trigger the table_html change handler - const tableHtmlChangeHandler = model.on.mock.calls.find( - (call) => call[0] === "change:table_html", - )[1]; - tableHtmlChangeHandler(); - - const headers = el.querySelectorAll("th"); - const indicator1 = headers[0].querySelector(".sort-indicator"); - const indicator2 = headers[1].querySelector(".sort-indicator"); - - expect(indicator1.textContent).toBe("▲"); - expect(indicator2.textContent).toBe("●"); - }); - - it("should add a column to sort when Shift+Click is used", () => { - // Mock the initial state: already sorted by col1 asc - model.get.mockImplementation((property) => { - if (property === "table_html") { - return "
col1
col2
"; - } - if (property === "orderable_columns") { - return ["col1", "col2"]; - } - if (property === "sort_context") { - return [{ column: "col1", ascending: true }]; - } - return null; - }); - - render({ model, el }); - - // Manually trigger the table_html change handler - const tableHtmlChangeHandler = model.on.mock.calls.find( - (call) => call[0] === "change:table_html", - )[1]; - tableHtmlChangeHandler(); - - const headers = el.querySelectorAll("th"); - const header2 = headers[1]; // col2 - - // Simulate Shift+Click - const clickEvent = new MouseEvent("click", { - bubbles: true, - cancelable: true, - shiftKey: true, - }); - header2.dispatchEvent(clickEvent); - - expect(model.set).toHaveBeenCalledWith("sort_context", [ - { column: "col1", ascending: true }, - { column: "col2", ascending: true }, - ]); - expect(model.save_changes).toHaveBeenCalled(); - }); - }); - - it("should render the series as a table with an index and one value column", () => { - // Mock the initial state - model.get.mockImplementation((property) => { - if (property === "table_html") { - return ` -
-
- - - - - - - - - - - - - - - - - -
value
0a
1b
-
-
`; - } - if (property === "orderable_columns") { - return []; - } - return null; - }); - - render({ model, el }); - - // Manually trigger the table_html change handler - const tableHtmlChangeHandler = model.on.mock.calls.find( - (call) => call[0] === "change:table_html", - )[1]; - tableHtmlChangeHandler(); - - // Check that the table has two columns - const headers = el.querySelectorAll( - ".paginated-table-container .col-header-name", - ); - expect(headers).toHaveLength(2); - - // Check that the headers are an empty string (for the index) and "value" - expect(headers[0].textContent).toBe(""); - expect(headers[1].textContent).toBe("value"); - }); +import { jest } from '@jest/globals'; + +describe('TableWidget', () => { + let model; + let el; + let render; + + beforeEach(async () => { + jest.resetModules(); + document.body.innerHTML = '
'; + el = document.body.querySelector('div'); + + const tableWidget = ( + await import('../../bigframes/display/table_widget.js') + ).default; + render = tableWidget.render; + + model = { + get: jest.fn(), + set: jest.fn(), + save_changes: jest.fn(), + on: jest.fn(), + }; + }); + + it('should have a render function', () => { + expect(render).toBeDefined(); + }); + + describe('render', () => { + it('should create the basic structure', () => { + // Mock the initial state + model.get.mockImplementation((property) => { + if (property === 'table_html') { + return ''; + } + if (property === 'row_count') { + return 100; + } + if (property === 'error_message') { + return null; + } + if (property === 'page_size') { + return 10; + } + if (property === 'page') { + return 0; + } + return null; + }); + + render({ model, el }); + + expect(el.classList.contains('bigframes-widget')).toBe(true); + expect(el.querySelector('.error-message')).not.toBeNull(); + expect(el.querySelector('div')).not.toBeNull(); + expect(el.querySelector('div:nth-child(3)')).not.toBeNull(); + }); + + it('should sort when a sortable column is clicked', () => { + // Mock the initial state + model.get.mockImplementation((property) => { + if (property === 'table_html') { + return '
col1
'; + } + if (property === 'orderable_columns') { + return ['col1']; + } + if (property === 'sort_context') { + return []; + } + return null; + }); + + render({ model, el }); + + // Manually trigger the table_html change handler + const tableHtmlChangeHandler = model.on.mock.calls.find( + (call) => call[0] === 'change:table_html', + )[1]; + tableHtmlChangeHandler(); + + const header = el.querySelector('th'); + header.click(); + + expect(model.set).toHaveBeenCalledWith('sort_context', [ + { column: 'col1', ascending: true }, + ]); + expect(model.save_changes).toHaveBeenCalled(); + }); + + it('should reverse sort direction when a sorted column is clicked', () => { + // Mock the initial state + model.get.mockImplementation((property) => { + if (property === 'table_html') { + return '
col1
'; + } + if (property === 'orderable_columns') { + return ['col1']; + } + if (property === 'sort_context') { + return [{ column: 'col1', ascending: true }]; + } + return null; + }); + + render({ model, el }); + + // Manually trigger the table_html change handler + const tableHtmlChangeHandler = model.on.mock.calls.find( + (call) => call[0] === 'change:table_html', + )[1]; + tableHtmlChangeHandler(); + + const header = el.querySelector('th'); + header.click(); + + expect(model.set).toHaveBeenCalledWith('sort_context', [ + { column: 'col1', ascending: false }, + ]); + expect(model.save_changes).toHaveBeenCalled(); + }); + + it('should clear sort when a descending sorted column is clicked', () => { + // Mock the initial state + model.get.mockImplementation((property) => { + if (property === 'table_html') { + return '
col1
'; + } + if (property === 'orderable_columns') { + return ['col1']; + } + if (property === 'sort_context') { + return [{ column: 'col1', ascending: false }]; + } + return null; + }); + + render({ model, el }); + + // Manually trigger the table_html change handler + const tableHtmlChangeHandler = model.on.mock.calls.find( + (call) => call[0] === 'change:table_html', + )[1]; + tableHtmlChangeHandler(); + + const header = el.querySelector('th'); + header.click(); + + expect(model.set).toHaveBeenCalledWith('sort_context', []); + expect(model.save_changes).toHaveBeenCalled(); + }); + + it('should display the correct sort indicator', () => { + // Mock the initial state + model.get.mockImplementation((property) => { + if (property === 'table_html') { + return '
col1
col2
'; + } + if (property === 'orderable_columns') { + return ['col1', 'col2']; + } + if (property === 'sort_context') { + return [{ column: 'col1', ascending: true }]; + } + return null; + }); + + render({ model, el }); + + // Manually trigger the table_html change handler + const tableHtmlChangeHandler = model.on.mock.calls.find( + (call) => call[0] === 'change:table_html', + )[1]; + tableHtmlChangeHandler(); + + const headers = el.querySelectorAll('th'); + const indicator1 = headers[0].querySelector('.sort-indicator'); + const indicator2 = headers[1].querySelector('.sort-indicator'); + + expect(indicator1.textContent).toBe('▲'); + expect(indicator2.textContent).toBe('●'); + }); + + it('should add a column to sort when Shift+Click is used', () => { + // Mock the initial state: already sorted by col1 asc + model.get.mockImplementation((property) => { + if (property === 'table_html') { + return '
col1
col2
'; + } + if (property === 'orderable_columns') { + return ['col1', 'col2']; + } + if (property === 'sort_context') { + return [{ column: 'col1', ascending: true }]; + } + return null; + }); + + render({ model, el }); + + // Manually trigger the table_html change handler + const tableHtmlChangeHandler = model.on.mock.calls.find( + (call) => call[0] === 'change:table_html', + )[1]; + tableHtmlChangeHandler(); + + const headers = el.querySelectorAll('th'); + const header2 = headers[1]; // col2 + + // Simulate Shift+Click + const clickEvent = new MouseEvent('click', { + bubbles: true, + cancelable: true, + shiftKey: true, + }); + header2.dispatchEvent(clickEvent); + + expect(model.set).toHaveBeenCalledWith('sort_context', [ + { column: 'col1', ascending: true }, + { column: 'col2', ascending: true }, + ]); + expect(model.save_changes).toHaveBeenCalled(); + }); + }); + + describe('Theme detection', () => { + beforeEach(() => { + jest.useFakeTimers(); + // Mock the initial state for theme detection tests + model.get.mockImplementation((property) => { + if (property === 'table_html') { + return ''; + } + if (property === 'row_count') { + return 100; + } + if (property === 'error_message') { + return null; + } + if (property === 'page_size') { + return 10; + } + if (property === 'page') { + return 0; + } + return null; + }); + }); + + afterEach(() => { + jest.useRealTimers(); + document.body.classList.remove('vscode-dark'); + }); + + it('should add bigframes-dark-mode class in dark mode', () => { + document.body.classList.add('vscode-dark'); + render({ model, el }); + jest.runAllTimers(); + expect(el.classList.contains('bigframes-dark-mode')).toBe(true); + }); + + it('should not add bigframes-dark-mode class in light mode', () => { + render({ model, el }); + jest.runAllTimers(); + expect(el.classList.contains('bigframes-dark-mode')).toBe(false); + }); + }); + + it('should render the series as a table with an index and one value column', () => { + // Mock the initial state 
+ model.get.mockImplementation((property) => { + if (property === 'table_html') { + return ` +
+
+ + + + + + + + + + + + + + + + + +
value
0a
1b
+
+
`; + } + if (property === 'orderable_columns') { + return []; + } + return null; + }); + + render({ model, el }); + + // Manually trigger the table_html change handler + const tableHtmlChangeHandler = model.on.mock.calls.find( + (call) => call[0] === 'change:table_html', + )[1]; + tableHtmlChangeHandler(); + + // Check that the table has two columns + const headers = el.querySelectorAll( + '.paginated-table-container .col-header-name', + ); + expect(headers).toHaveLength(2); + + // Check that the headers are an empty string (for the index) and "value" + expect(headers[0].textContent).toBe(''); + expect(headers[1].textContent).toBe('value'); + }); }); diff --git a/tests/unit/display/test_anywidget.py b/tests/unit/display/test_anywidget.py index a635697e20..252ba8100e 100644 --- a/tests/unit/display/test_anywidget.py +++ b/tests/unit/display/test_anywidget.py @@ -80,8 +80,31 @@ def handler(signum, frame): signal.alarm(0) +def test_css_contains_dark_mode_selectors(): + """Test that the CSS for dark mode is loaded with all required selectors.""" + from bigframes.display.anywidget import TableWidget + + mock_df = mock.create_autospec(bigframes.dataframe.DataFrame, instance=True) + # mock_df.columns and mock_df.dtypes are needed for __init__ + mock_df.columns = ["col1"] + mock_df.dtypes = {"col1": "object"} + + # Mock _block to avoid AttributeError during _set_table_html + mock_block = mock.Mock() + mock_block.has_index = False + mock_df._block = mock_block + + with mock.patch.object(TableWidget, "_initial_load"): + widget = TableWidget(mock_df) + css = widget._css + assert "@media (prefers-color-scheme: dark)" in css + assert 'html[theme="dark"]' in css + assert 'body[data-theme="dark"]' in css + + @pytest.fixture def mock_df(): + """A mock DataFrame that can be used in multiple tests.""" df = mock.create_autospec(bigframes.dataframe.DataFrame, instance=True) df.columns = ["col1", "col2"] df.dtypes = {"col1": "int64", "col2": "int64"} @@ -104,6 +127,7 @@ def mock_df(): 
def test_sorting_single_column(mock_df): + """Test that the widget can be sorted by a single column.""" from bigframes.display.anywidget import TableWidget with bigframes.option_context("display.repr_mode", "anywidget"): @@ -122,6 +146,7 @@ def test_sorting_single_column(mock_df): def test_sorting_multi_column(mock_df): + """Test that the widget can be sorted by multiple columns.""" from bigframes.display.anywidget import TableWidget with bigframes.option_context("display.repr_mode", "anywidget"): @@ -137,6 +162,7 @@ def test_sorting_multi_column(mock_df): def test_page_size_change_resets_sort(mock_df): + """Test that changing the page size resets the sorting.""" from bigframes.display.anywidget import TableWidget with bigframes.option_context("display.repr_mode", "anywidget"): From cc994f3a7a9c04efe4e1f2beb78e33256f3a1e30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Fri, 9 Jan 2026 14:12:24 -0600 Subject: [PATCH 07/28] chore: remove language from sitemap paths (#2373) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- docs/conf.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/conf.py b/docs/conf.py index 22868aab67..9883467edf 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -267,6 +267,12 @@ # https://sphinx-sitemap.readthedocs.io/en/latest/getting-started.html#usage html_baseurl = "https://dataframes.bigquery.dev/" +sitemap_locales = [None] + +# We don't have any immediate plans to translate the API reference, so omit the +# language from the URLs. +# https://sphinx-sitemap.readthedocs.io/en/latest/advanced-configuration.html#configuration-customizing-url-scheme +sitemap_url_scheme = "{link}" # -- Options for warnings ------------------------------------------------------ From b023cb0a7c9f1db7bd9db5f3f87f9f642c7a2950 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Fri, 9 Jan 2026 18:01:37 -0800 Subject: [PATCH 08/28] refactor: fix window and numeric/comparison ops for sqlglot compiler (#2372) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change can resolve `test_series_int_int_operators_scalar` and all tests in test_windows.py presubmit failures in #2248. 
Fixes internal issue 417774347 🦕 --- .../compile/sqlglot/aggregations/windows.py | 25 +++++++++++++++++++ bigframes/core/compile/sqlglot/compiler.py | 3 +++ .../sqlglot/expressions/comparison_ops.py | 12 +++++++++ .../sqlglot/expressions/numeric_ops.py | 12 +++++++++ .../sqlglot/aggregations/test_windows.py | 4 +-- .../out.sql | 22 ++++++++++------ .../out.sql | 11 +++++--- .../out.sql | 5 +++- 8 files changed, 79 insertions(+), 15 deletions(-) diff --git a/bigframes/core/compile/sqlglot/aggregations/windows.py b/bigframes/core/compile/sqlglot/aggregations/windows.py index 6d6c507455..9c32788585 100644 --- a/bigframes/core/compile/sqlglot/aggregations/windows.py +++ b/bigframes/core/compile/sqlglot/aggregations/windows.py @@ -44,6 +44,7 @@ def apply_window_if_present( order_by = None elif window.is_range_bounded: order_by = get_window_order_by((window.ordering[0],)) + order_by = remove_null_ordering_for_range_windows(order_by) else: order_by = get_window_order_by(window.ordering) @@ -150,6 +151,30 @@ def get_window_order_by( return tuple(order_by) +def remove_null_ordering_for_range_windows( + order_by: typing.Optional[tuple[sge.Ordered, ...]], +) -> typing.Optional[tuple[sge.Ordered, ...]]: + """Removes NULL FIRST/LAST from ORDER BY expressions in RANGE windows. 
+ Here's the support matrix: + ✅ sum(x) over (order by y desc nulls last) + 🚫 sum(x) over (order by y asc nulls last) + ✅ sum(x) over (order by y asc nulls first) + 🚫 sum(x) over (order by y desc nulls first) + """ + if order_by is None: + return None + + new_order_by = [] + for key in order_by: + kargs = key.args + if kargs.get("desc") is True and kargs.get("nulls_first", False): + kargs["nulls_first"] = False + elif kargs.get("desc") is False and not kargs.setdefault("nulls_first", True): + kargs["nulls_first"] = True + new_order_by.append(sge.Ordered(**kargs)) + return tuple(new_order_by) + + def _get_window_bounds( value, is_preceding: bool ) -> tuple[typing.Union[str, sge.Expression], typing.Optional[str]]: diff --git a/bigframes/core/compile/sqlglot/compiler.py b/bigframes/core/compile/sqlglot/compiler.py index b3b813a1c0..e77370892c 100644 --- a/bigframes/core/compile/sqlglot/compiler.py +++ b/bigframes/core/compile/sqlglot/compiler.py @@ -356,6 +356,9 @@ def compile_window(node: nodes.WindowOpNode, child: ir.SQLGlotIR) -> ir.SQLGlotI observation_count = windows.apply_window_if_present( sge.func("SUM", is_observation), window_spec ) + observation_count = sge.func( + "COALESCE", observation_count, sge.convert(0) + ) else: # Operations like count treat even NULLs as valid observations # for the sake of min_periods notnull is just used to convert diff --git a/bigframes/core/compile/sqlglot/expressions/comparison_ops.py b/bigframes/core/compile/sqlglot/expressions/comparison_ops.py index 81bc9e0f56..8fda3b80dd 100644 --- a/bigframes/core/compile/sqlglot/expressions/comparison_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/comparison_ops.py @@ -89,6 +89,9 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: @register_binary_op(ops.ge_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if left.expr == sge.null() or right.expr == sge.null(): + return sge.null() + left_expr = _coerce_bool_to_int(left) right_expr = 
_coerce_bool_to_int(right) return sge.GTE(this=left_expr, expression=right_expr) @@ -96,6 +99,9 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: @register_binary_op(ops.gt_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if left.expr == sge.null() or right.expr == sge.null(): + return sge.null() + left_expr = _coerce_bool_to_int(left) right_expr = _coerce_bool_to_int(right) return sge.GT(this=left_expr, expression=right_expr) @@ -103,6 +109,9 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: @register_binary_op(ops.lt_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if left.expr == sge.null() or right.expr == sge.null(): + return sge.null() + left_expr = _coerce_bool_to_int(left) right_expr = _coerce_bool_to_int(right) return sge.LT(this=left_expr, expression=right_expr) @@ -110,6 +119,9 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: @register_binary_op(ops.le_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if left.expr == sge.null() or right.expr == sge.null(): + return sge.null() + left_expr = _coerce_bool_to_int(left) right_expr = _coerce_bool_to_int(right) return sge.LTE(this=left_expr, expression=right_expr) diff --git a/bigframes/core/compile/sqlglot/expressions/numeric_ops.py b/bigframes/core/compile/sqlglot/expressions/numeric_ops.py index 16f7dec717..f7c763e207 100644 --- a/bigframes/core/compile/sqlglot/expressions/numeric_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/numeric_ops.py @@ -388,6 +388,9 @@ def _(expr: TypedExpr) -> sge.Expression: @register_binary_op(ops.add_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if left.expr == sge.null() or right.expr == sge.null(): + return sge.null() + if left.dtype == dtypes.STRING_DTYPE and right.dtype == dtypes.STRING_DTYPE: # String addition return sge.Concat(expressions=[left.expr, right.expr]) @@ -442,6 +445,9 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: 
@register_binary_op(ops.floordiv_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if left.expr == sge.null() or right.expr == sge.null(): + return sge.null() + left_expr = _coerce_bool_to_int(left) right_expr = _coerce_bool_to_int(right) @@ -525,6 +531,9 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: @register_binary_op(ops.mul_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if left.expr == sge.null() or right.expr == sge.null(): + return sge.null() + left_expr = _coerce_bool_to_int(left) right_expr = _coerce_bool_to_int(right) @@ -548,6 +557,9 @@ def _(expr: TypedExpr, n_digits: TypedExpr) -> sge.Expression: @register_binary_op(ops.sub_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if left.expr == sge.null() or right.expr == sge.null(): + return sge.null() + if dtypes.is_numeric(left.dtype) and dtypes.is_numeric(right.dtype): left_expr = _coerce_bool_to_int(left) right_expr = _coerce_bool_to_int(right) diff --git a/tests/unit/core/compile/sqlglot/aggregations/test_windows.py b/tests/unit/core/compile/sqlglot/aggregations/test_windows.py index e6343a63d7..d1204c6010 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/test_windows.py +++ b/tests/unit/core/compile/sqlglot/aggregations/test_windows.py @@ -127,7 +127,7 @@ def test_apply_window_if_present_range_bounded(self): ) self.assertEqual( result.sql(dialect="bigquery"), - "value OVER (ORDER BY `col1` ASC NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)", + "value OVER (ORDER BY `col1` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)", ) def test_apply_window_if_present_range_bounded_timedelta(self): @@ -142,7 +142,7 @@ def test_apply_window_if_present_range_bounded_timedelta(self): ) self.assertEqual( result.sql(dialect="bigquery"), - "value OVER (ORDER BY `col1` ASC NULLS LAST RANGE BETWEEN 86400000000 PRECEDING AND 43200000000 FOLLOWING)", + "value OVER (ORDER BY `col1` ASC RANGE BETWEEN 86400000000 PRECEDING AND 
43200000000 FOLLOWING)", ) def test_apply_window_if_present_all_params(self): diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_groupby_rolling/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_groupby_rolling/out.sql index e8fabd1129..0dca6d9d49 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_groupby_rolling/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_groupby_rolling/out.sql @@ -22,10 +22,13 @@ WITH `bfcte_0` AS ( SELECT *, CASE - WHEN SUM(CAST(NOT `bfcol_7` IS NULL AS INT64)) OVER ( - PARTITION BY `bfcol_9` - ORDER BY `bfcol_9` ASC NULLS LAST, `rowindex` ASC NULLS LAST - ROWS BETWEEN 3 PRECEDING AND CURRENT ROW + WHEN COALESCE( + SUM(CAST(NOT `bfcol_7` IS NULL AS INT64)) OVER ( + PARTITION BY `bfcol_9` + ORDER BY `bfcol_9` ASC NULLS LAST, `rowindex` ASC NULLS LAST + ROWS BETWEEN 3 PRECEDING AND CURRENT ROW + ), + 0 ) < 3 THEN NULL ELSE COALESCE( @@ -42,10 +45,13 @@ WITH `bfcte_0` AS ( SELECT *, CASE - WHEN SUM(CAST(NOT `bfcol_8` IS NULL AS INT64)) OVER ( - PARTITION BY `bfcol_9` - ORDER BY `bfcol_9` ASC NULLS LAST, `rowindex` ASC NULLS LAST - ROWS BETWEEN 3 PRECEDING AND CURRENT ROW + WHEN COALESCE( + SUM(CAST(NOT `bfcol_8` IS NULL AS INT64)) OVER ( + PARTITION BY `bfcol_9` + ORDER BY `bfcol_9` ASC NULLS LAST, `rowindex` ASC NULLS LAST + ROWS BETWEEN 3 PRECEDING AND CURRENT ROW + ), + 0 ) < 3 THEN NULL ELSE COALESCE( diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_range_rolling/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_range_rolling/out.sql index 581c81c6b4..fe4cea08cb 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_range_rolling/out.sql +++ 
b/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_range_rolling/out.sql @@ -6,14 +6,17 @@ WITH `bfcte_0` AS ( SELECT *, CASE - WHEN SUM(CAST(NOT `bfcol_1` IS NULL AS INT64)) OVER ( - ORDER BY UNIX_MICROS(`bfcol_0`) ASC NULLS LAST - RANGE BETWEEN 2999999 PRECEDING AND CURRENT ROW + WHEN COALESCE( + SUM(CAST(NOT `bfcol_1` IS NULL AS INT64)) OVER ( + ORDER BY UNIX_MICROS(`bfcol_0`) ASC + RANGE BETWEEN 2999999 PRECEDING AND CURRENT ROW + ), + 0 ) < 1 THEN NULL ELSE COALESCE( SUM(`bfcol_1`) OVER ( - ORDER BY UNIX_MICROS(`bfcol_0`) ASC NULLS LAST + ORDER BY UNIX_MICROS(`bfcol_0`) ASC RANGE BETWEEN 2999999 PRECEDING AND CURRENT ROW ), 0 diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_skips_nulls_op/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_skips_nulls_op/out.sql index 788eb49ddf..bf1e76c55c 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_skips_nulls_op/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_skips_nulls_op/out.sql @@ -7,7 +7,10 @@ WITH `bfcte_0` AS ( SELECT *, CASE - WHEN SUM(CAST(NOT `int64_col` IS NULL AS INT64)) OVER (ORDER BY `rowindex` ASC NULLS LAST ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) < 3 + WHEN COALESCE( + SUM(CAST(NOT `int64_col` IS NULL AS INT64)) OVER (ORDER BY `rowindex` ASC NULLS LAST ROWS BETWEEN 2 PRECEDING AND CURRENT ROW), + 0 + ) < 3 THEN NULL ELSE COALESCE( SUM(`int64_col`) OVER (ORDER BY `rowindex` ASC NULLS LAST ROWS BETWEEN 2 PRECEDING AND CURRENT ROW), From dbe8e7eb9b497ea15c6b473923b61c56c003de7a Mon Sep 17 00:00:00 2001 From: jialuoo Date: Mon, 12 Jan 2026 19:00:44 +0000 Subject: [PATCH 09/28] chore: Migrate BinaryRemoteFunctionOp operator to SQLGlot (#2371) Thank you for opening a Pull Request! 
Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes b/452130300 --- .../sqlglot/expressions/generic_ops.py | 13 +++++++ .../test_binary_remote_function_op/out.sql | 14 +++++++ .../sqlglot/expressions/test_generic_ops.py | 37 +++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_binary_remote_function_op/out.sql diff --git a/bigframes/core/compile/sqlglot/expressions/generic_ops.py b/bigframes/core/compile/sqlglot/expressions/generic_ops.py index 27973ef8b5..4a2a5fb213 100644 --- a/bigframes/core/compile/sqlglot/expressions/generic_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/generic_ops.py @@ -140,6 +140,19 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: return sge.Coalesce(this=left.expr, expressions=[right.expr]) +@register_binary_op(ops.BinaryRemoteFunctionOp, pass_op=True) +def _( + left: TypedExpr, right: TypedExpr, op: ops.BinaryRemoteFunctionOp +) -> sge.Expression: + routine_ref = op.function_def.routine_ref + # Quote project, dataset, and routine IDs to avoid keyword clashes. 
+ func_name = ( + f"`{routine_ref.project}`.`{routine_ref.dataset_id}`.`{routine_ref.routine_id}`" + ) + + return sge.func(func_name, left.expr, right.expr) + + @register_nary_op(ops.case_when_op) def _(*cases_and_outputs: TypedExpr) -> sge.Expression: # Need to upcast BOOL to INT if any output is numeric diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_binary_remote_function_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_binary_remote_function_op/out.sql new file mode 100644 index 0000000000..7272a3a5be --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_binary_remote_function_op/out.sql @@ -0,0 +1,14 @@ +WITH `bfcte_0` AS ( + SELECT + `float64_col`, + `int64_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + `my_project`.`my_dataset`.`my_routine`(`int64_col`, `float64_col`) AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `int64_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py index 11daf6813a..5657874eb5 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py @@ -168,6 +168,43 @@ def test_astype_json_invalid( ) +def test_binary_remote_function_op(scalar_types_df: bpd.DataFrame, snapshot): + from google.cloud import bigquery + + from bigframes.functions import udf_def + + bf_df = scalar_types_df[["int64_col", "float64_col"]] + op = ops.BinaryRemoteFunctionOp( + function_def=udf_def.BigqueryUdf( + routine_ref=bigquery.RoutineReference.from_string( + "my_project.my_dataset.my_routine" + ), + signature=udf_def.UdfSignature( + input_types=( + udf_def.UdfField( + "x", + bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 + ), + ), + 
udf_def.UdfField( + "y", + bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.FLOAT64 + ), + ), + ), + output_bq_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.FLOAT64 + ), + ), + ) + ) + sql = utils._apply_binary_op(bf_df, op, "int64_col", "float64_col") + + snapshot.assert_match(sql, "out.sql") + + def test_case_when_op(scalar_types_df: bpd.DataFrame, snapshot): ops_map = { "single_case": ops.case_when_op.as_expr( From 34b5975f6911c5aa5ffc64a2fe6967a9f3d86f78 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Mon, 12 Jan 2026 13:47:19 -0800 Subject: [PATCH 10/28] feat: Add max_columns control for anywidget mode (#2374) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR introduces a max_columns configuration for anywidget mode, giving users control over how many columns are rendered in the interactive table. This is particularly useful for improving readability and performance when working with wide DataFrames. Key Changes: * Configurable Column Limit: Users can now set bigframes.options.display.max_columns to limit the number of displayed columns. * Interactive Control: The TableWidget now includes a "Max columns" dropdown in the footer, allowing users to dynamically adjust this setting (options: 3, 5, 10, 15, 20, All). * Smart Truncation: When columns exceed the limit, the table displays the first N/2 and last N/2 columns, separated by an ellipsis (...) column. * Default Value: The default max_columns is set to 7 to provide a balanced view on standard screens without requiring horizontal scrolling. 
Example Usage: ``` 1 import bigframes.pandas as bpd 2 3 # Set global option 4 bpd.options.display.max_columns = 10 5 6 # Or use context manager 7 with bpd.option_context("display.max_columns", 5): 8 display(df) ``` verified at: * vs code notebook: http://screencast/cast/NTE2MDM4NTkxNjE3NDMzNnw2ZGI5YjAxOS1jMw * colab notebook: screen/A8CBDFHyoJTzkAu Fixes #<452681068> 🦕 --- bigframes/display/anywidget.py | 20 +++ bigframes/display/html.py | 72 ++++++++++- bigframes/display/table_widget.css | 15 ++- bigframes/display/table_widget.js | 47 ++++++- notebooks/dataframes/anywidget_mode.ipynb | 8 +- tests/js/table_widget.test.js | 145 ++++++++++++++++++++++ tests/unit/display/test_html.py | 37 ++++++ 7 files changed, 332 insertions(+), 12 deletions(-) diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index 6a16a9f762..be0d2b45d0 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -66,6 +66,7 @@ class TableWidget(_WIDGET_BASE): page = traitlets.Int(0).tag(sync=True) page_size = traitlets.Int(0).tag(sync=True) + max_columns = traitlets.Int(allow_none=True, default_value=None).tag(sync=True) row_count = traitlets.Int(allow_none=True, default_value=None).tag(sync=True) table_html = traitlets.Unicode("").tag(sync=True) sort_context = traitlets.List(traitlets.Dict(), default_value=[]).tag(sync=True) @@ -103,10 +104,13 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame): # respect display options for initial page size initial_page_size = bigframes.options.display.max_rows + initial_max_columns = bigframes.options.display.max_columns # set traitlets properties that trigger observers # TODO(b/462525985): Investigate and improve TableWidget UX for DataFrames with a large number of columns. self.page_size = initial_page_size + self.max_columns = initial_max_columns + # TODO(b/469861913): Nested columns from structs (e.g., 'struct_col.name') are not currently sortable. 
# TODO(b/463754889): Support non-string column labels for sorting. if all(isinstance(col, str) for col in dataframe.columns): @@ -218,6 +222,14 @@ def _validate_page_size(self, proposal: dict[str, Any]) -> int: max_page_size = 1000 return min(value, max_page_size) + @traitlets.validate("max_columns") + def _validate_max_columns(self, proposal: dict[str, Any]) -> int: + """Validate max columns to ensure it's positive or 0 (for all).""" + value = proposal["value"] + if value is None: + return 0 # Normalize None to 0 for traitlet + return max(0, value) + def _get_next_batch(self) -> bool: """ Gets the next batch of data from the generator and appends to cache. @@ -348,6 +360,7 @@ def _set_table_html(self) -> None: dataframe=page_data, table_id=f"table-{self._table_id}", orderable_columns=self.orderable_columns, + max_columns=self.max_columns, ) if new_page is not None: @@ -382,3 +395,10 @@ def _page_size_changed(self, _change: dict[str, Any]) -> None: # Update the table display self._set_table_html() + + @traitlets.observe("max_columns") + def _max_columns_changed(self, _change: dict[str, Any]) -> None: + """Handler for when max columns is changed from the frontend.""" + if not self._initial_load_complete: + return + self._set_table_html() diff --git a/bigframes/display/html.py b/bigframes/display/html.py index 912f1d7e3a..6102d1512c 100644 --- a/bigframes/display/html.py +++ b/bigframes/display/html.py @@ -46,21 +46,51 @@ def render_html( dataframe: pd.DataFrame, table_id: str, orderable_columns: list[str] | None = None, + max_columns: int | None = None, ) -> str: """Render a pandas DataFrame to HTML with specific styling.""" orderable_columns = orderable_columns or [] classes = "dataframe table table-striped table-hover" table_html_parts = [f''] - table_html_parts.append(_render_table_header(dataframe, orderable_columns)) - table_html_parts.append(_render_table_body(dataframe)) + + # Handle column truncation + columns = list(dataframe.columns) + if max_columns is 
not None and max_columns > 0 and len(columns) > max_columns: + half = max_columns // 2 + left_columns = columns[:half] + # Ensure we don't take more than available if half is 0 or calculation is weird, + # but typical case is safe. + right_count = max_columns - half + right_columns = columns[-right_count:] if right_count > 0 else [] + show_ellipsis = True + else: + left_columns = columns + right_columns = [] + show_ellipsis = False + + table_html_parts.append( + _render_table_header( + dataframe, orderable_columns, left_columns, right_columns, show_ellipsis + ) + ) + table_html_parts.append( + _render_table_body(dataframe, left_columns, right_columns, show_ellipsis) + ) table_html_parts.append("
") return "".join(table_html_parts) -def _render_table_header(dataframe: pd.DataFrame, orderable_columns: list[str]) -> str: +def _render_table_header( + dataframe: pd.DataFrame, + orderable_columns: list[str], + left_columns: list[Any], + right_columns: list[Any], + show_ellipsis: bool, +) -> str: """Render the header of the HTML table.""" header_parts = [" ", " "] - for col in dataframe.columns: + + def render_col_header(col): th_classes = [] if col in orderable_columns: th_classes.append("sortable") @@ -69,11 +99,28 @@ def _render_table_header(dataframe: pd.DataFrame, orderable_columns: list[str]) f'
' f"{html.escape(str(col))}
" ) + + for col in left_columns: + render_col_header(col) + + if show_ellipsis: + header_parts.append( + '
...
' + ) + + for col in right_columns: + render_col_header(col) + header_parts.extend([" ", " "]) return "\n".join(header_parts) -def _render_table_body(dataframe: pd.DataFrame) -> str: +def _render_table_body( + dataframe: pd.DataFrame, + left_columns: list[Any], + right_columns: list[Any], + show_ellipsis: bool, +) -> str: """Render the body of the HTML table.""" body_parts = [" "] precision = options.display.precision @@ -81,7 +128,9 @@ def _render_table_body(dataframe: pd.DataFrame) -> str: for i in range(len(dataframe)): body_parts.append(" ") row = dataframe.iloc[i] - for col_name, value in row.items(): + + def render_col_cell(col_name): + value = row[col_name] dtype = dataframe.dtypes.loc[col_name] # type: ignore align = "right" if _is_dtype_numeric(dtype) else "left" @@ -101,6 +150,17 @@ def _render_table_body(dataframe: pd.DataFrame) -> str: f' ' f"{html.escape(cell_content)}" ) + + for col in left_columns: + render_col_cell(col) + + if show_ellipsis: + # Ellipsis cell + body_parts.append(' ...') + + for col in right_columns: + render_col_cell(col) + body_parts.append(" ") body_parts.append(" ") return "\n".join(body_parts) diff --git a/bigframes/display/table_widget.css b/bigframes/display/table_widget.css index b02caa004e..da0a701d69 100644 --- a/bigframes/display/table_widget.css +++ b/bigframes/display/table_widget.css @@ -117,15 +117,24 @@ body[data-theme='dark'] .bigframes-widget.bigframes-widget { margin: 0 8px; } -.bigframes-widget .page-size { +.bigframes-widget .settings { align-items: center; display: flex; flex-direction: row; - gap: 4px; + gap: 16px; justify-content: end; } -.bigframes-widget .page-size label { +.bigframes-widget .page-size, +.bigframes-widget .max-columns { + align-items: center; + display: flex; + flex-direction: row; + gap: 4px; +} + +.bigframes-widget .page-size label, +.bigframes-widget .max-columns label { margin-right: 8px; } diff --git a/bigframes/display/table_widget.js b/bigframes/display/table_widget.js index 
40a027a8bc..6beaf47c21 100644 --- a/bigframes/display/table_widget.js +++ b/bigframes/display/table_widget.js @@ -22,6 +22,7 @@ const ModelProperty = { ROW_COUNT: 'row_count', SORT_CONTEXT: 'sort_context', TABLE_HTML: 'table_html', + MAX_COLUMNS: 'max_columns', }; const Event = { @@ -71,6 +72,10 @@ function render({ model, el }) { attributeFilter: ['class', 'data-theme', 'data-vscode-theme-kind'], }); + // Settings controls container + const settingsContainer = document.createElement('div'); + settingsContainer.classList.add('settings'); + // Pagination controls const paginationContainer = document.createElement('div'); paginationContainer.classList.add('pagination'); @@ -102,6 +107,32 @@ function render({ model, el }) { pageSizeInput.appendChild(option); } + // Max columns controls + const maxColumnsContainer = document.createElement('div'); + maxColumnsContainer.classList.add('max-columns'); + const maxColumnsLabel = document.createElement('label'); + const maxColumnsInput = document.createElement('select'); + + maxColumnsLabel.textContent = 'Max columns:'; + + // 0 represents "All" (all columns) + const maxColumnOptions = [5, 10, 15, 20, 0]; + for (const cols of maxColumnOptions) { + const option = document.createElement('option'); + option.value = cols; + option.textContent = cols === 0 ? 'All' : cols; + + const currentMax = model.get(ModelProperty.MAX_COLUMNS); + // Handle None/null from python as 0/All + const currentMaxVal = + currentMax === null || currentMax === undefined ? 
0 : currentMax; + + if (cols === currentMaxVal) { + option.selected = true; + } + maxColumnsInput.appendChild(option); + } + function updateButtonStates() { const currentPage = model.get(ModelProperty.PAGE); const pageSize = model.get(ModelProperty.PAGE_SIZE); @@ -259,6 +290,12 @@ function render({ model, el }) { } }); + maxColumnsInput.addEventListener(Event.CHANGE, (e) => { + const newVal = Number(e.target.value); + model.set(ModelProperty.MAX_COLUMNS, newVal); + model.save_changes(); + }); + model.on(Event.CHANGE_TABLE_HTML, handleTableHTMLChange); model.on(`change:${ModelProperty.ROW_COUNT}`, updateButtonStates); model.on(`change:${ModelProperty.ERROR_MESSAGE}`, handleErrorMessageChange); @@ -270,11 +307,19 @@ function render({ model, el }) { paginationContainer.appendChild(prevPage); paginationContainer.appendChild(pageIndicator); paginationContainer.appendChild(nextPage); + pageSizeContainer.appendChild(pageSizeLabel); pageSizeContainer.appendChild(pageSizeInput); + + maxColumnsContainer.appendChild(maxColumnsLabel); + maxColumnsContainer.appendChild(maxColumnsInput); + + settingsContainer.appendChild(maxColumnsContainer); + settingsContainer.appendChild(pageSizeContainer); + footer.appendChild(rowCountLabel); footer.appendChild(paginationContainer); - footer.appendChild(pageSizeContainer); + footer.appendChild(settingsContainer); el.appendChild(errorContainer); el.appendChild(tableContainer); diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index a25acd5d28..bf40dd77c5 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -51,7 +51,8 @@ "- **Rich DataFrames & Series:** Both DataFrames and Series are displayed as interactive widgets.\n", "- **Pagination:** Navigate through large datasets page by page without overwhelming the output.\n", "- **Column Sorting:** Click column headers to toggle between ascending, descending, and unsorted views. 
Use **Shift + Click** to sort by multiple columns.\n", - "- **Column Resizing:** Drag the dividers between column headers to adjust their width." + "- **Column Resizing:** Drag the dividers between column headers to adjust their width.\n", + "- **Max Columns Control:** Limit the number of displayed columns to improve performance and readability for wide datasets." ] }, { @@ -511,7 +512,10 @@ "metadata": {}, "source": [ "### Adjustable Column Widths\n", - "You can easily adjust the width of any column in the table. Simply hover your mouse over the vertical dividers between column headers. When the cursor changes to a resize icon, click and drag to expand or shrink the column to your desired width. This allows for better readability and customization of your table view." + "You can easily adjust the width of any column in the table. Simply hover your mouse over the vertical dividers between column headers. When the cursor changes to a resize icon, click and drag to expand or shrink the column to your desired width. This allows for better readability and customization of your table view.\n", + "\n", + "### Control Maximum Columns\n", + "You can control the number of columns displayed in the widget using the **Max columns** dropdown in the footer. This is useful for wide DataFrames where you want to focus on a subset of columns or improve rendering performance. Options include 3, 5, 7, 10, 20, or All." ] }, { diff --git a/tests/js/table_widget.test.js b/tests/js/table_widget.test.js index 5843694617..e392b38270 100644 --- a/tests/js/table_widget.test.js +++ b/tests/js/table_widget.test.js @@ -335,4 +335,149 @@ describe('TableWidget', () => { expect(headers[0].textContent).toBe(''); expect(headers[1].textContent).toBe('value'); }); + + /* + * Tests that the widget correctly renders HTML with truncated columns (ellipsis) + * and ensures that the ellipsis column is not treated as a sortable column. 
+ */ + it('should render truncated columns with ellipsis and not make ellipsis sortable', () => { + // Mock HTML with truncated columns + // Use the structure produced by the python backend + const mockHtml = ` + + + + + + + + + + + + + + + +
col1
...
col10
1...10
+ `; + + model.get.mockImplementation((property) => { + if (property === 'table_html') { + return mockHtml; + } + if (property === 'orderable_columns') { + // Only actual columns are orderable + return ['col1', 'col10']; + } + if (property === 'sort_context') { + return []; + } + return null; + }); + + render({ model, el }); + + // Manually trigger the table_html change handler + const tableHtmlChangeHandler = model.on.mock.calls.find( + (call) => call[0] === 'change:table_html', + )[1]; + tableHtmlChangeHandler(); + + const headers = el.querySelectorAll('th'); + expect(headers).toHaveLength(3); + + // Check col1 (sortable) + const col1Header = headers[0]; + const col1Indicator = col1Header.querySelector('.sort-indicator'); + expect(col1Indicator).not.toBeNull(); // Should exist (hidden by default) + + // Check ellipsis (not sortable) + const ellipsisHeader = headers[1]; + const ellipsisIndicator = ellipsisHeader.querySelector('.sort-indicator'); + // The render function adds sort indicators only if the column name matches an entry in orderable_columns. + // The ellipsis header content is "..." which is not in ['col1', 'col10']. + expect(ellipsisIndicator).toBeNull(); + + // Check col10 (sortable) + const col10Header = headers[2]; + const col10Indicator = col10Header.querySelector('.sort-indicator'); + expect(col10Indicator).not.toBeNull(); + }); + + describe('Max columns', () => { + /* + * Tests for the max columns dropdown functionality. 
+ */ + + it('should render the max columns dropdown', () => { + // Mock basic state + model.get.mockImplementation((property) => { + if (property === 'max_columns') { + return 20; + } + return null; + }); + + render({ model, el }); + + const maxColumnsContainer = el.querySelector('.max-columns'); + expect(maxColumnsContainer).not.toBeNull(); + const label = maxColumnsContainer.querySelector('label'); + expect(label.textContent).toBe('Max columns:'); + const select = maxColumnsContainer.querySelector('select'); + expect(select).not.toBeNull(); + }); + + it('should select the correct initial value', () => { + const initialMaxColumns = 20; + model.get.mockImplementation((property) => { + if (property === 'max_columns') { + return initialMaxColumns; + } + return null; + }); + + render({ model, el }); + + const select = el.querySelector('.max-columns select'); + expect(Number(select.value)).toBe(initialMaxColumns); + }); + + it('should handle None/null initial value as 0 (All)', () => { + model.get.mockImplementation((property) => { + if (property === 'max_columns') { + return null; // Python None is null in JS + } + return null; + }); + + render({ model, el }); + + const select = el.querySelector('.max-columns select'); + expect(Number(select.value)).toBe(0); + expect(select.options[select.selectedIndex].textContent).toBe('All'); + }); + + it('should update model when value changes', () => { + model.get.mockImplementation((property) => { + if (property === 'max_columns') { + return 20; + } + return null; + }); + + render({ model, el }); + + const select = el.querySelector('.max-columns select'); + + // Change to 10 + select.value = '10'; + const event = new Event('change'); + select.dispatchEvent(event); + + expect(model.set).toHaveBeenCalledWith('max_columns', 10); + expect(model.save_changes).toHaveBeenCalled(); + }); + }); }); diff --git a/tests/unit/display/test_html.py b/tests/unit/display/test_html.py index 0762a2fd8d..35a74d098a 100644 --- 
a/tests/unit/display/test_html.py +++ b/tests/unit/display/test_html.py @@ -148,3 +148,40 @@ def test_render_html_precision(): # Make sure we reset to default html = bf_html.render_html(dataframe=df, table_id="test-table") assert "3.141593" in html + + +def test_render_html_max_columns_truncation(): + # Create a DataFrame with 10 columns + data = {f"col_{i}": [i] for i in range(10)} + df = pd.DataFrame(data) + + # Test max_columns=4 + # max_columns=4 -> 2 left, 2 right. col_0, col_1 ... col_8, col_9 + html = bf_html.render_html(dataframe=df, table_id="test", max_columns=4) + + assert "col_0" in html + assert "col_1" in html + assert "col_2" not in html + assert "col_7" not in html + assert "col_8" in html + assert "col_9" in html + assert "..." in html + + # Test max_columns=3 + # 3 // 2 = 1. Left: col_0. Right: 3 - 1 = 2. col_8, col_9. + # Total displayed: col_0, ..., col_8, col_9. (3 data cols + 1 ellipsis) + html = bf_html.render_html(dataframe=df, table_id="test", max_columns=3) + assert "col_0" in html + assert "col_1" not in html + assert "col_7" not in html + assert "col_8" in html + assert "col_9" in html + + # Test max_columns=1 + # 1 // 2 = 0. Left: []. Right: 1. col_9. + # Total: ..., col_9. + html = bf_html.render_html(dataframe=df, table_id="test", max_columns=1) + assert "col_0" not in html + assert "col_8" not in html + assert "col_9" in html + assert "..." in html From 8adc6b2108ba9fe4f10ab0241838b52b46fc940a Mon Sep 17 00:00:00 2001 From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com> Date: Mon, 12 Jan 2026 14:20:33 -0800 Subject: [PATCH 11/28] test: remove deprecated claude-3-opus tests (#2375) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! 
Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- bigframes/ml/llm.py | 2 +- tests/system/load/test_llm.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index b670cabaea..e627e76d17 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -873,7 +873,7 @@ class Claude3TextGenerator(base.RetriableRemotePredictor): "claude-3-sonnet" (deprecated) is Anthropic's dependable combination of skills and speed. It is engineered to be dependable for scaled AI deployments across a variety of use cases. "claude-3-haiku" is Anthropic's fastest, most compact vision and text model for near-instant responses to simple queries, meant for seamless AI experiences mimicking human interactions. "claude-3-5-sonnet" is Anthropic's most powerful AI model and maintains the speed and cost of Claude 3 Sonnet, which is a mid-tier model. - "claude-3-opus" is Anthropic's second-most powerful AI model, with strong performance on highly complex tasks. + "claude-3-opus" (deprecated) is Anthropic's second-most powerful AI model, with strong performance on highly complex tasks. https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude#available-claude-models If no setting is provided, "claude-3-sonnet" will be used by default and a warning will be issued. 
diff --git a/tests/system/load/test_llm.py b/tests/system/load/test_llm.py index 9630952e67..25cde92c13 100644 --- a/tests/system/load/test_llm.py +++ b/tests/system/load/test_llm.py @@ -100,13 +100,13 @@ def test_llm_gemini_w_ground_with_google_search(llm_remote_text_df): # (b/366290533): Claude models are of extremely low capacity. The tests should reside in small tests. Moving these here just to protect BQML's shared capacity(as load test only runs once per day.) and make sure we still have minimum coverage. @pytest.mark.parametrize( "model_name", - ("claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"), + ("claude-3-haiku", "claude-3-5-sonnet"), ) @pytest.mark.flaky(retries=3, delay=120) def test_claude3_text_generator_create_load( dataset_id, model_name, session, session_us_east5, bq_connection ): - if model_name in ("claude-3-5-sonnet", "claude-3-opus"): + if model_name in ("claude-3-5-sonnet",): session = session_us_east5 claude3_text_generator_model = llm.Claude3TextGenerator( model_name=model_name, connection_name=bq_connection, session=session @@ -125,13 +125,13 @@ def test_claude3_text_generator_create_load( @pytest.mark.parametrize( "model_name", - ("claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"), + ("claude-3-haiku", "claude-3-5-sonnet"), ) @pytest.mark.flaky(retries=3, delay=120) def test_claude3_text_generator_predict_default_params_success( llm_text_df, model_name, session, session_us_east5, bq_connection ): - if model_name in ("claude-3-5-sonnet", "claude-3-opus"): + if model_name in ("claude-3-5-sonnet",): session = session_us_east5 claude3_text_generator_model = llm.Claude3TextGenerator( model_name=model_name, connection_name=bq_connection, session=session @@ -144,13 +144,13 @@ def test_claude3_text_generator_predict_default_params_success( @pytest.mark.parametrize( "model_name", - ("claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"), + ("claude-3-haiku", "claude-3-5-sonnet"), ) @pytest.mark.flaky(retries=3, delay=120) def 
test_claude3_text_generator_predict_with_params_success( llm_text_df, model_name, session, session_us_east5, bq_connection ): - if model_name in ("claude-3-5-sonnet", "claude-3-opus"): + if model_name in ("claude-3-5-sonnet",): session = session_us_east5 claude3_text_generator_model = llm.Claude3TextGenerator( model_name=model_name, connection_name=bq_connection, session=session @@ -165,13 +165,13 @@ def test_claude3_text_generator_predict_with_params_success( @pytest.mark.parametrize( "model_name", - ("claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"), + ("claude-3-haiku", "claude-3-5-sonnet"), ) @pytest.mark.flaky(retries=3, delay=120) def test_claude3_text_generator_predict_multi_col_success( llm_text_df, model_name, session, session_us_east5, bq_connection ): - if model_name in ("claude-3-5-sonnet", "claude-3-opus"): + if model_name in ("claude-3-5-sonnet",): session = session_us_east5 llm_text_df["additional_col"] = 1 From 798af4a30d34a2fed46df4c0f94ea2e7b7e17f68 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Mon, 12 Jan 2026 14:46:13 -0800 Subject: [PATCH 12/28] refactor: move log_adapter.py under logging/ directory (#2376) We will have more logging utilities in the future. It's better to organize them into a separate directory now. 
Related issue: 406578908 --- bigframes/bigquery/__init__.py | 2 +- bigframes/bigquery/_operations/ai.py | 3 ++- bigframes/bigquery/_operations/ml.py | 2 +- bigframes/core/groupby/dataframe_group_by.py | 2 +- bigframes/core/groupby/series_group_by.py | 2 +- bigframes/core/logging/__init__.py | 17 +++++++++++++++++ bigframes/core/{ => logging}/log_adapter.py | 0 bigframes/core/window/rolling.py | 3 ++- bigframes/dataframe.py | 3 ++- bigframes/ml/cluster.py | 2 +- bigframes/ml/compose.py | 2 +- bigframes/ml/decomposition.py | 2 +- bigframes/ml/ensemble.py | 2 +- bigframes/ml/forecasting.py | 2 +- bigframes/ml/imported.py | 2 +- bigframes/ml/impute.py | 2 +- bigframes/ml/linear_model.py | 2 +- bigframes/ml/llm.py | 3 ++- bigframes/ml/model_selection.py | 2 +- bigframes/ml/pipeline.py | 2 +- bigframes/ml/preprocessing.py | 2 +- bigframes/ml/remote.py | 3 ++- bigframes/operations/ai.py | 3 ++- bigframes/operations/blob.py | 2 +- bigframes/operations/datetimes.py | 2 +- bigframes/operations/lists.py | 2 +- bigframes/operations/plotting.py | 2 +- bigframes/operations/semantics.py | 3 ++- bigframes/operations/strings.py | 2 +- bigframes/operations/structs.py | 3 ++- bigframes/pandas/__init__.py | 2 +- bigframes/series.py | 3 ++- bigframes/session/__init__.py | 3 ++- bigframes/session/_io/bigquery/__init__.py | 2 +- bigframes/streaming/__init__.py | 2 +- bigframes/streaming/dataframe.py | 3 ++- tests/unit/core/logging/__init__.py | 13 +++++++++++++ .../unit/core/{ => logging}/test_log_adapter.py | 2 +- tests/unit/session/test_io_bigquery.py | 2 +- 39 files changed, 77 insertions(+), 36 deletions(-) create mode 100644 bigframes/core/logging/__init__.py rename bigframes/core/{ => logging}/log_adapter.py (100%) create mode 100644 tests/unit/core/logging/__init__.py rename tests/unit/core/{ => logging}/test_log_adapter.py (99%) diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index f835285a21..7a7a01a8fc 100644 --- a/bigframes/bigquery/__init__.py +++ 
b/bigframes/bigquery/__init__.py @@ -60,7 +60,7 @@ from bigframes.bigquery._operations.search import create_vector_index, vector_search from bigframes.bigquery._operations.sql import sql_scalar from bigframes.bigquery._operations.struct import struct -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter _functions = [ # approximate aggregate ops diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py index e8c28e61f5..e56292d64f 100644 --- a/bigframes/bigquery/_operations/ai.py +++ b/bigframes/bigquery/_operations/ai.py @@ -26,7 +26,8 @@ from bigframes import clients, dataframe, dtypes from bigframes import pandas as bpd from bigframes import series, session -from bigframes.core import convert, log_adapter +from bigframes.core import convert +from bigframes.core.logging import log_adapter from bigframes.ml import core as ml_core from bigframes.operations import ai_ops, output_schemas diff --git a/bigframes/bigquery/_operations/ml.py b/bigframes/bigquery/_operations/ml.py index 073be0ef2b..c9b48bb5ac 100644 --- a/bigframes/bigquery/_operations/ml.py +++ b/bigframes/bigquery/_operations/ml.py @@ -20,7 +20,7 @@ import google.cloud.bigquery import pandas as pd -import bigframes.core.log_adapter as log_adapter +import bigframes.core.logging.log_adapter as log_adapter import bigframes.core.sql.ml import bigframes.dataframe as dataframe import bigframes.ml.base diff --git a/bigframes/core/groupby/dataframe_group_by.py b/bigframes/core/groupby/dataframe_group_by.py index e3a132d4d0..7f9e5d627a 100644 --- a/bigframes/core/groupby/dataframe_group_by.py +++ b/bigframes/core/groupby/dataframe_group_by.py @@ -26,10 +26,10 @@ from bigframes import session from bigframes.core import agg_expressions from bigframes.core import expression as ex -from bigframes.core import log_adapter import bigframes.core.block_transforms as block_ops import bigframes.core.blocks as blocks from bigframes.core.groupby import aggs, 
group_by, series_group_by +from bigframes.core.logging import log_adapter import bigframes.core.ordering as order import bigframes.core.utils as utils import bigframes.core.validations as validations diff --git a/bigframes/core/groupby/series_group_by.py b/bigframes/core/groupby/series_group_by.py index b1485888a8..a8900cf545 100644 --- a/bigframes/core/groupby/series_group_by.py +++ b/bigframes/core/groupby/series_group_by.py @@ -25,10 +25,10 @@ from bigframes import session from bigframes.core import expression as ex -from bigframes.core import log_adapter import bigframes.core.block_transforms as block_ops import bigframes.core.blocks as blocks from bigframes.core.groupby import aggs, group_by +from bigframes.core.logging import log_adapter import bigframes.core.ordering as order import bigframes.core.utils as utils import bigframes.core.validations as validations diff --git a/bigframes/core/logging/__init__.py b/bigframes/core/logging/__init__.py new file mode 100644 index 0000000000..95c077a99a --- /dev/null +++ b/bigframes/core/logging/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from bigframes.core.logging import log_adapter + +__all__ = ["log_adapter"] diff --git a/bigframes/core/log_adapter.py b/bigframes/core/logging/log_adapter.py similarity index 100% rename from bigframes/core/log_adapter.py rename to bigframes/core/logging/log_adapter.py diff --git a/bigframes/core/window/rolling.py b/bigframes/core/window/rolling.py index d6c77bf0a7..b7bb62372c 100644 --- a/bigframes/core/window/rolling.py +++ b/bigframes/core/window/rolling.py @@ -24,8 +24,9 @@ from bigframes import dtypes from bigframes.core import agg_expressions from bigframes.core import expression as ex -from bigframes.core import log_adapter, ordering, utils, window_spec +from bigframes.core import ordering, utils, window_spec import bigframes.core.blocks as blocks +from bigframes.core.logging import log_adapter from bigframes.core.window import ordering as window_ordering import bigframes.operations.aggregations as agg_ops diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 9efc6ba061..e1ad4f3e75 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -55,7 +55,7 @@ import bigframes.constants import bigframes.core -from bigframes.core import agg_expressions, log_adapter +from bigframes.core import agg_expressions import bigframes.core.block_transforms as block_ops import bigframes.core.blocks as blocks import bigframes.core.convert @@ -66,6 +66,7 @@ import bigframes.core.indexers as indexers import bigframes.core.indexes as indexes import bigframes.core.interchange +from bigframes.core.logging import log_adapter import bigframes.core.ordering as order import bigframes.core.utils as utils import bigframes.core.validations as validations diff --git a/bigframes/ml/cluster.py b/bigframes/ml/cluster.py index 9ce4649c5e..f371be0cf3 100644 --- a/bigframes/ml/cluster.py +++ b/bigframes/ml/cluster.py @@ -24,7 +24,7 @@ import pandas as pd import bigframes -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter from 
bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd diff --git a/bigframes/ml/compose.py b/bigframes/ml/compose.py index 54ce7066cb..d638e026e4 100644 --- a/bigframes/ml/compose.py +++ b/bigframes/ml/compose.py @@ -27,8 +27,8 @@ import bigframes_vendored.sklearn.compose._column_transformer from google.cloud import bigquery -from bigframes.core import log_adapter import bigframes.core.compile.googlesql as sql_utils +from bigframes.core.logging import log_adapter import bigframes.core.utils as core_utils from bigframes.ml import base, core, globals, impute, preprocessing, utils import bigframes.pandas as bpd diff --git a/bigframes/ml/decomposition.py b/bigframes/ml/decomposition.py index 3ff32d2433..ca5ff102b4 100644 --- a/bigframes/ml/decomposition.py +++ b/bigframes/ml/decomposition.py @@ -23,7 +23,7 @@ import bigframes_vendored.sklearn.decomposition._pca from google.cloud import bigquery -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import bigframes.session diff --git a/bigframes/ml/ensemble.py b/bigframes/ml/ensemble.py index 2633f13411..7cd7079dfb 100644 --- a/bigframes/ml/ensemble.py +++ b/bigframes/ml/ensemble.py @@ -23,7 +23,7 @@ import bigframes_vendored.xgboost.sklearn from google.cloud import bigquery -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter import bigframes.dataframe from bigframes.ml import base, core, globals, utils import bigframes.session diff --git a/bigframes/ml/forecasting.py b/bigframes/ml/forecasting.py index d26abdfa71..99a7b1743d 100644 --- a/bigframes/ml/forecasting.py +++ b/bigframes/ml/forecasting.py @@ -20,7 +20,7 @@ from google.cloud import bigquery -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import bigframes.session 
diff --git a/bigframes/ml/imported.py b/bigframes/ml/imported.py index a73ee352d0..295649ed7f 100644 --- a/bigframes/ml/imported.py +++ b/bigframes/ml/imported.py @@ -20,7 +20,7 @@ from google.cloud import bigquery -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import bigframes.session diff --git a/bigframes/ml/impute.py b/bigframes/ml/impute.py index 818151a4f9..b3da895201 100644 --- a/bigframes/ml/impute.py +++ b/bigframes/ml/impute.py @@ -22,7 +22,7 @@ import bigframes_vendored.sklearn.impute._base -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter import bigframes.core.utils as core_utils from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd diff --git a/bigframes/ml/linear_model.py b/bigframes/ml/linear_model.py index 3774a62c0c..df054eb306 100644 --- a/bigframes/ml/linear_model.py +++ b/bigframes/ml/linear_model.py @@ -24,7 +24,7 @@ import bigframes_vendored.sklearn.linear_model._logistic from google.cloud import bigquery -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import bigframes.session diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index e627e76d17..f4e60f3f9d 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -24,7 +24,8 @@ from bigframes import dtypes, exceptions import bigframes.bigquery as bbq -from bigframes.core import blocks, global_session, log_adapter +from bigframes.core import blocks, global_session +from bigframes.core.logging import log_adapter import bigframes.dataframe from bigframes.ml import base, core, globals, utils import bigframes.series diff --git a/bigframes/ml/model_selection.py b/bigframes/ml/model_selection.py index 6eba4f81c2..5adfb03b7f 100644 --- a/bigframes/ml/model_selection.py +++ 
b/bigframes/ml/model_selection.py @@ -26,7 +26,7 @@ import bigframes_vendored.sklearn.model_selection._validation as vendored_model_selection_validation import pandas as pd -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter from bigframes.ml import utils import bigframes.pandas as bpd diff --git a/bigframes/ml/pipeline.py b/bigframes/ml/pipeline.py index dac51b1956..8d69217694 100644 --- a/bigframes/ml/pipeline.py +++ b/bigframes/ml/pipeline.py @@ -24,7 +24,7 @@ import bigframes_vendored.sklearn.pipeline from google.cloud import bigquery -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter import bigframes.dataframe from bigframes.ml import ( base, diff --git a/bigframes/ml/preprocessing.py b/bigframes/ml/preprocessing.py index 94c61674f6..8bf89b0838 100644 --- a/bigframes/ml/preprocessing.py +++ b/bigframes/ml/preprocessing.py @@ -26,7 +26,7 @@ import bigframes_vendored.sklearn.preprocessing._label import bigframes_vendored.sklearn.preprocessing._polynomial -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter import bigframes.core.utils as core_utils from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd diff --git a/bigframes/ml/remote.py b/bigframes/ml/remote.py index b091c61f3f..24083bd4e8 100644 --- a/bigframes/ml/remote.py +++ b/bigframes/ml/remote.py @@ -19,7 +19,8 @@ from typing import Mapping, Optional import warnings -from bigframes.core import global_session, log_adapter +from bigframes.core import global_session +from bigframes.core.logging import log_adapter import bigframes.dataframe import bigframes.exceptions as bfe from bigframes.ml import base, core, globals, utils diff --git a/bigframes/operations/ai.py b/bigframes/operations/ai.py index ad58e8825c..6921299acd 100644 --- a/bigframes/operations/ai.py +++ b/bigframes/operations/ai.py @@ -20,7 +20,8 @@ import warnings from bigframes import dtypes, exceptions, options 
-from bigframes.core import guid, log_adapter +from bigframes.core import guid +from bigframes.core.logging import log_adapter @log_adapter.class_logger diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py index 577de458f4..29f720b3eb 100644 --- a/bigframes/operations/blob.py +++ b/bigframes/operations/blob.py @@ -23,7 +23,7 @@ import requests from bigframes import clients, dtypes -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter import bigframes.dataframe import bigframes.exceptions as bfe import bigframes.operations as ops diff --git a/bigframes/operations/datetimes.py b/bigframes/operations/datetimes.py index c259dd018e..2eedb96b43 100644 --- a/bigframes/operations/datetimes.py +++ b/bigframes/operations/datetimes.py @@ -22,7 +22,7 @@ import pandas from bigframes import dataframe, dtypes, series -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter import bigframes.operations as ops _ONE_DAY = pandas.Timedelta("1D") diff --git a/bigframes/operations/lists.py b/bigframes/operations/lists.py index 34ecdd8118..9974e68693 100644 --- a/bigframes/operations/lists.py +++ b/bigframes/operations/lists.py @@ -19,7 +19,7 @@ import bigframes_vendored.pandas.core.arrays.arrow.accessors as vendoracessors -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter import bigframes.operations as ops from bigframes.operations._op_converters import convert_index, convert_slice import bigframes.series as series diff --git a/bigframes/operations/plotting.py b/bigframes/operations/plotting.py index df0c138f0f..21a23a9ab5 100644 --- a/bigframes/operations/plotting.py +++ b/bigframes/operations/plotting.py @@ -17,7 +17,7 @@ import bigframes_vendored.constants as constants import bigframes_vendored.pandas.plotting._core as vendordt -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter import bigframes.operations._matplotlib as bfplt 
diff --git a/bigframes/operations/semantics.py b/bigframes/operations/semantics.py index 2266702d47..f237959d0d 100644 --- a/bigframes/operations/semantics.py +++ b/bigframes/operations/semantics.py @@ -21,7 +21,8 @@ import numpy as np from bigframes import dtypes, exceptions -from bigframes.core import guid, log_adapter +from bigframes.core import guid +from bigframes.core.logging import log_adapter @log_adapter.class_logger diff --git a/bigframes/operations/strings.py b/bigframes/operations/strings.py index d84a66789d..922d26a23c 100644 --- a/bigframes/operations/strings.py +++ b/bigframes/operations/strings.py @@ -20,8 +20,8 @@ import bigframes_vendored.constants as constants import bigframes_vendored.pandas.core.strings.accessor as vendorstr -from bigframes.core import log_adapter import bigframes.core.indexes.base as indices +from bigframes.core.logging import log_adapter import bigframes.dataframe as df import bigframes.operations as ops from bigframes.operations._op_converters import convert_index, convert_slice diff --git a/bigframes/operations/structs.py b/bigframes/operations/structs.py index 35010e1733..ec0b5dae52 100644 --- a/bigframes/operations/structs.py +++ b/bigframes/operations/structs.py @@ -17,7 +17,8 @@ import bigframes_vendored.pandas.core.arrays.arrow.accessors as vendoracessors import pandas as pd -from bigframes.core import backports, log_adapter +from bigframes.core import backports +from bigframes.core.logging import log_adapter import bigframes.dataframe import bigframes.operations import bigframes.series diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 0b9648fd56..9da2204a71 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -27,9 +27,9 @@ import pandas import bigframes._config as config -from bigframes.core import log_adapter import bigframes.core.global_session as global_session import bigframes.core.indexes +from bigframes.core.logging import log_adapter from 
bigframes.core.reshape.api import concat, crosstab, cut, get_dummies, merge, qcut import bigframes.dataframe import bigframes.functions._utils as bff_utils diff --git a/bigframes/series.py b/bigframes/series.py index 606169a8a1..814d59beff 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -49,13 +49,14 @@ import typing_extensions import bigframes.core -from bigframes.core import agg_expressions, groupby, log_adapter +from bigframes.core import agg_expressions, groupby import bigframes.core.block_transforms as block_ops import bigframes.core.blocks as blocks import bigframes.core.expression as ex import bigframes.core.identifiers as ids import bigframes.core.indexers import bigframes.core.indexes as indexes +from bigframes.core.logging import log_adapter import bigframes.core.ordering as order import bigframes.core.scalar as scalars import bigframes.core.utils as utils diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 4f32514652..ca8fbf2919 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -67,10 +67,11 @@ import bigframes.clients import bigframes.constants import bigframes.core -from bigframes.core import blocks, log_adapter, utils +from bigframes.core import blocks, utils import bigframes.core.events import bigframes.core.indexes import bigframes.core.indexes.multi +from bigframes.core.logging import log_adapter import bigframes.core.pyformat import bigframes.formatting_helpers import bigframes.functions._function_session as bff_session diff --git a/bigframes/session/_io/bigquery/__init__.py b/bigframes/session/_io/bigquery/__init__.py index 9114770224..98b5f194c7 100644 --- a/bigframes/session/_io/bigquery/__init__.py +++ b/bigframes/session/_io/bigquery/__init__.py @@ -32,9 +32,9 @@ import google.cloud.bigquery._job_helpers import google.cloud.bigquery.table -from bigframes.core import log_adapter import bigframes.core.compile.googlesql as googlesql import bigframes.core.events +from 
bigframes.core.logging import log_adapter import bigframes.core.sql import bigframes.session.metrics diff --git a/bigframes/streaming/__init__.py b/bigframes/streaming/__init__.py index 477c7a99e0..0d91e5f91a 100644 --- a/bigframes/streaming/__init__.py +++ b/bigframes/streaming/__init__.py @@ -17,8 +17,8 @@ import inspect import sys -from bigframes.core import log_adapter import bigframes.core.global_session as global_session +from bigframes.core.logging import log_adapter from bigframes.pandas.io.api import _set_default_session_location_if_possible import bigframes.session import bigframes.streaming.dataframe as streaming_dataframe diff --git a/bigframes/streaming/dataframe.py b/bigframes/streaming/dataframe.py index 3e030a4aa2..b7b67178ce 100644 --- a/bigframes/streaming/dataframe.py +++ b/bigframes/streaming/dataframe.py @@ -27,7 +27,8 @@ import pandas as pd from bigframes import dataframe -from bigframes.core import log_adapter, nodes +from bigframes.core import nodes +from bigframes.core.logging import log_adapter import bigframes.exceptions as bfe import bigframes.session diff --git a/tests/unit/core/logging/__init__.py b/tests/unit/core/logging/__init__.py new file mode 100644 index 0000000000..58d482ea38 --- /dev/null +++ b/tests/unit/core/logging/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/tests/unit/core/test_log_adapter.py b/tests/unit/core/logging/test_log_adapter.py similarity index 99% rename from tests/unit/core/test_log_adapter.py rename to tests/unit/core/logging/test_log_adapter.py index c236bb6886..ecef966afc 100644 --- a/tests/unit/core/test_log_adapter.py +++ b/tests/unit/core/logging/test_log_adapter.py @@ -17,7 +17,7 @@ from google.cloud import bigquery import pytest -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter # The limit is 64 (https://cloud.google.com/bigquery/docs/labels-intro#requirements), # but leave a few spare for internal labels to be added. diff --git a/tests/unit/session/test_io_bigquery.py b/tests/unit/session/test_io_bigquery.py index 4349c1b6ee..eb58c6bb52 100644 --- a/tests/unit/session/test_io_bigquery.py +++ b/tests/unit/session/test_io_bigquery.py @@ -23,8 +23,8 @@ import pytest import bigframes -from bigframes.core import log_adapter import bigframes.core.events +from bigframes.core.logging import log_adapter import bigframes.pandas as bpd import bigframes.session._io.bigquery import bigframes.session._io.bigquery as io_bq From a634e976c0f44087ca2a65f68cf2775ae6f04024 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 13 Jan 2026 11:28:36 -0800 Subject: [PATCH 13/28] feat: Stabilize interactive table height to prevent notebook layout shifts (#2378) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This update introduces a new feature in the interactive table display that prevents notebook layout shifts when changing the number of rows per page. The table height is now intelligently set and fixed after its initial display, creating a more stable and predictable user experience. 
Verified at: vs code notebook: https://screencast.googleplex.com/cast/NTY2NjM1NDQwNDI2MTg4OHwzNDEwZTA5Zi0wOA Fixes #<460861785> 🦕 --- bigframes/display/table_widget.js | 18 + notebooks/dataframes/anywidget_mode.ipynb | 530 ++++++++++++++++------ tests/js/table_widget.test.js | 48 ++ 3 files changed, 468 insertions(+), 128 deletions(-) diff --git a/bigframes/display/table_widget.js b/bigframes/display/table_widget.js index 6beaf47c21..314bf771d0 100644 --- a/bigframes/display/table_widget.js +++ b/bigframes/display/table_widget.js @@ -170,9 +170,27 @@ function render({ model, el }) { model.save_changes(); } + let isHeightInitialized = false; + function handleTableHTMLChange() { tableContainer.innerHTML = model.get(ModelProperty.TABLE_HTML); + // After the first render, dynamically set the container height to fit the + // initial page (usually 10 rows) and then lock it. + setTimeout(() => { + if (!isHeightInitialized) { + const table = tableContainer.querySelector('table'); + if (table) { + const tableHeight = table.offsetHeight; + // Add a small buffer(e.g. 2px) for borders to avoid scrollbars. 
+ if (tableHeight > 0) { + tableContainer.style.height = `${tableHeight + 2}px`; + isHeightInitialized = true; + } + } + } + }, 0); + const sortableColumns = model.get(ModelProperty.ORDERABLE_COLUMNS); const currentSortContext = model.get(ModelProperty.SORT_CONTEXT) || []; diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index bf40dd77c5..5dd8af1c5f 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -120,16 +120,16 @@ "output_type": "stream", "text": [ "state gender year name number\n", - " AL F 1910 Sadie 40\n", - " AL F 1910 Mary 875\n", - " AR F 1910 Vera 39\n", - " AR F 1910 Marie 78\n", - " AR F 1910 Lucille 66\n", - " CA F 1910 Virginia 101\n", - " DC F 1910 Margaret 72\n", - " GA F 1910 Mildred 133\n", - " GA F 1910 Vera 51\n", - " GA F 1910 Sallie 92\n", + " AL F 1910 Annie 482\n", + " AL F 1910 Myrtle 104\n", + " AR F 1910 Lillian 56\n", + " CT F 1910 Anne 38\n", + " CT F 1910 Frances 45\n", + " FL F 1910 Margaret 53\n", + " GA F 1910 Mae 73\n", + " GA F 1910 Beatrice 96\n", + " GA F 1910 Lola 47\n", + " IA F 1910 Viola 49\n", "...\n", "\n", "[5552452 rows x 5 columns]\n" @@ -143,14 +143,31 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "220340b0", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "✅ Completed. " + "\n", + " Query started with request ID bigframes-dev:US.161c75bd-f9f8-4b21-8a45-1d7dfc659034.
SQL
SELECT\n",
+       "`state` AS `state`,\n",
+       "`gender` AS `gender`,\n",
+       "`year` AS `year`,\n",
+       "`name` AS `name`,\n",
+       "`number` AS `number`\n",
+       "FROM\n",
+       "(SELECT\n",
+       "  `t0`.`state`,\n",
+       "  `t0`.`gender`,\n",
+       "  `t0`.`year`,\n",
+       "  `t0`.`name`,\n",
+       "  `t0`.`number`,\n",
+       "  `t0`.`bfuid_col_2` AS `bfuid_col_15`\n",
+       "FROM `bigframes-dev._8b037bfb7316dddf9d92b12dcf93e008906bfe52._c58be946_1477_4c00_b699_0ae022f13563_bqdf_8e323719-899f-4da2-89cd-2dbb53ab1dfc` AS `t0`)\n",
+       "ORDER BY `bfuid_col_15` ASC NULLS LAST
\n", + " " ], "text/plain": [ "" @@ -162,7 +179,9 @@ { "data": { "text/html": [ - "✅ Completed. " + "✅ Completed. \n", + " Query processed 215.9 MB in 7 seconds of slot time. [Job bigframes-dev:US.job_IuiJsjhfPtOrKuTIOqPIjnVLX820 details]\n", + " " ], "text/plain": [ "" @@ -174,7 +193,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a5c2a45c5cc044b59656a2f6b71f710f", + "model_id": "e68fbb9eb4d24bab837c77730d31c8a1", "version_major": 2, "version_minor": 1 }, @@ -210,80 +229,80 @@ " AL\n", " F\n", " 1910\n", - " Vera\n", - " 71\n", + " Hazel\n", + " 51\n", " \n", " \n", " 1\n", - " AR\n", + " AL\n", " F\n", " 1910\n", - " Viola\n", - " 37\n", + " Lucy\n", + " 76\n", " \n", " \n", " 2\n", " AR\n", " F\n", " 1910\n", - " Alice\n", - " 57\n", + " Nellie\n", + " 39\n", " \n", " \n", " 3\n", " AR\n", " F\n", " 1910\n", - " Edna\n", - " 95\n", + " Lena\n", + " 40\n", " \n", " \n", " 4\n", - " AR\n", + " CO\n", " F\n", " 1910\n", - " Ollie\n", - " 40\n", + " Thelma\n", + " 36\n", " \n", " \n", " 5\n", - " CA\n", + " CO\n", " F\n", " 1910\n", - " Beatrice\n", - " 37\n", + " Ruth\n", + " 68\n", " \n", " \n", " 6\n", " CT\n", " F\n", " 1910\n", - " Marion\n", - " 36\n", + " Elizabeth\n", + " 86\n", " \n", " \n", " 7\n", - " CT\n", + " DC\n", " F\n", " 1910\n", - " Marie\n", - " 36\n", + " Mary\n", + " 80\n", " \n", " \n", " 8\n", " FL\n", " F\n", " 1910\n", - " Alice\n", - " 53\n", + " Annie\n", + " 101\n", " \n", " \n", " 9\n", - " GA\n", + " FL\n", " F\n", " 1910\n", - " Thelma\n", - " 133\n", + " Alma\n", + " 39\n", " \n", " \n", "\n", @@ -291,25 +310,67 @@ "[5552452 rows x 5 columns in total]" ], "text/plain": [ - "state gender year name number\n", - " AL F 1910 Vera 71\n", - " AR F 1910 Viola 37\n", - " AR F 1910 Alice 57\n", - " AR F 1910 Edna 95\n", - " AR F 1910 Ollie 40\n", - " CA F 1910 Beatrice 37\n", - " CT F 1910 Marion 36\n", - " CT F 1910 Marie 36\n", - " FL F 1910 Alice 53\n", - " GA F 1910 Thelma 133\n", + "state gender year name 
number\n", + " AL F 1910 Hazel 51\n", + " AL F 1910 Lucy 76\n", + " AR F 1910 Nellie 39\n", + " AR F 1910 Lena 40\n", + " CO F 1910 Thelma 36\n", + " CO F 1910 Ruth 68\n", + " CT F 1910 Elizabeth 86\n", + " DC F 1910 Mary 80\n", + " FL F 1910 Annie 101\n", + " FL F 1910 Alma 39\n", "...\n", "\n", "[5552452 rows x 5 columns]" ] }, - "execution_count": 5, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" + }, + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 215.9 MB in 9 seconds of slot time. [Job bigframes-dev:US.job_IEjIRaqt2w-_pAttPw1VAVuRPxA7 details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 215.9 MB in 5 seconds of slot time. [Job bigframes-dev:US.job_Mi-3m2AkEC1iPgWi7hmcWa1M1oIA details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 215.9 MB in 6 seconds of slot time. [Job bigframes-dev:US.job_j8pvY385WwIY7tGvhI7Yxc62aBwd details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -327,14 +388,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "42bb02ab", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "✅ Completed. " + "✅ Completed. \n", + " Query processed 171.4 MB in 30 seconds of slot time. [Job bigframes-dev:US.ff90d507-bec8-4d24-abc3-0209ac28e21f details]\n", + " " ], "text/plain": [ "" @@ -346,7 +409,9 @@ { "data": { "text/html": [ - "✅ Completed. " + "✅ Completed. 
\n", + " Query processed 88.8 MB in a moment of slot time.\n", + " " ], "text/plain": [ "" @@ -357,43 +422,35 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "004beca7d4034b498add8f9edd55027b", - "version_major": 2, - "version_minor": 1 - }, "text/html": [ - "
0    1910\n",
-       "1    1910\n",
-       "2    1910\n",
-       "3    1910\n",
-       "4    1910\n",
-       "5    1910\n",
-       "6    1910\n",
-       "7    1910\n",
-       "8    1910\n",
-       "9    1910

[5552452 rows]

" + "✅ Completed. " ], "text/plain": [ - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "Name: year, dtype: Int64\n", - "...\n", - "\n", - "[5552452 rows]" + "" ] }, - "execution_count": 6, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "Name: year, dtype: Int64\n", + "...\n", + "\n", + "[5552452 rows]\n" + ] } ], "source": [ @@ -419,7 +476,9 @@ { "data": { "text/html": [ - "✅ Completed. " + "✅ Completed. \n", + " Query processed 88.8 MB in 3 seconds of slot time. [Job bigframes-dev:US.job_517TdI--FMoURkV7QQNMltY_-dZ7 details]\n", + " " ], "text/plain": [ "" @@ -431,7 +490,9 @@ { "data": { "text/html": [ - "✅ Completed. " + "✅ Completed. \n", + " Query processed 88.8 MB in 2 seconds of slot time. [Job bigframes-dev:US.job_rCeYkeBPqmTKNFWFgwXjz5Ed8uWI details]\n", + " " ], "text/plain": [ "" @@ -443,7 +504,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1251df51c4ba44d0b93af07917888511", + "model_id": "3e630b1a56c740e781772ca5f5c7267a", "version_major": 2, "version_minor": 1 }, @@ -544,7 +605,9 @@ { "data": { "text/html": [ - "✅ Completed. " + "✅ Completed. \n", + " Query processed 215.9 MB in 11 seconds of slot time. [Job bigframes-dev:US.job_XwXTDb6gWVkuyIFMeWA0waE33bSg details]\n", + " " ], "text/plain": [ "" @@ -556,7 +619,9 @@ { "data": { "text/html": [ - "✅ Completed. " + "✅ Completed. \n", + " Query processed 215.9 MB in 7 seconds of slot time. 
[Job bigframes-dev:US.job_bCW0LYK5_PzyyGPf9OAg4YfNMG1C details]\n", + " " ], "text/plain": [ "" @@ -575,12 +640,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "45a462a3a42a445bb06d89132b7d0331", + "model_id": "a6a2b19314b04283a5a66ca9d66eb771", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, "execution_count": 8, @@ -655,27 +720,8 @@ { "data": { "text/html": [ - "\n", - " Query started with request ID bigframes-dev:US.a9f6b054-3709-49d6-8109-c325ffe07679.
SQL
SELECT\n",
-       "`state` AS `state`,\n",
-       "`gender` AS `gender`,\n",
-       "`year` AS `year`,\n",
-       "`name` AS `name`,\n",
-       "`number` AS `number`\n",
-       "FROM\n",
-       "(SELECT\n",
-       "  *\n",
-       "FROM (\n",
-       "  SELECT\n",
-       "    `state`,\n",
-       "    `gender`,\n",
-       "    `year`,\n",
-       "    `name`,\n",
-       "    `number`\n",
-       "  FROM `bigquery-public-data.usa_names.usa_1910_2013` FOR SYSTEM_TIME AS OF TIMESTAMP('2025-12-29T22:47:29.748716+00:00')\n",
-       ") AS `t0`)\n",
-       "ORDER BY `name` ASC NULLS LAST ,`year` ASC NULLS LAST ,`state` ASC NULLS LAST\n",
-       "LIMIT 5
\n", + "✅ Completed. \n", + " Query processed 215.9 MB in a moment of slot time.\n", " " ], "text/plain": [ @@ -689,7 +735,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 0 Bytes in a moment of slot time.\n", + " Query processed 215.9 MB in a moment of slot time.\n", " " ], "text/plain": [ @@ -709,12 +755,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "219f91f2341d42b8b96da795a79fc3e8", + "model_id": "beb362548a6b4fd4a163569edd6f1a90", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, "execution_count": 10, @@ -750,24 +796,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "added-cell-1", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "\n", - " Query started with request ID bigframes-dev:US.8819b8bd-6697-4c65-a8bc-c7a95a06fe8e.
SQL
\n",
-       "  SELECT\n",
-       "    AI.GENERATE(\n",
-       "      prompt=>("Extract the values.", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, "us.conn")), "r")),\n",
-       "      connection_id=>"bigframes-dev.us.bigframes-default-connection",\n",
-       "      output_schema=>"publication_date string, class_international string, application_number string, filing_date string") AS result,\n",
-       "    *\n",
-       "  FROM `bigquery-public-data.labeled_patents.extracted_data`\n",
-       "  LIMIT 5;\n",
-       "
\n", + "✅ Completed. \n", + " Query processed 85.9 kB in 19 seconds of slot time.\n", " " ], "text/plain": [ @@ -776,6 +813,243 @@ }, "metadata": {}, "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "02a46cf499b442d4bfe03934195e67df", + "version_major": 2, + "version_minor": 1 + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
resultgcs_pathissuerlanguagepublication_dateclass_internationalclass_usapplication_numberfiling_datepriority_date_eurepresentative_line_1_euapplicant_line_1inventor_line_1title_line_1number
0{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H01L 21/20<NA>18166536.516.02.2016<NA>Scheider, Sascha et alEV Group E. Thallner GmbHKurz, FlorianVORRICHTUNG ZUM BONDEN VON SUBSTRATENEP 3 382 744 A1
1{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018A01K 31/00<NA>18171005.405.02.201505.02.2014Stork Bamberger PatentanwälteLinco Food Systems A/SThrane, UffeMASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E...EP 3 381 276 A1
2{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018G06F 11/30<NA>18157347.819.02.201831.03.2017Hoffmann EitleFUJITSU LIMITEDKukihara, KensukeMETHOD EXECUTED BY A COMPUTER, INFORMATION PRO...EP 3 382 553 A1
3{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H05B 6/12<NA>18165514.303.04.201830.03.2017<NA>BSH Hausger√§te GmbHAcero Acero, JesusVORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNGEP 3 383 141 A2
4{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE29.08.018E04H 6/12<NA>18157874.121.02.201822.02.2017Liedtke & Partner PatentanwälteSHB Hebezeugbau GmbHVOLGER, AlexanderSTEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSEREP 3 366 869 A1
\n", + "

5 rows × 15 columns

\n", + "
[5 rows x 15 columns in total]" + ], + "text/plain": [ + " result \\\n", + "0 {'application_number': None, 'class_internatio... \n", + "1 {'application_number': None, 'class_internatio... \n", + "2 {'application_number': None, 'class_internatio... \n", + "3 {'application_number': None, 'class_internatio... \n", + "4 {'application_number': None, 'class_internatio... \n", + "\n", + " gcs_path issuer language \\\n", + "0 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "1 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "2 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "3 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "\n", + " publication_date class_international class_us application_number \\\n", + "0 03.10.2018 H01L 21/20 18166536.5 \n", + "1 03.10.2018 A01K 31/00 18171005.4 \n", + "2 03.10.2018 G06F 11/30 18157347.8 \n", + "3 03.10.2018 H05B 6/12 18165514.3 \n", + "4 29.08.018 E04H 6/12 18157874.1 \n", + "\n", + " filing_date priority_date_eu representative_line_1_eu \\\n", + "0 16.02.2016 Scheider, Sascha et al \n", + "1 05.02.2015 05.02.2014 Stork Bamberger Patentanwälte \n", + "2 19.02.2018 31.03.2017 Hoffmann Eitle \n", + "3 03.04.2018 30.03.2017 \n", + "4 21.02.2018 22.02.2017 Liedtke & Partner Patentanwälte \n", + "\n", + " applicant_line_1 inventor_line_1 \\\n", + "0 EV Group E. Thallner GmbH Kurz, Florian \n", + "1 Linco Food Systems A/S Thrane, Uffe \n", + "2 FUJITSU LIMITED Kukihara, Kensuke \n", + "3 BSH Hausgeräte GmbH Acero Acero, Jesus \n", + "4 SHB Hebezeugbau GmbH VOLGER, Alexander \n", + "\n", + " title_line_1 number \n", + "0 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", + "1 MASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", + "2 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... 
EP 3 382 553 A1 \n", + "3 VORRICHTUNG ZUR INDUKTIVEN ENERGIEÜBERTRAGUNG EP 3 383 141 A2 \n", + "4 STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER EP 3 366 869 A1 \n", + "\n", + "[5 rows x 15 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ diff --git a/tests/js/table_widget.test.js b/tests/js/table_widget.test.js index e392b38270..d701d8692e 100644 --- a/tests/js/table_widget.test.js +++ b/tests/js/table_widget.test.js @@ -340,6 +340,54 @@ describe('TableWidget', () => { * Tests that the widget correctly renders HTML with truncated columns (ellipsis) * and ensures that the ellipsis column is not treated as a sortable column. */ + it('should set height dynamically on first load and remain fixed', () => { + jest.useFakeTimers(); + + // Mock the table's offsetHeight + let mockHeight = 150; + Object.defineProperty(HTMLElement.prototype, 'offsetHeight', { + configurable: true, + get: () => mockHeight, + }); + + // Mock model properties + model.get.mockImplementation((property) => { + if (property === 'table_html') { + return '...
'; + } + return null; + }); + + render({ model, el }); + + const tableContainer = el.querySelector('.table-container'); + + // --- First render --- + const tableHtmlChangeHandler = model.on.mock.calls.find( + (call) => call[0] === 'change:table_html', + )[1]; + tableHtmlChangeHandler(); + jest.runAllTimers(); + + // Height should be set to the mocked offsetHeight + 2px buffer + expect(tableContainer.style.height).toBe('152px'); + + // --- Second render (e.g., page size change) --- + // Simulate the new content being taller + mockHeight = 350; + tableHtmlChangeHandler(); + jest.runAllTimers(); + + // Height should NOT change + expect(tableContainer.style.height).toBe('152px'); + + // Restore original implementation + Object.defineProperty(HTMLElement.prototype, 'offsetHeight', { + value: 0, + }); + jest.useRealTimers(); + }); + it('should render truncated columns with ellipsis and not make ellipsis sortable', () => { // Mock HTML with truncated columns // Use the structure produced by the python backend From 173efd999ea5da2673cb6873bc22805342c20da2 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Wed, 14 Jan 2026 18:44:16 -0800 Subject: [PATCH 14/28] refactor: adds null literal checks when sqlglot compiling eq and nq ops (#2381) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds null literal checks during the compilation of eq, ne and map ops. This aims to resolve the `test_series_replace_nans_with_pd_na` failure reported in #2248. 
Fixes internal issue 417774347 🦕 --- .../sqlglot/expressions/comparison_ops.py | 17 +++++++++ .../sqlglot/expressions/generic_ops.py | 20 ++++++++--- bigframes/core/compile/sqlglot/sqlglot_ir.py | 9 +++++ .../test_eq_numeric/out.sql | 33 +++++++++++------ .../test_ne_numeric/out.sql | 35 +++++++++++++------ .../test_generic_ops/test_map/out.sql | 8 ++++- .../expressions/test_comparison_ops.py | 10 +++--- .../sqlglot/expressions/test_generic_ops.py | 7 +++- 8 files changed, 109 insertions(+), 30 deletions(-) diff --git a/bigframes/core/compile/sqlglot/expressions/comparison_ops.py b/bigframes/core/compile/sqlglot/expressions/comparison_ops.py index 8fda3b80dd..8c201f6a06 100644 --- a/bigframes/core/compile/sqlglot/expressions/comparison_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/comparison_ops.py @@ -16,11 +16,13 @@ import typing +import bigframes_vendored.sqlglot as sg import bigframes_vendored.sqlglot.expressions as sge import pandas as pd from bigframes import dtypes from bigframes import operations as ops +from bigframes.core.compile.sqlglot import sqlglot_ir from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler @@ -62,6 +64,10 @@ def _(expr: TypedExpr, op: ops.IsInOp) -> sge.Expression: @register_binary_op(ops.eq_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if sqlglot_ir._is_null_literal(left.expr): + return sge.Is(this=right.expr, expression=sge.Null()) + if sqlglot_ir._is_null_literal(right.expr): + return sge.Is(this=left.expr, expression=sge.Null()) left_expr = _coerce_bool_to_int(left) right_expr = _coerce_bool_to_int(right) return sge.EQ(this=left_expr, expression=right_expr) @@ -139,6 +145,17 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: @register_binary_op(ops.ne_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if sqlglot_ir._is_null_literal(left.expr): + return sge.Is( + 
this=sge.paren(right.expr, copy=False), + expression=sg.not_(sge.Null(), copy=False), + ) + if sqlglot_ir._is_null_literal(right.expr): + return sge.Is( + this=sge.paren(left.expr, copy=False), + expression=sg.not_(sge.Null(), copy=False), + ) + left_expr = _coerce_bool_to_int(left) right_expr = _coerce_bool_to_int(right) return sge.NEQ(this=left_expr, expression=right_expr) diff --git a/bigframes/core/compile/sqlglot/expressions/generic_ops.py b/bigframes/core/compile/sqlglot/expressions/generic_ops.py index 4a2a5fb213..ec0d0b3b34 100644 --- a/bigframes/core/compile/sqlglot/expressions/generic_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/generic_ops.py @@ -19,7 +19,7 @@ from bigframes import dtypes from bigframes import operations as ops -from bigframes.core.compile.sqlglot import sqlglot_types +from bigframes.core.compile.sqlglot import sqlglot_ir, sqlglot_types from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler @@ -101,11 +101,23 @@ def _(expr: TypedExpr) -> sge.Expression: def _(expr: TypedExpr, op: ops.MapOp) -> sge.Expression: if len(op.mappings) == 0: return expr.expr + + mappings = [ + ( + sqlglot_ir._literal(key, dtypes.is_compatible(key, expr.dtype)), + sqlglot_ir._literal(value, dtypes.is_compatible(value, expr.dtype)), + ) + for key, value in op.mappings + ] return sge.Case( - this=expr.expr, ifs=[ - sge.If(this=sge.convert(key), true=sge.convert(value)) - for key, value in op.mappings + sge.If( + this=sge.EQ(this=expr.expr, expression=key) + if not sqlglot_ir._is_null_literal(key) + else sge.Is(this=expr.expr, expression=sge.Null()), + true=value, + ) + for key, value in mappings ], default=expr.expr, ) diff --git a/bigframes/core/compile/sqlglot/sqlglot_ir.py b/bigframes/core/compile/sqlglot/sqlglot_ir.py index 9445b65e99..cefe983e24 100644 --- a/bigframes/core/compile/sqlglot/sqlglot_ir.py +++ b/bigframes/core/compile/sqlglot/sqlglot_ir.py @@ 
-642,6 +642,15 @@ def _select_to_cte(expr: sge.Select, cte_name: sge.Identifier) -> sge.Select: return new_select_expr +def _is_null_literal(expr: sge.Expression) -> bool: + """Checks if the given expression is a NULL literal.""" + if isinstance(expr, sge.Null): + return True + if isinstance(expr, sge.Cast) and isinstance(expr.this, sge.Null): + return True + return False + + def _literal(value: typing.Any, dtype: dtypes.Dtype) -> sge.Expression: sqlglot_type = sgt.from_bigframes_dtype(dtype) if dtype else None if sqlglot_type is None: diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_eq_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_eq_numeric/out.sql index 9c7c19e61c..a21e008941 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_eq_numeric/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_eq_numeric/out.sql @@ -29,7 +29,7 @@ WITH `bfcte_0` AS ( `bfcol_16` AS `bfcol_26`, `bfcol_17` AS `bfcol_27`, `bfcol_18` AS `bfcol_28`, - `bfcol_15` = CAST(`bfcol_16` AS INT64) AS `bfcol_29` + `bfcol_15` IS NULL AS `bfcol_29` FROM `bfcte_2` ), `bfcte_4` AS ( SELECT @@ -40,15 +40,28 @@ WITH `bfcte_0` AS ( `bfcol_27` AS `bfcol_39`, `bfcol_28` AS `bfcol_40`, `bfcol_29` AS `bfcol_41`, - CAST(`bfcol_26` AS INT64) = `bfcol_25` AS `bfcol_42` + `bfcol_25` = CAST(`bfcol_26` AS INT64) AS `bfcol_42` FROM `bfcte_3` +), `bfcte_5` AS ( + SELECT + *, + `bfcol_36` AS `bfcol_50`, + `bfcol_37` AS `bfcol_51`, + `bfcol_38` AS `bfcol_52`, + `bfcol_39` AS `bfcol_53`, + `bfcol_40` AS `bfcol_54`, + `bfcol_41` AS `bfcol_55`, + `bfcol_42` AS `bfcol_56`, + CAST(`bfcol_38` AS INT64) = `bfcol_37` AS `bfcol_57` + FROM `bfcte_4` ) SELECT - `bfcol_36` AS `rowindex`, - `bfcol_37` AS `int64_col`, - `bfcol_38` AS `bool_col`, - `bfcol_39` AS `int_ne_int`, - `bfcol_40` AS `int_ne_1`, - `bfcol_41` AS `int_ne_bool`, - `bfcol_42` AS `bool_ne_int` 
-FROM `bfcte_4` \ No newline at end of file + `bfcol_50` AS `rowindex`, + `bfcol_51` AS `int64_col`, + `bfcol_52` AS `bool_col`, + `bfcol_53` AS `int_eq_int`, + `bfcol_54` AS `int_eq_1`, + `bfcol_55` AS `int_eq_null`, + `bfcol_56` AS `int_eq_bool`, + `bfcol_57` AS `bool_eq_int` +FROM `bfcte_5` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_ne_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_ne_numeric/out.sql index 417d24aa72..1a1ff6e44d 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_ne_numeric/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_ne_numeric/out.sql @@ -29,7 +29,9 @@ WITH `bfcte_0` AS ( `bfcol_16` AS `bfcol_26`, `bfcol_17` AS `bfcol_27`, `bfcol_18` AS `bfcol_28`, - `bfcol_15` <> CAST(`bfcol_16` AS INT64) AS `bfcol_29` + ( + `bfcol_15` + ) IS NOT NULL AS `bfcol_29` FROM `bfcte_2` ), `bfcte_4` AS ( SELECT @@ -40,15 +42,28 @@ WITH `bfcte_0` AS ( `bfcol_27` AS `bfcol_39`, `bfcol_28` AS `bfcol_40`, `bfcol_29` AS `bfcol_41`, - CAST(`bfcol_26` AS INT64) <> `bfcol_25` AS `bfcol_42` + `bfcol_25` <> CAST(`bfcol_26` AS INT64) AS `bfcol_42` FROM `bfcte_3` +), `bfcte_5` AS ( + SELECT + *, + `bfcol_36` AS `bfcol_50`, + `bfcol_37` AS `bfcol_51`, + `bfcol_38` AS `bfcol_52`, + `bfcol_39` AS `bfcol_53`, + `bfcol_40` AS `bfcol_54`, + `bfcol_41` AS `bfcol_55`, + `bfcol_42` AS `bfcol_56`, + CAST(`bfcol_38` AS INT64) <> `bfcol_37` AS `bfcol_57` + FROM `bfcte_4` ) SELECT - `bfcol_36` AS `rowindex`, - `bfcol_37` AS `int64_col`, - `bfcol_38` AS `bool_col`, - `bfcol_39` AS `int_ne_int`, - `bfcol_40` AS `int_ne_1`, - `bfcol_41` AS `int_ne_bool`, - `bfcol_42` AS `bool_ne_int` -FROM `bfcte_4` \ No newline at end of file + `bfcol_50` AS `rowindex`, + `bfcol_51` AS `int64_col`, + `bfcol_52` AS `bool_col`, + `bfcol_53` AS `int_ne_int`, + `bfcol_54` AS `int_ne_1`, + `bfcol_55` AS 
`int_ne_null`, + `bfcol_56` AS `int_ne_bool`, + `bfcol_57` AS `bool_ne_int` +FROM `bfcte_5` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_map/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_map/out.sql index 22628c6a4b..49eada2230 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_map/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_map/out.sql @@ -5,7 +5,13 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE `string_col` WHEN 'value1' THEN 'mapped1' ELSE `string_col` END AS `bfcol_1` + CASE + WHEN `string_col` = 'value1' + THEN 'mapped1' + WHEN `string_col` IS NULL + THEN 'UNKNOWN' + ELSE `string_col` + END AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py index ea94bcae56..3c13bc798b 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py @@ -59,11 +59,12 @@ def test_eq_null_match(scalar_types_df: bpd.DataFrame, snapshot): def test_eq_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["int64_col", "bool_col"]] - bf_df["int_ne_int"] = bf_df["int64_col"] == bf_df["int64_col"] - bf_df["int_ne_1"] = bf_df["int64_col"] == 1 + bf_df["int_eq_int"] = bf_df["int64_col"] == bf_df["int64_col"] + bf_df["int_eq_1"] = bf_df["int64_col"] == 1 + bf_df["int_eq_null"] = bf_df["int64_col"] == pd.NA - bf_df["int_ne_bool"] = bf_df["int64_col"] == bf_df["bool_col"] - bf_df["bool_ne_int"] = bf_df["bool_col"] == bf_df["int64_col"] + bf_df["int_eq_bool"] = bf_df["int64_col"] == bf_df["bool_col"] + bf_df["bool_eq_int"] = bf_df["bool_col"] == bf_df["int64_col"] snapshot.assert_match(bf_df.sql, "out.sql") @@ -135,6 +136,7 @@ def 
test_ne_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_ne_int"] = bf_df["int64_col"] != bf_df["int64_col"] bf_df["int_ne_1"] = bf_df["int64_col"] != 1 + bf_df["int_ne_null"] = bf_df["int64_col"] != pd.NA bf_df["int_ne_bool"] = bf_df["int64_col"] != bf_df["bool_col"] bf_df["bool_ne_int"] = bf_df["bool_col"] != bf_df["int64_col"] diff --git a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py index 5657874eb5..03b517096e 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pandas as pd import pytest from bigframes import dtypes @@ -342,7 +343,11 @@ def test_map(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[[col_name]] sql = utils._apply_ops_to_sql( bf_df, - [ops.MapOp(mappings=(("value1", "mapped1"),)).as_expr(col_name)], + [ + ops.MapOp(mappings=(("value1", "mapped1"), (pd.NA, "UNKNOWN"))).as_expr( + col_name + ) + ], [col_name], ) From e1d54d2fe16c6a29ff736edcece8e269ba7defb3 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Wed, 14 Jan 2026 18:47:36 -0800 Subject: [PATCH 15/28] refactor: fix some aggregation ops in the sqlglot compiler (#2382) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change fixes several aggregation-related test failures in #2248 by including the following operators: - `agg_ops.DiffOp`: Fixes test_date_series_diff_agg - `agg_ops.AllOp/AnyOp`: Fixes test_list_apply_callable - `agg_ops.QuantileOp`: Fixes test_dataframe_aggregates_median - `agg_ops.ProductOp`: Fixes test_dataframe_groupby_analytic Fixes internal issue 417774347 🦕 --- .../sqlglot/aggregations/unary_compiler.py | 58 +++++++++++------ .../compile/sqlglot/expressions/constants.py | 1 + 
.../test_unary_compiler/test_all/out.sql | 9 ++- .../test_all/window_partition_out.sql | 14 ----- .../out.sql} | 0 .../test_unary_compiler/test_any/out.sql | 9 ++- .../out.sql} | 0 .../test_diff_w_date/out.sql | 15 +++++ .../test_unary_compiler/test_product/out.sql | 2 +- .../test_product/window_partition_out.sql | 14 ++--- .../test_unary_compiler/test_quantile/out.sql | 11 ++-- .../aggregations/test_unary_compiler.py | 63 ++++++++++++------- 12 files changed, 122 insertions(+), 74 deletions(-) delete mode 100644 tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_partition_out.sql rename tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/{test_all/window_out.sql => test_all_w_window/out.sql} (100%) rename tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/{test_any/window_out.sql => test_any_w_window/out.sql} (100%) create mode 100644 tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_diff_w_date/out.sql diff --git a/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py b/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py index 89bb58d7dd..647e86d28a 100644 --- a/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py +++ b/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py @@ -23,6 +23,7 @@ from bigframes.core import window_spec import bigframes.core.compile.sqlglot.aggregations.op_registration as reg from bigframes.core.compile.sqlglot.aggregations.windows import apply_window_if_present +from bigframes.core.compile.sqlglot.expressions import constants import bigframes.core.compile.sqlglot.expressions.typed_expr as typed_expr import bigframes.core.compile.sqlglot.sqlglot_ir as ir from bigframes.operations import aggregations as agg_ops @@ -44,9 +45,13 @@ def _( column: typed_expr.TypedExpr, window: typing.Optional[window_spec.WindowSpec] = None, ) -> sge.Expression: - # BQ will return null for empty column, result 
would be false in pandas. - result = apply_window_if_present(sge.func("LOGICAL_AND", column.expr), window) - return sge.func("IFNULL", result, sge.true()) + expr = column.expr + if column.dtype != dtypes.BOOL_DTYPE: + expr = sge.NEQ(this=expr, expression=sge.convert(0)) + expr = apply_window_if_present(sge.func("LOGICAL_AND", expr), window) + + # BQ will return null for empty column, result would be true in pandas. + return sge.func("COALESCE", expr, sge.convert(True)) @UNARY_OP_REGISTRATION.register(agg_ops.AnyOp) @@ -56,6 +61,8 @@ def _( window: typing.Optional[window_spec.WindowSpec] = None, ) -> sge.Expression: expr = column.expr + if column.dtype != dtypes.BOOL_DTYPE: + expr = sge.NEQ(this=expr, expression=sge.convert(0)) expr = apply_window_if_present(sge.func("LOGICAL_OR", expr), window) # BQ will return null for empty column, result would be false in pandas. @@ -326,6 +333,15 @@ def _( unit=sge.Identifier(this="MICROSECOND"), ) + if column.dtype == dtypes.DATE_DTYPE: + date_diff = sge.DateDiff( + this=column.expr, expression=shifted, unit=sge.Identifier(this="DAY") + ) + return sge.Cast( + this=sge.Floor(this=date_diff * constants._DAY_TO_MICROSECONDS), + to="INT64", + ) + raise TypeError(f"Cannot perform diff on type {column.dtype}") @@ -410,24 +426,28 @@ def _( column: typed_expr.TypedExpr, window: typing.Optional[window_spec.WindowSpec] = None, ) -> sge.Expression: + expr = column.expr + if column.dtype == dtypes.BOOL_DTYPE: + expr = sge.Cast(this=expr, to="INT64") + # Need to short-circuit as log with zeroes is illegal sql - is_zero = sge.EQ(this=column.expr, expression=sge.convert(0)) + is_zero = sge.EQ(this=expr, expression=sge.convert(0)) # There is no product sql aggregate function, so must implement as a sum of logs, and then # apply power after. Note, log and power base must be equal! This impl uses natural log. 
- logs = ( - sge.Case() - .when(is_zero, sge.convert(0)) - .else_(sge.func("LN", sge.func("ABS", column.expr))) + logs = sge.If( + this=is_zero, + true=sge.convert(0), + false=sge.func("LOG", sge.convert(2), sge.func("ABS", expr)), ) logs_sum = apply_window_if_present(sge.func("SUM", logs), window) - magnitude = sge.func("EXP", logs_sum) + magnitude = sge.func("POWER", sge.convert(2), logs_sum) # Can't determine sign from logs, so have to determine parity of count of negative inputs is_negative = ( sge.Case() .when( - sge.LT(this=sge.func("SIGN", column.expr), expression=sge.convert(0)), + sge.EQ(this=sge.func("SIGN", expr), expression=sge.convert(-1)), sge.convert(1), ) .else_(sge.convert(0)) @@ -445,11 +465,7 @@ def _( .else_( sge.Mul( this=magnitude, - expression=sge.If( - this=sge.EQ(this=negative_count_parity, expression=sge.convert(1)), - true=sge.convert(-1), - false=sge.convert(1), - ), + expression=sge.func("POWER", sge.convert(-1), negative_count_parity), ) ) ) @@ -499,14 +515,18 @@ def _( column: typed_expr.TypedExpr, window: typing.Optional[window_spec.WindowSpec] = None, ) -> sge.Expression: - # TODO: Support interpolation argument - # TODO: Support percentile_disc - result: sge.Expression = sge.func("PERCENTILE_CONT", column.expr, sge.convert(op.q)) + expr = column.expr + if column.dtype == dtypes.BOOL_DTYPE: + expr = sge.Cast(this=expr, to="INT64") + + result: sge.Expression = sge.func("PERCENTILE_CONT", expr, sge.convert(op.q)) if window is None: - # PERCENTILE_CONT is a navigation function, not an aggregate function, so it always needs an OVER clause. + # PERCENTILE_CONT is a navigation function, not an aggregate function, + # so it always needs an OVER clause. 
result = sge.Window(this=result) else: result = apply_window_if_present(result, window) + if op.should_floor_result: result = sge.Cast(this=sge.func("FLOOR", result), to="INT64") return result diff --git a/bigframes/core/compile/sqlglot/expressions/constants.py b/bigframes/core/compile/sqlglot/expressions/constants.py index f383306292..5ba4a72279 100644 --- a/bigframes/core/compile/sqlglot/expressions/constants.py +++ b/bigframes/core/compile/sqlglot/expressions/constants.py @@ -20,6 +20,7 @@ _NAN = sge.Cast(this=sge.convert("NaN"), to="FLOAT64") _INF = sge.Cast(this=sge.convert("Infinity"), to="FLOAT64") _NEG_INF = sge.Cast(this=sge.convert("-Infinity"), to="FLOAT64") +_DAY_TO_MICROSECONDS = sge.convert(86400000000) # Approx Highest number you can pass in to EXP function and get a valid FLOAT64 result # FLOAT64 has 11 exponent bits, so max values is about 2**(2**10) diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/out.sql index d31b21f56b..0be2fea80b 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/out.sql @@ -1,12 +1,15 @@ WITH `bfcte_0` AS ( SELECT - `bool_col` + `bool_col`, + `int64_col` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` ), `bfcte_1` AS ( SELECT - COALESCE(LOGICAL_AND(`bool_col`), TRUE) AS `bfcol_1` + COALESCE(LOGICAL_AND(`bool_col`), TRUE) AS `bfcol_2`, + COALESCE(LOGICAL_AND(`int64_col` <> 0), TRUE) AS `bfcol_3` FROM `bfcte_0` ) SELECT - `bfcol_1` AS `bool_col` + `bfcol_2` AS `bool_col`, + `bfcol_3` AS `int64_col` FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_partition_out.sql 
b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_partition_out.sql deleted file mode 100644 index 23357817c1..0000000000 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_partition_out.sql +++ /dev/null @@ -1,14 +0,0 @@ -WITH `bfcte_0` AS ( - SELECT - `bool_col`, - `string_col` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` -), `bfcte_1` AS ( - SELECT - *, - COALESCE(LOGICAL_AND(`bool_col`) OVER (PARTITION BY `string_col`), TRUE) AS `bfcol_2` - FROM `bfcte_0` -) -SELECT - `bfcol_2` AS `agg_bool` -FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all_w_window/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_out.sql rename to tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all_w_window/out.sql diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any/out.sql index 03b0d5c151..ae62e22e36 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any/out.sql @@ -1,12 +1,15 @@ WITH `bfcte_0` AS ( SELECT - `bool_col` + `bool_col`, + `int64_col` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` ), `bfcte_1` AS ( SELECT - COALESCE(LOGICAL_OR(`bool_col`), FALSE) AS `bfcol_1` + COALESCE(LOGICAL_OR(`bool_col`), FALSE) AS `bfcol_2`, + COALESCE(LOGICAL_OR(`int64_col` <> 0), FALSE) AS `bfcol_3` FROM `bfcte_0` ) SELECT - `bfcol_1` AS `bool_col` + `bfcol_2` AS `bool_col`, + `bfcol_3` AS `int64_col` FROM `bfcte_1` \ No newline 
at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_w_window/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any/window_out.sql rename to tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_w_window/out.sql diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_diff_w_date/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_diff_w_date/out.sql new file mode 100644 index 0000000000..4f1729d2e2 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_diff_w_date/out.sql @@ -0,0 +1,15 @@ +WITH `bfcte_0` AS ( + SELECT + `date_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CAST(FLOOR( + DATE_DIFF(`date_col`, LAG(`date_col`, 1) OVER (ORDER BY `date_col` ASC NULLS LAST), DAY) * 86400000000 + ) AS INT64) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `diff_date` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_product/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_product/out.sql index bec1527137..94ca21988e 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_product/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_product/out.sql @@ -7,7 +7,7 @@ WITH `bfcte_0` AS ( CASE WHEN LOGICAL_OR(`int64_col` = 0) THEN 0 - ELSE EXP(SUM(CASE WHEN `int64_col` = 0 THEN 0 ELSE LN(ABS(`int64_col`)) END)) * IF(MOD(SUM(CASE WHEN SIGN(`int64_col`) < 0 THEN 1 ELSE 0 END), 2) = 1, -1, 1) + ELSE POWER(2, SUM(IF(`int64_col` = 0, 0, 
LOG(ABS(`int64_col`), 2)))) * POWER(-1, MOD(SUM(CASE WHEN SIGN(`int64_col`) = -1 THEN 1 ELSE 0 END), 2)) END AS `bfcol_1` FROM `bfcte_0` ) diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_product/window_partition_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_product/window_partition_out.sql index 9c1650222a..c5f12f7009 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_product/window_partition_out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_product/window_partition_out.sql @@ -9,15 +9,15 @@ WITH `bfcte_0` AS ( CASE WHEN LOGICAL_OR(`int64_col` = 0) OVER (PARTITION BY `string_col`) THEN 0 - ELSE EXP( - SUM(CASE WHEN `int64_col` = 0 THEN 0 ELSE LN(ABS(`int64_col`)) END) OVER (PARTITION BY `string_col`) - ) * IF( + ELSE POWER( + 2, + SUM(IF(`int64_col` = 0, 0, LOG(ABS(`int64_col`), 2))) OVER (PARTITION BY `string_col`) + ) * POWER( + -1, MOD( - SUM(CASE WHEN SIGN(`int64_col`) < 0 THEN 1 ELSE 0 END) OVER (PARTITION BY `string_col`), + SUM(CASE WHEN SIGN(`int64_col`) = -1 THEN 1 ELSE 0 END) OVER (PARTITION BY `string_col`), 2 - ) = 1, - -1, - 1 + ) ) END AS `bfcol_2` FROM `bfcte_0` diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_quantile/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_quantile/out.sql index b79d8d381f..e337356d96 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_quantile/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_quantile/out.sql @@ -1,14 +1,17 @@ WITH `bfcte_0` AS ( SELECT + `bool_col`, `int64_col` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` ), `bfcte_1` AS ( SELECT - PERCENTILE_CONT(`int64_col`, 0.5) OVER () AS `bfcol_1`, - CAST(FLOOR(PERCENTILE_CONT(`int64_col`, 0.5) OVER ()) AS INT64) AS 
`bfcol_2` + PERCENTILE_CONT(`int64_col`, 0.5) OVER () AS `bfcol_4`, + PERCENTILE_CONT(CAST(`bool_col` AS INT64), 0.5) OVER () AS `bfcol_5`, + CAST(FLOOR(PERCENTILE_CONT(`int64_col`, 0.5) OVER ()) AS INT64) AS `bfcol_6` FROM `bfcte_0` ) SELECT - `bfcol_1` AS `quantile`, - `bfcol_2` AS `quantile_floor` + `bfcol_4` AS `int64`, + `bfcol_5` AS `bool`, + `bfcol_6` AS `int64_w_floor` FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py index c15d70478a..d9bfb1f5f3 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py +++ b/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py @@ -63,41 +63,47 @@ def _apply_unary_window_op( def test_all(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["bool_col", "int64_col"]] + ops_map = { + "bool_col": agg_ops.AllOp().as_expr("bool_col"), + "int64_col": agg_ops.AllOp().as_expr("int64_col"), + } + sql = _apply_unary_agg_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) + + snapshot.assert_match(sql, "out.sql") + + +def test_all_w_window(scalar_types_df: bpd.DataFrame, snapshot): col_name = "bool_col" bf_df = scalar_types_df[[col_name]] agg_expr = agg_ops.AllOp().as_expr(col_name) - sql = _apply_unary_agg_ops(bf_df, [agg_expr], [col_name]) - - snapshot.assert_match(sql, "out.sql") # Window tests window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) sql_window = _apply_unary_window_op(bf_df, agg_expr, window, "agg_bool") - snapshot.assert_match(sql_window, "window_out.sql") - - bf_df_str = scalar_types_df[[col_name, "string_col"]] - window_partition = window_spec.WindowSpec( - grouping_keys=(expression.deref("string_col"),), - ordering=(ordering.descending_over(col_name),), - ) - sql_window_partition = _apply_unary_window_op( - bf_df_str, agg_expr, window_partition, "agg_bool" - ) - 
snapshot.assert_match(sql_window_partition, "window_partition_out.sql") + snapshot.assert_match(sql_window, "out.sql") def test_any(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["bool_col", "int64_col"]] + ops_map = { + "bool_col": agg_ops.AnyOp().as_expr("bool_col"), + "int64_col": agg_ops.AnyOp().as_expr("int64_col"), + } + sql = _apply_unary_agg_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) + + snapshot.assert_match(sql, "out.sql") + + +def test_any_w_window(scalar_types_df: bpd.DataFrame, snapshot): col_name = "bool_col" bf_df = scalar_types_df[[col_name]] agg_expr = agg_ops.AnyOp().as_expr(col_name) - sql = _apply_unary_agg_ops(bf_df, [agg_expr], [col_name]) - - snapshot.assert_match(sql, "out.sql") # Window tests window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) sql_window = _apply_unary_window_op(bf_df, agg_expr, window, "agg_bool") - snapshot.assert_match(sql_window, "window_out.sql") + snapshot.assert_match(sql_window, "out.sql") def test_approx_quartiles(scalar_types_df: bpd.DataFrame, snapshot): @@ -247,6 +253,17 @@ def test_diff_w_datetime(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") +def test_diff_w_date(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "date_col" + bf_df_date = scalar_types_df[[col_name]] + window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) + op = agg_exprs.UnaryAggregation( + agg_ops.DiffOp(periods=1), expression.deref(col_name) + ) + sql = _apply_unary_window_op(bf_df_date, op, window, "diff_date") + snapshot.assert_match(sql, "out.sql") + + def test_diff_w_timestamp(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df_timestamp = scalar_types_df[[col_name]] @@ -474,12 +491,12 @@ def test_qcut(scalar_types_df: bpd.DataFrame, snapshot): def test_quantile(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "int64_col" - bf_df = scalar_types_df[[col_name]] + bf_df = 
scalar_types_df[["int64_col", "bool_col"]] agg_ops_map = { - "quantile": agg_ops.QuantileOp(q=0.5).as_expr(col_name), - "quantile_floor": agg_ops.QuantileOp(q=0.5, should_floor_result=True).as_expr( - col_name + "int64": agg_ops.QuantileOp(q=0.5).as_expr("int64_col"), + "bool": agg_ops.QuantileOp(q=0.5).as_expr("bool_col"), + "int64_w_floor": agg_ops.QuantileOp(q=0.5, should_floor_result=True).as_expr( + "int64_col" ), } sql = _apply_unary_agg_ops( From e156660a85adddbd4675fe99f96f0aa9564c4a70 Mon Sep 17 00:00:00 2001 From: jialuoo Date: Thu, 15 Jan 2026 18:11:33 +0000 Subject: [PATCH 16/28] chore: Migrate IntegerLabelToDatetimeOp operator to SQLGlot (#2310) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes b/447388852 🦕 --- .../sqlglot/expressions/datetime_ops.py | 279 ++++++++++++++++-- .../test_integer_label_to_datetime/out.sql | 58 ++++ .../out.sql | 16 + .../out.sql | 50 ++++ .../out.sql | 54 ++++ .../out.sql | 18 ++ .../out.sql | 14 + .../sqlglot/expressions/test_datetime_ops.py | 71 +++++ 8 files changed, 538 insertions(+), 22 deletions(-) create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime/out.sql create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_fixed/out.sql create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_month/out.sql create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_quarter/out.sql create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_week/out.sql create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_year/out.sql diff --git a/bigframes/core/compile/sqlglot/expressions/datetime_ops.py b/bigframes/core/compile/sqlglot/expressions/datetime_ops.py index e20d2da567..7f3e8135af 100644 --- a/bigframes/core/compile/sqlglot/expressions/datetime_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/datetime_ops.py @@ -19,6 +19,7 @@ from bigframes import dtypes from bigframes import operations as ops from bigframes.core.compile.constants import UNIT_TO_US_CONVERSION_FACTORS +from bigframes.core.compile.sqlglot import sqlglot_types from bigframes.core.compile.sqlglot.expressions.typed_expr import 
TypedExpr import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler @@ -26,28 +27,6 @@ register_binary_op = scalar_compiler.scalar_op_compiler.register_binary_op -def _calculate_resample_first(y: TypedExpr, origin: str) -> sge.Expression: - if origin == "epoch": - return sge.convert(0) - elif origin == "start_day": - return sge.func( - "UNIX_MICROS", - sge.Cast( - this=sge.Cast( - this=y.expr, to=sge.DataType(this=sge.DataType.Type.DATE) - ), - to=sge.DataType(this=sge.DataType.Type.TIMESTAMPTZ), - ), - ) - elif origin == "start": - return sge.func( - "UNIX_MICROS", - sge.Cast(this=y.expr, to=sge.DataType(this=sge.DataType.Type.TIMESTAMPTZ)), - ) - else: - raise ValueError(f"Origin {origin} not supported") - - @register_binary_op(ops.DatetimeToIntegerLabelOp, pass_op=True) def datetime_to_integer_label_op( x: TypedExpr, y: TypedExpr, op: ops.DatetimeToIntegerLabelOp @@ -317,6 +296,20 @@ def _(expr: TypedExpr, op: ops.FloorDtOp) -> sge.Expression: return sge.TimestampTrunc(this=expr.expr, unit=sge.Identifier(this=bq_freq)) +def _calculate_resample_first(y: TypedExpr, origin: str) -> sge.Expression: + if origin == "epoch": + return sge.convert(0) + elif origin == "start_day": + return sge.func( + "UNIX_MICROS", + sge.Cast(this=sge.Cast(this=y.expr, to="DATE"), to="TIMESTAMP"), + ) + elif origin == "start": + return sge.func("UNIX_MICROS", sge.Cast(this=y.expr, to="TIMESTAMP")) + else: + raise ValueError(f"Origin {origin} not supported") + + @register_unary_op(ops.hour_op) def _(expr: TypedExpr) -> sge.Expression: return sge.Extract(this=sge.Identifier(this="HOUR"), expression=expr.expr) @@ -436,3 +429,245 @@ def _(expr: TypedExpr, op: ops.UnixSeconds) -> sge.Expression: @register_unary_op(ops.year_op) def _(expr: TypedExpr) -> sge.Expression: return sge.Extract(this=sge.Identifier(this="YEAR"), expression=expr.expr) + + +@register_binary_op(ops.IntegerLabelToDatetimeOp, pass_op=True) +def integer_label_to_datetime_op( + x: TypedExpr, y: TypedExpr, op: 
ops.IntegerLabelToDatetimeOp +) -> sge.Expression: + # Determine if the frequency is fixed by checking if 'op.freq.nanos' is defined. + try: + return _integer_label_to_datetime_op_fixed_frequency(x, y, op) + + except ValueError: + # Non-fixed frequency conversions for units ranging from weeks to years. + rule_code = op.freq.rule_code + + if rule_code == "W-SUN": + return _integer_label_to_datetime_op_weekly_freq(x, y, op) + + if rule_code in ("ME", "M"): + return _integer_label_to_datetime_op_monthly_freq(x, y, op) + + if rule_code in ("QE-DEC", "Q-DEC"): + return _integer_label_to_datetime_op_quarterly_freq(x, y, op) + + if rule_code in ("YE-DEC", "A-DEC", "Y-DEC"): + return _integer_label_to_datetime_op_yearly_freq(x, y, op) + + # If the rule_code is not recognized, raise an error here. + raise ValueError(f"Unsupported frequency rule code: {rule_code}") + + +def _integer_label_to_datetime_op_fixed_frequency( + x: TypedExpr, y: TypedExpr, op: ops.IntegerLabelToDatetimeOp +) -> sge.Expression: + """ + This function handles fixed frequency conversions where the unit can range + from microseconds (us) to days. + """ + us = op.freq.nanos / 1000 + first = _calculate_resample_first(y, op.origin) # type: ignore + x_label = sge.Cast( + this=sge.func( + "TIMESTAMP_MICROS", + sge.Cast( + this=sge.Add( + this=sge.Mul( + this=sge.Cast(this=x.expr, to="BIGNUMERIC"), + expression=sge.convert(int(us)), + ), + expression=sge.Cast(this=first, to="BIGNUMERIC"), + ), + to="INT64", + ), + ), + to=sqlglot_types.from_bigframes_dtype(y.dtype), + ) + return x_label + + +def _integer_label_to_datetime_op_weekly_freq( + x: TypedExpr, y: TypedExpr, op: ops.IntegerLabelToDatetimeOp +) -> sge.Expression: + n = op.freq.n + # Calculate microseconds for the weekly interval. 
+ us = n * 7 * 24 * 60 * 60 * 1000000 + first = sge.func( + "UNIX_MICROS", + sge.Add( + this=sge.TimestampTrunc( + this=sge.Cast(this=y.expr, to="TIMESTAMP"), + unit=sge.Var(this="WEEK(MONDAY)"), + ), + expression=sge.Interval( + this=sge.convert(6), unit=sge.Identifier(this="DAY") + ), + ), + ) + return sge.Cast( + this=sge.func( + "TIMESTAMP_MICROS", + sge.Cast( + this=sge.Add( + this=sge.Mul( + this=sge.Cast(this=x.expr, to="BIGNUMERIC"), + expression=sge.convert(us), + ), + expression=sge.Cast(this=first, to="BIGNUMERIC"), + ), + to="INT64", + ), + ), + to=sqlglot_types.from_bigframes_dtype(y.dtype), + ) + + +def _integer_label_to_datetime_op_monthly_freq( + x: TypedExpr, y: TypedExpr, op: ops.IntegerLabelToDatetimeOp +) -> sge.Expression: + n = op.freq.n + one = sge.convert(1) + twelve = sge.convert(12) + first = sge.Sub( # type: ignore + this=sge.Add( + this=sge.Mul( + this=sge.Extract(this="YEAR", expression=y.expr), + expression=twelve, + ), + expression=sge.Extract(this="MONTH", expression=y.expr), + ), + expression=one, + ) + x_val = sge.Add( + this=sge.Mul(this=x.expr, expression=sge.convert(n)), expression=first + ) + year = sge.Cast( + this=sge.Floor(this=sge.func("IEEE_DIVIDE", x_val, twelve)), + to="INT64", + ) + month = sge.Add(this=sge.Mod(this=x_val, expression=twelve), expression=one) + + next_year = sge.Case( + ifs=[ + sge.If( + this=sge.EQ(this=month, expression=twelve), + true=sge.Add(this=year, expression=one), + ) + ], + default=year, + ) + next_month = sge.Case( + ifs=[sge.If(this=sge.EQ(this=month, expression=twelve), true=one)], + default=sge.Add(this=month, expression=one), + ) + next_month_date = sge.func( + "TIMESTAMP", + sge.Anonymous( + this="DATETIME", + expressions=[ + next_year, + next_month, + one, + sge.convert(0), + sge.convert(0), + sge.convert(0), + ], + ), + ) + x_label = sge.Sub( # type: ignore + this=next_month_date, expression=sge.Interval(this=one, unit="DAY") + ) + return sge.Cast(this=x_label, 
to=sqlglot_types.from_bigframes_dtype(y.dtype)) + + +def _integer_label_to_datetime_op_quarterly_freq( + x: TypedExpr, y: TypedExpr, op: ops.IntegerLabelToDatetimeOp +) -> sge.Expression: + n = op.freq.n + one = sge.convert(1) + three = sge.convert(3) + four = sge.convert(4) + twelve = sge.convert(12) + first = sge.Sub( # type: ignore + this=sge.Add( + this=sge.Mul( + this=sge.Extract(this="YEAR", expression=y.expr), + expression=four, + ), + expression=sge.Extract(this="QUARTER", expression=y.expr), + ), + expression=one, + ) + x_val = sge.Add( + this=sge.Mul(this=x.expr, expression=sge.convert(n)), expression=first + ) + year = sge.Cast( + this=sge.Floor(this=sge.func("IEEE_DIVIDE", x_val, four)), + to="INT64", + ) + month = sge.Mul( # type: ignore + this=sge.Paren( + this=sge.Add(this=sge.Mod(this=x_val, expression=four), expression=one) + ), + expression=three, + ) + + next_year = sge.Case( + ifs=[ + sge.If( + this=sge.EQ(this=month, expression=twelve), + true=sge.Add(this=year, expression=one), + ) + ], + default=year, + ) + next_month = sge.Case( + ifs=[sge.If(this=sge.EQ(this=month, expression=twelve), true=one)], + default=sge.Add(this=month, expression=one), + ) + next_month_date = sge.Anonymous( + this="DATETIME", + expressions=[ + next_year, + next_month, + one, + sge.convert(0), + sge.convert(0), + sge.convert(0), + ], + ) + x_label = sge.Sub( # type: ignore + this=next_month_date, expression=sge.Interval(this=one, unit="DAY") + ) + return sge.Cast(this=x_label, to=sqlglot_types.from_bigframes_dtype(y.dtype)) + + +def _integer_label_to_datetime_op_yearly_freq( + x: TypedExpr, y: TypedExpr, op: ops.IntegerLabelToDatetimeOp +) -> sge.Expression: + n = op.freq.n + one = sge.convert(1) + first = sge.Extract(this="YEAR", expression=y.expr) + x_val = sge.Add( + this=sge.Mul(this=x.expr, expression=sge.convert(n)), expression=first + ) + next_year = sge.Add(this=x_val, expression=one) # type: ignore + next_month_date = sge.func( + "TIMESTAMP", + sge.Anonymous( 
+ this="DATETIME", + expressions=[ + next_year, + one, + one, + sge.convert(0), + sge.convert(0), + sge.convert(0), + ], + ), + ) + x_label = sge.Sub( # type: ignore + this=next_month_date, expression=sge.Interval(this=one, unit="DAY") + ) + return sge.Cast(this=x_label, to=sqlglot_types.from_bigframes_dtype(y.dtype)) diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime/out.sql new file mode 100644 index 0000000000..2a1bd0e2e2 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime/out.sql @@ -0,0 +1,58 @@ +WITH `bfcte_0` AS ( + SELECT + `rowindex`, + `timestamp_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CAST(TIMESTAMP_MICROS( + CAST(CAST(`rowindex` AS BIGNUMERIC) * 86400000000 + CAST(UNIX_MICROS(CAST(`timestamp_col` AS TIMESTAMP)) AS BIGNUMERIC) AS INT64) + ) AS TIMESTAMP) AS `bfcol_2`, + CAST(DATETIME( + CASE + WHEN ( + MOD( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + 1 + ) * 3 = 12 + THEN CAST(FLOOR( + IEEE_DIVIDE( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + ) AS INT64) + 1 + ELSE CAST(FLOOR( + IEEE_DIVIDE( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + ) AS INT64) + END, + CASE + WHEN ( + MOD( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + 1 + ) * 3 = 12 + THEN 1 + ELSE ( + MOD( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + 1 + ) * 3 + 1 + END, + 1, + 0, + 0, + 0 + ) - INTERVAL 1 DAY AS TIMESTAMP) AS `bfcol_3` + FROM `bfcte_0` +) +SELECT + 
`bfcol_2` AS `fixed_freq`, + `bfcol_3` AS `non_fixed_freq` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_fixed/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_fixed/out.sql new file mode 100644 index 0000000000..8a759e85f9 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_fixed/out.sql @@ -0,0 +1,16 @@ +WITH `bfcte_0` AS ( + SELECT + `rowindex`, + `timestamp_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CAST(TIMESTAMP_MICROS( + CAST(CAST(`rowindex` AS BIGNUMERIC) * 86400000000 + CAST(UNIX_MICROS(CAST(`timestamp_col` AS TIMESTAMP)) AS BIGNUMERIC) AS INT64) + ) AS TIMESTAMP) AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `fixed_freq` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_month/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_month/out.sql new file mode 100644 index 0000000000..a9e64fead6 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_month/out.sql @@ -0,0 +1,50 @@ +WITH `bfcte_0` AS ( + SELECT + `rowindex`, + `timestamp_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CAST(TIMESTAMP( + DATETIME( + CASE + WHEN MOD( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 12 + EXTRACT(MONTH FROM `timestamp_col`) - 1, + 12 + ) + 1 = 12 + THEN CAST(FLOOR( + IEEE_DIVIDE( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 12 + EXTRACT(MONTH FROM `timestamp_col`) - 1, + 12 + ) + ) AS INT64) + 1 + ELSE CAST(FLOOR( + IEEE_DIVIDE( + `rowindex` * 1 + EXTRACT(YEAR FROM 
`timestamp_col`) * 12 + EXTRACT(MONTH FROM `timestamp_col`) - 1, + 12 + ) + ) AS INT64) + END, + CASE + WHEN MOD( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 12 + EXTRACT(MONTH FROM `timestamp_col`) - 1, + 12 + ) + 1 = 12 + THEN 1 + ELSE MOD( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 12 + EXTRACT(MONTH FROM `timestamp_col`) - 1, + 12 + ) + 1 + 1 + END, + 1, + 0, + 0, + 0 + ) + ) - INTERVAL 1 DAY AS TIMESTAMP) AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `non_fixed_freq_monthly` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_quarter/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_quarter/out.sql new file mode 100644 index 0000000000..58064855a9 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_quarter/out.sql @@ -0,0 +1,54 @@ +WITH `bfcte_0` AS ( + SELECT + `rowindex`, + `timestamp_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CAST(DATETIME( + CASE + WHEN ( + MOD( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + 1 + ) * 3 = 12 + THEN CAST(FLOOR( + IEEE_DIVIDE( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + ) AS INT64) + 1 + ELSE CAST(FLOOR( + IEEE_DIVIDE( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + ) AS INT64) + END, + CASE + WHEN ( + MOD( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + 1 + ) * 3 = 12 + THEN 1 + ELSE ( + MOD( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + 1 + ) * 3 + 1 + END, + 1, + 0, + 0, + 0 + 
) - INTERVAL 1 DAY AS TIMESTAMP) AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `non_fixed_freq` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_week/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_week/out.sql new file mode 100644 index 0000000000..142f8561f4 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_week/out.sql @@ -0,0 +1,18 @@ +WITH `bfcte_0` AS ( + SELECT + `rowindex`, + `timestamp_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CAST(TIMESTAMP_MICROS( + CAST(CAST(`rowindex` AS BIGNUMERIC) * 604800000000 + CAST(UNIX_MICROS( + TIMESTAMP_TRUNC(CAST(`timestamp_col` AS TIMESTAMP), WEEK(MONDAY)) + INTERVAL 6 DAY + ) AS BIGNUMERIC) AS INT64) + ) AS TIMESTAMP) AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `non_fixed_freq_weekly` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_year/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_year/out.sql new file mode 100644 index 0000000000..ab77a9d190 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_year/out.sql @@ -0,0 +1,14 @@ +WITH `bfcte_0` AS ( + SELECT + `rowindex`, + `timestamp_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CAST(TIMESTAMP(DATETIME(`rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) + 1, 1, 1, 0, 0, 0)) - INTERVAL 1 DAY AS TIMESTAMP) AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `non_fixed_freq_yearly` +FROM `bfcte_1` \ No newline at end of file diff --git 
a/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py index c4acb37e51..95156748e9 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py @@ -293,3 +293,74 @@ def test_sub_timedelta(scalar_types_df: bpd.DataFrame, snapshot): bf_df["timedelta_sub_timedelta"] = bf_df["duration_col"] - bf_df["duration_col"] snapshot.assert_match(bf_df.sql, "out.sql") + + +def test_integer_label_to_datetime_fixed(scalar_types_df: bpd.DataFrame, snapshot): + col_names = ["rowindex", "timestamp_col"] + bf_df = scalar_types_df[col_names] + ops_map = { + "fixed_freq": ops.IntegerLabelToDatetimeOp( + freq=pd.tseries.offsets.Day(), origin="start", label="left" # type: ignore + ).as_expr("rowindex", "timestamp_col"), + } + + sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) + snapshot.assert_match(sql, "out.sql") + + +def test_integer_label_to_datetime_week(scalar_types_df: bpd.DataFrame, snapshot): + col_names = ["rowindex", "timestamp_col"] + bf_df = scalar_types_df[col_names] + ops_map = { + "non_fixed_freq_weekly": ops.IntegerLabelToDatetimeOp( + freq=pd.tseries.offsets.Week(weekday=6), origin="start", label="left" # type: ignore + ).as_expr("rowindex", "timestamp_col"), + } + + sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) + snapshot.assert_match(sql, "out.sql") + + +def test_integer_label_to_datetime_month(scalar_types_df: bpd.DataFrame, snapshot): + col_names = ["rowindex", "timestamp_col"] + bf_df = scalar_types_df[col_names] + ops_map = { + "non_fixed_freq_monthly": ops.IntegerLabelToDatetimeOp( + freq=pd.tseries.offsets.MonthEnd(), # type: ignore + origin="start", + label="left", + ).as_expr("rowindex", "timestamp_col"), + } + + sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) + snapshot.assert_match(sql, 
"out.sql") + + +def test_integer_label_to_datetime_quarter(scalar_types_df: bpd.DataFrame, snapshot): + col_names = ["rowindex", "timestamp_col"] + bf_df = scalar_types_df[col_names] + ops_map = { + "non_fixed_freq": ops.IntegerLabelToDatetimeOp( + freq=pd.tseries.offsets.QuarterEnd(startingMonth=12), # type: ignore + origin="start", + label="left", + ).as_expr("rowindex", "timestamp_col"), + } + + sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) + snapshot.assert_match(sql, "out.sql") + + +def test_integer_label_to_datetime_year(scalar_types_df: bpd.DataFrame, snapshot): + col_names = ["rowindex", "timestamp_col"] + bf_df = scalar_types_df[col_names] + ops_map = { + "non_fixed_freq_yearly": ops.IntegerLabelToDatetimeOp( + freq=pd.tseries.offsets.YearEnd(month=12), # type: ignore + origin="start", + label="left", + ).as_expr("rowindex", "timestamp_col"), + } + + sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) + snapshot.assert_match(sql, "out.sql") From 39481a5ba0b0ca66e09283dbf5b090fb40fe5954 Mon Sep 17 00:00:00 2001 From: jialuoo Date: Thu, 15 Jan 2026 18:11:52 +0000 Subject: [PATCH 17/28] chore: Migrate RemoteFunctionOp operator to SQLGlot (#2377) Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes b/452130300 --- .../sqlglot/expressions/generic_ops.py | 19 ++++++++++ .../test_remote_function_op/out.sql | 19 ++++++++++ .../sqlglot/expressions/test_generic_ops.py | 36 +++++++++++++++++++ 3 files changed, 74 insertions(+) create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_remote_function_op/out.sql diff --git a/bigframes/core/compile/sqlglot/expressions/generic_ops.py b/bigframes/core/compile/sqlglot/expressions/generic_ops.py index ec0d0b3b34..0fd8a010ae 100644 --- a/bigframes/core/compile/sqlglot/expressions/generic_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/generic_ops.py @@ -152,6 +152,25 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: return sge.Coalesce(this=left.expr, expressions=[right.expr]) +@register_unary_op(ops.RemoteFunctionOp, pass_op=True) +def _(expr: TypedExpr, op: ops.RemoteFunctionOp) -> sge.Expression: + routine_ref = op.function_def.routine_ref + # Quote project, dataset, and routine IDs to avoid keyword clashes. 
+ func_name = ( + f"`{routine_ref.project}`.`{routine_ref.dataset_id}`.`{routine_ref.routine_id}`" + ) + func = sge.func(func_name, expr.expr) + + if not op.apply_on_null: + return sge.If( + this=sge.Is(this=expr.expr, expression=sge.Null()), + true=expr.expr, + false=func, + ) + + return func + + @register_binary_op(ops.BinaryRemoteFunctionOp, pass_op=True) def _( left: TypedExpr, right: TypedExpr, op: ops.BinaryRemoteFunctionOp diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_remote_function_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_remote_function_op/out.sql new file mode 100644 index 0000000000..dee0d35355 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_remote_function_op/out.sql @@ -0,0 +1,19 @@ +WITH `bfcte_0` AS ( + SELECT + `int64_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + `my_project`.`my_dataset`.`my_routine`(`int64_col`) AS `bfcol_1`, + IF( + `int64_col` IS NULL, + `int64_col`, + `my_project`.`my_dataset`.`my_routine`(`int64_col`) + ) AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `apply_on_null_true`, + `bfcol_2` AS `apply_on_null_false` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py index 03b517096e..c4f16d93a1 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py @@ -169,6 +169,42 @@ def test_astype_json_invalid( ) +def test_remote_function_op(scalar_types_df: bpd.DataFrame, snapshot): + from google.cloud import bigquery + + from bigframes.functions import udf_def + + bf_df = scalar_types_df[["int64_col"]] + function_def = udf_def.BigqueryUdf( + routine_ref=bigquery.RoutineReference.from_string( + "my_project.my_dataset.my_routine" 
+ ), + signature=udf_def.UdfSignature( + input_types=( + udf_def.UdfField( + "x", + bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 + ), + ), + ), + output_bq_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.FLOAT64 + ), + ), + ) + ops_map = { + "apply_on_null_true": ops.RemoteFunctionOp( + function_def=function_def, apply_on_null=True + ).as_expr("int64_col"), + "apply_on_null_false": ops.RemoteFunctionOp( + function_def=function_def, apply_on_null=False + ).as_expr("int64_col"), + } + sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) + snapshot.assert_match(sql, "out.sql") + + def test_binary_remote_function_op(scalar_types_df: bpd.DataFrame, snapshot): from google.cloud import bigquery From 9c3bbc36983dffb265454f27b37450df8c5fbc71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 15 Jan 2026 13:10:42 -0600 Subject: [PATCH 18/28] feat: Add BigQuery ObjectRef functions to `bigframes.bigquery.obj` (#2380) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change introduces support for BigQuery ObjectRef functions: - `OBJ.FETCH_METADATA` - `OBJ.GET_ACCESS_URL` - `OBJ.MAKE_REF` These are exposed via a new `bigframes.bigquery.obj` module. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Replaces https://github.com/googleapis/python-bigquery-dataframes/pull/2340 🦕 --- .gitignore | 1 + bigframes/bigquery/__init__.py | 3 +- bigframes/bigquery/_operations/obj.py | 115 ++++++++++++++++ bigframes/bigquery/obj.py | 41 ++++++ .../ibis_compiler/scalar_op_registry.py | 23 ++++ .../compile/sqlglot/expressions/blob_ops.py | 15 ++- bigframes/operations/__init__.py | 2 + bigframes/operations/blob_ops.py | 12 ++ docs/reference/index.rst | 1 + tests/system/large/bigquery/__init__.py | 13 ++ tests/system/large/bigquery/test_obj.py | 41 ++++++ tests/unit/bigquery/test_obj.py | 125 ++++++++++++++++++ 12 files changed, 390 insertions(+), 2 deletions(-) create mode 100644 bigframes/bigquery/_operations/obj.py create mode 100644 bigframes/bigquery/obj.py create mode 100644 tests/system/large/bigquery/__init__.py create mode 100644 tests/system/large/bigquery/test_obj.py create mode 100644 tests/unit/bigquery/test_obj.py diff --git a/.gitignore b/.gitignore index 0ff74ef528..52dcccd33d 100644 --- a/.gitignore +++ b/.gitignore @@ -64,3 +64,4 @@ tests/js/node_modules/ pylintrc pylintrc.test dummy.pkl +.mypy_cache/ diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index 7a7a01a8fc..0bbbc418e6 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -18,7 +18,7 @@ import sys -from bigframes.bigquery import ai, ml +from bigframes.bigquery import ai, ml, obj from bigframes.bigquery._operations.approx_agg import approx_top_count from bigframes.bigquery._operations.array import ( array_agg, @@ -158,4 +158,5 @@ # Modules / SQL namespaces "ai", "ml", + "obj", ] diff --git a/bigframes/bigquery/_operations/obj.py b/bigframes/bigquery/_operations/obj.py new file mode 100644 index 
0000000000..5aef00e73b --- /dev/null +++ b/bigframes/bigquery/_operations/obj.py @@ -0,0 +1,115 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""This module exposes BigQuery ObjectRef functions. + +See bigframes.bigquery.obj for public docs. +""" + + +from __future__ import annotations + +import datetime +from typing import Optional, Sequence, Union + +import numpy as np +import pandas as pd + +from bigframes.core import convert +from bigframes.core.logging import log_adapter +import bigframes.core.utils as utils +import bigframes.operations as ops +import bigframes.series as series + + +@log_adapter.method_logger(custom_base_name="bigquery_obj") +def fetch_metadata( + objectref: series.Series, +) -> series.Series: + """[Preview] The OBJ.FETCH_METADATA function returns Cloud Storage metadata for a partially populated ObjectRef value. + + Args: + objectref (bigframes.pandas.Series): + A partially populated ObjectRef value, in which the uri and authorizer fields are populated and the details field isn't. + + Returns: + bigframes.pandas.Series: A fully populated ObjectRef value. The metadata is provided in the details field of the returned ObjectRef value. 
+ """ + objectref = convert.to_bf_series(objectref, default_index=None) + return objectref._apply_unary_op(ops.obj_fetch_metadata_op) + + +@log_adapter.method_logger(custom_base_name="bigquery_obj") +def get_access_url( + objectref: series.Series, + mode: str, + duration: Optional[Union[datetime.timedelta, pd.Timedelta, np.timedelta64]] = None, +) -> series.Series: + """[Preview] The OBJ.GET_ACCESS_URL function returns JSON that contains reference information for the input ObjectRef value, and also access URLs that you can use to read or modify the Cloud Storage object. + + Args: + objectref (bigframes.pandas.Series): + An ObjectRef value that represents a Cloud Storage object. + mode (str): + A STRING value that identifies the type of URL that you want to be returned. The following values are supported: + 'r': Returns a URL that lets you read the object. + 'rw': Returns two URLs, one that lets you read the object, and one that lets you modify the object. + duration (Union[datetime.timedelta, pandas.Timedelta, numpy.timedelta64], optional): + An optional INTERVAL value that specifies how long the generated access URLs remain valid. You can specify a value between 30 minutes and 6 hours. For example, you could specify INTERVAL 2 HOUR to generate URLs that expire after 2 hours. The default value is 6 hours. + + Returns: + bigframes.pandas.Series: A JSON value that contains the Cloud Storage object reference information from the input ObjectRef value, and also one or more URLs that you can use to access the Cloud Storage object. 
+ """ + objectref = convert.to_bf_series(objectref, default_index=None) + + duration_micros = None + if duration is not None: + duration_micros = utils.timedelta_to_micros(duration) + + return objectref._apply_unary_op( + ops.ObjGetAccessUrl(mode=mode, duration=duration_micros) + ) + + +@log_adapter.method_logger(custom_base_name="bigquery_obj") +def make_ref( + uri_or_json: Union[series.Series, Sequence[str]], + authorizer: Union[series.Series, str, None] = None, +) -> series.Series: + """[Preview] Use the OBJ.MAKE_REF function to create an ObjectRef value that contains reference information for a Cloud Storage object. + + Args: + uri_or_json (bigframes.pandas.Series or str): + A series of STRING values that contains the URI for the Cloud Storage object, for example, gs://mybucket/flowers/12345.jpg. + OR + A series of JSON value that represents a Cloud Storage object. + authorizer (bigframes.pandas.Series or str, optional): + A STRING value that contains the Cloud Resource connection used to access the Cloud Storage object. + Required if ``uri_or_json`` is a URI string. + + Returns: + bigframes.pandas.Series: An ObjectRef value. + """ + uri_or_json = convert.to_bf_series(uri_or_json, default_index=None) + + if authorizer is not None: + # Avoid join problems encountered if we try to convert a literal into Series. 
+ if not isinstance(authorizer, str): + authorizer = convert.to_bf_series(authorizer, default_index=None) + + return uri_or_json._apply_binary_op(authorizer, ops.obj_make_ref_op) + + # If authorizer is not provided, we assume uri_or_json is a JSON objectref + return uri_or_json._apply_unary_op(ops.obj_make_ref_json_op) diff --git a/bigframes/bigquery/obj.py b/bigframes/bigquery/obj.py new file mode 100644 index 0000000000..dc2c29e1f3 --- /dev/null +++ b/bigframes/bigquery/obj.py @@ -0,0 +1,41 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module integrates BigQuery built-in 'ObjectRef' functions for use with Series/DataFrame objects, +such as OBJ.FETCH_METADATA: +https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/objectref_functions + + +.. warning:: + + This product or feature is subject to the "Pre-GA Offerings Terms" in the + General Service Terms section of the `Service Specific Terms + `_. Pre-GA products and + features are available "as is" and might have limited support. For more + information, see the `launch stage descriptions + `_. + +.. note:: + + To provide feedback or request support for this feature, send an email to + bq-objectref-feedback@google.com. 
+""" + +from bigframes.bigquery._operations.obj import fetch_metadata, get_access_url, make_ref + +__all__ = [ + "fetch_metadata", + "get_access_url", + "make_ref", +] diff --git a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py index 91bbfbfbcf..519b2c9442 100644 --- a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py +++ b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py @@ -16,6 +16,7 @@ import functools import typing +from typing import cast from bigframes_vendored import ibis import bigframes_vendored.ibis.expr.api as ibis_api @@ -1247,6 +1248,13 @@ def obj_fetch_metadata_op_impl(obj_ref: ibis_types.Value): @scalar_op_compiler.register_unary_op(ops.ObjGetAccessUrl, pass_op=True) def obj_get_access_url_op_impl(obj_ref: ibis_types.Value, op: ops.ObjGetAccessUrl): + if op.duration is not None: + duration_value = cast( + ibis_types.IntegerValue, ibis_types.literal(op.duration) + ).to_interval("us") + return obj_get_access_url_with_duration( + obj_ref=obj_ref, mode=op.mode, duration=duration_value + ) return obj_get_access_url(obj_ref=obj_ref, mode=op.mode) @@ -1807,6 +1815,11 @@ def obj_make_ref_op(x: ibis_types.Value, y: ibis_types.Value): return obj_make_ref(uri=x, authorizer=y) +@scalar_op_compiler.register_unary_op(ops.obj_make_ref_json_op) +def obj_make_ref_json_op(x: ibis_types.Value): + return obj_make_ref_json(objectref_json=x) + + # Ternary Operations @scalar_op_compiler.register_ternary_op(ops.where_op) def where_op( @@ -2141,11 +2154,21 @@ def obj_make_ref(uri: str, authorizer: str) -> _OBJ_REF_IBIS_DTYPE: # type: ign """Make ObjectRef Struct from uri and connection.""" +@ibis_udf.scalar.builtin(name="OBJ.MAKE_REF") +def obj_make_ref_json(objectref_json: ibis_dtypes.JSON) -> _OBJ_REF_IBIS_DTYPE: # type: ignore + """Make ObjectRef Struct from json.""" + + @ibis_udf.scalar.builtin(name="OBJ.GET_ACCESS_URL") def obj_get_access_url(obj_ref: _OBJ_REF_IBIS_DTYPE, mode: 
ibis_dtypes.String) -> ibis_dtypes.JSON: # type: ignore """Get access url (as ObjectRefRumtime JSON) from ObjectRef.""" +@ibis_udf.scalar.builtin(name="OBJ.GET_ACCESS_URL") +def obj_get_access_url_with_duration(obj_ref, mode, duration) -> ibis_dtypes.JSON: # type: ignore + """Get access url (as ObjectRefRumtime JSON) from ObjectRef.""" + + @ibis_udf.scalar.builtin(name="ltrim") def str_lstrip_op( # type: ignore[empty-body] x: ibis_dtypes.String, to_strip: ibis_dtypes.String diff --git a/bigframes/core/compile/sqlglot/expressions/blob_ops.py b/bigframes/core/compile/sqlglot/expressions/blob_ops.py index 0c1491b92a..3105cd8e30 100644 --- a/bigframes/core/compile/sqlglot/expressions/blob_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/blob_ops.py @@ -31,9 +31,22 @@ def _(expr: TypedExpr) -> sge.Expression: @register_unary_op(ops.ObjGetAccessUrl, pass_op=True) def _(expr: TypedExpr, op: ops.ObjGetAccessUrl) -> sge.Expression: - return sge.func("OBJ.GET_ACCESS_URL", expr.expr, sge.convert(op.mode)) + args = [expr.expr, sge.Literal.string(op.mode)] + if op.duration is not None: + args.append( + sge.Interval( + this=sge.Literal.number(op.duration), + unit=sge.Var(this="MICROSECOND"), + ) + ) + return sge.func("OBJ.GET_ACCESS_URL", *args) @register_binary_op(ops.obj_make_ref_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: return sge.func("OBJ.MAKE_REF", left.expr, right.expr) + + +@register_unary_op(ops.obj_make_ref_json_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("OBJ.MAKE_REF", expr.expr) diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 5da8efaa3b..a1c7754ab5 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -40,6 +40,7 @@ ) from bigframes.operations.blob_ops import ( obj_fetch_metadata_op, + obj_make_ref_json_op, obj_make_ref_op, ObjGetAccessUrl, ) @@ -365,6 +366,7 @@ "ArrayToStringOp", # Blob ops "ObjGetAccessUrl", + "obj_make_ref_json_op", 
"obj_make_ref_op", "obj_fetch_metadata_op", # Struct ops diff --git a/bigframes/operations/blob_ops.py b/bigframes/operations/blob_ops.py index 29f23a2f70..d1e2764eb4 100644 --- a/bigframes/operations/blob_ops.py +++ b/bigframes/operations/blob_ops.py @@ -29,6 +29,7 @@ class ObjGetAccessUrl(base_ops.UnaryOp): name: typing.ClassVar[str] = "obj_get_access_url" mode: str # access mode, e.g. R read, W write, RW read & write + duration: typing.Optional[int] = None # duration in microseconds def output_type(self, *input_types): return dtypes.JSON_DTYPE @@ -46,3 +47,14 @@ def output_type(self, *input_types): obj_make_ref_op = ObjMakeRef() + + +@dataclasses.dataclass(frozen=True) +class ObjMakeRefJson(base_ops.UnaryOp): + name: typing.ClassVar[str] = "obj_make_ref_json" + + def output_type(self, *input_types): + return dtypes.OBJ_REF_DTYPE + + +obj_make_ref_json_op = ObjMakeRefJson() diff --git a/docs/reference/index.rst b/docs/reference/index.rst index e348bd608b..bdf38e977d 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -11,6 +11,7 @@ packages. bigframes.bigquery bigframes.bigquery.ai bigframes.bigquery.ml + bigframes.bigquery.obj bigframes.enums bigframes.exceptions bigframes.geopandas diff --git a/tests/system/large/bigquery/__init__.py b/tests/system/large/bigquery/__init__.py new file mode 100644 index 0000000000..58d482ea38 --- /dev/null +++ b/tests/system/large/bigquery/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/system/large/bigquery/test_obj.py b/tests/system/large/bigquery/test_obj.py new file mode 100644 index 0000000000..dcca7580b1 --- /dev/null +++ b/tests/system/large/bigquery/test_obj.py @@ -0,0 +1,41 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +import bigframes.bigquery as bbq + + +@pytest.fixture() +def objectrefs(bq_connection): + return bbq.obj.make_ref( + [ + "gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/tick-terminator-for-dogs.png" + ], + bq_connection, + ) + + +def test_obj_fetch_metadata(objectrefs): + metadata = bbq.obj.fetch_metadata(objectrefs) + + result = metadata.to_pandas() + assert len(result) == len(objectrefs) + + +def test_obj_get_access_url(objectrefs): + access = bbq.obj.get_access_url(objectrefs, "r") + + result = access.to_pandas() + assert len(result) == len(objectrefs) diff --git a/tests/unit/bigquery/test_obj.py b/tests/unit/bigquery/test_obj.py new file mode 100644 index 0000000000..9eac234b8b --- /dev/null +++ b/tests/unit/bigquery/test_obj.py @@ -0,0 +1,125 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +from unittest import mock + +import bigframes.bigquery.obj as obj +import bigframes.operations as ops +import bigframes.series + + +def create_mock_series(): + result = mock.create_autospec(bigframes.series.Series, instance=True) + result.copy.return_value = result + return result + + +def test_fetch_metadata_op_structure(): + op = ops.obj_fetch_metadata_op + assert op.name == "obj_fetch_metadata" + + +def test_get_access_url_op_structure(): + op = ops.ObjGetAccessUrl(mode="r") + assert op.name == "obj_get_access_url" + assert op.mode == "r" + assert op.duration is None + + +def test_get_access_url_with_duration_op_structure(): + op = ops.ObjGetAccessUrl(mode="rw", duration=3600000000) + assert op.name == "obj_get_access_url" + assert op.mode == "rw" + assert op.duration == 3600000000 + + +def test_make_ref_op_structure(): + op = ops.obj_make_ref_op + assert op.name == "obj_make_ref" + + +def test_make_ref_json_op_structure(): + op = ops.obj_make_ref_json_op + assert op.name == "obj_make_ref_json" + + +def test_fetch_metadata_calls_apply_unary_op(): + series = create_mock_series() + + obj.fetch_metadata(series) + + series._apply_unary_op.assert_called_once() + args, _ = series._apply_unary_op.call_args + assert args[0] == ops.obj_fetch_metadata_op + + +def test_get_access_url_calls_apply_unary_op_without_duration(): + series = create_mock_series() + + obj.get_access_url(series, mode="r") + + series._apply_unary_op.assert_called_once() + args, _ = series._apply_unary_op.call_args + assert isinstance(args[0], ops.ObjGetAccessUrl) + assert 
args[0].mode == "r" + assert args[0].duration is None + + +def test_get_access_url_calls_apply_unary_op_with_duration(): + series = create_mock_series() + duration = datetime.timedelta(hours=1) + + obj.get_access_url(series, mode="rw", duration=duration) + + series._apply_unary_op.assert_called_once() + args, _ = series._apply_unary_op.call_args + assert isinstance(args[0], ops.ObjGetAccessUrl) + assert args[0].mode == "rw" + # 1 hour = 3600 seconds = 3600 * 1000 * 1000 microseconds + assert args[0].duration == 3600000000 + + +def test_make_ref_calls_apply_binary_op_with_authorizer(): + uri = create_mock_series() + auth = create_mock_series() + + obj.make_ref(uri, authorizer=auth) + + uri._apply_binary_op.assert_called_once() + args, _ = uri._apply_binary_op.call_args + assert args[0] == auth + assert args[1] == ops.obj_make_ref_op + + +def test_make_ref_calls_apply_binary_op_with_authorizer_string(): + uri = create_mock_series() + auth = "us.bigframes-test-connection" + + obj.make_ref(uri, authorizer=auth) + + uri._apply_binary_op.assert_called_once() + args, _ = uri._apply_binary_op.call_args + assert args[0] == auth + assert args[1] == ops.obj_make_ref_op + + +def test_make_ref_calls_apply_unary_op_without_authorizer(): + json_val = create_mock_series() + + obj.make_ref(json_val) + + json_val._apply_unary_op.assert_called_once() + args, _ = json_val._apply_unary_op.call_args + assert args[0] == ops.obj_make_ref_json_op From 7abfef0598d476ef233364a01f72d73291983c30 Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Thu, 15 Jan 2026 13:49:43 -0800 Subject: [PATCH 19/28] fix: Throw if write api commit op has stream_errors (#2385) --- bigframes/session/loader.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py index bf91637be4..9c18d727c8 100644 --- a/bigframes/session/loader.py +++ b/bigframes/session/loader.py @@ -540,7 +540,9 @@ def request_generator(): commit_request = 
bq_storage_types.BatchCommitWriteStreamsRequest( parent=parent, write_streams=stream_names ) - self._write_client.batch_commit_write_streams(commit_request) + response = self._write_client.batch_commit_write_streams(commit_request) + for error in response.stream_errors: + raise ValueError(f"Errors committing stream {error}") result_table = bq_data.GbqTable.from_ref_and_schema( bq_table_ref, schema=bq_schema, cluster_cols=[offsets_col] From 039627813f79ca0557bb1af4b272d93a433e7cce Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 15 Jan 2026 17:35:45 -0800 Subject: [PATCH 20/28] chore: define type logging bit masks (#2384) This PR only defines the data types and their bit positions. There's a slight update from our design: I reserved the least significant bit for unknown types. Tree traversal code will be implemented in the next PR. Related bug: b/406578908 --- bigframes/core/logging/__init__.py | 4 +- bigframes/core/logging/data_types.py | 65 ++++++++++++++++++++ tests/unit/core/logging/test_data_types.py | 69 ++++++++++++++++++++++ 3 files changed, 136 insertions(+), 2 deletions(-) create mode 100644 bigframes/core/logging/data_types.py create mode 100644 tests/unit/core/logging/test_data_types.py diff --git a/bigframes/core/logging/__init__.py b/bigframes/core/logging/__init__.py index 95c077a99a..5d06124efc 100644 --- a/bigframes/core/logging/__init__.py +++ b/bigframes/core/logging/__init__.py @@ -12,6 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from bigframes.core.logging import log_adapter +from bigframes.core.logging import data_types, log_adapter -__all__ = ["log_adapter"] +__all__ = ["log_adapter", "data_types"] diff --git a/bigframes/core/logging/data_types.py b/bigframes/core/logging/data_types.py new file mode 100644 index 0000000000..db99b1a020 --- /dev/null +++ b/bigframes/core/logging/data_types.py @@ -0,0 +1,65 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from bigframes import dtypes + + +def _add_data_type(existing_types: int, curr_type: dtypes.Dtype) -> int: + return existing_types | _get_dtype_mask(curr_type) + + +def _get_dtype_mask(dtype: dtypes.Dtype) -> int: + if dtype == dtypes.INT_DTYPE: + return 1 << 1 + if dtype == dtypes.FLOAT_DTYPE: + return 1 << 2 + if dtype == dtypes.BOOL_DTYPE: + return 1 << 3 + if dtype == dtypes.STRING_DTYPE: + return 1 << 4 + if dtype == dtypes.BYTES_DTYPE: + return 1 << 5 + if dtype == dtypes.DATE_DTYPE: + return 1 << 6 + if dtype == dtypes.TIME_DTYPE: + return 1 << 7 + if dtype == dtypes.DATETIME_DTYPE: + return 1 << 8 + if dtype == dtypes.TIMESTAMP_DTYPE: + return 1 << 9 + if dtype == dtypes.TIMEDELTA_DTYPE: + return 1 << 10 + if dtype == dtypes.NUMERIC_DTYPE: + return 1 << 11 + if dtype == dtypes.BIGNUMERIC_DTYPE: + return 1 << 12 + if dtype == dtypes.GEO_DTYPE: + return 1 << 13 + if dtype == dtypes.JSON_DTYPE: + return 1 << 14 + + if dtypes.is_struct_like(dtype): + mask = 1 << 15 + if dtype == dtypes.OBJ_REF_DTYPE: + 
# obj_ref is a special struct type for multi-modal data. + # It should be double counted as both "struct" and its own type. + mask = mask | (1 << 17) + return mask + + if dtypes.is_array_like(dtype): + return 1 << 16 + + # If an unknown data type is present, mark it with the least significant bit. + return 1 << 0 diff --git a/tests/unit/core/logging/test_data_types.py b/tests/unit/core/logging/test_data_types.py new file mode 100644 index 0000000000..9e3d1f1ed0 --- /dev/null +++ b/tests/unit/core/logging/test_data_types.py @@ -0,0 +1,69 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pandas as pd +import pyarrow as pa +import pytest + +from bigframes import dtypes +from bigframes.core.logging import data_types + +UNKNOWN_TYPE = pd.ArrowDtype(pa.time64("ns")) + +PA_STRUCT_TYPE = pa.struct([("city", pa.string()), ("pop", pa.int64())]) + +PA_LIST_TYPE = pa.list_(pa.int64()) + + +@pytest.mark.parametrize( + ("dtype", "expected_mask"), + [ + (UNKNOWN_TYPE, 1 << 0), + (dtypes.INT_DTYPE, 1 << 1), + (dtypes.FLOAT_DTYPE, 1 << 2), + (dtypes.BOOL_DTYPE, 1 << 3), + (dtypes.STRING_DTYPE, 1 << 4), + (dtypes.BYTES_DTYPE, 1 << 5), + (dtypes.DATE_DTYPE, 1 << 6), + (dtypes.TIME_DTYPE, 1 << 7), + (dtypes.DATETIME_DTYPE, 1 << 8), + (dtypes.TIMESTAMP_DTYPE, 1 << 9), + (dtypes.TIMEDELTA_DTYPE, 1 << 10), + (dtypes.NUMERIC_DTYPE, 1 << 11), + (dtypes.BIGNUMERIC_DTYPE, 1 << 12), + (dtypes.GEO_DTYPE, 1 << 13), + (dtypes.JSON_DTYPE, 1 << 14), + (pd.ArrowDtype(PA_STRUCT_TYPE), 1 << 15), + (pd.ArrowDtype(PA_LIST_TYPE), 1 << 16), + (dtypes.OBJ_REF_DTYPE, (1 << 15) | (1 << 17)), + ], +) +def test_get_dtype_mask(dtype, expected_mask): + assert data_types._get_dtype_mask(dtype) == expected_mask + + +def test_add_data_type__type_overlap_no_op(): + curr_type = dtypes.STRING_DTYPE + existing_types = data_types._get_dtype_mask(curr_type) + + assert data_types._add_data_type(existing_types, curr_type) == existing_types + + +def test_add_data_type__new_type_updated(): + curr_type = dtypes.STRING_DTYPE + existing_types = data_types._get_dtype_mask(dtypes.INT_DTYPE) + + assert data_types._add_data_type( + existing_types, curr_type + ) == existing_types | data_types._get_dtype_mask(curr_type) From 95763ff2e11f527f28f9a2caa82ae36ec1a98c65 Mon Sep 17 00:00:00 2001 From: TrevorBergeron Date: Fri, 16 Jan 2026 11:22:30 -0800 Subject: [PATCH 21/28] perf: Avoid requery for some result downsample methods (#2219) Co-authored-by: Chelsea Lin --- bigframes/core/blocks.py | 65 ++++++++++------------------ bigframes/core/bq_data.py | 13 +++++- bigframes/core/local_data.py | 11 ++++- 
bigframes/session/executor.py | 26 ++++++----- tests/system/small/test_anywidget.py | 2 +- tests/system/small/test_dataframe.py | 6 +-- 6 files changed, 64 insertions(+), 59 deletions(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 0f98f582c2..5bac1a06f1 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -818,49 +818,30 @@ def _materialize_local( total_rows = result_batches.approx_total_rows # Remove downsampling config from subsequent invocations, as otherwise could result in many # iterations if downsampling undershoots - return self._downsample( - total_rows=total_rows, - sampling_method=sample_config.sampling_method, - fraction=fraction, - random_state=sample_config.random_state, - )._materialize_local( - MaterializationOptions(ordered=materialize_options.ordered) - ) - else: - df = result_batches.to_pandas() - df = self._copy_index_to_pandas(df) - df.set_axis(self.column_labels, axis=1, copy=False) - return df, execute_result.query_job - - def _downsample( - self, total_rows: int, sampling_method: str, fraction: float, random_state - ) -> Block: - # either selecting fraction or number of rows - if sampling_method == _HEAD: - filtered_block = self.slice(stop=int(total_rows * fraction)) - return filtered_block - elif (sampling_method == _UNIFORM) and (random_state is None): - filtered_expr = self.expr._uniform_sampling(fraction) - block = Block( - filtered_expr, - index_columns=self.index_columns, - column_labels=self.column_labels, - index_labels=self.index.names, - ) - return block - elif sampling_method == _UNIFORM: - block = self.split( - fracs=(fraction,), - random_state=random_state, - sort=False, - )[0] - return block + if sample_config.sampling_method == "head": + # Just truncates the result iterator without a follow-up query + raw_df = result_batches.to_pandas(limit=int(total_rows * fraction)) + elif ( + sample_config.sampling_method == "uniform" + and sample_config.random_state is None + ): + # Pushes 
sample into result without new query + sampled_batches = execute_result.batches(sample_rate=fraction) + raw_df = sampled_batches.to_pandas() + else: # uniform sample with random state requires a full follow-up query + down_sampled_block = self.split( + fracs=(fraction,), + random_state=sample_config.random_state, + sort=False, + )[0] + return down_sampled_block._materialize_local( + MaterializationOptions(ordered=materialize_options.ordered) + ) else: - # This part should never be called, just in case. - raise NotImplementedError( - f"The downsampling method {sampling_method} is not implemented, " - f"please choose from {','.join(_SAMPLING_METHODS)}." - ) + raw_df = result_batches.to_pandas() + df = self._copy_index_to_pandas(raw_df) + df.set_axis(self.column_labels, axis=1, copy=False) + return df, execute_result.query_job def split( self, diff --git a/bigframes/core/bq_data.py b/bigframes/core/bq_data.py index 9b2103b01d..3b42ff7c03 100644 --- a/bigframes/core/bq_data.py +++ b/bigframes/core/bq_data.py @@ -186,11 +186,22 @@ def get_arrow_batches( columns: Sequence[str], storage_read_client: bigquery_storage_v1.BigQueryReadClient, project_id: str, + sample_rate: Optional[float] = None, ) -> ReadResult: table_mod_options = {} read_options_dict: dict[str, Any] = {"selected_fields": list(columns)} + + predicates = [] if data.sql_predicate: - read_options_dict["row_restriction"] = data.sql_predicate + predicates.append(data.sql_predicate) + if sample_rate is not None: + assert isinstance(sample_rate, float) + predicates.append(f"RAND() < {sample_rate}") + + if predicates: + full_predicates = " AND ".join(f"( {pred} )" for pred in predicates) + read_options_dict["row_restriction"] = full_predicates + read_options = bq_storage_types.ReadSession.TableReadOptions(**read_options_dict) if data.at_time: diff --git a/bigframes/core/local_data.py b/bigframes/core/local_data.py index ef7374a5a4..0ef24089b2 100644 --- a/bigframes/core/local_data.py +++ 
b/bigframes/core/local_data.py @@ -25,6 +25,7 @@ import uuid import geopandas # type: ignore +import numpy import numpy as np import pandas as pd import pyarrow as pa @@ -124,13 +125,21 @@ def to_arrow( geo_format: Literal["wkb", "wkt"] = "wkt", duration_type: Literal["int", "duration"] = "duration", json_type: Literal["string"] = "string", + sample_rate: Optional[float] = None, max_chunksize: Optional[int] = None, ) -> tuple[pa.Schema, Iterable[pa.RecordBatch]]: if geo_format != "wkt": raise NotImplementedError(f"geo format {geo_format} not yet implemented") assert json_type == "string" - batches = self.data.to_batches(max_chunksize=max_chunksize) + data = self.data + + # This exists for symmetry with remote sources, but sampling local data like this shouldn't really happen + if sample_rate is not None: + to_take = numpy.random.rand(data.num_rows) < sample_rate + data = data.filter(to_take) + + batches = data.to_batches(max_chunksize=max_chunksize) schema = self.data.schema if duration_type == "int": schema = _schema_durations_to_ints(schema) diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py index bca98bfb2f..2cbf6d8705 100644 --- a/bigframes/session/executor.py +++ b/bigframes/session/executor.py @@ -88,7 +88,7 @@ def arrow_batches(self) -> Iterator[pyarrow.RecordBatch]: yield batch - def to_arrow_table(self) -> pyarrow.Table: + def to_arrow_table(self, limit: Optional[int] = None) -> pyarrow.Table: # Need to provide schema if no result rows, as arrow can't infer # If ther are rows, it is safest to infer schema from batches. # Any discrepencies between predicted schema and actual schema will produce errors. 
@@ -97,9 +97,12 @@ def to_arrow_table(self) -> pyarrow.Table: peek_value = list(peek_it) # TODO: Enforce our internal schema on the table for consistency if len(peek_value) > 0: - return pyarrow.Table.from_batches( - itertools.chain(peek_value, batches), # reconstruct - ) + batches = itertools.chain(peek_value, batches) # reconstruct + if limit: + batches = pyarrow_utils.truncate_pyarrow_iterable( + batches, max_results=limit + ) + return pyarrow.Table.from_batches(batches) else: try: return self._schema.to_pyarrow().empty_table() @@ -107,8 +110,8 @@ def to_arrow_table(self) -> pyarrow.Table: # Bug with some pyarrow versions, empty_table only supports base storage types, not extension types. return self._schema.to_pyarrow(use_storage_types=True).empty_table() - def to_pandas(self) -> pd.DataFrame: - return io_pandas.arrow_to_pandas(self.to_arrow_table(), self._schema) + def to_pandas(self, limit: Optional[int] = None) -> pd.DataFrame: + return io_pandas.arrow_to_pandas(self.to_arrow_table(limit=limit), self._schema) def to_pandas_batches( self, page_size: Optional[int] = None, max_results: Optional[int] = None @@ -158,7 +161,7 @@ def schema(self) -> bigframes.core.schema.ArraySchema: ... @abc.abstractmethod - def batches(self) -> ResultsIterator: + def batches(self, sample_rate: Optional[float] = None) -> ResultsIterator: ... 
@property @@ -200,9 +203,9 @@ def execution_metadata(self) -> ExecutionMetadata: def schema(self) -> bigframes.core.schema.ArraySchema: return self._data.schema - def batches(self) -> ResultsIterator: + def batches(self, sample_rate: Optional[float] = None) -> ResultsIterator: return ResultsIterator( - iter(self._data.to_arrow()[1]), + iter(self._data.to_arrow(sample_rate=sample_rate)[1]), self.schema, self._data.metadata.row_count, self._data.metadata.total_bytes, @@ -226,7 +229,7 @@ def execution_metadata(self) -> ExecutionMetadata: def schema(self) -> bigframes.core.schema.ArraySchema: return self._schema - def batches(self) -> ResultsIterator: + def batches(self, sample_rate: Optional[float] = None) -> ResultsIterator: return ResultsIterator(iter([]), self.schema, 0, 0) @@ -260,12 +263,13 @@ def schema(self) -> bigframes.core.schema.ArraySchema: source_ids = [selection[0] for selection in self._selected_fields] return self._data.schema.select(source_ids).rename(dict(self._selected_fields)) - def batches(self) -> ResultsIterator: + def batches(self, sample_rate: Optional[float] = None) -> ResultsIterator: read_batches = bq_data.get_arrow_batches( self._data, [x[0] for x in self._selected_fields], self._storage_client, self._project_id, + sample_rate=sample_rate, ) arrow_batches: Iterator[pa.RecordBatch] = map( functools.partial( diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index da87568c91..fad8f5b2b5 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -165,7 +165,7 @@ def execution_metadata(self) -> ExecutionMetadata: def schema(self) -> Any: return schema - def batches(self) -> ResultsIterator: + def batches(self, sample_rate=None) -> ResultsIterator: return ResultsIterator( arrow_batches_val, self.schema, diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index d2a157b131..0f7b782b66 100644 --- a/tests/system/small/test_dataframe.py 
+++ b/tests/system/small/test_dataframe.py @@ -4524,7 +4524,7 @@ def test_df_kurt(scalars_dfs): "n_default", ], ) -def test_sample(scalars_dfs, frac, n, random_state): +def test_df_to_pandas_sample(scalars_dfs, frac, n, random_state): scalars_df, _ = scalars_dfs df = scalars_df.sample(frac=frac, n=n, random_state=random_state) bf_result = df.to_pandas() @@ -4535,7 +4535,7 @@ def test_sample(scalars_dfs, frac, n, random_state): assert bf_result.shape[1] == scalars_df.shape[1] -def test_sample_determinism(penguins_df_default_index): +def test_df_to_pandas_sample_determinism(penguins_df_default_index): df = penguins_df_default_index.sample(n=100, random_state=12345).head(15) bf_result = df.to_pandas() bf_result2 = df.to_pandas() @@ -4543,7 +4543,7 @@ def test_sample_determinism(penguins_df_default_index): pandas.testing.assert_frame_equal(bf_result, bf_result2) -def test_sample_raises_value_error(scalars_dfs): +def test_df_to_pandas_sample_raises_value_error(scalars_dfs): scalars_df, _ = scalars_dfs with pytest.raises( ValueError, match="Only one of 'n' or 'frac' parameter can be specified." 
From d02d32f22890d0ad9347618bc0cad8208df67ad7 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Wed, 21 Jan 2026 10:11:38 -0800 Subject: [PATCH 22/28] refactor: fix math numeric_ops for test_series_ufuncs (#2386) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change fixes several numeric_ops test failures in #2248 Fixes internal issue 417774347 🦕 --- .../sqlglot/expressions/numeric_ops.py | 78 ++++++++++++++----- bigframes/core/compile/sqlglot/sqlglot_ir.py | 2 +- .../test_numeric_ops/test_arctanh/out.sql | 4 +- .../test_numeric_ops/test_expm1/out.sql | 6 +- .../test_numeric_ops/test_ln/out.sql | 10 ++- .../test_numeric_ops/test_log10/out.sql | 8 +- .../test_numeric_ops/test_log1p/out.sql | 8 +- 7 files changed, 83 insertions(+), 33 deletions(-) diff --git a/bigframes/core/compile/sqlglot/expressions/numeric_ops.py b/bigframes/core/compile/sqlglot/expressions/numeric_ops.py index f7c763e207..28d3532b8b 100644 --- a/bigframes/core/compile/sqlglot/expressions/numeric_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/numeric_ops.py @@ -93,12 +93,19 @@ def _(expr: TypedExpr) -> sge.Expression: def _(expr: TypedExpr) -> sge.Expression: return sge.Case( ifs=[ + # |x| < 1: The standard formula + sge.If( + this=sge.func("ABS", expr.expr) < sge.convert(1), + true=sge.func("ATANH", expr.expr), + ), + # |x| > 1: Returns NaN sge.If( this=sge.func("ABS", expr.expr) > sge.convert(1), true=constants._NAN, - ) + ), ], - default=sge.func("ATANH", expr.expr), + # |x| = 1: Returns Infinity or -Infinity + default=sge.Mul(this=constants._INF, expression=expr.expr), ) @@ -145,15 +152,11 @@ def _(expr: TypedExpr) -> sge.Expression: @register_unary_op(ops.expm1_op) def _(expr: TypedExpr) -> sge.Expression: - return sge.Case( - ifs=[ - sge.If( - this=expr.expr > constants._FLOAT64_EXP_BOUND, - true=constants._INF, - ) - ], - default=sge.func("EXP", expr.expr), - ) - sge.convert(1) + return sge.If( + this=expr.expr > 
constants._FLOAT64_EXP_BOUND, + true=constants._INF, + false=sge.func("EXP", expr.expr) - sge.convert(1), + ) @register_unary_op(ops.floor_op) @@ -166,11 +169,22 @@ def _(expr: TypedExpr) -> sge.Expression: return sge.Case( ifs=[ sge.If( - this=expr.expr <= sge.convert(0), + this=sge.Is(this=expr.expr, expression=sge.Null()), + true=sge.null(), + ), + # x > 0: The standard formula + sge.If( + this=expr.expr > sge.convert(0), + true=sge.Ln(this=expr.expr), + ), + # x < 0: Returns NaN + sge.If( + this=expr.expr < sge.convert(0), true=constants._NAN, - ) + ), ], - default=sge.Ln(this=expr.expr), + # x == 0: Returns -Infinity + default=constants._NEG_INF, ) @@ -179,11 +193,22 @@ def _(expr: TypedExpr) -> sge.Expression: return sge.Case( ifs=[ sge.If( - this=expr.expr <= sge.convert(0), + this=sge.Is(this=expr.expr, expression=sge.Null()), + true=sge.null(), + ), + # x > 0: The standard formula + sge.If( + this=expr.expr > sge.convert(0), + true=sge.Log(this=sge.convert(10), expression=expr.expr), + ), + # x < 0: Returns NaN + sge.If( + this=expr.expr < sge.convert(0), true=constants._NAN, - ) + ), ], - default=sge.Log(this=expr.expr, expression=sge.convert(10)), + # x == 0: Returns -Infinity + default=constants._NEG_INF, ) @@ -192,11 +217,22 @@ def _(expr: TypedExpr) -> sge.Expression: return sge.Case( ifs=[ sge.If( - this=expr.expr <= sge.convert(-1), + this=sge.Is(this=expr.expr, expression=sge.Null()), + true=sge.null(), + ), + # Domain: x > -1 (The standard formula) + sge.If( + this=expr.expr > sge.convert(-1), + true=sge.Ln(this=sge.convert(1) + expr.expr), + ), + # Out of Domain: x < -1 (Returns NaN) + sge.If( + this=expr.expr < sge.convert(-1), true=constants._NAN, - ) + ), ], - default=sge.Ln(this=sge.convert(1) + expr.expr), + # Boundary: x == -1 (Returns -Infinity) + default=constants._NEG_INF, ) @@ -608,7 +644,7 @@ def isfinite(arg: TypedExpr) -> sge.Expression: return sge.Not( this=sge.Or( this=sge.IsInf(this=arg.expr), - 
right=sge.IsNan(this=arg.expr), + expression=sge.IsNan(this=arg.expr), ), ) diff --git a/bigframes/core/compile/sqlglot/sqlglot_ir.py b/bigframes/core/compile/sqlglot/sqlglot_ir.py index cefe983e24..d4dc4ecc06 100644 --- a/bigframes/core/compile/sqlglot/sqlglot_ir.py +++ b/bigframes/core/compile/sqlglot/sqlglot_ir.py @@ -674,7 +674,7 @@ def _literal(value: typing.Any, dtype: dtypes.Dtype) -> sge.Expression: expressions=[_literal(value=v, dtype=value_type) for v in value] ) return values if len(value) > 0 else _cast(values, sqlglot_type) - elif pd.isna(value): + elif pd.isna(value) or (isinstance(value, pa.Scalar) and not value.is_valid): return _cast(sge.Null(), sqlglot_type) elif dtype == dtypes.JSON_DTYPE: return sge.ParseJSON(this=sge.convert(str(value))) diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arctanh/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arctanh/out.sql index 197bf59306..dc6de62e7b 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arctanh/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arctanh/out.sql @@ -6,9 +6,11 @@ WITH `bfcte_0` AS ( SELECT *, CASE + WHEN ABS(`float64_col`) < 1 + THEN ATANH(`float64_col`) WHEN ABS(`float64_col`) > 1 THEN CAST('NaN' AS FLOAT64) - ELSE ATANH(`float64_col`) + ELSE CAST('Infinity' AS FLOAT64) * `float64_col` END AS `bfcol_1` FROM `bfcte_0` ) diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_expm1/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_expm1/out.sql index 076ad584c2..13038bf8e8 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_expm1/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_expm1/out.sql @@ -5,11 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE - WHEN `float64_col` > 
709.78 - THEN CAST('Infinity' AS FLOAT64) - ELSE EXP(`float64_col`) - END - 1 AS `bfcol_1` + IF(`float64_col` > 709.78, CAST('Infinity' AS FLOAT64), EXP(`float64_col`) - 1) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_ln/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_ln/out.sql index 776cc33e0f..bd4cfa7c9a 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_ln/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_ln/out.sql @@ -5,7 +5,15 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE WHEN `float64_col` <= 0 THEN CAST('NaN' AS FLOAT64) ELSE LN(`float64_col`) END AS `bfcol_1` + CASE + WHEN `float64_col` IS NULL + THEN NULL + WHEN `float64_col` > 0 + THEN LN(`float64_col`) + WHEN `float64_col` < 0 + THEN CAST('NaN' AS FLOAT64) + ELSE CAST('-Infinity' AS FLOAT64) + END AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log10/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log10/out.sql index 11a318c22d..c5bbff0e62 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log10/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log10/out.sql @@ -6,9 +6,13 @@ WITH `bfcte_0` AS ( SELECT *, CASE - WHEN `float64_col` <= 0 + WHEN `float64_col` IS NULL + THEN NULL + WHEN `float64_col` > 0 + THEN LOG(`float64_col`, 10) + WHEN `float64_col` < 0 THEN CAST('NaN' AS FLOAT64) - ELSE LOG(10, `float64_col`) + ELSE CAST('-Infinity' AS FLOAT64) END AS `bfcol_1` FROM `bfcte_0` ) diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log1p/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log1p/out.sql index 4297fff227..22e67e24ee 100644 
--- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log1p/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log1p/out.sql @@ -6,9 +6,13 @@ WITH `bfcte_0` AS ( SELECT *, CASE - WHEN `float64_col` <= -1 + WHEN `float64_col` IS NULL + THEN NULL + WHEN `float64_col` > -1 + THEN LN(1 + `float64_col`) + WHEN `float64_col` < -1 THEN CAST('NaN' AS FLOAT64) - ELSE LN(1 + `float64_col`) + ELSE CAST('-Infinity' AS FLOAT64) END AS `bfcol_1` FROM `bfcte_0` ) From ee83d98ae995786ecbf332da783e1d4f92bc0398 Mon Sep 17 00:00:00 2001 From: jialuoo Date: Wed, 21 Jan 2026 18:48:10 +0000 Subject: [PATCH 23/28] chore: Migrate NaryRemoteFunctionOp operator to SQLGlot (#2387) Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes b/452130300 --- .../sqlglot/expressions/generic_ops.py | 23 +++++---- .../test_nary_remote_function_op/out.sql | 15 ++++++ .../sqlglot/expressions/test_generic_ops.py | 48 +++++++++++++++---- 3 files changed, 69 insertions(+), 17 deletions(-) create mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_nary_remote_function_op/out.sql diff --git a/bigframes/core/compile/sqlglot/expressions/generic_ops.py b/bigframes/core/compile/sqlglot/expressions/generic_ops.py index 0fd8a010ae..2f486fc9d5 100644 --- a/bigframes/core/compile/sqlglot/expressions/generic_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/generic_ops.py @@ -152,13 +152,17 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: return sge.Coalesce(this=left.expr, expressions=[right.expr]) -@register_unary_op(ops.RemoteFunctionOp, pass_op=True) -def _(expr: TypedExpr, op: ops.RemoteFunctionOp) -> sge.Expression: +def _get_remote_function_name(op): routine_ref = op.function_def.routine_ref # Quote project, dataset, and routine IDs to avoid keyword clashes. - func_name = ( + return ( f"`{routine_ref.project}`.`{routine_ref.dataset_id}`.`{routine_ref.routine_id}`" ) + + +@register_unary_op(ops.RemoteFunctionOp, pass_op=True) +def _(expr: TypedExpr, op: ops.RemoteFunctionOp) -> sge.Expression: + func_name = _get_remote_function_name(op) func = sge.func(func_name, expr.expr) if not op.apply_on_null: @@ -175,15 +179,16 @@ def _(expr: TypedExpr, op: ops.RemoteFunctionOp) -> sge.Expression: def _( left: TypedExpr, right: TypedExpr, op: ops.BinaryRemoteFunctionOp ) -> sge.Expression: - routine_ref = op.function_def.routine_ref - # Quote project, dataset, and routine IDs to avoid keyword clashes. 
- func_name = ( - f"`{routine_ref.project}`.`{routine_ref.dataset_id}`.`{routine_ref.routine_id}`" - ) - + func_name = _get_remote_function_name(op) return sge.func(func_name, left.expr, right.expr) +@register_nary_op(ops.NaryRemoteFunctionOp, pass_op=True) +def _(*operands: TypedExpr, op: ops.NaryRemoteFunctionOp) -> sge.Expression: + func_name = _get_remote_function_name(op) + return sge.func(func_name, *(operand.expr for operand in operands)) + + @register_nary_op(ops.case_when_op) def _(*cases_and_outputs: TypedExpr) -> sge.Expression: # Need to upcast BOOL to INT if any output is numeric diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_nary_remote_function_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_nary_remote_function_op/out.sql new file mode 100644 index 0000000000..a6641b13db --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_nary_remote_function_op/out.sql @@ -0,0 +1,15 @@ +WITH `bfcte_0` AS ( + SELECT + `float64_col`, + `int64_col`, + `string_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + `my_project`.`my_dataset`.`my_routine`(`int64_col`, `float64_col`, `string_col`) AS `bfcol_3` + FROM `bfcte_0` +) +SELECT + `bfcol_3` AS `int64_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py index c4f16d93a1..2667e482c8 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py @@ -12,12 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from google.cloud import bigquery import pandas as pd import pytest from bigframes import dtypes from bigframes import operations as ops from bigframes.core import expression as ex +from bigframes.functions import udf_def import bigframes.pandas as bpd from bigframes.testing import utils @@ -170,10 +172,6 @@ def test_astype_json_invalid( def test_remote_function_op(scalar_types_df: bpd.DataFrame, snapshot): - from google.cloud import bigquery - - from bigframes.functions import udf_def - bf_df = scalar_types_df[["int64_col"]] function_def = udf_def.BigqueryUdf( routine_ref=bigquery.RoutineReference.from_string( @@ -206,10 +204,6 @@ def test_remote_function_op(scalar_types_df: bpd.DataFrame, snapshot): def test_binary_remote_function_op(scalar_types_df: bpd.DataFrame, snapshot): - from google.cloud import bigquery - - from bigframes.functions import udf_def - bf_df = scalar_types_df[["int64_col", "float64_col"]] op = ops.BinaryRemoteFunctionOp( function_def=udf_def.BigqueryUdf( @@ -242,6 +236,44 @@ def test_binary_remote_function_op(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") +def test_nary_remote_function_op(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["int64_col", "float64_col", "string_col"]] + op = ops.NaryRemoteFunctionOp( + function_def=udf_def.BigqueryUdf( + routine_ref=bigquery.RoutineReference.from_string( + "my_project.my_dataset.my_routine" + ), + signature=udf_def.UdfSignature( + input_types=( + udf_def.UdfField( + "x", + bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 + ), + ), + udf_def.UdfField( + "y", + bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.FLOAT64 + ), + ), + udf_def.UdfField( + "z", + bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.STRING + ), + ), + ), + output_bq_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.FLOAT64 + ), + ), + ) + ) + sql = 
utils._apply_nary_op(bf_df, op, "int64_col", "float64_col", "string_col") + snapshot.assert_match(sql, "out.sql") + + def test_case_when_op(scalar_types_df: bpd.DataFrame, snapshot): ops_map = { "single_case": ops.case_when_op.as_expr( From 74150c5df41bcafeaaa590ad136b177a741ac65c Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Wed, 21 Jan 2026 14:04:04 -0800 Subject: [PATCH 24/28] chore: deflake the ai generate doc test by skipping it (#2393) --- bigframes/bigquery/_operations/ai.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py index e56292d64f..fd7dafe95f 100644 --- a/bigframes/bigquery/_operations/ai.py +++ b/bigframes/bigquery/_operations/ai.py @@ -58,14 +58,14 @@ def generate( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq >>> country = bpd.Series(["Japan", "Canada"]) - >>> bbq.ai.generate(("What's the capital city of ", country, " one word only")) - 0 {'result': 'Tokyo\\n', 'full_response': '{"cand... - 1 {'result': 'Ottawa\\n', 'full_response': '{"can... + >>> bbq.ai.generate(("What's the capital city of ", country, " one word only")) # doctest: +SKIP + 0 {'result': 'Tokyo', 'full_response': '{"cand... + 1 {'result': 'Ottawa', 'full_response': '{"can... 
dtype: struct>, status: string>[pyarrow] - >>> bbq.ai.generate(("What's the capital city of ", country, " one word only")).struct.field("result") - 0 Tokyo\\n - 1 Ottawa\\n + >>> bbq.ai.generate(("What's the capital city of ", country, " one word only")).struct.field("result") # doctest: +SKIP + 0 Tokyo + 1 Ottawa Name: result, dtype: string You get structured output when the `output_schema` parameter is set: From e90e1d8b5149402b36be5b54228d1b95aa464282 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Wed, 21 Jan 2026 16:52:52 -0800 Subject: [PATCH 25/28] chore: Add a function to traverse BFET and encode type usage (#2390) Next step is to add the encoded usage to the job_config.label before SQL dispatch. Related bug: b/406578908 --- bigframes/core/logging/data_types.py | 106 +++++++++++++++- tests/system/small/core/logging/__init__.py | 13 ++ .../small/core/logging/test_data_types.py | 113 ++++++++++++++++++ tests/unit/core/logging/test_data_types.py | 17 +-- 4 files changed, 230 insertions(+), 19 deletions(-) create mode 100644 tests/system/small/core/logging/__init__.py create mode 100644 tests/system/small/core/logging/test_data_types.py diff --git a/bigframes/core/logging/data_types.py b/bigframes/core/logging/data_types.py index db99b1a020..3cb65a5c50 100644 --- a/bigframes/core/logging/data_types.py +++ b/bigframes/core/logging/data_types.py @@ -12,15 +12,115 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from __future__ import annotations + +import functools from bigframes import dtypes +from bigframes.core import agg_expressions, bigframe_node, expression, nodes +from bigframes.core.rewrite import schema_binding + +IGNORED_NODES = ( + nodes.SelectionNode, + nodes.ReadLocalNode, + nodes.ReadTableNode, + nodes.ConcatNode, + nodes.RandomSampleNode, + nodes.FromRangeNode, + nodes.PromoteOffsetsNode, + nodes.ReversedNode, + nodes.SliceNode, + nodes.ResultNode, +) + + +def encode_type_refs(root: bigframe_node.BigFrameNode) -> str: + return f"{root.reduce_up(_encode_type_refs_from_node):x}" + + +def _encode_type_refs_from_node( + node: bigframe_node.BigFrameNode, child_results: tuple[int, ...] +) -> int: + child_result = functools.reduce(lambda x, y: x | y, child_results, 0) + + curr_result = 0 + if isinstance(node, nodes.FilterNode): + curr_result = _encode_type_refs_from_expr(node.predicate, node.child) + elif isinstance(node, nodes.ProjectionNode): + for assignment in node.assignments: + expr = assignment[0] + if isinstance(expr, (expression.DerefOp)): + # Ignore direct assignments in projection nodes. 
+ continue + curr_result = curr_result | _encode_type_refs_from_expr( + assignment[0], node.child + ) + elif isinstance(node, nodes.OrderByNode): + for by in node.by: + curr_result = curr_result | _encode_type_refs_from_expr( + by.scalar_expression, node.child + ) + elif isinstance(node, nodes.JoinNode): + for left, right in node.conditions: + curr_result = ( + curr_result + | _encode_type_refs_from_expr(left, node.left_child) + | _encode_type_refs_from_expr(right, node.right_child) + ) + elif isinstance(node, nodes.InNode): + curr_result = _encode_type_refs_from_expr(node.left_col, node.left_child) + elif isinstance(node, nodes.AggregateNode): + for agg, _ in node.aggregations: + curr_result = curr_result | _encode_type_refs_from_expr(agg, node.child) + elif isinstance(node, nodes.WindowOpNode): + for grouping_key in node.window_spec.grouping_keys: + curr_result = curr_result | _encode_type_refs_from_expr( + grouping_key, node.child + ) + for ordering_expr in node.window_spec.ordering: + curr_result = curr_result | _encode_type_refs_from_expr( + ordering_expr.scalar_expression, node.child + ) + for col_def in node.agg_exprs: + curr_result = curr_result | _encode_type_refs_from_expr( + col_def.expression, node.child + ) + elif isinstance(node, nodes.ExplodeNode): + for col_id in node.column_ids: + curr_result = curr_result | _encode_type_refs_from_expr(col_id, node.child) + elif isinstance(node, IGNORED_NODES): + # Do nothing + pass + else: + # For unseen nodes, do not raise errors as this is the logging path, but + # we should cover those nodes either in the branches above, or place them + # in the IGNORED_NODES collection. 
+ pass + + return child_result | curr_result + + +def _encode_type_refs_from_expr( + expr: expression.Expression, child_node: bigframe_node.BigFrameNode +) -> int: + # TODO(b/409387790): Remove this branch once SQLGlot compiler fully replaces Ibis compiler + if not expr.is_resolved: + if isinstance(expr, agg_expressions.Aggregation): + expr = schema_binding._bind_schema_to_aggregation_expr(expr, child_node) + else: + expr = expression.bind_schema_fields(expr, child_node.field_by_id) + result = _get_dtype_mask(expr.output_type) + for child_expr in expr.children: + result = result | _encode_type_refs_from_expr(child_expr, child_node) -def _add_data_type(existing_types: int, curr_type: dtypes.Dtype) -> int: - return existing_types | _get_dtype_mask(curr_type) + return result -def _get_dtype_mask(dtype: dtypes.Dtype) -> int: +def _get_dtype_mask(dtype: dtypes.Dtype | None) -> int: + if dtype is None: + # If the dtype is not given, ignore + return 0 if dtype == dtypes.INT_DTYPE: return 1 << 1 if dtype == dtypes.FLOAT_DTYPE: diff --git a/tests/system/small/core/logging/__init__.py b/tests/system/small/core/logging/__init__.py new file mode 100644 index 0000000000..58d482ea38 --- /dev/null +++ b/tests/system/small/core/logging/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/tests/system/small/core/logging/test_data_types.py b/tests/system/small/core/logging/test_data_types.py new file mode 100644 index 0000000000..7e197a9672 --- /dev/null +++ b/tests/system/small/core/logging/test_data_types.py @@ -0,0 +1,113 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Sequence + +import pandas as pd +import pyarrow as pa + +from bigframes import dtypes +from bigframes.core.logging import data_types +import bigframes.pandas as bpd + + +def encode_types(inputs: Sequence[dtypes.Dtype]) -> str: + encoded_val = 0 + for t in inputs: + encoded_val = encoded_val | data_types._get_dtype_mask(t) + + return f"{encoded_val:x}" + + +def test_get_type_refs_no_op(scalars_df_index): + node = scalars_df_index._block._expr.node + expected_types: list[dtypes.Dtype] = [] + + assert data_types.encode_type_refs(node) == encode_types(expected_types) + + +def test_get_type_refs_projection(scalars_df_index): + node = ( + scalars_df_index["datetime_col"] - scalars_df_index["datetime_col"] + )._block._expr.node + expected_types = [dtypes.DATETIME_DTYPE, dtypes.TIMEDELTA_DTYPE] + + assert data_types.encode_type_refs(node) == encode_types(expected_types) + + +def test_get_type_refs_filter(scalars_df_index): + node = scalars_df_index[scalars_df_index["int64_col"] > 0]._block._expr.node + expected_types = [dtypes.INT_DTYPE, dtypes.BOOL_DTYPE] + + assert data_types.encode_type_refs(node) == 
encode_types(expected_types) + + +def test_get_type_refs_order_by(scalars_df_index): + node = scalars_df_index.sort_index()._block._expr.node + expected_types = [dtypes.INT_DTYPE] + + assert data_types.encode_type_refs(node) == encode_types(expected_types) + + +def test_get_type_refs_join(scalars_df_index): + node = ( + scalars_df_index[["int64_col"]].merge( + scalars_df_index[["float64_col"]], + left_on="int64_col", + right_on="float64_col", + ) + )._block._expr.node + expected_types = [dtypes.INT_DTYPE, dtypes.FLOAT_DTYPE] + + assert data_types.encode_type_refs(node) == encode_types(expected_types) + + +def test_get_type_refs_isin(scalars_df_index): + node = scalars_df_index["string_col"].isin(["a"])._block._expr.node + expected_types = [dtypes.STRING_DTYPE, dtypes.BOOL_DTYPE] + + assert data_types.encode_type_refs(node) == encode_types(expected_types) + + +def test_get_type_refs_agg(scalars_df_index): + node = scalars_df_index[["bool_col", "string_col"]].count()._block._expr.node + expected_types = [ + dtypes.INT_DTYPE, + dtypes.BOOL_DTYPE, + dtypes.STRING_DTYPE, + dtypes.FLOAT_DTYPE, + ] + + assert data_types.encode_type_refs(node) == encode_types(expected_types) + + +def test_get_type_refs_window(scalars_df_index): + node = ( + scalars_df_index[["string_col", "bool_col"]] + .groupby("string_col") + .rolling(window=3) + .count() + ._block._expr.node + ) + expected_types = [dtypes.STRING_DTYPE, dtypes.BOOL_DTYPE, dtypes.INT_DTYPE] + + assert data_types.encode_type_refs(node) == encode_types(expected_types) + + +def test_get_type_refs_explode(): + df = bpd.DataFrame({"A": ["a", "b"], "B": [[1, 2], [3, 4, 5]]}) + node = df.explode("B")._block._expr.node + expected_types = [pd.ArrowDtype(pa.list_(pa.int64()))] + + assert data_types.encode_type_refs(node) == encode_types(expected_types) diff --git a/tests/unit/core/logging/test_data_types.py b/tests/unit/core/logging/test_data_types.py index 9e3d1f1ed0..09b3429f00 100644 --- 
a/tests/unit/core/logging/test_data_types.py +++ b/tests/unit/core/logging/test_data_types.py @@ -29,6 +29,7 @@ @pytest.mark.parametrize( ("dtype", "expected_mask"), [ + (None, 0), (UNKNOWN_TYPE, 1 << 0), (dtypes.INT_DTYPE, 1 << 1), (dtypes.FLOAT_DTYPE, 1 << 2), @@ -51,19 +52,3 @@ ) def test_get_dtype_mask(dtype, expected_mask): assert data_types._get_dtype_mask(dtype) == expected_mask - - -def test_add_data_type__type_overlap_no_op(): - curr_type = dtypes.STRING_DTYPE - existing_types = data_types._get_dtype_mask(curr_type) - - assert data_types._add_data_type(existing_types, curr_type) == existing_types - - -def test_add_data_type__new_type_updated(): - curr_type = dtypes.STRING_DTYPE - existing_types = data_types._get_dtype_mask(dtypes.INT_DTYPE) - - assert data_types._add_data_type( - existing_types, curr_type - ) == existing_types | data_types._get_dtype_mask(curr_type) From 1f9ee373c1f1d0cd08b80169c3063b862ea46465 Mon Sep 17 00:00:00 2001 From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com> Date: Thu, 22 Jan 2026 12:10:59 -0800 Subject: [PATCH 26/28] feat: add bigquery.ml.transform function (#2394) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- bigframes/bigquery/_operations/ml.py | 38 +++++++++++++++++++ bigframes/bigquery/ml.py | 2 + bigframes/core/sql/ml.py | 11 ++++++ tests/unit/bigquery/test_ml.py | 18 +++++++++ .../transform_model_basic.sql | 1 + tests/unit/core/sql/test_ml.py | 8 ++++ 6 files changed, 78 insertions(+) create mode 100644 tests/unit/core/sql/snapshots/test_ml/test_transform_model_basic/transform_model_basic.sql diff --git a/bigframes/bigquery/_operations/ml.py b/bigframes/bigquery/_operations/ml.py index c9b48bb5ac..e5a5c5dfb6 100644 --- a/bigframes/bigquery/_operations/ml.py +++ b/bigframes/bigquery/_operations/ml.py @@ -393,3 +393,41 @@ def global_explain( return bpd.read_gbq_query(sql) else: return session.read_gbq_query(sql) + + +@log_adapter.method_logger(custom_base_name="bigquery_ml") +def transform( + model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series], + input_: Union[pd.DataFrame, dataframe.DataFrame, str], +) -> dataframe.DataFrame: + """ + Transforms input data using a BigQuery ML model. + + See the `BigQuery ML TRANSFORM function syntax + `_ + for additional reference. + + Args: + model (bigframes.ml.base.BaseEstimator or str): + The model to use for transformation. + input_ (Union[bigframes.pandas.DataFrame, str]): + The DataFrame or query to use for transformation. + + Returns: + bigframes.pandas.DataFrame: + The transformed data. 
+ """ + import bigframes.pandas as bpd + + model_name, session = _get_model_name_and_session(model, input_) + table_sql = _to_sql(input_) + + sql = bigframes.core.sql.ml.transform( + model_name=model_name, + table=table_sql, + ) + + if session is None: + return bpd.read_gbq_query(sql) + else: + return session.read_gbq_query(sql) diff --git a/bigframes/bigquery/ml.py b/bigframes/bigquery/ml.py index 93b0670ba5..6ceadb324d 100644 --- a/bigframes/bigquery/ml.py +++ b/bigframes/bigquery/ml.py @@ -25,6 +25,7 @@ explain_predict, global_explain, predict, + transform, ) __all__ = [ @@ -33,4 +34,5 @@ "predict", "explain_predict", "global_explain", + "transform", ] diff --git a/bigframes/core/sql/ml.py b/bigframes/core/sql/ml.py index ec55fe0426..1749315925 100644 --- a/bigframes/core/sql/ml.py +++ b/bigframes/core/sql/ml.py @@ -213,3 +213,14 @@ def global_explain( sql += _build_struct_sql(struct_options) sql += ")\n" return sql + + +def transform( + model_name: str, + table: str, +) -> str: + """Encode the ML.TRANSFORM statement. + See https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-transform for reference. 
+ """ + sql = f"SELECT * FROM ML.TRANSFORM(MODEL {googlesql.identifier(model_name)}, ({table}))\n" + return sql diff --git a/tests/unit/bigquery/test_ml.py b/tests/unit/bigquery/test_ml.py index 063ddafcca..96b97d68fe 100644 --- a/tests/unit/bigquery/test_ml.py +++ b/tests/unit/bigquery/test_ml.py @@ -145,3 +145,21 @@ def test_global_explain_with_pandas_series_model(read_gbq_query_mock): generated_sql = read_gbq_query_mock.call_args[0][0] assert "ML.GLOBAL_EXPLAIN" in generated_sql assert f"MODEL `{MODEL_NAME}`" in generated_sql + + +@mock.patch("bigframes.pandas.read_gbq_query") +@mock.patch("bigframes.pandas.read_pandas") +def test_transform_with_pandas_dataframe(read_pandas_mock, read_gbq_query_mock): + df = pd.DataFrame({"col1": [1, 2, 3]}) + read_pandas_mock.return_value._to_sql_query.return_value = ( + "SELECT * FROM `pandas_df`", + [], + [], + ) + ml_ops.transform(MODEL_SERIES, input_=df) + read_pandas_mock.assert_called_once() + read_gbq_query_mock.assert_called_once() + generated_sql = read_gbq_query_mock.call_args[0][0] + assert "ML.TRANSFORM" in generated_sql + assert f"MODEL `{MODEL_NAME}`" in generated_sql + assert "(SELECT * FROM `pandas_df`)" in generated_sql diff --git a/tests/unit/core/sql/snapshots/test_ml/test_transform_model_basic/transform_model_basic.sql b/tests/unit/core/sql/snapshots/test_ml/test_transform_model_basic/transform_model_basic.sql new file mode 100644 index 0000000000..e6cedc1647 --- /dev/null +++ b/tests/unit/core/sql/snapshots/test_ml/test_transform_model_basic/transform_model_basic.sql @@ -0,0 +1 @@ +SELECT * FROM ML.TRANSFORM(MODEL `my_project.my_dataset.my_model`, (SELECT * FROM new_data)) diff --git a/tests/unit/core/sql/test_ml.py b/tests/unit/core/sql/test_ml.py index fe8c1a04d4..9721f42fee 100644 --- a/tests/unit/core/sql/test_ml.py +++ b/tests/unit/core/sql/test_ml.py @@ -169,3 +169,11 @@ def test_global_explain_model_with_options(snapshot): class_level_explain=True, ) snapshot.assert_match(sql, 
"global_explain_model_with_options.sql") + + +def test_transform_model_basic(snapshot): + sql = bigframes.core.sql.ml.transform( + model_name="my_project.my_dataset.my_model", + table="SELECT * FROM new_data", + ) + snapshot.assert_match(sql, "transform_model_basic.sql") From a2d13e7124dc36ebc69d45ecb7d353d00f385b22 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Thu, 22 Jan 2026 15:01:29 -0800 Subject: [PATCH 27/28] chore: librarian update image pull request: 20260122T223041Z (#2398) feat: update image to us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:e7cc6823efb073a8a26e7cefdd869f12ec228abfbd2a44aa9a7eacc284023677 --- .librarian/state.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.librarian/state.yaml b/.librarian/state.yaml index e37895f78d..8028720498 100644 --- a/.librarian/state.yaml +++ b/.librarian/state.yaml @@ -1,4 +1,4 @@ -image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620 +image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:e7cc6823efb073a8a26e7cefdd869f12ec228abfbd2a44aa9a7eacc284023677 libraries: - id: bigframes version: 2.32.0 From 6fef9be445d9f4c44095287b654ecdd56a3326af Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Thu, 22 Jan 2026 15:58:32 -0800 Subject: [PATCH 28/28] chore: librarian release pull request: 20260122T231714Z (#2402) PR created by the Librarian CLI to initialize a release. Merging this PR will auto trigger a release. Librarian Version: v0.7.0 Language Image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:e7cc6823efb073a8a26e7cefdd869f12ec228abfbd2a44aa9a7eacc284023677
bigframes: 2.33.0 ## [2.33.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.32.0...v2.33.0) (2026-01-22) ### Features * add bigquery.ml.transform function (#2394) ([1f9ee373](https://github.com/googleapis/python-bigquery-dataframes/commit/1f9ee373)) * Add dark mode to anywidget mode (#2365) ([2763b41d](https://github.com/googleapis/python-bigquery-dataframes/commit/2763b41d)) * Add max_columns control for anywidget mode (#2374) ([34b5975f](https://github.com/googleapis/python-bigquery-dataframes/commit/34b5975f)) * Configure Biome for Consistent Code Style (#2364) ([81e27b3d](https://github.com/googleapis/python-bigquery-dataframes/commit/81e27b3d)) * Add BigQuery ObjectRef functions to `bigframes.bigquery.obj` (#2380) ([9c3bbc36](https://github.com/googleapis/python-bigquery-dataframes/commit/9c3bbc36)) * Stabilize interactive table height to prevent notebook layout shifts (#2378) ([a634e976](https://github.com/googleapis/python-bigquery-dataframes/commit/a634e976)) ### Bug Fixes * implement retry logic for cloud function endpoint fetching (#2369) ([0f593c27](https://github.com/googleapis/python-bigquery-dataframes/commit/0f593c27)) * Throw if write api commit op has stream_errors (#2385) ([7abfef05](https://github.com/googleapis/python-bigquery-dataframes/commit/7abfef05)) ### Performance Improvements * Avoid requery for some result downsample methods (#2219) ([95763ff2](https://github.com/googleapis/python-bigquery-dataframes/commit/95763ff2))
--- .librarian/state.yaml | 2 +- CHANGELOG.md | 18 ++++++++++++++++++ bigframes/version.py | 4 ++-- third_party/bigframes_vendored/version.py | 4 ++-- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/.librarian/state.yaml b/.librarian/state.yaml index 8028720498..4dba64808e 100644 --- a/.librarian/state.yaml +++ b/.librarian/state.yaml @@ -1,7 +1,7 @@ image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:e7cc6823efb073a8a26e7cefdd869f12ec228abfbd2a44aa9a7eacc284023677 libraries: - id: bigframes - version: 2.32.0 + version: 2.33.0 last_generated_commit: "" apis: [] source_roots: diff --git a/CHANGELOG.md b/CHANGELOG.md index b49afe535c..090cf2ee57 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,24 @@ [1]: https://pypi.org/project/bigframes/#history +## [2.33.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.32.0...v2.33.0) (2026-01-22) + + +### Features + +* add bigquery.ml.transform function (#2394) ([1f9ee373c1f1d0cd08b80169c3063b862ea46465](https://github.com/googleapis/python-bigquery-dataframes/commit/1f9ee373c1f1d0cd08b80169c3063b862ea46465)) +* Add BigQuery ObjectRef functions to `bigframes.bigquery.obj` (#2380) ([9c3bbc36983dffb265454f27b37450df8c5fbc71](https://github.com/googleapis/python-bigquery-dataframes/commit/9c3bbc36983dffb265454f27b37450df8c5fbc71)) +* Stabilize interactive table height to prevent notebook layout shifts (#2378) ([a634e976c0f44087ca2a65f68cf2775ae6f04024](https://github.com/googleapis/python-bigquery-dataframes/commit/a634e976c0f44087ca2a65f68cf2775ae6f04024)) +* Add max_columns control for anywidget mode (#2374) ([34b5975f6911c5aa5ffc64a2fe6967a9f3d86f78](https://github.com/googleapis/python-bigquery-dataframes/commit/34b5975f6911c5aa5ffc64a2fe6967a9f3d86f78)) +* Add dark mode to anywidget mode (#2365) 
([2763b41d4b86939e389f76789f5b2acd44f18169](https://github.com/googleapis/python-bigquery-dataframes/commit/2763b41d4b86939e389f76789f5b2acd44f18169)) +* Configure Biome for Consistent Code Style (#2364) ([81e27b3d81da9b1684eae0b7f0b9abfd7badcc4f](https://github.com/googleapis/python-bigquery-dataframes/commit/81e27b3d81da9b1684eae0b7f0b9abfd7badcc4f)) + + +### Bug Fixes + +* Throw if write api commit op has stream_errors (#2385) ([7abfef0598d476ef233364a01f72d73291983c30](https://github.com/googleapis/python-bigquery-dataframes/commit/7abfef0598d476ef233364a01f72d73291983c30)) +* implement retry logic for cloud function endpoint fetching (#2369) ([0f593c27bfee89fe1bdfc880504f9ab0ac28a24e](https://github.com/googleapis/python-bigquery-dataframes/commit/0f593c27bfee89fe1bdfc880504f9ab0ac28a24e)) + ## [2.32.0](https://github.com/googleapis/google-cloud-python/compare/bigframes-v2.31.0...bigframes-v2.32.0) (2026-01-05) diff --git a/bigframes/version.py b/bigframes/version.py index f36c6789c1..1e9ed79f82 100644 --- a/bigframes/version.py +++ b/bigframes/version.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.32.0" +__version__ = "2.33.0" # {x-release-please-start-date} -__release_date__ = "2026-01-05" +__release_date__ = "2026-01-22" # {x-release-please-end} diff --git a/third_party/bigframes_vendored/version.py b/third_party/bigframes_vendored/version.py index f36c6789c1..1e9ed79f82 100644 --- a/third_party/bigframes_vendored/version.py +++ b/third_party/bigframes_vendored/version.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.32.0" +__version__ = "2.33.0" # {x-release-please-start-date} -__release_date__ = "2026-01-05" +__release_date__ = "2026-01-22" # {x-release-please-end}