diff --git a/.gitignore b/.gitignore index 0ff74ef5283..52dcccd33d8 100644 --- a/.gitignore +++ b/.gitignore @@ -64,3 +64,4 @@ tests/js/node_modules/ pylintrc pylintrc.test dummy.pkl +.mypy_cache/ diff --git a/.librarian/state.yaml b/.librarian/state.yaml index e37895f78d5..4dba64808eb 100644 --- a/.librarian/state.yaml +++ b/.librarian/state.yaml @@ -1,7 +1,7 @@ -image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620 +image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:e7cc6823efb073a8a26e7cefdd869f12ec228abfbd2a44aa9a7eacc284023677 libraries: - id: bigframes - version: 2.32.0 + version: 2.33.0 last_generated_commit: "" apis: [] source_roots: diff --git a/CHANGELOG.md b/CHANGELOG.md index b49afe535c6..090cf2ee57b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,24 @@ [1]: https://pypi.org/project/bigframes/#history +## [2.33.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.32.0...v2.33.0) (2026-01-22) + + +### Features + +* add bigquery.ml.transform function (#2394) ([1f9ee373c1f1d0cd08b80169c3063b862ea46465](https://github.com/googleapis/python-bigquery-dataframes/commit/1f9ee373c1f1d0cd08b80169c3063b862ea46465)) +* Add BigQuery ObjectRef functions to `bigframes.bigquery.obj` (#2380) ([9c3bbc36983dffb265454f27b37450df8c5fbc71](https://github.com/googleapis/python-bigquery-dataframes/commit/9c3bbc36983dffb265454f27b37450df8c5fbc71)) +* Stabilize interactive table height to prevent notebook layout shifts (#2378) ([a634e976c0f44087ca2a65f68cf2775ae6f04024](https://github.com/googleapis/python-bigquery-dataframes/commit/a634e976c0f44087ca2a65f68cf2775ae6f04024)) +* Add max_columns control for anywidget mode (#2374) ([34b5975f6911c5aa5ffc64a2fe6967a9f3d86f78](https://github.com/googleapis/python-bigquery-dataframes/commit/34b5975f6911c5aa5ffc64a2fe6967a9f3d86f78)) +* Add 
dark mode to anywidget mode (#2365) ([2763b41d4b86939e389f76789f5b2acd44f18169](https://github.com/googleapis/python-bigquery-dataframes/commit/2763b41d4b86939e389f76789f5b2acd44f18169)) +* Configure Biome for Consistent Code Style (#2364) ([81e27b3d81da9b1684eae0b7f0b9abfd7badcc4f](https://github.com/googleapis/python-bigquery-dataframes/commit/81e27b3d81da9b1684eae0b7f0b9abfd7badcc4f)) + + +### Bug Fixes + +* Throw if write api commit op has stream_errors (#2385) ([7abfef0598d476ef233364a01f72d73291983c30](https://github.com/googleapis/python-bigquery-dataframes/commit/7abfef0598d476ef233364a01f72d73291983c30)) +* implement retry logic for cloud function endpoint fetching (#2369) ([0f593c27bfee89fe1bdfc880504f9ab0ac28a24e](https://github.com/googleapis/python-bigquery-dataframes/commit/0f593c27bfee89fe1bdfc880504f9ab0ac28a24e)) + ## [2.32.0](https://github.com/googleapis/google-cloud-python/compare/bigframes-v2.31.0...bigframes-v2.32.0) (2026-01-05) diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index f835285a216..0bbbc418e6d 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -18,7 +18,7 @@ import sys -from bigframes.bigquery import ai, ml +from bigframes.bigquery import ai, ml, obj from bigframes.bigquery._operations.approx_agg import approx_top_count from bigframes.bigquery._operations.array import ( array_agg, @@ -60,7 +60,7 @@ from bigframes.bigquery._operations.search import create_vector_index, vector_search from bigframes.bigquery._operations.sql import sql_scalar from bigframes.bigquery._operations.struct import struct -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter _functions = [ # approximate aggregate ops @@ -158,4 +158,5 @@ # Modules / SQL namespaces "ai", "ml", + "obj", ] diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py index e8c28e61f5e..fd7dafe95fc 100644 --- a/bigframes/bigquery/_operations/ai.py +++ 
b/bigframes/bigquery/_operations/ai.py @@ -26,7 +26,8 @@ from bigframes import clients, dataframe, dtypes from bigframes import pandas as bpd from bigframes import series, session -from bigframes.core import convert, log_adapter +from bigframes.core import convert +from bigframes.core.logging import log_adapter from bigframes.ml import core as ml_core from bigframes.operations import ai_ops, output_schemas @@ -57,14 +58,14 @@ def generate( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq >>> country = bpd.Series(["Japan", "Canada"]) - >>> bbq.ai.generate(("What's the capital city of ", country, " one word only")) - 0 {'result': 'Tokyo\\n', 'full_response': '{"cand... - 1 {'result': 'Ottawa\\n', 'full_response': '{"can... + >>> bbq.ai.generate(("What's the capital city of ", country, " one word only")) # doctest: +SKIP + 0 {'result': 'Tokyo', 'full_response': '{"cand... + 1 {'result': 'Ottawa', 'full_response': '{"can... dtype: struct>, status: string>[pyarrow] - >>> bbq.ai.generate(("What's the capital city of ", country, " one word only")).struct.field("result") - 0 Tokyo\\n - 1 Ottawa\\n + >>> bbq.ai.generate(("What's the capital city of ", country, " one word only")).struct.field("result") # doctest: +SKIP + 0 Tokyo + 1 Ottawa Name: result, dtype: string You get structured output when the `output_schema` parameter is set: diff --git a/bigframes/bigquery/_operations/ml.py b/bigframes/bigquery/_operations/ml.py index 073be0ef2b0..e5a5c5dfb68 100644 --- a/bigframes/bigquery/_operations/ml.py +++ b/bigframes/bigquery/_operations/ml.py @@ -20,7 +20,7 @@ import google.cloud.bigquery import pandas as pd -import bigframes.core.log_adapter as log_adapter +import bigframes.core.logging.log_adapter as log_adapter import bigframes.core.sql.ml import bigframes.dataframe as dataframe import bigframes.ml.base @@ -393,3 +393,41 @@ def global_explain( return bpd.read_gbq_query(sql) else: return session.read_gbq_query(sql) + + 
+@log_adapter.method_logger(custom_base_name="bigquery_ml") +def transform( + model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series], + input_: Union[pd.DataFrame, dataframe.DataFrame, str], +) -> dataframe.DataFrame: + """ + Transforms input data using a BigQuery ML model. + + See the `BigQuery ML TRANSFORM function syntax + <https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-transform>`_ + for additional reference. + + Args: + model (bigframes.ml.base.BaseEstimator or str): + The model to use for transformation. + input_ (Union[bigframes.pandas.DataFrame, str]): + The DataFrame or query to use for transformation. + + Returns: + bigframes.pandas.DataFrame: + The transformed data. + """ + import bigframes.pandas as bpd + + model_name, session = _get_model_name_and_session(model, input_) + table_sql = _to_sql(input_) + + sql = bigframes.core.sql.ml.transform( + model_name=model_name, + table=table_sql, + ) + + if session is None: + return bpd.read_gbq_query(sql) + else: + return session.read_gbq_query(sql) diff --git a/bigframes/bigquery/_operations/obj.py b/bigframes/bigquery/_operations/obj.py new file mode 100644 index 00000000000..5aef00e73bd --- /dev/null +++ b/bigframes/bigquery/_operations/obj.py @@ -0,0 +1,115 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""This module exposes BigQuery ObjectRef functions. + +See bigframes.bigquery.obj for public docs. 
+""" + + +from __future__ import annotations + +import datetime +from typing import Optional, Sequence, Union + +import numpy as np +import pandas as pd + +from bigframes.core import convert +from bigframes.core.logging import log_adapter +import bigframes.core.utils as utils +import bigframes.operations as ops +import bigframes.series as series + + +@log_adapter.method_logger(custom_base_name="bigquery_obj") +def fetch_metadata( + objectref: series.Series, +) -> series.Series: + """[Preview] The OBJ.FETCH_METADATA function returns Cloud Storage metadata for a partially populated ObjectRef value. + + Args: + objectref (bigframes.pandas.Series): + A partially populated ObjectRef value, in which the uri and authorizer fields are populated and the details field isn't. + + Returns: + bigframes.pandas.Series: A fully populated ObjectRef value. The metadata is provided in the details field of the returned ObjectRef value. + """ + objectref = convert.to_bf_series(objectref, default_index=None) + return objectref._apply_unary_op(ops.obj_fetch_metadata_op) + + +@log_adapter.method_logger(custom_base_name="bigquery_obj") +def get_access_url( + objectref: series.Series, + mode: str, + duration: Optional[Union[datetime.timedelta, pd.Timedelta, np.timedelta64]] = None, +) -> series.Series: + """[Preview] The OBJ.GET_ACCESS_URL function returns JSON that contains reference information for the input ObjectRef value, and also access URLs that you can use to read or modify the Cloud Storage object. + + Args: + objectref (bigframes.pandas.Series): + An ObjectRef value that represents a Cloud Storage object. + mode (str): + A STRING value that identifies the type of URL that you want to be returned. The following values are supported: + 'r': Returns a URL that lets you read the object. + 'rw': Returns two URLs, one that lets you read the object, and one that lets you modify the object. 
+ duration (Union[datetime.timedelta, pandas.Timedelta, numpy.timedelta64], optional): + An optional INTERVAL value that specifies how long the generated access URLs remain valid. You can specify a value between 30 minutes and 6 hours. For example, you could specify INTERVAL 2 HOUR to generate URLs that expire after 2 hours. The default value is 6 hours. + + Returns: + bigframes.pandas.Series: A JSON value that contains the Cloud Storage object reference information from the input ObjectRef value, and also one or more URLs that you can use to access the Cloud Storage object. + """ + objectref = convert.to_bf_series(objectref, default_index=None) + + duration_micros = None + if duration is not None: + duration_micros = utils.timedelta_to_micros(duration) + + return objectref._apply_unary_op( + ops.ObjGetAccessUrl(mode=mode, duration=duration_micros) + ) + + +@log_adapter.method_logger(custom_base_name="bigquery_obj") +def make_ref( + uri_or_json: Union[series.Series, Sequence[str]], + authorizer: Union[series.Series, str, None] = None, +) -> series.Series: + """[Preview] Use the OBJ.MAKE_REF function to create an ObjectRef value that contains reference information for a Cloud Storage object. + + Args: + uri_or_json (bigframes.pandas.Series or str): + A series of STRING values that contains the URI for the Cloud Storage object, for example, gs://mybucket/flowers/12345.jpg. + OR + A series of JSON value that represents a Cloud Storage object. + authorizer (bigframes.pandas.Series or str, optional): + A STRING value that contains the Cloud Resource connection used to access the Cloud Storage object. + Required if ``uri_or_json`` is a URI string. + + Returns: + bigframes.pandas.Series: An ObjectRef value. + """ + uri_or_json = convert.to_bf_series(uri_or_json, default_index=None) + + if authorizer is not None: + # Avoid join problems encountered if we try to convert a literal into Series. 
+ if not isinstance(authorizer, str): + authorizer = convert.to_bf_series(authorizer, default_index=None) + + return uri_or_json._apply_binary_op(authorizer, ops.obj_make_ref_op) + + # If authorizer is not provided, we assume uri_or_json is a JSON objectref + return uri_or_json._apply_unary_op(ops.obj_make_ref_json_op) diff --git a/bigframes/bigquery/ml.py b/bigframes/bigquery/ml.py index 93b0670ba5e..6ceadb324d5 100644 --- a/bigframes/bigquery/ml.py +++ b/bigframes/bigquery/ml.py @@ -25,6 +25,7 @@ explain_predict, global_explain, predict, + transform, ) __all__ = [ @@ -33,4 +34,5 @@ "predict", "explain_predict", "global_explain", + "transform", ] diff --git a/bigframes/bigquery/obj.py b/bigframes/bigquery/obj.py new file mode 100644 index 00000000000..dc2c29e1f3d --- /dev/null +++ b/bigframes/bigquery/obj.py @@ -0,0 +1,41 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module integrates BigQuery built-in 'ObjectRef' functions for use with Series/DataFrame objects, +such as OBJ.FETCH_METADATA: +https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/objectref_functions + + +.. warning:: + + This product or feature is subject to the "Pre-GA Offerings Terms" in the + General Service Terms section of the `Service Specific Terms + <https://cloud.google.com/terms/service-terms#1>`_. Pre-GA products and + features are available "as is" and might have limited support. For more + information, see the `launch stage descriptions + <https://cloud.google.com/products#product-launch-stages>`_. + +.. 
note:: + + To provide feedback or request support for this feature, send an email to + bq-objectref-feedback@google.com. +""" + +from bigframes.bigquery._operations.obj import fetch_metadata, get_access_url, make_ref + +__all__ = [ + "fetch_metadata", + "get_access_url", + "make_ref", +] diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 0f98f582c26..5bac1a06f1e 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -818,49 +818,30 @@ def _materialize_local( total_rows = result_batches.approx_total_rows # Remove downsampling config from subsequent invocations, as otherwise could result in many # iterations if downsampling undershoots - return self._downsample( - total_rows=total_rows, - sampling_method=sample_config.sampling_method, - fraction=fraction, - random_state=sample_config.random_state, - )._materialize_local( - MaterializationOptions(ordered=materialize_options.ordered) - ) - else: - df = result_batches.to_pandas() - df = self._copy_index_to_pandas(df) - df.set_axis(self.column_labels, axis=1, copy=False) - return df, execute_result.query_job - - def _downsample( - self, total_rows: int, sampling_method: str, fraction: float, random_state - ) -> Block: - # either selecting fraction or number of rows - if sampling_method == _HEAD: - filtered_block = self.slice(stop=int(total_rows * fraction)) - return filtered_block - elif (sampling_method == _UNIFORM) and (random_state is None): - filtered_expr = self.expr._uniform_sampling(fraction) - block = Block( - filtered_expr, - index_columns=self.index_columns, - column_labels=self.column_labels, - index_labels=self.index.names, - ) - return block - elif sampling_method == _UNIFORM: - block = self.split( - fracs=(fraction,), - random_state=random_state, - sort=False, - )[0] - return block + if sample_config.sampling_method == "head": + # Just truncates the result iterator without a follow-up query + raw_df = result_batches.to_pandas(limit=int(total_rows * fraction)) + elif ( + 
sample_config.sampling_method == "uniform" + and sample_config.random_state is None + ): + # Pushes sample into result without new query + sampled_batches = execute_result.batches(sample_rate=fraction) + raw_df = sampled_batches.to_pandas() + else: # uniform sample with random state requires a full follow-up query + down_sampled_block = self.split( + fracs=(fraction,), + random_state=sample_config.random_state, + sort=False, + )[0] + return down_sampled_block._materialize_local( + MaterializationOptions(ordered=materialize_options.ordered) + ) else: - # This part should never be called, just in case. - raise NotImplementedError( - f"The downsampling method {sampling_method} is not implemented, " - f"please choose from {','.join(_SAMPLING_METHODS)}." - ) + raw_df = result_batches.to_pandas() + df = self._copy_index_to_pandas(raw_df) + df.set_axis(self.column_labels, axis=1, copy=False) + return df, execute_result.query_job def split( self, diff --git a/bigframes/core/bq_data.py b/bigframes/core/bq_data.py index 9b2103b01d7..3b42ff7c031 100644 --- a/bigframes/core/bq_data.py +++ b/bigframes/core/bq_data.py @@ -186,11 +186,22 @@ def get_arrow_batches( columns: Sequence[str], storage_read_client: bigquery_storage_v1.BigQueryReadClient, project_id: str, + sample_rate: Optional[float] = None, ) -> ReadResult: table_mod_options = {} read_options_dict: dict[str, Any] = {"selected_fields": list(columns)} + + predicates = [] if data.sql_predicate: - read_options_dict["row_restriction"] = data.sql_predicate + predicates.append(data.sql_predicate) + if sample_rate is not None: + assert isinstance(sample_rate, float) + predicates.append(f"RAND() < {sample_rate}") + + if predicates: + full_predicates = " AND ".join(f"( {pred} )" for pred in predicates) + read_options_dict["row_restriction"] = full_predicates + read_options = bq_storage_types.ReadSession.TableReadOptions(**read_options_dict) if data.at_time: diff --git a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py 
b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py index 91bbfbfbcf6..519b2c94426 100644 --- a/bigframes/core/compile/ibis_compiler/scalar_op_registry.py +++ b/bigframes/core/compile/ibis_compiler/scalar_op_registry.py @@ -16,6 +16,7 @@ import functools import typing +from typing import cast from bigframes_vendored import ibis import bigframes_vendored.ibis.expr.api as ibis_api @@ -1247,6 +1248,13 @@ def obj_fetch_metadata_op_impl(obj_ref: ibis_types.Value): @scalar_op_compiler.register_unary_op(ops.ObjGetAccessUrl, pass_op=True) def obj_get_access_url_op_impl(obj_ref: ibis_types.Value, op: ops.ObjGetAccessUrl): + if op.duration is not None: + duration_value = cast( + ibis_types.IntegerValue, ibis_types.literal(op.duration) + ).to_interval("us") + return obj_get_access_url_with_duration( + obj_ref=obj_ref, mode=op.mode, duration=duration_value + ) return obj_get_access_url(obj_ref=obj_ref, mode=op.mode) @@ -1807,6 +1815,11 @@ def obj_make_ref_op(x: ibis_types.Value, y: ibis_types.Value): return obj_make_ref(uri=x, authorizer=y) +@scalar_op_compiler.register_unary_op(ops.obj_make_ref_json_op) +def obj_make_ref_json_op(x: ibis_types.Value): + return obj_make_ref_json(objectref_json=x) + + # Ternary Operations @scalar_op_compiler.register_ternary_op(ops.where_op) def where_op( @@ -2141,11 +2154,21 @@ def obj_make_ref(uri: str, authorizer: str) -> _OBJ_REF_IBIS_DTYPE: # type: ign """Make ObjectRef Struct from uri and connection.""" +@ibis_udf.scalar.builtin(name="OBJ.MAKE_REF") +def obj_make_ref_json(objectref_json: ibis_dtypes.JSON) -> _OBJ_REF_IBIS_DTYPE: # type: ignore + """Make ObjectRef Struct from json.""" + + @ibis_udf.scalar.builtin(name="OBJ.GET_ACCESS_URL") def obj_get_access_url(obj_ref: _OBJ_REF_IBIS_DTYPE, mode: ibis_dtypes.String) -> ibis_dtypes.JSON: # type: ignore """Get access url (as ObjectRefRumtime JSON) from ObjectRef.""" +@ibis_udf.scalar.builtin(name="OBJ.GET_ACCESS_URL") +def obj_get_access_url_with_duration(obj_ref, mode, duration) -> 
ibis_dtypes.JSON: # type: ignore + """Get access url (as ObjectRefRumtime JSON) from ObjectRef.""" + + @ibis_udf.scalar.builtin(name="ltrim") def str_lstrip_op( # type: ignore[empty-body] x: ibis_dtypes.String, to_strip: ibis_dtypes.String diff --git a/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py b/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py index 89bb58d7dda..647e86d28ac 100644 --- a/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py +++ b/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py @@ -23,6 +23,7 @@ from bigframes.core import window_spec import bigframes.core.compile.sqlglot.aggregations.op_registration as reg from bigframes.core.compile.sqlglot.aggregations.windows import apply_window_if_present +from bigframes.core.compile.sqlglot.expressions import constants import bigframes.core.compile.sqlglot.expressions.typed_expr as typed_expr import bigframes.core.compile.sqlglot.sqlglot_ir as ir from bigframes.operations import aggregations as agg_ops @@ -44,9 +45,13 @@ def _( column: typed_expr.TypedExpr, window: typing.Optional[window_spec.WindowSpec] = None, ) -> sge.Expression: - # BQ will return null for empty column, result would be false in pandas. - result = apply_window_if_present(sge.func("LOGICAL_AND", column.expr), window) - return sge.func("IFNULL", result, sge.true()) + expr = column.expr + if column.dtype != dtypes.BOOL_DTYPE: + expr = sge.NEQ(this=expr, expression=sge.convert(0)) + expr = apply_window_if_present(sge.func("LOGICAL_AND", expr), window) + + # BQ will return null for empty column, result would be true in pandas. 
+ return sge.func("COALESCE", expr, sge.convert(True)) @UNARY_OP_REGISTRATION.register(agg_ops.AnyOp) @@ -56,6 +61,8 @@ def _( window: typing.Optional[window_spec.WindowSpec] = None, ) -> sge.Expression: expr = column.expr + if column.dtype != dtypes.BOOL_DTYPE: + expr = sge.NEQ(this=expr, expression=sge.convert(0)) expr = apply_window_if_present(sge.func("LOGICAL_OR", expr), window) # BQ will return null for empty column, result would be false in pandas. @@ -326,6 +333,15 @@ def _( unit=sge.Identifier(this="MICROSECOND"), ) + if column.dtype == dtypes.DATE_DTYPE: + date_diff = sge.DateDiff( + this=column.expr, expression=shifted, unit=sge.Identifier(this="DAY") + ) + return sge.Cast( + this=sge.Floor(this=date_diff * constants._DAY_TO_MICROSECONDS), + to="INT64", + ) + raise TypeError(f"Cannot perform diff on type {column.dtype}") @@ -410,24 +426,28 @@ def _( column: typed_expr.TypedExpr, window: typing.Optional[window_spec.WindowSpec] = None, ) -> sge.Expression: + expr = column.expr + if column.dtype == dtypes.BOOL_DTYPE: + expr = sge.Cast(this=expr, to="INT64") + # Need to short-circuit as log with zeroes is illegal sql - is_zero = sge.EQ(this=column.expr, expression=sge.convert(0)) + is_zero = sge.EQ(this=expr, expression=sge.convert(0)) # There is no product sql aggregate function, so must implement as a sum of logs, and then # apply power after. Note, log and power base must be equal! This impl uses natural log. 
- logs = ( - sge.Case() - .when(is_zero, sge.convert(0)) - .else_(sge.func("LN", sge.func("ABS", column.expr))) + logs = sge.If( + this=is_zero, + true=sge.convert(0), + false=sge.func("LOG", sge.convert(2), sge.func("ABS", expr)), ) logs_sum = apply_window_if_present(sge.func("SUM", logs), window) - magnitude = sge.func("EXP", logs_sum) + magnitude = sge.func("POWER", sge.convert(2), logs_sum) # Can't determine sign from logs, so have to determine parity of count of negative inputs is_negative = ( sge.Case() .when( - sge.LT(this=sge.func("SIGN", column.expr), expression=sge.convert(0)), + sge.EQ(this=sge.func("SIGN", expr), expression=sge.convert(-1)), sge.convert(1), ) .else_(sge.convert(0)) @@ -445,11 +465,7 @@ def _( .else_( sge.Mul( this=magnitude, - expression=sge.If( - this=sge.EQ(this=negative_count_parity, expression=sge.convert(1)), - true=sge.convert(-1), - false=sge.convert(1), - ), + expression=sge.func("POWER", sge.convert(-1), negative_count_parity), ) ) ) @@ -499,14 +515,18 @@ def _( column: typed_expr.TypedExpr, window: typing.Optional[window_spec.WindowSpec] = None, ) -> sge.Expression: - # TODO: Support interpolation argument - # TODO: Support percentile_disc - result: sge.Expression = sge.func("PERCENTILE_CONT", column.expr, sge.convert(op.q)) + expr = column.expr + if column.dtype == dtypes.BOOL_DTYPE: + expr = sge.Cast(this=expr, to="INT64") + + result: sge.Expression = sge.func("PERCENTILE_CONT", expr, sge.convert(op.q)) if window is None: - # PERCENTILE_CONT is a navigation function, not an aggregate function, so it always needs an OVER clause. + # PERCENTILE_CONT is a navigation function, not an aggregate function, + # so it always needs an OVER clause. 
result = sge.Window(this=result) else: result = apply_window_if_present(result, window) + if op.should_floor_result: result = sge.Cast(this=sge.func("FLOOR", result), to="INT64") return result diff --git a/bigframes/core/compile/sqlglot/aggregations/windows.py b/bigframes/core/compile/sqlglot/aggregations/windows.py index 6d6c507455b..9c327885850 100644 --- a/bigframes/core/compile/sqlglot/aggregations/windows.py +++ b/bigframes/core/compile/sqlglot/aggregations/windows.py @@ -44,6 +44,7 @@ def apply_window_if_present( order_by = None elif window.is_range_bounded: order_by = get_window_order_by((window.ordering[0],)) + order_by = remove_null_ordering_for_range_windows(order_by) else: order_by = get_window_order_by(window.ordering) @@ -150,6 +151,30 @@ def get_window_order_by( return tuple(order_by) +def remove_null_ordering_for_range_windows( + order_by: typing.Optional[tuple[sge.Ordered, ...]], +) -> typing.Optional[tuple[sge.Ordered, ...]]: + """Removes NULL FIRST/LAST from ORDER BY expressions in RANGE windows. 
+ Here's the support matrix: + ✅ sum(x) over (order by y desc nulls last) + 🚫 sum(x) over (order by y asc nulls last) + ✅ sum(x) over (order by y asc nulls first) + 🚫 sum(x) over (order by y desc nulls first) + """ + if order_by is None: + return None + + new_order_by = [] + for key in order_by: + kargs = key.args + if kargs.get("desc") is True and kargs.get("nulls_first", False): + kargs["nulls_first"] = False + elif kargs.get("desc") is False and not kargs.setdefault("nulls_first", True): + kargs["nulls_first"] = True + new_order_by.append(sge.Ordered(**kargs)) + return tuple(new_order_by) + + def _get_window_bounds( value, is_preceding: bool ) -> tuple[typing.Union[str, sge.Expression], typing.Optional[str]]: diff --git a/bigframes/core/compile/sqlglot/compiler.py b/bigframes/core/compile/sqlglot/compiler.py index b3b813a1c09..e77370892c0 100644 --- a/bigframes/core/compile/sqlglot/compiler.py +++ b/bigframes/core/compile/sqlglot/compiler.py @@ -356,6 +356,9 @@ def compile_window(node: nodes.WindowOpNode, child: ir.SQLGlotIR) -> ir.SQLGlotI observation_count = windows.apply_window_if_present( sge.func("SUM", is_observation), window_spec ) + observation_count = sge.func( + "COALESCE", observation_count, sge.convert(0) + ) else: # Operations like count treat even NULLs as valid observations # for the sake of min_periods notnull is just used to convert diff --git a/bigframes/core/compile/sqlglot/expressions/blob_ops.py b/bigframes/core/compile/sqlglot/expressions/blob_ops.py index 0c1491b92a4..3105cd8e303 100644 --- a/bigframes/core/compile/sqlglot/expressions/blob_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/blob_ops.py @@ -31,9 +31,22 @@ def _(expr: TypedExpr) -> sge.Expression: @register_unary_op(ops.ObjGetAccessUrl, pass_op=True) def _(expr: TypedExpr, op: ops.ObjGetAccessUrl) -> sge.Expression: - return sge.func("OBJ.GET_ACCESS_URL", expr.expr, sge.convert(op.mode)) + args = [expr.expr, sge.Literal.string(op.mode)] + if op.duration is not None: + 
args.append( + sge.Interval( + this=sge.Literal.number(op.duration), + unit=sge.Var(this="MICROSECOND"), + ) + ) + return sge.func("OBJ.GET_ACCESS_URL", *args) @register_binary_op(ops.obj_make_ref_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: return sge.func("OBJ.MAKE_REF", left.expr, right.expr) + + +@register_unary_op(ops.obj_make_ref_json_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("OBJ.MAKE_REF", expr.expr) diff --git a/bigframes/core/compile/sqlglot/expressions/bool_ops.py b/bigframes/core/compile/sqlglot/expressions/bool_ops.py index 26653d720c3..6fee3f4278e 100644 --- a/bigframes/core/compile/sqlglot/expressions/bool_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/bool_ops.py @@ -26,6 +26,16 @@ @register_binary_op(ops.and_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + # For AND, when we encounter a NULL value, we only know when the result is FALSE, + # otherwise the result is unknown (NULL). See: truth table at + # https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic#AND,_OR + if left.expr == sge.null(): + condition = sge.EQ(this=right.expr, expression=sge.convert(False)) + return sge.If(this=condition, true=right.expr, false=sge.null()) + if right.expr == sge.null(): + condition = sge.EQ(this=left.expr, expression=sge.convert(False)) + return sge.If(this=condition, true=left.expr, false=sge.null()) + if left.dtype == dtypes.BOOL_DTYPE and right.dtype == dtypes.BOOL_DTYPE: return sge.And(this=left.expr, expression=right.expr) return sge.BitwiseAnd(this=left.expr, expression=right.expr) @@ -33,6 +43,16 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: @register_binary_op(ops.or_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + # For OR, when we encounter a NULL value, we only know when the result is TRUE, + # otherwise the result is unknown (NULL). 
See: truth table at + # https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic#AND,_OR + if left.expr == sge.null(): + condition = sge.EQ(this=right.expr, expression=sge.convert(True)) + return sge.If(this=condition, true=right.expr, false=sge.null()) + if right.expr == sge.null(): + condition = sge.EQ(this=left.expr, expression=sge.convert(True)) + return sge.If(this=condition, true=left.expr, false=sge.null()) + if left.dtype == dtypes.BOOL_DTYPE and right.dtype == dtypes.BOOL_DTYPE: return sge.Or(this=left.expr, expression=right.expr) return sge.BitwiseOr(this=left.expr, expression=right.expr) @@ -40,8 +60,26 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: @register_binary_op(ops.xor_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: - if left.dtype == dtypes.BOOL_DTYPE and right.dtype == dtypes.BOOL_DTYPE: - left_expr = sge.And(this=left.expr, expression=sge.Not(this=right.expr)) - right_expr = sge.And(this=sge.Not(this=left.expr), expression=right.expr) - return sge.Or(this=left_expr, expression=right_expr) + # For XOR, cast NULL operands to BOOLEAN to ensure the resulting expression + # maintains the boolean data type. 
+ left_expr = left.expr + left_dtype = left.dtype + if left_expr == sge.null(): + left_expr = sge.Cast(this=sge.convert(None), to="BOOLEAN") + left_dtype = dtypes.BOOL_DTYPE + right_expr = right.expr + right_dtype = right.dtype + if right_expr == sge.null(): + right_expr = sge.Cast(this=sge.convert(None), to="BOOLEAN") + right_dtype = dtypes.BOOL_DTYPE + + if left_dtype == dtypes.BOOL_DTYPE and right_dtype == dtypes.BOOL_DTYPE: + return sge.Or( + this=sge.paren( + sge.And(this=left_expr, expression=sge.Not(this=right_expr)) + ), + expression=sge.paren( + sge.And(this=sge.Not(this=left_expr), expression=right_expr) + ), + ) return sge.BitwiseXor(this=left.expr, expression=right.expr) diff --git a/bigframes/core/compile/sqlglot/expressions/comparison_ops.py b/bigframes/core/compile/sqlglot/expressions/comparison_ops.py index d64c7b1d3f4..8c201f6a068 100644 --- a/bigframes/core/compile/sqlglot/expressions/comparison_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/comparison_ops.py @@ -16,11 +16,13 @@ import typing +import bigframes_vendored.sqlglot as sg import bigframes_vendored.sqlglot.expressions as sge import pandas as pd from bigframes import dtypes from bigframes import operations as ops +from bigframes.core.compile.sqlglot import sqlglot_ir from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler @@ -31,17 +33,23 @@ @register_unary_op(ops.IsInOp, pass_op=True) def _(expr: TypedExpr, op: ops.IsInOp) -> sge.Expression: values = [] - is_numeric_expr = dtypes.is_numeric(expr.dtype) + is_numeric_expr = dtypes.is_numeric(expr.dtype, include_bool=False) for value in op.values: - if value is None: + if _is_null(value): continue dtype = dtypes.bigframes_type(type(value)) - if expr.dtype == dtype or is_numeric_expr and dtypes.is_numeric(dtype): + if ( + expr.dtype == dtype + or is_numeric_expr + and dtypes.is_numeric(dtype, include_bool=False) + ): 
values.append(sge.convert(value)) if op.match_nulls: contains_nulls = any(_is_null(value) for value in op.values) if contains_nulls: + if len(values) == 0: + return sge.Is(this=expr.expr, expression=sge.Null()) return sge.Is(this=expr.expr, expression=sge.Null()) | sge.In( this=expr.expr, expressions=values ) @@ -56,6 +64,10 @@ def _(expr: TypedExpr, op: ops.IsInOp) -> sge.Expression: @register_binary_op(ops.eq_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if sqlglot_ir._is_null_literal(left.expr): + return sge.Is(this=right.expr, expression=sge.Null()) + if sqlglot_ir._is_null_literal(right.expr): + return sge.Is(this=left.expr, expression=sge.Null()) left_expr = _coerce_bool_to_int(left) right_expr = _coerce_bool_to_int(right) return sge.EQ(this=left_expr, expression=right_expr) @@ -83,6 +95,9 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: @register_binary_op(ops.ge_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if left.expr == sge.null() or right.expr == sge.null(): + return sge.null() + left_expr = _coerce_bool_to_int(left) right_expr = _coerce_bool_to_int(right) return sge.GTE(this=left_expr, expression=right_expr) @@ -90,6 +105,9 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: @register_binary_op(ops.gt_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if left.expr == sge.null() or right.expr == sge.null(): + return sge.null() + left_expr = _coerce_bool_to_int(left) right_expr = _coerce_bool_to_int(right) return sge.GT(this=left_expr, expression=right_expr) @@ -97,6 +115,9 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: @register_binary_op(ops.lt_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if left.expr == sge.null() or right.expr == sge.null(): + return sge.null() + left_expr = _coerce_bool_to_int(left) right_expr = _coerce_bool_to_int(right) return sge.LT(this=left_expr, expression=right_expr) @@ -104,6 +125,9 @@ def _(left: TypedExpr, 
right: TypedExpr) -> sge.Expression: @register_binary_op(ops.le_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if left.expr == sge.null() or right.expr == sge.null(): + return sge.null() + left_expr = _coerce_bool_to_int(left) right_expr = _coerce_bool_to_int(right) return sge.LTE(this=left_expr, expression=right_expr) @@ -121,6 +145,17 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: @register_binary_op(ops.ne_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if sqlglot_ir._is_null_literal(left.expr): + return sge.Is( + this=sge.paren(right.expr, copy=False), + expression=sg.not_(sge.Null(), copy=False), + ) + if sqlglot_ir._is_null_literal(right.expr): + return sge.Is( + this=sge.paren(left.expr, copy=False), + expression=sg.not_(sge.Null(), copy=False), + ) + left_expr = _coerce_bool_to_int(left) right_expr = _coerce_bool_to_int(right) return sge.NEQ(this=left_expr, expression=right_expr) diff --git a/bigframes/core/compile/sqlglot/expressions/constants.py b/bigframes/core/compile/sqlglot/expressions/constants.py index f383306292a..5ba4a72279f 100644 --- a/bigframes/core/compile/sqlglot/expressions/constants.py +++ b/bigframes/core/compile/sqlglot/expressions/constants.py @@ -20,6 +20,7 @@ _NAN = sge.Cast(this=sge.convert("NaN"), to="FLOAT64") _INF = sge.Cast(this=sge.convert("Infinity"), to="FLOAT64") _NEG_INF = sge.Cast(this=sge.convert("-Infinity"), to="FLOAT64") +_DAY_TO_MICROSECONDS = sge.convert(86400000000) # Approx Highest number you can pass in to EXP function and get a valid FLOAT64 result # FLOAT64 has 11 exponent bits, so max values is about 2**(2**10) diff --git a/bigframes/core/compile/sqlglot/expressions/datetime_ops.py b/bigframes/core/compile/sqlglot/expressions/datetime_ops.py index e20d2da5679..7f3e8135af1 100644 --- a/bigframes/core/compile/sqlglot/expressions/datetime_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/datetime_ops.py @@ -19,6 +19,7 @@ from bigframes import dtypes from 
bigframes import operations as ops from bigframes.core.compile.constants import UNIT_TO_US_CONVERSION_FACTORS +from bigframes.core.compile.sqlglot import sqlglot_types from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler @@ -26,28 +27,6 @@ register_binary_op = scalar_compiler.scalar_op_compiler.register_binary_op -def _calculate_resample_first(y: TypedExpr, origin: str) -> sge.Expression: - if origin == "epoch": - return sge.convert(0) - elif origin == "start_day": - return sge.func( - "UNIX_MICROS", - sge.Cast( - this=sge.Cast( - this=y.expr, to=sge.DataType(this=sge.DataType.Type.DATE) - ), - to=sge.DataType(this=sge.DataType.Type.TIMESTAMPTZ), - ), - ) - elif origin == "start": - return sge.func( - "UNIX_MICROS", - sge.Cast(this=y.expr, to=sge.DataType(this=sge.DataType.Type.TIMESTAMPTZ)), - ) - else: - raise ValueError(f"Origin {origin} not supported") - - @register_binary_op(ops.DatetimeToIntegerLabelOp, pass_op=True) def datetime_to_integer_label_op( x: TypedExpr, y: TypedExpr, op: ops.DatetimeToIntegerLabelOp @@ -317,6 +296,20 @@ def _(expr: TypedExpr, op: ops.FloorDtOp) -> sge.Expression: return sge.TimestampTrunc(this=expr.expr, unit=sge.Identifier(this=bq_freq)) +def _calculate_resample_first(y: TypedExpr, origin: str) -> sge.Expression: + if origin == "epoch": + return sge.convert(0) + elif origin == "start_day": + return sge.func( + "UNIX_MICROS", + sge.Cast(this=sge.Cast(this=y.expr, to="DATE"), to="TIMESTAMP"), + ) + elif origin == "start": + return sge.func("UNIX_MICROS", sge.Cast(this=y.expr, to="TIMESTAMP")) + else: + raise ValueError(f"Origin {origin} not supported") + + @register_unary_op(ops.hour_op) def _(expr: TypedExpr) -> sge.Expression: return sge.Extract(this=sge.Identifier(this="HOUR"), expression=expr.expr) @@ -436,3 +429,245 @@ def _(expr: TypedExpr, op: ops.UnixSeconds) -> sge.Expression: @register_unary_op(ops.year_op) def _(expr: 
TypedExpr) -> sge.Expression: return sge.Extract(this=sge.Identifier(this="YEAR"), expression=expr.expr) + + +@register_binary_op(ops.IntegerLabelToDatetimeOp, pass_op=True) +def integer_label_to_datetime_op( + x: TypedExpr, y: TypedExpr, op: ops.IntegerLabelToDatetimeOp +) -> sge.Expression: + # Determine if the frequency is fixed by checking if 'op.freq.nanos' is defined. + try: + return _integer_label_to_datetime_op_fixed_frequency(x, y, op) + + except ValueError: + # Non-fixed frequency conversions for units ranging from weeks to years. + rule_code = op.freq.rule_code + + if rule_code == "W-SUN": + return _integer_label_to_datetime_op_weekly_freq(x, y, op) + + if rule_code in ("ME", "M"): + return _integer_label_to_datetime_op_monthly_freq(x, y, op) + + if rule_code in ("QE-DEC", "Q-DEC"): + return _integer_label_to_datetime_op_quarterly_freq(x, y, op) + + if rule_code in ("YE-DEC", "A-DEC", "Y-DEC"): + return _integer_label_to_datetime_op_yearly_freq(x, y, op) + + # If the rule_code is not recognized, raise an error here. + raise ValueError(f"Unsupported frequency rule code: {rule_code}") + + +def _integer_label_to_datetime_op_fixed_frequency( + x: TypedExpr, y: TypedExpr, op: ops.IntegerLabelToDatetimeOp +) -> sge.Expression: + """ + This function handles fixed frequency conversions where the unit can range + from microseconds (us) to days. 
+ """ + us = op.freq.nanos / 1000 + first = _calculate_resample_first(y, op.origin) # type: ignore + x_label = sge.Cast( + this=sge.func( + "TIMESTAMP_MICROS", + sge.Cast( + this=sge.Add( + this=sge.Mul( + this=sge.Cast(this=x.expr, to="BIGNUMERIC"), + expression=sge.convert(int(us)), + ), + expression=sge.Cast(this=first, to="BIGNUMERIC"), + ), + to="INT64", + ), + ), + to=sqlglot_types.from_bigframes_dtype(y.dtype), + ) + return x_label + + +def _integer_label_to_datetime_op_weekly_freq( + x: TypedExpr, y: TypedExpr, op: ops.IntegerLabelToDatetimeOp +) -> sge.Expression: + n = op.freq.n + # Calculate microseconds for the weekly interval. + us = n * 7 * 24 * 60 * 60 * 1000000 + first = sge.func( + "UNIX_MICROS", + sge.Add( + this=sge.TimestampTrunc( + this=sge.Cast(this=y.expr, to="TIMESTAMP"), + unit=sge.Var(this="WEEK(MONDAY)"), + ), + expression=sge.Interval( + this=sge.convert(6), unit=sge.Identifier(this="DAY") + ), + ), + ) + return sge.Cast( + this=sge.func( + "TIMESTAMP_MICROS", + sge.Cast( + this=sge.Add( + this=sge.Mul( + this=sge.Cast(this=x.expr, to="BIGNUMERIC"), + expression=sge.convert(us), + ), + expression=sge.Cast(this=first, to="BIGNUMERIC"), + ), + to="INT64", + ), + ), + to=sqlglot_types.from_bigframes_dtype(y.dtype), + ) + + +def _integer_label_to_datetime_op_monthly_freq( + x: TypedExpr, y: TypedExpr, op: ops.IntegerLabelToDatetimeOp +) -> sge.Expression: + n = op.freq.n + one = sge.convert(1) + twelve = sge.convert(12) + first = sge.Sub( # type: ignore + this=sge.Add( + this=sge.Mul( + this=sge.Extract(this="YEAR", expression=y.expr), + expression=twelve, + ), + expression=sge.Extract(this="MONTH", expression=y.expr), + ), + expression=one, + ) + x_val = sge.Add( + this=sge.Mul(this=x.expr, expression=sge.convert(n)), expression=first + ) + year = sge.Cast( + this=sge.Floor(this=sge.func("IEEE_DIVIDE", x_val, twelve)), + to="INT64", + ) + month = sge.Add(this=sge.Mod(this=x_val, expression=twelve), expression=one) + + next_year = sge.Case( 
+ ifs=[ + sge.If( + this=sge.EQ(this=month, expression=twelve), + true=sge.Add(this=year, expression=one), + ) + ], + default=year, + ) + next_month = sge.Case( + ifs=[sge.If(this=sge.EQ(this=month, expression=twelve), true=one)], + default=sge.Add(this=month, expression=one), + ) + next_month_date = sge.func( + "TIMESTAMP", + sge.Anonymous( + this="DATETIME", + expressions=[ + next_year, + next_month, + one, + sge.convert(0), + sge.convert(0), + sge.convert(0), + ], + ), + ) + x_label = sge.Sub( # type: ignore + this=next_month_date, expression=sge.Interval(this=one, unit="DAY") + ) + return sge.Cast(this=x_label, to=sqlglot_types.from_bigframes_dtype(y.dtype)) + + +def _integer_label_to_datetime_op_quarterly_freq( + x: TypedExpr, y: TypedExpr, op: ops.IntegerLabelToDatetimeOp +) -> sge.Expression: + n = op.freq.n + one = sge.convert(1) + three = sge.convert(3) + four = sge.convert(4) + twelve = sge.convert(12) + first = sge.Sub( # type: ignore + this=sge.Add( + this=sge.Mul( + this=sge.Extract(this="YEAR", expression=y.expr), + expression=four, + ), + expression=sge.Extract(this="QUARTER", expression=y.expr), + ), + expression=one, + ) + x_val = sge.Add( + this=sge.Mul(this=x.expr, expression=sge.convert(n)), expression=first + ) + year = sge.Cast( + this=sge.Floor(this=sge.func("IEEE_DIVIDE", x_val, four)), + to="INT64", + ) + month = sge.Mul( # type: ignore + this=sge.Paren( + this=sge.Add(this=sge.Mod(this=x_val, expression=four), expression=one) + ), + expression=three, + ) + + next_year = sge.Case( + ifs=[ + sge.If( + this=sge.EQ(this=month, expression=twelve), + true=sge.Add(this=year, expression=one), + ) + ], + default=year, + ) + next_month = sge.Case( + ifs=[sge.If(this=sge.EQ(this=month, expression=twelve), true=one)], + default=sge.Add(this=month, expression=one), + ) + next_month_date = sge.Anonymous( + this="DATETIME", + expressions=[ + next_year, + next_month, + one, + sge.convert(0), + sge.convert(0), + sge.convert(0), + ], + ) + x_label = 
sge.Sub( # type: ignore + this=next_month_date, expression=sge.Interval(this=one, unit="DAY") + ) + return sge.Cast(this=x_label, to=sqlglot_types.from_bigframes_dtype(y.dtype)) + + +def _integer_label_to_datetime_op_yearly_freq( + x: TypedExpr, y: TypedExpr, op: ops.IntegerLabelToDatetimeOp +) -> sge.Expression: + n = op.freq.n + one = sge.convert(1) + first = sge.Extract(this="YEAR", expression=y.expr) + x_val = sge.Add( + this=sge.Mul(this=x.expr, expression=sge.convert(n)), expression=first + ) + next_year = sge.Add(this=x_val, expression=one) # type: ignore + next_month_date = sge.func( + "TIMESTAMP", + sge.Anonymous( + this="DATETIME", + expressions=[ + next_year, + one, + one, + sge.convert(0), + sge.convert(0), + sge.convert(0), + ], + ), + ) + x_label = sge.Sub( # type: ignore + this=next_month_date, expression=sge.Interval(this=one, unit="DAY") + ) + return sge.Cast(this=x_label, to=sqlglot_types.from_bigframes_dtype(y.dtype)) diff --git a/bigframes/core/compile/sqlglot/expressions/generic_ops.py b/bigframes/core/compile/sqlglot/expressions/generic_ops.py index 27973ef8b50..2f486fc9d51 100644 --- a/bigframes/core/compile/sqlglot/expressions/generic_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/generic_ops.py @@ -19,7 +19,7 @@ from bigframes import dtypes from bigframes import operations as ops -from bigframes.core.compile.sqlglot import sqlglot_types +from bigframes.core.compile.sqlglot import sqlglot_ir, sqlglot_types from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler @@ -101,11 +101,23 @@ def _(expr: TypedExpr) -> sge.Expression: def _(expr: TypedExpr, op: ops.MapOp) -> sge.Expression: if len(op.mappings) == 0: return expr.expr + + mappings = [ + ( + sqlglot_ir._literal(key, dtypes.is_compatible(key, expr.dtype)), + sqlglot_ir._literal(value, dtypes.is_compatible(value, expr.dtype)), + ) + for key, value in op.mappings + ] return sge.Case( - 
this=expr.expr, ifs=[ - sge.If(this=sge.convert(key), true=sge.convert(value)) - for key, value in op.mappings + sge.If( + this=sge.EQ(this=expr.expr, expression=key) + if not sqlglot_ir._is_null_literal(key) + else sge.Is(this=expr.expr, expression=sge.Null()), + true=value, + ) + for key, value in mappings ], default=expr.expr, ) @@ -140,6 +152,43 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: return sge.Coalesce(this=left.expr, expressions=[right.expr]) +def _get_remote_function_name(op): + routine_ref = op.function_def.routine_ref + # Quote project, dataset, and routine IDs to avoid keyword clashes. + return ( + f"`{routine_ref.project}`.`{routine_ref.dataset_id}`.`{routine_ref.routine_id}`" + ) + + +@register_unary_op(ops.RemoteFunctionOp, pass_op=True) +def _(expr: TypedExpr, op: ops.RemoteFunctionOp) -> sge.Expression: + func_name = _get_remote_function_name(op) + func = sge.func(func_name, expr.expr) + + if not op.apply_on_null: + return sge.If( + this=sge.Is(this=expr.expr, expression=sge.Null()), + true=expr.expr, + false=func, + ) + + return func + + +@register_binary_op(ops.BinaryRemoteFunctionOp, pass_op=True) +def _( + left: TypedExpr, right: TypedExpr, op: ops.BinaryRemoteFunctionOp +) -> sge.Expression: + func_name = _get_remote_function_name(op) + return sge.func(func_name, left.expr, right.expr) + + +@register_nary_op(ops.NaryRemoteFunctionOp, pass_op=True) +def _(*operands: TypedExpr, op: ops.NaryRemoteFunctionOp) -> sge.Expression: + func_name = _get_remote_function_name(op) + return sge.func(func_name, *(operand.expr for operand in operands)) + + @register_nary_op(ops.case_when_op) def _(*cases_and_outputs: TypedExpr) -> sge.Expression: # Need to upcast BOOL to INT if any output is numeric diff --git a/bigframes/core/compile/sqlglot/expressions/geo_ops.py b/bigframes/core/compile/sqlglot/expressions/geo_ops.py index a57b4bc9314..9c6ba33ea54 100644 --- a/bigframes/core/compile/sqlglot/expressions/geo_ops.py +++ 
b/bigframes/core/compile/sqlglot/expressions/geo_ops.py @@ -108,12 +108,12 @@ def _(expr: TypedExpr, op: ops.GeoStSimplifyOp) -> sge.Expression: @register_unary_op(ops.geo_x_op) def _(expr: TypedExpr) -> sge.Expression: - return sge.func("SAFE.ST_X", expr.expr) + return sge.func("ST_X", expr.expr) @register_unary_op(ops.geo_y_op) def _(expr: TypedExpr) -> sge.Expression: - return sge.func("SAFE.ST_Y", expr.expr) + return sge.func("ST_Y", expr.expr) @register_binary_op(ops.GeoStDistanceOp, pass_op=True) diff --git a/bigframes/core/compile/sqlglot/expressions/numeric_ops.py b/bigframes/core/compile/sqlglot/expressions/numeric_ops.py index 16f7dec717e..28d3532b8b8 100644 --- a/bigframes/core/compile/sqlglot/expressions/numeric_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/numeric_ops.py @@ -93,12 +93,19 @@ def _(expr: TypedExpr) -> sge.Expression: def _(expr: TypedExpr) -> sge.Expression: return sge.Case( ifs=[ + # |x| < 1: The standard formula + sge.If( + this=sge.func("ABS", expr.expr) < sge.convert(1), + true=sge.func("ATANH", expr.expr), + ), + # |x| > 1: Returns NaN sge.If( this=sge.func("ABS", expr.expr) > sge.convert(1), true=constants._NAN, - ) + ), ], - default=sge.func("ATANH", expr.expr), + # |x| = 1: Returns Infinity or -Infinity + default=sge.Mul(this=constants._INF, expression=expr.expr), ) @@ -145,15 +152,11 @@ def _(expr: TypedExpr) -> sge.Expression: @register_unary_op(ops.expm1_op) def _(expr: TypedExpr) -> sge.Expression: - return sge.Case( - ifs=[ - sge.If( - this=expr.expr > constants._FLOAT64_EXP_BOUND, - true=constants._INF, - ) - ], - default=sge.func("EXP", expr.expr), - ) - sge.convert(1) + return sge.If( + this=expr.expr > constants._FLOAT64_EXP_BOUND, + true=constants._INF, + false=sge.func("EXP", expr.expr) - sge.convert(1), + ) @register_unary_op(ops.floor_op) @@ -166,11 +169,22 @@ def _(expr: TypedExpr) -> sge.Expression: return sge.Case( ifs=[ sge.If( - this=expr.expr <= sge.convert(0), + this=sge.Is(this=expr.expr, 
expression=sge.Null()), + true=sge.null(), + ), + # |x| > 0: The standard formula + sge.If( + this=expr.expr > sge.convert(0), + true=sge.Ln(this=expr.expr), + ), + # |x| < 0: Returns NaN + sge.If( + this=expr.expr < sge.convert(0), true=constants._NAN, - ) + ), ], - default=sge.Ln(this=expr.expr), + # |x| == 0: Returns -Infinity + default=constants._NEG_INF, ) @@ -179,11 +193,22 @@ def _(expr: TypedExpr) -> sge.Expression: return sge.Case( ifs=[ sge.If( - this=expr.expr <= sge.convert(0), + this=sge.Is(this=expr.expr, expression=sge.Null()), + true=sge.null(), + ), + # |x| > 0: The standard formula + sge.If( + this=expr.expr > sge.convert(0), + true=sge.Log(this=sge.convert(10), expression=expr.expr), + ), + # |x| < 0: Returns NaN + sge.If( + this=expr.expr < sge.convert(0), true=constants._NAN, - ) + ), ], - default=sge.Log(this=expr.expr, expression=sge.convert(10)), + # |x| == 0: Returns -Infinity + default=constants._NEG_INF, ) @@ -192,11 +217,22 @@ def _(expr: TypedExpr) -> sge.Expression: return sge.Case( ifs=[ sge.If( - this=expr.expr <= sge.convert(-1), + this=sge.Is(this=expr.expr, expression=sge.Null()), + true=sge.null(), + ), + # Domain: |x| > -1 (The standard formula) + sge.If( + this=expr.expr > sge.convert(-1), + true=sge.Ln(this=sge.convert(1) + expr.expr), + ), + # Out of Domain: |x| < -1 (Returns NaN) + sge.If( + this=expr.expr < sge.convert(-1), true=constants._NAN, - ) + ), ], - default=sge.Ln(this=sge.convert(1) + expr.expr), + # Boundary: |x| == -1 (Returns -Infinity) + default=constants._NEG_INF, ) @@ -388,6 +424,9 @@ def _(expr: TypedExpr) -> sge.Expression: @register_binary_op(ops.add_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if left.expr == sge.null() or right.expr == sge.null(): + return sge.null() + if left.dtype == dtypes.STRING_DTYPE and right.dtype == dtypes.STRING_DTYPE: # String addition return sge.Concat(expressions=[left.expr, right.expr]) @@ -442,6 +481,9 @@ def _(left: TypedExpr, right: TypedExpr) -> 
sge.Expression: @register_binary_op(ops.floordiv_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if left.expr == sge.null() or right.expr == sge.null(): + return sge.null() + left_expr = _coerce_bool_to_int(left) right_expr = _coerce_bool_to_int(right) @@ -525,6 +567,9 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: @register_binary_op(ops.mul_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if left.expr == sge.null() or right.expr == sge.null(): + return sge.null() + left_expr = _coerce_bool_to_int(left) right_expr = _coerce_bool_to_int(right) @@ -548,6 +593,9 @@ def _(expr: TypedExpr, n_digits: TypedExpr) -> sge.Expression: @register_binary_op(ops.sub_op) def _(left: TypedExpr, right: TypedExpr) -> sge.Expression: + if left.expr == sge.null() or right.expr == sge.null(): + return sge.null() + if dtypes.is_numeric(left.dtype) and dtypes.is_numeric(right.dtype): left_expr = _coerce_bool_to_int(left) right_expr = _coerce_bool_to_int(right) @@ -596,7 +644,7 @@ def isfinite(arg: TypedExpr) -> sge.Expression: return sge.Not( this=sge.Or( this=sge.IsInf(this=arg.expr), - right=sge.IsNan(this=arg.expr), + expression=sge.IsNan(this=arg.expr), ), ) diff --git a/bigframes/core/compile/sqlglot/sqlglot_ir.py b/bigframes/core/compile/sqlglot/sqlglot_ir.py index 04176014b01..d4dc4ecc064 100644 --- a/bigframes/core/compile/sqlglot/sqlglot_ir.py +++ b/bigframes/core/compile/sqlglot/sqlglot_ir.py @@ -558,16 +558,15 @@ def _explode_single_column( ) selection = sge.Star(replace=[unnested_column_alias.as_(column)]) - # TODO: "CROSS" if not keep_empty else "LEFT" - # TODO: overlaps_with_parent to replace existing column. new_expr = _select_to_cte( self.expr, sge.to_identifier( next(self.uid_gen.get_uid_stream("bfcte_")), quoted=self.quoted ), ) + # Use LEFT JOIN to preserve rows when unnesting empty arrays. 
new_expr = new_expr.select(selection, append=False).join( - unnest_expr, join_type="CROSS" + unnest_expr, join_type="LEFT" ) return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen) @@ -621,8 +620,9 @@ def _explode_multiple_columns( next(self.uid_gen.get_uid_stream("bfcte_")), quoted=self.quoted ), ) + # Use LEFT JOIN to preserve rows when unnesting empty arrays. new_expr = new_expr.select(selection, append=False).join( - unnest_expr, join_type="CROSS" + unnest_expr, join_type="LEFT" ) return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen) @@ -642,6 +642,15 @@ def _select_to_cte(expr: sge.Select, cte_name: sge.Identifier) -> sge.Select: return new_select_expr +def _is_null_literal(expr: sge.Expression) -> bool: + """Checks if the given expression is a NULL literal.""" + if isinstance(expr, sge.Null): + return True + if isinstance(expr, sge.Cast) and isinstance(expr.this, sge.Null): + return True + return False + + def _literal(value: typing.Any, dtype: dtypes.Dtype) -> sge.Expression: sqlglot_type = sgt.from_bigframes_dtype(dtype) if dtype else None if sqlglot_type is None: @@ -665,7 +674,7 @@ def _literal(value: typing.Any, dtype: dtypes.Dtype) -> sge.Expression: expressions=[_literal(value=v, dtype=value_type) for v in value] ) return values if len(value) > 0 else _cast(values, sqlglot_type) - elif pd.isna(value): + elif pd.isna(value) or (isinstance(value, pa.Scalar) and not value.is_valid): return _cast(sge.Null(), sqlglot_type) elif dtype == dtypes.JSON_DTYPE: return sge.ParseJSON(this=sge.convert(str(value))) diff --git a/bigframes/core/groupby/dataframe_group_by.py b/bigframes/core/groupby/dataframe_group_by.py index e3a132d4d0c..7f9e5d627ab 100644 --- a/bigframes/core/groupby/dataframe_group_by.py +++ b/bigframes/core/groupby/dataframe_group_by.py @@ -26,10 +26,10 @@ from bigframes import session from bigframes.core import agg_expressions from bigframes.core import expression as ex -from bigframes.core import log_adapter import bigframes.core.block_transforms 
as block_ops import bigframes.core.blocks as blocks from bigframes.core.groupby import aggs, group_by, series_group_by +from bigframes.core.logging import log_adapter import bigframes.core.ordering as order import bigframes.core.utils as utils import bigframes.core.validations as validations diff --git a/bigframes/core/groupby/series_group_by.py b/bigframes/core/groupby/series_group_by.py index b1485888a88..a8900cf5455 100644 --- a/bigframes/core/groupby/series_group_by.py +++ b/bigframes/core/groupby/series_group_by.py @@ -25,10 +25,10 @@ from bigframes import session from bigframes.core import expression as ex -from bigframes.core import log_adapter import bigframes.core.block_transforms as block_ops import bigframes.core.blocks as blocks from bigframes.core.groupby import aggs, group_by +from bigframes.core.logging import log_adapter import bigframes.core.ordering as order import bigframes.core.utils as utils import bigframes.core.validations as validations diff --git a/bigframes/core/local_data.py b/bigframes/core/local_data.py index ef7374a5a4f..0ef24089b2b 100644 --- a/bigframes/core/local_data.py +++ b/bigframes/core/local_data.py @@ -25,6 +25,7 @@ import uuid import geopandas # type: ignore +import numpy import numpy as np import pandas as pd import pyarrow as pa @@ -124,13 +125,21 @@ def to_arrow( geo_format: Literal["wkb", "wkt"] = "wkt", duration_type: Literal["int", "duration"] = "duration", json_type: Literal["string"] = "string", + sample_rate: Optional[float] = None, max_chunksize: Optional[int] = None, ) -> tuple[pa.Schema, Iterable[pa.RecordBatch]]: if geo_format != "wkt": raise NotImplementedError(f"geo format {geo_format} not yet implemented") assert json_type == "string" - batches = self.data.to_batches(max_chunksize=max_chunksize) + data = self.data + + # This exists for symmetry with remote sources, but sampling local data like this shouldn't really happen + if sample_rate is not None: + to_take = numpy.random.rand(data.num_rows) < sample_rate 
+ data = data.filter(to_take) + + batches = data.to_batches(max_chunksize=max_chunksize) schema = self.data.schema if duration_type == "int": schema = _schema_durations_to_ints(schema) diff --git a/bigframes/core/logging/__init__.py b/bigframes/core/logging/__init__.py new file mode 100644 index 00000000000..5d06124efce --- /dev/null +++ b/bigframes/core/logging/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from bigframes.core.logging import data_types, log_adapter + +__all__ = ["log_adapter", "data_types"] diff --git a/bigframes/core/logging/data_types.py b/bigframes/core/logging/data_types.py new file mode 100644 index 00000000000..3cb65a5c501 --- /dev/null +++ b/bigframes/core/logging/data_types.py @@ -0,0 +1,165 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import functools + +from bigframes import dtypes +from bigframes.core import agg_expressions, bigframe_node, expression, nodes +from bigframes.core.rewrite import schema_binding + +IGNORED_NODES = ( + nodes.SelectionNode, + nodes.ReadLocalNode, + nodes.ReadTableNode, + nodes.ConcatNode, + nodes.RandomSampleNode, + nodes.FromRangeNode, + nodes.PromoteOffsetsNode, + nodes.ReversedNode, + nodes.SliceNode, + nodes.ResultNode, +) + + +def encode_type_refs(root: bigframe_node.BigFrameNode) -> str: + return f"{root.reduce_up(_encode_type_refs_from_node):x}" + + +def _encode_type_refs_from_node( + node: bigframe_node.BigFrameNode, child_results: tuple[int, ...] +) -> int: + child_result = functools.reduce(lambda x, y: x | y, child_results, 0) + + curr_result = 0 + if isinstance(node, nodes.FilterNode): + curr_result = _encode_type_refs_from_expr(node.predicate, node.child) + elif isinstance(node, nodes.ProjectionNode): + for assignment in node.assignments: + expr = assignment[0] + if isinstance(expr, (expression.DerefOp)): + # Ignore direct assignments in projection nodes. 
+ continue + curr_result = curr_result | _encode_type_refs_from_expr( + assignment[0], node.child + ) + elif isinstance(node, nodes.OrderByNode): + for by in node.by: + curr_result = curr_result | _encode_type_refs_from_expr( + by.scalar_expression, node.child + ) + elif isinstance(node, nodes.JoinNode): + for left, right in node.conditions: + curr_result = ( + curr_result + | _encode_type_refs_from_expr(left, node.left_child) + | _encode_type_refs_from_expr(right, node.right_child) + ) + elif isinstance(node, nodes.InNode): + curr_result = _encode_type_refs_from_expr(node.left_col, node.left_child) + elif isinstance(node, nodes.AggregateNode): + for agg, _ in node.aggregations: + curr_result = curr_result | _encode_type_refs_from_expr(agg, node.child) + elif isinstance(node, nodes.WindowOpNode): + for grouping_key in node.window_spec.grouping_keys: + curr_result = curr_result | _encode_type_refs_from_expr( + grouping_key, node.child + ) + for ordering_expr in node.window_spec.ordering: + curr_result = curr_result | _encode_type_refs_from_expr( + ordering_expr.scalar_expression, node.child + ) + for col_def in node.agg_exprs: + curr_result = curr_result | _encode_type_refs_from_expr( + col_def.expression, node.child + ) + elif isinstance(node, nodes.ExplodeNode): + for col_id in node.column_ids: + curr_result = curr_result | _encode_type_refs_from_expr(col_id, node.child) + elif isinstance(node, IGNORED_NODES): + # Do nothing + pass + else: + # For unseen nodes, do not raise errors as this is the logging path, but + # we should cover those nodes either in the branches above, or place them + # in the IGNORED_NODES collection. 
+ pass + + return child_result | curr_result + + +def _encode_type_refs_from_expr( + expr: expression.Expression, child_node: bigframe_node.BigFrameNode +) -> int: + # TODO(b/409387790): Remove this branch once SQLGlot compiler fully replaces Ibis compiler + if not expr.is_resolved: + if isinstance(expr, agg_expressions.Aggregation): + expr = schema_binding._bind_schema_to_aggregation_expr(expr, child_node) + else: + expr = expression.bind_schema_fields(expr, child_node.field_by_id) + + result = _get_dtype_mask(expr.output_type) + for child_expr in expr.children: + result = result | _encode_type_refs_from_expr(child_expr, child_node) + + return result + + +def _get_dtype_mask(dtype: dtypes.Dtype | None) -> int: + if dtype is None: + # If the dtype is not given, ignore + return 0 + if dtype == dtypes.INT_DTYPE: + return 1 << 1 + if dtype == dtypes.FLOAT_DTYPE: + return 1 << 2 + if dtype == dtypes.BOOL_DTYPE: + return 1 << 3 + if dtype == dtypes.STRING_DTYPE: + return 1 << 4 + if dtype == dtypes.BYTES_DTYPE: + return 1 << 5 + if dtype == dtypes.DATE_DTYPE: + return 1 << 6 + if dtype == dtypes.TIME_DTYPE: + return 1 << 7 + if dtype == dtypes.DATETIME_DTYPE: + return 1 << 8 + if dtype == dtypes.TIMESTAMP_DTYPE: + return 1 << 9 + if dtype == dtypes.TIMEDELTA_DTYPE: + return 1 << 10 + if dtype == dtypes.NUMERIC_DTYPE: + return 1 << 11 + if dtype == dtypes.BIGNUMERIC_DTYPE: + return 1 << 12 + if dtype == dtypes.GEO_DTYPE: + return 1 << 13 + if dtype == dtypes.JSON_DTYPE: + return 1 << 14 + + if dtypes.is_struct_like(dtype): + mask = 1 << 15 + if dtype == dtypes.OBJ_REF_DTYPE: + # obj_ref is a special struct type for multi-modal data. + # It should be double counted as both "struct" and its own type. + mask = mask | (1 << 17) + return mask + + if dtypes.is_array_like(dtype): + return 1 << 16 + + # If an unknown datat type is present, mark it with the least significant bit. 
+ return 1 << 0 diff --git a/bigframes/core/log_adapter.py b/bigframes/core/logging/log_adapter.py similarity index 100% rename from bigframes/core/log_adapter.py rename to bigframes/core/logging/log_adapter.py diff --git a/bigframes/core/sql/ml.py b/bigframes/core/sql/ml.py index ec55fe04269..17493159250 100644 --- a/bigframes/core/sql/ml.py +++ b/bigframes/core/sql/ml.py @@ -213,3 +213,14 @@ def global_explain( sql += _build_struct_sql(struct_options) sql += ")\n" return sql + + +def transform( + model_name: str, + table: str, +) -> str: + """Encode the ML.TRANSFORM statement. + See https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-transform for reference. + """ + sql = f"SELECT * FROM ML.TRANSFORM(MODEL {googlesql.identifier(model_name)}, ({table}))\n" + return sql diff --git a/bigframes/core/window/rolling.py b/bigframes/core/window/rolling.py index d6c77bf0a72..b7bb62372cc 100644 --- a/bigframes/core/window/rolling.py +++ b/bigframes/core/window/rolling.py @@ -24,8 +24,9 @@ from bigframes import dtypes from bigframes.core import agg_expressions from bigframes.core import expression as ex -from bigframes.core import log_adapter, ordering, utils, window_spec +from bigframes.core import ordering, utils, window_spec import bigframes.core.blocks as blocks +from bigframes.core.logging import log_adapter from bigframes.core.window import ordering as window_ordering import bigframes.operations.aggregations as agg_ops diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 9efc6ba061f..e1ad4f3e75d 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -55,7 +55,7 @@ import bigframes.constants import bigframes.core -from bigframes.core import agg_expressions, log_adapter +from bigframes.core import agg_expressions import bigframes.core.block_transforms as block_ops import bigframes.core.blocks as blocks import bigframes.core.convert @@ -66,6 +66,7 @@ import bigframes.core.indexers as indexers import 
bigframes.core.indexes as indexes import bigframes.core.interchange +from bigframes.core.logging import log_adapter import bigframes.core.ordering as order import bigframes.core.utils as utils import bigframes.core.validations as validations diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index 6a16a9f7620..be0d2b45d09 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -66,6 +66,7 @@ class TableWidget(_WIDGET_BASE): page = traitlets.Int(0).tag(sync=True) page_size = traitlets.Int(0).tag(sync=True) + max_columns = traitlets.Int(allow_none=True, default_value=None).tag(sync=True) row_count = traitlets.Int(allow_none=True, default_value=None).tag(sync=True) table_html = traitlets.Unicode("").tag(sync=True) sort_context = traitlets.List(traitlets.Dict(), default_value=[]).tag(sync=True) @@ -103,10 +104,13 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame): # respect display options for initial page size initial_page_size = bigframes.options.display.max_rows + initial_max_columns = bigframes.options.display.max_columns # set traitlets properties that trigger observers # TODO(b/462525985): Investigate and improve TableWidget UX for DataFrames with a large number of columns. self.page_size = initial_page_size + self.max_columns = initial_max_columns + # TODO(b/469861913): Nested columns from structs (e.g., 'struct_col.name') are not currently sortable. # TODO(b/463754889): Support non-string column labels for sorting. 
if all(isinstance(col, str) for col in dataframe.columns): @@ -218,6 +222,14 @@ def _validate_page_size(self, proposal: dict[str, Any]) -> int: max_page_size = 1000 return min(value, max_page_size) + @traitlets.validate("max_columns") + def _validate_max_columns(self, proposal: dict[str, Any]) -> int: + """Validate max columns to ensure it's positive or 0 (for all).""" + value = proposal["value"] + if value is None: + return 0 # Normalize None to 0 for traitlet + return max(0, value) + def _get_next_batch(self) -> bool: """ Gets the next batch of data from the generator and appends to cache. @@ -348,6 +360,7 @@ def _set_table_html(self) -> None: dataframe=page_data, table_id=f"table-{self._table_id}", orderable_columns=self.orderable_columns, + max_columns=self.max_columns, ) if new_page is not None: @@ -382,3 +395,10 @@ def _page_size_changed(self, _change: dict[str, Any]) -> None: # Update the table display self._set_table_html() + + @traitlets.observe("max_columns") + def _max_columns_changed(self, _change: dict[str, Any]) -> None: + """Handler for when max columns is changed from the frontend.""" + if not self._initial_load_complete: + return + self._set_table_html() diff --git a/bigframes/display/html.py b/bigframes/display/html.py index 912f1d7e3a2..6102d1512c6 100644 --- a/bigframes/display/html.py +++ b/bigframes/display/html.py @@ -46,21 +46,51 @@ def render_html( dataframe: pd.DataFrame, table_id: str, orderable_columns: list[str] | None = None, + max_columns: int | None = None, ) -> str: """Render a pandas DataFrame to HTML with specific styling.""" orderable_columns = orderable_columns or [] classes = "dataframe table table-striped table-hover" table_html_parts = [f''] - table_html_parts.append(_render_table_header(dataframe, orderable_columns)) - table_html_parts.append(_render_table_body(dataframe)) + + # Handle column truncation + columns = list(dataframe.columns) + if max_columns is not None and max_columns > 0 and len(columns) > max_columns: + half 
= max_columns // 2 + left_columns = columns[:half] + # Ensure we don't take more than available if half is 0 or calculation is weird, + # but typical case is safe. + right_count = max_columns - half + right_columns = columns[-right_count:] if right_count > 0 else [] + show_ellipsis = True + else: + left_columns = columns + right_columns = [] + show_ellipsis = False + + table_html_parts.append( + _render_table_header( + dataframe, orderable_columns, left_columns, right_columns, show_ellipsis + ) + ) + table_html_parts.append( + _render_table_body(dataframe, left_columns, right_columns, show_ellipsis) + ) table_html_parts.append("
") return "".join(table_html_parts) -def _render_table_header(dataframe: pd.DataFrame, orderable_columns: list[str]) -> str: +def _render_table_header( + dataframe: pd.DataFrame, + orderable_columns: list[str], + left_columns: list[Any], + right_columns: list[Any], + show_ellipsis: bool, +) -> str: """Render the header of the HTML table.""" header_parts = [" ", " "] - for col in dataframe.columns: + + def render_col_header(col): th_classes = [] if col in orderable_columns: th_classes.append("sortable") @@ -69,11 +99,28 @@ def _render_table_header(dataframe: pd.DataFrame, orderable_columns: list[str]) f'
' f"{html.escape(str(col))}
" ) + + for col in left_columns: + render_col_header(col) + + if show_ellipsis: + header_parts.append( + '
...
' + ) + + for col in right_columns: + render_col_header(col) + header_parts.extend([" ", " "]) return "\n".join(header_parts) -def _render_table_body(dataframe: pd.DataFrame) -> str: +def _render_table_body( + dataframe: pd.DataFrame, + left_columns: list[Any], + right_columns: list[Any], + show_ellipsis: bool, +) -> str: """Render the body of the HTML table.""" body_parts = [" "] precision = options.display.precision @@ -81,7 +128,9 @@ def _render_table_body(dataframe: pd.DataFrame) -> str: for i in range(len(dataframe)): body_parts.append(" ") row = dataframe.iloc[i] - for col_name, value in row.items(): + + def render_col_cell(col_name): + value = row[col_name] dtype = dataframe.dtypes.loc[col_name] # type: ignore align = "right" if _is_dtype_numeric(dtype) else "left" @@ -101,6 +150,17 @@ def _render_table_body(dataframe: pd.DataFrame) -> str: f' ' f"{html.escape(cell_content)}" ) + + for col in left_columns: + render_col_cell(col) + + if show_ellipsis: + # Ellipsis cell + body_parts.append(' ...') + + for col in right_columns: + render_col_cell(col) + body_parts.append(" ") body_parts.append(" ") return "\n".join(body_parts) diff --git a/bigframes/display/table_widget.css b/bigframes/display/table_widget.css index 34134b043d0..da0a701d694 100644 --- a/bigframes/display/table_widget.css +++ b/bigframes/display/table_widget.css @@ -14,24 +14,83 @@ * limitations under the License. 
*/ -.bigframes-widget { +/* Increase specificity to override framework styles without !important */ +.bigframes-widget.bigframes-widget { + /* Default Light Mode Variables */ + --bf-bg: white; + --bf-border-color: #ccc; + --bf-error-bg: #fbe; + --bf-error-border: red; + --bf-error-fg: black; + --bf-fg: black; + --bf-header-bg: #f5f5f5; + --bf-null-fg: gray; + --bf-row-even-bg: #f5f5f5; + --bf-row-odd-bg: white; + + background-color: var(--bf-bg); + box-sizing: border-box; + color: var(--bf-fg); display: flex; flex-direction: column; + font-family: + '-apple-system', 'BlinkMacSystemFont', 'Segoe UI', 'Roboto', sans-serif; + margin: 0; + padding: 0; +} + +.bigframes-widget * { + box-sizing: border-box; +} + +/* Dark Mode Overrides: + * 1. @media (prefers-color-scheme: dark) - System-wide dark mode + * 2. .bigframes-dark-mode - Explicit class for VSCode theme detection + * 3. html[theme="dark"], body[data-theme="dark"] - Colab/Pantheon manual override + */ +@media (prefers-color-scheme: dark) { + .bigframes-widget.bigframes-widget { + --bf-bg: var(--vscode-editor-background, #202124); + --bf-border-color: #444; + --bf-error-bg: #511; + --bf-error-border: #f88; + --bf-error-fg: #fcc; + --bf-fg: white; + --bf-header-bg: var(--vscode-editor-background, black); + --bf-null-fg: #aaa; + --bf-row-even-bg: #202124; + --bf-row-odd-bg: #383838; + } +} + +.bigframes-widget.bigframes-dark-mode.bigframes-dark-mode, +html[theme='dark'] .bigframes-widget.bigframes-widget, +body[data-theme='dark'] .bigframes-widget.bigframes-widget { + --bf-bg: var(--vscode-editor-background, #202124); + --bf-border-color: #444; + --bf-error-bg: #511; + --bf-error-border: #f88; + --bf-error-fg: #fcc; + --bf-fg: white; + --bf-header-bg: var(--vscode-editor-background, black); + --bf-null-fg: #aaa; + --bf-row-even-bg: #202124; + --bf-row-odd-bg: #383838; } .bigframes-widget .table-container { + background-color: var(--bf-bg); + margin: 0; max-height: 620px; overflow: auto; + padding: 0; } 
.bigframes-widget .footer { align-items: center; - /* TODO(b/460861328): We will support dark mode in a media selector once we - * determine how to override the background colors as well. */ - color: black; + background-color: var(--bf-bg); + color: var(--bf-fg); display: flex; - font-family: - "-apple-system", "BlinkMacSystemFont", "Segoe UI", "Roboto", sans-serif; font-size: 0.8rem; justify-content: space-between; padding: 8px; @@ -58,28 +117,49 @@ margin: 0 8px; } -.bigframes-widget .page-size { +.bigframes-widget .settings { align-items: center; display: flex; flex-direction: row; - gap: 4px; + gap: 16px; justify-content: end; } -.bigframes-widget .page-size label { +.bigframes-widget .page-size, +.bigframes-widget .max-columns { + align-items: center; + display: flex; + flex-direction: row; + gap: 4px; +} + +.bigframes-widget .page-size label, +.bigframes-widget .max-columns label { margin-right: 8px; } -.bigframes-widget table { +.bigframes-widget table.bigframes-widget-table, +.bigframes-widget table.dataframe { + background-color: var(--bf-bg); + border: 1px solid var(--bf-border-color); border-collapse: collapse; - /* TODO(b/460861328): We will support dark mode in a media selector once we - * determine how to override the background colors as well. 
*/ - color: black; + border-spacing: 0; + box-shadow: none; + color: var(--bf-fg); + margin: 0; + outline: none; text-align: left; + width: auto; /* Fix stretching */ +} + +.bigframes-widget tr { + border: none; } .bigframes-widget th { - background-color: var(--colab-primary-surface-color, var(--jp-layout-color0)); + background-color: var(--bf-header-bg); + border: 1px solid var(--bf-border-color); + color: var(--bf-fg); padding: 0; position: sticky; text-align: left; @@ -87,6 +167,22 @@ z-index: 1; } +.bigframes-widget td { + border: 1px solid var(--bf-border-color); + color: var(--bf-fg); + padding: 0.5em; +} + +.bigframes-widget table tbody tr:nth-child(odd), +.bigframes-widget table tbody tr:nth-child(odd) td { + background-color: var(--bf-row-odd-bg); +} + +.bigframes-widget table tbody tr:nth-child(even), +.bigframes-widget table tbody tr:nth-child(even) td { + background-color: var(--bf-row-even-bg); +} + .bigframes-widget .bf-header-content { box-sizing: border-box; height: 100%; @@ -106,8 +202,13 @@ } .bigframes-widget button { + background-color: transparent; + border: 1px solid currentColor; + border-radius: 4px; + color: inherit; cursor: pointer; display: inline-block; + padding: 2px 8px; text-align: center; text-decoration: none; user-select: none; @@ -120,11 +221,10 @@ } .bigframes-widget .bigframes-error-message { - background-color: #fbe; - border: 1px solid red; + background-color: var(--bf-error-bg); + border: 1px solid var(--bf-error-border); border-radius: 4px; - font-family: - "-apple-system", "BlinkMacSystemFont", "Segoe UI", "Roboto", sans-serif; + color: var(--bf-error-fg); font-size: 14px; margin-bottom: 8px; padding: 8px; @@ -139,14 +239,9 @@ } .bigframes-widget .null-value { - color: gray; -} - -.bigframes-widget td { - padding: 0.5em; + color: var(--bf-null-fg); } -.bigframes-widget tr:hover td, -.bigframes-widget td.row-hover { - background-color: var(--colab-hover-surface-color, var(--jp-layout-color2)); +.bigframes-widget .debug-info 
{ + border-top: 1px solid var(--bf-border-color); } diff --git a/bigframes/display/table_widget.js b/bigframes/display/table_widget.js index 3944f48da7e..314bf771d0e 100644 --- a/bigframes/display/table_widget.js +++ b/bigframes/display/table_widget.js @@ -15,19 +15,20 @@ */ const ModelProperty = { - ERROR_MESSAGE: "error_message", - ORDERABLE_COLUMNS: "orderable_columns", - PAGE: "page", - PAGE_SIZE: "page_size", - ROW_COUNT: "row_count", - SORT_CONTEXT: "sort_context", - TABLE_HTML: "table_html", + ERROR_MESSAGE: 'error_message', + ORDERABLE_COLUMNS: 'orderable_columns', + PAGE: 'page', + PAGE_SIZE: 'page_size', + ROW_COUNT: 'row_count', + SORT_CONTEXT: 'sort_context', + TABLE_HTML: 'table_html', + MAX_COLUMNS: 'max_columns', }; const Event = { - CHANGE: "change", - CHANGE_TABLE_HTML: "change:table_html", - CLICK: "click", + CHANGE: 'change', + CHANGE_TABLE_HTML: 'change:table_html', + CLICK: 'click', }; /** @@ -35,297 +36,315 @@ const Event = { * @param {{ model: any, el: !HTMLElement }} props - The widget properties. 
*/ function render({ model, el }) { - // Main container with a unique class for CSS scoping - el.classList.add("bigframes-widget"); - - // Add error message container at the top - const errorContainer = document.createElement("div"); - errorContainer.classList.add("error-message"); - - const tableContainer = document.createElement("div"); - tableContainer.classList.add("table-container"); - const footer = document.createElement("footer"); - footer.classList.add("footer"); - - // Pagination controls - const paginationContainer = document.createElement("div"); - paginationContainer.classList.add("pagination"); - const prevPage = document.createElement("button"); - const pageIndicator = document.createElement("span"); - pageIndicator.classList.add("page-indicator"); - const nextPage = document.createElement("button"); - const rowCountLabel = document.createElement("span"); - rowCountLabel.classList.add("row-count"); - - // Page size controls - const pageSizeContainer = document.createElement("div"); - pageSizeContainer.classList.add("page-size"); - const pageSizeLabel = document.createElement("label"); - const pageSizeInput = document.createElement("select"); - - prevPage.textContent = "<"; - nextPage.textContent = ">"; - pageSizeLabel.textContent = "Page size:"; - - // Page size options - const pageSizes = [10, 25, 50, 100]; - for (const size of pageSizes) { - const option = document.createElement("option"); - option.value = size; - option.textContent = size; - if (size === model.get(ModelProperty.PAGE_SIZE)) { - option.selected = true; - } - pageSizeInput.appendChild(option); - } - - /** Updates the footer states and page label based on the model. 
*/ - function updateButtonStates() { - const currentPage = model.get(ModelProperty.PAGE); - const pageSize = model.get(ModelProperty.PAGE_SIZE); - const rowCount = model.get(ModelProperty.ROW_COUNT); - - if (rowCount === null) { - // Unknown total rows - rowCountLabel.textContent = "Total rows unknown"; - pageIndicator.textContent = `Page ${( - currentPage + 1 - ).toLocaleString()} of many`; - prevPage.disabled = currentPage === 0; - nextPage.disabled = false; // Allow navigation until we hit the end - } else if (rowCount === 0) { - // Empty dataset - rowCountLabel.textContent = "0 total rows"; - pageIndicator.textContent = "Page 1 of 1"; - prevPage.disabled = true; - nextPage.disabled = true; - } else { - // Known total rows - const totalPages = Math.ceil(rowCount / pageSize); - rowCountLabel.textContent = `${rowCount.toLocaleString()} total rows`; - pageIndicator.textContent = `Page ${( - currentPage + 1 - ).toLocaleString()} of ${totalPages.toLocaleString()}`; - prevPage.disabled = currentPage === 0; - nextPage.disabled = currentPage >= totalPages - 1; - } - pageSizeInput.value = pageSize; - } - - /** - * Handles page navigation. - * @param {number} direction - The direction to navigate (-1 for previous, 1 for next). - */ - function handlePageChange(direction) { - const currentPage = model.get(ModelProperty.PAGE); - model.set(ModelProperty.PAGE, currentPage + direction); - model.save_changes(); - } - - /** - * Handles page size changes. - * @param {number} newSize - The new page size. - */ - function handlePageSizeChange(newSize) { - model.set(ModelProperty.PAGE_SIZE, newSize); - model.set(ModelProperty.PAGE, 0); // Reset to first page - model.save_changes(); - } - - /** Updates the HTML in the table container and refreshes button states. */ - function handleTableHTMLChange() { - // Note: Using innerHTML is safe here because the content is generated - // by a trusted backend (DataFrame.to_html). 
- tableContainer.innerHTML = model.get(ModelProperty.TABLE_HTML); - - // Get sortable columns from backend - const sortableColumns = model.get(ModelProperty.ORDERABLE_COLUMNS); - const currentSortContext = model.get(ModelProperty.SORT_CONTEXT) || []; - - const getSortIndex = (colName) => - currentSortContext.findIndex((item) => item.column === colName); - - // Add click handlers to column headers for sorting - const headers = tableContainer.querySelectorAll("th"); - headers.forEach((header) => { - const headerDiv = header.querySelector("div"); - const columnName = headerDiv.textContent.trim(); - - // Only add sorting UI for sortable columns - if (columnName && sortableColumns.includes(columnName)) { - header.style.cursor = "pointer"; - - // Create a span for the indicator - const indicatorSpan = document.createElement("span"); - indicatorSpan.classList.add("sort-indicator"); - indicatorSpan.style.paddingLeft = "5px"; - - // Determine sort indicator and initial visibility - let indicator = "●"; // Default: unsorted (dot) - const sortIndex = getSortIndex(columnName); - - if (sortIndex !== -1) { - const isAscending = currentSortContext[sortIndex].ascending; - indicator = isAscending ? 
"▲" : "▼"; - indicatorSpan.style.visibility = "visible"; // Sorted arrows always visible - } else { - indicatorSpan.style.visibility = "hidden"; // Unsorted dot hidden by default - } - indicatorSpan.textContent = indicator; - - // Add indicator to the header, replacing the old one if it exists - const existingIndicator = headerDiv.querySelector(".sort-indicator"); - if (existingIndicator) { - headerDiv.removeChild(existingIndicator); - } - headerDiv.appendChild(indicatorSpan); - - // Add hover effects for unsorted columns only - header.addEventListener("mouseover", () => { - if (getSortIndex(columnName) === -1) { - indicatorSpan.style.visibility = "visible"; - } - }); - header.addEventListener("mouseout", () => { - if (getSortIndex(columnName) === -1) { - indicatorSpan.style.visibility = "hidden"; - } - }); - - // Add click handler for three-state toggle - header.addEventListener(Event.CLICK, (event) => { - const sortIndex = getSortIndex(columnName); - let newContext = [...currentSortContext]; - - if (event.shiftKey) { - if (sortIndex !== -1) { - // Already sorted. Toggle or Remove. - if (newContext[sortIndex].ascending) { - // Asc -> Desc - // Clone object to avoid mutation issues - newContext[sortIndex] = { - ...newContext[sortIndex], - ascending: false, - }; - } else { - // Desc -> Remove - newContext.splice(sortIndex, 1); - } - } else { - // Not sorted -> Append Asc - newContext.push({ column: columnName, ascending: true }); - } - } else { - // No shift key. Single column mode. - if (sortIndex !== -1 && newContext.length === 1) { - // Already only this column. Toggle or Remove. 
- if (newContext[sortIndex].ascending) { - newContext[sortIndex] = { - ...newContext[sortIndex], - ascending: false, - }; - } else { - newContext = []; - } - } else { - // Start fresh with this column - newContext = [{ column: columnName, ascending: true }]; - } - } - - model.set(ModelProperty.SORT_CONTEXT, newContext); - model.save_changes(); - }); - } - }); - - const table = tableContainer.querySelector("table"); - if (table) { - const tableBody = table.querySelector("tbody"); - - /** - * Handles row hover events. - * @param {!Event} event - The mouse event. - * @param {boolean} isHovering - True to add hover class, false to remove. - */ - function handleRowHover(event, isHovering) { - const cell = event.target.closest("td"); - if (cell) { - const row = cell.closest("tr"); - const origRowId = row.dataset.origRow; - if (origRowId) { - const allCellsInGroup = tableBody.querySelectorAll( - `tr[data-orig-row="${origRowId}"] td`, - ); - allCellsInGroup.forEach((c) => { - c.classList.toggle("row-hover", isHovering); - }); - } - } - } - - if (tableBody) { - tableBody.addEventListener("mouseover", (event) => - handleRowHover(event, true), - ); - tableBody.addEventListener("mouseout", (event) => - handleRowHover(event, false), - ); - } - } - - updateButtonStates(); - } - - // Add error message handler - function handleErrorMessageChange() { - const errorMsg = model.get(ModelProperty.ERROR_MESSAGE); - if (errorMsg) { - errorContainer.textContent = errorMsg; - errorContainer.style.display = "block"; - } else { - errorContainer.style.display = "none"; - } - } - - // Add event listeners - prevPage.addEventListener(Event.CLICK, () => handlePageChange(-1)); - nextPage.addEventListener(Event.CLICK, () => handlePageChange(1)); - pageSizeInput.addEventListener(Event.CHANGE, (e) => { - const newSize = Number(e.target.value); - if (newSize) { - handlePageSizeChange(newSize); - } - }); - model.on(Event.CHANGE_TABLE_HTML, handleTableHTMLChange); - 
model.on(`change:${ModelProperty.ROW_COUNT}`, updateButtonStates); - model.on(`change:${ModelProperty.ERROR_MESSAGE}`, handleErrorMessageChange); - model.on(`change:_initial_load_complete`, (val) => { - if (val) { - updateButtonStates(); - } - }); - model.on(`change:${ModelProperty.PAGE}`, updateButtonStates); - - // Assemble the DOM - paginationContainer.appendChild(prevPage); - paginationContainer.appendChild(pageIndicator); - paginationContainer.appendChild(nextPage); - - pageSizeContainer.appendChild(pageSizeLabel); - pageSizeContainer.appendChild(pageSizeInput); - - footer.appendChild(rowCountLabel); - footer.appendChild(paginationContainer); - footer.appendChild(pageSizeContainer); - - el.appendChild(errorContainer); - el.appendChild(tableContainer); - el.appendChild(footer); - - // Initial render - handleTableHTMLChange(); - handleErrorMessageChange(); + el.classList.add('bigframes-widget'); + + const errorContainer = document.createElement('div'); + errorContainer.classList.add('error-message'); + + const tableContainer = document.createElement('div'); + tableContainer.classList.add('table-container'); + const footer = document.createElement('footer'); + footer.classList.add('footer'); + + /** Detects theme and applies necessary style overrides. */ + function updateTheme() { + const body = document.body; + const isDark = + body.classList.contains('vscode-dark') || + body.classList.contains('theme-dark') || + body.dataset.theme === 'dark' || + body.getAttribute('data-vscode-theme-kind') === 'vscode-dark'; + + if (isDark) { + el.classList.add('bigframes-dark-mode'); + } else { + el.classList.remove('bigframes-dark-mode'); + } + } + + updateTheme(); + // Re-check after mount to ensure parent styling is applied. 
+ setTimeout(updateTheme, 300); + + const observer = new MutationObserver(updateTheme); + observer.observe(document.body, { + attributes: true, + attributeFilter: ['class', 'data-theme', 'data-vscode-theme-kind'], + }); + + // Settings controls container + const settingsContainer = document.createElement('div'); + settingsContainer.classList.add('settings'); + + // Pagination controls + const paginationContainer = document.createElement('div'); + paginationContainer.classList.add('pagination'); + const prevPage = document.createElement('button'); + const pageIndicator = document.createElement('span'); + pageIndicator.classList.add('page-indicator'); + const nextPage = document.createElement('button'); + const rowCountLabel = document.createElement('span'); + rowCountLabel.classList.add('row-count'); + + // Page size controls + const pageSizeContainer = document.createElement('div'); + pageSizeContainer.classList.add('page-size'); + const pageSizeLabel = document.createElement('label'); + const pageSizeInput = document.createElement('select'); + + prevPage.textContent = '<'; + nextPage.textContent = '>'; + pageSizeLabel.textContent = 'Page size:'; + + const pageSizes = [10, 25, 50, 100]; + for (const size of pageSizes) { + const option = document.createElement('option'); + option.value = size; + option.textContent = size; + if (size === model.get(ModelProperty.PAGE_SIZE)) { + option.selected = true; + } + pageSizeInput.appendChild(option); + } + + // Max columns controls + const maxColumnsContainer = document.createElement('div'); + maxColumnsContainer.classList.add('max-columns'); + const maxColumnsLabel = document.createElement('label'); + const maxColumnsInput = document.createElement('select'); + + maxColumnsLabel.textContent = 'Max columns:'; + + // 0 represents "All" (all columns) + const maxColumnOptions = [5, 10, 15, 20, 0]; + for (const cols of maxColumnOptions) { + const option = document.createElement('option'); + option.value = cols; + option.textContent 
= cols === 0 ? 'All' : cols; + + const currentMax = model.get(ModelProperty.MAX_COLUMNS); + // Handle None/null from python as 0/All + const currentMaxVal = + currentMax === null || currentMax === undefined ? 0 : currentMax; + + if (cols === currentMaxVal) { + option.selected = true; + } + maxColumnsInput.appendChild(option); + } + + function updateButtonStates() { + const currentPage = model.get(ModelProperty.PAGE); + const pageSize = model.get(ModelProperty.PAGE_SIZE); + const rowCount = model.get(ModelProperty.ROW_COUNT); + + if (rowCount === null) { + rowCountLabel.textContent = 'Total rows unknown'; + pageIndicator.textContent = `Page ${(currentPage + 1).toLocaleString()} of many`; + prevPage.disabled = currentPage === 0; + nextPage.disabled = false; + } else if (rowCount === 0) { + rowCountLabel.textContent = '0 total rows'; + pageIndicator.textContent = 'Page 1 of 1'; + prevPage.disabled = true; + nextPage.disabled = true; + } else { + const totalPages = Math.ceil(rowCount / pageSize); + rowCountLabel.textContent = `${rowCount.toLocaleString()} total rows`; + pageIndicator.textContent = `Page ${(currentPage + 1).toLocaleString()} of ${totalPages.toLocaleString()}`; + prevPage.disabled = currentPage === 0; + nextPage.disabled = currentPage >= totalPages - 1; + } + pageSizeInput.value = pageSize; + } + + function handlePageChange(direction) { + const currentPage = model.get(ModelProperty.PAGE); + model.set(ModelProperty.PAGE, currentPage + direction); + model.save_changes(); + } + + function handlePageSizeChange(newSize) { + model.set(ModelProperty.PAGE_SIZE, newSize); + model.set(ModelProperty.PAGE, 0); + model.save_changes(); + } + + let isHeightInitialized = false; + + function handleTableHTMLChange() { + tableContainer.innerHTML = model.get(ModelProperty.TABLE_HTML); + + // After the first render, dynamically set the container height to fit the + // initial page (usually 10 rows) and then lock it. 
+ setTimeout(() => { + if (!isHeightInitialized) { + const table = tableContainer.querySelector('table'); + if (table) { + const tableHeight = table.offsetHeight; + // Add a small buffer(e.g. 2px) for borders to avoid scrollbars. + if (tableHeight > 0) { + tableContainer.style.height = `${tableHeight + 2}px`; + isHeightInitialized = true; + } + } + } + }, 0); + + const sortableColumns = model.get(ModelProperty.ORDERABLE_COLUMNS); + const currentSortContext = model.get(ModelProperty.SORT_CONTEXT) || []; + + const getSortIndex = (colName) => + currentSortContext.findIndex((item) => item.column === colName); + + const headers = tableContainer.querySelectorAll('th'); + headers.forEach((header) => { + const headerDiv = header.querySelector('div'); + const columnName = headerDiv.textContent.trim(); + + if (columnName && sortableColumns.includes(columnName)) { + header.style.cursor = 'pointer'; + + const indicatorSpan = document.createElement('span'); + indicatorSpan.classList.add('sort-indicator'); + indicatorSpan.style.paddingLeft = '5px'; + + // Determine sort indicator and initial visibility + let indicator = '●'; // Default: unsorted (dot) + const sortIndex = getSortIndex(columnName); + + if (sortIndex !== -1) { + const isAscending = currentSortContext[sortIndex].ascending; + indicator = isAscending ? 
'▲' : '▼'; + indicatorSpan.style.visibility = 'visible'; // Sorted arrows always visible + } else { + indicatorSpan.style.visibility = 'hidden'; + } + indicatorSpan.textContent = indicator; + + const existingIndicator = headerDiv.querySelector('.sort-indicator'); + if (existingIndicator) { + headerDiv.removeChild(existingIndicator); + } + headerDiv.appendChild(indicatorSpan); + + header.addEventListener('mouseover', () => { + if (getSortIndex(columnName) === -1) { + indicatorSpan.style.visibility = 'visible'; + } + }); + header.addEventListener('mouseout', () => { + if (getSortIndex(columnName) === -1) { + indicatorSpan.style.visibility = 'hidden'; + } + }); + + // Add click handler for three-state toggle + header.addEventListener(Event.CLICK, (event) => { + const sortIndex = getSortIndex(columnName); + let newContext = [...currentSortContext]; + + if (event.shiftKey) { + if (sortIndex !== -1) { + // Already sorted. Toggle or Remove. + if (newContext[sortIndex].ascending) { + // Asc -> Desc + // Clone object to avoid mutation issues + newContext[sortIndex] = { + ...newContext[sortIndex], + ascending: false, + }; + } else { + // Desc -> Remove + newContext.splice(sortIndex, 1); + } + } else { + // Not sorted -> Append Asc + newContext.push({ column: columnName, ascending: true }); + } + } else { + // No shift key. Single column mode. + if (sortIndex !== -1 && newContext.length === 1) { + // Already only this column. Toggle or Remove. 
+ if (newContext[sortIndex].ascending) { + newContext[sortIndex] = { + ...newContext[sortIndex], + ascending: false, + }; + } else { + newContext = []; + } + } else { + // Start fresh with this column + newContext = [{ column: columnName, ascending: true }]; + } + } + + model.set(ModelProperty.SORT_CONTEXT, newContext); + model.save_changes(); + }); + } + }); + + updateButtonStates(); + } + + function handleErrorMessageChange() { + const errorMsg = model.get(ModelProperty.ERROR_MESSAGE); + if (errorMsg) { + errorContainer.textContent = errorMsg; + errorContainer.style.display = 'block'; + } else { + errorContainer.style.display = 'none'; + } + } + + prevPage.addEventListener(Event.CLICK, () => handlePageChange(-1)); + nextPage.addEventListener(Event.CLICK, () => handlePageChange(1)); + pageSizeInput.addEventListener(Event.CHANGE, (e) => { + const newSize = Number(e.target.value); + if (newSize) { + handlePageSizeChange(newSize); + } + }); + + maxColumnsInput.addEventListener(Event.CHANGE, (e) => { + const newVal = Number(e.target.value); + model.set(ModelProperty.MAX_COLUMNS, newVal); + model.save_changes(); + }); + + model.on(Event.CHANGE_TABLE_HTML, handleTableHTMLChange); + model.on(`change:${ModelProperty.ROW_COUNT}`, updateButtonStates); + model.on(`change:${ModelProperty.ERROR_MESSAGE}`, handleErrorMessageChange); + model.on(`change:_initial_load_complete`, (val) => { + if (val) updateButtonStates(); + }); + model.on(`change:${ModelProperty.PAGE}`, updateButtonStates); + + paginationContainer.appendChild(prevPage); + paginationContainer.appendChild(pageIndicator); + paginationContainer.appendChild(nextPage); + + pageSizeContainer.appendChild(pageSizeLabel); + pageSizeContainer.appendChild(pageSizeInput); + + maxColumnsContainer.appendChild(maxColumnsLabel); + maxColumnsContainer.appendChild(maxColumnsInput); + + settingsContainer.appendChild(maxColumnsContainer); + settingsContainer.appendChild(pageSizeContainer); + + footer.appendChild(rowCountLabel); + 
footer.appendChild(paginationContainer); + footer.appendChild(settingsContainer); + + el.appendChild(errorContainer); + el.appendChild(tableContainer); + el.appendChild(footer); + + handleTableHTMLChange(); + handleErrorMessageChange(); } export default { render }; diff --git a/bigframes/functions/_function_client.py b/bigframes/functions/_function_client.py index 8a88a14040d..a82217da035 100644 --- a/bigframes/functions/_function_client.py +++ b/bigframes/functions/_function_client.py @@ -375,6 +375,20 @@ def generate_cloud_function_code( ) return entry_point + @google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type(ValueError), + initial=1.0, + maximum=10.0, + multiplier=2.0, + deadline=300.0, # Wait up to 5 minutes for propagation + ) + def _get_cloud_function_endpoint_with_retry(self, name): + endpoint = self.get_cloud_function_endpoint(name) + if not endpoint: + # Raising ValueError triggers the retry predicate + raise ValueError(f"Endpoint for {name} not yet available.") + return endpoint + def create_cloud_function( self, def_, @@ -516,11 +530,14 @@ def create_cloud_function( create_function_request.function = function # Create the cloud function and wait for it to be ready to use + endpoint = None try: operation = self._cloud_functions_client.create_function( request=create_function_request ) - operation.result() + # operation.result() returns the Function object upon completion + function_obj = operation.result() + endpoint = function_obj.service_config.uri # Cleanup os.remove(archive_path) @@ -535,12 +552,14 @@ def create_cloud_function( # we created it. This error is safe to ignore. pass - # Fetch the endpoint of the just created function - endpoint = self.get_cloud_function_endpoint(random_name) + # Fetch the endpoint with retries if it wasn't returned by the operation if not endpoint: - raise bf_formatting.create_exception_with_feedback_link( - ValueError, "Couldn't fetch the http endpoint." 
- ) + try: + endpoint = self._get_cloud_function_endpoint_with_retry(random_name) + except Exception as e: + raise bf_formatting.create_exception_with_feedback_link( + ValueError, f"Couldn't fetch the http endpoint: {e}" + ) logger.info( f"Successfully created cloud function {random_name} with uri ({endpoint})" diff --git a/bigframes/ml/cluster.py b/bigframes/ml/cluster.py index 9ce4649c5e2..f371be0cf38 100644 --- a/bigframes/ml/cluster.py +++ b/bigframes/ml/cluster.py @@ -24,7 +24,7 @@ import pandas as pd import bigframes -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd diff --git a/bigframes/ml/compose.py b/bigframes/ml/compose.py index 54ce7066cb3..d638e026e45 100644 --- a/bigframes/ml/compose.py +++ b/bigframes/ml/compose.py @@ -27,8 +27,8 @@ import bigframes_vendored.sklearn.compose._column_transformer from google.cloud import bigquery -from bigframes.core import log_adapter import bigframes.core.compile.googlesql as sql_utils +from bigframes.core.logging import log_adapter import bigframes.core.utils as core_utils from bigframes.ml import base, core, globals, impute, preprocessing, utils import bigframes.pandas as bpd diff --git a/bigframes/ml/decomposition.py b/bigframes/ml/decomposition.py index 3ff32d24330..ca5ff102b44 100644 --- a/bigframes/ml/decomposition.py +++ b/bigframes/ml/decomposition.py @@ -23,7 +23,7 @@ import bigframes_vendored.sklearn.decomposition._pca from google.cloud import bigquery -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import bigframes.session diff --git a/bigframes/ml/ensemble.py b/bigframes/ml/ensemble.py index 2633f134114..7cd7079dfbd 100644 --- a/bigframes/ml/ensemble.py +++ b/bigframes/ml/ensemble.py @@ -23,7 +23,7 @@ import bigframes_vendored.xgboost.sklearn from google.cloud import 
bigquery -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter import bigframes.dataframe from bigframes.ml import base, core, globals, utils import bigframes.session diff --git a/bigframes/ml/forecasting.py b/bigframes/ml/forecasting.py index d26abdfa712..99a7b1743d3 100644 --- a/bigframes/ml/forecasting.py +++ b/bigframes/ml/forecasting.py @@ -20,7 +20,7 @@ from google.cloud import bigquery -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import bigframes.session diff --git a/bigframes/ml/imported.py b/bigframes/ml/imported.py index a73ee352d03..295649ed7f5 100644 --- a/bigframes/ml/imported.py +++ b/bigframes/ml/imported.py @@ -20,7 +20,7 @@ from google.cloud import bigquery -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import bigframes.session diff --git a/bigframes/ml/impute.py b/bigframes/ml/impute.py index 818151a4f96..b3da895201d 100644 --- a/bigframes/ml/impute.py +++ b/bigframes/ml/impute.py @@ -22,7 +22,7 @@ import bigframes_vendored.sklearn.impute._base -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter import bigframes.core.utils as core_utils from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd diff --git a/bigframes/ml/linear_model.py b/bigframes/ml/linear_model.py index 3774a62c0cd..df054eb3062 100644 --- a/bigframes/ml/linear_model.py +++ b/bigframes/ml/linear_model.py @@ -24,7 +24,7 @@ import bigframes_vendored.sklearn.linear_model._logistic from google.cloud import bigquery -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import bigframes.session diff --git a/bigframes/ml/llm.py 
b/bigframes/ml/llm.py index b670cabaea1..f4e60f3f9d4 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -24,7 +24,8 @@ from bigframes import dtypes, exceptions import bigframes.bigquery as bbq -from bigframes.core import blocks, global_session, log_adapter +from bigframes.core import blocks, global_session +from bigframes.core.logging import log_adapter import bigframes.dataframe from bigframes.ml import base, core, globals, utils import bigframes.series @@ -873,7 +874,7 @@ class Claude3TextGenerator(base.RetriableRemotePredictor): "claude-3-sonnet" (deprecated) is Anthropic's dependable combination of skills and speed. It is engineered to be dependable for scaled AI deployments across a variety of use cases. "claude-3-haiku" is Anthropic's fastest, most compact vision and text model for near-instant responses to simple queries, meant for seamless AI experiences mimicking human interactions. "claude-3-5-sonnet" is Anthropic's most powerful AI model and maintains the speed and cost of Claude 3 Sonnet, which is a mid-tier model. - "claude-3-opus" is Anthropic's second-most powerful AI model, with strong performance on highly complex tasks. + "claude-3-opus" (deprecated) is Anthropic's second-most powerful AI model, with strong performance on highly complex tasks. https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude#available-claude-models If no setting is provided, "claude-3-sonnet" will be used by default and a warning will be issued. 
diff --git a/bigframes/ml/model_selection.py b/bigframes/ml/model_selection.py index 6eba4f81c28..5adfb03b7f5 100644 --- a/bigframes/ml/model_selection.py +++ b/bigframes/ml/model_selection.py @@ -26,7 +26,7 @@ import bigframes_vendored.sklearn.model_selection._validation as vendored_model_selection_validation import pandas as pd -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter from bigframes.ml import utils import bigframes.pandas as bpd diff --git a/bigframes/ml/pipeline.py b/bigframes/ml/pipeline.py index dac51b19562..8d692176940 100644 --- a/bigframes/ml/pipeline.py +++ b/bigframes/ml/pipeline.py @@ -24,7 +24,7 @@ import bigframes_vendored.sklearn.pipeline from google.cloud import bigquery -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter import bigframes.dataframe from bigframes.ml import ( base, diff --git a/bigframes/ml/preprocessing.py b/bigframes/ml/preprocessing.py index 94c61674f62..8bf89b08387 100644 --- a/bigframes/ml/preprocessing.py +++ b/bigframes/ml/preprocessing.py @@ -26,7 +26,7 @@ import bigframes_vendored.sklearn.preprocessing._label import bigframes_vendored.sklearn.preprocessing._polynomial -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter import bigframes.core.utils as core_utils from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd diff --git a/bigframes/ml/remote.py b/bigframes/ml/remote.py index b091c61f3f7..24083bd4e88 100644 --- a/bigframes/ml/remote.py +++ b/bigframes/ml/remote.py @@ -19,7 +19,8 @@ from typing import Mapping, Optional import warnings -from bigframes.core import global_session, log_adapter +from bigframes.core import global_session +from bigframes.core.logging import log_adapter import bigframes.dataframe import bigframes.exceptions as bfe from bigframes.ml import base, core, globals, utils diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 
5da8efaa3bf..a1c7754ab5c 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -40,6 +40,7 @@ ) from bigframes.operations.blob_ops import ( obj_fetch_metadata_op, + obj_make_ref_json_op, obj_make_ref_op, ObjGetAccessUrl, ) @@ -365,6 +366,7 @@ "ArrayToStringOp", # Blob ops "ObjGetAccessUrl", + "obj_make_ref_json_op", "obj_make_ref_op", "obj_fetch_metadata_op", # Struct ops diff --git a/bigframes/operations/ai.py b/bigframes/operations/ai.py index ad58e8825c6..6921299acd8 100644 --- a/bigframes/operations/ai.py +++ b/bigframes/operations/ai.py @@ -20,7 +20,8 @@ import warnings from bigframes import dtypes, exceptions, options -from bigframes.core import guid, log_adapter +from bigframes.core import guid +from bigframes.core.logging import log_adapter @log_adapter.class_logger diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py index 577de458f43..29f720b3ebc 100644 --- a/bigframes/operations/blob.py +++ b/bigframes/operations/blob.py @@ -23,7 +23,7 @@ import requests from bigframes import clients, dtypes -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter import bigframes.dataframe import bigframes.exceptions as bfe import bigframes.operations as ops diff --git a/bigframes/operations/blob_ops.py b/bigframes/operations/blob_ops.py index 29f23a2f705..d1e2764eb45 100644 --- a/bigframes/operations/blob_ops.py +++ b/bigframes/operations/blob_ops.py @@ -29,6 +29,7 @@ class ObjGetAccessUrl(base_ops.UnaryOp): name: typing.ClassVar[str] = "obj_get_access_url" mode: str # access mode, e.g. 
R read, W write, RW read & write + duration: typing.Optional[int] = None # duration in microseconds def output_type(self, *input_types): return dtypes.JSON_DTYPE @@ -46,3 +47,14 @@ def output_type(self, *input_types): obj_make_ref_op = ObjMakeRef() + + +@dataclasses.dataclass(frozen=True) +class ObjMakeRefJson(base_ops.UnaryOp): + name: typing.ClassVar[str] = "obj_make_ref_json" + + def output_type(self, *input_types): + return dtypes.OBJ_REF_DTYPE + + +obj_make_ref_json_op = ObjMakeRefJson() diff --git a/bigframes/operations/datetimes.py b/bigframes/operations/datetimes.py index c259dd018e1..2eedb96b43e 100644 --- a/bigframes/operations/datetimes.py +++ b/bigframes/operations/datetimes.py @@ -22,7 +22,7 @@ import pandas from bigframes import dataframe, dtypes, series -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter import bigframes.operations as ops _ONE_DAY = pandas.Timedelta("1D") diff --git a/bigframes/operations/lists.py b/bigframes/operations/lists.py index 34ecdd81184..9974e686933 100644 --- a/bigframes/operations/lists.py +++ b/bigframes/operations/lists.py @@ -19,7 +19,7 @@ import bigframes_vendored.pandas.core.arrays.arrow.accessors as vendoracessors -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter import bigframes.operations as ops from bigframes.operations._op_converters import convert_index, convert_slice import bigframes.series as series diff --git a/bigframes/operations/plotting.py b/bigframes/operations/plotting.py index df0c138f0f0..21a23a9ab54 100644 --- a/bigframes/operations/plotting.py +++ b/bigframes/operations/plotting.py @@ -17,7 +17,7 @@ import bigframes_vendored.constants as constants import bigframes_vendored.pandas.plotting._core as vendordt -from bigframes.core import log_adapter +from bigframes.core.logging import log_adapter import bigframes.operations._matplotlib as bfplt diff --git a/bigframes/operations/semantics.py b/bigframes/operations/semantics.py 
index 2266702d472..f237959d0d3 100644 --- a/bigframes/operations/semantics.py +++ b/bigframes/operations/semantics.py @@ -21,7 +21,8 @@ import numpy as np from bigframes import dtypes, exceptions -from bigframes.core import guid, log_adapter +from bigframes.core import guid +from bigframes.core.logging import log_adapter @log_adapter.class_logger diff --git a/bigframes/operations/strings.py b/bigframes/operations/strings.py index d84a66789d8..922d26a23c1 100644 --- a/bigframes/operations/strings.py +++ b/bigframes/operations/strings.py @@ -20,8 +20,8 @@ import bigframes_vendored.constants as constants import bigframes_vendored.pandas.core.strings.accessor as vendorstr -from bigframes.core import log_adapter import bigframes.core.indexes.base as indices +from bigframes.core.logging import log_adapter import bigframes.dataframe as df import bigframes.operations as ops from bigframes.operations._op_converters import convert_index, convert_slice diff --git a/bigframes/operations/structs.py b/bigframes/operations/structs.py index 35010e1733b..ec0b5dae526 100644 --- a/bigframes/operations/structs.py +++ b/bigframes/operations/structs.py @@ -17,7 +17,8 @@ import bigframes_vendored.pandas.core.arrays.arrow.accessors as vendoracessors import pandas as pd -from bigframes.core import backports, log_adapter +from bigframes.core import backports +from bigframes.core.logging import log_adapter import bigframes.dataframe import bigframes.operations import bigframes.series diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 0b9648fd565..9da2204a713 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -27,9 +27,9 @@ import pandas import bigframes._config as config -from bigframes.core import log_adapter import bigframes.core.global_session as global_session import bigframes.core.indexes +from bigframes.core.logging import log_adapter from bigframes.core.reshape.api import concat, crosstab, cut, get_dummies, merge, qcut import 
bigframes.dataframe import bigframes.functions._utils as bff_utils diff --git a/bigframes/series.py b/bigframes/series.py index 606169a8a14..814d59befff 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -49,13 +49,14 @@ import typing_extensions import bigframes.core -from bigframes.core import agg_expressions, groupby, log_adapter +from bigframes.core import agg_expressions, groupby import bigframes.core.block_transforms as block_ops import bigframes.core.blocks as blocks import bigframes.core.expression as ex import bigframes.core.identifiers as ids import bigframes.core.indexers import bigframes.core.indexes as indexes +from bigframes.core.logging import log_adapter import bigframes.core.ordering as order import bigframes.core.scalar as scalars import bigframes.core.utils as utils diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 4f32514652d..ca8fbf29196 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -67,10 +67,11 @@ import bigframes.clients import bigframes.constants import bigframes.core -from bigframes.core import blocks, log_adapter, utils +from bigframes.core import blocks, utils import bigframes.core.events import bigframes.core.indexes import bigframes.core.indexes.multi +from bigframes.core.logging import log_adapter import bigframes.core.pyformat import bigframes.formatting_helpers import bigframes.functions._function_session as bff_session diff --git a/bigframes/session/_io/bigquery/__init__.py b/bigframes/session/_io/bigquery/__init__.py index 91147702244..98b5f194c74 100644 --- a/bigframes/session/_io/bigquery/__init__.py +++ b/bigframes/session/_io/bigquery/__init__.py @@ -32,9 +32,9 @@ import google.cloud.bigquery._job_helpers import google.cloud.bigquery.table -from bigframes.core import log_adapter import bigframes.core.compile.googlesql as googlesql import bigframes.core.events +from bigframes.core.logging import log_adapter import bigframes.core.sql import 
bigframes.session.metrics diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py index bca98bfb2f8..2cbf6d8705c 100644 --- a/bigframes/session/executor.py +++ b/bigframes/session/executor.py @@ -88,7 +88,7 @@ def arrow_batches(self) -> Iterator[pyarrow.RecordBatch]: yield batch - def to_arrow_table(self) -> pyarrow.Table: + def to_arrow_table(self, limit: Optional[int] = None) -> pyarrow.Table: # Need to provide schema if no result rows, as arrow can't infer # If ther are rows, it is safest to infer schema from batches. # Any discrepencies between predicted schema and actual schema will produce errors. @@ -97,9 +97,12 @@ def to_arrow_table(self) -> pyarrow.Table: peek_value = list(peek_it) # TODO: Enforce our internal schema on the table for consistency if len(peek_value) > 0: - return pyarrow.Table.from_batches( - itertools.chain(peek_value, batches), # reconstruct - ) + batches = itertools.chain(peek_value, batches) # reconstruct + if limit: + batches = pyarrow_utils.truncate_pyarrow_iterable( + batches, max_results=limit + ) + return pyarrow.Table.from_batches(batches) else: try: return self._schema.to_pyarrow().empty_table() @@ -107,8 +110,8 @@ def to_arrow_table(self) -> pyarrow.Table: # Bug with some pyarrow versions, empty_table only supports base storage types, not extension types. return self._schema.to_pyarrow(use_storage_types=True).empty_table() - def to_pandas(self) -> pd.DataFrame: - return io_pandas.arrow_to_pandas(self.to_arrow_table(), self._schema) + def to_pandas(self, limit: Optional[int] = None) -> pd.DataFrame: + return io_pandas.arrow_to_pandas(self.to_arrow_table(limit=limit), self._schema) def to_pandas_batches( self, page_size: Optional[int] = None, max_results: Optional[int] = None @@ -158,7 +161,7 @@ def schema(self) -> bigframes.core.schema.ArraySchema: ... @abc.abstractmethod - def batches(self) -> ResultsIterator: + def batches(self, sample_rate: Optional[float] = None) -> ResultsIterator: ... 
@property @@ -200,9 +203,9 @@ def execution_metadata(self) -> ExecutionMetadata: def schema(self) -> bigframes.core.schema.ArraySchema: return self._data.schema - def batches(self) -> ResultsIterator: + def batches(self, sample_rate: Optional[float] = None) -> ResultsIterator: return ResultsIterator( - iter(self._data.to_arrow()[1]), + iter(self._data.to_arrow(sample_rate=sample_rate)[1]), self.schema, self._data.metadata.row_count, self._data.metadata.total_bytes, @@ -226,7 +229,7 @@ def execution_metadata(self) -> ExecutionMetadata: def schema(self) -> bigframes.core.schema.ArraySchema: return self._schema - def batches(self) -> ResultsIterator: + def batches(self, sample_rate: Optional[float] = None) -> ResultsIterator: return ResultsIterator(iter([]), self.schema, 0, 0) @@ -260,12 +263,13 @@ def schema(self) -> bigframes.core.schema.ArraySchema: source_ids = [selection[0] for selection in self._selected_fields] return self._data.schema.select(source_ids).rename(dict(self._selected_fields)) - def batches(self) -> ResultsIterator: + def batches(self, sample_rate: Optional[float] = None) -> ResultsIterator: read_batches = bq_data.get_arrow_batches( self._data, [x[0] for x in self._selected_fields], self._storage_client, self._project_id, + sample_rate=sample_rate, ) arrow_batches: Iterator[pa.RecordBatch] = map( functools.partial( diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py index bf91637be41..9c18d727c80 100644 --- a/bigframes/session/loader.py +++ b/bigframes/session/loader.py @@ -540,7 +540,9 @@ def request_generator(): commit_request = bq_storage_types.BatchCommitWriteStreamsRequest( parent=parent, write_streams=stream_names ) - self._write_client.batch_commit_write_streams(commit_request) + response = self._write_client.batch_commit_write_streams(commit_request) + for error in response.stream_errors: + raise ValueError(f"Errors commiting stream {error}") result_table = bq_data.GbqTable.from_ref_and_schema( bq_table_ref, 
schema=bq_schema, cluster_cols=[offsets_col] diff --git a/bigframes/streaming/__init__.py b/bigframes/streaming/__init__.py index 477c7a99e01..0d91e5f91a2 100644 --- a/bigframes/streaming/__init__.py +++ b/bigframes/streaming/__init__.py @@ -17,8 +17,8 @@ import inspect import sys -from bigframes.core import log_adapter import bigframes.core.global_session as global_session +from bigframes.core.logging import log_adapter from bigframes.pandas.io.api import _set_default_session_location_if_possible import bigframes.session import bigframes.streaming.dataframe as streaming_dataframe diff --git a/bigframes/streaming/dataframe.py b/bigframes/streaming/dataframe.py index 3e030a4aa20..b7b67178cea 100644 --- a/bigframes/streaming/dataframe.py +++ b/bigframes/streaming/dataframe.py @@ -27,7 +27,8 @@ import pandas as pd from bigframes import dataframe -from bigframes.core import log_adapter, nodes +from bigframes.core import nodes +from bigframes.core.logging import log_adapter import bigframes.exceptions as bfe import bigframes.session diff --git a/bigframes/version.py b/bigframes/version.py index f36c6789c1a..1e9ed79f825 100644 --- a/bigframes/version.py +++ b/bigframes/version.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.32.0" +__version__ = "2.33.0" # {x-release-please-start-date} -__release_date__ = "2026-01-05" +__release_date__ = "2026-01-22" # {x-release-please-end} diff --git a/biome.json b/biome.json new file mode 100644 index 00000000000..d30c8687a4c --- /dev/null +++ b/biome.json @@ -0,0 +1,16 @@ +{ + "formatter": { + "indentStyle": "space", + "indentWidth": 2 + }, + "javascript": { + "formatter": { + "quoteStyle": "single" + } + }, + "css": { + "formatter": { + "quoteStyle": "single" + } + } +} diff --git a/docs/conf.py b/docs/conf.py index 22868aab67b..9883467edfa 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -267,6 +267,12 @@ # https://sphinx-sitemap.readthedocs.io/en/latest/getting-started.html#usage html_baseurl = "https://dataframes.bigquery.dev/" +sitemap_locales = [None] + +# We don't have any immediate plans to translate the API reference, so omit the +# language from the URLs. +# https://sphinx-sitemap.readthedocs.io/en/latest/advanced-configuration.html#configuration-customizing-url-scheme +sitemap_url_scheme = "{link}" # -- Options for warnings ------------------------------------------------------ diff --git a/docs/reference/index.rst b/docs/reference/index.rst index e348bd608be..bdf38e977da 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -11,6 +11,7 @@ packages. 
bigframes.bigquery bigframes.bigquery.ai bigframes.bigquery.ml + bigframes.bigquery.obj bigframes.enums bigframes.exceptions bigframes.geopandas diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index a25acd5d284..5dd8af1c5f1 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -51,7 +51,8 @@ "- **Rich DataFrames & Series:** Both DataFrames and Series are displayed as interactive widgets.\n", "- **Pagination:** Navigate through large datasets page by page without overwhelming the output.\n", "- **Column Sorting:** Click column headers to toggle between ascending, descending, and unsorted views. Use **Shift + Click** to sort by multiple columns.\n", - "- **Column Resizing:** Drag the dividers between column headers to adjust their width." + "- **Column Resizing:** Drag the dividers between column headers to adjust their width.\n", + "- **Max Columns Control:** Limit the number of displayed columns to improve performance and readability for wide datasets." ] }, { @@ -119,16 +120,16 @@ "output_type": "stream", "text": [ "state gender year name number\n", - " AL F 1910 Sadie 40\n", - " AL F 1910 Mary 875\n", - " AR F 1910 Vera 39\n", - " AR F 1910 Marie 78\n", - " AR F 1910 Lucille 66\n", - " CA F 1910 Virginia 101\n", - " DC F 1910 Margaret 72\n", - " GA F 1910 Mildred 133\n", - " GA F 1910 Vera 51\n", - " GA F 1910 Sallie 92\n", + " AL F 1910 Annie 482\n", + " AL F 1910 Myrtle 104\n", + " AR F 1910 Lillian 56\n", + " CT F 1910 Anne 38\n", + " CT F 1910 Frances 45\n", + " FL F 1910 Margaret 53\n", + " GA F 1910 Mae 73\n", + " GA F 1910 Beatrice 96\n", + " GA F 1910 Lola 47\n", + " IA F 1910 Viola 49\n", "...\n", "\n", "[5552452 rows x 5 columns]\n" @@ -142,14 +143,31 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "220340b0", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "✅ Completed. 
" + "\n", + " Query started with request ID bigframes-dev:US.161c75bd-f9f8-4b21-8a45-1d7dfc659034.
SQL
SELECT\n",
+       "`state` AS `state`,\n",
+       "`gender` AS `gender`,\n",
+       "`year` AS `year`,\n",
+       "`name` AS `name`,\n",
+       "`number` AS `number`\n",
+       "FROM\n",
+       "(SELECT\n",
+       "  `t0`.`state`,\n",
+       "  `t0`.`gender`,\n",
+       "  `t0`.`year`,\n",
+       "  `t0`.`name`,\n",
+       "  `t0`.`number`,\n",
+       "  `t0`.`bfuid_col_2` AS `bfuid_col_15`\n",
+       "FROM `bigframes-dev._8b037bfb7316dddf9d92b12dcf93e008906bfe52._c58be946_1477_4c00_b699_0ae022f13563_bqdf_8e323719-899f-4da2-89cd-2dbb53ab1dfc` AS `t0`)\n",
+       "ORDER BY `bfuid_col_15` ASC NULLS LAST
\n", + " " ], "text/plain": [ "" @@ -161,7 +179,9 @@ { "data": { "text/html": [ - "✅ Completed. " + "✅ Completed. \n", + " Query processed 215.9 MB in 7 seconds of slot time. [Job bigframes-dev:US.job_IuiJsjhfPtOrKuTIOqPIjnVLX820 details]\n", + " " ], "text/plain": [ "" @@ -173,7 +193,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a5c2a45c5cc044b59656a2f6b71f710f", + "model_id": "e68fbb9eb4d24bab837c77730d31c8a1", "version_major": 2, "version_minor": 1 }, @@ -209,80 +229,80 @@ " AL\n", " F\n", " 1910\n", - " Vera\n", - " 71\n", + " Hazel\n", + " 51\n", " \n", " \n", " 1\n", - " AR\n", + " AL\n", " F\n", " 1910\n", - " Viola\n", - " 37\n", + " Lucy\n", + " 76\n", " \n", " \n", " 2\n", " AR\n", " F\n", " 1910\n", - " Alice\n", - " 57\n", + " Nellie\n", + " 39\n", " \n", " \n", " 3\n", " AR\n", " F\n", " 1910\n", - " Edna\n", - " 95\n", + " Lena\n", + " 40\n", " \n", " \n", " 4\n", - " AR\n", + " CO\n", " F\n", " 1910\n", - " Ollie\n", - " 40\n", + " Thelma\n", + " 36\n", " \n", " \n", " 5\n", - " CA\n", + " CO\n", " F\n", " 1910\n", - " Beatrice\n", - " 37\n", + " Ruth\n", + " 68\n", " \n", " \n", " 6\n", " CT\n", " F\n", " 1910\n", - " Marion\n", - " 36\n", + " Elizabeth\n", + " 86\n", " \n", " \n", " 7\n", - " CT\n", + " DC\n", " F\n", " 1910\n", - " Marie\n", - " 36\n", + " Mary\n", + " 80\n", " \n", " \n", " 8\n", " FL\n", " F\n", " 1910\n", - " Alice\n", - " 53\n", + " Annie\n", + " 101\n", " \n", " \n", " 9\n", - " GA\n", + " FL\n", " F\n", " 1910\n", - " Thelma\n", - " 133\n", + " Alma\n", + " 39\n", " \n", " \n", "\n", @@ -290,25 +310,67 @@ "[5552452 rows x 5 columns in total]" ], "text/plain": [ - "state gender year name number\n", - " AL F 1910 Vera 71\n", - " AR F 1910 Viola 37\n", - " AR F 1910 Alice 57\n", - " AR F 1910 Edna 95\n", - " AR F 1910 Ollie 40\n", - " CA F 1910 Beatrice 37\n", - " CT F 1910 Marion 36\n", - " CT F 1910 Marie 36\n", - " FL F 1910 Alice 53\n", - " GA F 1910 Thelma 133\n", + "state gender year name 
number\n", + " AL F 1910 Hazel 51\n", + " AL F 1910 Lucy 76\n", + " AR F 1910 Nellie 39\n", + " AR F 1910 Lena 40\n", + " CO F 1910 Thelma 36\n", + " CO F 1910 Ruth 68\n", + " CT F 1910 Elizabeth 86\n", + " DC F 1910 Mary 80\n", + " FL F 1910 Annie 101\n", + " FL F 1910 Alma 39\n", "...\n", "\n", "[5552452 rows x 5 columns]" ] }, - "execution_count": 5, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" + }, + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 215.9 MB in 9 seconds of slot time. [Job bigframes-dev:US.job_IEjIRaqt2w-_pAttPw1VAVuRPxA7 details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 215.9 MB in 5 seconds of slot time. [Job bigframes-dev:US.job_Mi-3m2AkEC1iPgWi7hmcWa1M1oIA details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 215.9 MB in 6 seconds of slot time. [Job bigframes-dev:US.job_j8pvY385WwIY7tGvhI7Yxc62aBwd details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -326,14 +388,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "42bb02ab", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "✅ Completed. " + "✅ Completed. \n", + " Query processed 171.4 MB in 30 seconds of slot time. [Job bigframes-dev:US.ff90d507-bec8-4d24-abc3-0209ac28e21f details]\n", + " " ], "text/plain": [ "" @@ -345,7 +409,9 @@ { "data": { "text/html": [ - "✅ Completed. " + "✅ Completed. 
\n", + " Query processed 88.8 MB in a moment of slot time.\n", + " " ], "text/plain": [ "" @@ -356,43 +422,35 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "004beca7d4034b498add8f9edd55027b", - "version_major": 2, - "version_minor": 1 - }, "text/html": [ - "
0    1910\n",
-       "1    1910\n",
-       "2    1910\n",
-       "3    1910\n",
-       "4    1910\n",
-       "5    1910\n",
-       "6    1910\n",
-       "7    1910\n",
-       "8    1910\n",
-       "9    1910

[5552452 rows]

" + "✅ Completed. " ], "text/plain": [ - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "Name: year, dtype: Int64\n", - "...\n", - "\n", - "[5552452 rows]" + "" ] }, - "execution_count": 6, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "Name: year, dtype: Int64\n", + "...\n", + "\n", + "[5552452 rows]\n" + ] } ], "source": [ @@ -418,7 +476,9 @@ { "data": { "text/html": [ - "✅ Completed. " + "✅ Completed. \n", + " Query processed 88.8 MB in 3 seconds of slot time. [Job bigframes-dev:US.job_517TdI--FMoURkV7QQNMltY_-dZ7 details]\n", + " " ], "text/plain": [ "" @@ -430,7 +490,9 @@ { "data": { "text/html": [ - "✅ Completed. " + "✅ Completed. \n", + " Query processed 88.8 MB in 2 seconds of slot time. [Job bigframes-dev:US.job_rCeYkeBPqmTKNFWFgwXjz5Ed8uWI details]\n", + " " ], "text/plain": [ "" @@ -442,7 +504,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1251df51c4ba44d0b93af07917888511", + "model_id": "3e630b1a56c740e781772ca5f5c7267a", "version_major": 2, "version_minor": 1 }, @@ -511,7 +573,10 @@ "metadata": {}, "source": [ "### Adjustable Column Widths\n", - "You can easily adjust the width of any column in the table. Simply hover your mouse over the vertical dividers between column headers. When the cursor changes to a resize icon, click and drag to expand or shrink the column to your desired width. This allows for better readability and customization of your table view." + "You can easily adjust the width of any column in the table. Simply hover your mouse over the vertical dividers between column headers. When the cursor changes to a resize icon, click and drag to expand or shrink the column to your desired width. 
This allows for better readability and customization of your table view.\n", + "\n", + "### Control Maximum Columns\n", + "You can control the number of columns displayed in the widget using the **Max columns** dropdown in the footer. This is useful for wide DataFrames where you want to focus on a subset of columns or improve rendering performance. Options include 3, 5, 7, 10, 20, or All." ] }, { @@ -540,7 +605,9 @@ { "data": { "text/html": [ - "✅ Completed. " + "✅ Completed. \n", + " Query processed 215.9 MB in 11 seconds of slot time. [Job bigframes-dev:US.job_XwXTDb6gWVkuyIFMeWA0waE33bSg details]\n", + " " ], "text/plain": [ "" @@ -552,7 +619,9 @@ { "data": { "text/html": [ - "✅ Completed. " + "✅ Completed. \n", + " Query processed 215.9 MB in 7 seconds of slot time. [Job bigframes-dev:US.job_bCW0LYK5_PzyyGPf9OAg4YfNMG1C details]\n", + " " ], "text/plain": [ "" @@ -571,12 +640,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "45a462a3a42a445bb06d89132b7d0331", + "model_id": "a6a2b19314b04283a5a66ca9d66eb771", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, "execution_count": 8, @@ -651,27 +720,8 @@ { "data": { "text/html": [ - "\n", - " Query started with request ID bigframes-dev:US.a9f6b054-3709-49d6-8109-c325ffe07679.
SQL
SELECT\n",
-       "`state` AS `state`,\n",
-       "`gender` AS `gender`,\n",
-       "`year` AS `year`,\n",
-       "`name` AS `name`,\n",
-       "`number` AS `number`\n",
-       "FROM\n",
-       "(SELECT\n",
-       "  *\n",
-       "FROM (\n",
-       "  SELECT\n",
-       "    `state`,\n",
-       "    `gender`,\n",
-       "    `year`,\n",
-       "    `name`,\n",
-       "    `number`\n",
-       "  FROM `bigquery-public-data.usa_names.usa_1910_2013` FOR SYSTEM_TIME AS OF TIMESTAMP('2025-12-29T22:47:29.748716+00:00')\n",
-       ") AS `t0`)\n",
-       "ORDER BY `name` ASC NULLS LAST ,`year` ASC NULLS LAST ,`state` ASC NULLS LAST\n",
-       "LIMIT 5
\n", + "✅ Completed. \n", + " Query processed 215.9 MB in a moment of slot time.\n", " " ], "text/plain": [ @@ -685,7 +735,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 0 Bytes in a moment of slot time.\n", + " Query processed 215.9 MB in a moment of slot time.\n", " " ], "text/plain": [ @@ -705,12 +755,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "219f91f2341d42b8b96da795a79fc3e8", + "model_id": "beb362548a6b4fd4a163569edd6f1a90", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, "execution_count": 10, @@ -746,24 +796,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "added-cell-1", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "\n", - " Query started with request ID bigframes-dev:US.8819b8bd-6697-4c65-a8bc-c7a95a06fe8e.
SQL
\n",
-       "  SELECT\n",
-       "    AI.GENERATE(\n",
-       "      prompt=>("Extract the values.", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, "us.conn")), "r")),\n",
-       "      connection_id=>"bigframes-dev.us.bigframes-default-connection",\n",
-       "      output_schema=>"publication_date string, class_international string, application_number string, filing_date string") AS result,\n",
-       "    *\n",
-       "  FROM `bigquery-public-data.labeled_patents.extracted_data`\n",
-       "  LIMIT 5;\n",
-       "
\n", + "✅ Completed. \n", + " Query processed 85.9 kB in 19 seconds of slot time.\n", " " ], "text/plain": [ @@ -772,6 +813,243 @@ }, "metadata": {}, "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "02a46cf499b442d4bfe03934195e67df", + "version_major": 2, + "version_minor": 1 + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
resultgcs_pathissuerlanguagepublication_dateclass_internationalclass_usapplication_numberfiling_datepriority_date_eurepresentative_line_1_euapplicant_line_1inventor_line_1title_line_1number
0{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H01L 21/20<NA>18166536.516.02.2016<NA>Scheider, Sascha et alEV Group E. Thallner GmbHKurz, FlorianVORRICHTUNG ZUM BONDEN VON SUBSTRATENEP 3 382 744 A1
1{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018A01K 31/00<NA>18171005.405.02.201505.02.2014Stork Bamberger PatentanwälteLinco Food Systems A/SThrane, UffeMASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E...EP 3 381 276 A1
2{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018G06F 11/30<NA>18157347.819.02.201831.03.2017Hoffmann EitleFUJITSU LIMITEDKukihara, KensukeMETHOD EXECUTED BY A COMPUTER, INFORMATION PRO...EP 3 382 553 A1
3{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H05B 6/12<NA>18165514.303.04.201830.03.2017<NA>BSH Hausger√§te GmbHAcero Acero, JesusVORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNGEP 3 383 141 A2
4{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE29.08.018E04H 6/12<NA>18157874.121.02.201822.02.2017Liedtke & Partner PatentanwälteSHB Hebezeugbau GmbHVOLGER, AlexanderSTEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSEREP 3 366 869 A1
\n", + "

5 rows × 15 columns

\n", + "
[5 rows x 15 columns in total]" + ], + "text/plain": [ + " result \\\n", + "0 {'application_number': None, 'class_internatio... \n", + "1 {'application_number': None, 'class_internatio... \n", + "2 {'application_number': None, 'class_internatio... \n", + "3 {'application_number': None, 'class_internatio... \n", + "4 {'application_number': None, 'class_internatio... \n", + "\n", + " gcs_path issuer language \\\n", + "0 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "1 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "2 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "3 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "\n", + " publication_date class_international class_us application_number \\\n", + "0 03.10.2018 H01L 21/20 18166536.5 \n", + "1 03.10.2018 A01K 31/00 18171005.4 \n", + "2 03.10.2018 G06F 11/30 18157347.8 \n", + "3 03.10.2018 H05B 6/12 18165514.3 \n", + "4 29.08.018 E04H 6/12 18157874.1 \n", + "\n", + " filing_date priority_date_eu representative_line_1_eu \\\n", + "0 16.02.2016 Scheider, Sascha et al \n", + "1 05.02.2015 05.02.2014 Stork Bamberger Patentanwälte \n", + "2 19.02.2018 31.03.2017 Hoffmann Eitle \n", + "3 03.04.2018 30.03.2017 \n", + "4 21.02.2018 22.02.2017 Liedtke & Partner Patentanwälte \n", + "\n", + " applicant_line_1 inventor_line_1 \\\n", + "0 EV Group E. Thallner GmbH Kurz, Florian \n", + "1 Linco Food Systems A/S Thrane, Uffe \n", + "2 FUJITSU LIMITED Kukihara, Kensuke \n", + "3 BSH Hausgeräte GmbH Acero Acero, Jesus \n", + "4 SHB Hebezeugbau GmbH VOLGER, Alexander \n", + "\n", + " title_line_1 number \n", + "0 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", + "1 MASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", + "2 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... 
EP 3 382 553 A1 \n", + "3 VORRICHTUNG ZUR INDUKTIVEN ENERGIEÜBERTRAGUNG EP 3 383 141 A2 \n", + "4 STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER EP 3 366 869 A1 \n", + "\n", + "[5 rows x 15 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ diff --git a/tests/js/table_widget.test.js b/tests/js/table_widget.test.js index b3796905e5f..d701d8692e5 100644 --- a/tests/js/table_widget.test.js +++ b/tests/js/table_widget.test.js @@ -14,283 +14,518 @@ * limitations under the License. */ -import { jest } from "@jest/globals"; -import { JSDOM } from "jsdom"; - -describe("TableWidget", () => { - let model; - let el; - let render; - - beforeEach(async () => { - jest.resetModules(); - document.body.innerHTML = "
"; - el = document.body.querySelector("div"); - - const tableWidget = ( - await import("../../bigframes/display/table_widget.js") - ).default; - render = tableWidget.render; - - model = { - get: jest.fn(), - set: jest.fn(), - save_changes: jest.fn(), - on: jest.fn(), - }; - }); - - it("should have a render function", () => { - expect(render).toBeDefined(); - }); - - describe("render", () => { - it("should create the basic structure", () => { - // Mock the initial state - model.get.mockImplementation((property) => { - if (property === "table_html") { - return ""; - } - if (property === "row_count") { - return 100; - } - if (property === "error_message") { - return null; - } - if (property === "page_size") { - return 10; - } - if (property === "page") { - return 0; - } - return null; - }); - - render({ model, el }); - - expect(el.classList.contains("bigframes-widget")).toBe(true); - expect(el.querySelector(".error-message")).not.toBeNull(); - expect(el.querySelector("div")).not.toBeNull(); - expect(el.querySelector("div:nth-child(3)")).not.toBeNull(); - }); - - it("should sort when a sortable column is clicked", () => { - // Mock the initial state - model.get.mockImplementation((property) => { - if (property === "table_html") { - return "
col1
"; - } - if (property === "orderable_columns") { - return ["col1"]; - } - if (property === "sort_context") { - return []; - } - return null; - }); - - render({ model, el }); - - // Manually trigger the table_html change handler - const tableHtmlChangeHandler = model.on.mock.calls.find( - (call) => call[0] === "change:table_html", - )[1]; - tableHtmlChangeHandler(); - - const header = el.querySelector("th"); - header.click(); - - expect(model.set).toHaveBeenCalledWith("sort_context", [ - { column: "col1", ascending: true }, - ]); - expect(model.save_changes).toHaveBeenCalled(); - }); - - it("should reverse sort direction when a sorted column is clicked", () => { - // Mock the initial state - model.get.mockImplementation((property) => { - if (property === "table_html") { - return "
col1
"; - } - if (property === "orderable_columns") { - return ["col1"]; - } - if (property === "sort_context") { - return [{ column: "col1", ascending: true }]; - } - return null; - }); - - render({ model, el }); - - // Manually trigger the table_html change handler - const tableHtmlChangeHandler = model.on.mock.calls.find( - (call) => call[0] === "change:table_html", - )[1]; - tableHtmlChangeHandler(); - - const header = el.querySelector("th"); - header.click(); - - expect(model.set).toHaveBeenCalledWith("sort_context", [ - { column: "col1", ascending: false }, - ]); - expect(model.save_changes).toHaveBeenCalled(); - }); - - it("should clear sort when a descending sorted column is clicked", () => { - // Mock the initial state - model.get.mockImplementation((property) => { - if (property === "table_html") { - return "
col1
"; - } - if (property === "orderable_columns") { - return ["col1"]; - } - if (property === "sort_context") { - return [{ column: "col1", ascending: false }]; - } - return null; - }); - - render({ model, el }); - - // Manually trigger the table_html change handler - const tableHtmlChangeHandler = model.on.mock.calls.find( - (call) => call[0] === "change:table_html", - )[1]; - tableHtmlChangeHandler(); - - const header = el.querySelector("th"); - header.click(); - - expect(model.set).toHaveBeenCalledWith("sort_context", []); - expect(model.save_changes).toHaveBeenCalled(); - }); - - it("should display the correct sort indicator", () => { - // Mock the initial state - model.get.mockImplementation((property) => { - if (property === "table_html") { - return "
col1
col2
"; - } - if (property === "orderable_columns") { - return ["col1", "col2"]; - } - if (property === "sort_context") { - return [{ column: "col1", ascending: true }]; - } - return null; - }); - - render({ model, el }); - - // Manually trigger the table_html change handler - const tableHtmlChangeHandler = model.on.mock.calls.find( - (call) => call[0] === "change:table_html", - )[1]; - tableHtmlChangeHandler(); - - const headers = el.querySelectorAll("th"); - const indicator1 = headers[0].querySelector(".sort-indicator"); - const indicator2 = headers[1].querySelector(".sort-indicator"); - - expect(indicator1.textContent).toBe("▲"); - expect(indicator2.textContent).toBe("●"); - }); - - it("should add a column to sort when Shift+Click is used", () => { - // Mock the initial state: already sorted by col1 asc - model.get.mockImplementation((property) => { - if (property === "table_html") { - return "
col1
col2
"; - } - if (property === "orderable_columns") { - return ["col1", "col2"]; - } - if (property === "sort_context") { - return [{ column: "col1", ascending: true }]; - } - return null; - }); - - render({ model, el }); - - // Manually trigger the table_html change handler - const tableHtmlChangeHandler = model.on.mock.calls.find( - (call) => call[0] === "change:table_html", - )[1]; - tableHtmlChangeHandler(); - - const headers = el.querySelectorAll("th"); - const header2 = headers[1]; // col2 - - // Simulate Shift+Click - const clickEvent = new MouseEvent("click", { - bubbles: true, - cancelable: true, - shiftKey: true, - }); - header2.dispatchEvent(clickEvent); - - expect(model.set).toHaveBeenCalledWith("sort_context", [ - { column: "col1", ascending: true }, - { column: "col2", ascending: true }, - ]); - expect(model.save_changes).toHaveBeenCalled(); - }); - }); - - it("should render the series as a table with an index and one value column", () => { - // Mock the initial state - model.get.mockImplementation((property) => { - if (property === "table_html") { - return ` -
-
- - - - - - - - - - - - - - - - - -
value
0a
1b
-
-
`; - } - if (property === "orderable_columns") { - return []; - } - return null; - }); - - render({ model, el }); - - // Manually trigger the table_html change handler - const tableHtmlChangeHandler = model.on.mock.calls.find( - (call) => call[0] === "change:table_html", - )[1]; - tableHtmlChangeHandler(); - - // Check that the table has two columns - const headers = el.querySelectorAll( - ".paginated-table-container .col-header-name", - ); - expect(headers).toHaveLength(2); - - // Check that the headers are an empty string (for the index) and "value" - expect(headers[0].textContent).toBe(""); - expect(headers[1].textContent).toBe("value"); - }); +import { jest } from '@jest/globals'; + +describe('TableWidget', () => { + let model; + let el; + let render; + + beforeEach(async () => { + jest.resetModules(); + document.body.innerHTML = '
'; + el = document.body.querySelector('div'); + + const tableWidget = ( + await import('../../bigframes/display/table_widget.js') + ).default; + render = tableWidget.render; + + model = { + get: jest.fn(), + set: jest.fn(), + save_changes: jest.fn(), + on: jest.fn(), + }; + }); + + it('should have a render function', () => { + expect(render).toBeDefined(); + }); + + describe('render', () => { + it('should create the basic structure', () => { + // Mock the initial state + model.get.mockImplementation((property) => { + if (property === 'table_html') { + return ''; + } + if (property === 'row_count') { + return 100; + } + if (property === 'error_message') { + return null; + } + if (property === 'page_size') { + return 10; + } + if (property === 'page') { + return 0; + } + return null; + }); + + render({ model, el }); + + expect(el.classList.contains('bigframes-widget')).toBe(true); + expect(el.querySelector('.error-message')).not.toBeNull(); + expect(el.querySelector('div')).not.toBeNull(); + expect(el.querySelector('div:nth-child(3)')).not.toBeNull(); + }); + + it('should sort when a sortable column is clicked', () => { + // Mock the initial state + model.get.mockImplementation((property) => { + if (property === 'table_html') { + return '
col1
'; + } + if (property === 'orderable_columns') { + return ['col1']; + } + if (property === 'sort_context') { + return []; + } + return null; + }); + + render({ model, el }); + + // Manually trigger the table_html change handler + const tableHtmlChangeHandler = model.on.mock.calls.find( + (call) => call[0] === 'change:table_html', + )[1]; + tableHtmlChangeHandler(); + + const header = el.querySelector('th'); + header.click(); + + expect(model.set).toHaveBeenCalledWith('sort_context', [ + { column: 'col1', ascending: true }, + ]); + expect(model.save_changes).toHaveBeenCalled(); + }); + + it('should reverse sort direction when a sorted column is clicked', () => { + // Mock the initial state + model.get.mockImplementation((property) => { + if (property === 'table_html') { + return '
col1
'; + } + if (property === 'orderable_columns') { + return ['col1']; + } + if (property === 'sort_context') { + return [{ column: 'col1', ascending: true }]; + } + return null; + }); + + render({ model, el }); + + // Manually trigger the table_html change handler + const tableHtmlChangeHandler = model.on.mock.calls.find( + (call) => call[0] === 'change:table_html', + )[1]; + tableHtmlChangeHandler(); + + const header = el.querySelector('th'); + header.click(); + + expect(model.set).toHaveBeenCalledWith('sort_context', [ + { column: 'col1', ascending: false }, + ]); + expect(model.save_changes).toHaveBeenCalled(); + }); + + it('should clear sort when a descending sorted column is clicked', () => { + // Mock the initial state + model.get.mockImplementation((property) => { + if (property === 'table_html') { + return '
col1
'; + } + if (property === 'orderable_columns') { + return ['col1']; + } + if (property === 'sort_context') { + return [{ column: 'col1', ascending: false }]; + } + return null; + }); + + render({ model, el }); + + // Manually trigger the table_html change handler + const tableHtmlChangeHandler = model.on.mock.calls.find( + (call) => call[0] === 'change:table_html', + )[1]; + tableHtmlChangeHandler(); + + const header = el.querySelector('th'); + header.click(); + + expect(model.set).toHaveBeenCalledWith('sort_context', []); + expect(model.save_changes).toHaveBeenCalled(); + }); + + it('should display the correct sort indicator', () => { + // Mock the initial state + model.get.mockImplementation((property) => { + if (property === 'table_html') { + return '
col1
col2
'; + } + if (property === 'orderable_columns') { + return ['col1', 'col2']; + } + if (property === 'sort_context') { + return [{ column: 'col1', ascending: true }]; + } + return null; + }); + + render({ model, el }); + + // Manually trigger the table_html change handler + const tableHtmlChangeHandler = model.on.mock.calls.find( + (call) => call[0] === 'change:table_html', + )[1]; + tableHtmlChangeHandler(); + + const headers = el.querySelectorAll('th'); + const indicator1 = headers[0].querySelector('.sort-indicator'); + const indicator2 = headers[1].querySelector('.sort-indicator'); + + expect(indicator1.textContent).toBe('▲'); + expect(indicator2.textContent).toBe('●'); + }); + + it('should add a column to sort when Shift+Click is used', () => { + // Mock the initial state: already sorted by col1 asc + model.get.mockImplementation((property) => { + if (property === 'table_html') { + return '
col1
col2
'; + } + if (property === 'orderable_columns') { + return ['col1', 'col2']; + } + if (property === 'sort_context') { + return [{ column: 'col1', ascending: true }]; + } + return null; + }); + + render({ model, el }); + + // Manually trigger the table_html change handler + const tableHtmlChangeHandler = model.on.mock.calls.find( + (call) => call[0] === 'change:table_html', + )[1]; + tableHtmlChangeHandler(); + + const headers = el.querySelectorAll('th'); + const header2 = headers[1]; // col2 + + // Simulate Shift+Click + const clickEvent = new MouseEvent('click', { + bubbles: true, + cancelable: true, + shiftKey: true, + }); + header2.dispatchEvent(clickEvent); + + expect(model.set).toHaveBeenCalledWith('sort_context', [ + { column: 'col1', ascending: true }, + { column: 'col2', ascending: true }, + ]); + expect(model.save_changes).toHaveBeenCalled(); + }); + }); + + describe('Theme detection', () => { + beforeEach(() => { + jest.useFakeTimers(); + // Mock the initial state for theme detection tests + model.get.mockImplementation((property) => { + if (property === 'table_html') { + return ''; + } + if (property === 'row_count') { + return 100; + } + if (property === 'error_message') { + return null; + } + if (property === 'page_size') { + return 10; + } + if (property === 'page') { + return 0; + } + return null; + }); + }); + + afterEach(() => { + jest.useRealTimers(); + document.body.classList.remove('vscode-dark'); + }); + + it('should add bigframes-dark-mode class in dark mode', () => { + document.body.classList.add('vscode-dark'); + render({ model, el }); + jest.runAllTimers(); + expect(el.classList.contains('bigframes-dark-mode')).toBe(true); + }); + + it('should not add bigframes-dark-mode class in light mode', () => { + render({ model, el }); + jest.runAllTimers(); + expect(el.classList.contains('bigframes-dark-mode')).toBe(false); + }); + }); + + it('should render the series as a table with an index and one value column', () => { + // Mock the initial state 
+ model.get.mockImplementation((property) => { + if (property === 'table_html') { + return ` +
+
+ + + + + + + + + + + + + + + + + +
value
0a
1b
+
+
`; + } + if (property === 'orderable_columns') { + return []; + } + return null; + }); + + render({ model, el }); + + // Manually trigger the table_html change handler + const tableHtmlChangeHandler = model.on.mock.calls.find( + (call) => call[0] === 'change:table_html', + )[1]; + tableHtmlChangeHandler(); + + // Check that the table has two columns + const headers = el.querySelectorAll( + '.paginated-table-container .col-header-name', + ); + expect(headers).toHaveLength(2); + + // Check that the headers are an empty string (for the index) and "value" + expect(headers[0].textContent).toBe(''); + expect(headers[1].textContent).toBe('value'); + }); + + /* + * Tests that the widget correctly renders HTML with truncated columns (ellipsis) + * and ensures that the ellipsis column is not treated as a sortable column. + */ + it('should set height dynamically on first load and remain fixed', () => { + jest.useFakeTimers(); + + // Mock the table's offsetHeight + let mockHeight = 150; + Object.defineProperty(HTMLElement.prototype, 'offsetHeight', { + configurable: true, + get: () => mockHeight, + }); + + // Mock model properties + model.get.mockImplementation((property) => { + if (property === 'table_html') { + return '...
'; + } + return null; + }); + + render({ model, el }); + + const tableContainer = el.querySelector('.table-container'); + + // --- First render --- + const tableHtmlChangeHandler = model.on.mock.calls.find( + (call) => call[0] === 'change:table_html', + )[1]; + tableHtmlChangeHandler(); + jest.runAllTimers(); + + // Height should be set to the mocked offsetHeight + 2px buffer + expect(tableContainer.style.height).toBe('152px'); + + // --- Second render (e.g., page size change) --- + // Simulate the new content being taller + mockHeight = 350; + tableHtmlChangeHandler(); + jest.runAllTimers(); + + // Height should NOT change + expect(tableContainer.style.height).toBe('152px'); + + // Restore original implementation + Object.defineProperty(HTMLElement.prototype, 'offsetHeight', { + value: 0, + }); + jest.useRealTimers(); + }); + + it('should render truncated columns with ellipsis and not make ellipsis sortable', () => { + // Mock HTML with truncated columns + // Use the structure produced by the python backend + const mockHtml = ` + + + + + + + + + + + + + + + +
col1
...
col10
1...10
+ `; + + model.get.mockImplementation((property) => { + if (property === 'table_html') { + return mockHtml; + } + if (property === 'orderable_columns') { + // Only actual columns are orderable + return ['col1', 'col10']; + } + if (property === 'sort_context') { + return []; + } + return null; + }); + + render({ model, el }); + + // Manually trigger the table_html change handler + const tableHtmlChangeHandler = model.on.mock.calls.find( + (call) => call[0] === 'change:table_html', + )[1]; + tableHtmlChangeHandler(); + + const headers = el.querySelectorAll('th'); + expect(headers).toHaveLength(3); + + // Check col1 (sortable) + const col1Header = headers[0]; + const col1Indicator = col1Header.querySelector('.sort-indicator'); + expect(col1Indicator).not.toBeNull(); // Should exist (hidden by default) + + // Check ellipsis (not sortable) + const ellipsisHeader = headers[1]; + const ellipsisIndicator = ellipsisHeader.querySelector('.sort-indicator'); + // The render function adds sort indicators only if the column name matches an entry in orderable_columns. + // The ellipsis header content is "..." which is not in ['col1', 'col10']. + expect(ellipsisIndicator).toBeNull(); + + // Check col10 (sortable) + const col10Header = headers[2]; + const col10Indicator = col10Header.querySelector('.sort-indicator'); + expect(col10Indicator).not.toBeNull(); + }); + + describe('Max columns', () => { + /* + * Tests for the max columns dropdown functionality. 
+ */ + + it('should render the max columns dropdown', () => { + // Mock basic state + model.get.mockImplementation((property) => { + if (property === 'max_columns') { + return 20; + } + return null; + }); + + render({ model, el }); + + const maxColumnsContainer = el.querySelector('.max-columns'); + expect(maxColumnsContainer).not.toBeNull(); + const label = maxColumnsContainer.querySelector('label'); + expect(label.textContent).toBe('Max columns:'); + const select = maxColumnsContainer.querySelector('select'); + expect(select).not.toBeNull(); + }); + + it('should select the correct initial value', () => { + const initialMaxColumns = 20; + model.get.mockImplementation((property) => { + if (property === 'max_columns') { + return initialMaxColumns; + } + return null; + }); + + render({ model, el }); + + const select = el.querySelector('.max-columns select'); + expect(Number(select.value)).toBe(initialMaxColumns); + }); + + it('should handle None/null initial value as 0 (All)', () => { + model.get.mockImplementation((property) => { + if (property === 'max_columns') { + return null; // Python None is null in JS + } + return null; + }); + + render({ model, el }); + + const select = el.querySelector('.max-columns select'); + expect(Number(select.value)).toBe(0); + expect(select.options[select.selectedIndex].textContent).toBe('All'); + }); + + it('should update model when value changes', () => { + model.get.mockImplementation((property) => { + if (property === 'max_columns') { + return 20; + } + return null; + }); + + render({ model, el }); + + const select = el.querySelector('.max-columns select'); + + // Change to 10 + select.value = '10'; + const event = new Event('change'); + select.dispatchEvent(event); + + expect(model.set).toHaveBeenCalledWith('max_columns', 10); + expect(model.save_changes).toHaveBeenCalled(); + }); + }); }); diff --git a/tests/system/large/bigquery/__init__.py b/tests/system/large/bigquery/__init__.py new file mode 100644 index 
00000000000..58d482ea386 --- /dev/null +++ b/tests/system/large/bigquery/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/system/large/bigquery/test_obj.py b/tests/system/large/bigquery/test_obj.py new file mode 100644 index 00000000000..dcca7580b14 --- /dev/null +++ b/tests/system/large/bigquery/test_obj.py @@ -0,0 +1,41 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +import bigframes.bigquery as bbq + + +@pytest.fixture() +def objectrefs(bq_connection): + return bbq.obj.make_ref( + [ + "gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/tick-terminator-for-dogs.png" + ], + bq_connection, + ) + + +def test_obj_fetch_metadata(objectrefs): + metadata = bbq.obj.fetch_metadata(objectrefs) + + result = metadata.to_pandas() + assert len(result) == len(objectrefs) + + +def test_obj_get_access_url(objectrefs): + access = bbq.obj.get_access_url(objectrefs, "r") + + result = access.to_pandas() + assert len(result) == len(objectrefs) diff --git a/tests/system/load/test_llm.py b/tests/system/load/test_llm.py index 9630952e678..25cde92c133 100644 --- a/tests/system/load/test_llm.py +++ b/tests/system/load/test_llm.py @@ -100,13 +100,13 @@ def test_llm_gemini_w_ground_with_google_search(llm_remote_text_df): # (b/366290533): Claude models are of extremely low capacity. The tests should reside in small tests. Moving these here just to protect BQML's shared capacity(as load test only runs once per day.) and make sure we still have minimum coverage. 
@pytest.mark.parametrize( "model_name", - ("claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"), + ("claude-3-haiku", "claude-3-5-sonnet"), ) @pytest.mark.flaky(retries=3, delay=120) def test_claude3_text_generator_create_load( dataset_id, model_name, session, session_us_east5, bq_connection ): - if model_name in ("claude-3-5-sonnet", "claude-3-opus"): + if model_name in ("claude-3-5-sonnet",): session = session_us_east5 claude3_text_generator_model = llm.Claude3TextGenerator( model_name=model_name, connection_name=bq_connection, session=session @@ -125,13 +125,13 @@ def test_claude3_text_generator_create_load( @pytest.mark.parametrize( "model_name", - ("claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"), + ("claude-3-haiku", "claude-3-5-sonnet"), ) @pytest.mark.flaky(retries=3, delay=120) def test_claude3_text_generator_predict_default_params_success( llm_text_df, model_name, session, session_us_east5, bq_connection ): - if model_name in ("claude-3-5-sonnet", "claude-3-opus"): + if model_name in ("claude-3-5-sonnet",): session = session_us_east5 claude3_text_generator_model = llm.Claude3TextGenerator( model_name=model_name, connection_name=bq_connection, session=session @@ -144,13 +144,13 @@ def test_claude3_text_generator_predict_default_params_success( @pytest.mark.parametrize( "model_name", - ("claude-3-haiku", "claude-3-5-sonnet", "claude-3-opus"), + ("claude-3-haiku", "claude-3-5-sonnet"), ) @pytest.mark.flaky(retries=3, delay=120) def test_claude3_text_generator_predict_with_params_success( llm_text_df, model_name, session, session_us_east5, bq_connection ): - if model_name in ("claude-3-5-sonnet", "claude-3-opus"): + if model_name in ("claude-3-5-sonnet",): session = session_us_east5 claude3_text_generator_model = llm.Claude3TextGenerator( model_name=model_name, connection_name=bq_connection, session=session @@ -165,13 +165,13 @@ def test_claude3_text_generator_predict_with_params_success( @pytest.mark.parametrize( "model_name", - ("claude-3-haiku", 
"claude-3-5-sonnet", "claude-3-opus"), + ("claude-3-haiku", "claude-3-5-sonnet"), ) @pytest.mark.flaky(retries=3, delay=120) def test_claude3_text_generator_predict_multi_col_success( llm_text_df, model_name, session, session_us_east5, bq_connection ): - if model_name in ("claude-3-5-sonnet", "claude-3-opus"): + if model_name in ("claude-3-5-sonnet",): session = session_us_east5 llm_text_df["additional_col"] = 1 diff --git a/tests/system/small/core/logging/__init__.py b/tests/system/small/core/logging/__init__.py new file mode 100644 index 00000000000..58d482ea386 --- /dev/null +++ b/tests/system/small/core/logging/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/system/small/core/logging/test_data_types.py b/tests/system/small/core/logging/test_data_types.py new file mode 100644 index 00000000000..7e197a96727 --- /dev/null +++ b/tests/system/small/core/logging/test_data_types.py @@ -0,0 +1,113 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Sequence + +import pandas as pd +import pyarrow as pa + +from bigframes import dtypes +from bigframes.core.logging import data_types +import bigframes.pandas as bpd + + +def encode_types(inputs: Sequence[dtypes.Dtype]) -> str: + encoded_val = 0 + for t in inputs: + encoded_val = encoded_val | data_types._get_dtype_mask(t) + + return f"{encoded_val:x}" + + +def test_get_type_refs_no_op(scalars_df_index): + node = scalars_df_index._block._expr.node + expected_types: list[dtypes.Dtype] = [] + + assert data_types.encode_type_refs(node) == encode_types(expected_types) + + +def test_get_type_refs_projection(scalars_df_index): + node = ( + scalars_df_index["datetime_col"] - scalars_df_index["datetime_col"] + )._block._expr.node + expected_types = [dtypes.DATETIME_DTYPE, dtypes.TIMEDELTA_DTYPE] + + assert data_types.encode_type_refs(node) == encode_types(expected_types) + + +def test_get_type_refs_filter(scalars_df_index): + node = scalars_df_index[scalars_df_index["int64_col"] > 0]._block._expr.node + expected_types = [dtypes.INT_DTYPE, dtypes.BOOL_DTYPE] + + assert data_types.encode_type_refs(node) == encode_types(expected_types) + + +def test_get_type_refs_order_by(scalars_df_index): + node = scalars_df_index.sort_index()._block._expr.node + expected_types = [dtypes.INT_DTYPE] + + assert data_types.encode_type_refs(node) == encode_types(expected_types) + + +def test_get_type_refs_join(scalars_df_index): + node = ( + scalars_df_index[["int64_col"]].merge( + scalars_df_index[["float64_col"]], + left_on="int64_col", + right_on="float64_col", + ) + )._block._expr.node + expected_types = [dtypes.INT_DTYPE, dtypes.FLOAT_DTYPE] + + assert data_types.encode_type_refs(node) == encode_types(expected_types) + + +def test_get_type_refs_isin(scalars_df_index): + node = scalars_df_index["string_col"].isin(["a"])._block._expr.node + expected_types = 
[dtypes.STRING_DTYPE, dtypes.BOOL_DTYPE] + + assert data_types.encode_type_refs(node) == encode_types(expected_types) + + +def test_get_type_refs_agg(scalars_df_index): + node = scalars_df_index[["bool_col", "string_col"]].count()._block._expr.node + expected_types = [ + dtypes.INT_DTYPE, + dtypes.BOOL_DTYPE, + dtypes.STRING_DTYPE, + dtypes.FLOAT_DTYPE, + ] + + assert data_types.encode_type_refs(node) == encode_types(expected_types) + + +def test_get_type_refs_window(scalars_df_index): + node = ( + scalars_df_index[["string_col", "bool_col"]] + .groupby("string_col") + .rolling(window=3) + .count() + ._block._expr.node + ) + expected_types = [dtypes.STRING_DTYPE, dtypes.BOOL_DTYPE, dtypes.INT_DTYPE] + + assert data_types.encode_type_refs(node) == encode_types(expected_types) + + +def test_get_type_refs_explode(): + df = bpd.DataFrame({"A": ["a", "b"], "B": [[1, 2], [3, 4, 5]]}) + node = df.explode("B")._block._expr.node + expected_types = [pd.ArrowDtype(pa.list_(pa.int64()))] + + assert data_types.encode_type_refs(node) == encode_types(expected_types) diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index da87568c919..fad8f5b2b50 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -165,7 +165,7 @@ def execution_metadata(self) -> ExecutionMetadata: def schema(self) -> Any: return schema - def batches(self) -> ResultsIterator: + def batches(self, sample_rate=None) -> ResultsIterator: return ResultsIterator( arrow_batches_val, self.schema, diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index d2a157b1319..0f7b782b66d 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -4524,7 +4524,7 @@ def test_df_kurt(scalars_dfs): "n_default", ], ) -def test_sample(scalars_dfs, frac, n, random_state): +def test_df_to_pandas_sample(scalars_dfs, frac, n, random_state): scalars_df, _ = scalars_dfs df = 
scalars_df.sample(frac=frac, n=n, random_state=random_state) bf_result = df.to_pandas() @@ -4535,7 +4535,7 @@ def test_sample(scalars_dfs, frac, n, random_state): assert bf_result.shape[1] == scalars_df.shape[1] -def test_sample_determinism(penguins_df_default_index): +def test_df_to_pandas_sample_determinism(penguins_df_default_index): df = penguins_df_default_index.sample(n=100, random_state=12345).head(15) bf_result = df.to_pandas() bf_result2 = df.to_pandas() @@ -4543,7 +4543,7 @@ def test_sample_determinism(penguins_df_default_index): pandas.testing.assert_frame_equal(bf_result, bf_result2) -def test_sample_raises_value_error(scalars_dfs): +def test_df_to_pandas_sample_raises_value_error(scalars_dfs): scalars_df, _ = scalars_dfs with pytest.raises( ValueError, match="Only one of 'n' or 'frac' parameter can be specified." diff --git a/tests/unit/bigquery/test_ml.py b/tests/unit/bigquery/test_ml.py index 063ddafccae..96b97d68fe3 100644 --- a/tests/unit/bigquery/test_ml.py +++ b/tests/unit/bigquery/test_ml.py @@ -145,3 +145,21 @@ def test_global_explain_with_pandas_series_model(read_gbq_query_mock): generated_sql = read_gbq_query_mock.call_args[0][0] assert "ML.GLOBAL_EXPLAIN" in generated_sql assert f"MODEL `{MODEL_NAME}`" in generated_sql + + +@mock.patch("bigframes.pandas.read_gbq_query") +@mock.patch("bigframes.pandas.read_pandas") +def test_transform_with_pandas_dataframe(read_pandas_mock, read_gbq_query_mock): + df = pd.DataFrame({"col1": [1, 2, 3]}) + read_pandas_mock.return_value._to_sql_query.return_value = ( + "SELECT * FROM `pandas_df`", + [], + [], + ) + ml_ops.transform(MODEL_SERIES, input_=df) + read_pandas_mock.assert_called_once() + read_gbq_query_mock.assert_called_once() + generated_sql = read_gbq_query_mock.call_args[0][0] + assert "ML.TRANSFORM" in generated_sql + assert f"MODEL `{MODEL_NAME}`" in generated_sql + assert "(SELECT * FROM `pandas_df`)" in generated_sql diff --git a/tests/unit/bigquery/test_obj.py b/tests/unit/bigquery/test_obj.py 
new file mode 100644 index 00000000000..9eac234b8bc --- /dev/null +++ b/tests/unit/bigquery/test_obj.py @@ -0,0 +1,125 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +from unittest import mock + +import bigframes.bigquery.obj as obj +import bigframes.operations as ops +import bigframes.series + + +def create_mock_series(): + result = mock.create_autospec(bigframes.series.Series, instance=True) + result.copy.return_value = result + return result + + +def test_fetch_metadata_op_structure(): + op = ops.obj_fetch_metadata_op + assert op.name == "obj_fetch_metadata" + + +def test_get_access_url_op_structure(): + op = ops.ObjGetAccessUrl(mode="r") + assert op.name == "obj_get_access_url" + assert op.mode == "r" + assert op.duration is None + + +def test_get_access_url_with_duration_op_structure(): + op = ops.ObjGetAccessUrl(mode="rw", duration=3600000000) + assert op.name == "obj_get_access_url" + assert op.mode == "rw" + assert op.duration == 3600000000 + + +def test_make_ref_op_structure(): + op = ops.obj_make_ref_op + assert op.name == "obj_make_ref" + + +def test_make_ref_json_op_structure(): + op = ops.obj_make_ref_json_op + assert op.name == "obj_make_ref_json" + + +def test_fetch_metadata_calls_apply_unary_op(): + series = create_mock_series() + + obj.fetch_metadata(series) + + series._apply_unary_op.assert_called_once() + args, _ = series._apply_unary_op.call_args + assert args[0] == ops.obj_fetch_metadata_op + + 
+def test_get_access_url_calls_apply_unary_op_without_duration(): + series = create_mock_series() + + obj.get_access_url(series, mode="r") + + series._apply_unary_op.assert_called_once() + args, _ = series._apply_unary_op.call_args + assert isinstance(args[0], ops.ObjGetAccessUrl) + assert args[0].mode == "r" + assert args[0].duration is None + + +def test_get_access_url_calls_apply_unary_op_with_duration(): + series = create_mock_series() + duration = datetime.timedelta(hours=1) + + obj.get_access_url(series, mode="rw", duration=duration) + + series._apply_unary_op.assert_called_once() + args, _ = series._apply_unary_op.call_args + assert isinstance(args[0], ops.ObjGetAccessUrl) + assert args[0].mode == "rw" + # 1 hour = 3600 seconds = 3600 * 1000 * 1000 microseconds + assert args[0].duration == 3600000000 + + +def test_make_ref_calls_apply_binary_op_with_authorizer(): + uri = create_mock_series() + auth = create_mock_series() + + obj.make_ref(uri, authorizer=auth) + + uri._apply_binary_op.assert_called_once() + args, _ = uri._apply_binary_op.call_args + assert args[0] == auth + assert args[1] == ops.obj_make_ref_op + + +def test_make_ref_calls_apply_binary_op_with_authorizer_string(): + uri = create_mock_series() + auth = "us.bigframes-test-connection" + + obj.make_ref(uri, authorizer=auth) + + uri._apply_binary_op.assert_called_once() + args, _ = uri._apply_binary_op.call_args + assert args[0] == auth + assert args[1] == ops.obj_make_ref_op + + +def test_make_ref_calls_apply_unary_op_without_authorizer(): + json_val = create_mock_series() + + obj.make_ref(json_val) + + json_val._apply_unary_op.assert_called_once() + args, _ = json_val._apply_unary_op.call_args + assert args[0] == ops.obj_make_ref_json_op diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/out.sql index d31b21f56ba..0be2fea80b2 100644 --- 
a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/out.sql @@ -1,12 +1,15 @@ WITH `bfcte_0` AS ( SELECT - `bool_col` + `bool_col`, + `int64_col` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` ), `bfcte_1` AS ( SELECT - COALESCE(LOGICAL_AND(`bool_col`), TRUE) AS `bfcol_1` + COALESCE(LOGICAL_AND(`bool_col`), TRUE) AS `bfcol_2`, + COALESCE(LOGICAL_AND(`int64_col` <> 0), TRUE) AS `bfcol_3` FROM `bfcte_0` ) SELECT - `bfcol_1` AS `bool_col` + `bfcol_2` AS `bool_col`, + `bfcol_3` AS `int64_col` FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all_w_window/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_out.sql rename to tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all_w_window/out.sql diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any/out.sql index 03b0d5c151d..ae62e22e36d 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any/out.sql @@ -1,12 +1,15 @@ WITH `bfcte_0` AS ( SELECT - `bool_col` + `bool_col`, + `int64_col` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` ), `bfcte_1` AS ( SELECT - COALESCE(LOGICAL_OR(`bool_col`), FALSE) AS `bfcol_1` + COALESCE(LOGICAL_OR(`bool_col`), FALSE) AS `bfcol_2`, + COALESCE(LOGICAL_OR(`int64_col` <> 0), FALSE) AS `bfcol_3` FROM `bfcte_0` ) SELECT - `bfcol_1` AS `bool_col` + `bfcol_2` AS `bool_col`, + `bfcol_3` AS 
`int64_col` FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any/window_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_w_window/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any/window_out.sql rename to tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_any_w_window/out.sql diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_diff_w_date/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_diff_w_date/out.sql new file mode 100644 index 00000000000..4f1729d2e28 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_diff_w_date/out.sql @@ -0,0 +1,15 @@ +WITH `bfcte_0` AS ( + SELECT + `date_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CAST(FLOOR( + DATE_DIFF(`date_col`, LAG(`date_col`, 1) OVER (ORDER BY `date_col` ASC NULLS LAST), DAY) * 86400000000 + ) AS INT64) AS `bfcol_1` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `diff_date` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_product/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_product/out.sql index bec1527137e..94ca21988e9 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_product/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_product/out.sql @@ -7,7 +7,7 @@ WITH `bfcte_0` AS ( CASE WHEN LOGICAL_OR(`int64_col` = 0) THEN 0 - ELSE EXP(SUM(CASE WHEN `int64_col` = 0 THEN 0 ELSE LN(ABS(`int64_col`)) END)) * IF(MOD(SUM(CASE WHEN SIGN(`int64_col`) < 0 THEN 1 ELSE 0 END), 2) = 1, -1, 1) + ELSE POWER(2, 
SUM(IF(`int64_col` = 0, 0, LOG(ABS(`int64_col`), 2)))) * POWER(-1, MOD(SUM(CASE WHEN SIGN(`int64_col`) = -1 THEN 1 ELSE 0 END), 2)) END AS `bfcol_1` FROM `bfcte_0` ) diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_product/window_partition_out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_product/window_partition_out.sql index 9c1650222a0..c5f12f70093 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_product/window_partition_out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_product/window_partition_out.sql @@ -9,15 +9,15 @@ WITH `bfcte_0` AS ( CASE WHEN LOGICAL_OR(`int64_col` = 0) OVER (PARTITION BY `string_col`) THEN 0 - ELSE EXP( - SUM(CASE WHEN `int64_col` = 0 THEN 0 ELSE LN(ABS(`int64_col`)) END) OVER (PARTITION BY `string_col`) - ) * IF( + ELSE POWER( + 2, + SUM(IF(`int64_col` = 0, 0, LOG(ABS(`int64_col`), 2))) OVER (PARTITION BY `string_col`) + ) * POWER( + -1, MOD( - SUM(CASE WHEN SIGN(`int64_col`) < 0 THEN 1 ELSE 0 END) OVER (PARTITION BY `string_col`), + SUM(CASE WHEN SIGN(`int64_col`) = -1 THEN 1 ELSE 0 END) OVER (PARTITION BY `string_col`), 2 - ) = 1, - -1, - 1 + ) ) END AS `bfcol_2` FROM `bfcte_0` diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_quantile/out.sql b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_quantile/out.sql index b79d8d381f0..e337356d965 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_quantile/out.sql +++ b/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_quantile/out.sql @@ -1,14 +1,17 @@ WITH `bfcte_0` AS ( SELECT + `bool_col`, `int64_col` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` ), `bfcte_1` AS ( SELECT - PERCENTILE_CONT(`int64_col`, 0.5) OVER () AS `bfcol_1`, - CAST(FLOOR(PERCENTILE_CONT(`int64_col`, 
0.5) OVER ()) AS INT64) AS `bfcol_2` + PERCENTILE_CONT(`int64_col`, 0.5) OVER () AS `bfcol_4`, + PERCENTILE_CONT(CAST(`bool_col` AS INT64), 0.5) OVER () AS `bfcol_5`, + CAST(FLOOR(PERCENTILE_CONT(`int64_col`, 0.5) OVER ()) AS INT64) AS `bfcol_6` FROM `bfcte_0` ) SELECT - `bfcol_1` AS `quantile`, - `bfcol_2` AS `quantile_floor` + `bfcol_4` AS `int64`, + `bfcol_5` AS `bool`, + `bfcol_6` AS `int64_w_floor` FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py index c15d70478a3..d9bfb1f5f3d 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py +++ b/tests/unit/core/compile/sqlglot/aggregations/test_unary_compiler.py @@ -63,41 +63,47 @@ def _apply_unary_window_op( def test_all(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["bool_col", "int64_col"]] + ops_map = { + "bool_col": agg_ops.AllOp().as_expr("bool_col"), + "int64_col": agg_ops.AllOp().as_expr("int64_col"), + } + sql = _apply_unary_agg_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) + + snapshot.assert_match(sql, "out.sql") + + +def test_all_w_window(scalar_types_df: bpd.DataFrame, snapshot): col_name = "bool_col" bf_df = scalar_types_df[[col_name]] agg_expr = agg_ops.AllOp().as_expr(col_name) - sql = _apply_unary_agg_ops(bf_df, [agg_expr], [col_name]) - - snapshot.assert_match(sql, "out.sql") # Window tests window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) sql_window = _apply_unary_window_op(bf_df, agg_expr, window, "agg_bool") - snapshot.assert_match(sql_window, "window_out.sql") - - bf_df_str = scalar_types_df[[col_name, "string_col"]] - window_partition = window_spec.WindowSpec( - grouping_keys=(expression.deref("string_col"),), - ordering=(ordering.descending_over(col_name),), - ) - sql_window_partition = _apply_unary_window_op( - bf_df_str, agg_expr, window_partition, "agg_bool" - ) 
- snapshot.assert_match(sql_window_partition, "window_partition_out.sql") + snapshot.assert_match(sql_window, "out.sql") def test_any(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["bool_col", "int64_col"]] + ops_map = { + "bool_col": agg_ops.AnyOp().as_expr("bool_col"), + "int64_col": agg_ops.AnyOp().as_expr("int64_col"), + } + sql = _apply_unary_agg_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) + + snapshot.assert_match(sql, "out.sql") + + +def test_any_w_window(scalar_types_df: bpd.DataFrame, snapshot): col_name = "bool_col" bf_df = scalar_types_df[[col_name]] agg_expr = agg_ops.AnyOp().as_expr(col_name) - sql = _apply_unary_agg_ops(bf_df, [agg_expr], [col_name]) - - snapshot.assert_match(sql, "out.sql") # Window tests window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) sql_window = _apply_unary_window_op(bf_df, agg_expr, window, "agg_bool") - snapshot.assert_match(sql_window, "window_out.sql") + snapshot.assert_match(sql_window, "out.sql") def test_approx_quartiles(scalar_types_df: bpd.DataFrame, snapshot): @@ -247,6 +253,17 @@ def test_diff_w_datetime(scalar_types_df: bpd.DataFrame, snapshot): snapshot.assert_match(sql, "out.sql") +def test_diff_w_date(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "date_col" + bf_df_date = scalar_types_df[[col_name]] + window = window_spec.WindowSpec(ordering=(ordering.ascending_over(col_name),)) + op = agg_exprs.UnaryAggregation( + agg_ops.DiffOp(periods=1), expression.deref(col_name) + ) + sql = _apply_unary_window_op(bf_df_date, op, window, "diff_date") + snapshot.assert_match(sql, "out.sql") + + def test_diff_w_timestamp(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df_timestamp = scalar_types_df[[col_name]] @@ -474,12 +491,12 @@ def test_qcut(scalar_types_df: bpd.DataFrame, snapshot): def test_quantile(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "int64_col" - bf_df = scalar_types_df[[col_name]] + bf_df = 
scalar_types_df[["int64_col", "bool_col"]] agg_ops_map = { - "quantile": agg_ops.QuantileOp(q=0.5).as_expr(col_name), - "quantile_floor": agg_ops.QuantileOp(q=0.5, should_floor_result=True).as_expr( - col_name + "int64": agg_ops.QuantileOp(q=0.5).as_expr("int64_col"), + "bool": agg_ops.QuantileOp(q=0.5).as_expr("bool_col"), + "int64_w_floor": agg_ops.QuantileOp(q=0.5, should_floor_result=True).as_expr( + "int64_col" ), } sql = _apply_unary_agg_ops( diff --git a/tests/unit/core/compile/sqlglot/aggregations/test_windows.py b/tests/unit/core/compile/sqlglot/aggregations/test_windows.py index e6343a63d78..d1204c60104 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/test_windows.py +++ b/tests/unit/core/compile/sqlglot/aggregations/test_windows.py @@ -127,7 +127,7 @@ def test_apply_window_if_present_range_bounded(self): ) self.assertEqual( result.sql(dialect="bigquery"), - "value OVER (ORDER BY `col1` ASC NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)", + "value OVER (ORDER BY `col1` ASC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)", ) def test_apply_window_if_present_range_bounded_timedelta(self): @@ -142,7 +142,7 @@ def test_apply_window_if_present_range_bounded_timedelta(self): ) self.assertEqual( result.sql(dialect="bigquery"), - "value OVER (ORDER BY `col1` ASC NULLS LAST RANGE BETWEEN 86400000000 PRECEDING AND 43200000000 FOLLOWING)", + "value OVER (ORDER BY `col1` ASC RANGE BETWEEN 86400000000 PRECEDING AND 43200000000 FOLLOWING)", ) def test_apply_window_if_present_all_params(self): diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_and_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_and_op/out.sql index 634a936a0e9..7e46e10708d 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_and_op/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_and_op/out.sql @@ -21,11 +21,22 @@ WITH `bfcte_0` AS ( 
`bfcol_9` AS `bfcol_17`, `bfcol_7` AND `bfcol_7` AS `bfcol_18` FROM `bfcte_1` +), `bfcte_3` AS ( + SELECT + *, + `bfcol_14` AS `bfcol_24`, + `bfcol_15` AS `bfcol_25`, + `bfcol_16` AS `bfcol_26`, + `bfcol_17` AS `bfcol_27`, + `bfcol_18` AS `bfcol_28`, + IF(`bfcol_15` = FALSE, `bfcol_15`, NULL) AS `bfcol_29` + FROM `bfcte_2` ) SELECT - `bfcol_14` AS `rowindex`, - `bfcol_15` AS `bool_col`, - `bfcol_16` AS `int64_col`, - `bfcol_17` AS `int_and_int`, - `bfcol_18` AS `bool_and_bool` -FROM `bfcte_2` \ No newline at end of file + `bfcol_24` AS `rowindex`, + `bfcol_25` AS `bool_col`, + `bfcol_26` AS `int64_col`, + `bfcol_27` AS `int_and_int`, + `bfcol_28` AS `bool_and_bool`, + `bfcol_29` AS `bool_and_null` +FROM `bfcte_3` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_or_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_or_op/out.sql index 0069b07d8f4..c8e9cf65a91 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_or_op/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_or_op/out.sql @@ -21,11 +21,22 @@ WITH `bfcte_0` AS ( `bfcol_9` AS `bfcol_17`, `bfcol_7` OR `bfcol_7` AS `bfcol_18` FROM `bfcte_1` +), `bfcte_3` AS ( + SELECT + *, + `bfcol_14` AS `bfcol_24`, + `bfcol_15` AS `bfcol_25`, + `bfcol_16` AS `bfcol_26`, + `bfcol_17` AS `bfcol_27`, + `bfcol_18` AS `bfcol_28`, + IF(`bfcol_15` = TRUE, `bfcol_15`, NULL) AS `bfcol_29` + FROM `bfcte_2` ) SELECT - `bfcol_14` AS `rowindex`, - `bfcol_15` AS `bool_col`, - `bfcol_16` AS `int64_col`, - `bfcol_17` AS `int_and_int`, - `bfcol_18` AS `bool_and_bool` -FROM `bfcte_2` \ No newline at end of file + `bfcol_24` AS `rowindex`, + `bfcol_25` AS `bool_col`, + `bfcol_26` AS `int64_col`, + `bfcol_27` AS `int_and_int`, + `bfcol_28` AS `bool_and_bool`, + `bfcol_29` AS `bool_and_null` +FROM `bfcte_3` \ No newline at end of file diff --git 
a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_xor_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_xor_op/out.sql index e4c87ed7208..d6a081cbbde 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_xor_op/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_bool_ops/test_xor_op/out.sql @@ -19,13 +19,33 @@ WITH `bfcte_0` AS ( `bfcol_7` AS `bfcol_15`, `bfcol_8` AS `bfcol_16`, `bfcol_9` AS `bfcol_17`, - `bfcol_7` AND NOT `bfcol_7` OR NOT `bfcol_7` AND `bfcol_7` AS `bfcol_18` + ( + `bfcol_7` AND NOT `bfcol_7` + ) OR ( + NOT `bfcol_7` AND `bfcol_7` + ) AS `bfcol_18` FROM `bfcte_1` +), `bfcte_3` AS ( + SELECT + *, + `bfcol_14` AS `bfcol_24`, + `bfcol_15` AS `bfcol_25`, + `bfcol_16` AS `bfcol_26`, + `bfcol_17` AS `bfcol_27`, + `bfcol_18` AS `bfcol_28`, + ( + `bfcol_15` AND NOT CAST(NULL AS BOOLEAN) + ) + OR ( + NOT `bfcol_15` AND CAST(NULL AS BOOLEAN) + ) AS `bfcol_29` + FROM `bfcte_2` ) SELECT - `bfcol_14` AS `rowindex`, - `bfcol_15` AS `bool_col`, - `bfcol_16` AS `int64_col`, - `bfcol_17` AS `int_and_int`, - `bfcol_18` AS `bool_and_bool` -FROM `bfcte_2` \ No newline at end of file + `bfcol_24` AS `rowindex`, + `bfcol_25` AS `bool_col`, + `bfcol_26` AS `int64_col`, + `bfcol_27` AS `int_and_int`, + `bfcol_28` AS `bool_and_bool`, + `bfcol_29` AS `bool_and_null` +FROM `bfcte_3` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_eq_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_eq_numeric/out.sql index 9c7c19e61c9..a21e0089416 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_eq_numeric/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_eq_numeric/out.sql @@ -29,7 +29,7 @@ WITH `bfcte_0` AS ( `bfcol_16` AS `bfcol_26`, `bfcol_17` AS `bfcol_27`, `bfcol_18` AS 
`bfcol_28`, - `bfcol_15` = CAST(`bfcol_16` AS INT64) AS `bfcol_29` + `bfcol_15` IS NULL AS `bfcol_29` FROM `bfcte_2` ), `bfcte_4` AS ( SELECT @@ -40,15 +40,28 @@ WITH `bfcte_0` AS ( `bfcol_27` AS `bfcol_39`, `bfcol_28` AS `bfcol_40`, `bfcol_29` AS `bfcol_41`, - CAST(`bfcol_26` AS INT64) = `bfcol_25` AS `bfcol_42` + `bfcol_25` = CAST(`bfcol_26` AS INT64) AS `bfcol_42` FROM `bfcte_3` +), `bfcte_5` AS ( + SELECT + *, + `bfcol_36` AS `bfcol_50`, + `bfcol_37` AS `bfcol_51`, + `bfcol_38` AS `bfcol_52`, + `bfcol_39` AS `bfcol_53`, + `bfcol_40` AS `bfcol_54`, + `bfcol_41` AS `bfcol_55`, + `bfcol_42` AS `bfcol_56`, + CAST(`bfcol_38` AS INT64) = `bfcol_37` AS `bfcol_57` + FROM `bfcte_4` ) SELECT - `bfcol_36` AS `rowindex`, - `bfcol_37` AS `int64_col`, - `bfcol_38` AS `bool_col`, - `bfcol_39` AS `int_ne_int`, - `bfcol_40` AS `int_ne_1`, - `bfcol_41` AS `int_ne_bool`, - `bfcol_42` AS `bool_ne_int` -FROM `bfcte_4` \ No newline at end of file + `bfcol_50` AS `rowindex`, + `bfcol_51` AS `int64_col`, + `bfcol_52` AS `bool_col`, + `bfcol_53` AS `int_eq_int`, + `bfcol_54` AS `int_eq_1`, + `bfcol_55` AS `int_eq_null`, + `bfcol_56` AS `int_eq_bool`, + `bfcol_57` AS `bool_eq_int` +FROM `bfcte_5` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_is_in/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_is_in/out.sql index 197ed279faf..ec85f060dac 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_is_in/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_is_in/out.sql @@ -1,32 +1,35 @@ WITH `bfcte_0` AS ( SELECT + `bool_col`, `float64_col`, `int64_col` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` ), `bfcte_1` AS ( SELECT *, - COALESCE(`int64_col` IN (1, 2, 3), FALSE) AS `bfcol_2`, - ( - `int64_col` IS NULL - ) OR `int64_col` IN (123456) AS `bfcol_3`, - COALESCE(`int64_col` IN (1.0, 2.0, 3.0), 
FALSE) AS `bfcol_4`, - FALSE AS `bfcol_5`, - COALESCE(`int64_col` IN (2.5, 3), FALSE) AS `bfcol_6`, + COALESCE(`bool_col` IN (TRUE, FALSE), FALSE) AS `bfcol_3`, + COALESCE(`int64_col` IN (1, 2, 3), FALSE) AS `bfcol_4`, + `int64_col` IS NULL AS `bfcol_5`, + COALESCE(`int64_col` IN (1.0, 2.0, 3.0), FALSE) AS `bfcol_6`, FALSE AS `bfcol_7`, - COALESCE(`int64_col` IN (123456), FALSE) AS `bfcol_8`, + COALESCE(`int64_col` IN (2.5, 3), FALSE) AS `bfcol_8`, + FALSE AS `bfcol_9`, + FALSE AS `bfcol_10`, + COALESCE(`int64_col` IN (123456), FALSE) AS `bfcol_11`, ( `float64_col` IS NULL - ) OR `float64_col` IN (1, 2, 3) AS `bfcol_9` + ) OR `float64_col` IN (1, 2, 3) AS `bfcol_12` FROM `bfcte_0` ) SELECT - `bfcol_2` AS `ints`, - `bfcol_3` AS `ints_w_null`, - `bfcol_4` AS `floats`, - `bfcol_5` AS `strings`, - `bfcol_6` AS `mixed`, - `bfcol_7` AS `empty`, - `bfcol_8` AS `ints_wo_match_nulls`, - `bfcol_9` AS `float_in_ints` + `bfcol_3` AS `bools`, + `bfcol_4` AS `ints`, + `bfcol_5` AS `ints_w_null`, + `bfcol_6` AS `floats`, + `bfcol_7` AS `strings`, + `bfcol_8` AS `mixed`, + `bfcol_9` AS `empty`, + `bfcol_10` AS `empty_wo_match_nulls`, + `bfcol_11` AS `ints_wo_match_nulls`, + `bfcol_12` AS `float_in_ints` FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_ne_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_ne_numeric/out.sql index 417d24aa725..1a1ff6e44d2 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_ne_numeric/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_ne_numeric/out.sql @@ -29,7 +29,9 @@ WITH `bfcte_0` AS ( `bfcol_16` AS `bfcol_26`, `bfcol_17` AS `bfcol_27`, `bfcol_18` AS `bfcol_28`, - `bfcol_15` <> CAST(`bfcol_16` AS INT64) AS `bfcol_29` + ( + `bfcol_15` + ) IS NOT NULL AS `bfcol_29` FROM `bfcte_2` ), `bfcte_4` AS ( SELECT @@ -40,15 +42,28 @@ WITH 
`bfcte_0` AS ( `bfcol_27` AS `bfcol_39`, `bfcol_28` AS `bfcol_40`, `bfcol_29` AS `bfcol_41`, - CAST(`bfcol_26` AS INT64) <> `bfcol_25` AS `bfcol_42` + `bfcol_25` <> CAST(`bfcol_26` AS INT64) AS `bfcol_42` FROM `bfcte_3` +), `bfcte_5` AS ( + SELECT + *, + `bfcol_36` AS `bfcol_50`, + `bfcol_37` AS `bfcol_51`, + `bfcol_38` AS `bfcol_52`, + `bfcol_39` AS `bfcol_53`, + `bfcol_40` AS `bfcol_54`, + `bfcol_41` AS `bfcol_55`, + `bfcol_42` AS `bfcol_56`, + CAST(`bfcol_38` AS INT64) <> `bfcol_37` AS `bfcol_57` + FROM `bfcte_4` ) SELECT - `bfcol_36` AS `rowindex`, - `bfcol_37` AS `int64_col`, - `bfcol_38` AS `bool_col`, - `bfcol_39` AS `int_ne_int`, - `bfcol_40` AS `int_ne_1`, - `bfcol_41` AS `int_ne_bool`, - `bfcol_42` AS `bool_ne_int` -FROM `bfcte_4` \ No newline at end of file + `bfcol_50` AS `rowindex`, + `bfcol_51` AS `int64_col`, + `bfcol_52` AS `bool_col`, + `bfcol_53` AS `int_ne_int`, + `bfcol_54` AS `int_ne_1`, + `bfcol_55` AS `int_ne_null`, + `bfcol_56` AS `int_ne_bool`, + `bfcol_57` AS `bool_ne_int` +FROM `bfcte_5` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime/out.sql new file mode 100644 index 00000000000..2a1bd0e2e21 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime/out.sql @@ -0,0 +1,58 @@ +WITH `bfcte_0` AS ( + SELECT + `rowindex`, + `timestamp_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CAST(TIMESTAMP_MICROS( + CAST(CAST(`rowindex` AS BIGNUMERIC) * 86400000000 + CAST(UNIX_MICROS(CAST(`timestamp_col` AS TIMESTAMP)) AS BIGNUMERIC) AS INT64) + ) AS TIMESTAMP) AS `bfcol_2`, + CAST(DATETIME( + CASE + WHEN ( + MOD( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + 1 + ) * 3 = 12 
+ THEN CAST(FLOOR( + IEEE_DIVIDE( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + ) AS INT64) + 1 + ELSE CAST(FLOOR( + IEEE_DIVIDE( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + ) AS INT64) + END, + CASE + WHEN ( + MOD( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + 1 + ) * 3 = 12 + THEN 1 + ELSE ( + MOD( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + 1 + ) * 3 + 1 + END, + 1, + 0, + 0, + 0 + ) - INTERVAL 1 DAY AS TIMESTAMP) AS `bfcol_3` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `fixed_freq`, + `bfcol_3` AS `non_fixed_freq` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_fixed/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_fixed/out.sql new file mode 100644 index 00000000000..8a759e85f98 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_fixed/out.sql @@ -0,0 +1,16 @@ +WITH `bfcte_0` AS ( + SELECT + `rowindex`, + `timestamp_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CAST(TIMESTAMP_MICROS( + CAST(CAST(`rowindex` AS BIGNUMERIC) * 86400000000 + CAST(UNIX_MICROS(CAST(`timestamp_col` AS TIMESTAMP)) AS BIGNUMERIC) AS INT64) + ) AS TIMESTAMP) AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `fixed_freq` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_month/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_month/out.sql new file mode 100644 
index 00000000000..a9e64fead63 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_month/out.sql @@ -0,0 +1,50 @@ +WITH `bfcte_0` AS ( + SELECT + `rowindex`, + `timestamp_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CAST(TIMESTAMP( + DATETIME( + CASE + WHEN MOD( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 12 + EXTRACT(MONTH FROM `timestamp_col`) - 1, + 12 + ) + 1 = 12 + THEN CAST(FLOOR( + IEEE_DIVIDE( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 12 + EXTRACT(MONTH FROM `timestamp_col`) - 1, + 12 + ) + ) AS INT64) + 1 + ELSE CAST(FLOOR( + IEEE_DIVIDE( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 12 + EXTRACT(MONTH FROM `timestamp_col`) - 1, + 12 + ) + ) AS INT64) + END, + CASE + WHEN MOD( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 12 + EXTRACT(MONTH FROM `timestamp_col`) - 1, + 12 + ) + 1 = 12 + THEN 1 + ELSE MOD( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 12 + EXTRACT(MONTH FROM `timestamp_col`) - 1, + 12 + ) + 1 + 1 + END, + 1, + 0, + 0, + 0 + ) + ) - INTERVAL 1 DAY AS TIMESTAMP) AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `non_fixed_freq_monthly` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_quarter/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_quarter/out.sql new file mode 100644 index 00000000000..58064855a9e --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_quarter/out.sql @@ -0,0 +1,54 @@ +WITH `bfcte_0` AS ( + SELECT + `rowindex`, + `timestamp_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CAST(DATETIME( + CASE + WHEN ( + MOD( + `rowindex` * 1 + EXTRACT(YEAR FROM 
`timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + 1 + ) * 3 = 12 + THEN CAST(FLOOR( + IEEE_DIVIDE( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + ) AS INT64) + 1 + ELSE CAST(FLOOR( + IEEE_DIVIDE( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + ) AS INT64) + END, + CASE + WHEN ( + MOD( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + 1 + ) * 3 = 12 + THEN 1 + ELSE ( + MOD( + `rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) * 4 + EXTRACT(QUARTER FROM `timestamp_col`) - 1, + 4 + ) + 1 + ) * 3 + 1 + END, + 1, + 0, + 0, + 0 + ) - INTERVAL 1 DAY AS TIMESTAMP) AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `non_fixed_freq` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_week/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_week/out.sql new file mode 100644 index 00000000000..142f8561f48 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_week/out.sql @@ -0,0 +1,18 @@ +WITH `bfcte_0` AS ( + SELECT + `rowindex`, + `timestamp_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CAST(TIMESTAMP_MICROS( + CAST(CAST(`rowindex` AS BIGNUMERIC) * 604800000000 + CAST(UNIX_MICROS( + TIMESTAMP_TRUNC(CAST(`timestamp_col` AS TIMESTAMP), WEEK(MONDAY)) + INTERVAL 6 DAY + ) AS BIGNUMERIC) AS INT64) + ) AS TIMESTAMP) AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `non_fixed_freq_weekly` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_year/out.sql 
b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_year/out.sql new file mode 100644 index 00000000000..ab77a9d1906 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_integer_label_to_datetime_year/out.sql @@ -0,0 +1,14 @@ +WITH `bfcte_0` AS ( + SELECT + `rowindex`, + `timestamp_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + CAST(TIMESTAMP(DATETIME(`rowindex` * 1 + EXTRACT(YEAR FROM `timestamp_col`) + 1, 1, 1, 0, 0, 0)) - INTERVAL 1 DAY AS TIMESTAMP) AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `non_fixed_freq_yearly` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_binary_remote_function_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_binary_remote_function_op/out.sql new file mode 100644 index 00000000000..7272a3a5be1 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_binary_remote_function_op/out.sql @@ -0,0 +1,14 @@ +WITH `bfcte_0` AS ( + SELECT + `float64_col`, + `int64_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + SELECT + *, + `my_project`.`my_dataset`.`my_routine`(`int64_col`, `float64_col`) AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_2` AS `int64_col` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_map/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_map/out.sql index 22628c6a4b4..49eada22301 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_map/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_map/out.sql @@ -5,7 +5,13 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE `string_col` WHEN 
'value1' THEN 'mapped1' ELSE `string_col` END AS `bfcol_1` + CASE + WHEN `string_col` = 'value1' + THEN 'mapped1' + WHEN `string_col` IS NULL + THEN 'UNKNOWN' + ELSE `string_col` + END AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_partition_out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_nary_remote_function_op/out.sql similarity index 51% rename from tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_partition_out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_nary_remote_function_op/out.sql index 23357817c1d..a6641b13db6 100644 --- a/tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_partition_out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_nary_remote_function_op/out.sql @@ -1,14 +1,15 @@ WITH `bfcte_0` AS ( SELECT - `bool_col`, + `float64_col`, + `int64_col`, `string_col` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` ), `bfcte_1` AS ( SELECT *, - COALESCE(LOGICAL_AND(`bool_col`) OVER (PARTITION BY `string_col`), TRUE) AS `bfcol_2` + `my_project`.`my_dataset`.`my_routine`(`int64_col`, `float64_col`, `string_col`) AS `bfcol_3` FROM `bfcte_0` ) SELECT - `bfcol_2` AS `agg_bool` + `bfcol_3` AS `int64_col` FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_remote_function_op/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_remote_function_op/out.sql new file mode 100644 index 00000000000..dee0d35355b --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_remote_function_op/out.sql @@ -0,0 +1,19 @@ +WITH `bfcte_0` AS ( + SELECT + `int64_col` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_1` AS ( + 
SELECT + *, + `my_project`.`my_dataset`.`my_routine`(`int64_col`) AS `bfcol_1`, + IF( + `int64_col` IS NULL, + `int64_col`, + `my_project`.`my_dataset`.`my_routine`(`int64_col`) + ) AS `bfcol_2` + FROM `bfcte_0` +) +SELECT + `bfcol_1` AS `apply_on_null_true`, + `bfcol_2` AS `apply_on_null_false` +FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_x/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_x/out.sql index 09211270d18..826eb9f209d 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_x/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_x/out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - SAFE.ST_X(`geography_col`) AS `bfcol_1` + ST_X(`geography_col`) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_y/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_y/out.sql index 625613ae2a2..dd411820b28 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_y/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_y/out.sql @@ -5,7 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - SAFE.ST_Y(`geography_col`) AS `bfcol_1` + ST_Y(`geography_col`) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arctanh/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arctanh/out.sql index 197bf593067..dc6de62e7bc 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arctanh/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arctanh/out.sql @@ -6,9 +6,11 @@ WITH `bfcte_0` AS ( SELECT *, CASE + WHEN 
ABS(`float64_col`) < 1 + THEN ATANH(`float64_col`) WHEN ABS(`float64_col`) > 1 THEN CAST('NaN' AS FLOAT64) - ELSE ATANH(`float64_col`) + ELSE CAST('Infinity' AS FLOAT64) * `float64_col` END AS `bfcol_1` FROM `bfcte_0` ) diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_expm1/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_expm1/out.sql index 076ad584c21..13038bf8e85 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_expm1/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_expm1/out.sql @@ -5,11 +5,7 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE - WHEN `float64_col` > 709.78 - THEN CAST('Infinity' AS FLOAT64) - ELSE EXP(`float64_col`) - END - 1 AS `bfcol_1` + IF(`float64_col` > 709.78, CAST('Infinity' AS FLOAT64), EXP(`float64_col`) - 1) AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_ln/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_ln/out.sql index 776cc33e0f0..bd4cfa7c9a3 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_ln/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_ln/out.sql @@ -5,7 +5,15 @@ WITH `bfcte_0` AS ( ), `bfcte_1` AS ( SELECT *, - CASE WHEN `float64_col` <= 0 THEN CAST('NaN' AS FLOAT64) ELSE LN(`float64_col`) END AS `bfcol_1` + CASE + WHEN `float64_col` IS NULL + THEN NULL + WHEN `float64_col` > 0 + THEN LN(`float64_col`) + WHEN `float64_col` < 0 + THEN CAST('NaN' AS FLOAT64) + ELSE CAST('-Infinity' AS FLOAT64) + END AS `bfcol_1` FROM `bfcte_0` ) SELECT diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log10/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log10/out.sql index 11a318c22d5..c5bbff0e624 100644 --- 
a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log10/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log10/out.sql @@ -6,9 +6,13 @@ WITH `bfcte_0` AS ( SELECT *, CASE - WHEN `float64_col` <= 0 + WHEN `float64_col` IS NULL + THEN NULL + WHEN `float64_col` > 0 + THEN LOG(`float64_col`, 10) + WHEN `float64_col` < 0 THEN CAST('NaN' AS FLOAT64) - ELSE LOG(10, `float64_col`) + ELSE CAST('-Infinity' AS FLOAT64) END AS `bfcol_1` FROM `bfcte_0` ) diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log1p/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log1p/out.sql index 4297fff2270..22e67e24eed 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log1p/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log1p/out.sql @@ -6,9 +6,13 @@ WITH `bfcte_0` AS ( SELECT *, CASE - WHEN `float64_col` <= -1 + WHEN `float64_col` IS NULL + THEN NULL + WHEN `float64_col` > -1 + THEN LN(1 + `float64_col`) + WHEN `float64_col` < -1 THEN CAST('NaN' AS FLOAT64) - ELSE LN(1 + `float64_col`) + ELSE CAST('-Infinity' AS FLOAT64) END AS `bfcol_1` FROM `bfcte_0` ) diff --git a/tests/unit/core/compile/sqlglot/expressions/test_bool_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_bool_ops.py index 08b60d6ddf8..601fd86e4e9 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_bool_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_bool_ops.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import pandas as pd import pytest import bigframes.pandas as bpd @@ -24,6 +25,7 @@ def test_and_op(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_and_int"] = bf_df["int64_col"] & bf_df["int64_col"] bf_df["bool_and_bool"] = bf_df["bool_col"] & bf_df["bool_col"] + bf_df["bool_and_null"] = bf_df["bool_col"] & pd.NA # type: ignore snapshot.assert_match(bf_df.sql, "out.sql") @@ -32,6 +34,7 @@ def test_or_op(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_and_int"] = bf_df["int64_col"] | bf_df["int64_col"] bf_df["bool_and_bool"] = bf_df["bool_col"] | bf_df["bool_col"] + bf_df["bool_and_null"] = bf_df["bool_col"] | pd.NA # type: ignore snapshot.assert_match(bf_df.sql, "out.sql") @@ -40,4 +43,5 @@ def test_xor_op(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_and_int"] = bf_df["int64_col"] ^ bf_df["int64_col"] bf_df["bool_and_bool"] = bf_df["bool_col"] ^ bf_df["bool_col"] + bf_df["bool_and_null"] = bf_df["bool_col"] ^ pd.NA # type: ignore snapshot.assert_match(bf_df.sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py index 20dd6c5ca64..3c13bc798bc 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import pandas as pd import pytest from bigframes import operations as ops @@ -22,18 +23,23 @@ def test_is_in(scalar_types_df: bpd.DataFrame, snapshot): + bool_col = "bool_col" int_col = "int64_col" float_col = "float64_col" - bf_df = scalar_types_df[[int_col, float_col]] + bf_df = scalar_types_df[[bool_col, int_col, float_col]] ops_map = { + "bools": ops.IsInOp(values=(True, False)).as_expr(bool_col), "ints": ops.IsInOp(values=(1, 2, 3)).as_expr(int_col), - "ints_w_null": ops.IsInOp(values=(None, 123456)).as_expr(int_col), + "ints_w_null": ops.IsInOp(values=(None, pd.NA)).as_expr(int_col), "floats": ops.IsInOp(values=(1.0, 2.0, 3.0), match_nulls=False).as_expr( int_col ), "strings": ops.IsInOp(values=("1.0", "2.0")).as_expr(int_col), "mixed": ops.IsInOp(values=("1.0", 2.5, 3)).as_expr(int_col), "empty": ops.IsInOp(values=()).as_expr(int_col), + "empty_wo_match_nulls": ops.IsInOp(values=(), match_nulls=False).as_expr( + int_col + ), "ints_wo_match_nulls": ops.IsInOp( values=(None, 123456), match_nulls=False ).as_expr(int_col), @@ -53,11 +59,12 @@ def test_eq_null_match(scalar_types_df: bpd.DataFrame, snapshot): def test_eq_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[["int64_col", "bool_col"]] - bf_df["int_ne_int"] = bf_df["int64_col"] == bf_df["int64_col"] - bf_df["int_ne_1"] = bf_df["int64_col"] == 1 + bf_df["int_eq_int"] = bf_df["int64_col"] == bf_df["int64_col"] + bf_df["int_eq_1"] = bf_df["int64_col"] == 1 + bf_df["int_eq_null"] = bf_df["int64_col"] == pd.NA - bf_df["int_ne_bool"] = bf_df["int64_col"] == bf_df["bool_col"] - bf_df["bool_ne_int"] = bf_df["bool_col"] == bf_df["int64_col"] + bf_df["int_eq_bool"] = bf_df["int64_col"] == bf_df["bool_col"] + bf_df["bool_eq_int"] = bf_df["bool_col"] == bf_df["int64_col"] snapshot.assert_match(bf_df.sql, "out.sql") @@ -129,6 +136,7 @@ def test_ne_numeric(scalar_types_df: bpd.DataFrame, snapshot): bf_df["int_ne_int"] = bf_df["int64_col"] != bf_df["int64_col"] bf_df["int_ne_1"] = 
bf_df["int64_col"] != 1 + bf_df["int_ne_null"] = bf_df["int64_col"] != pd.NA bf_df["int_ne_bool"] = bf_df["int64_col"] != bf_df["bool_col"] bf_df["bool_ne_int"] = bf_df["bool_col"] != bf_df["int64_col"] diff --git a/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py index c4acb37e519..95156748e96 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py @@ -293,3 +293,74 @@ def test_sub_timedelta(scalar_types_df: bpd.DataFrame, snapshot): bf_df["timedelta_sub_timedelta"] = bf_df["duration_col"] - bf_df["duration_col"] snapshot.assert_match(bf_df.sql, "out.sql") + + +def test_integer_label_to_datetime_fixed(scalar_types_df: bpd.DataFrame, snapshot): + col_names = ["rowindex", "timestamp_col"] + bf_df = scalar_types_df[col_names] + ops_map = { + "fixed_freq": ops.IntegerLabelToDatetimeOp( + freq=pd.tseries.offsets.Day(), origin="start", label="left" # type: ignore + ).as_expr("rowindex", "timestamp_col"), + } + + sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) + snapshot.assert_match(sql, "out.sql") + + +def test_integer_label_to_datetime_week(scalar_types_df: bpd.DataFrame, snapshot): + col_names = ["rowindex", "timestamp_col"] + bf_df = scalar_types_df[col_names] + ops_map = { + "non_fixed_freq_weekly": ops.IntegerLabelToDatetimeOp( + freq=pd.tseries.offsets.Week(weekday=6), origin="start", label="left" # type: ignore + ).as_expr("rowindex", "timestamp_col"), + } + + sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) + snapshot.assert_match(sql, "out.sql") + + +def test_integer_label_to_datetime_month(scalar_types_df: bpd.DataFrame, snapshot): + col_names = ["rowindex", "timestamp_col"] + bf_df = scalar_types_df[col_names] + ops_map = { + "non_fixed_freq_monthly": ops.IntegerLabelToDatetimeOp( + freq=pd.tseries.offsets.MonthEnd(), 
# type: ignore + origin="start", + label="left", + ).as_expr("rowindex", "timestamp_col"), + } + + sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) + snapshot.assert_match(sql, "out.sql") + + +def test_integer_label_to_datetime_quarter(scalar_types_df: bpd.DataFrame, snapshot): + col_names = ["rowindex", "timestamp_col"] + bf_df = scalar_types_df[col_names] + ops_map = { + "non_fixed_freq": ops.IntegerLabelToDatetimeOp( + freq=pd.tseries.offsets.QuarterEnd(startingMonth=12), # type: ignore + origin="start", + label="left", + ).as_expr("rowindex", "timestamp_col"), + } + + sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) + snapshot.assert_match(sql, "out.sql") + + +def test_integer_label_to_datetime_year(scalar_types_df: bpd.DataFrame, snapshot): + col_names = ["rowindex", "timestamp_col"] + bf_df = scalar_types_df[col_names] + ops_map = { + "non_fixed_freq_yearly": ops.IntegerLabelToDatetimeOp( + freq=pd.tseries.offsets.YearEnd(month=12), # type: ignore + origin="start", + label="left", + ).as_expr("rowindex", "timestamp_col"), + } + + sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) + snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py index 11daf6813aa..2667e482c88 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py @@ -12,11 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from google.cloud import bigquery +import pandas as pd import pytest from bigframes import dtypes from bigframes import operations as ops from bigframes.core import expression as ex +from bigframes.functions import udf_def import bigframes.pandas as bpd from bigframes.testing import utils @@ -168,6 +171,109 @@ def test_astype_json_invalid( ) +def test_remote_function_op(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["int64_col"]] + function_def = udf_def.BigqueryUdf( + routine_ref=bigquery.RoutineReference.from_string( + "my_project.my_dataset.my_routine" + ), + signature=udf_def.UdfSignature( + input_types=( + udf_def.UdfField( + "x", + bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 + ), + ), + ), + output_bq_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.FLOAT64 + ), + ), + ) + ops_map = { + "apply_on_null_true": ops.RemoteFunctionOp( + function_def=function_def, apply_on_null=True + ).as_expr("int64_col"), + "apply_on_null_false": ops.RemoteFunctionOp( + function_def=function_def, apply_on_null=False + ).as_expr("int64_col"), + } + sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys())) + snapshot.assert_match(sql, "out.sql") + + +def test_binary_remote_function_op(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["int64_col", "float64_col"]] + op = ops.BinaryRemoteFunctionOp( + function_def=udf_def.BigqueryUdf( + routine_ref=bigquery.RoutineReference.from_string( + "my_project.my_dataset.my_routine" + ), + signature=udf_def.UdfSignature( + input_types=( + udf_def.UdfField( + "x", + bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 + ), + ), + udf_def.UdfField( + "y", + bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.FLOAT64 + ), + ), + ), + output_bq_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.FLOAT64 + ), + ), + ) + ) + sql = 
utils._apply_binary_op(bf_df, op, "int64_col", "float64_col") + + snapshot.assert_match(sql, "out.sql") + + +def test_nary_remote_function_op(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["int64_col", "float64_col", "string_col"]] + op = ops.NaryRemoteFunctionOp( + function_def=udf_def.BigqueryUdf( + routine_ref=bigquery.RoutineReference.from_string( + "my_project.my_dataset.my_routine" + ), + signature=udf_def.UdfSignature( + input_types=( + udf_def.UdfField( + "x", + bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 + ), + ), + udf_def.UdfField( + "y", + bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.FLOAT64 + ), + ), + udf_def.UdfField( + "z", + bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.STRING + ), + ), + ), + output_bq_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.FLOAT64 + ), + ), + ) + ) + sql = utils._apply_nary_op(bf_df, op, "int64_col", "float64_col", "string_col") + snapshot.assert_match(sql, "out.sql") + + def test_case_when_op(scalar_types_df: bpd.DataFrame, snapshot): ops_map = { "single_case": ops.case_when_op.as_expr( @@ -305,7 +411,11 @@ def test_map(scalar_types_df: bpd.DataFrame, snapshot): bf_df = scalar_types_df[[col_name]] sql = utils._apply_ops_to_sql( bf_df, - [ops.MapOp(mappings=(("value1", "mapped1"),)).as_expr(col_name)], + [ + ops.MapOp(mappings=(("value1", "mapped1"), (pd.NA, "UNKNOWN"))).as_expr( + col_name + ) + ], [col_name], ) diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_dataframe/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_dataframe/out.sql index e594b67669d..5d9019439f2 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_dataframe/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_dataframe/out.sql @@ -9,7 
+9,7 @@ WITH `bfcte_0` AS ( * REPLACE (`int_list_col`[SAFE_OFFSET(`bfcol_13`)] AS `int_list_col`, `string_list_col`[SAFE_OFFSET(`bfcol_13`)] AS `string_list_col`) FROM `bfcte_0` - CROSS JOIN UNNEST(GENERATE_ARRAY(0, LEAST(ARRAY_LENGTH(`int_list_col`) - 1, ARRAY_LENGTH(`string_list_col`) - 1))) AS `bfcol_13` WITH OFFSET AS `bfcol_7` + LEFT JOIN UNNEST(GENERATE_ARRAY(0, LEAST(ARRAY_LENGTH(`int_list_col`) - 1, ARRAY_LENGTH(`string_list_col`) - 1))) AS `bfcol_13` WITH OFFSET AS `bfcol_7` ) SELECT `rowindex`, diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_series/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_series/out.sql index 5af0aa00922..8ba4559da83 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_series/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_series/out.sql @@ -8,7 +8,7 @@ WITH `bfcte_0` AS ( * REPLACE (`bfcol_8` AS `int_list_col`) FROM `bfcte_0` - CROSS JOIN UNNEST(`int_list_col`) AS `bfcol_8` WITH OFFSET AS `bfcol_4` + LEFT JOIN UNNEST(`int_list_col`) AS `bfcol_8` WITH OFFSET AS `bfcol_4` ) SELECT `rowindex`, diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_groupby_rolling/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_groupby_rolling/out.sql index e8fabd1129d..0dca6d9d49e 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_groupby_rolling/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_groupby_rolling/out.sql @@ -22,10 +22,13 @@ WITH `bfcte_0` AS ( SELECT *, CASE - WHEN SUM(CAST(NOT `bfcol_7` IS NULL AS INT64)) OVER ( - PARTITION BY `bfcol_9` - ORDER BY `bfcol_9` ASC NULLS LAST, `rowindex` ASC NULLS LAST - ROWS BETWEEN 3 PRECEDING AND CURRENT ROW + WHEN COALESCE( + 
SUM(CAST(NOT `bfcol_7` IS NULL AS INT64)) OVER ( + PARTITION BY `bfcol_9` + ORDER BY `bfcol_9` ASC NULLS LAST, `rowindex` ASC NULLS LAST + ROWS BETWEEN 3 PRECEDING AND CURRENT ROW + ), + 0 ) < 3 THEN NULL ELSE COALESCE( @@ -42,10 +45,13 @@ WITH `bfcte_0` AS ( SELECT *, CASE - WHEN SUM(CAST(NOT `bfcol_8` IS NULL AS INT64)) OVER ( - PARTITION BY `bfcol_9` - ORDER BY `bfcol_9` ASC NULLS LAST, `rowindex` ASC NULLS LAST - ROWS BETWEEN 3 PRECEDING AND CURRENT ROW + WHEN COALESCE( + SUM(CAST(NOT `bfcol_8` IS NULL AS INT64)) OVER ( + PARTITION BY `bfcol_9` + ORDER BY `bfcol_9` ASC NULLS LAST, `rowindex` ASC NULLS LAST + ROWS BETWEEN 3 PRECEDING AND CURRENT ROW + ), + 0 ) < 3 THEN NULL ELSE COALESCE( diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_range_rolling/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_range_rolling/out.sql index 581c81c6b40..fe4cea08cb2 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_range_rolling/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_range_rolling/out.sql @@ -6,14 +6,17 @@ WITH `bfcte_0` AS ( SELECT *, CASE - WHEN SUM(CAST(NOT `bfcol_1` IS NULL AS INT64)) OVER ( - ORDER BY UNIX_MICROS(`bfcol_0`) ASC NULLS LAST - RANGE BETWEEN 2999999 PRECEDING AND CURRENT ROW + WHEN COALESCE( + SUM(CAST(NOT `bfcol_1` IS NULL AS INT64)) OVER ( + ORDER BY UNIX_MICROS(`bfcol_0`) ASC + RANGE BETWEEN 2999999 PRECEDING AND CURRENT ROW + ), + 0 ) < 1 THEN NULL ELSE COALESCE( SUM(`bfcol_1`) OVER ( - ORDER BY UNIX_MICROS(`bfcol_0`) ASC NULLS LAST + ORDER BY UNIX_MICROS(`bfcol_0`) ASC RANGE BETWEEN 2999999 PRECEDING AND CURRENT ROW ), 0 diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_skips_nulls_op/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_skips_nulls_op/out.sql index 
788eb49ddf4..bf1e76c55c7 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_skips_nulls_op/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_window/test_compile_window_w_skips_nulls_op/out.sql @@ -7,7 +7,10 @@ WITH `bfcte_0` AS ( SELECT *, CASE - WHEN SUM(CAST(NOT `int64_col` IS NULL AS INT64)) OVER (ORDER BY `rowindex` ASC NULLS LAST ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) < 3 + WHEN COALESCE( + SUM(CAST(NOT `int64_col` IS NULL AS INT64)) OVER (ORDER BY `rowindex` ASC NULLS LAST ROWS BETWEEN 2 PRECEDING AND CURRENT ROW), + 0 + ) < 3 THEN NULL ELSE COALESCE( SUM(`int64_col`) OVER (ORDER BY `rowindex` ASC NULLS LAST ROWS BETWEEN 2 PRECEDING AND CURRENT ROW), diff --git a/tests/unit/core/logging/__init__.py b/tests/unit/core/logging/__init__.py new file mode 100644 index 00000000000..58d482ea386 --- /dev/null +++ b/tests/unit/core/logging/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/unit/core/logging/test_data_types.py b/tests/unit/core/logging/test_data_types.py new file mode 100644 index 00000000000..09b3429f00d --- /dev/null +++ b/tests/unit/core/logging/test_data_types.py @@ -0,0 +1,54 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pandas as pd +import pyarrow as pa +import pytest + +from bigframes import dtypes +from bigframes.core.logging import data_types + +UNKNOWN_TYPE = pd.ArrowDtype(pa.time64("ns")) + +PA_STRUCT_TYPE = pa.struct([("city", pa.string()), ("pop", pa.int64())]) + +PA_LIST_TYPE = pa.list_(pa.int64()) + + +@pytest.mark.parametrize( + ("dtype", "expected_mask"), + [ + (None, 0), + (UNKNOWN_TYPE, 1 << 0), + (dtypes.INT_DTYPE, 1 << 1), + (dtypes.FLOAT_DTYPE, 1 << 2), + (dtypes.BOOL_DTYPE, 1 << 3), + (dtypes.STRING_DTYPE, 1 << 4), + (dtypes.BYTES_DTYPE, 1 << 5), + (dtypes.DATE_DTYPE, 1 << 6), + (dtypes.TIME_DTYPE, 1 << 7), + (dtypes.DATETIME_DTYPE, 1 << 8), + (dtypes.TIMESTAMP_DTYPE, 1 << 9), + (dtypes.TIMEDELTA_DTYPE, 1 << 10), + (dtypes.NUMERIC_DTYPE, 1 << 11), + (dtypes.BIGNUMERIC_DTYPE, 1 << 12), + (dtypes.GEO_DTYPE, 1 << 13), + (dtypes.JSON_DTYPE, 1 << 14), + (pd.ArrowDtype(PA_STRUCT_TYPE), 1 << 15), + (pd.ArrowDtype(PA_LIST_TYPE), 1 << 16), + (dtypes.OBJ_REF_DTYPE, (1 << 15) | (1 << 17)), + ], +) +def test_get_dtype_mask(dtype, expected_mask): + assert data_types._get_dtype_mask(dtype) == expected_mask diff --git a/tests/unit/core/test_log_adapter.py b/tests/unit/core/logging/test_log_adapter.py similarity index 99% rename from tests/unit/core/test_log_adapter.py rename to tests/unit/core/logging/test_log_adapter.py index c236bb68867..ecef966afca 100644 --- a/tests/unit/core/test_log_adapter.py +++ b/tests/unit/core/logging/test_log_adapter.py @@ -17,7 +17,7 @@ from google.cloud import bigquery import pytest -from bigframes.core import log_adapter +from 
bigframes.core.logging import log_adapter # The limit is 64 (https://cloud.google.com/bigquery/docs/labels-intro#requirements), # but leave a few spare for internal labels to be added. diff --git a/tests/unit/core/sql/snapshots/test_ml/test_transform_model_basic/transform_model_basic.sql b/tests/unit/core/sql/snapshots/test_ml/test_transform_model_basic/transform_model_basic.sql new file mode 100644 index 00000000000..e6cedc16477 --- /dev/null +++ b/tests/unit/core/sql/snapshots/test_ml/test_transform_model_basic/transform_model_basic.sql @@ -0,0 +1 @@ +SELECT * FROM ML.TRANSFORM(MODEL `my_project.my_dataset.my_model`, (SELECT * FROM new_data)) diff --git a/tests/unit/core/sql/test_ml.py b/tests/unit/core/sql/test_ml.py index fe8c1a04d48..9721f42fee1 100644 --- a/tests/unit/core/sql/test_ml.py +++ b/tests/unit/core/sql/test_ml.py @@ -169,3 +169,11 @@ def test_global_explain_model_with_options(snapshot): class_level_explain=True, ) snapshot.assert_match(sql, "global_explain_model_with_options.sql") + + +def test_transform_model_basic(snapshot): + sql = bigframes.core.sql.ml.transform( + model_name="my_project.my_dataset.my_model", + table="SELECT * FROM new_data", + ) + snapshot.assert_match(sql, "transform_model_basic.sql") diff --git a/tests/unit/display/test_anywidget.py b/tests/unit/display/test_anywidget.py index a635697e200..252ba8100e6 100644 --- a/tests/unit/display/test_anywidget.py +++ b/tests/unit/display/test_anywidget.py @@ -80,8 +80,31 @@ def handler(signum, frame): signal.alarm(0) +def test_css_contains_dark_mode_selectors(): + """Test that the CSS for dark mode is loaded with all required selectors.""" + from bigframes.display.anywidget import TableWidget + + mock_df = mock.create_autospec(bigframes.dataframe.DataFrame, instance=True) + # mock_df.columns and mock_df.dtypes are needed for __init__ + mock_df.columns = ["col1"] + mock_df.dtypes = {"col1": "object"} + + # Mock _block to avoid AttributeError during _set_table_html + mock_block = 
mock.Mock() + mock_block.has_index = False + mock_df._block = mock_block + + with mock.patch.object(TableWidget, "_initial_load"): + widget = TableWidget(mock_df) + css = widget._css + assert "@media (prefers-color-scheme: dark)" in css + assert 'html[theme="dark"]' in css + assert 'body[data-theme="dark"]' in css + + @pytest.fixture def mock_df(): + """A mock DataFrame that can be used in multiple tests.""" df = mock.create_autospec(bigframes.dataframe.DataFrame, instance=True) df.columns = ["col1", "col2"] df.dtypes = {"col1": "int64", "col2": "int64"} @@ -104,6 +127,7 @@ def mock_df(): def test_sorting_single_column(mock_df): + """Test that the widget can be sorted by a single column.""" from bigframes.display.anywidget import TableWidget with bigframes.option_context("display.repr_mode", "anywidget"): @@ -122,6 +146,7 @@ def test_sorting_single_column(mock_df): def test_sorting_multi_column(mock_df): + """Test that the widget can be sorted by multiple columns.""" from bigframes.display.anywidget import TableWidget with bigframes.option_context("display.repr_mode", "anywidget"): @@ -137,6 +162,7 @@ def test_sorting_multi_column(mock_df): def test_page_size_change_resets_sort(mock_df): + """Test that changing the page size resets the sorting.""" from bigframes.display.anywidget import TableWidget with bigframes.option_context("display.repr_mode", "anywidget"): diff --git a/tests/unit/display/test_html.py b/tests/unit/display/test_html.py index 0762a2fd8dd..35a74d098ae 100644 --- a/tests/unit/display/test_html.py +++ b/tests/unit/display/test_html.py @@ -148,3 +148,40 @@ def test_render_html_precision(): # Make sure we reset to default html = bf_html.render_html(dataframe=df, table_id="test-table") assert "3.141593" in html + + +def test_render_html_max_columns_truncation(): + # Create a DataFrame with 10 columns + data = {f"col_{i}": [i] for i in range(10)} + df = pd.DataFrame(data) + + # Test max_columns=4 + # max_columns=4 -> 2 left, 2 right. col_0, col_1 ... 
col_8, col_9 + html = bf_html.render_html(dataframe=df, table_id="test", max_columns=4) + + assert "col_0" in html + assert "col_1" in html + assert "col_2" not in html + assert "col_7" not in html + assert "col_8" in html + assert "col_9" in html + assert "..." in html + + # Test max_columns=3 + # 3 // 2 = 1. Left: col_0. Right: 3 - 1 = 2. col_8, col_9. + # Total displayed: col_0, ..., col_8, col_9. (3 data cols + 1 ellipsis) + html = bf_html.render_html(dataframe=df, table_id="test", max_columns=3) + assert "col_0" in html + assert "col_1" not in html + assert "col_7" not in html + assert "col_8" in html + assert "col_9" in html + + # Test max_columns=1 + # 1 // 2 = 0. Left: []. Right: 1. col_9. + # Total: ..., col_9. + html = bf_html.render_html(dataframe=df, table_id="test", max_columns=1) + assert "col_0" not in html + assert "col_8" not in html + assert "col_9" in html + assert "..." in html diff --git a/tests/unit/session/test_io_bigquery.py b/tests/unit/session/test_io_bigquery.py index 4349c1b6ee8..eb58c6bb52d 100644 --- a/tests/unit/session/test_io_bigquery.py +++ b/tests/unit/session/test_io_bigquery.py @@ -23,8 +23,8 @@ import pytest import bigframes -from bigframes.core import log_adapter import bigframes.core.events +from bigframes.core.logging import log_adapter import bigframes.pandas as bpd import bigframes.session._io.bigquery import bigframes.session._io.bigquery as io_bq diff --git a/third_party/bigframes_vendored/sqlglot/__init__.py b/third_party/bigframes_vendored/sqlglot/__init__.py index 41c98569ce8..f3679caf8d6 100644 --- a/third_party/bigframes_vendored/sqlglot/__init__.py +++ b/third_party/bigframes_vendored/sqlglot/__init__.py @@ -74,17 +74,6 @@ logger = logging.getLogger("sqlglot") -try: - from bigframes_vendored.sqlglot._version import ( # noqa: F401 - __version__, - __version_tuple__, - ) -except ImportError: - logger.error( - "Unable to set __version__, run `pip install -e .` or `python setup.py develop` first." 
- ) - - pretty = False """Whether to format generated SQL by default.""" diff --git a/third_party/bigframes_vendored/version.py b/third_party/bigframes_vendored/version.py index f36c6789c1a..1e9ed79f825 100644 --- a/third_party/bigframes_vendored/version.py +++ b/third_party/bigframes_vendored/version.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.32.0" +__version__ = "2.33.0" # {x-release-please-start-date} -__release_date__ = "2026-01-05" +__release_date__ = "2026-01-22" # {x-release-please-end}