feat: add df.bigquery.ai.forecast method to pandas dataframe accessor (#2518)

tswast · google-labs-jules[bot] · web-flow · commit 1126cec9cdfc · 2026-03-17T14:05:35.000-05:00
Adds the `.bigquery.ai.forecast()` method to pandas DataFrame objects, which proxies to `bigframes.bigquery.ai.forecast()`. Also adds unit tests with mocked session responses. --- *PR created automatically by Jules for task [14604090974587392182](https://jules.google.com/task/14604090974587392182) started by @tswast* --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> Co-authored-by: tswast <247555+tswast@users.noreply.github.com>
diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py
@@ -880,6 +880,7 @@ def forecast(
     id_cols: Iterable[str] | None = None,
     horizon: int = 10,
     confidence_level: float = 0.95,
+    output_historical_time_series: bool = False,
     context_window: int | None = None,
 ) -> dataframe.DataFrame:
     """
@@ -914,6 +915,15 @@ def forecast(
         confidence_level (float, default 0.95):
             A FLOAT64 value that specifies the percentage of the future values that fall in the prediction interval.
             The default value is 0.95. The valid input range is [0, 1).
+        output_historical_time_series (bool, default False):
+            A BOOL value that determines whether the input data is returned
+            along with the forecasted data. Set this argument to TRUE to return
+            input data. The default value is FALSE.
+
+            Returning the input data along with the forecasted data lets you
+            compare the historical value of the data column with the forecasted
+            value of the data column, or chart the change in the data column
+            values over time.
         context_window (int, optional):
             An int value that specifies the context window length used by BigQuery ML's built-in TimesFM model.
             The context window length determines how many of the most recent data points from the input time series are use by the model.
@@ -945,6 +955,7 @@ def forecast(
         "timestamp_col": timestamp_col,
         "model": model,
         "horizon": horizon,
+        "output_historical_time_series": output_historical_time_series,
         "confidence_level": confidence_level,
     }
     if id_cols:
diff --git a/bigframes/extensions/pandas/dataframe_accessor.py b/bigframes/extensions/pandas/dataframe_accessor.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import cast
+from typing import cast, Iterable, Optional
 
 import pandas
 import pandas.api.extensions
@@ -21,6 +21,85 @@
 import bigframes.pandas as bpd
 
 
+class AIAccessor:
+    """
+    Pandas DataFrame accessor for BigQuery AI functions, reached through the ``ai`` property of the ``bigquery`` accessor.
+    """
+
+    def __init__(self, pandas_obj: pandas.DataFrame):
+        self._obj = pandas_obj
+
+    def forecast(
+        self,
+        *,
+        data_col: str,
+        timestamp_col: str,
+        model: str = "TimesFM 2.0",
+        id_cols: Optional[Iterable[str]] = None,
+        horizon: int = 10,
+        confidence_level: float = 0.95,
+        context_window: Optional[int] = None,
+        output_historical_time_series: bool = False,
+        session=None,
+    ) -> pandas.DataFrame:
+        """
+        Forecast time series at future horizon using BigQuery AI.FORECAST.
+
+        See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-ai-forecast
+
+        Args:
+            data_col (str):
+                A str value that specifies the name of the data column. The data column contains the data to forecast.
+                The data column must use one of the following data types: INT64, NUMERIC and FLOAT64.
+            timestamp_col (str):
+                A str value that specifies the name of the time points column.
+                The time points column provides the time points used to generate the forecast.
+                The time points column must use one of the following data types: TIMESTAMP, DATE and DATETIME.
+            model (str, default "TimesFM 2.0"):
+                A str value that specifies the name of the model. "TimesFM 2.0" and "TimesFM 2.5" are supported.
+            id_cols (Iterable[str], optional):
+                An iterable of str values that specifies the names of one or more ID columns. Each ID identifies a unique time series to forecast.
+                Specify one or more values for this argument in order to forecast multiple time series using a single query.
+                The columns that you specify must use one of the following data types: STRING, INT64, ARRAY<STRING> and ARRAY<INT64>.
+            horizon (int, default 10):
+                An int value that specifies the number of time points to forecast. The default value is 10. The valid input range is [1, 10,000].
+            confidence_level (float, default 0.95):
+                A FLOAT64 value that specifies the percentage of the future values that fall in the prediction interval.
+                The default value is 0.95. The valid input range is [0, 1).
+            context_window (int, optional):
+                An int value that specifies the context window length used by BigQuery ML's built-in TimesFM model.
+                The context window length determines how many of the most recent data points from the input time series are used by the model.
+                If you don't specify a value, the AI.FORECAST function automatically chooses the smallest possible context window length to use
+                that is still large enough to cover the number of time series data points in your input data.
+            output_historical_time_series (bool, default False):
+                A bool value that determines whether the input data is returned along with the forecasted data.
+            session (bigframes.session.Session, optional):
+                The BigFrames session to use. If not provided, the default global session is used.
+
+        Returns:
+            pandas.DataFrame:
+                The forecast result, converted back to pandas with ``to_pandas(ordered=True)``.
+        """
+        import bigframes.bigquery.ai
+
+        if session is None:
+            session = bf_session.get_global_session()
+
+        bf_df = cast(bpd.DataFrame, session.read_pandas(self._obj))
+        result = bigframes.bigquery.ai.forecast(
+            bf_df,
+            data_col=data_col,
+            timestamp_col=timestamp_col,
+            model=model,
+            id_cols=id_cols,
+            horizon=horizon,
+            confidence_level=confidence_level,
+            context_window=context_window,
+            output_historical_time_series=output_historical_time_series,
+        )
+        return result.to_pandas(ordered=True)
+
+
 @pandas.api.extensions.register_dataframe_accessor("bigquery")
 class BigQueryDataFrameAccessor:
     """
@@ -32,6 +111,13 @@ class BigQueryDataFrameAccessor:
     def __init__(self, pandas_obj: pandas.DataFrame):
         self._obj = pandas_obj
 
+    @property
+    def ai(self) -> "AIAccessor":
+        """
+        Accessor for BigQuery AI functions: returns a new ``AIAccessor`` wrapping this accessor's pandas DataFrame.
+        """
+        return AIAccessor(self._obj)
+
     def sql_scalar(self, sql_template: str, *, output_dtype=None, session=None):
         """
         Compute a new pandas Series by applying a SQL scalar function to the DataFrame.
diff --git a/tests/unit/core/compile/sqlglot/test_dataframe_accessor.py b/tests/unit/core/compile/sqlglot/test_dataframe_accessor.py
@@ -43,3 +43,42 @@ def to_pandas(series, *, ordered):
 
     session.read_pandas.assert_called_once()
     snapshot.assert_match(result, "out.sql")
+
+
+def test_ai_forecast(snapshot, monkeypatch):
+    """Check that ``df.bigquery.ai.forecast`` forwards its arguments, defaults included, to ``bigframes.bigquery.ai.forecast``."""
+    import bigframes.bigquery.ai
+    import bigframes.session
+
+    session = mock.create_autospec(bigframes.session.Session)
+    bf_df = mock.create_autospec(bpd.DataFrame)
+    session.read_pandas.return_value = bf_df
+
+    def mock_ai_forecast(df, **kwargs):
+        # Echo the received keyword arguments back through to_pandas() so they can be asserted on below.
+        assert df is bf_df
+        result_df = mock.create_autospec(bpd.DataFrame)
+        result_df.to_pandas.return_value = kwargs
+        return result_df
+
+    monkeypatch.setattr(bigframes.bigquery.ai, "forecast", mock_ai_forecast)
+
+    df = pd.DataFrame({"date": ["2020-01-01"], "value": [1.0]})
+    result = df.bigquery.ai.forecast(
+        timestamp_col="date",
+        data_col="value",
+        horizon=5,
+        session=session,
+    )
+
+    session.read_pandas.assert_called_once()
+    assert result == {
+        "timestamp_col": "date",
+        "data_col": "value",
+        "model": "TimesFM 2.0",
+        "id_cols": None,
+        "horizon": 5,
+        "confidence_level": 0.95,
+        "context_window": None,
+        "output_historical_time_series": False,
+    }