Skip to content

Commit 0396278

Browse files
authored
chore: define type logging bit masks (#2384)
This PR only defines the data types and their bit positions. There's a slight update from our design: I reserved the least significant bit for unknown types. Tree traversal code will be implemented in the next PR. Related bug: b/406578908
1 parent 7abfef0 commit 0396278

File tree

3 files changed

+136
-2
lines changed

3 files changed

+136
-2
lines changed

bigframes/core/logging/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,6 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from bigframes.core.logging import log_adapter
15+
from bigframes.core.logging import data_types, log_adapter
1616

17-
__all__ = ["log_adapter"]
17+
__all__ = ["log_adapter", "data_types"]
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
from bigframes import dtypes
17+
18+
19+
def _add_data_type(existing_types: int, curr_type: dtypes.Dtype) -> int:
    """Return ``existing_types`` with the bit(s) for ``curr_type`` switched on.

    Args:
        existing_types: Bit mask of the data types recorded so far.
        curr_type: The data type to record.

    Returns:
        The bitwise OR of ``existing_types`` and the mask of ``curr_type``.
        If the bits of ``curr_type`` are already set, the result equals
        ``existing_types``.
    """
    curr_type_mask = _get_dtype_mask(curr_type)
    return existing_types | curr_type_mask
21+
22+
23+
def _get_dtype_mask(dtype: dtypes.Dtype) -> int:
    """Return the logging bit mask that identifies ``dtype``.

    Bit layout:
        * bit 0: unknown type (anything not matched below)
        * bits 1-14: scalar types, in the order listed in ``scalar_bits``
        * bit 15: struct-like types
        * bit 16: array-like types
        * bit 17: obj_ref, set together with the struct bit

    Args:
        dtype: The data type to classify.

    Returns:
        An integer with one bit set, or two bits (15 and 17) for obj_ref.
    """
    # (dtype, bit position) pairs, checked in order with ``==``.
    scalar_bits = (
        (dtypes.INT_DTYPE, 1),
        (dtypes.FLOAT_DTYPE, 2),
        (dtypes.BOOL_DTYPE, 3),
        (dtypes.STRING_DTYPE, 4),
        (dtypes.BYTES_DTYPE, 5),
        (dtypes.DATE_DTYPE, 6),
        (dtypes.TIME_DTYPE, 7),
        (dtypes.DATETIME_DTYPE, 8),
        (dtypes.TIMESTAMP_DTYPE, 9),
        (dtypes.TIMEDELTA_DTYPE, 10),
        (dtypes.NUMERIC_DTYPE, 11),
        (dtypes.BIGNUMERIC_DTYPE, 12),
        (dtypes.GEO_DTYPE, 13),
        (dtypes.JSON_DTYPE, 14),
    )
    for known_dtype, bit_position in scalar_bits:
        if dtype == known_dtype:
            return 1 << bit_position

    if dtypes.is_struct_like(dtype):
        struct_bit = 1 << 15
        if dtype == dtypes.OBJ_REF_DTYPE:
            # obj_ref is a special struct type for multi-modal data.
            # It should be double counted as both "struct" and its own type.
            return struct_bit | (1 << 17)
        return struct_bit

    if dtypes.is_array_like(dtype):
        return 1 << 16

    # If an unknown data type is present, mark it with the least significant bit.
    return 1 << 0
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pandas as pd
16+
import pyarrow as pa
17+
import pytest
18+
19+
from bigframes import dtypes
20+
from bigframes.core.logging import data_types
21+
22+
# A dtype that test_get_dtype_mask expects to land on the "unknown" bit (bit 0).
UNKNOWN_TYPE = pd.ArrowDtype(pa.time64("ns"))

# Arrow struct type with a string field and an int64 field.
PA_STRUCT_TYPE = pa.struct(
    [
        pa.field("city", pa.string()),
        pa.field("pop", pa.int64()),
    ]
)

# Arrow list type whose elements are int64.
PA_LIST_TYPE = pa.list_(pa.int64())
27+
28+
29+
@pytest.mark.parametrize(
    ("dtype", "expected_mask"),
    [
        (UNKNOWN_TYPE, 1 << 0),
        (dtypes.INT_DTYPE, 1 << 1),
        (dtypes.FLOAT_DTYPE, 1 << 2),
        (dtypes.BOOL_DTYPE, 1 << 3),
        (dtypes.STRING_DTYPE, 1 << 4),
        (dtypes.BYTES_DTYPE, 1 << 5),
        (dtypes.DATE_DTYPE, 1 << 6),
        (dtypes.TIME_DTYPE, 1 << 7),
        (dtypes.DATETIME_DTYPE, 1 << 8),
        (dtypes.TIMESTAMP_DTYPE, 1 << 9),
        (dtypes.TIMEDELTA_DTYPE, 1 << 10),
        (dtypes.NUMERIC_DTYPE, 1 << 11),
        (dtypes.BIGNUMERIC_DTYPE, 1 << 12),
        (dtypes.GEO_DTYPE, 1 << 13),
        (dtypes.JSON_DTYPE, 1 << 14),
        (pd.ArrowDtype(PA_STRUCT_TYPE), 1 << 15),
        (pd.ArrowDtype(PA_LIST_TYPE), 1 << 16),
        (dtypes.OBJ_REF_DTYPE, (1 << 15) | (1 << 17)),
    ],
)
def test_get_dtype_mask(dtype, expected_mask):
    """Each dtype maps to its reserved bit; obj_ref sets struct and obj_ref bits."""
    actual_mask = data_types._get_dtype_mask(dtype)

    assert actual_mask == expected_mask
54+
55+
56+
def test_add_data_type__type_overlap_no_op():
    """Adding a type whose bit is already set leaves the mask unchanged."""
    string_dtype = dtypes.STRING_DTYPE
    mask_before = data_types._get_dtype_mask(string_dtype)

    mask_after = data_types._add_data_type(mask_before, string_dtype)

    assert mask_after == mask_before
61+
62+
63+
def test_add_data_type__new_type_updated():
    """Adding a type not yet recorded sets its bit alongside the existing ones."""
    int_mask = data_types._get_dtype_mask(dtypes.INT_DTYPE)
    string_mask = data_types._get_dtype_mask(dtypes.STRING_DTYPE)

    combined_mask = data_types._add_data_type(int_mask, dtypes.STRING_DTYPE)

    assert combined_mask == int_mask | string_mask

0 commit comments

Comments
 (0)