Skip to content

Commit 99d1dff

Browse files
committed
Implement query builder API
1 parent dba1d7a commit 99d1dff

File tree

2 files changed

+945
-0
lines changed

2 files changed

+945
-0
lines changed

bigquery/query_builder.py

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
import logging
2+
3+
4+
def render_query(dataset, tables, select=None, conditions=None,
5+
groupings=None, order_by=None):
6+
"""Render a query that will run over the given tables using the specified
7+
parameters.
8+
9+
Args:
10+
dataset: the BigQuery data set to query data from.
11+
tables: the tables in dataset to query.
12+
select: a dictionary of selections for a table. The keys function as
13+
column names and the values function as options to apply to
14+
the select field such as alias and format. For example,
15+
{
16+
'start_time': {
17+
'alias': 'StartTime',
18+
'format': 'INTEGER-FORMAT_UTC_USEC'
19+
}
20+
}
21+
is represented as 'SEC_TO_TIMESTAMP(INTEGER(start_time)) as
22+
StartTime' in a query. Pass None to select all.
23+
conditions: a list of dicts to filter results by.
24+
Each dict should be formatted as the following:
25+
{
26+
'field': 'foo',
27+
'type': 'FLOAT',
28+
'comparators': [
29+
{
30+
'condition': '>=',
31+
'negate': False,
32+
'value': '1'
33+
}
34+
]
35+
}
36+
which is rendered as 'foo >= FLOAT('1')' in the query.
37+
groupings: a list of field names to group by.
38+
order_by: a dict with two keys, field and direction.
39+
Such that the dictionary should be formatted as
40+
{'field':'TimeStamp, 'direction':'desc'}.
41+
42+
Returns:
43+
a query string.
44+
"""
45+
46+
if None in (dataset, tables):
47+
return None
48+
49+
query = "%s %s %s %s %s" % (
50+
_render_select(select),
51+
_render_sources(dataset, tables),
52+
_render_conditions(conditions),
53+
_render_groupings(groupings),
54+
_render_order(order_by.get('field'), order_by.get('direction')))
55+
56+
return query
57+
58+
59+
def _render_select(selections):
60+
"""Render the selection part of a query.
61+
62+
Args:
63+
selections: a dictionary of selections for a table. The
64+
keys function as column names and the values function as
65+
options to apply to the select field such as alias and format.
66+
For example {'start_time': {'alias': 'StartTime', 'format':
67+
'INTEGER-FORMAT_UTC_USEC'}} is represented as
68+
'SEC_TO_TIMESTAMP(INTEGER(start_time))' in a query. Pass None to
69+
select all.
70+
71+
Returns:
72+
a string that represents the select part of a query.
73+
"""
74+
75+
if not selections:
76+
return 'SELECT *'
77+
78+
rendered_selections = []
79+
for name, options in selections.iteritems():
80+
if not isinstance(options, list):
81+
options = [options]
82+
83+
original_name = name
84+
for options_dict in options:
85+
name = original_name
86+
alias = options_dict.get('alias')
87+
alias = "as %s" % alias if alias else ""
88+
89+
formatter = options_dict.get('format')
90+
if formatter:
91+
for caster in formatter.split('-'):
92+
if caster == 'SEC_TO_MICRO':
93+
name = "%s*1000000" % name
94+
elif ':' in caster:
95+
caster, args = caster.split(':')
96+
name = "%s(%s,%s)" % (caster, name, args)
97+
else:
98+
name = "%s(%s)" % (caster, name)
99+
100+
rendered_selections.append("%s %s" % (name, alias))
101+
102+
return "SELECT " + ", ".join(rendered_selections)
103+
104+
105+
def _render_sources(dataset, tables):
106+
"""Render the source part of a query.
107+
108+
Args:
109+
dataset: the data set to fetch log data from.
110+
tables: the tables to fetch log data from.
111+
112+
Returns:
113+
a string that represents the from part of a query.
114+
"""
115+
116+
return "FROM " + ", ".join(
117+
["[%s.%s]" % (dataset, table) for table in tables])
118+
119+
120+
def _render_conditions(conditions):
121+
"""Render the conditions part of a query.
122+
123+
Args:
124+
conditions: a list of dictionary items to filter a table.
125+
Each dict should be formatted as {'field': 'start_time',
126+
'value': {'value': 1, 'negate': False}, 'comparator': '>',
127+
'type': 'FLOAT'} which is represetned as
128+
'start_time > FLOAT('1')' in the query.
129+
130+
Returns:
131+
a string that represents the where part of a query.
132+
"""
133+
134+
if not conditions:
135+
return ""
136+
137+
rendered_conditions = []
138+
139+
for condition in conditions:
140+
field = condition.get('field')
141+
field_type = condition.get('type')
142+
comparators = condition.get('comparators')
143+
144+
if None in (field, field_type, comparators) or not comparators:
145+
logging.warn('Invalid condition passed in: %s' % condition)
146+
continue
147+
148+
rendered_conditions.append(
149+
_render_condition(field, field_type, comparators))
150+
151+
if not rendered_conditions:
152+
return ""
153+
154+
return "WHERE %s" % (" AND ".join(rendered_conditions))
155+
156+
157+
def _render_condition(field, field_type, comparators):
158+
"""Render a single query condition.
159+
160+
Args:
161+
field: the field the condition applies to.
162+
field_type: the data type of the field.
163+
comparator: the logic operator to use.
164+
value_dicts: a list of value dicts of the form
165+
{'value': 'foo', 'negate': False}
166+
167+
Returns:
168+
a condition string.
169+
"""
170+
171+
field_type = field_type.upper()
172+
173+
negated_conditions, normal_conditions = [], []
174+
175+
for comparator in comparators:
176+
condition = comparator.get("condition").upper()
177+
negated = "NOT " if comparator.get("negate") else ""
178+
value = comparator.get("value")
179+
180+
# BigQuery cannot cast strings to booleans, convert to ints
181+
if field_type == "BOOLEAN":
182+
value = 1 if value else 0
183+
elif field_type in ("STRING", "INTEGER", "FLOAT"):
184+
value = "'%s'" % (value)
185+
186+
rendered_sub_condition = "%s%s %s %s(%s)" % (
187+
negated, field, condition, field_type, value)
188+
189+
if comparator.get("negate"):
190+
negated_conditions.append(rendered_sub_condition)
191+
else:
192+
normal_conditions.append(rendered_sub_condition)
193+
194+
rendered_normal = " AND ".join(normal_conditions)
195+
rendered_negated = " AND ".join(negated_conditions)
196+
197+
if rendered_normal and rendered_negated:
198+
return "((%s) AND (%s))" % (rendered_normal, rendered_negated)
199+
200+
return "(%s)" % (rendered_normal or rendered_negated)
201+
202+
203+
def _render_order(field, direction):
204+
"""Render the order by part of a query.
205+
206+
Args:
207+
order_by: a dictionary with two keys, field and direction.
208+
Such that the dictionary should be formatted as
209+
{'field':'TimeStamp, 'direction':'desc'}.
210+
211+
Returns:
212+
a string that represents the order by part of a query.
213+
"""
214+
215+
if not field or not direction:
216+
return ""
217+
218+
return "ORDER BY %s %s" % (field, direction)
219+
220+
221+
def _render_groupings(fields):
222+
"""Render the group by part of a query.
223+
224+
Args:
225+
fields: a list of fields to group by.
226+
227+
Returns:
228+
a string that represents the group by part of a query.
229+
"""
230+
231+
if not fields:
232+
return ""
233+
234+
return "GROUP BY " + ", ".join(fields)
235+

0 commit comments

Comments
 (0)