|
| 1 | +import logging |
| 2 | + |
| 3 | + |
| 4 | +def render_query(dataset, tables, select=None, conditions=None, |
| 5 | + groupings=None, order_by=None): |
| 6 | + """Render a query that will run over the given tables using the specified |
| 7 | + parameters. |
| 8 | +
|
| 9 | + Args: |
| 10 | + dataset: the BigQuery data set to query data from. |
| 11 | + tables: the tables in dataset to query. |
| 12 | + select: a dictionary of selections for a table. The keys function as |
| 13 | + column names and the values function as options to apply to |
| 14 | + the select field such as alias and format. For example, |
| 15 | + { |
| 16 | + 'start_time': { |
| 17 | + 'alias': 'StartTime', |
| 18 | + 'format': 'INTEGER-FORMAT_UTC_USEC' |
| 19 | + } |
| 20 | + } |
| 21 | + is represented as 'SEC_TO_TIMESTAMP(INTEGER(start_time)) as |
| 22 | + StartTime' in a query. Pass None to select all. |
| 23 | + conditions: a list of dicts to filter results by. |
| 24 | + Each dict should be formatted as the following: |
| 25 | + { |
| 26 | + 'field': 'foo', |
| 27 | + 'type': 'FLOAT', |
| 28 | + 'comparators': [ |
| 29 | + { |
| 30 | + 'condition': '>=', |
| 31 | + 'negate': False, |
| 32 | + 'value': '1' |
| 33 | + } |
| 34 | + ] |
| 35 | + } |
| 36 | + which is rendered as 'foo >= FLOAT('1')' in the query. |
| 37 | + groupings: a list of field names to group by. |
| 38 | + order_by: a dict with two keys, field and direction. |
| 39 | + Such that the dictionary should be formatted as |
| 40 | + {'field':'TimeStamp, 'direction':'desc'}. |
| 41 | +
|
| 42 | + Returns: |
| 43 | + a query string. |
| 44 | + """ |
| 45 | + |
| 46 | + if None in (dataset, tables): |
| 47 | + return None |
| 48 | + |
| 49 | + query = "%s %s %s %s %s" % ( |
| 50 | + _render_select(select), |
| 51 | + _render_sources(dataset, tables), |
| 52 | + _render_conditions(conditions), |
| 53 | + _render_groupings(groupings), |
| 54 | + _render_order(order_by.get('field'), order_by.get('direction'))) |
| 55 | + |
| 56 | + return query |
| 57 | + |
| 58 | + |
| 59 | +def _render_select(selections): |
| 60 | + """Render the selection part of a query. |
| 61 | +
|
| 62 | + Args: |
| 63 | + selections: a dictionary of selections for a table. The |
| 64 | + keys function as column names and the values function as |
| 65 | + options to apply to the select field such as alias and format. |
| 66 | + For example {'start_time': {'alias': 'StartTime', 'format': |
| 67 | + 'INTEGER-FORMAT_UTC_USEC'}} is represented as |
| 68 | + 'SEC_TO_TIMESTAMP(INTEGER(start_time))' in a query. Pass None to |
| 69 | + select all. |
| 70 | +
|
| 71 | + Returns: |
| 72 | + a string that represents the select part of a query. |
| 73 | + """ |
| 74 | + |
| 75 | + if not selections: |
| 76 | + return 'SELECT *' |
| 77 | + |
| 78 | + rendered_selections = [] |
| 79 | + for name, options in selections.iteritems(): |
| 80 | + if not isinstance(options, list): |
| 81 | + options = [options] |
| 82 | + |
| 83 | + original_name = name |
| 84 | + for options_dict in options: |
| 85 | + name = original_name |
| 86 | + alias = options_dict.get('alias') |
| 87 | + alias = "as %s" % alias if alias else "" |
| 88 | + |
| 89 | + formatter = options_dict.get('format') |
| 90 | + if formatter: |
| 91 | + for caster in formatter.split('-'): |
| 92 | + if caster == 'SEC_TO_MICRO': |
| 93 | + name = "%s*1000000" % name |
| 94 | + elif ':' in caster: |
| 95 | + caster, args = caster.split(':') |
| 96 | + name = "%s(%s,%s)" % (caster, name, args) |
| 97 | + else: |
| 98 | + name = "%s(%s)" % (caster, name) |
| 99 | + |
| 100 | + rendered_selections.append("%s %s" % (name, alias)) |
| 101 | + |
| 102 | + return "SELECT " + ", ".join(rendered_selections) |
| 103 | + |
| 104 | + |
| 105 | +def _render_sources(dataset, tables): |
| 106 | + """Render the source part of a query. |
| 107 | +
|
| 108 | + Args: |
| 109 | + dataset: the data set to fetch log data from. |
| 110 | + tables: the tables to fetch log data from. |
| 111 | +
|
| 112 | + Returns: |
| 113 | + a string that represents the from part of a query. |
| 114 | + """ |
| 115 | + |
| 116 | + return "FROM " + ", ".join( |
| 117 | + ["[%s.%s]" % (dataset, table) for table in tables]) |
| 118 | + |
| 119 | + |
| 120 | +def _render_conditions(conditions): |
| 121 | + """Render the conditions part of a query. |
| 122 | +
|
| 123 | + Args: |
| 124 | + conditions: a list of dictionary items to filter a table. |
| 125 | + Each dict should be formatted as {'field': 'start_time', |
| 126 | + 'value': {'value': 1, 'negate': False}, 'comparator': '>', |
| 127 | + 'type': 'FLOAT'} which is represetned as |
| 128 | + 'start_time > FLOAT('1')' in the query. |
| 129 | +
|
| 130 | + Returns: |
| 131 | + a string that represents the where part of a query. |
| 132 | + """ |
| 133 | + |
| 134 | + if not conditions: |
| 135 | + return "" |
| 136 | + |
| 137 | + rendered_conditions = [] |
| 138 | + |
| 139 | + for condition in conditions: |
| 140 | + field = condition.get('field') |
| 141 | + field_type = condition.get('type') |
| 142 | + comparators = condition.get('comparators') |
| 143 | + |
| 144 | + if None in (field, field_type, comparators) or not comparators: |
| 145 | + logging.warn('Invalid condition passed in: %s' % condition) |
| 146 | + continue |
| 147 | + |
| 148 | + rendered_conditions.append( |
| 149 | + _render_condition(field, field_type, comparators)) |
| 150 | + |
| 151 | + if not rendered_conditions: |
| 152 | + return "" |
| 153 | + |
| 154 | + return "WHERE %s" % (" AND ".join(rendered_conditions)) |
| 155 | + |
| 156 | + |
| 157 | +def _render_condition(field, field_type, comparators): |
| 158 | + """Render a single query condition. |
| 159 | +
|
| 160 | + Args: |
| 161 | + field: the field the condition applies to. |
| 162 | + field_type: the data type of the field. |
| 163 | + comparator: the logic operator to use. |
| 164 | + value_dicts: a list of value dicts of the form |
| 165 | + {'value': 'foo', 'negate': False} |
| 166 | +
|
| 167 | + Returns: |
| 168 | + a condition string. |
| 169 | + """ |
| 170 | + |
| 171 | + field_type = field_type.upper() |
| 172 | + |
| 173 | + negated_conditions, normal_conditions = [], [] |
| 174 | + |
| 175 | + for comparator in comparators: |
| 176 | + condition = comparator.get("condition").upper() |
| 177 | + negated = "NOT " if comparator.get("negate") else "" |
| 178 | + value = comparator.get("value") |
| 179 | + |
| 180 | + # BigQuery cannot cast strings to booleans, convert to ints |
| 181 | + if field_type == "BOOLEAN": |
| 182 | + value = 1 if value else 0 |
| 183 | + elif field_type in ("STRING", "INTEGER", "FLOAT"): |
| 184 | + value = "'%s'" % (value) |
| 185 | + |
| 186 | + rendered_sub_condition = "%s%s %s %s(%s)" % ( |
| 187 | + negated, field, condition, field_type, value) |
| 188 | + |
| 189 | + if comparator.get("negate"): |
| 190 | + negated_conditions.append(rendered_sub_condition) |
| 191 | + else: |
| 192 | + normal_conditions.append(rendered_sub_condition) |
| 193 | + |
| 194 | + rendered_normal = " AND ".join(normal_conditions) |
| 195 | + rendered_negated = " AND ".join(negated_conditions) |
| 196 | + |
| 197 | + if rendered_normal and rendered_negated: |
| 198 | + return "((%s) AND (%s))" % (rendered_normal, rendered_negated) |
| 199 | + |
| 200 | + return "(%s)" % (rendered_normal or rendered_negated) |
| 201 | + |
| 202 | + |
| 203 | +def _render_order(field, direction): |
| 204 | + """Render the order by part of a query. |
| 205 | +
|
| 206 | + Args: |
| 207 | + order_by: a dictionary with two keys, field and direction. |
| 208 | + Such that the dictionary should be formatted as |
| 209 | + {'field':'TimeStamp, 'direction':'desc'}. |
| 210 | +
|
| 211 | + Returns: |
| 212 | + a string that represents the order by part of a query. |
| 213 | + """ |
| 214 | + |
| 215 | + if not field or not direction: |
| 216 | + return "" |
| 217 | + |
| 218 | + return "ORDER BY %s %s" % (field, direction) |
| 219 | + |
| 220 | + |
| 221 | +def _render_groupings(fields): |
| 222 | + """Render the group by part of a query. |
| 223 | +
|
| 224 | + Args: |
| 225 | + fields: a list of fields to group by. |
| 226 | +
|
| 227 | + Returns: |
| 228 | + a string that represents the group by part of a query. |
| 229 | + """ |
| 230 | + |
| 231 | + if not fields: |
| 232 | + return "" |
| 233 | + |
| 234 | + return "GROUP BY " + ", ".join(fields) |
| 235 | + |
0 commit comments