diff --git a/.gitignore b/.gitignore index 6f21cde..99119a7 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,6 @@ nosetests.xml .mr.developer.cfg .project .pydevproject + +# Allow for virtualenv usage +venv/ diff --git a/Makefile b/Makefile index ec4159d..0b48e33 100644 --- a/Makefile +++ b/Makefile @@ -31,6 +31,6 @@ $(BUILD_DIR)/pip-dev.out: requirements_dev.txt unit: nosetests -integrations: +integrations: py_dev_deps nosetests --logging-level=ERROR -a slow --with-coverage --cover-package=bigquery diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index 8fc403f..5e1d197 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -54,6 +54,23 @@ def render_query(dataset, tables, select=None, conditions=None, return query +def generate_formatter(function_name, arguments=list(), inner_function=""): + """ + Generates a string which the 'formatter' key in a select dictionary will + understand as a series of functions. + + function_name : str + The name of the function to apply to your data + arguments: list, optional + A list of arguments to pass to that function + inner_function: str, optional + A string representation of a series of formatter functions that + function_name will be applied to. + """ + arguments_string = ":" + ",".join(map(str, arguments)) if len(arguments) > 0 else "" + inner_function_with_suffix = inner_function + "-" if inner_function \ + is not "" else "" + return inner_function_with_suffix + function_name + arguments_string def _render_select(selections): """Render the selection part of a query. @@ -84,14 +101,23 @@ def _render_select(selections): original_name = name for options_dict in options: name = original_name + name_qualifiers = list() + aggregation_level = options_dict.get('aggregation_level') + if aggregation_level: + name_qualifiers.append("WITHIN {0}".format(aggregation_level)) + alias = options_dict.get('alias') - alias = "as %s" % alias if alias else "" + if alias: + name_qualifiers.append("as {0}".format(alias)) formatter = options_dict.get('format') if formatter: name = _format_select(formatter, name) - rendered_selections.append("%s %s" % (name, alias)) + rendered_selections.append("{0} {1}".format(name, + " ".join(name_qualifiers))) + rendered_selections = [rendered_selection.strip() for \ + rendered_selection in rendered_selections] return "SELECT " + ", ".join(rendered_selections) @@ -117,12 +143,19 @@ def _format_select(formatter, name): for caster in formatter.split('-'): if caster == 'SEC_TO_MICRO': - name = "%s*1000000" % name + name = "{0}*1000000".format(name) elif ':' in caster: - caster, args = caster.split(':') - name = "%s(%s,%s)" % (caster, name, args) + caster, joined_arguments = caster.split(':') + arguments = joined_arguments.split(',') + # For an IF, we need to assume that the user just passed the full + # condition as just an argument. + if caster == 'IF': + name = "IF({0}, {1}, {2})".format(arguments[0], arguments[1], + arguments[2]) + else: + name = "{0}({1},{2})".format(caster, name, ",".join(arguments)) else: - name = "%s(%s)" % (caster, name) + name = "{0}({1})".format(caster, name) return name diff --git a/bigquery/tests/test_query_builder.py b/bigquery/tests/test_query_builder.py index df37a3e..918fc3b 100644 --- a/bigquery/tests/test_query_builder.py +++ b/bigquery/tests/test_query_builder.py @@ -1,14 +1,44 @@ import six import unittest +from bigquery.query_builder import generate_formatter +from bigquery.query_builder import render_query +from bigquery.query_builder import _render_conditions +from bigquery.query_builder import _render_groupings +from bigquery.query_builder import _render_having +from bigquery.query_builder import _render_order +from bigquery.query_builder import _render_select +from bigquery.query_builder import _render_sources + unittest.TestCase.maxDiff = None +class TestGenerateFunctions(unittest.TestCase): + def test_single_function(self): + self.assertEqual(generate_formatter('MAX'), 'MAX') + + def test_single_function_with_args(self): + self.assertEqual(generate_formatter('LEFT', + arguments=[10, 'test']), 'LEFT:10,test') + + def test_nested_functions(self): + + integer_function = generate_formatter('INTEGER') + max_integer_function = generate_formatter('MAX', + inner_function=integer_function) + self.assertEqual('INTEGER-MAX', max_integer_function) + + def test_nesting_with_args(self): + left_function = generate_formatter('LEFT', + arguments=[10]) + integer_func = generate_formatter('INTEGER', + inner_function=left_function) + + self.assertEqual('LEFT:10-INTEGER', integer_func) class TestRenderSelect(unittest.TestCase): def test_multiple_selects(self): """Ensure that render select can handle multiple selects.""" - from bigquery.query_builder import _render_select result = _render_select({ 'start_time': {'alias': 'TimeStamp'}, @@ -31,7 +61,6 @@ def test_multiple_selects(self): def test_casting(self): """Ensure that render select can handle custom casting.""" - from bigquery.query_builder import _render_select result = _render_select({ 'start_time': { @@ -46,19 +75,69 @@ def test_casting(self): def test_no_selects(self): """Ensure that render select can handle being passed no selects.""" - from bigquery.query_builder import _render_select result = _render_select({}) self.assertEqual(result, 'SELECT *') + def test_aggregation_within_record(self): + """Ensure that aggregation within a query works when there is an alias + for that field. + """ + + result_select = _render_select({ + 'start_time': { + 'alias': 'timestamp', + 'aggregation_level': 'RECORD', + 'format': 'MAX' + }}) + expected_select = 'SELECT MAX(start_time) WITHIN RECORD as timestamp' + self.assertEqual(expected_select, result_select) + + def test_aggregation_within_record_no_alias(self): + """ + Ensure that aggregation within a query works when there is no alias for + that field. + """ + + result_select = _render_select({ + 'start_time': { + 'aggregation_level': 'RECORD', + 'format': 'MAX' + }}) + expected_select = 'SELECT MAX(start_time) WITHIN RECORD' + self.assertEqual(expected_select, result_select) + + def test_formatting_if_statement_works_correctly(self): + """ + Ensure that if we pass an IF formatter, the library generates correct + BigQuery. + """ + + result_select = _render_select({ + 'start_time': { + 'format': 'IF:start_time != null,1,2' + }}) + expected_select = 'SELECT IF(start_time != null, 1, 2)' + self.assertEqual(expected_select, result_select) + + def test_multiple_formatting_with_if(self): + """ + Ensure that if we wrap an if statement in another formatter, that syntax + is correctly generated. + """ + + result_select = _render_select({ + 'start_time': { + 'format': 'IF:start_time != null,1,2-MAX' + }}) + expected_select = 'SELECT MAX(IF(start_time != null, 1, 2))' + self.assertEqual(expected_select, result_select) class TestRenderSources(unittest.TestCase): def test_multi_tables(self): """Ensure that render sources can handle multiple sources.""" - from bigquery.query_builder import _render_sources - result = _render_sources('spider', ['man', 'pig', 'bro']) self.assertEqual( @@ -66,7 +145,6 @@ def test_multi_tables(self): def test_no_tables(self): """Ensure that render sources can handle no tables.""" - from bigquery.query_builder import _render_sources result = _render_sources('spider', []) @@ -74,7 +152,6 @@ def test_no_tables(self): def test_no_dataset(self): """Ensure that render sources can handle no data sets.""" - from bigquery.query_builder import _render_sources result = _render_sources('', ['man', 'pig', 'bro']) @@ -82,7 +159,6 @@ def test_no_dataset(self): def test_tables_in_date_range(self): """Ensure that render sources can handle tables in DATE RANGE.""" - from bigquery.query_builder import _render_sources tables = { 'date_range': True, @@ -101,9 +177,6 @@ class TestRenderConditions(unittest.TestCase): def test_single_condition(self): """Ensure that render conditions can handle a single condition.""" - from bigquery.query_builder \ - import _render_conditions - result = _render_conditions([ { 'field': 'bar', @@ -118,9 +191,6 @@ def test_single_condition(self): def test_multiple_conditions(self): """Ensure that render conditions can handle multiple conditions.""" - from bigquery.query_builder \ - import _render_conditions - result = _render_conditions([ { 'field': 'a', @@ -151,9 +221,6 @@ def test_multiple_conditions(self): def test_multiple_comparators(self): """Ensure that render conditions can handle a multiple comparators.""" - from bigquery.query_builder \ - import _render_conditions - result = _render_conditions([ { 'field': 'foobar', @@ -177,9 +244,6 @@ def test_multiple_comparators(self): def test_boolean_field_type(self): """Ensure that render conditions can handle a boolean field.""" - from bigquery.query_builder \ - import _render_conditions - result = _render_conditions([ { 'field': 'foobar', @@ -203,9 +267,6 @@ def test_boolean_field_type(self): def test_in_comparator(self): """Ensure that render conditions can handle "IN" condition.""" - from bigquery.query_builder \ - import _render_conditions - result = _render_conditions([ { 'field': 'foobar', @@ -236,8 +297,6 @@ def test_in_comparator(self): def test_between_comparator(self): """Ensure that render conditions can handle "BETWEEN" condition.""" - from bigquery.query_builder import _render_conditions - result = _render_conditions([ { 'field': 'foobar', @@ -275,16 +334,12 @@ class TestRenderOrder(unittest.TestCase): def test_order(self): """Ensure that render order can work under expected conditions.""" - from bigquery.query_builder import _render_order - result = _render_order({'fields': ['foo'], 'direction': 'desc'}) self.assertEqual(result, "ORDER BY foo desc") def test_no_order(self): """Ensure that render order can work with out any arguments.""" - from bigquery.query_builder import _render_order - result = _render_order(None) self.assertEqual(result, "") @@ -294,18 +349,12 @@ class TestGroupings(unittest.TestCase): def test_mutliple_fields(self): """Ensure that render grouping works with multiple fields.""" - from bigquery.query_builder \ - import _render_groupings - result = _render_groupings(['foo', 'bar', 'shark']) self.assertEqual(result, "GROUP BY foo, bar, shark") def test_no_fields(self): """Ensure that render groupings can work with out any arguments.""" - from bigquery.query_builder \ - import _render_groupings - result = _render_groupings(None) self.assertEqual(result, "") @@ -315,9 +364,6 @@ class TestRenderHaving(unittest.TestCase): def test_mutliple_fields(self): """Ensure that render having works with multiple fields.""" - from bigquery.query_builder \ - import _render_having - result = _render_having([ { 'field': 'bar', @@ -332,9 +378,6 @@ def test_mutliple_fields(self): def test_no_fields(self): """Ensure that render having can work with out any arguments.""" - from bigquery.query_builder \ - import _render_having - result = _render_having(None) self.assertEqual(result, "") @@ -344,8 +387,6 @@ class TestRenderQuery(unittest.TestCase): def test_full_query(self): """Ensure that all the render query arguments work together.""" - from bigquery.query_builder import render_query - result = render_query( dataset='dataset', tables=['2013_06_appspot_1'], @@ -411,8 +452,6 @@ def test_full_query(self): def test_empty_conditions(self): """Ensure that render query can handle an empty list of conditions.""" - from bigquery.query_builder import render_query - result = render_query( dataset='dataset', tables=['2013_06_appspot_1'], @@ -442,8 +481,6 @@ def test_incorrect_conditions(self): """Ensure that render query can handle incorrectly formatted conditions. """ - from bigquery.query_builder import render_query - result = render_query( dataset='dataset', tables=['2013_06_appspot_1'], @@ -477,8 +514,6 @@ def test_incorrect_conditions(self): def test_multiple_condition_values(self): """Ensure that render query can handle conditions with multiple values. """ - from bigquery.query_builder import render_query - result = render_query( dataset='dataset', tables=['2013_06_appspot_1'], @@ -529,8 +564,6 @@ def test_multiple_condition_values(self): def test_negated_condition_value(self): """Ensure that render query can handle conditions with negated values. """ - from bigquery.query_builder import render_query - result = render_query( dataset='dataset', tables=['2013_06_appspot_1'], @@ -564,8 +597,6 @@ def test_multiple_negated_condition_values(self): """Ensure that render query can handle conditions with multiple negated values. """ - from bigquery.query_builder import render_query - result = render_query( dataset='dataset', tables=['2013_06_appspot_1'], @@ -605,8 +636,6 @@ def test_multiple_negated_condition_values(self): def test_empty_order(self): """Ensure that render query can handle an empty formatted order.""" - from bigquery.query_builder import render_query - result = render_query( dataset='dataset', tables=['2013_06_appspot_1'], @@ -643,8 +672,6 @@ def test_empty_order(self): def test_incorrect_order(self): """Ensure that render query can handle inccorectly formatted order.""" - from bigquery.query_builder import render_query - result = render_query( dataset='dataset', tables=['2013_06_appspot_1'], @@ -681,8 +708,6 @@ def test_incorrect_order(self): def test_empty_select(self): """Ensure that render query corrently handles no selection.""" - from bigquery.query_builder import render_query - result = render_query( dataset='dataset', tables=['2013_06_appspot_1'], @@ -707,8 +732,6 @@ def test_empty_select(self): def test_no_alias(self): """Ensure that render query runs without an alias for a select.""" - from bigquery.query_builder import render_query - result = render_query( dataset='dataset', tables=['2013_06_appspot_1'], @@ -747,8 +770,6 @@ def test_no_alias(self): def test_formatting(self): """Ensure that render query runs with formatting a select.""" - from bigquery.query_builder import render_query - result = render_query( dataset='dataset', tables=['2013_06_appspot_1'], @@ -791,8 +812,6 @@ def test_formatting_duplicate_columns(self): """Ensure that render query runs with formatting a select for a column selected twice. """ - from bigquery.query_builder import render_query - result = render_query( dataset='dataset', tables=['2013_06_appspot_1'], @@ -844,8 +863,6 @@ def test_sec_to_micro_formatting(self): """Ensure that render query runs sec_to_micro formatting on a select. """ - from bigquery.query_builder import render_query - result = render_query( dataset='dataset', tables=['2013_06_appspot_1'], @@ -888,8 +905,6 @@ def test_no_table_or_dataset(self): """Ensure that render query returns None if there is no dataset or table. """ - from bigquery.query_builder import render_query - result = render_query( dataset=None, tables=None, @@ -914,8 +929,6 @@ def test_no_table_or_dataset(self): def test_empty_groupings(self): """Ensure that render query can handle an empty list of groupings.""" - from bigquery.query_builder import render_query - result = render_query( dataset='dataset', tables=['2013_06_appspot_1'], @@ -942,8 +955,6 @@ def test_empty_groupings(self): def test_multi_tables(self): """Ensure that render query arguments work with multiple tables.""" - from bigquery.query_builder import render_query - result = render_query( dataset='dataset', tables=['2013_06_appspot_1', '2013_07_appspot_1'], diff --git a/requirements_dev.txt b/requirements_dev.txt index 74162c3..64f5833 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -3,4 +3,7 @@ rednose mock==1.0.1 coverage nose-exclude +six +google-api-python-client +dateutils tox