From 8ebf84f6310b5bfd26de18b9dce50c7f37ff9b94 Mon Sep 17 00:00:00 2001 From: Ege U Date: Tue, 10 Dec 2019 16:14:06 +0300 Subject: [PATCH 1/5] Dry runs return bytes processed, and cache hit now --- bigquery/client.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 125d048..eedafc2 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -243,7 +243,7 @@ def _submit_query_job(self, query_data): ------- tuple job id and query results if query completed. If dry_run is True, - job id will be None and results will be empty if the query is valid + job id will be None and results will be [cacheHit and totalBytesProcessed] if the query is valid or a dict containing the response if invalid. Raises @@ -269,13 +269,17 @@ def _submit_query_job(self, query_data): schema = query_reply.get('schema', {'fields': None})['fields'] rows = query_reply.get('rows', []) job_complete = query_reply.get('jobComplete', False) + cache_hit = query_reply['cacheHit'] + total_bytes_processed = query_reply['totalBytesProcessed'] # raise exceptions if it's not an async query # and job is not completed after timeout if not job_complete and query_data.get("timeoutMs", False): logger.error('BigQuery job %s timeout' % job_id) raise BigQueryTimeoutException() - + + if query_data.get("dryRun", True): + return job_id, [cache_hit, total_bytes_processed] return job_id, [self._transform_row(row, schema) for row in rows] def _get_job_reference(self, job_id): @@ -345,8 +349,8 @@ def query(self, query, max_results=None, timeout=0, dry_run=False, use_legacy_sq How long to wait for the query to complete, in seconds before the request times out and returns. dry_run : bool, optional - If True, the query isn't actually run. A valid query will return an - empty response, while an invalid one will return the same error + If True, the query isn't actually run. A valid query will return + cache hit, and total bytes processed, while an invalid one will return the same error message it would if it wasn't a dry run. use_legacy_sql : bool, optional. Default True. If False, the query will use BigQuery's standard SQL (https://cloud.google.com/bigquery/sql-reference/) @@ -359,7 +363,7 @@ def query(self, query, max_results=None, timeout=0, dry_run=False, use_legacy_sq ------- tuple (job id, query results) if the query completed. If dry_run is True, - job id will be None and results will be empty if the query is valid + job id will be None and results will be [cacheHit and totalBytesProcessed] if the query is valid or a ``dict`` containing the response if invalid. Raises From 01f38be5947df8ae5a9936703181a8062c5fc48c Mon Sep 17 00:00:00 2001 From: Ege U Date: Tue, 10 Dec 2019 16:32:44 +0300 Subject: [PATCH 2/5] Rewrote the tests --- bigquery/tests/test_client.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 5d36aa9..1f2d247 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -297,7 +297,9 @@ def test_query(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, - 'jobComplete': True + 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -329,6 +331,8 @@ def test_query_max_results_set(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -357,6 +361,8 @@ def test_query_timeout_set(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -382,6 +388,8 @@ def test_sync_query_timeout(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, 'jobComplete': False, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -400,6 +408,8 @@ def test_async_query_timeout(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, 'jobComplete': False, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -409,14 +419,18 @@ def test_async_query_timeout(self): self.assertEquals(results, []) def test_query_dry_run_valid(self): - """Ensure that None and an empty list is returned from the query when + """Ensure that None and [cacheHit, totalBytesProcessed] is returned from the query when dry_run is True and the query is valid. """ mock_query_job = mock.Mock() - mock_query_job.execute.return_value = {'jobReference': {}, - 'jobComplete': True} + mock_query_job.execute.return_value = { + 'jobReference': {}, + 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 + } self.mock_job_collection.query.return_value = mock_query_job @@ -428,7 +442,7 @@ def test_query_dry_run_valid(self): 'dryRun': True} ) self.assertIsNone(job_id) - self.assertEqual([], results) + self.assertEqual([False, 0], results) def test_query_dry_run_invalid(self): """Ensure that None and a dict is returned from the query when dry_run @@ -468,6 +482,8 @@ def test_query_with_results(self): 'schema': {'fields': [{'name': 'foo', 'type': 'INTEGER'}]}, 'rows': [{'f': [{'v': 10}]}], 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -491,7 +507,9 @@ def test_query_with_using_legacy_sql(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, - 'jobComplete': True + 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -873,7 +891,7 @@ def test_json_job_body_constructed_correctly(self): body = { "jobReference": { "projectId": self.project_id, - "jobId": "job" + "jobId": "job", }, "configuration": { "load": { From 0d2c801745c48732f20c9002d2a6026995875540 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Tue, 10 Dec 2019 17:48:32 -0600 Subject: [PATCH 3/5] Bump version to 1.15.0 --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index c162747..1c19d78 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.14.1' +__version__ = '1.15.0' From df42f83b637fbe4a70eac200ae05ea8a5f775316 Mon Sep 17 00:00:00 2001 From: Rahul Kumar Gupta <67097571+rahulshivan05@users.noreply.github.com> Date: Thu, 1 Oct 2020 09:06:03 +0530 Subject: [PATCH 4/5] Update requirements_dev.txt --- requirements_dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index 74162c3..1040dea 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,6 +1,6 @@ nose rednose -mock==1.0.1 +mock==4.0.2 coverage nose-exclude tox From 77a7b1b1f3c3cbe50ce0db20b2ebc39012fbca78 Mon Sep 17 00:00:00 2001 From: Tim Gates Date: Wed, 24 Nov 2021 06:50:37 +1100 Subject: [PATCH 5/5] docs: fix simple typo, offical -> official There is a small typo in bigquery/client.py. Should read `official` rather than `offical`. --- bigquery/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index eedafc2..bb4d50a 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -227,7 +227,7 @@ def _submit_query_job(self, query_data): """ Submit a query job to BigQuery. This is similar to BigQueryClient.query, but gives the user - direct access to the query method on the offical BigQuery + direct access to the query method on the official BigQuery python client. For fine-grained control over a query job, see: @@ -306,7 +306,7 @@ def _get_job_reference(self, job_id): def _insert_job(self, body_object): """ Submit a job to BigQuery - Direct proxy to the insert() method of the offical BigQuery + Direct proxy to the insert() method of the official BigQuery python client. Able to submit load, link, query, copy, or extract jobs.