From 8ccce603045a24bd0df49a2bf26a946ac84cba6e Mon Sep 17 00:00:00 2001 From: e271828- Date: Sat, 27 May 2017 09:27:37 -0700 Subject: [PATCH 01/31] _parse_table_name failed in the event of a name like _YYYYMMDD_ --- bigquery/client.py | 5 ++++- bigquery/tests/test_client.py | 11 ++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 17a3a89..b8971cd 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1436,6 +1436,8 @@ def _parse_table_name(self, table_id): """Parse a table name in the form of appid_YYYY_MM or YYYY_MM_appid and return a tuple consisting of YYYY-MM and the app id. + Returns (None, None) in the event of a name like _YYYYMMDD_ + Parameters ---------- table_id : str @@ -1463,9 +1465,10 @@ def _parse_table_name(self, table_id): year_month = "-".join(attributes[-2:]) app_id = "-".join(attributes[:-2]) + # Check if date parsed correctly if year_month.count("-") == 1 and all( - [num.isdigit() for num in year_month.split('-')]): + [num.isdigit() for num in year_month.split('-')]) and len(year_month) == 7: return year_month, app_id return None, None diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 1315147..a331387 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1333,6 +1333,15 @@ def test_not_inside_range(self): "kind": "bigquery#tableList", "etag": "\"GSclnjk0zID1ucM3F-xYinOm1oE/cn58Rpu8v8pB4eoJQaiTe11lPQc\"", "tables": [ + { + "kind": "bigquery#table", + "id": "project:dataset.notanappspottable_20130515_0261", + "tableReference": { + "projectId": "project", + "datasetId": "dataset", + "tableId": "notanappspottable_20130515_0261" + } + }, { "kind": "bigquery#table", "id": "project:dataset.2013_05_appspot_1", @@ -2389,7 +2398,7 @@ def test_get_all_tables(self): bq = client.BigQueryClient(mock_bq_service, 'project') expected_result = [ - '2013_05_appspot', '2013_06_appspot_1', '2013_06_appspot_2', + 
'notanappspottable_20130515_0261', '2013_05_appspot', '2013_06_appspot_1', '2013_06_appspot_2', '2013_06_appspot_3', '2013_06_appspot_4', '2013_06_appspot_5', 'appspot_6_2013_06', 'table_not_matching_naming' ] From 2e3509fa3ff8afc5206990fb3e347eae83fccde0 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Sun, 28 May 2017 22:20:33 -0500 Subject: [PATCH 02/31] Bump version to 1.11.2 --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index 522ba08..6c371de 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.11.1' +__version__ = '1.11.2' From d9e9ac7dc6e732505c13df74fed9f40473a2515d Mon Sep 17 00:00:00 2001 From: e271828- Date: Thu, 8 Jun 2017 18:50:28 -0700 Subject: [PATCH 03/31] document get_table --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2053e05..d53d9a9 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ job_id, _ = client.query(query) # Managing Tables -The BigQuery client provides facilities to manage dataset tables, including creating, deleting, and checking the existence of tables. +The BigQuery client provides facilities to manage dataset tables, including creating, deleting, checking the existence, and getting the metadata of tables. ```python # Create a new table. @@ -150,6 +150,10 @@ deleted = client.delete_table('dataset', 'my_table') # Check if a table exists. exists = client.check_table('dataset', 'my_table') + +# Get a table's full metadata. Includes numRows, numBytes, etc. +# See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables +metadata = client.get_table('dataset', 'my_table') ``` There is also functionality for retrieving tables that are associated with a Google App Engine appspot, assuming table names are in the form of appid_YYYY_MM or YYYY_MM_appid. This allows tables between a date range to be selected and queried on. 
From 503b8a6553b7fc684e784abc9136baad0af17a80 Mon Sep 17 00:00:00 2001 From: tushar Date: Mon, 12 Jun 2017 18:56:38 +0200 Subject: [PATCH 04/31] updated update_table to include tableId= table --- bigquery/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bigquery/client.py b/bigquery/client.py index b8971cd..db264a2 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -618,6 +618,7 @@ def update_table(self, dataset, table, schema): try: result = self.bigquery.tables().update( projectId=self.project_id, + tableId= table, datasetId=dataset, body=body ).execute() From b60256e97def3f7bd68ad53f6c21de8fcdf47d89 Mon Sep 17 00:00:00 2001 From: tushar Date: Mon, 12 Jun 2017 18:57:03 +0200 Subject: [PATCH 05/31] update update_table unit test to include tableId --- bigquery/tests/test_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index a331387..60bcc42 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1830,7 +1830,7 @@ def test_table_update_failed(self): self.client.swallow_results = True self.mock_tables.update.assert_called_with( - projectId=self.project, datasetId=self.dataset, body=self.body) + projectId=self.project, tableId=self.table, datasetId=self.dataset, body=self.body) self.mock_tables.update.return_value.execute.assert_called_with() @@ -1856,7 +1856,7 @@ def test_table_update_success(self): self.client.swallow_results = True self.mock_tables.update.assert_called_with( - projectId=self.project, datasetId=self.dataset, body=self.body) + projectId=self.project, tableId=self.table, datasetId=self.dataset, body=self.body) self.mock_tables.update.return_value.execute.assert_called_with() From d6744c37431d1445fd6a419625d140b289a068b9 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Tue, 13 Jun 2017 09:04:41 -0500 Subject: [PATCH 06/31] Bump version to 1.12.0 --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index 6c371de..666b2f7 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.11.2' +__version__ = '1.12.0' From d18356a93d5af24cc50eed6bfd7cd3154f313947 Mon Sep 17 00:00:00 2001 From: Vishvajit Pathak Date: Thu, 3 Aug 2017 17:57:56 +0530 Subject: [PATCH 07/31] typo fixes --- bigquery/query_builder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index 1054299..b29d0cd 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -29,7 +29,7 @@ def render_query(dataset, tables, select=None, conditions=None, 'comparators' maps to another ``dict`` containing the keys 'condition', 'negate', and 'value'. If 'comparators' = {'condition': '>=', 'negate': False, 'value': 1}, - this example will be rdnered as 'foo >= FLOAT('1')' in the query. + this example will be rendered as 'foo >= FLOAT('1')' in the query. ``list`` of field names to group by order_by : dict, optional Keys = {'field', 'direction'}. `dict` should be formatted as @@ -170,7 +170,7 @@ def _render_conditions(conditions): Parameters ---------- conditions : list - A list of dictionay items to filter a table. + A list of dictionary items to filter a table. 
Returns ------- From 66c18809061eefb646d5a08d2725d2f85059fdd7 Mon Sep 17 00:00:00 2001 From: Vishvajit Pathak Date: Thu, 3 Aug 2017 18:04:50 +0530 Subject: [PATCH 08/31] typos fix --- bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/client.py b/bigquery/client.py index db264a2..61933e4 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -983,7 +983,7 @@ def export_data_to_uris( Parameters ---------- - destination_urls : Union[str, list] + destination_uris : Union[str, list] ``str`` or ``list`` of ``str`` objects representing the URIs on cloud storage of the form: gs://bucket/filename dataset : str From 55915c0f7134f4f245bb7da4a9c0425910dc3c66 Mon Sep 17 00:00:00 2001 From: Vishvajit Pathak Date: Thu, 3 Aug 2017 18:28:42 +0530 Subject: [PATCH 09/31] typo fix --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d53d9a9..8171078 100644 --- a/README.md +++ b/README.md @@ -295,7 +295,7 @@ exists = client.check_dataset('mydataset') ```python from bigquery import schema_from_record -schema_from_record({"id":123, "posts": [{"id":123, "text": "tihs is a post"}], "username": "bob"}) +schema_from_record({"id":123, "posts": [{"id":123, "text": "this is a post"}], "username": "bob"}) ``` # Contributing From b34eff532daaa53bb0192a1e2f258d5e47ced51f Mon Sep 17 00:00:00 2001 From: Vishvajit Pathak Date: Thu, 3 Aug 2017 19:01:17 +0530 Subject: [PATCH 10/31] expiration_time changed to int/double --- bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/client.py b/bigquery/client.py index 61933e4..9af8dcb 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -541,7 +541,7 @@ def create_table(self, dataset, table, schema, The name of the table to create schema : dict The table schema - expiration_time : float, optional + expiration_time : int or double, optional The expiry time in milliseconds since the epoch. 
time_partitioning : bool, optional Create a time partitioning. From 67c855626a0f6e83f2724387b83c2c0440234a1f Mon Sep 17 00:00:00 2001 From: Yves Bastide Date: Mon, 31 Jul 2017 15:18:18 +0200 Subject: [PATCH 11/31] Add num_retries argument Signed-off-by: Yves Bastide --- bigquery/client.py | 56 +++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 9af8dcb..847c9fb 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -55,7 +55,8 @@ def get_client(project_id=None, credentials=None, service_url=None, service_account=None, private_key=None, private_key_file=None, json_key=None, json_key_file=None, - readonly=True, swallow_results=True): + readonly=True, swallow_results=True, + num_retries=0): """Return a singleton instance of BigQueryClient. Either AssertionCredentials or a service account and private key combination need to be provided in order to authenticate requests to BigQuery. @@ -94,6 +95,9 @@ def get_client(project_id=None, credentials=None, swallow_results : bool If set to False, then return the actual response value instead of converting to boolean. Default True. + num_retries : int, optional + The number of times to retry the request. Default 0 (no retry). 
+ Returns ------- @@ -147,7 +151,8 @@ def get_client(project_id=None, credentials=None, bq_service = _get_bq_service(credentials=credentials, service_url=service_url) - return BigQueryClient(bq_service, project_id, swallow_results) + return BigQueryClient(bq_service, project_id, swallow_results, + num_retries) def get_projects(bq_service): @@ -185,10 +190,12 @@ def _credentials(): class BigQueryClient(object): - def __init__(self, bq_service, project_id, swallow_results=True): + def __init__(self, bq_service, project_id, swallow_results=True, + num_retries=0): self.bigquery = bq_service self.project_id = project_id self.swallow_results = swallow_results + self.num_retries = num_retries self.cache = {} def _submit_query_job(self, query_data): @@ -226,7 +233,8 @@ def _submit_query_job(self, query_data): try: query_reply = job_collection.query( - projectId=self.project_id, body=query_data).execute() + projectId=self.project_id, body=query_data).execute( + num_retries=self.num_retries) except HttpError as e: if query_data.get("dryRun", False): return None, json.loads(e.content.decode('utf8')) @@ -276,7 +284,7 @@ def _insert_job(self, body_object): return job_collection.insert( projectId=self.project_id, body=body_object - ).execute() + ).execute(num_retries=self.num_retries) def query(self, query, max_results=None, timeout=0, dry_run=False, use_legacy_sql=None, external_udf_uris=None): """Submit a query to BigQuery. 
@@ -375,7 +383,7 @@ def get_table_schema(self, dataset, table): result = self.bigquery.tables().get( projectId=self.project_id, tableId=table, - datasetId=dataset).execute() + datasetId=dataset).execute(num_retries=self.num_retries) except HttpError as e: if int(e.resp['status']) == 404: logger.warn('Table %s.%s does not exist', dataset, table) @@ -481,7 +489,8 @@ def get_dataset(self, dataset_id): """ try: dataset = self.bigquery.datasets().get( - projectId=self.project_id, datasetId=dataset_id).execute() + projectId=self.project_id, datasetId=dataset_id).execute( + num_retries=self.num_retries) except HttpError: dataset = {} @@ -523,7 +532,7 @@ def get_table(self, dataset, table): try: table = self.bigquery.tables().get( projectId=self.project_id, datasetId=dataset, - tableId=table).execute() + tableId=table).execute(num_retries=self.num_retries) except HttpError: table = {} @@ -573,7 +582,7 @@ def create_table(self, dataset, table, schema, projectId=self.project_id, datasetId=dataset, body=body - ).execute() + ).execute(num_retries=self.num_retries) if self.swallow_results: return True else: @@ -621,7 +630,7 @@ def update_table(self, dataset, table, schema): tableId= table, datasetId=dataset, body=body - ).execute() + ).execute(num_retries=self.num_retries) if self.swallow_results: return True else: @@ -668,7 +677,7 @@ def patch_table(self, dataset, table, schema): projectId=self.project_id, datasetId=dataset, body=body - ).execute() + ).execute(num_retries=self.num_retries) if self.swallow_results: return True else: @@ -723,7 +732,7 @@ def create_view(self, dataset, view, query, use_legacy_sql=None): projectId=self.project_id, datasetId=dataset, body=body - ).execute() + ).execute(num_retries=self.num_retries) if self.swallow_results: return True else: @@ -759,7 +768,7 @@ def delete_table(self, dataset, table): projectId=self.project_id, datasetId=dataset, tableId=table - ).execute() + ).execute(num_retries=self.num_retries) if self.swallow_results: return True 
else: @@ -1212,7 +1221,7 @@ def wait_for_job(self, job, interval=5, timeout=60): sleep(interval) request = self.bigquery.jobs().get(projectId=self.project_id, jobId=job_id) - job_resource = request.execute() + job_resource = request.execute(num_retries=self.num_retries) self._raise_executing_exception_if_error(job_resource) complete = job_resource.get('status').get('state') == u'DONE' elapsed_time = time() - start_time @@ -1288,7 +1297,7 @@ def push_rows(self, dataset, table, rows, insert_id_key=None, datasetId=dataset, tableId=table, body=data - ).execute() + ).execute(num_retries=self.num_retries) if response.get('insertErrors'): logger.error('BigQuery insert errors: %s' % response) @@ -1382,7 +1391,7 @@ def _get_all_tables_for_dataset(self, dataset_id): """ result = self.bigquery.tables().list( projectId=self.project_id, - datasetId=dataset_id).execute() + datasetId=dataset_id).execute(num_retries=self.num_retries) page_token = result.get('nextPageToken') while page_token: @@ -1390,7 +1399,7 @@ def _get_all_tables_for_dataset(self, dataset_id): projectId=self.project_id, datasetId=dataset_id, pageToken=page_token - ).execute() + ).execute(num_retries=self.num_retries) page_token = res.get('nextPageToken') result['tables'] += res.get('tables', []) return result @@ -1553,7 +1562,7 @@ def get_query_results(self, job_id, offset=None, limit=None, startIndex=offset, maxResults=limit, pageToken=page_token, - timeoutMs=timeout * 1000).execute() + timeoutMs=timeout * 1000).execute(num_retries=self.num_retries) def _transform_row(self, row, schema): """Apply the given schema to the given BigQuery data row. 
@@ -1708,7 +1717,8 @@ def create_dataset(self, dataset_id, friendly_name=None, description=None, location=location) response = datasets.insert(projectId=self.project_id, - body=dataset_data).execute() + body=dataset_data).execute( + num_retries=self.num_retries) if self.swallow_results: return True else: @@ -1732,7 +1742,7 @@ def get_datasets(self): try: datasets = self.bigquery.datasets() request = datasets.list(projectId=self.project_id) - result = request.execute() + result = request.execute(num_retries=self.num_retries) return result.get('datasets', []) except HttpError as e: logger.error("Cannot list datasets: {0}".format(e)) @@ -1766,7 +1776,7 @@ def delete_dataset(self, dataset_id, delete_contents=False): request = datasets.delete(projectId=self.project_id, datasetId=dataset_id, deleteContents=delete_contents) - response = request.execute() + response = request.execute(num_retries=self.num_retries) if self.swallow_results: return True else: @@ -1810,7 +1820,7 @@ def update_dataset(self, dataset_id, friendly_name=None, description=None, request = datasets.update(projectId=self.project_id, datasetId=dataset_id, body=body) - response = request.execute() + response = request.execute(num_retries=self.num_retries) if self.swallow_results: return True else: @@ -1853,7 +1863,7 @@ def patch_dataset(self, dataset_id, friendly_name=None, description=None, description, access) request = datasets.patch(projectId=self.project_id, datasetId=dataset_id, body=body) - response = request.execute() + response = request.execute(num_retries=self.num_retries) if self.swallow_results: return True else: From 289ad25b2b415a3d43ff6b75f3b4fbf48ce61a75 Mon Sep 17 00:00:00 2001 From: Yves Bastide Date: Tue, 5 Sep 2017 15:45:31 +0200 Subject: [PATCH 12/31] Fix tests Add `num_retries=0` to `execute.assert_called_with()`. 
Signed-off-by: Yves Bastide --- bigquery/tests/test_client.py | 77 +++++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 30 deletions(-) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 60bcc42..0bf5a18 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -292,7 +292,6 @@ def test_query(self): self.assertEquals(job_id, 'spiderman') self.assertEquals(results, []) - def test_query_max_results_set(self): """Ensure that we retrieve the job id from the query and the maxResults parameter is set. @@ -520,7 +519,7 @@ def test_get_response(self): projectId=self.project_id, jobId=job_id, startIndex=offset, maxResults=limit, pageToken=page_token, timeoutMs=1000) - mock_query_job.execute.assert_called_once_with() + mock_query_job.execute.assert_called_once_with(num_retries=0) self.assertEquals(actual, mock_query_reply) @@ -1485,7 +1484,8 @@ def test_table_exists(self): expected, self.client.get_table_schema(self.dataset, self.table)) self.mock_tables.get.assert_called_once_with( projectId=self.project, tableId=self.table, datasetId=self.dataset) - self.mock_tables.get.return_value.execute.assert_called_once_with() + self.mock_tables.get.return_value.execute. \ + assert_called_once_with(num_retries=0) def test_table_does_not_exist(self): """Ensure that None is returned if the table doesn't exist.""" @@ -1496,7 +1496,8 @@ def test_table_does_not_exist(self): self.client.get_table_schema(self.dataset, self.table)) self.mock_tables.get.assert_called_once_with( projectId=self.project, tableId=self.table, datasetId=self.dataset) - self.mock_tables.get.return_value.execute.assert_called_once_with() + self.mock_tables.get.return_value.execute. 
\ + assert_called_once_with(num_retries=0) @mock.patch('bigquery.client.BigQueryClient.get_query_results') @@ -1651,7 +1652,8 @@ def test_table_does_not_exist(self): self.mock_tables.get.assert_called_once_with( projectId=self.project, datasetId=self.dataset, tableId=self.table) - self.mock_tables.get.return_value.execute.assert_called_once_with() + self.mock_tables.get.return_value.execute. \ + assert_called_once_with(num_retries=0) def test_table_does_exist(self): """Ensure that if the table does exist, True is returned.""" @@ -1666,7 +1668,8 @@ def test_table_does_exist(self): self.mock_tables.get.assert_called_once_with( projectId=self.project, datasetId=self.dataset, tableId=self.table) - self.mock_tables.get.return_value.execute.assert_called_once_with() + self.mock_tables.get.return_value.execute. \ + assert_called_once_with(num_retries=0) class TestCreateTable(unittest.TestCase): @@ -1716,7 +1719,8 @@ def test_table_create_failed(self): self.mock_tables.insert.assert_called_with( projectId=self.project, datasetId=self.dataset, body=self.body) - self.mock_tables.insert.return_value.execute.assert_called_with() + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=0) def test_table_create_success(self): """Ensure that if creating the table succeeds, True is returned, @@ -1742,7 +1746,8 @@ def test_table_create_success(self): self.mock_tables.insert.assert_called_with( projectId=self.project, datasetId=self.dataset, body=self.body) - self.mock_tables.insert.return_value.execute.assert_called_with() + self.mock_tables.insert.return_value.execute. 
\ + assert_called_with(num_retries=0) def test_table_create_body_with_expiration_time(self): """Ensure that if expiration_time has specified, @@ -1762,7 +1767,8 @@ def test_table_create_body_with_expiration_time(self): self.mock_tables.insert.assert_called_with( projectId=self.project, datasetId=self.dataset, body=body) - self.mock_tables.insert.return_value.execute.assert_called_with() + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=0) def test_table_create_body_with_time_partitioning(self): """Ensure that if time_partitioning has specified, @@ -1783,7 +1789,8 @@ def test_table_create_body_with_time_partitioning(self): self.mock_tables.insert.assert_called_with( projectId=self.project, datasetId=self.dataset, body=body) - self.mock_tables.insert.return_value.execute.assert_called_with() + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=0) class TestUpdateTable(unittest.TestCase): @@ -1830,9 +1837,11 @@ def test_table_update_failed(self): self.client.swallow_results = True self.mock_tables.update.assert_called_with( - projectId=self.project, tableId=self.table, datasetId=self.dataset, body=self.body) + projectId=self.project, tableId=self.table, datasetId=self.dataset, + body=self.body) - self.mock_tables.update.return_value.execute.assert_called_with() + self.mock_tables.update.return_value.execute. \ + assert_called_with(num_retries=0) def test_table_update_success(self): """Ensure that if updating the table succeeds, True is returned, @@ -1856,9 +1865,11 @@ def test_table_update_success(self): self.client.swallow_results = True self.mock_tables.update.assert_called_with( - projectId=self.project, tableId=self.table, datasetId=self.dataset, body=self.body) + projectId=self.project, tableId=self.table, datasetId=self.dataset, + body=self.body) - self.mock_tables.update.return_value.execute.assert_called_with() + self.mock_tables.update.return_value.execute. 
\ + assert_called_with(num_retries=0) class TestPatchTable(unittest.TestCase): @@ -1907,7 +1918,8 @@ def test_table_patch_failed(self): self.mock_tables.patch.assert_called_with( projectId=self.project, datasetId=self.dataset, body=self.body) - self.mock_tables.patch.return_value.execute.assert_called_with() + self.mock_tables.patch.return_value.execute. \ + assert_called_with(num_retries=0) def test_table_patch_success(self): """Ensure that if patching the table succeeds, True is returned, @@ -1933,7 +1945,8 @@ def test_table_patch_success(self): self.mock_tables.patch.assert_called_with( projectId=self.project, datasetId=self.dataset, body=self.body) - self.mock_tables.patch.return_value.execute.assert_called_with() + self.mock_tables.patch.return_value.execute. \ + assert_called_with(num_retries=0) class TestCreateView(unittest.TestCase): @@ -1978,7 +1991,8 @@ def test_view_create_failed(self): self.mock_tables.insert.assert_called_with( projectId=self.project, datasetId=self.dataset, body=self.body) - self.mock_tables.insert.return_value.execute.assert_called_with() + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=0) def test_view_create_success(self): """Ensure that if creating the table succeeds, True is returned, @@ -2004,7 +2018,8 @@ def test_view_create_success(self): self.mock_tables.insert.assert_called_with( projectId=self.project, datasetId=self.dataset, body=self.body) - self.mock_tables.insert.return_value.execute.assert_called_with() + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=0) class TestDeleteTable(unittest.TestCase): @@ -2040,7 +2055,8 @@ def test_delete_table_fail(self): self.mock_tables.delete.assert_called_with( projectId=self.project, datasetId=self.dataset, tableId=self.table) - self.mock_tables.delete.return_value.execute.assert_called_with() + self.mock_tables.delete.return_value.execute. 
\ + assert_called_with(num_retries=0) def test_delete_table_success(self): """Ensure that if deleting table succeeds, True is returned, @@ -2064,7 +2080,8 @@ def test_delete_table_success(self): self.mock_tables.delete.assert_called_with( projectId=self.project, datasetId=self.dataset, tableId=self.table) - self.mock_tables.delete.return_value.execute.assert_called_with() + self.mock_tables.delete.return_value.execute. \ + assert_called_with(num_retries=0) class TestParseTableListReponse(unittest.TestCase): @@ -2200,7 +2217,7 @@ def test_push_failed(self): projectId=self.project, datasetId=self.dataset, tableId=self.table, body=self.data) - execute_calls = [mock.call()] + execute_calls = [mock.call(num_retries=0)] self.mock_table_data.insertAll.return_value.execute.assert_has_calls( execute_calls) @@ -2254,7 +2271,7 @@ def test_push_exception(self): projectId=self.project, datasetId=self.dataset, tableId=self.table, body=self.data) - execute_calls = [mock.call()] + execute_calls = [mock.call(num_retries=0)] self.mock_table_data.insertAll.return_value.execute.assert_has_calls( execute_calls) @@ -2286,7 +2303,7 @@ def test_push_success(self): projectId=self.project, datasetId=self.dataset, tableId=self.table, body=self.data) - execute_calls = [mock.call()] + execute_calls = [mock.call(num_retries=0)] self.mock_table_data.insertAll.return_value.execute.assert_has_calls( execute_calls) @@ -2604,7 +2621,7 @@ def test_dataset_create_failed(self): projectId=self.project, body=self.body) self.mock_datasets.insert.return_value.execute. \ - assert_called_with() + assert_called_with(num_retries=0) def test_dataset_create_success(self): """Ensure that if creating the table fails, False is returned.""" @@ -2633,7 +2650,7 @@ def test_dataset_create_success(self): projectId=self.project, body=self.body) self.mock_datasets.insert.return_value.execute. 
\ - assert_called_with() + assert_called_with(num_retries=0) class TestDeleteDataset(unittest.TestCase): @@ -2669,7 +2686,7 @@ def test_delete_datasets_fail(self): self.client.swallow_results = True self.mock_datasets.delete.return_value.execute. \ - assert_called_with() + assert_called_with(num_retries=0) def test_delete_datasets_success(self): """Ensure that if deleting table succeeds, True is returned.""" @@ -2694,7 +2711,7 @@ def test_delete_datasets_success(self): deleteContents=False) self.mock_datasets.delete.return_value.execute. \ - assert_called_with() + assert_called_with(num_retries=0) def test_delete_datasets_delete_contents_success(self): """Ensure that if deleting table succeeds, True is returned.""" @@ -2719,7 +2736,7 @@ def test_delete_datasets_delete_contents_success(self): deleteContents=True) self.mock_datasets.delete.return_value.execute. \ - assert_called_with() + assert_called_with(num_retries=0) FULL_DATASET_LIST_RESPONSE = { @@ -2879,7 +2896,7 @@ def test_dataset_update_failed(self): projectId=self.project, datasetId=self.dataset, body=self.body) self.mock_datasets.update.return_value.execute. \ - assert_called_with() + assert_called_with(num_retries=0) def test_dataset_update_success(self): """Ensure that if creating the table fails, False is returned.""" @@ -2908,4 +2925,4 @@ def test_dataset_update_success(self): projectId=self.project, datasetId=self.dataset, body=self.body) self.mock_datasets.update.return_value.execute. \ - assert_called_with() + assert_called_with(num_retries=0) From 188ef7d5c3676d104fa996f2b1729b46c14d2694 Mon Sep 17 00:00:00 2001 From: Yves Bastide Date: Tue, 5 Sep 2017 15:45:47 +0200 Subject: [PATCH 13/31] Add tests with num_retries Maybe too many?.. 
Signed-off-by: Yves Bastide --- bigquery/tests/test_client.py | 215 ++++++++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 0bf5a18..a5e8161 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -2926,3 +2926,218 @@ def test_dataset_update_success(self): self.mock_datasets.update.return_value.execute. \ assert_called_with(num_retries=0) + + +class TestNumRetries(unittest.TestCase): + + def setUp(self): + client._bq_client = None + + self.mock_bq_service = mock.Mock() + self.mock_tables = mock.Mock() + self.mock_job_collection = mock.Mock() + self.mock_datasets = mock.Mock() + self.mock_table_data = mock.Mock() + self.mock_bq_service.tables.return_value = self.mock_tables + self.mock_bq_service.jobs.return_value = self.mock_job_collection + self.mock_bq_service.datasets.return_value = self.mock_datasets + self.mock_bq_service.tabledata.return_value = self.mock_table_data + + self.project_id = 'project' + self.num_retries = 5 + self.client = client.BigQueryClient(self.mock_bq_service, + self.project_id, + num_retries=self.num_retries) + self.dataset = 'dataset' + self.project = 'project' + self.table = 'table' + self.schema = [ + {'name': 'foo', 'type': 'STRING', 'mode': 'nullable'}, + {'name': 'bar', 'type': 'FLOAT', 'mode': 'nullable'} + ] + self.friendly_name = "friendly name" + self.description = "description" + self.access = [{'userByEmail': "bob@gmail.com"}] + self.query = 'SELECT "bar" foo, "foo" bar' + self.rows = [ + {'one': 'uno', 'two': 'dos'}, {'one': 'ein', 'two': 'zwei'}, + {'two': 'kiwi'}] + self.data = { + "kind": "bigquery#tableDataInsertAllRequest", + "rows": [{'insertId': "uno", 'json': {'one': 'uno', 'two': 'dos'}}, + {'insertId': "ein", 'json': + {'one': 'ein', 'two': 'zwei'}}, + {'json': {'two': 'kiwi'}}] + } + + def test_get_response(self): + job_id = 'bar' + + mock_query_job = mock.Mock() + mock_query_reply = mock.Mock() + 
mock_query_job.execute.return_value = mock_query_reply + self.mock_job_collection.getQueryResults.return_value = mock_query_job + + offset = 5 + limit = 10 + page_token = "token" + timeout = 1 + + self.client.get_query_results(job_id, offset, limit, + page_token, timeout) + + mock_query_job.execute. \ + assert_called_once_with(num_retries=self.num_retries) + + def test_table_exists(self): + expected = [ + {'type': 'FLOAT', 'name': 'foo', 'mode': 'NULLABLE'}, + {'type': 'INTEGER', 'name': 'bar', 'mode': 'NULLABLE'}, + {'type': 'INTEGER', 'name': 'baz', 'mode': 'NULLABLE'}, + ] + + self.mock_tables.get.return_value.execute.return_value = \ + {'schema': {'fields': expected}} + + self.client.get_table_schema(self.dataset, self.table) + self.mock_tables.get.return_value.execute. \ + assert_called_once_with(num_retries=self.num_retries) + + def test_table_create(self): + self.mock_tables.insert.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + self.client.create_table(self.dataset, self.table, + self.schema) + + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=self.num_retries) + + def test_table_update(self): + self.mock_tables.update.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + self.client.update_table(self.dataset, self.table, + self.schema) + + self.mock_tables.update.return_value.execute. \ + assert_called_with(num_retries=self.num_retries) + + def test_table_patch(self): + self.mock_tables.patch.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + self.client.patch_table(self.dataset, self.table, + self.schema) + + self.mock_tables.patch.return_value.execute. 
\ + assert_called_with(num_retries=self.num_retries) + + def test_view_create(self): + body = { + 'view': {'query': self.query}, + 'tableReference': { + 'tableId': self.table, 'projectId': self.project, + 'datasetId': self.dataset + } + } + + self.mock_tables.insert.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.create_view(self.dataset, self.table, + self.query) + + self.assertTrue(actual) + + self.mock_tables.insert.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=body) + + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=self.num_retries) + + def test_delete_table(self): + self.mock_tables.delete.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.delete_table(self.dataset, self.table) + + self.assertTrue(actual) + + self.mock_tables.delete.assert_called_with( + projectId=self.project, datasetId=self.dataset, tableId=self.table) + + self.mock_tables.delete.return_value.execute. 
\ + assert_called_with(num_retries=self.num_retries) + + def test_push(self): + self.mock_table_data.insertAll.return_value.execute.return_value = { + 'status': 'foo'} + + actual = self.client.push_rows(self.dataset, self.table, self.rows, + 'one') + + self.assertTrue(actual) + + self.mock_bq_service.tabledata.assert_called_with() + + self.mock_table_data.insertAll.assert_called_with( + projectId=self.project, datasetId=self.dataset, tableId=self.table, + body=self.data) + + execute_calls = [mock.call(num_retries=self.num_retries)] + self.mock_table_data.insertAll.return_value.execute.assert_has_calls( + execute_calls) + + def test_dataset_create(self): + body = { + 'datasetReference': { + 'datasetId': self.dataset, + 'projectId': self.project}, + 'friendlyName': self.friendly_name, + 'description': self.description, + 'access': self.access + } + + self.mock_datasets.insert.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.create_dataset(self.dataset, + self.friendly_name, + self.description, + self.access) + self.assertTrue(actual) + + self.mock_datasets.insert.assert_called_with( + projectId=self.project, body=body) + + self.mock_datasets.insert.return_value.execute. \ + assert_called_with(num_retries=self.num_retries) + + def test_delete_datasets(self): + self.mock_datasets.delete.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.delete_dataset(self.dataset) + + self.assertTrue(actual) + + self.mock_datasets.delete.assert_called_with( + projectId=self.project, datasetId=self.dataset, + deleteContents=False) + + self.mock_datasets.delete.return_value.execute. 
\ + assert_called_with(num_retries=self.num_retries) + + def test_dataset_update(self): + self.mock_datasets.update.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.update_dataset(self.dataset, + self.friendly_name, + self.description, + self.access) + self.assertTrue(actual) + + self.mock_datasets.update.return_value.execute. \ + assert_called_with(num_retries=self.num_retries) From 4a569bd3b8ede0297b3973b15e550c998b03f3e9 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Wed, 20 Sep 2017 10:23:36 -0500 Subject: [PATCH 14/31] Try to fix travis --- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 9f422c6..1e1c28c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,7 @@ language: python +before_install: + - sudo apt-get update -q + - sudo apt-get install pypy -y install: - python setup.py develop - pip install tox @@ -7,7 +10,6 @@ notifications: email: false env: - TOXENV=py27 - - TOXENV=py33 - TOXENV=py34 - TOXENV=nightly - TOXENV=pypy From d308391cf9508f1568fb9647fd711e436a2978f4 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Mon, 2 Oct 2017 16:25:19 -0500 Subject: [PATCH 15/31] Bump version to 1.13.0 --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index 666b2f7..84c54b7 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.12.0' +__version__ = '1.13.0' From 40de946cf1af7d6317666db28a5740aad42c39ea Mon Sep 17 00:00:00 2001 From: Alireza Date: Tue, 30 Jan 2018 15:09:06 +0100 Subject: [PATCH 16/31] feat: Support `IS NULL`\`IS NOT NULL` condition --- bigquery/query_builder.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index b29d0cd..435bb73 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -241,6 +241,8 @@ def _render_condition(field, field_type, 
comparators): else: value = _render_condition_value(value, field_type) value = "(" + value + ")" + elif condition == "IS NULL" or condition == "IS NOT NULL": + return field + " " + condition elif condition == "BETWEEN": if isinstance(value, (tuple, list, set)) and len(value) == 2: value = ' AND '.join( From 2ce1b8d9deb4a35c8d2759ef38ba27859a1d3ce5 Mon Sep 17 00:00:00 2001 From: Tuan Vu Date: Wed, 14 Mar 2018 16:48:22 -0700 Subject: [PATCH 17/31] support a different project_id to run job This supports authenticate to 1 project_id but run jobs in a different project_id. --- bigquery/client.py | 271 ++++++++++++++++++++++++++++++++------------- 1 file changed, 195 insertions(+), 76 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 847c9fb..0c6377e 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -198,6 +198,26 @@ def __init__(self, bq_service, project_id, swallow_results=True, self.num_retries = num_retries self.cache = {} + def _get_project_id(self, project_id=None): + """ Get new project_id + + Default is self.project_id, which is the project client authenticate to. + A new project_id is specified when client wants to authenticate to 1 project, + but run jobs in a different project. + + Parameters + ---------- + project_id : str + BigQuery project_id + + Returns + ------- + project_id: BigQuery project_id + """ + if project_id is None: + project_id = self.project_id + return project_id + def _submit_query_job(self, query_data): """ Submit a query job to BigQuery. 
@@ -253,6 +273,27 @@ def _submit_query_job(self, query_data): return job_id, [self._transform_row(row, schema) for row in rows] + def _get_job_reference(self, job_id): + """ Get job reference from job_id + For more details, see: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#resource + + Parameters + ---------- + job_id: + Id of the job + + Returns + ------- + job_reference: json of job_reference + """ + job_reference = { + "projectId": self.project_id, + "jobId": job_id + } + + return job_reference + def _insert_job(self, body_object): """ Submit a job to BigQuery @@ -362,7 +403,7 @@ def get_query_schema(self, job_id): return query_reply['schema']['fields'] - def get_table_schema(self, dataset, table): + def get_table_schema(self, dataset, table, project_id=None): """Return the table schema. Parameters @@ -371,6 +412,8 @@ def get_table_schema(self, dataset, table): The dataset containing the `table`. table : str The table to get the schema for + project_id: str, optional + The project of the dataset. Returns ------- @@ -380,8 +423,9 @@ def get_table_schema(self, dataset, table): """ try: + project_id = self._get_project_id(project_id) result = self.bigquery.tables().get( - projectId=self.project_id, + projectId=project_id, tableId=table, datasetId=dataset).execute(num_retries=self.num_retries) except HttpError as e: @@ -458,29 +502,33 @@ def get_query_rows(self, job_id, offset=None, limit=None, timeout=0): records += [self._transform_row(row, schema) for row in rows] return records[:limit] if limit else records - def check_dataset(self, dataset_id): + def check_dataset(self, dataset_id, project_id=None): """Check to see if a dataset exists. 
Parameters ---------- dataset_id : str Dataset unique id + project_id: str, optional + The project the dataset is in Returns ------- bool True if dataset at `dataset_id` exists, else Fasle - """ - dataset = self.get_dataset(dataset_id) + """ + dataset = self.get_dataset(dataset_id, project_id) return bool(dataset) - def get_dataset(self, dataset_id): + def get_dataset(self, dataset_id, project_id=None): """Retrieve a dataset if it exists, otherwise return an empty dict. Parameters ---------- dataset_id : str Dataset unique id + project_id: str, optional + The project the dataset is in Returns ------- @@ -488,15 +536,16 @@ def get_dataset(self, dataset_id): Contains dataset object if it exists, else empty """ try: + project_id = self._get_project_id(project_id) dataset = self.bigquery.datasets().get( - projectId=self.project_id, datasetId=dataset_id).execute( + projectId=project_id, datasetId=dataset_id).execute( num_retries=self.num_retries) except HttpError: dataset = {} return dataset - def check_table(self, dataset, table): + def check_table(self, dataset, table, project_id=None): """Check to see if a table exists. Parameters @@ -505,16 +554,18 @@ def check_table(self, dataset, table): The dataset to check table : str The name of the table + project_id: str, optional + The project the table is in Returns ------- bool True if table exists, else False """ - table = self.get_table(dataset, table) + table = self.get_table(dataset, table, project_id) return bool(table) - def get_table(self, dataset, table): + def get_table(self, dataset, table, project_id=None): """ Retrieve a table if it exists, otherwise return an empty dict. 
Parameters @@ -523,6 +574,8 @@ def get_table(self, dataset, table): The dataset that the table is in table : str The name of the table + project_id: str, optional + The project that the table is in Returns ------- @@ -530,15 +583,16 @@ def get_table(self, dataset, table): Containing the table object if it exists, else empty """ try: + project_id = self._get_project_id(project_id) table = self.bigquery.tables().get( - projectId=self.project_id, datasetId=dataset, + projectId=project_id, datasetId=dataset, tableId=table).execute(num_retries=self.num_retries) except HttpError: table = {} return table - def create_table(self, dataset, table, schema, + def create_table(self, dataset, table, schema, project_id=None, expiration_time=None, time_partitioning=False): """Create a new table in the dataset. @@ -550,6 +604,8 @@ def create_table(self, dataset, table, schema, The name of the table to create schema : dict The table schema + project_id: str, optional + The project to create the table in expiration_time : int or double, optional The expiry time in milliseconds since the epoch. 
time_partitioning : bool, optional @@ -561,12 +617,13 @@ def create_table(self, dataset, table, schema, If the table was successfully created, or response from BigQuery if swallow_results is set to False """ + project_id = self._get_project_id(project_id) body = { 'schema': {'fields': schema}, 'tableReference': { 'tableId': table, - 'projectId': self.project_id, + 'projectId': project_id, 'datasetId': dataset } } @@ -579,7 +636,7 @@ def create_table(self, dataset, table, schema, try: table = self.bigquery.tables().insert( - projectId=self.project_id, + projectId=project_id, datasetId=dataset, body=body ).execute(num_retries=self.num_retries) @@ -589,14 +646,14 @@ def create_table(self, dataset, table, schema, return table except HttpError as e: - logger.error(('Cannot create table {0}.{1}\n' - 'Http Error: {2}').format(dataset, table, e.content)) + logger.error(('Cannot create table {0}.{1}.{2}\n' + 'Http Error: {3}').format(project_id, dataset, table, e.content)) if self.swallow_results: return False else: return {} - def update_table(self, dataset, table, schema): + def update_table(self, dataset, table, schema, project_id=None): """Update an existing table in the dataset. Parameters @@ -607,6 +664,8 @@ def update_table(self, dataset, table, schema): The name of the table to update schema : dict Table schema + project_id: str, optional + The project to update the table in Returns ------- @@ -614,19 +673,20 @@ def update_table(self, dataset, table, schema): bool indicating if the table was successfully updated or not, or response from BigQuery if swallow_results is set to False. 
""" + project_id = self._get_project_id(project_id) body = { 'schema': {'fields': schema}, 'tableReference': { 'tableId': table, - 'projectId': self.project_id, + 'projectId': project_id, 'datasetId': dataset } } try: result = self.bigquery.tables().update( - projectId=self.project_id, + projectId=project_id, tableId= table, datasetId=dataset, body=body @@ -637,14 +697,14 @@ def update_table(self, dataset, table, schema): return result except HttpError as e: - logger.error(('Cannot update table {0}.{1}\n' - 'Http Error: {2}').format(dataset, table, e.content)) + logger.error(('Cannot update table {0}.{1}.{2}\n' + 'Http Error: {3}').format(project_id, dataset, table, e.content)) if self.swallow_results: return False else: return {} - def patch_table(self, dataset, table, schema): + def patch_table(self, dataset, table, schema, project_id=None): """Patch an existing table in the dataset. Parameters @@ -655,6 +715,8 @@ def patch_table(self, dataset, table, schema): The name of the table to patch schema : dict The table schema + project_id: str, optional + The project to patch the table in Returns ------- @@ -662,19 +724,20 @@ def patch_table(self, dataset, table, schema): Bool indicating if the table was successfully patched or not, or response from BigQuery if swallow_results is set to False """ + project_id = self._get_project_id(project_id) body = { 'schema': {'fields': schema}, 'tableReference': { 'tableId': table, - 'projectId': self.project_id, + 'projectId': project_id, 'datasetId': dataset } } try: result = self.bigquery.tables().patch( - projectId=self.project_id, + projectId=project_id, datasetId=dataset, body=body ).execute(num_retries=self.num_retries) @@ -684,14 +747,14 @@ def patch_table(self, dataset, table, schema): return result except HttpError as e: - logger.error(('Cannot patch table {0}.{1}\n' - 'Http Error: {2}').format(dataset, table, e.content)) + logger.error(('Cannot patch table {0}.{1}.{2}\n' + 'Http Error: {3}').format(project_id, dataset, 
table, e.content)) if self.swallow_results: return False else: return {} - def create_view(self, dataset, view, query, use_legacy_sql=None): + def create_view(self, dataset, view, query, project_id=None, use_legacy_sql=None): """Create a new view in the dataset. Parameters @@ -702,6 +765,8 @@ def create_view(self, dataset, view, query, use_legacy_sql=None): The name of the view to create query : dict A query that BigQuery executes when the view is referenced. + project_id: str, optional + The project to create the view in use_legacy_sql : bool, optional If False, the query will use BigQuery's standard SQL (https://cloud.google.com/bigquery/sql-reference/) @@ -712,11 +777,12 @@ def create_view(self, dataset, view, query, use_legacy_sql=None): bool indicating if the view was successfully created or not, or response from BigQuery if swallow_results is set to False. """ + project_id = self._get_project_id(project_id) body = { 'tableReference': { 'tableId': view, - 'projectId': self.project_id, + 'projectId': project_id, 'datasetId': dataset }, 'view': { @@ -729,7 +795,7 @@ def create_view(self, dataset, view, query, use_legacy_sql=None): try: view = self.bigquery.tables().insert( - projectId=self.project_id, + projectId=project_id, datasetId=dataset, body=body ).execute(num_retries=self.num_retries) @@ -746,7 +812,7 @@ def create_view(self, dataset, view, query, use_legacy_sql=None): else: return {} - def delete_table(self, dataset, table): + def delete_table(self, dataset, table, project_id=None): """Delete a table from the dataset. Parameters @@ -755,6 +821,8 @@ def delete_table(self, dataset, table): The dataset to delete the table from. 
table : str The name of the table to delete + project_id: str, optional + String id of the project Returns ------- @@ -764,8 +832,9 @@ def delete_table(self, dataset, table): """ try: + project_id = self._get_project_id(project_id) response = self.bigquery.tables().delete( - projectId=self.project_id, + projectId=project_id, datasetId=dataset, tableId=table ).execute(num_retries=self.num_retries) @@ -782,7 +851,7 @@ def delete_table(self, dataset, table): else: return {} - def get_tables(self, dataset_id, app_id, start_time, end_time): + def get_tables(self, dataset_id, app_id, start_time, end_time, project_id=None): """Retrieve a list of tables that are related to the given app id and are inside the range of start and end times. @@ -796,6 +865,8 @@ def get_tables(self, dataset_id, app_id, start_time, end_time): The datetime or unix time after which records will be fetched. end_time : Union[datetime, int] The datetime or unix time up to which records will be fetched. + project_id: str, optional + String id of the project Returns ------- @@ -809,7 +880,7 @@ def get_tables(self, dataset_id, app_id, start_time, end_time): if isinstance(end_time, datetime): end_time = calendar.timegm(end_time.utctimetuple()) - every_table = self._get_all_tables(dataset_id) + every_table = self._get_all_tables(dataset_id, project_id) app_tables = every_table.get(app_id, {}) return self._filter_tables_by_time(app_tables, start_time, end_time) @@ -820,6 +891,7 @@ def import_data_from_uris( dataset, table, schema=None, + project_id=None, job=None, source_format=None, create_disposition=None, @@ -848,11 +920,13 @@ def import_data_from_uris( String id of the dataset table : str String id of the table - job : str, optional - Identifies the job (a unique job id is automatically generated if - not provided) schema : list, optional Represents the BigQuery schema + project_id: str, optional + String id of the project + job : str, optional + Identifies the job (a unique job id is automatically 
generated if + not provided) source_format : str, optional One of the JOB_SOURCE_FORMAT_* constants create_disposition : str, optional @@ -889,9 +963,11 @@ def import_data_from_uris( source_uris = source_uris if isinstance(source_uris, list) \ else [source_uris] + project_id = self._get_project_id(project_id) + configuration = { "destinationTable": { - "projectId": self.project_id, + "projectId": project_id, "tableId": table, "datasetId": dataset }, @@ -963,10 +1039,7 @@ def import_data_from_uris( "configuration": { 'load': configuration }, - "jobReference": { - "projectId": self.project_id, - "jobId": job - } + "jobReference": self._get_job_reference(job) } logger.debug("Creating load job %s" % body) @@ -979,6 +1052,7 @@ def export_data_to_uris( destination_uris, dataset, table, + project_id=None, job=None, compression=None, destination_format=None, @@ -999,6 +1073,8 @@ def export_data_to_uris( String id of the dataset table : str String id of the table + project_id: str, optional + String id of the project job : str, optional String identifying the job (a unique jobid is automatically generated if not provided) @@ -1024,9 +1100,11 @@ def export_data_to_uris( destination_uris = destination_uris \ if isinstance(destination_uris, list) else [destination_uris] + project_id = self._get_project_id(project_id) + configuration = { "sourceTable": { - "projectId": self.project_id, + "projectId": project_id, "tableId": table, "datasetId": dataset }, @@ -1057,10 +1135,7 @@ def export_data_to_uris( "configuration": { 'extract': configuration }, - "jobReference": { - "projectId": self.project_id, - "jobId": job - } + "jobReference": self._get_job_reference(job_id) } logger.info("Creating export job %s" % body) @@ -1073,6 +1148,7 @@ def write_to_table( query, dataset=None, table=None, + project_id=None, external_udf_uris=None, allow_large_results=None, use_query_cache=None, @@ -1097,6 +1173,8 @@ def write_to_table( String id of the dataset table : str, optional String id of the 
table + project_id: str, optional + String id of the project external_udf_uris : list, optional Contains external UDF URIs. If given, URIs must be Google Cloud Storage and have .js extensions. @@ -1138,9 +1216,11 @@ def write_to_table( "query": query, } + project_id = self._get_project_id(project_id) + if dataset and table: configuration['destinationTable'] = { - "projectId": self.project_id, + "projectId": project_id, "tableId": table, "datasetId": dataset } @@ -1233,7 +1313,7 @@ def wait_for_job(self, job, interval=5, timeout=60): return job_resource - def push_rows(self, dataset, table, rows, insert_id_key=None, + def push_rows(self, dataset, table, rows, project_id=None, insert_id_key=None, skip_invalid_rows=None, ignore_unknown_values=None, template_suffix=None): """Upload rows to BigQuery table. @@ -1244,6 +1324,8 @@ def push_rows(self, dataset, table, rows, insert_id_key=None, The dataset to upload to table : str The name of the table to insert rows into + project_id: str, optional + The project to upload to rows : list A ``list`` of rows (``dict`` objects) to add to the table insert_id_key : str, optional @@ -1292,8 +1374,9 @@ def push_rows(self, dataset, table, rows, insert_id_key=None, data['templateSuffix'] = template_suffix try: + project_id = self._get_project_id(project_id) response = table_data.insertAll( - projectId=self.project_id, + projectId=project_id, datasetId=dataset, tableId=table, body=data @@ -1325,19 +1408,21 @@ def push_rows(self, dataset, table, rows, insert_id_key=None, }] } - def get_all_tables(self, dataset_id): + def get_all_tables(self, dataset_id, project_id=None): """Retrieve a list of tables for the dataset. Parameters ---------- dataset_id : str The dataset to retrieve table data for. 
+ project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset Returns ------- A ``list`` with all table names """ - tables_data = self._get_all_tables_for_dataset(dataset_id) + tables_data = self._get_all_tables_for_dataset(dataset_id, project_id) tables = [] for table in tables_data.get('tables', []): @@ -1346,7 +1431,7 @@ def get_all_tables(self, dataset_id): tables.append(table_name) return tables - def _get_all_tables(self, dataset_id, cache=False): + def _get_all_tables(self, dataset_id, project_id=None, cache=False): """Retrieve the list of tables for dataset, that respect the formats: * appid_YYYY_MM * YYYY_MM_appid @@ -1355,6 +1440,8 @@ def _get_all_tables(self, dataset_id, cache=False): ---------- dataset_id : str The dataset to retrieve table names for + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset cache : bool, optional To use cached value or not (default False). Timeout value equals CACHE_TIMEOUT. @@ -1371,24 +1458,28 @@ def _get_all_tables(self, dataset_id, cache=False): do_fetch = False if do_fetch: - result = self._get_all_tables_for_dataset(dataset_id) + result = self._get_all_tables_for_dataset(dataset_id, project_id) self.cache[dataset_id] = (datetime.now(), result) return self._parse_table_list_response(result) - def _get_all_tables_for_dataset(self, dataset_id): + def _get_all_tables_for_dataset(self, dataset_id, project_id=None): """Retrieve a list of all tables for the dataset. 
Parameters ---------- dataset_id : str The dataset to retrieve table names for + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset Returns ------- dict A ``dict`` containing tables key with all tables """ + project_id = self._get_project_id(project_id) + result = self.bigquery.tables().list( projectId=self.project_id, datasetId=dataset_id).execute(num_retries=self.num_retries) @@ -1682,7 +1773,7 @@ def _raise_executing_exception_if_error(self, job): # # DataSet manipulation methods # - def create_dataset(self, dataset_id, friendly_name=None, description=None, + def create_dataset(self, dataset_id, project_id=None, friendly_name=None, description=None, access=None, location=None): """Create a new BigQuery dataset. @@ -1691,6 +1782,8 @@ def create_dataset(self, dataset_id, friendly_name=None, description=None, dataset_id : str Unique ``str`` identifying the dataset with the project (the referenceID of the dataset, not the integer id of the dataset) + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset friendly_name: str, optional A human readable name description: str, optional @@ -1708,15 +1801,19 @@ def create_dataset(self, dataset_id, friendly_name=None, description=None, ``bool`` indicating if dataset was created or not, or response from BigQuery if swallow_results is set for False """ - try: + project_id = self._get_project_id(project_id) + + try: datasets = self.bigquery.datasets() - dataset_data = self.dataset_resource(dataset_id, + dataset_data = self.dataset_resource(dataset_id, + project_id=project_id, friendly_name=friendly_name, description=description, access=access, - location=location) + location=location + ) - response = datasets.insert(projectId=self.project_id, + response = datasets.insert(projectId=project_id, body=dataset_data).execute( num_retries=self.num_retries) if self.swallow_results: @@ -1731,31 +1828,40 @@ def create_dataset(self, dataset_id, friendly_name=None, 
description=None, else: return {} - def get_datasets(self): + def get_datasets(self, project_id=None): """List all datasets in the project. + + Parameters + ---------- + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset Returns ------- list Dataset resources """ - try: + project_id = self._get_project_id(project_id) + + try: datasets = self.bigquery.datasets() - request = datasets.list(projectId=self.project_id) + request = datasets.list(projectId=project_id) result = request.execute(num_retries=self.num_retries) return result.get('datasets', []) except HttpError as e: logger.error("Cannot list datasets: {0}".format(e)) return None - def delete_dataset(self, dataset_id, delete_contents=False): + def delete_dataset(self, dataset_id, project_id=None, delete_contents=False): """Delete a BigQuery dataset. Parameters ---------- dataset_id : str - Unique ``str`` identifying the datset with the project (the + Unique ``str`` identifying the dataset with the project (the referenceId of the dataset) + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset delete_contents : bool, optional If True, forces the deletion of the dataset even when the dataset contains data (Default = False) @@ -1771,9 +1877,11 @@ def delete_dataset(self, dataset_id, delete_contents=False): HttpError 404 when dataset with dataset_id does not exist """ - try: + project_id = self._get_project_id(project_id) + + try: datasets = self.bigquery.datasets() - request = datasets.delete(projectId=self.project_id, + request = datasets.delete(projectId=project_id, datasetId=dataset_id, deleteContents=delete_contents) response = request.execute(num_retries=self.num_retries) @@ -1789,7 +1897,7 @@ def delete_dataset(self, dataset_id, delete_contents=False): else: return {} - def update_dataset(self, dataset_id, friendly_name=None, description=None, + def update_dataset(self, dataset_id, project_id=None, friendly_name=None, description=None, 
access=None): """Updates information in an existing dataset. The update method replaces the entire dataset resource, whereas the patch method only @@ -1800,6 +1908,8 @@ def update_dataset(self, dataset_id, friendly_name=None, description=None, dataset_id : str Unique ``str`` identifying the dataset with the project (the referencedId of the dataset) + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset friendly_name : str, optional An optional descriptive name for the dataset. description : str, optional @@ -1813,11 +1923,13 @@ def update_dataset(self, dataset_id, friendly_name=None, description=None, ``bool`` indicating if the update was successful or not, or response from BigQuery if swallow_results is set for False. """ - try: + project_id = self._get_project_id(project_id) + + try: datasets = self.bigquery.datasets() - body = self.dataset_resource(dataset_id, friendly_name, + body = self.dataset_resource(dataset_id, project_id, friendly_name, description, access) - request = datasets.update(projectId=self.project_id, + request = datasets.update(projectId=project_id, datasetId=dataset_id, body=body) response = request.execute(num_retries=self.num_retries) @@ -1833,7 +1945,7 @@ def update_dataset(self, dataset_id, friendly_name=None, description=None, else: return {} - def patch_dataset(self, dataset_id, friendly_name=None, description=None, + def patch_dataset(self, dataset_id, project_id=None, friendly_name=None, description=None, access=None): """Updates information in an existing dataset. 
The update method replaces the entire dataset resource, whereas the patch method only @@ -1844,6 +1956,8 @@ def patch_dataset(self, dataset_id, friendly_name=None, description=None, dataset_id : str Unique string idenfitying the dataset with the project (the referenceId of the dataset) + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset friendly_name : str, optional An optional descriptive name for the dataset. description : str, optional @@ -1857,11 +1971,13 @@ def patch_dataset(self, dataset_id, friendly_name=None, description=None, ``bool`` indicating if the patch was successful or not, or response from BigQuery if swallow_results is set for False. """ - try: + project_id = self._get_project_id(project_id) + + try: datasets = self.bigquery.datasets() - body = self.dataset_resource(dataset_id, friendly_name, + body = self.dataset_resource(dataset_id, project_id, friendly_name, description, access) - request = datasets.patch(projectId=self.project_id, + request = datasets.patch(projectId=project_id, datasetId=dataset_id, body=body) response = request.execute(num_retries=self.num_retries) if self.swallow_results: @@ -1875,7 +1991,7 @@ def patch_dataset(self, dataset_id, friendly_name=None, description=None, else: return {} - def dataset_resource(self, ref_id, friendly_name=None, description=None, + def dataset_resource(self, ref_id, project_id=None, friendly_name=None, description=None, access=None, location=None): """See https://developers.google.com/bigquery/docs/reference/v2/datasets#resource @@ -1884,6 +2000,8 @@ def dataset_resource(self, ref_id, friendly_name=None, description=None, ---------- ref_id : str Dataset id (the reference id, not the integer id) + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset friendly_name : str, optional An optional descriptive name for the dataset description : str, optional @@ -1898,10 +2016,11 @@ def dataset_resource(self, ref_id, friendly_name=None, 
description=None, dict Representing BigQuery dataset resource """ + project_id = self._get_project_id(project_id) data = { "datasetReference": { "datasetId": ref_id, - "projectId": self.project_id + "projectId": project_id } } if friendly_name: From 855be4e7c0fe4744072fb542a71e2d793263be3c Mon Sep 17 00:00:00 2001 From: Tuan Vu Date: Wed, 14 Mar 2018 21:44:08 -0700 Subject: [PATCH 18/31] update client and test_client to support a different project_id to run job --- bigquery/client.py | 155 ++++++++++++++++++---------------- bigquery/tests/test_client.py | 16 ++-- 2 files changed, 90 insertions(+), 81 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 0c6377e..b9d4e51 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -421,9 +421,9 @@ def get_table_schema(self, dataset, table, project_id=None): A ``list`` of ``dict`` objects that represent the table schema. If the table doesn't exist, None is returned. """ + project_id = self._get_project_id(project_id) - try: - project_id = self._get_project_id(project_id) + try: result = self.bigquery.tables().get( projectId=project_id, tableId=table, @@ -535,8 +535,9 @@ def get_dataset(self, dataset_id, project_id=None): dict Contains dataset object if it exists, else empty """ - try: - project_id = self._get_project_id(project_id) + project_id = self._get_project_id(project_id) + + try: dataset = self.bigquery.datasets().get( projectId=project_id, datasetId=dataset_id).execute( num_retries=self.num_retries) @@ -582,8 +583,8 @@ def get_table(self, dataset, table, project_id=None): dict Containing the table object if it exists, else empty """ - try: - project_id = self._get_project_id(project_id) + project_id = self._get_project_id(project_id) + try: table = self.bigquery.tables().get( projectId=project_id, datasetId=dataset, tableId=table).execute(num_retries=self.num_retries) @@ -592,8 +593,9 @@ def get_table(self, dataset, table, project_id=None): return table - def create_table(self, dataset, 
table, schema, project_id=None, - expiration_time=None, time_partitioning=False): + def create_table(self, dataset, table, schema, + expiration_time=None, time_partitioning=False, + project_id=None): """Create a new table in the dataset. Parameters @@ -603,13 +605,13 @@ def create_table(self, dataset, table, schema, project_id=None, table : str The name of the table to create schema : dict - The table schema - project_id: str, optional - The project to create the table in + The table schema expiration_time : int or double, optional The expiry time in milliseconds since the epoch. time_partitioning : bool, optional Create a time partitioning. + project_id: str, optional + The project to create the table in Returns ------- @@ -754,7 +756,7 @@ def patch_table(self, dataset, table, schema, project_id=None): else: return {} - def create_view(self, dataset, view, query, project_id=None, use_legacy_sql=None): + def create_view(self, dataset, view, query, use_legacy_sql=None, project_id=None): """Create a new view in the dataset. Parameters @@ -764,12 +766,12 @@ def create_view(self, dataset, view, query, project_id=None, use_legacy_sql=None view : str The name of the view to create query : dict - A query that BigQuery executes when the view is referenced. - project_id: str, optional - The project to create the view in + A query that BigQuery executes when the view is referenced. use_legacy_sql : bool, optional If False, the query will use BigQuery's standard SQL (https://cloud.google.com/bigquery/sql-reference/) + project_id: str, optional + The project to create the view in Returns ------- @@ -830,9 +832,9 @@ def delete_table(self, dataset, table, project_id=None): bool indicating if the table was successfully deleted or not, or response from BigQuery if swallow_results is set for False. 
""" + project_id = self._get_project_id(project_id) - try: - project_id = self._get_project_id(project_id) + try: response = self.bigquery.tables().delete( projectId=project_id, datasetId=dataset, @@ -890,8 +892,7 @@ def import_data_from_uris( source_uris, dataset, table, - schema=None, - project_id=None, + schema=None, job=None, source_format=None, create_disposition=None, @@ -904,6 +905,7 @@ def import_data_from_uris( field_delimiter=None, quote=None, skip_leading_rows=None, + project_id=None, ): """ Imports data into a BigQuery table from cloud storage. Optional @@ -921,9 +923,7 @@ def import_data_from_uris( table : str String id of the table schema : list, optional - Represents the BigQuery schema - project_id: str, optional - String id of the project + Represents the BigQuery schema job : str, optional Identifies the job (a unique job id is automatically generated if not provided) @@ -949,6 +949,8 @@ def import_data_from_uris( Quote character for csv only skip_leading_rows : int, optional For csv only + project_id: str, optional + String id of the project Returns ------- @@ -1051,13 +1053,13 @@ def export_data_to_uris( self, destination_uris, dataset, - table, - project_id=None, + table, job=None, compression=None, destination_format=None, print_header=None, field_delimiter=None, + project_id=None, ): """ Export data from a BigQuery table to cloud storage. 
Optional arguments @@ -1072,9 +1074,7 @@ def export_data_to_uris( dataset : str String id of the dataset table : str - String id of the table - project_id: str, optional - String id of the project + String id of the table job : str, optional String identifying the job (a unique jobid is automatically generated if not provided) @@ -1086,6 +1086,8 @@ def export_data_to_uris( Whether or not to print the header field_delimiter : str, optional Character separating fields in delimited file + project_id: str, optional + String id of the project Returns ------- @@ -1135,7 +1137,7 @@ def export_data_to_uris( "configuration": { 'extract': configuration }, - "jobReference": self._get_job_reference(job_id) + "jobReference": self._get_job_reference(job) } logger.info("Creating export job %s" % body) @@ -1147,8 +1149,7 @@ def write_to_table( self, query, dataset=None, - table=None, - project_id=None, + table=None, external_udf_uris=None, allow_large_results=None, use_query_cache=None, @@ -1157,7 +1158,8 @@ def write_to_table( write_disposition=None, use_legacy_sql=None, maximum_billing_tier=None, - flatten=None + flatten=None, + project_id=None, ): """ Write query result to table. If dataset or table is not provided, @@ -1172,9 +1174,7 @@ def write_to_table( dataset : str, optional String id of the dataset table : str, optional - String id of the table - project_id: str, optional - String id of the project + String id of the table external_udf_uris : list, optional Contains external UDF URIs. If given, URIs must be Google Cloud Storage and have .js extensions. 
@@ -1200,6 +1200,8 @@ def write_to_table( flatten : bool, optional Whether or not to flatten nested and repeated fields in query results + project_id: str, optional + String id of the project Returns ------- @@ -1313,9 +1315,9 @@ def wait_for_job(self, job, interval=5, timeout=60): return job_resource - def push_rows(self, dataset, table, rows, project_id=None, insert_id_key=None, + def push_rows(self, dataset, table, rows, insert_id_key=None, skip_invalid_rows=None, ignore_unknown_values=None, - template_suffix=None): + template_suffix=None, project_id=None): """Upload rows to BigQuery table. Parameters @@ -1323,9 +1325,7 @@ def push_rows(self, dataset, table, rows, project_id=None, insert_id_key=None, dataset : str The dataset to upload to table : str - The name of the table to insert rows into - project_id: str, optional - The project to upload to + The name of the table to insert rows into rows : list A ``list`` of rows (``dict`` objects) to add to the table insert_id_key : str, optional @@ -1338,6 +1338,8 @@ def push_rows(self, dataset, table, rows, project_id=None, insert_id_key=None, template_suffix : str, optional Inserts the rows into an {table}{template_suffix}. If table {table}{template_suffix} doesn't exist, create from {table}. + project_id: str, optional + The project to upload to Returns ------- @@ -1345,7 +1347,7 @@ def push_rows(self, dataset, table, rows, project_id=None, insert_id_key=None, bool indicating if insert succeeded or not, or response from BigQuery if swallow_results is set for False. 
""" - + project_id = self._get_project_id(project_id) table_data = self.bigquery.tabledata() rows_data = [] @@ -1373,8 +1375,7 @@ def push_rows(self, dataset, table, rows, project_id=None, insert_id_key=None, if template_suffix is not None: data['templateSuffix'] = template_suffix - try: - project_id = self._get_project_id(project_id) + try: response = table_data.insertAll( projectId=project_id, datasetId=dataset, @@ -1431,7 +1432,7 @@ def get_all_tables(self, dataset_id, project_id=None): tables.append(table_name) return tables - def _get_all_tables(self, dataset_id, project_id=None, cache=False): + def _get_all_tables(self, dataset_id, cache=False, project_id=None): """Retrieve the list of tables for dataset, that respect the formats: * appid_YYYY_MM * YYYY_MM_appid @@ -1439,12 +1440,12 @@ def _get_all_tables(self, dataset_id, project_id=None, cache=False): Parameters ---------- dataset_id : str - The dataset to retrieve table names for - project_id: str - Unique ``str`` identifying the BigQuery project contains the dataset + The dataset to retrieve table names for cache : bool, optional To use cached value or not (default False). Timeout value equals CACHE_TIMEOUT. + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset Returns ------- @@ -1773,17 +1774,15 @@ def _raise_executing_exception_if_error(self, job): # # DataSet manipulation methods # - def create_dataset(self, dataset_id, project_id=None, friendly_name=None, description=None, - access=None, location=None): + def create_dataset(self, dataset_id, friendly_name=None, description=None, + access=None, location=None, project_id=None): """Create a new BigQuery dataset. 
Parameters ---------- dataset_id : str Unique ``str`` identifying the dataset with the project (the - referenceID of the dataset, not the integer id of the dataset) - project_id: str - Unique ``str`` identifying the BigQuery project contains the dataset + referenceID of the dataset, not the integer id of the dataset) friendly_name: str, optional A human readable name description: str, optional @@ -1794,6 +1793,8 @@ def create_dataset(self, dataset_id, project_id=None, friendly_name=None, descri location : str, optional Indicating where dataset should be stored: EU or US (see https://developers.google.com/bigquery/docs/reference/v2/datasets#resource) + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset Returns ------- @@ -1852,19 +1853,19 @@ def get_datasets(self, project_id=None): logger.error("Cannot list datasets: {0}".format(e)) return None - def delete_dataset(self, dataset_id, project_id=None, delete_contents=False): + def delete_dataset(self, dataset_id, delete_contents=False, project_id=None): """Delete a BigQuery dataset. Parameters ---------- dataset_id : str Unique ``str`` identifying the dataset with the project (the - referenceId of the dataset) - project_id: str + referenceId of the dataset) Unique ``str`` identifying the BigQuery project contains the dataset delete_contents : bool, optional If True, forces the deletion of the dataset even when the dataset contains data (Default = False) + project_id: str, optional Returns ------- @@ -1897,8 +1898,8 @@ def delete_dataset(self, dataset_id, project_id=None, delete_contents=False): else: return {} - def update_dataset(self, dataset_id, project_id=None, friendly_name=None, description=None, - access=None): + def update_dataset(self, dataset_id, friendly_name=None, description=None, + access=None, project_id=None): """Updates information in an existing dataset. 
The update method replaces the entire dataset resource, whereas the patch method only replaces fields that are provided in the submitted dataset resource. @@ -1907,15 +1908,15 @@ def update_dataset(self, dataset_id, project_id=None, friendly_name=None, descri ---------- dataset_id : str Unique ``str`` identifying the dataset with the project (the - referencedId of the dataset) - project_id: str - Unique ``str`` identifying the BigQuery project contains the dataset + referencedId of the dataset) friendly_name : str, optional An optional descriptive name for the dataset. description : str, optional An optional description of the dataset. access : list, optional Indicating access permissions + project_id: str, optional + Unique ``str`` identifying the BigQuery project contains the dataset Returns ------- @@ -1927,8 +1928,12 @@ def update_dataset(self, dataset_id, project_id=None, friendly_name=None, descri try: datasets = self.bigquery.datasets() - body = self.dataset_resource(dataset_id, project_id, friendly_name, - description, access) + body = self.dataset_resource(dataset_id, + friendly_name=friendly_name, + description=description, + access=access, + project_id=project_id) + request = datasets.update(projectId=project_id, datasetId=dataset_id, body=body) @@ -1945,8 +1950,8 @@ def update_dataset(self, dataset_id, project_id=None, friendly_name=None, descri else: return {} - def patch_dataset(self, dataset_id, project_id=None, friendly_name=None, description=None, - access=None): + def patch_dataset(self, dataset_id, friendly_name=None, description=None, + access=None, project_id=None): """Updates information in an existing dataset. The update method replaces the entire dataset resource, whereas the patch method only replaces fields that are provided in the submitted dataset resource. 
@@ -1955,15 +1960,15 @@ def patch_dataset(self, dataset_id, project_id=None, friendly_name=None, descrip ---------- dataset_id : str Unique string idenfitying the dataset with the project (the - referenceId of the dataset) - project_id: str - Unique ``str`` identifying the BigQuery project contains the dataset + referenceId of the dataset) friendly_name : str, optional An optional descriptive name for the dataset. description : str, optional An optional description of the dataset. access : list, optional Indicating access permissions. + project_id: str, optional + Unique ``str`` identifying the BigQuery project contains the dataset Returns ------- @@ -1975,8 +1980,11 @@ def patch_dataset(self, dataset_id, project_id=None, friendly_name=None, descrip try: datasets = self.bigquery.datasets() - body = self.dataset_resource(dataset_id, project_id, friendly_name, - description, access) + body = self.dataset_resource(dataset_id, + friendly_name=friendly_name, + description=description, + access=access, + project_id=project_id) request = datasets.patch(projectId=project_id, datasetId=dataset_id, body=body) response = request.execute(num_retries=self.num_retries) @@ -1991,17 +1999,15 @@ def patch_dataset(self, dataset_id, project_id=None, friendly_name=None, descrip else: return {} - def dataset_resource(self, ref_id, project_id=None, friendly_name=None, description=None, - access=None, location=None): + def dataset_resource(self, ref_id, friendly_name=None, description=None, + access=None, location=None, project_id=None): """See https://developers.google.com/bigquery/docs/reference/v2/datasets#resource Parameters ---------- ref_id : str - Dataset id (the reference id, not the integer id) - project_id: str - Unique ``str`` identifying the BigQuery project contains the dataset + Dataset id (the reference id, not the integer id) friendly_name : str, optional An optional descriptive name for the dataset description : str, optional @@ -2010,6 +2016,8 @@ def 
dataset_resource(self, ref_id, project_id=None, friendly_name=None, descript Indicating access permissions location: str, optional, 'EU' or 'US' An optional geographical location for the dataset(EU or US) + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset Returns ------- @@ -2017,6 +2025,7 @@ def dataset_resource(self, ref_id, project_id=None, friendly_name=None, descript Representing BigQuery dataset resource """ project_id = self._get_project_id(project_id) + data = { "datasetReference": { "datasetId": ref_id, diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index a5e8161..b581830 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -2904,18 +2904,18 @@ def test_dataset_update_success(self): self.mock_datasets.update.return_value.execute.side_effect = [{ 'status': 'foo'}, {'status': 'bar'}] - actual = self.client.update_dataset(self.dataset, - self.friendly_name, - self.description, - self.access) + actual = self.client.update_dataset(self.dataset, + friendly_name=self.friendly_name, + description=self.description, + access=self.access) self.assertTrue(actual) self.client.swallow_results = False - actual = self.client.update_dataset(self.dataset, - self.friendly_name, - self.description, - self.access) + actual = self.client.update_dataset(self.dataset, + friendly_name=self.friendly_name, + description=self.description, + access=self.access) self.assertEqual(actual, {'status': 'bar'}) From 1617ad653e3c9bc41706b4512b12b6fb50132ac9 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Fri, 16 Mar 2018 20:27:55 -0500 Subject: [PATCH 19/31] Bump version --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index 84c54b7..e4f2ad4 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.13.0' +__version__ = '1.14.0' From 700eb9dac5c6cf0bc4fd36078d083abcf828b0bd Mon Sep 17 
00:00:00 2001 From: Juan Sandoval Date: Tue, 24 Apr 2018 12:11:06 -0500 Subject: [PATCH 20/31] Remove OAuth cache discovery from google client library. (#1) This fixes the warning: ImportError: file_cache is unavailable when using oauth2client >= 4.0.0 and allows us to continue using the latest OAuth library versions --- bigquery/client.py | 9 +++++-- bigquery/tests/test_client.py | 45 +++++++++++++++++++++++++++-------- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index b9d4e51..d76cec7 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -175,8 +175,13 @@ def _get_bq_service(credentials=None, service_url=None): assert credentials, 'Must provide ServiceAccountCredentials' http = credentials.authorize(Http()) - service = build('bigquery', 'v2', http=http, - discoveryServiceUrl=service_url) + service = build( + 'bigquery', + 'v2', + http=http, + discoveryServiceUrl=service_url, + cache_discovery=False + ) return service diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index b581830..9af4f0c 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -67,8 +67,13 @@ def test_initialize_readonly(self, mock_build, mock_return_cred): scopes=BIGQUERY_SCOPE_READ_ONLY) self.assertTrue( mock_cred.from_p12_keyfile_buffer.return_value.authorize.called) - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, - discoveryServiceUrl=mock_service_url) + mock_build.assert_called_once_with( + 'bigquery', + 'v2', + http=mock_http, + discoveryServiceUrl=mock_service_url, + cache_discovery=False + ) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @@ -101,8 +106,13 @@ def test_initialize_read_write(self, mock_build, mock_return_cred): service_account, mock.ANY, scopes=BIGQUERY_SCOPE) self.assertTrue( mock_cred.from_p12_keyfile_buffer.return_value.authorize.called) - mock_build.assert_called_once_with('bigquery', 
'v2', http=mock_http, - discoveryServiceUrl=mock_service_url) + mock_build.assert_called_once_with( + 'bigquery', + 'v2', + http=mock_http, + discoveryServiceUrl=mock_service_url, + cache_discovery=False + ) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @@ -136,8 +146,13 @@ def test_initialize_key_file(self, mock_build, mock_return_cred): scopes=BIGQUERY_SCOPE) self.assertTrue( mock_cred.from_p12_keyfile.return_value.authorize.called) - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, - discoveryServiceUrl=mock_service_url) + mock_build.assert_called_once_with( + 'bigquery', + 'v2', + http=mock_http, + discoveryServiceUrl=mock_service_url, + cache_discovery=False + ) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @@ -172,8 +187,13 @@ def test_initialize_json_key_file(self, mock_open, mock_build, mock_return_cred) scopes=BIGQUERY_SCOPE) self.assertTrue( mock_cred.from_json_keyfile_dict.return_value.authorize.called) - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, - discoveryServiceUrl=mock_service_url) + mock_build.assert_called_once_with( + 'bigquery', + 'v2', + http=mock_http, + discoveryServiceUrl=mock_service_url, + cache_discovery=False + ) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @@ -208,8 +228,13 @@ def test_initialize_json_key_file_without_project_id(self, mock_open, mock_build scopes=BIGQUERY_SCOPE) self.assertTrue( mock_cred.from_json_keyfile_dict.return_value.authorize.called) - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, - discoveryServiceUrl=mock_service_url) + mock_build.assert_called_once_with( + 'bigquery', + 'v2', + http=mock_http, + discoveryServiceUrl=mock_service_url, + cache_discovery=False + ) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(json_key['project_id'], bq_client.project_id) From 
da151c3b16bbd0a8bc757efe7221f40c8b1c6e61 Mon Sep 17 00:00:00 2001 From: rhoboro Date: Thu, 17 Jan 2019 12:19:09 +0900 Subject: [PATCH 21/31] fix get_all_tables with different project_id --- bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/client.py b/bigquery/client.py index d76cec7..6bfab16 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1487,7 +1487,7 @@ def _get_all_tables_for_dataset(self, dataset_id, project_id=None): project_id = self._get_project_id(project_id) result = self.bigquery.tables().list( - projectId=self.project_id, + projectId=project_id, datasetId=dataset_id).execute(num_retries=self.num_retries) page_token = result.get('nextPageToken') From fb47d0459b93646e859464a2a2313e7a5e58a059 Mon Sep 17 00:00:00 2001 From: rhoboro Date: Thu, 17 Jan 2019 14:25:34 +0900 Subject: [PATCH 22/31] fix paging too --- bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/client.py b/bigquery/client.py index 6bfab16..537e23a 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1493,7 +1493,7 @@ def _get_all_tables_for_dataset(self, dataset_id, project_id=None): page_token = result.get('nextPageToken') while page_token: res = self.bigquery.tables().list( - projectId=self.project_id, + projectId=project_id, datasetId=dataset_id, pageToken=page_token ).execute(num_retries=self.num_retries) From 8df1c772e93f6335f6a1e8b1db1997a8592f0951 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Wed, 16 Jan 2019 23:34:29 -0600 Subject: [PATCH 23/31] Bump version to 1.14.1 --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index e4f2ad4..c162747 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.14.0' +__version__ = '1.14.1' From 24cc8c18822e1478920b3144186e8672c5f4dc22 Mon Sep 17 00:00:00 2001 From: sleepless-se Date: Thu, 14 Feb 2019 01:27:24 +0800 Subject: 
[PATCH 24/31] It was invalid json format. add a comma --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8171078..009f125 100644 --- a/README.md +++ b/README.md @@ -173,7 +173,7 @@ The client provides an API for inserting data into a BigQuery table. The last pa ```python # Insert data into table. rows = [ - {'one': 'ein', 'two': 'zwei'} + {'one': 'ein', 'two': 'zwei'}, {'id': 'NzAzYmRiY', 'one': 'uno', 'two': 'dos'}, {'id': 'NzAzYmRiY', 'one': 'ein', 'two': 'zwei'} # duplicate entry ] From 1491e1bdc0fb1b8a8fa8fd87255032c5834f10dc Mon Sep 17 00:00:00 2001 From: Yves Bastide Date: Tue, 30 Jul 2019 13:18:36 +0200 Subject: [PATCH 25/31] Fix client.patch_table tableId is a required argument of the patch method. Also, there's no need to pass a tableReference in the body. Signed-off-by: Yves Bastide --- bigquery/client.py | 6 +----- bigquery/tests/test_client.py | 9 ++++----- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 537e23a..125d048 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -735,17 +735,13 @@ def patch_table(self, dataset, table, schema, project_id=None): body = { 'schema': {'fields': schema}, - 'tableReference': { - 'tableId': table, - 'projectId': project_id, - 'datasetId': dataset - } } try: result = self.bigquery.tables().patch( projectId=project_id, datasetId=dataset, + tableId=table, body=body ).execute(num_retries=self.num_retries) if self.swallow_results: diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 9af4f0c..5d36aa9 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1913,9 +1913,6 @@ def setUp(self): self.client = client.BigQueryClient(self.mock_bq_service, self.project) self.body = { 'schema': {'fields': self.schema}, - 'tableReference': { - 'tableId': self.table, 'projectId': self.project, - 'datasetId': self.dataset} } self.expiration_time = 
1437513693000 @@ -1941,7 +1938,8 @@ def test_table_patch_failed(self): self.client.swallow_results = True self.mock_tables.patch.assert_called_with( - projectId=self.project, datasetId=self.dataset, body=self.body) + projectId=self.project, datasetId=self.dataset, + tableId=self.table, body=self.body) self.mock_tables.patch.return_value.execute. \ assert_called_with(num_retries=0) @@ -1968,7 +1966,8 @@ def test_table_patch_success(self): self.client.swallow_results = True self.mock_tables.patch.assert_called_with( - projectId=self.project, datasetId=self.dataset, body=self.body) + projectId=self.project, datasetId=self.dataset, + tableId=self.table, body=self.body) self.mock_tables.patch.return_value.execute. \ assert_called_with(num_retries=0) From 5cc95ba35913b68bcf19210534e41e708e7e8384 Mon Sep 17 00:00:00 2001 From: Yves Bastide Date: Tue, 30 Jul 2019 14:22:51 +0200 Subject: [PATCH 26/31] Fix Travis and tox envlist Replace python 3.3 and 3.4 with 3.5 and 3.6. Signed-off-by: Yves Bastide --- .travis.yml | 3 ++- tox.ini | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1e1c28c..ba3cdc8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,7 @@ notifications: email: false env: - TOXENV=py27 - - TOXENV=py34 + - TOXENV=py35 + - TOXENV=py36 - TOXENV=nightly - TOXENV=pypy diff --git a/tox.ini b/tox.ini index ce76190..58dadc9 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ # and then run "tox" from this directory. 
[tox] -envlist = py27, py33, py34, nightly, pypy +envlist = py27, py35, py36, nightly, pypy [testenv] commands = nosetests --logging-level=ERROR -a slow --with-coverage --cover-package=bigquery From 8ebf84f6310b5bfd26de18b9dce50c7f37ff9b94 Mon Sep 17 00:00:00 2001 From: Ege U Date: Tue, 10 Dec 2019 16:14:06 +0300 Subject: [PATCH 27/31] Dry runs return bytes processed, and cache hit now --- bigquery/client.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 125d048..eedafc2 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -243,7 +243,7 @@ def _submit_query_job(self, query_data): ------- tuple job id and query results if query completed. If dry_run is True, - job id will be None and results will be empty if the query is valid + job id will be None and results will be [cacheHit and totalBytesProcessed] if the query is valid or a dict containing the response if invalid. Raises @@ -269,13 +269,17 @@ def _submit_query_job(self, query_data): schema = query_reply.get('schema', {'fields': None})['fields'] rows = query_reply.get('rows', []) job_complete = query_reply.get('jobComplete', False) + cache_hit = query_reply['cacheHit'] + total_bytes_processed = query_reply['totalBytesProcessed'] # raise exceptions if it's not an async query # and job is not completed after timeout if not job_complete and query_data.get("timeoutMs", False): logger.error('BigQuery job %s timeout' % job_id) raise BigQueryTimeoutException() - + + if query_data.get("dryRun", True): + return job_id, [cache_hit, total_bytes_processed] return job_id, [self._transform_row(row, schema) for row in rows] def _get_job_reference(self, job_id): @@ -345,8 +349,8 @@ def query(self, query, max_results=None, timeout=0, dry_run=False, use_legacy_sq How long to wait for the query to complete, in seconds before the request times out and returns. dry_run : bool, optional - If True, the query isn't actually run. 
A valid query will return an - empty response, while an invalid one will return the same error + If True, the query isn't actually run. A valid query will return + cache hit, and total bytes processed, while an invalid one will return the same error message it would if it wasn't a dry run. use_legacy_sql : bool, optional. Default True. If False, the query will use BigQuery's standard SQL (https://cloud.google.com/bigquery/sql-reference/) @@ -359,7 +363,7 @@ def query(self, query, max_results=None, timeout=0, dry_run=False, use_legacy_sq ------- tuple (job id, query results) if the query completed. If dry_run is True, - job id will be None and results will be empty if the query is valid + job id will be None and results will be [cacheHit and totalBytesProcessed] if the query is valid or a ``dict`` containing the response if invalid. Raises From 01f38be5947df8ae5a9936703181a8062c5fc48c Mon Sep 17 00:00:00 2001 From: Ege U Date: Tue, 10 Dec 2019 16:32:44 +0300 Subject: [PATCH 28/31] Rewrote the tests --- bigquery/tests/test_client.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 5d36aa9..1f2d247 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -297,7 +297,9 @@ def test_query(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, - 'jobComplete': True + 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -329,6 +331,8 @@ def test_query_max_results_set(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -357,6 +361,8 @@ def test_query_timeout_set(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, 'jobComplete': True, + 
'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -382,6 +388,8 @@ def test_sync_query_timeout(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, 'jobComplete': False, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -400,6 +408,8 @@ def test_async_query_timeout(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, 'jobComplete': False, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -409,14 +419,18 @@ def test_async_query_timeout(self): self.assertEquals(results, []) def test_query_dry_run_valid(self): - """Ensure that None and an empty list is returned from the query when + """Ensure that None and [cacheHit, totalBytesProcessed] is returned from the query when dry_run is True and the query is valid. """ mock_query_job = mock.Mock() - mock_query_job.execute.return_value = {'jobReference': {}, - 'jobComplete': True} + mock_query_job.execute.return_value = { + 'jobReference': {}, + 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 + } self.mock_job_collection.query.return_value = mock_query_job @@ -428,7 +442,7 @@ def test_query_dry_run_valid(self): 'dryRun': True} ) self.assertIsNone(job_id) - self.assertEqual([], results) + self.assertEqual([False, 0], results) def test_query_dry_run_invalid(self): """Ensure that None and a dict is returned from the query when dry_run @@ -468,6 +482,8 @@ def test_query_with_results(self): 'schema': {'fields': [{'name': 'foo', 'type': 'INTEGER'}]}, 'rows': [{'f': [{'v': 10}]}], 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -491,7 +507,9 @@ def test_query_with_using_legacy_sql(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, - 'jobComplete': True + 
'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -873,7 +891,7 @@ def test_json_job_body_constructed_correctly(self): body = { "jobReference": { "projectId": self.project_id, - "jobId": "job" + "jobId": "job", }, "configuration": { "load": { From 0d2c801745c48732f20c9002d2a6026995875540 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Tue, 10 Dec 2019 17:48:32 -0600 Subject: [PATCH 29/31] Bump version to 1.15.0 --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index c162747..1c19d78 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.14.1' +__version__ = '1.15.0' From df42f83b637fbe4a70eac200ae05ea8a5f775316 Mon Sep 17 00:00:00 2001 From: Rahul Kumar Gupta <67097571+rahulshivan05@users.noreply.github.com> Date: Thu, 1 Oct 2020 09:06:03 +0530 Subject: [PATCH 30/31] Update requirements_dev.txt --- requirements_dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index 74162c3..1040dea 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,6 +1,6 @@ nose rednose -mock==1.0.1 +mock==4.0.2 coverage nose-exclude tox From 77a7b1b1f3c3cbe50ce0db20b2ebc39012fbca78 Mon Sep 17 00:00:00 2001 From: Tim Gates Date: Wed, 24 Nov 2021 06:50:37 +1100 Subject: [PATCH 31/31] docs: fix simple typo, offical -> official There is a small typo in bigquery/client.py. Should read `official` rather than `offical`. --- bigquery/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index eedafc2..bb4d50a 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -227,7 +227,7 @@ def _submit_query_job(self, query_data): """ Submit a query job to BigQuery. 
This is similar to BigQueryClient.query, but gives the user - direct access to the query method on the offical BigQuery + direct access to the query method on the official BigQuery python client. For fine-grained control over a query job, see: @@ -306,7 +306,7 @@ def _get_job_reference(self, job_id): def _insert_job(self, body_object): """ Submit a job to BigQuery - Direct proxy to the insert() method of the offical BigQuery + Direct proxy to the insert() method of the official BigQuery python client. Able to submit load, link, query, copy, or extract jobs.