diff --git a/.travis.yml b/.travis.yml index eb0ba39..ba3cdc8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,12 +1,16 @@ language: python -python: - - "2.7" - +before_install: + - sudo apt-get update -q + - sudo apt-get install pypy -y install: - - pip install -r requirements.txt - - pip install -r requirements_dev.txt - -script: make test - + - python setup.py develop + - pip install tox +script: tox -e $TOXENV notifications: email: false +env: + - TOXENV=py27 + - TOXENV=py35 + - TOXENV=py36 + - TOXENV=nightly + - TOXENV=pypy diff --git a/README.md b/README.md index 0359d37..009f125 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ Simple Python client for interacting with Google BigQuery. This client provides an API for retrieving and inserting BigQuery data by wrapping Google's low-level API client library. It also provides facilities that make it convenient to access data that is tied to an App Engine appspot, such as request logs. +[Documentation](http://tylertreat.github.io/BigQuery-Python/) + # Installation `pip install bigquery-python` @@ -26,6 +28,11 @@ key = 'key.pem' client = get_client(project_id, service_account=service_account, private_key_file=key, readonly=True) +# JSON key provided by Google +json_key = 'key.json' + +client = get_client(json_key_file=json_key, readonly=True) + # Submit an async query. 
job_id, _results = client.query('SELECT * FROM dataset.my_table LIMIT 1000') @@ -94,13 +101,33 @@ conditions = [ } ] +grouping = ['Timestamp'] + +having = [ + { + 'field': 'Timestamp', + 'type': 'INTEGER', + 'comparators': [ + { + 'condition': '==', + 'negate': False, + 'value': 1399478981 + } + ] + } +] + +order_by ={'fields': ['Timestamp'], 'direction': 'desc'} + query = render_query( 'dataset', ['table'], select=selects, conditions=conditions, - groupings=['Timestamp'], - order_by={'field': 'Timestamp', 'direction': 'desc'} + groupings=grouping, + having=having, + order_by=order_by, + limit=47 ) job_id, _ = client.query(query) @@ -108,7 +135,7 @@ job_id, _ = client.query(query) # Managing Tables -The BigQuery client provides facilities to manage dataset tables, including creating, deleting, and checking the existence of tables. +The BigQuery client provides facilities to manage dataset tables, including creating, deleting, checking the existence, and getting the metadata of tables. ```python # Create a new table. @@ -123,6 +150,10 @@ deleted = client.delete_table('dataset', 'my_table') # Check if a table exists. exists = client.check_table('dataset', 'my_table') + +# Get a table's full metadata. Includes numRows, numBytes, etc. +# See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables +metadata = client.get_table('dataset', 'my_table') ``` There is also functionality for retrieving tables that are associated with a Google App Engine appspot, assuming table names are in the form of appid_YYYY_MM or YYYY_MM_appid. This allows tables between a date range to be selected and queried on. @@ -142,7 +173,7 @@ The client provides an API for inserting data into a BigQuery table. The last pa ```python # Insert data into table. 
rows = [ - {'one': 'ein', 'two': 'zwei'} + {'one': 'ein', 'two': 'zwei'}, {'id': 'NzAzYmRiY', 'one': 'uno', 'two': 'dos'}, {'id': 'NzAzYmRiY', 'one': 'ein', 'two': 'zwei'} # duplicate entry ] @@ -163,6 +194,34 @@ try: except BigQueryTimeoutException: print "Timeout" +# write to permanent table with UDF in query string +external_udf_uris = ["gs://bigquery-sandbox-udf/url_decode.js"] +query = """SELECT requests, title + FROM + urlDecode( + SELECT + title, sum(requests) AS num_requests + FROM + [fh-bigquery:wikipedia.pagecounts_201504] + WHERE language = 'fr' + GROUP EACH BY title + ) + WHERE title LIKE '%รง%' + ORDER BY requests DESC + LIMIT 100 + """ +job = client.write_to_table( + query, + 'dataset', + 'table', + external_udf_uris=external_udf_uris +) + +try: + job_resource = client.wait_for_job(job, timeout=60) + print job_resource +except BigQueryTimeoutException: + print "Timeout" # write to temporary table job = client.write_to_table('SELECT * FROM dataset.original_table LIMIT 100') @@ -171,6 +230,8 @@ try: print job_resource except BigQueryTimeoutException: print "Timeout" + + ``` # Import data from Google cloud storage @@ -225,20 +286,18 @@ client.update_dataset('mydataset', friendly_name="mon Dataset") # description is # Patch dataset client.patch_dataset('mydataset', friendly_name="mon Dataset") # friendly_name changed; description is preserved + +# Check if dataset exists. +exists = client.check_dataset('mydataset') ``` # Creating a schema from a sample record ```python from bigquery import schema_from_record -schema_from_record({"id":123, "posts": [{"id":123, "text": "tihs is a post"}], "username": "bob"}) +schema_from_record({"id":123, "posts": [{"id":123, "text": "this is a post"}], "username": "bob"}) ``` -# Caveats - -BigQuery [flattens](https://developers.google.com/bigquery/docs/data?hl=ja#flatten) results with repeated records, so a result might actually map to multiple rows. 
This means that the row count may be larger than the actual number of results because BigQuery reports the number of unrolled rows but the returned results are rolled back up. - - # Contributing Requirements to commit here: diff --git a/bigquery/__init__.py b/bigquery/__init__.py index 2ae326f..b393875 100644 --- a/bigquery/__init__.py +++ b/bigquery/__init__.py @@ -1,5 +1,9 @@ -from client import get_client -from client import ( +from __future__ import absolute_import + +from .version import __version__ + +from .client import get_client +from .client import ( BIGQUERY_SCOPE, BIGQUERY_SCOPE_READ_ONLY, JOB_CREATE_IF_NEEDED, @@ -14,4 +18,4 @@ JOB_ENCODING_ISO_8859_1 ) -from schema_builder import schema_from_record +from .schema_builder import schema_from_record diff --git a/bigquery/client.py b/bigquery/client.py index 68afdc0..bb4d50a 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1,24 +1,27 @@ import calendar +import json +from logging import getLogger, NullHandler from collections import defaultdict from datetime import datetime, timedelta -from time import sleep -from time import time from hashlib import sha256 -import json -import logging +from io import StringIO +from time import sleep, time +from functools import reduce -from apiclient.discovery import build -from apiclient.errors import HttpError -import httplib2 +import six +from bigquery.errors import (BigQueryTimeoutException, JobExecutingException, + JobInsertException, UnfinishedQueryException) +from googleapiclient.discovery import build, DISCOVERY_URI +from googleapiclient.errors import HttpError +from httplib2 import Http -from bigquery.schema_builder import schema_from_record -from bigquery.errors import ( - JobExecutingException, JobInsertException, - UnfinishedQueryException, BigQueryTimeoutException -) +BIGQUERY_SCOPE = [ + 'https://www.googleapis.com/auth/bigquery' +] -BIGQUERY_SCOPE = 'https://www.googleapis.com/auth/bigquery' -BIGQUERY_SCOPE_READ_ONLY = 
'https://www.googleapis.com/auth/bigquery.readonly' +BIGQUERY_SCOPE_READ_ONLY = [ + 'https://www.googleapis.com/auth/bigquery.readonly' +] CACHE_TIMEOUT = timedelta(seconds=30) @@ -44,173 +47,399 @@ JOB_FORMAT_NEWLINE_DELIMITED_JSON JOB_DESTINATION_FORMAT_CSV = JOB_FORMAT_CSV +logger = getLogger(__name__) +logger.addHandler(NullHandler()) -def get_client(project_id, credentials=None, service_account=None, - private_key=None, private_key_file=None, readonly=True, - swallow_results=True): + +def get_client(project_id=None, credentials=None, + service_url=None, service_account=None, + private_key=None, private_key_file=None, + json_key=None, json_key_file=None, + readonly=True, swallow_results=True, + num_retries=0): """Return a singleton instance of BigQueryClient. Either AssertionCredentials or a service account and private key combination need to be provided in order to authenticate requests to BigQuery. - Args: - project_id: the BigQuery project id. - credentials: an AssertionCredentials instance to authenticate requests - to BigQuery. - service_account: the Google API service account name. - private_key: the private key associated with the service account in - PKCS12 or PEM format. - private_key_file: the name of the file containing the private key - associated with the service account in PKCS12 or PEM - format. - readonly: bool indicating if BigQuery access is read-only. Has no - effect if credentials are provided. - swallow_results: If set to false then return the actual response value - instead of converting to a boolean. - - Returns: - an instance of BigQueryClient. + Parameters + ---------- + project_id : str, optional + The BigQuery project id, required unless json_key or json_key_file is + provided. 
+ credentials : oauth2client.client.SignedJwtAssertionCredentials, optional + AssertionCredentials instance to authenticate requests to BigQuery + (optional, must provide `service_account` and (`private_key` or + `private_key_file`) or (`json_key` or `json_key_file`) if not included + service_url : str, optional + A URI string template pointing to the location of Google's API + discovery service. Requires two parameters {api} and {apiVersion} that + when filled in produce an absolute URI to the discovery document for + that service. If not set then the default googleapiclient discovery URI + is used. See `credentials` + service_account : str, optional + The Google API service account name. See `credentials` + private_key : str, optional + The private key associated with the service account in PKCS12 or PEM + format. See `credentials` + private_key_file : str, optional + The name of the file containing the private key associated with the + service account in PKCS12 or PEM format. See `credentials` + json_key : dict, optional + The JSON key associated with the service account. See `credentials` + json_key_file : str, optional + The name of the JSON key file associated with the service account. See + `credentials`. + readonly : bool + Bool indicating if BigQuery access is read-only. Has no effect if + credentials are provided. Default True. + swallow_results : bool + If set to False, then return the actual response value instead of + converting to boolean. Default True. + num_retries : int, optional + The number of times to retry the request. Default 0 (no retry). + + + Returns + ------- + BigQueryClient + An instance of the BigQuery client. 
""" if not credentials: - assert service_account and (private_key or private_key_file), \ - 'Must provide AssertionCredentials or service account and key' + assert (service_account and (private_key or private_key_file)) or ( + json_key or json_key_file), \ + 'Must provide AssertionCredentials or service account and P12 key\ + or JSON key' + + if not project_id: + assert json_key or json_key_file, \ + 'Must provide project_id unless json_key or json_key_file is\ + provided' + + if service_url is None: + service_url = DISCOVERY_URI + + scope = BIGQUERY_SCOPE_READ_ONLY if readonly else BIGQUERY_SCOPE if private_key_file: - with open(private_key_file, 'rb') as key_file: - private_key = key_file.read() + credentials = _credentials().from_p12_keyfile(service_account, + private_key_file, + scopes=scope) + + if private_key: + try: + if isinstance(private_key, basestring): + private_key = private_key.decode('utf-8') + except NameError: + # python3 -- private_key is already unicode + pass + credentials = _credentials().from_p12_keyfile_buffer( + service_account, + StringIO(private_key), + scopes=scope) + + if json_key_file: + with open(json_key_file, 'r') as key_file: + json_key = json.load(key_file) + + if json_key: + credentials = _credentials().from_json_keyfile_dict(json_key, + scopes=scope) + if not project_id: + project_id = json_key['project_id'] bq_service = _get_bq_service(credentials=credentials, - service_account=service_account, - private_key=private_key, - readonly=readonly) + service_url=service_url) - return BigQueryClient(bq_service, project_id, swallow_results) + return BigQueryClient(bq_service, project_id, swallow_results, + num_retries) -def _get_bq_service(credentials=None, service_account=None, private_key=None, - readonly=True): - """Construct an authorized BigQuery service object.""" +def get_projects(bq_service): + """Given the BigQuery service, return data about all projects.""" + projects_request = bq_service.projects().list().execute() - assert 
credentials or (service_account and private_key), \ - 'Must provide AssertionCredentials or service account and key' + projects = [] + for project in projects_request.get('projects', []): + project_data = { + 'id': project['id'], + 'name': project['friendlyName'] + } + projects.append(project_data) + return projects - if not credentials: - scope = BIGQUERY_SCOPE_READ_ONLY if readonly else BIGQUERY_SCOPE - credentials = _credentials()(service_account, private_key, scope=scope) - http = httplib2.Http() - http = credentials.authorize(http) - service = build('bigquery', 'v2', http=http) +def _get_bq_service(credentials=None, service_url=None): + """Construct an authorized BigQuery service object.""" + + assert credentials, 'Must provide ServiceAccountCredentials' + + http = credentials.authorize(Http()) + service = build( + 'bigquery', + 'v2', + http=http, + discoveryServiceUrl=service_url, + cache_discovery=False + ) return service def _credentials(): """Import and return SignedJwtAssertionCredentials class""" - from oauth2client.client import SignedJwtAssertionCredentials + from oauth2client.service_account import ServiceAccountCredentials - return SignedJwtAssertionCredentials + return ServiceAccountCredentials class BigQueryClient(object): - def __init__(self, bq_service, project_id, swallow_results=True): + def __init__(self, bq_service, project_id, swallow_results=True, + num_retries=0): self.bigquery = bq_service self.project_id = project_id self.swallow_results = swallow_results + self.num_retries = num_retries self.cache = {} - def query(self, query, max_results=None, timeout=0, dry_run=False): - """Submit a query to BigQuery. + def _get_project_id(self, project_id=None): + """ Get new project_id + + Default is self.project_id, which is the project client authenticate to. + A new project_id is specified when client wants to authenticate to 1 project, + but run jobs in a different project. 
+ + Parameters + ---------- + project_id : str + BigQuery project_id + + Returns + ------- + project_id: BigQuery project_id + """ + if project_id is None: + project_id = self.project_id + return project_id + + def _submit_query_job(self, query_data): + """ Submit a query job to BigQuery. + + This is similar to BigQueryClient.query, but gives the user + direct access to the query method on the official BigQuery + python client. - Args: - query: BigQuery query string. - max_results: maximum number of rows to return per page of results. - timeout: how long to wait for the query to complete, in seconds, - before the request times out and returns. - dry_run: if True, the query isn't actually run. A valid query will - return an empty response, while an invalid one will return - the same error message it would if it wasn't a dry run. + For fine-grained control over a query job, see: + https://google-api-client-libraries.appspot.com/documentation/bigquery/v2/python/latest/bigquery_v2.jobs.html#query - Returns: + Parameters + ---------- + query_data + query object as per "configuration.query" in + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query + + Returns + ------- + tuple job id and query results if query completed. If dry_run is True, - job id will be None and results will be empty if the query is valid + job id will be None and results will be [cacheHit and totalBytesProcessed] if the query is valid or a dict containing the response if invalid. 
- Raises: - BigQueryTimeoutException on timeout + Raises + ------ + BigQueryTimeoutException + On timeout """ - logging.debug('Executing query: %s' % query) + logger.debug('Submitting query job: %s' % query_data) job_collection = self.bigquery.jobs() - query_data = { - 'query': query, - 'timeoutMs': timeout * 1000, - 'dryRun': dry_run, - 'maxResults': max_results, - } try: query_reply = job_collection.query( - projectId=self.project_id, body=query_data).execute() + projectId=self.project_id, body=query_data).execute( + num_retries=self.num_retries) except HttpError as e: - if dry_run: - return None, json.loads(e.content) + if query_data.get("dryRun", False): + return None, json.loads(e.content.decode('utf8')) raise job_id = query_reply['jobReference'].get('jobId') schema = query_reply.get('schema', {'fields': None})['fields'] rows = query_reply.get('rows', []) job_complete = query_reply.get('jobComplete', False) + cache_hit = query_reply['cacheHit'] + total_bytes_processed = query_reply['totalBytesProcessed'] # raise exceptions if it's not an async query # and job is not completed after timeout - if not job_complete and timeout: - logging.error('BigQuery job %s timeout' % job_id) + if not job_complete and query_data.get("timeoutMs", False): + logger.error('BigQuery job %s timeout' % job_id) raise BigQueryTimeoutException() - + + if query_data.get("dryRun", True): + return job_id, [cache_hit, total_bytes_processed] return job_id, [self._transform_row(row, schema) for row in rows] + def _get_job_reference(self, job_id): + """ Get job reference from job_id + For more details, see: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#resource + + Parameters + ---------- + job_id: + Id of the job + + Returns + ------- + job_reference: json of job_reference + """ + job_reference = { + "projectId": self.project_id, + "jobId": job_id + } + + return job_reference + + def _insert_job(self, body_object): + """ Submit a job to BigQuery + + Direct proxy to the 
insert() method of the official BigQuery + python client. + + Able to submit load, link, query, copy, or extract jobs. + + For more details, see: + https://google-api-client-libraries.appspot.com/documentation/bigquery/v2/python/latest/bigquery_v2.jobs.html#insert + + Parameters + ---------- + body_object : body object passed to bigquery.jobs().insert() + + Returns + ------- + response of the bigquery.jobs().insert().execute() call + + Raises + ------ + BigQueryTimeoutException on timeout + """ + + logger.debug('Submitting job: %s' % body_object) + + job_collection = self.bigquery.jobs() + + return job_collection.insert( + projectId=self.project_id, + body=body_object + ).execute(num_retries=self.num_retries) + + def query(self, query, max_results=None, timeout=0, dry_run=False, use_legacy_sql=None, external_udf_uris=None): + """Submit a query to BigQuery. + + Parameters + ---------- + query : str + BigQuery query string + max_results : int, optional + The maximum number of rows to return per page of results. + timeout : float, optional + How long to wait for the query to complete, in seconds before + the request times out and returns. + dry_run : bool, optional + If True, the query isn't actually run. A valid query will return + cache hit, and total bytes processed, while an invalid one will return the same error + message it would if it wasn't a dry run. + use_legacy_sql : bool, optional. Default True. + If False, the query will use BigQuery's standard SQL (https://cloud.google.com/bigquery/sql-reference/) + external_udf_uris : list, optional + Contains external UDF URIs. If given, URIs must be Google Cloud + Storage and have .js extensions. + + + Returns + ------- + tuple + (job id, query results) if the query completed. If dry_run is True, + job id will be None and results will be [cacheHit and totalBytesProcessed] if the query is valid + or a ``dict`` containing the response if invalid. 
+ + Raises + ------ + BigQueryTimeoutException + on timeout + """ + + logger.debug('Executing query: %s' % query) + + query_data = { + 'query': query, + 'timeoutMs': timeout * 1000, + 'dryRun': dry_run, + 'maxResults': max_results + } + + if use_legacy_sql is not None: + query_data['useLegacySql'] = use_legacy_sql + + if external_udf_uris: + query_data['userDefinedFunctionResources'] = \ + [ {'resourceUri': u} for u in external_udf_uris ] + + return self._submit_query_job(query_data) + def get_query_schema(self, job_id): """Retrieve the schema of a query by job id. - Args: - job_id: The job_id that references a BigQuery query. - Returns: - A list of dictionaries that represent the schema. + Parameters + ---------- + job_id : str + The job_id that references a BigQuery query + + Returns + ------- + list + A ``list`` of ``dict`` objects that represent the schema. """ - job_collection = self.bigquery.jobs() - query_reply = self._get_query_results( - job_collection, self.project_id, job_id, offset=0, limit=0) + query_reply = self.get_query_results(job_id, offset=0, limit=0) if not query_reply['jobComplete']: - logging.warning('BigQuery job %s not complete' % job_id) + logger.warning('BigQuery job %s not complete' % job_id) raise UnfinishedQueryException() return query_reply['schema']['fields'] - def get_table_schema(self, dataset, table): + def get_table_schema(self, dataset, table, project_id=None): """Return the table schema. - Args: - dataset: the dataset containing the table. - table: the table to get the schema for. - - Returns: - A list of dicts that represent the table schema. If the table - doesn't exist, None is returned. + Parameters + ---------- + dataset : str + The dataset containing the `table`. + table : str + The table to get the schema for + project_id: str, optional + The project of the dataset. + + Returns + ------- + list + A ``list`` of ``dict`` objects that represent the table schema. If + the table doesn't exist, None is returned. 
""" + project_id = self._get_project_id(project_id) - try: + try: result = self.bigquery.tables().get( - projectId=self.project_id, + projectId=project_id, tableId=table, - datasetId=dataset).execute() - except HttpError, e: + datasetId=dataset).execute(num_retries=self.num_retries) + except HttpError as e: if int(e.resp['status']) == 404: - logging.warn('Table %s.%s does not exist', dataset, table) + logger.warn('Table %s.%s does not exist', dataset, table) return None raise @@ -219,169 +448,437 @@ def get_table_schema(self, dataset, table): def check_job(self, job_id): """Return the state and number of results of a query by job id. - Args: - job_id: The job id of the query to check. - - Returns: - Whether or not the query has completed and the total number of rows - included in the query table if it has completed. + Parameters + ---------- + job_id : str + The job id of the query to check. + + Returns + ------- + tuple + (``bool``, ``int``) Whether or not the query has completed and the + total number of rows included in the query table if it has + completed (else 0) """ - job_collection = self.bigquery.jobs() - query_reply = self._get_query_results( - job_collection, self.project_id, job_id, offset=0, limit=0) + query_reply = self.get_query_results(job_id, offset=0, limit=0) return (query_reply.get('jobComplete', False), int(query_reply.get('totalRows', 0))) - def get_query_rows(self, job_id, offset=None, limit=None): + def get_query_rows(self, job_id, offset=None, limit=None, timeout=0): """Retrieve a list of rows from a query table by job id. - - Args: - job_id: The job id that references a BigQuery query. - offset: The offset of the rows to pull from BigQuery. - limit: The number of rows to retrieve from a query table. - - Returns: - A list of dictionaries that represent table rows. + This method will append results from multiple pages together. If you + want to manually page through results, you can use `get_query_results` + method directly. 
+ + Parameters + ---------- + job_id : str + The job id that references a BigQuery query. + offset : int, optional + The offset of the rows to pull from BigQuery + limit : int, optional + The number of rows to retrieve from a query table. + timeout : float, optional + Timeout in seconds. + + Returns + ------- + list + A ``list`` of ``dict`` objects that represent table rows. """ - job_collection = self.bigquery.jobs() - query_reply = self._get_query_results( - job_collection, self.project_id, job_id, offset=offset, - limit=limit) - + # Get query results + query_reply = self.get_query_results(job_id, offset=offset, + limit=limit, timeout=timeout) if not query_reply['jobComplete']: - logging.warning('BigQuery job %s not complete' % job_id) + logger.warning('BigQuery job %s not complete' % job_id) raise UnfinishedQueryException() - schema = query_reply['schema']['fields'] + schema = query_reply["schema"]["fields"] rows = query_reply.get('rows', []) + page_token = query_reply.get("pageToken") + records = [self._transform_row(row, schema) for row in rows] + + # Append to records if there are multiple pages for query results + while page_token and (not limit or len(records) < limit): + query_reply = self.get_query_results( + job_id, offset=offset, limit=limit, page_token=page_token, + timeout=timeout) + page_token = query_reply.get("pageToken") + rows = query_reply.get('rows', []) + records += [self._transform_row(row, schema) for row in rows] + return records[:limit] if limit else records + + def check_dataset(self, dataset_id, project_id=None): + """Check to see if a dataset exists. 
+ + Parameters + ---------- + dataset_id : str + Dataset unique id + project_id: str, optional + The project the dataset is in + + Returns + ------- + bool + True if dataset at `dataset_id` exists, else Fasle + """ + dataset = self.get_dataset(dataset_id, project_id) + return bool(dataset) + + def get_dataset(self, dataset_id, project_id=None): + """Retrieve a dataset if it exists, otherwise return an empty dict. + + Parameters + ---------- + dataset_id : str + Dataset unique id + project_id: str, optional + The project the dataset is in + + Returns + ------- + dict + Contains dataset object if it exists, else empty + """ + project_id = self._get_project_id(project_id) - return [self._transform_row(row, schema) for row in rows] + try: + dataset = self.bigquery.datasets().get( + projectId=project_id, datasetId=dataset_id).execute( + num_retries=self.num_retries) + except HttpError: + dataset = {} - def check_table(self, dataset, table): - """Check to see if a table exists. + return dataset - Args: - dataset: the dataset to check. - table: the name of the table. + def check_table(self, dataset, table, project_id=None): + """Check to see if a table exists. - Returns: - bool indicating if the table exists. + Parameters + ---------- + dataset : str + The dataset to check + table : str + The name of the table + project_id: str, optional + The project the table is in + + Returns + ------- + bool + True if table exists, else False """ - table = self.get_table(dataset, table) + table = self.get_table(dataset, table, project_id) return bool(table) - def get_table(self, dataset, table): + def get_table(self, dataset, table, project_id=None): + """ Retrieve a table if it exists, otherwise return an empty dict. 
+ + Parameters + ---------- + dataset : str + The dataset that the table is in + table : str + The name of the table + project_id: str, optional + The project that the table is in + + Returns + ------- + dict + Containing the table object if it exists, else empty """ - Retrieve a table if it exists, otherwise return an empty dict. - - Args: - dataset: the dataset that the table is in - table: the name of the table - - Returns: - dictionary containing the table object if it exists, otherwise - an empty dictionary - """ - try: + project_id = self._get_project_id(project_id) + try: table = self.bigquery.tables().get( - projectId=self.project_id, datasetId=dataset, - tableId=table).execute() + projectId=project_id, datasetId=dataset, + tableId=table).execute(num_retries=self.num_retries) except HttpError: table = {} return table - def create_table(self, dataset, table, schema): + def create_table(self, dataset, table, schema, + expiration_time=None, time_partitioning=False, + project_id=None): """Create a new table in the dataset. - Args: - dataset: the dataset to create the table in. - table: the name of table to create. - schema: table schema dict. - - Returns: - bool indicating if the table was successfully created or not, - or response from BigQuery if swallow_results is set for False. + Parameters + ---------- + dataset : str + The dataset to create the table in + table : str + The name of the table to create + schema : dict + The table schema + expiration_time : int or double, optional + The expiry time in milliseconds since the epoch. + time_partitioning : bool, optional + Create a time partitioning. 
+ project_id: str, optional + The project to create the table in + + Returns + ------- + Union[bool, dict] + If the table was successfully created, or response from BigQuery + if swallow_results is set to False """ + project_id = self._get_project_id(project_id) body = { 'schema': {'fields': schema}, 'tableReference': { 'tableId': table, - 'projectId': self.project_id, + 'projectId': project_id, 'datasetId': dataset } } + if expiration_time is not None: + body['expirationTime'] = expiration_time + + if time_partitioning: + body['timePartitioning'] = {'type': 'DAY'} + try: table = self.bigquery.tables().insert( - projectId=self.project_id, + projectId=project_id, datasetId=dataset, body=body - ).execute() + ).execute(num_retries=self.num_retries) if self.swallow_results: return True else: return table except HttpError as e: - logging.error(('Cannot create table {0}.{1}\n' - 'Http Error: {2}').format(dataset, table, - e.content)) + logger.error(('Cannot create table {0}.{1}.{2}\n' + 'Http Error: {3}').format(project_id, dataset, table, e.content)) if self.swallow_results: return False else: return {} - def delete_table(self, dataset, table): - """Delete a table from the dataset. + def update_table(self, dataset, table, schema, project_id=None): + """Update an existing table in the dataset. + + Parameters + ---------- + dataset : str + The dataset to update the table in + table : str + The name of the table to update + schema : dict + Table schema + project_id: str, optional + The project to update the table in + + Returns + ------- + Union[bool, dict] + bool indicating if the table was successfully updated or not, + or response from BigQuery if swallow_results is set to False. 
+ """ + project_id = self._get_project_id(project_id) + + body = { + 'schema': {'fields': schema}, + 'tableReference': { + 'tableId': table, + 'projectId': project_id, + 'datasetId': dataset + } + } + + try: + result = self.bigquery.tables().update( + projectId=project_id, + tableId= table, + datasetId=dataset, + body=body + ).execute(num_retries=self.num_retries) + if self.swallow_results: + return True + else: + return result + + except HttpError as e: + logger.error(('Cannot update table {0}.{1}.{2}\n' + 'Http Error: {3}').format(project_id, dataset, table, e.content)) + if self.swallow_results: + return False + else: + return {} + + def patch_table(self, dataset, table, schema, project_id=None): + """Patch an existing table in the dataset. + + Parameters + ---------- + dataset : str + The dataset to patch the table in + table : str + The name of the table to patch + schema : dict + The table schema + project_id: str, optional + The project to patch the table in + + Returns + ------- + Union[bool, dict] + Bool indicating if the table was successfully patched or not, + or response from BigQuery if swallow_results is set to False + """ + project_id = self._get_project_id(project_id) + + body = { + 'schema': {'fields': schema}, + } + + try: + result = self.bigquery.tables().patch( + projectId=project_id, + datasetId=dataset, + tableId=table, + body=body + ).execute(num_retries=self.num_retries) + if self.swallow_results: + return True + else: + return result + + except HttpError as e: + logger.error(('Cannot patch table {0}.{1}.{2}\n' + 'Http Error: {3}').format(project_id, dataset, table, e.content)) + if self.swallow_results: + return False + else: + return {} + + def create_view(self, dataset, view, query, use_legacy_sql=None, project_id=None): + """Create a new view in the dataset. 
+ + Parameters + ---------- + dataset : str + The dataset to create the view in + view : str + The name of the view to create + query : dict + A query that BigQuery executes when the view is referenced. + use_legacy_sql : bool, optional + If False, the query will use BigQuery's standard SQL + (https://cloud.google.com/bigquery/sql-reference/) + project_id: str, optional + The project to create the view in + + Returns + ------- + Union[bool, dict] + bool indicating if the view was successfully created or not, + or response from BigQuery if swallow_results is set to False. + """ + project_id = self._get_project_id(project_id) + + body = { + 'tableReference': { + 'tableId': view, + 'projectId': project_id, + 'datasetId': dataset + }, + 'view': { + 'query': query + } + } + + if use_legacy_sql is not None: + body['view']['useLegacySql'] = use_legacy_sql + + try: + view = self.bigquery.tables().insert( + projectId=project_id, + datasetId=dataset, + body=body + ).execute(num_retries=self.num_retries) + if self.swallow_results: + return True + else: + return view - Args: - dataset: the dataset to delete the table from. - table: the name of the table to delete. + except HttpError as e: + logger.error(('Cannot create view {0}.{1}\n' + 'Http Error: {2}').format(dataset, view, e.content)) + if self.swallow_results: + return False + else: + return {} - Returns: + def delete_table(self, dataset, table, project_id=None): + """Delete a table from the dataset. + + Parameters + ---------- + dataset : str + The dataset to delete the table from. + table : str + The name of the table to delete + project_id: str, optional + String id of the project + + Returns + ------- + Union[bool, dict] bool indicating if the table was successfully deleted or not, or response from BigQuery if swallow_results is set for False. 
""" + project_id = self._get_project_id(project_id) - try: + try: response = self.bigquery.tables().delete( - projectId=self.project_id, + projectId=project_id, datasetId=dataset, tableId=table - ).execute() + ).execute(num_retries=self.num_retries) if self.swallow_results: return True else: return response except HttpError as e: - logging.error(('Cannot delete table {0}.{1}\n' - 'Http Error: {2}').format(dataset, table, - e.content)) + logger.error(('Cannot delete table {0}.{1}\n' + 'Http Error: {2}').format(dataset, table, e.content)) if self.swallow_results: return False else: return {} - def get_tables(self, dataset_id, app_id, start_time, end_time): + def get_tables(self, dataset_id, app_id, start_time, end_time, project_id=None): """Retrieve a list of tables that are related to the given app id and are inside the range of start and end times. - Args: - dataset_id: The BigQuery dataset id to consider. - app_id: The appspot name - start_time: The datetime or unix time after which records will be - fetched. - end_time: The datetime or unix time up to which records will be - fetched. - - Returns: - A list of table names. + Parameters + ---------- + dataset_id : str + The BigQuery dataset id to consider. + app_id : str + The appspot name + start_time : Union[datetime, int] + The datetime or unix time after which records will be fetched. + end_time : Union[datetime, int] + The datetime or unix time up to which records will be fetched. + project_id: str, optional + String id of the project + + Returns + ------- + list + A ``list`` of table names. 
""" if isinstance(start_time, datetime): @@ -390,7 +887,7 @@ def get_tables(self, dataset_id, app_id, start_time, end_time): if isinstance(end_time, datetime): end_time = calendar.timegm(end_time.utctimetuple()) - every_table = self._get_all_tables(dataset_id) + every_table = self._get_all_tables(dataset_id, project_id) app_tables = every_table.get(app_id, {}) return self._filter_tables_by_time(app_tables, start_time, end_time) @@ -400,7 +897,7 @@ def import_data_from_uris( source_uris, dataset, table, - schema=None, + schema=None, job=None, source_format=None, create_disposition=None, @@ -413,49 +910,71 @@ def import_data_from_uris( field_delimiter=None, quote=None, skip_leading_rows=None, + project_id=None, ): """ - Imports data into a BigQuery table from cloud storage. - Args: - source_uris: required string or list of strings representing - the uris on cloud storage of the form: - gs://bucket/filename - dataset: required string id of the dataset - table: required string id of the table - job: optional string identifying the job (a unique jobid - is automatically generated if not provided) - schema: optional list representing the bigquery schema - source_format: optional string - (one of the JOB_SOURCE_FORMAT_* constants) - create_disposition: optional string - (one of the JOB_CREATE_* constants) - write_disposition: optional string - (one of the JOB_WRITE_* constants) - encoding: optional string default - (one of the JOB_ENCODING_* constants) - ignore_unknown_values: optional boolean - max_bad_records: optional boolean - allow_jagged_rows: optional boolean for csv only - allow_quoted_newlines: optional boolean for csv only - field_delimiter: optional string for csv only - quote: optional string the quote character for csv only - skip_leading_rows: optional int for csv only - - Optional arguments with value None are determined by - BigQuery as described: + Imports data into a BigQuery table from cloud storage. 
Optional + arguments that are not specified are determined by BigQuery as + described: https://developers.google.com/bigquery/docs/reference/v2/jobs - Returns: - dict, a BigQuery job resource - Raises: - JobInsertException on http/auth failures or error in result + Parameters + ---------- + source_uris : list + A ``list`` of ``str`` objects representing the uris on cloud + storage of the form: gs://bucket/filename + dataset : str + String id of the dataset + table : str + String id of the table + schema : list, optional + Represents the BigQuery schema + job : str, optional + Identifies the job (a unique job id is automatically generated if + not provided) + source_format : str, optional + One of the JOB_SOURCE_FORMAT_* constants + create_disposition : str, optional + One of the JOB_CREATE_* constants + write_disposition : str, optional + One of the JOB_WRITE_* constants + encoding : str, optional + One of the JOB_ENCODING_* constants + ignore_unknown_values : bool, optional + Whether or not to ignore unknown values + max_bad_records : int, optional + Maximum number of bad records + allow_jagged_rows : bool, optional + For csv only + allow_quoted_newlines : bool, optional + For csv only + field_delimiter : str, optional + For csv only + quote : str, optional + Quote character for csv only + skip_leading_rows : int, optional + For csv only + project_id: str, optional + String id of the project + + Returns + ------- + dict + A BigQuery job response + + Raises + ------ + JobInsertException + on http/auth failures or error in result """ source_uris = source_uris if isinstance(source_uris, list) \ else [source_uris] + project_id = self._get_project_id(project_id) + configuration = { "destinationTable": { - "projectId": self.project_id, + "projectId": project_id, "tableId": table, "datasetId": dataset }, @@ -515,7 +1034,7 @@ def import_data_from_uris( skip_leading_rows=skip_leading_rows, quote=quote) non_null_values = dict((k, v) for k, v - in all_values.items() + in 
list(all_values.items()) if v) raise Exception("Parameters field_delimiter, allow_jagged_rows, " "allow_quoted_newlines, quote and " @@ -527,16 +1046,11 @@ def import_data_from_uris( "configuration": { 'load': configuration }, - "jobReference": { - "projectId": self.project_id, - "jobId": job - } + "jobReference": self._get_job_reference(job) } - logging.debug("Creating load job %s" % body) - job_resource = self.bigquery.jobs() \ - .insert(projectId=self.project_id, body=body) \ - .execute() + logger.debug("Creating load job %s" % body) + job_resource = self._insert_job(body) self._raise_insert_exception_if_error(job_resource) return job_resource @@ -544,45 +1058,60 @@ def export_data_to_uris( self, destination_uris, dataset, - table, + table, job=None, compression=None, destination_format=None, print_header=None, field_delimiter=None, + project_id=None, ): """ - Export data from a BigQuery table to cloud storage. - Args: - destination_uris: required string or list of strings representing - the uris on cloud storage of the form: - gs://bucket/filename - dataset: required string id of the dataset - table: required string id of the table - job: optional string identifying the job (a unique jobid - is automatically generated if not provided) - compression: optional string - (one of the JOB_COMPRESSION_* constants) - destination_format: optional string - (one of the JOB_DESTINATION_FORMAT_* constants) - print_header: optional boolean - field_delimiter: optional string - - Optional arguments with value None are determined by - BigQuery as described: - https://developers.google.com/bigquery/docs/reference/v2/jobs - - Returns: - dict, a BigQuery job resource - Raises: - JobInsertException on http/auth failures or error in result + Export data from a BigQuery table to cloud storage. 
Optional arguments + that are not specified are determined by BigQuery as described: + https://developers.google.com/bigquery/docs/reference/v2/jobs + + Parameters + ---------- + destination_uris : Union[str, list] + ``str`` or ``list`` of ``str`` objects representing the URIs on + cloud storage of the form: gs://bucket/filename + dataset : str + String id of the dataset + table : str + String id of the table + job : str, optional + String identifying the job (a unique jobid is automatically + generated if not provided) + compression : str, optional + One of the JOB_COMPRESSION_* constants + destination_format : str, optional + One of the JOB_DESTINATION_FORMAT_* constants + print_header : bool, optional + Whether or not to print the header + field_delimiter : str, optional + Character separating fields in delimited file + project_id: str, optional + String id of the project + + Returns + ------- + dict + A BigQuery job resource + + Raises + ------ + JobInsertException + On http/auth failures or error in result """ destination_uris = destination_uris \ if isinstance(destination_uris, list) else [destination_uris] + project_id = self._get_project_id(project_id) + configuration = { "sourceTable": { - "projectId": self.project_id, + "projectId": project_id, "tableId": table, "datasetId": dataset }, @@ -613,16 +1142,11 @@ def export_data_to_uris( "configuration": { 'extract': configuration }, - "jobReference": { - "projectId": self.project_id, - "jobId": job - } + "jobReference": self._get_job_reference(job) } - logging.info("Creating export job %s" % body) - job_resource = self.bigquery.jobs() \ - .insert(projectId=self.project_id, body=body) \ - .execute() + logger.info("Creating export job %s" % body) + job_resource = self._insert_job(body) self._raise_insert_exception_if_error(job_resource) return job_resource @@ -630,46 +1154,80 @@ def write_to_table( self, query, dataset=None, - table=None, + table=None, + external_udf_uris=None, allow_large_results=None, 
use_query_cache=None, priority=None, create_disposition=None, write_disposition=None, + use_legacy_sql=None, + maximum_billing_tier=None, + flatten=None, + project_id=None, ): """ Write query result to table. If dataset or table is not provided, - Bigquery will write the result to temporary table. - Args: - query: required BigQuery query string. - dataset: optional string id of the dataset - table: optional string id of the table - allow_large_results: optional boolean - use_query_cache: optional boolean - priority: optional string - (one of the JOB_PRIORITY_* constants) - create_disposition: optional string - (one of the JOB_CREATE_* constants) - write_disposition: optional string - (one of the JOB_WRITE_* constants) - - Optional arguments with value None are determined by - BigQuery as described: - https://developers.google.com/bigquery/docs/reference/v2/jobs - - Returns: - dict, a BigQuery job resource - Raises: - JobInsertException on http/auth failures or error in result + Bigquery will write the result to temporary table. Optional arguments + that are not specified are determined by BigQuery as described: + https://developers.google.com/bigquery/docs/reference/v2/jobs + + Parameters + ---------- + query : str + BigQuery query string + dataset : str, optional + String id of the dataset + table : str, optional + String id of the table + external_udf_uris : list, optional + Contains external UDF URIs. If given, URIs must be Google Cloud + Storage and have .js extensions. 
+ allow_large_results : bool, optional + Whether or not to allow large results + use_query_cache : bool, optional + Whether or not to use query cache + priority : str, optional + One of the JOB_PRIORITY_* constants + create_disposition : str, optional + One of the JOB_CREATE_* constants + write_disposition : str, optional + One of the JOB_WRITE_* constants + use_legacy_sql: bool, optional + If False, the query will use BigQuery's standard SQL + (https://cloud.google.com/bigquery/sql-reference/) + maximum_billing_tier : integer, optional + Limits the billing tier for this job. Queries that have resource + usage beyond this tier will fail (without incurring a charge). If + unspecified, this will be set to your project default. For more + information, + see https://cloud.google.com/bigquery/pricing#high-compute + flatten : bool, optional + Whether or not to flatten nested and repeated fields + in query results + project_id: str, optional + String id of the project + + Returns + ------- + dict + A BigQuery job resource + + Raises + ------ + JobInsertException + On http/auth failures or error in result """ configuration = { "query": query, } + project_id = self._get_project_id(project_id) + if dataset and table: configuration['destinationTable'] = { - "projectId": self.project_id, + "projectId": project_id, "tableId": table, "datasetId": dataset } @@ -677,9 +1235,18 @@ def write_to_table( if allow_large_results is not None: configuration['allowLargeResults'] = allow_large_results + if flatten is not None: + configuration['flattenResults'] = flatten + + if maximum_billing_tier is not None: + configuration['maximumBillingTier'] = maximum_billing_tier + if use_query_cache is not None: configuration['useQueryCache'] = use_query_cache + if use_legacy_sql is not None: + configuration['useLegacySql'] = use_legacy_sql + if priority: configuration['priority'] = priority @@ -689,36 +1256,50 @@ def write_to_table( if write_disposition: configuration['writeDisposition'] = 
write_disposition + if external_udf_uris: + configuration['userDefinedFunctionResources'] = \ + [ {'resourceUri': u} for u in external_udf_uris ] + body = { "configuration": { 'query': configuration } } - logging.info("Creating write to table job %s" % body) - job_resource = self.bigquery.jobs() \ - .insert(projectId=self.project_id, body=body) \ - .execute() + logger.info("Creating write to table job %s" % body) + job_resource = self._insert_job(body) self._raise_insert_exception_if_error(job_resource) return job_resource def wait_for_job(self, job, interval=5, timeout=60): """ Waits until the job indicated by job_resource is done or has failed - Args: - job: dict, representing a BigQuery job resource - interval: optional float polling interval in seconds, default = 5 - timeout: optional float timeout in seconds, default = 60 - Returns: - dict, final state of the job_resource, as described here: - https://developers.google.com/resources/api-libraries/documentation - /bigquery/v2/python/latest/bigquery_v2.jobs.html#get - Raises: - JobExecutingException on http/auth failures or error in result - BigQueryTimeoutException on timeout + + Parameters + ---------- + job : Union[dict, str] + ``dict`` representing a BigQuery job resource, or a ``str`` + representing the BigQuery job id + interval : float, optional + Polling interval in seconds, default = 5 + timeout : float, optional + Timeout in seconds, default = 60 + + Returns + ------- + dict + Final state of the job resource, as described here: + https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/python/latest/bigquery_v2.jobs.html#get + + Raises + ------ + Union[JobExecutingException, BigQueryTimeoutException] + On http/auth failures or timeout """ complete = False - job_id = job['jobReference']['jobId'] + job_id = str(job if isinstance(job, + (six.binary_type, six.text_type, int)) + else job['jobReference']['jobId']) job_resource = None start_time = time() @@ -727,40 +1308,62 @@ def 
wait_for_job(self, job, interval=5, timeout=60): sleep(interval) request = self.bigquery.jobs().get(projectId=self.project_id, jobId=job_id) - job_resource = request.execute() + job_resource = request.execute(num_retries=self.num_retries) self._raise_executing_exception_if_error(job_resource) complete = job_resource.get('status').get('state') == u'DONE' elapsed_time = time() - start_time # raise exceptions if timeout if not complete: - logging.error('BigQuery job %s timeout' % job_id) + logger.error('BigQuery job %s timeout' % job_id) raise BigQueryTimeoutException() return job_resource - def push_rows(self, dataset, table, rows, insert_id_key=None): + def push_rows(self, dataset, table, rows, insert_id_key=None, + skip_invalid_rows=None, ignore_unknown_values=None, + template_suffix=None, project_id=None): """Upload rows to BigQuery table. - Args: - dataset: the dataset to upload to. - table: the name of the table to insert rows into. - rows: list of rows to add to table - insert_id_key: key for insertId in row - - Returns: + Parameters + ---------- + dataset : str + The dataset to upload to + table : str + The name of the table to insert rows into + rows : list + A ``list`` of rows (``dict`` objects) to add to the table + insert_id_key : str, optional + Key for insertId in row. + You can use dot separated key for nested column. + skip_invalid_rows : bool, optional + Insert all valid rows of a request, even if invalid rows exist. + ignore_unknown_values : bool, optional + Accept rows that contain values that do not match the schema. + template_suffix : str, optional + Inserts the rows into an {table}{template_suffix}. + If table {table}{template_suffix} doesn't exist, create from {table}. + project_id: str, optional + The project to upload to + + Returns + ------- + Union[bool, dict] bool indicating if insert succeeded or not, or response from BigQuery if swallow_results is set for False. 
""" - + project_id = self._get_project_id(project_id) table_data = self.bigquery.tabledata() rows_data = [] for row in rows: each_row = {} each_row["json"] = row - if insert_id_key in row: - each_row["insertId"] = row[insert_id_key] + if insert_id_key is not None: + keys = insert_id_key.split('.') + val = reduce(lambda d, key: d.get(key) if d else None, keys, row) + if val is not None: + each_row["insertId"] = val rows_data.append(each_row) data = { @@ -768,16 +1371,25 @@ def push_rows(self, dataset, table, rows, insert_id_key=None): "rows": rows_data } - try: + if skip_invalid_rows is not None: + data['skipInvalidRows'] = skip_invalid_rows + + if ignore_unknown_values is not None: + data['ignoreUnknownValues'] = ignore_unknown_values + + if template_suffix is not None: + data['templateSuffix'] = template_suffix + + try: response = table_data.insertAll( - projectId=self.project_id, + projectId=project_id, datasetId=dataset, tableId=table, body=data - ).execute() + ).execute(num_retries=self.num_retries) if response.get('insertErrors'): - logging.error('BigQuery insert errors: %s' % response) + logger.error('BigQuery insert errors: %s' % response) if self.swallow_results: return False else: @@ -789,7 +1401,7 @@ def push_rows(self, dataset, table, rows, insert_id_key=None): return response except HttpError as e: - logging.exception('Problem with BigQuery insertAll') + logger.exception('Problem with BigQuery insertAll') if self.swallow_results: return False else: @@ -802,15 +1414,48 @@ def push_rows(self, dataset, table, rows, insert_id_key=None): }] } - def _get_all_tables(self, dataset_id, cache=False): - """Retrieve a list of all tables for the dataset. + def get_all_tables(self, dataset_id, project_id=None): + """Retrieve a list of tables for the dataset. + + Parameters + ---------- + dataset_id : str + The dataset to retrieve table data for. 
+ project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset - Args: - dataset_id: the dataset to retrieve table names for. - cache: To use cached value or not. Timeout value - equals CACHE_TIMEOUT. - Returns: - a dictionary of app ids mapped to their table names. + Returns + ------- + A ``list`` with all table names + """ + tables_data = self._get_all_tables_for_dataset(dataset_id, project_id) + + tables = [] + for table in tables_data.get('tables', []): + table_name = table.get('tableReference', {}).get('tableId') + if table_name: + tables.append(table_name) + return tables + + def _get_all_tables(self, dataset_id, cache=False, project_id=None): + """Retrieve the list of tables for dataset, that respect the formats: + * appid_YYYY_MM + * YYYY_MM_appid + + Parameters + ---------- + dataset_id : str + The dataset to retrieve table names for + cache : bool, optional + To use cached value or not (default False). Timeout value equals + CACHE_TIMEOUT. + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset + + Returns + ------- + dict + A ``dict`` of app ids mapped to their table names """ do_fetch = True if cache and self.cache.get(dataset_id): @@ -819,32 +1464,55 @@ def _get_all_tables(self, dataset_id, cache=False): do_fetch = False if do_fetch: - result = self.bigquery.tables().list( - projectId=self.project_id, - datasetId=dataset_id).execute() - - page_token = result.get('nextPageToken') - while page_token: - res = self.bigquery.tables().list( - projectId=self.project_id, - datasetId=dataset_id, - pageToken=page_token - ).execute() - page_token = res.get('nextPageToken') - result['tables'] += res.get('tables', []) + result = self._get_all_tables_for_dataset(dataset_id, project_id) self.cache[dataset_id] = (datetime.now(), result) return self._parse_table_list_response(result) + def _get_all_tables_for_dataset(self, dataset_id, project_id=None): + """Retrieve a list of all tables for the dataset. 
+ + Parameters + ---------- + dataset_id : str + The dataset to retrieve table names for + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset + + Returns + ------- + dict + A ``dict`` containing tables key with all tables + """ + project_id = self._get_project_id(project_id) + + result = self.bigquery.tables().list( + projectId=project_id, + datasetId=dataset_id).execute(num_retries=self.num_retries) + + page_token = result.get('nextPageToken') + while page_token: + res = self.bigquery.tables().list( + projectId=project_id, + datasetId=dataset_id, + pageToken=page_token + ).execute(num_retries=self.num_retries) + page_token = res.get('nextPageToken') + result['tables'] += res.get('tables', []) + return result + def _parse_table_list_response(self, list_response): """Parse the response received from calling list on tables. - Args: - list_response: The response found by calling list on a BigQuery - table object. + Parameters + ---------- + list_response + The response found by calling list on a BigQuery table object. - Returns: - The dictionary of dates referenced by table names. + Returns + ------- + dict + Dates referenced by table names """ tables = defaultdict(dict) @@ -875,12 +1543,18 @@ def _parse_table_name(self, table_id): """Parse a table name in the form of appid_YYYY_MM or YYYY_MM_appid and return a tuple consisting of YYYY-MM and the app id. - Args: - table_id: The table id as listed by BigQuery. + Returns (None, None) in the event of a name like _YYYYMMDD_ - Returns: - Tuple containing year/month and app id. Returns None, None if the - table id cannot be parsed. + Parameters + ---------- + table_id : str + The table id as listed by BigQuery + + Returns + ------- + tuple + (year/month, app id), or (None, None) if the table id cannot be + parsed. 
""" # Prefix date @@ -898,9 +1572,10 @@ def _parse_table_name(self, table_id): year_month = "-".join(attributes[-2:]) app_id = "-".join(attributes[:-2]) + # Check if date parsed correctly if year_month.count("-") == 1 and all( - [num.isdigit() for num in year_month.split('-')]): + [num.isdigit() for num in year_month.split('-')]) and len(year_month) == 7: return year_month, app_id return None, None @@ -909,27 +1584,39 @@ def _filter_tables_by_time(self, tables, start_time, end_time): """Filter a table dictionary and return table names based on the range of start and end times in unix seconds. - Args: - tables: The dictionary of dates referenced by table names - start_time: The unix time after which records will be fetched. - end_time: The unix time up to which records will be fetched. - - Returns: - A list of table names that are inside the time range. + Parameters + ---------- + tables : dict + Dates referenced by table names + start_time : int + The unix time after which records will be fetched + end_time : int + The unix time up to which records will be fetched + + Returns + ------- + list + Table names that are inside the time range """ - return [table_name for (table_name, unix_seconds) in tables.iteritems() + return [table_name for (table_name, unix_seconds) in tables.items() if self._in_range(start_time, end_time, unix_seconds)] def _in_range(self, start_time, end_time, time): """Indicate if the given time falls inside of the given range. - Args: - start_time: The unix time for the start of the range. - end_time: The unix time for the end of the range. - time: The unix time to check. - - Returns: + Parameters + ---------- + start_time : int + The unix time for the start of the range + end_time : int + The unix time for the end of the range + time : int + The unix time to check + + Returns + ------- + bool True if the time falls within the range, False otherwise. 
""" @@ -939,39 +1626,56 @@ def _in_range(self, start_time, end_time, time): time <= start_time <= time + ONE_MONTH or \ time <= end_time <= time + ONE_MONTH - def _get_query_results(self, job_collection, project_id, job_id, - offset=None, limit=None): - """Execute the query job indicated by the given job id. - - Args: - job_collection: The collection the job belongs to. - project_id: The project id of the table. - job_id: The job id of the query to check. - offset: The index the result set should start at. - limit: The maximum number of results to retrieve. - - Returns: - The query reply. + def get_query_results(self, job_id, offset=None, limit=None, + page_token=None, timeout=0): + """Execute the query job indicated by the given job id. This is direct + mapping to bigquery api + https://cloud.google.com/bigquery/docs/reference/v2/jobs/getQueryResults + + Parameters + ---------- + job_id : str + The job id of the query to check + offset : optional + The index the result set should start at. + limit : int, optional + The maximum number of results to retrieve. + page_token : optional + Page token, returned by previous call, to request the next page of + results. + timeout : float, optional + Timeout in seconds + + Returns + ------- + out + The query reply """ + job_collection = self.bigquery.jobs() return job_collection.getQueryResults( - projectId=project_id, + projectId=self.project_id, jobId=job_id, startIndex=offset, maxResults=limit, - timeoutMs=0).execute() + pageToken=page_token, + timeoutMs=timeout * 1000).execute(num_retries=self.num_retries) def _transform_row(self, row, schema): """Apply the given schema to the given BigQuery data row. - Args: - row: A single BigQuery row to transform. - schema: The BigQuery table schema to apply to the row, specifically - the list of field dicts. - - Returns: - Dict containing keys that match the schema and values that match - the row. 
+ Parameters + ---------- + row + A single BigQuery row to transform + schema : list + The BigQuery table schema to apply to the row, specifically + the list of field dicts. + + Returns + ------- + dict + Mapping schema to row """ log = {} @@ -999,6 +1703,9 @@ def _transform_row(self, row, schema): elif col_dict['type'] == 'BOOLEAN': row_value = row_value in ('True', 'true', 'TRUE') + elif col_dict['type'] == 'TIMESTAMP': + row_value = float(row_value) + log[col_name] = row_value return log @@ -1007,12 +1714,16 @@ def _recurse_on_row(self, col_dict, nested_value): """Apply the schema specified by the given dict to the nested value by recursing on it. - Args: - col_dict: A dict containing the schema to apply to the nested - value. - nested_value: A value nested in a BigQuery row. - Returns: - Dict or list of dicts from applied schema. + Parameters + ---------- + col_dict : dict + The schema to apply to the nested value. + nested_value : A value nested in a BigQuery row. + + Returns + ------- + Union[dict, list] + ``dict`` or ``list`` of ``dict`` objects from applied schema. """ row_value = None @@ -1031,12 +1742,17 @@ def _recurse_on_row(self, col_dict, nested_value): def _generate_hex_for_uris(self, uris): """Given uris, generate and return hex version of it - Args: - uris: A list containing all uris - Returns: - string of hexed uris + Parameters + ---------- + uris : list + Containing all uris + + Returns + ------- + str + Hexed uris """ - return sha256(":".join(uris) + str(time())).hexdigest() + return sha256((":".join(uris) + str(time())).encode()).hexdigest() def _raise_insert_exception_if_error(self, job): error_http = job.get('error') @@ -1064,182 +1780,261 @@ def _raise_executing_exception_if_error(self, job): # DataSet manipulation methods # def create_dataset(self, dataset_id, friendly_name=None, description=None, - access=None): + access=None, location=None, project_id=None): """Create a new BigQuery dataset. 
- Args: - dataset_id: required unique string identifying the dataset with the - project (the referenceId of the dataset, not the - integer id of the dataset) - friendly_name: optional string providing a human readable name - description: optional longer string providing a description - access: optional object indicating access permissions (see - https://developers.google.com/bigquery/docs/reference/v2/ - datasets#resource) - - Returns: - bool indicating if dataset was created or not, or response + Parameters + ---------- + dataset_id : str + Unique ``str`` identifying the dataset with the project (the + referenceID of the dataset, not the integer id of the dataset) + friendly_name: str, optional + A human readable name + description: str, optional + Longer string providing a description + access : list, optional + Indicating access permissions (see + https://developers.google.com/bigquery/docs/reference/v2/datasets#resource) + location : str, optional + Indicating where dataset should be stored: EU or US (see + https://developers.google.com/bigquery/docs/reference/v2/datasets#resource) + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset + + Returns + ------- + Union[bool, dict] + ``bool`` indicating if dataset was created or not, or response from BigQuery if swallow_results is set for False """ - try: + project_id = self._get_project_id(project_id) + + try: datasets = self.bigquery.datasets() - dataset_data = self.dataset_resource(dataset_id, + dataset_data = self.dataset_resource(dataset_id, + project_id=project_id, friendly_name=friendly_name, description=description, - access=access) + access=access, + location=location + ) - response = datasets.insert(projectId=self.project_id, - body=dataset_data).execute() + response = datasets.insert(projectId=project_id, + body=dataset_data).execute( + num_retries=self.num_retries) if self.swallow_results: return True else: return response except HttpError as e: - logging.error('Cannot 
create dataset {0}, {1}'.format(dataset_id, - e)) + logger.error( + 'Cannot create dataset {0}, {1}'.format(dataset_id, e)) if self.swallow_results: return False else: return {} - def get_datasets(self): + def get_datasets(self, project_id=None): """List all datasets in the project. - - Returns: - a list of dataset resources + + Parameters + ---------- + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset + + Returns + ------- + list + Dataset resources """ - try: + project_id = self._get_project_id(project_id) + + try: datasets = self.bigquery.datasets() - request = datasets.list(projectId=self.project_id) - result = request.execute() + request = datasets.list(projectId=project_id) + result = request.execute(num_retries=self.num_retries) return result.get('datasets', []) except HttpError as e: - logging.error("Cannot list datasets: {0}".format(e)) + logger.error("Cannot list datasets: {0}".format(e)) return None - def delete_dataset(self, dataset_id, delete_contents=False): + def delete_dataset(self, dataset_id, delete_contents=False, project_id=None): """Delete a BigQuery dataset. 
- Args: - dataset_id: required unique string identifying the dataset with the - project (the referenceId of the dataset) - delete_contents: forces deletion of the dataset even when the - dataset contains data - Returns: - bool indicating if the delete was successful or not, or response + Parameters + ---------- + dataset_id : str + Unique ``str`` identifying the dataset with the project (the + referenceId of the dataset) + delete_contents : bool, optional + If True, forces the deletion of the dataset even when the dataset + contains data (Default = False) + project_id: str, optional + Unique ``str`` identifying the BigQuery project containing the dataset + + Returns + ------- + Union[bool, dict] + ``bool`` indicating if the delete was successful or not, or response from BigQuery if swallow_results is set for False - Raises: - HttpError 404 when dataset with dataset_id does not exist + Raises + ------ + HttpError + 404 when dataset with dataset_id does not exist """ - try: + project_id = self._get_project_id(project_id) + + try: datasets = self.bigquery.datasets() - request = datasets.delete(projectId=self.project_id, + request = datasets.delete(projectId=project_id, datasetId=dataset_id, deleteContents=delete_contents) - response = request.execute() + response = request.execute(num_retries=self.num_retries) if self.swallow_results: return True else: return response except HttpError as e: - logging.error('Cannot delete dataset {0}: {1}'.format(dataset_id, - e)) + logger.error( + 'Cannot delete dataset {0}: {1}'.format(dataset_id, e)) if self.swallow_results: return False else: return {} def update_dataset(self, dataset_id, friendly_name=None, description=None, - access=None): + access=None, project_id=None): """Updates information in an existing dataset. The update method replaces the entire dataset resource, whereas the patch method only replaces fields that are provided in the submitted dataset resource. 
- Args: - dataset_id: required unique string identifying the dataset with the - project (the referenceId of the dataset). - friendly_name: an optional descriptive name for the dataset. - description: an optional description of the dataset. - access: an optional object indicating access permissions. - - Returns: - bool indicating if the update was successful or not, or response - from BigQuery if swallow_results is set for False. + Parameters + ---------- + dataset_id : str + Unique ``str`` identifying the dataset with the project (the + referencedId of the dataset) + friendly_name : str, optional + An optional descriptive name for the dataset. + description : str, optional + An optional description of the dataset. + access : list, optional + Indicating access permissions + project_id: str, optional + Unique ``str`` identifying the BigQuery project contains the dataset + + Returns + ------- + Union[bool, dict] + ``bool`` indicating if the update was successful or not, or + response from BigQuery if swallow_results is set for False. 
""" - try: + project_id = self._get_project_id(project_id) + + try: datasets = self.bigquery.datasets() - body = self.dataset_resource(dataset_id, friendly_name, - description, access) - request = datasets.update(projectId=self.project_id, + body = self.dataset_resource(dataset_id, + friendly_name=friendly_name, + description=description, + access=access, + project_id=project_id) + + request = datasets.update(projectId=project_id, datasetId=dataset_id, body=body) - response = request.execute() + response = request.execute(num_retries=self.num_retries) if self.swallow_results: return True else: return response except HttpError as e: - logging.error('Cannot update dataset {0}: {1}'.format(dataset_id, - e)) + logger.error( + 'Cannot update dataset {0}: {1}'.format(dataset_id, e)) if self.swallow_results: return False else: return {} def patch_dataset(self, dataset_id, friendly_name=None, description=None, - access=None): + access=None, project_id=None): """Updates information in an existing dataset. The update method replaces the entire dataset resource, whereas the patch method only replaces fields that are provided in the submitted dataset resource. - Args: - dataset_id: required unique string identifying the dataset with the - projedct (the referenceId of the dataset). - friendly_name: an optional descriptive name for the dataset. - description: an optional description of the dataset. - access: an optional object indicating access permissions. - Returns: - bool indicating if the patch was successful or not, or response + Parameters + ---------- + dataset_id : str + Unique string idenfitying the dataset with the project (the + referenceId of the dataset) + friendly_name : str, optional + An optional descriptive name for the dataset. + description : str, optional + An optional description of the dataset. + access : list, optional + Indicating access permissions. 
+ project_id: str, optional + Unique ``str`` identifying the BigQuery project contains the dataset + + Returns + ------- + Union[bool, dict] + ``bool`` indicating if the patch was successful or not, or response from BigQuery if swallow_results is set for False. """ - try: + project_id = self._get_project_id(project_id) + + try: datasets = self.bigquery.datasets() - body = self.dataset_resource(dataset_id, friendly_name, - description, access) - request = datasets.patch(projectId=self.project_id, + body = self.dataset_resource(dataset_id, + friendly_name=friendly_name, + description=description, + access=access, + project_id=project_id) + request = datasets.patch(projectId=project_id, datasetId=dataset_id, body=body) - response = request.execute() + response = request.execute(num_retries=self.num_retries) if self.swallow_results: return True else: return response except HttpError as e: - logging.error('Cannot patch dataset {0}: {1}'.format(dataset_id, - e)) + logger.error('Cannot patch dataset {0}: {1}'.format(dataset_id, e)) if self.swallow_results: return False else: return {} def dataset_resource(self, ref_id, friendly_name=None, description=None, - access=None): - """See https://developers.google.com/bigquery/docs/reference/v2/ - datasets#resource - - Args: - ref_id: string dataset id (the reference id, not the integer id) - friendly_name: opt string - description: opt string - access: opt list - - Returns: - a dictionary representing a BigQuery dataset resource + access=None, location=None, project_id=None): + """See + https://developers.google.com/bigquery/docs/reference/v2/datasets#resource + + Parameters + ---------- + ref_id : str + Dataset id (the reference id, not the integer id) + friendly_name : str, optional + An optional descriptive name for the dataset + description : str, optional + An optional description for the dataset + access : list, optional + Indicating access permissions + location: str, optional, 'EU' or 'US' + An optional geographical 
location for the dataset(EU or US) + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset + + Returns + ------- + dict + Representing BigQuery dataset resource """ + project_id = self._get_project_id(project_id) + data = { "datasetReference": { "datasetId": ref_id, - "projectId": self.project_id + "projectId": project_id } } if friendly_name: @@ -1248,6 +2043,8 @@ def dataset_resource(self, ref_id, friendly_name=None, description=None, data["description"] = description if access: data["access"] = access + if location: + data["location"] = location return data @@ -1256,18 +2053,27 @@ def schema_from_record(cls, record): """Given a dict representing a record instance to be inserted into BigQuery, calculate the schema. - Args: - record: dict representing a record to be inserted into big query, - where all keys are strings (representing column names in - the record) and all values are of type int, str, unicode, - float,bool, timestamp or dict. A dict value represents a - record, and must conform to the same restrictions as record - - Returns: - a list representing a BigQuery schema - - Note: results are undefined if a different value types are provided for - a repeated field: E.g. - { rfield: [ { x: 1}, {x: "a string"} ] } # undefined! + Parameters + ---------- + record : dict + representing a record to be inserted into big query, + where all keys are ``str`` objects (representing column names in + the record) and all values are of type ``int``, ``str``, + ``unicode``, ``float``, ``bool``, ``datetime``, or ``dict``. A + ``dict`` value represents a record, and must conform to the same + restrictions as record. + + Returns + ------- + list + BigQuery schema + + Notes + ----- + Results are undefined if a different value type is provided for a + repeated field: E.g. + + >>> { rfield: [ { x: 1}, {x: "a string"} ] } # undefined! 
""" + from bigquery.schema_builder import schema_from_record return schema_from_record(record) diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index 942f78e..435bb73 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -1,57 +1,59 @@ -import logging +from logging import getLogger, NullHandler + +logger = getLogger(__name__) +logger.addHandler(NullHandler()) def render_query(dataset, tables, select=None, conditions=None, - groupings=None, order_by=None): + groupings=None, having=None, order_by=None, limit=None): """Render a query that will run over the given tables using the specified parameters. - Args: - dataset: the BigQuery data set to query data from. - tables: the tables in dataset to query. - select: a dictionary of selections for a table. The keys function as - column names and the values function as options to apply to - the select field such as alias and format. For example, - { - 'start_time': { - 'alias': 'StartTime', - 'format': 'INTEGER-FORMAT_UTC_USEC' - } - } - is represented as 'SEC_TO_TIMESTAMP(INTEGER(start_time)) as - StartTime' in a query. Pass None to select all. - conditions: a list of dicts to filter results by. - Each dict should be formatted as the following: - { - 'field': 'foo', - 'type': 'FLOAT', - 'comparators': [ - { - 'condition': '>=', - 'negate': False, - 'value': '1' - } - ] - } - which is rendered as 'foo >= FLOAT('1')' in the query. - groupings: a list of field names to group by. - order_by: a dict with two keys, field and direction. - Such that the dictionary should be formatted as - {'field':'TimeStamp, 'direction':'desc'}. - - Returns: - a query string. + Parameters + ---------- + dataset : str + The BigQuery dataset to query data from + tables : Union[dict, list] + The table in `dataset` to query. + select : dict, optional + The keys function as column names and the values function as options to + apply to the select field such as alias and format. 
For example, + select['start_time'] might have the form + {'alias': 'StartTime', 'format': 'INTEGER-FORMAT_UTC_USEC'}, which + would be represented as 'SEC_TO_TIMESTAMP(INTEGER(start_time)) as + StartTime' in a query. Pass `None` to select all. + conditions : list, optional + a ``list`` of ``dict`` objects to filter results by. Each dict should + have the keys 'field', 'type', and 'comparators'. The first two map to + strings representing the field (e.g. 'foo') and type (e.g. 'FLOAT'). + 'comparators' maps to another ``dict`` containing the keys 'condition', + 'negate', and 'value'. + If 'comparators' = {'condition': '>=', 'negate': False, 'value': 1}, + this example will be rendered as 'foo >= FLOAT('1')' in the query. + groupings : list, optional; a ``list`` of field names to group by + order_by : dict, optional + Keys = {'fields', 'direction'}. `dict` should be formatted as + {'fields': ['TimeStamp'], 'direction': 'desc'} or similar + limit : int, optional + Limit the amount of data needed to be returned. + + Returns + ------- + str + A rendered query """ if None in (dataset, tables): return None - query = "%s %s %s %s %s" % ( + query = "%s %s %s %s %s %s %s" % ( _render_select(select), _render_sources(dataset, tables), _render_conditions(conditions), _render_groupings(groupings), + _render_having(having), _render_order(order_by), + _render_limit(limit) ) return query @@ -60,24 +62,26 @@ def render_query(dataset, tables, select=None, conditions=None, def _render_select(selections): """Render the selection part of a query. - Args: - selections: a dictionary of selections for a table. The - keys function as column names and the values function as - options to apply to the select field such as alias and format. - For example {'start_time': {'alias': 'StartTime', 'format': - 'INTEGER-FORMAT_UTC_USEC'}} is represented as - 'SEC_TO_TIMESTAMP(INTEGER(start_time))' in a query. Pass None to - select all. 
+ Parameters + ---------- + selections : dict + Selections for a table + + Returns + ------- + str + A string for the "select" part of a query + + See Also + -------- + render_query : Further clarification of `selections` dict formatting """ if not selections: return 'SELECT *' rendered_selections = [] - for name, options in selections.iteritems(): + for name, options in selections.items(): if not isinstance(options, list): options = [options] @@ -99,15 +103,20 @@ def _render_select(selections): def _format_select(formatter, name): """Modify the query selector by applying any formatters to it. - Args: - formatter: hyphen-delimited formatter string where formatters are - applied inside-out, e.g. the formatter string - SEC_TO_MICRO-INTEGER-FORMAT_UTC_USEC applied to the selector - foo would result in FORMAT_UTC_USEC(INTEGER(foo*1000000)). - name: the name of the selector to apply formatters to. - - Returns: - formatted selector. + Parameters + ---------- + formatter : str + Hyphen-delimited formatter string where formatters are + applied inside-out, e.g. the formatter string + SEC_TO_MICRO-INTEGER-FORMAT_UTC_USEC applied to the selector + foo would result in FORMAT_UTC_USEC(INTEGER(foo*1000000)). + name: str + The name of the selector to apply formatters to. + + Returns + ------- + str + The formatted selector """ for caster in formatter.split('-'): @@ -125,30 +134,52 @@ def _format_select(formatter, name): def _render_sources(dataset, tables): """Render the source part of a query. - Args: - dataset: the data set to fetch log data from. - tables: the tables to fetch log data from. - - Returns: - a string that represents the from part of a query. + Parameters + ---------- + dataset : str + The data set to fetch log data from. + tables : Union[dict, list] + The tables to fetch log data from + + Returns + ------- + str + A string that represents the "from" part of a query. 
""" - return "FROM " + ", ".join( - ["[%s.%s]" % (dataset, table) for table in tables]) + if isinstance(tables, dict): + if tables.get('date_range', False): + try: + dataset_table = '.'.join([dataset, tables['table']]) + return "FROM (TABLE_DATE_RANGE([{}], TIMESTAMP('{}'),"\ + " TIMESTAMP('{}'))) ".format(dataset_table, + tables['from_date'], + tables['to_date']) + except KeyError as exp: + logger.warn( + 'Missing parameter %s in selecting sources' % (exp)) + + else: + return "FROM " + ", ".join( + ["[%s.%s]" % (dataset, table) for table in tables]) def _render_conditions(conditions): """Render the conditions part of a query. - Args: - conditions: a list of dictionary items to filter a table. - Each dict should be formatted as {'field': 'start_time', - 'value': {'value': 1, 'negate': False}, 'comparator': '>', - 'type': 'FLOAT'} which is represetned as - 'start_time > FLOAT('1')' in the query. + Parameters + ---------- + conditions : list + A list of dictionary items to filter a table. + + Returns + ------- + str + A string that represents the "where" part of a query - Returns: - a string that represents the where part of a query. + See Also + -------- + render_query : Further clarification of `conditions` formatting. """ if not conditions: @@ -162,7 +193,7 @@ def _render_conditions(conditions): comparators = condition.get('comparators') if None in (field, field_type, comparators) or not comparators: - logging.warn('Invalid condition passed in: %s' % condition) + logger.warn('Invalid condition passed in: %s' % condition) continue rendered_conditions.append( @@ -177,14 +208,18 @@ def _render_conditions(conditions): def _render_condition(field, field_type, comparators): """Render a single query condition. - Args: - field: the field the condition applies to. - field_type: the data type of the field. - comparator: the logic operator to use. 
- value_dicts: a list of value dicts of the form - {'value': 'foo', 'negate': False} - - Returns: + Parameters + ---------- + field : str + The field the condition applies to + field_type : str + The data type of the field. + comparators : array_like + An iterable of logic operators to use. + + Returns + ------- + str a condition string. """ @@ -200,11 +235,23 @@ def _render_condition(field, field_type, comparators): if condition == "IN": if isinstance(value, (list, tuple, set)): value = ', '.join( - [_render_condition_value(v, field_type) for v in value] + sorted([_render_condition_value(v, field_type) + for v in value]) ) else: value = _render_condition_value(value, field_type) value = "(" + value + ")" + elif condition == "IS NULL" or condition == "IS NOT NULL": + return field + " " + condition + elif condition == "BETWEEN": + if isinstance(value, (tuple, list, set)) and len(value) == 2: + value = ' AND '.join( + sorted([_render_condition_value(v, field_type) + for v in value]) + ) + elif isinstance(value, (tuple, list, set)) and len(value) != 2: + logger.warn('Invalid condition passed in: %s' % condition) + else: value = _render_condition_value(value, field_type) @@ -228,12 +275,17 @@ def _render_condition(field, field_type, comparators): def _render_condition_value(value, field_type): """Render a query condition value. - Args: - value: the value of the condition. - field_type: the data type of the field. - - Returns: - a value string. + Parameters + ---------- + value : Union[bool, int, float, str, datetime] + The value of the condition + field_type : str + The data type of the field + + Returns + ------- + str + A value string. 
""" # BigQuery cannot cast strings to booleans, convert to ints @@ -241,38 +293,107 @@ def _render_condition_value(value, field_type): value = 1 if value else 0 elif field_type in ("STRING", "INTEGER", "FLOAT"): value = "'%s'" % (value) + elif field_type in ("TIMESTAMP"): + value = "'%s'" % (str(value)) return "%s(%s)" % (field_type, value) -def _render_order(order): - """Render the order by part of a query. +def _render_groupings(fields): + """Render the group by part of a query. - Args: - order: a dictionary with two keys, field and direction. - Such that the dictionary should be formatted as - {'field':'TimeStamp, 'direction':'desc'}. + Parameters + ---------- + fields : list + A list of fields to group by. - Returns: - a string that represents the order by part of a query. + Returns + ------- + str + A string that represents the "group by" part of a query. """ - if not order or 'field' not in order or 'direction' not in order: - return '' + if not fields: + return "" - return "ORDER BY %s %s" % (order['field'], order['direction']) + return "GROUP BY " + ", ".join(fields) -def _render_groupings(fields): - """Render the group by part of a query. +def _render_having(having_conditions): + """Render the having part of a query. - Args: - fields: a list of fields to group by. + Parameters + ---------- + having_conditions : list + A ``list`` of ``dict``s to filter the rows - Returns: - a string that represents the group by part of a query. + Returns + ------- + str + A string that represents the "having" part of a query. + + See Also + -------- + render_query : Further clarification of `conditions` formatting. 
""" + if not having_conditions: + return "" - if not fields: + rendered_conditions = [] + + for condition in having_conditions: + field = condition.get('field') + field_type = condition.get('type') + comparators = condition.get('comparators') + + if None in (field, field_type, comparators) or not comparators: + logger.warn('Invalid condition passed in: %s' % condition) + continue + + rendered_conditions.append( + _render_condition(field, field_type, comparators)) + + if not rendered_conditions: return "" - return "GROUP BY " + ", ".join(fields) + return "HAVING %s" % (" AND ".join(rendered_conditions)) + + +def _render_order(order): + """Render the order by part of a query. + + Parameters + ---------- + order : dict + A dictionary with two keys, fields and direction. + Such that the dictionary should be formatted as + {'fields': ['TimeStamp'], 'direction':'desc'}. + + Returns + ------- + str + A string that represents the "order by" part of a query. + """ + + if not order or 'fields' not in order or 'direction' not in order: + return '' + + return "ORDER BY %s %s" % (", ".join(order['fields']), order['direction']) + + +def _render_limit(limit): + """Render the limit part of a query. + + Parameters + ---------- + limit : int, optional + Limit the amount of data needed to be returned. + + Returns + ------- + str + A string that represents the "limit" part of a query. 
+ """ + if not limit: + return '' + + return "LIMIT %s" % limit diff --git a/bigquery/schema_builder.py b/bigquery/schema_builder.py index 3b063f2..65027b8 100644 --- a/bigquery/schema_builder.py +++ b/bigquery/schema_builder.py @@ -1,10 +1,12 @@ +from __future__ import absolute_import __author__ = 'Aneil Mallavarapu (http://github.com/aneilbaboo)' from datetime import datetime +import six import dateutil.parser -from errors import InvalidTypeException +from .errors import InvalidTypeException def default_timestamp_parser(s): @@ -21,16 +23,20 @@ def schema_from_record(record, timestamp_parser=default_timestamp_parser): """Generate a BigQuery schema given an example of a record that is to be inserted into BigQuery. - Args: - record: dict - timestamp_parser: unary function taking a string and return non-NIL if - string represents a date - - Returns: - schema: list + Parameters + ---------- + record : dict + Example of a record that is to be inserted into BigQuery + timestamp_parser : function, optional + Unary function taking a ``str`` and returning and ``bool`` that is + True if the string represents a date + + Returns + ------- + Schema: list """ return [describe_field(k, v, timestamp_parser=timestamp_parser) - for k, v in record.items()] + for k, v in list(record.items())] def describe_field(k, v, timestamp_parser=default_timestamp_parser): @@ -39,16 +45,25 @@ def describe_field(k, v, timestamp_parser=default_timestamp_parser): element describing that field. Raise errors if invalid value types are provided. - Args: - k: str/unicode, key representing the column - v: str/unicode/int/float/datetime/object - - Returns: - object describing the field - - Raises: - Exception: if invalid value types are provided. 
- + Parameters + ---------- + k : Union[str, unicode] + Key representing the column + v : Union[str, unicode, int, float, datetime, object] + Value mapped to by `k` + + Returns + ------- + object + Describing the field + + Raises + ------ + Exception + If invalid value types are provided. + + Examples + -------- >>> describe_field("username", "Bob") {"name": "username", "type": "string", "mode": "nullable"} >>> describe_field("users", [{"username": "Bob"}]) @@ -75,8 +90,8 @@ def bq_schema_field(name, bq_type, mode): field = bq_schema_field(k, bq_type, mode) if bq_type == "record": try: - field['fields'] = schema_from_record(v) - except InvalidTypeException, e: + field['fields'] = schema_from_record(v, timestamp_parser) + except InvalidTypeException as e: # recursively construct the key causing the error raise InvalidTypeException("%s.%s" % (k, e.key), e.value) @@ -88,9 +103,22 @@ def bigquery_type(o, timestamp_parser=default_timestamp_parser): one of str/unicode/int/float/datetime/record, where record is a dict containing value which have matching BigQuery types. 
- Returns: - str or None if no matching type could be found - + Parameters + ---------- + o : object + A Python object + time_stamp_parser : function, optional + Unary function taking a ``str`` and returning and ``bool`` that is + True if the string represents a date + + Returns + ------- + Union[str, None] + Name of the corresponding BigQuery type for `o`, or None if no type + could be found + + Examples + -------- >>> bigquery_type("abc") "string" >>> bigquery_type(123) @@ -98,9 +126,9 @@ def bigquery_type(o, timestamp_parser=default_timestamp_parser): """ t = type(o) - if t == int: + if t in six.integer_types: return "integer" - elif t == str or t == unicode: + elif (t == six.binary_type and six.PY2) or t == six.text_type: if timestamp_parser and timestamp_parser(o): return "timestamp" else: diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 0c65ea3..1f2d247 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1,18 +1,17 @@ import unittest import mock -from nose.tools import raises - -from apiclient.errors import HttpError +import six from bigquery import client from bigquery.errors import ( JobInsertException, JobExecutingException, BigQueryTimeoutException ) +from googleapiclient.errors import HttpError +from nose.tools import raises class HttpResponse(object): - def __init__(self, status, reason='There was an error'): """ Args: @@ -23,7 +22,6 @@ def __init__(self, status, reason='There was an error'): class TestGetClient(unittest.TestCase): - def setUp(self): client._bq_client = None @@ -49,7 +47,8 @@ def test_initialize_readonly(self, mock_build, mock_return_cred): mock_cred = mock.Mock() mock_http = mock.Mock() - mock_cred.return_value.authorize.return_value = mock_http + mock_service_url = mock.Mock() + mock_cred.from_p12_keyfile_buffer.return_value.authorize.return_value = mock_http mock_bq = mock.Mock() mock_build.return_value = mock_bq key = 'key' @@ -58,14 +57,23 @@ def 
test_initialize_readonly(self, mock_build, mock_return_cred): mock_return_cred.return_value = mock_cred bq_client = client.get_client( - project_id, service_account=service_account, private_key=key, + project_id, service_url=mock_service_url, + service_account=service_account, private_key=key, readonly=True) mock_return_cred.assert_called_once_with() - mock_cred.assert_called_once_with(service_account, key, - scope=BIGQUERY_SCOPE_READ_ONLY) - mock_cred.authorize.assert_called_once() - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http) + mock_cred.from_p12_keyfile_buffer.assert_called_once_with( + service_account, mock.ANY, + scopes=BIGQUERY_SCOPE_READ_ONLY) + self.assertTrue( + mock_cred.from_p12_keyfile_buffer.return_value.authorize.called) + mock_build.assert_called_once_with( + 'bigquery', + 'v2', + http=mock_http, + discoveryServiceUrl=mock_service_url, + cache_discovery=False + ) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @@ -79,7 +87,8 @@ def test_initialize_read_write(self, mock_build, mock_return_cred): mock_cred = mock.Mock() mock_http = mock.Mock() - mock_cred.return_value.authorize.return_value = mock_http + mock_service_url = mock.Mock() + mock_cred.from_p12_keyfile_buffer.return_value.authorize.return_value = mock_http mock_bq = mock.Mock() mock_build.return_value = mock_bq key = 'key' @@ -88,22 +97,28 @@ def test_initialize_read_write(self, mock_build, mock_return_cred): mock_return_cred.return_value = mock_cred bq_client = client.get_client( - project_id, service_account=service_account, private_key=key, + project_id, service_url=mock_service_url, + service_account=service_account, private_key=key, readonly=False) mock_return_cred.assert_called_once_with() - mock_cred.assert_called_once_with(service_account, key, - scope=BIGQUERY_SCOPE) - mock_cred.authorize.assert_called_once() - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http) + 
mock_cred.from_p12_keyfile_buffer.assert_called_once_with( + service_account, mock.ANY, scopes=BIGQUERY_SCOPE) + self.assertTrue( + mock_cred.from_p12_keyfile_buffer.return_value.authorize.called) + mock_build.assert_called_once_with( + 'bigquery', + 'v2', + http=mock_http, + discoveryServiceUrl=mock_service_url, + cache_discovery=False + ) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @mock.patch('bigquery.client._credentials') @mock.patch('bigquery.client.build') - @mock.patch('__builtin__.open') - def test_initialize_key_file(self, mock_open, mock_build, - mock_return_cred): + def test_initialize_key_file(self, mock_build, mock_return_cred): """Ensure that a BigQueryClient is initialized and returned with read/write permissions using a private key file. """ @@ -111,29 +126,151 @@ def test_initialize_key_file(self, mock_open, mock_build, mock_cred = mock.Mock() mock_http = mock.Mock() - mock_cred.return_value.authorize.return_value = mock_http + mock_service_url = mock.Mock() + mock_cred.from_p12_keyfile.return_value.authorize.return_value = mock_http mock_bq = mock.Mock() mock_build.return_value = mock_bq key_file = 'key.pem' - key = 'key' - mock_open.return_value.__enter__.return_value.read.return_value = key service_account = 'account' project_id = 'project' mock_return_cred.return_value = mock_cred bq_client = client.get_client( - project_id, service_account=service_account, + project_id, service_url=mock_service_url, + service_account=service_account, private_key_file=key_file, readonly=False) - mock_open.assert_called_once_with(key_file, 'rb') mock_return_cred.assert_called_once_with() - mock_cred.assert_called_once_with(service_account, key, - scope=BIGQUERY_SCOPE) - mock_cred.authorize.assert_called_once() - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http) + mock_cred.from_p12_keyfile.assert_called_once_with(service_account, + key_file, + scopes=BIGQUERY_SCOPE) + self.assertTrue( + 
mock_cred.from_p12_keyfile.return_value.authorize.called) + mock_build.assert_called_once_with( + 'bigquery', + 'v2', + http=mock_http, + discoveryServiceUrl=mock_service_url, + cache_discovery=False + ) + self.assertEquals(mock_bq, bq_client.bigquery) + self.assertEquals(project_id, bq_client.project_id) + + @mock.patch('bigquery.client._credentials') + @mock.patch('bigquery.client.build') + @mock.patch('__builtin__.open' if six.PY2 else 'builtins.open') + def test_initialize_json_key_file(self, mock_open, mock_build, mock_return_cred): + """Ensure that a BigQueryClient is initialized and returned with + read/write permissions using a JSON key file. + """ + from bigquery.client import BIGQUERY_SCOPE + import json + + mock_cred = mock.Mock() + mock_http = mock.Mock() + mock_service_url = mock.Mock() + mock_cred.from_json_keyfile_dict.return_value.authorize.return_value = mock_http + mock_bq = mock.Mock() + mock_build.return_value = mock_bq + json_key_file = 'key.json' + json_key = {'client_email': 'mail', 'private_key': 'pkey'} + mock_open.return_value.__enter__.return_value.read.return_value = json.dumps(json_key) + project_id = 'project' + mock_return_cred.return_value = mock_cred + + bq_client = client.get_client( + project_id, service_url=mock_service_url, + json_key_file=json_key_file, readonly=False) + + mock_return_cred.assert_called_once_with() + mock_cred.from_json_keyfile_dict.assert_called_once_with(json_key, + scopes=BIGQUERY_SCOPE) + self.assertTrue( + mock_cred.from_json_keyfile_dict.return_value.authorize.called) + mock_build.assert_called_once_with( + 'bigquery', + 'v2', + http=mock_http, + discoveryServiceUrl=mock_service_url, + cache_discovery=False + ) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) + @mock.patch('bigquery.client._credentials') + @mock.patch('bigquery.client.build') + @mock.patch('__builtin__.open' if six.PY2 else 'builtins.open') + def 
test_initialize_json_key_file_without_project_id(self, mock_open, mock_build, + mock_return_cred): + """Ensure that a BigQueryClient is initialized and returned with + read/write permissions using a JSON key file without project_id. + """ + from bigquery.client import BIGQUERY_SCOPE + import json + + mock_cred = mock.Mock() + mock_http = mock.Mock() + mock_service_url = mock.Mock() + mock_cred.from_json_keyfile_dict.return_value.authorize.return_value = mock_http + mock_bq = mock.Mock() + mock_build.return_value = mock_bq + json_key_file = 'key.json' + json_key = {'client_email': 'mail', 'private_key': 'pkey', 'project_id': 'project'} + mock_open.return_value.__enter__.return_value.read.return_value = json.dumps(json_key) + mock_return_cred.return_value = mock_cred + + bq_client = client.get_client( + service_url=mock_service_url, json_key_file=json_key_file, readonly=False) + + mock_open.assert_called_once_with(json_key_file, 'r') + mock_return_cred.assert_called_once_with() + mock_cred.from_json_keyfile_dict.assert_called_once_with(json_key, + scopes=BIGQUERY_SCOPE) + self.assertTrue( + mock_cred.from_json_keyfile_dict.return_value.authorize.called) + mock_build.assert_called_once_with( + 'bigquery', + 'v2', + http=mock_http, + discoveryServiceUrl=mock_service_url, + cache_discovery=False + ) + self.assertEquals(mock_bq, bq_client.bigquery) + self.assertEquals(json_key['project_id'], bq_client.project_id) + + +class TestGetProjectIds(unittest.TestCase): + + def test_get_project_ids(self): + mock_bq_service = mock.Mock() + mock_bq_service.projects().list().execute.return_value = { + 'kind': 'bigquery#projectList', + 'projects': [ + { + 'friendlyName': 'Big Query Test', + 'id': 'big-query-test', + 'kind': 'bigquery#project', + 'numericId': '1435372465', + 'projectReference': {'projectId': 'big-query-test'} + }, + { + 'friendlyName': 'BQ Company project', + 'id': 'bq-project', + 'kind': 'bigquery#project', + 'numericId': '4263574685796', + 'projectReference': 
{'projectId': 'bq-project'} + } + ], + 'totalItems': 2 + } + + projects = client.get_projects(mock_bq_service) + expected_projects_data = [ + {'id': 'big-query-test', 'name': 'Big Query Test'}, + {'id': 'bq-project', 'name': 'BQ Company project'} + ] + self.assertEqual(projects, expected_projects_data) + class TestQuery(unittest.TestCase): @@ -147,6 +284,7 @@ def setUp(self): self.query = 'foo' self.project_id = 'project' + self.external_udf_uris = ['gs://bucket/external_udf.js'] self.client = client.BigQueryClient(self.mock_bq_service, self.project_id) @@ -159,17 +297,24 @@ def test_query(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, - 'jobComplete': True + 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job - job_id, results = self.client.query(self.query) + job_id, results = self.client.query(self.query, external_udf_uris=self.external_udf_uris) self.mock_job_collection.query.assert_called_once_with( projectId=self.project_id, - body={'query': self.query, 'timeoutMs': 0, 'dryRun': False, - 'maxResults': None} + body={ + 'query': self.query, + 'userDefinedFunctionResources': [ {'resourceUri': u} for u in self.external_udf_uris ], + 'timeoutMs': 0, + 'dryRun': False, + 'maxResults': None + } ) self.assertEquals(job_id, 'spiderman') self.assertEquals(results, []) @@ -186,6 +331,8 @@ def test_query_max_results_set(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -214,6 +361,8 @@ def test_query_timeout_set(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -239,6 +388,8 @@ def test_sync_query_timeout(self): 
mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, 'jobComplete': False, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -257,6 +408,8 @@ def test_async_query_timeout(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, 'jobComplete': False, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -266,14 +419,18 @@ def test_async_query_timeout(self): self.assertEquals(results, []) def test_query_dry_run_valid(self): - """Ensure that None and an empty list is returned from the query when + """Ensure that None and [cacheHit, totalBytesProcessed] is returned from the query when dry_run is True and the query is valid. """ mock_query_job = mock.Mock() - mock_query_job.execute.return_value = {'jobReference': {}, - 'jobComplete': True} + mock_query_job.execute.return_value = { + 'jobReference': {}, + 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 + } self.mock_job_collection.query.return_value = mock_query_job @@ -285,7 +442,7 @@ def test_query_dry_run_valid(self): 'dryRun': True} ) self.assertIsNone(job_id) - self.assertEqual([], results) + self.assertEqual([False, 0], results) def test_query_dry_run_invalid(self): """Ensure that None and a dict is returned from the query when dry_run @@ -295,7 +452,7 @@ def test_query_dry_run_invalid(self): mock_query_job = mock.Mock() mock_query_job.execute.side_effect = HttpError( - 'crap', '{"message": "Bad query"}') + 'crap', '{"message": "Bad query"}'.encode('utf8')) self.mock_job_collection.query.return_value = mock_query_job @@ -325,6 +482,8 @@ def test_query_with_results(self): 'schema': {'fields': [{'name': 'foo', 'type': 'INTEGER'}]}, 'rows': [{'f': [{'v': 10}]}], 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -339,6 +498,32 @@ def 
test_query_with_results(self): self.assertEquals(job_id, 'spiderman') self.assertEquals(results, [{'foo': 10}]) + def test_query_with_using_legacy_sql(self): + """Ensure that use_legacy_sql bool gets used""" + + mock_query_job = mock.Mock() + expected_job_id = 'spiderman' + expected_job_ref = {'jobId': expected_job_id} + + mock_query_job.execute.return_value = { + 'jobReference': expected_job_ref, + 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 + } + + self.mock_job_collection.query.return_value = mock_query_job + + job_id, results = self.client.query(self.query, use_legacy_sql=False) + + self.mock_job_collection.query.assert_called_once_with( + projectId=self.project_id, + body={'query': self.query, 'timeoutMs': 0, 'dryRun': False, + 'maxResults': None, 'useLegacySql': False} + ) + self.assertEquals(job_id, 'spiderman') + self.assertEquals(results, []) + class TestGetQueryResults(unittest.TestCase): @@ -358,7 +543,6 @@ def test_get_response(self): """Ensure that the query is executed and the query reply is returned. 
""" - project_id = 'foo' job_id = 'bar' mock_query_job = mock.Mock() @@ -368,15 +552,17 @@ def test_get_response(self): offset = 5 limit = 10 + page_token = "token" + timeout = 1 - actual = self.client._get_query_results(self.mock_job_collection, - project_id, job_id, - offset, limit) + actual = self.client.get_query_results(job_id, offset, limit, + page_token, timeout) self.mock_job_collection.getQueryResults.assert_called_once_with( - timeoutMs=0, projectId=project_id, jobId=job_id, - startIndex=offset, maxResults=limit) - mock_query_job.execute.assert_called_once() + projectId=self.project_id, jobId=job_id, startIndex=offset, + maxResults=limit, pageToken=page_token, timeoutMs=1000) + + mock_query_job.execute.assert_called_once_with(num_retries=0) self.assertEquals(actual, mock_query_reply) @@ -401,7 +587,7 @@ def test_transform_row(self): {'name': 'bar', 'type': 'FLOAT'}, {'name': 'baz', 'type': 'STRING'}, {'name': 'qux', 'type': 'BOOLEAN'}, - {'name': 'timestamp', 'type': 'FLOAT'}] + {'name': 'timestamp', 'type': 'TIMESTAMP'}] row = {'f': [{'v': '42'}, {'v': None}, {'v': 'batman'}, {'v': 'True'}, {'v': '1.371145650319132E9'}]} @@ -458,7 +644,7 @@ def test_transform_row_with_nested_repeated(self): self.assertEquals(actual, expected) -@mock.patch('bigquery.client.BigQueryClient._get_query_results') +@mock.patch('bigquery.client.BigQueryClient.get_query_results') class TestCheckJob(unittest.TestCase): def setUp(self): @@ -586,6 +772,44 @@ def test_wait_job_error_result(self): interval=.01, timeout=.01) + def test_accepts_job_id(self): + """Ensure it accepts a job Id rather than a full job resource""" + + return_values = [{'status': {'state': u'RUNNING'}, + 'jobReference': {'jobId': "testJob"}}, + {'status': {'state': u'DONE'}, + 'jobReference': {'jobId': "testJob"}}] + + def side_effect(*args, **kwargs): + return return_values.pop(0) + + self.api_mock.jobs().get().execute.side_effect = side_effect + + job_resource = self.client.wait_for_job("testJob", + 
interval=.01, + timeout=5) + + self.assertEqual(self.api_mock.jobs().get().execute.call_count, 2) + self.assertIsInstance(job_resource, dict) + + def test_accepts_integer_job_id(self): + return_values = [{'status': {'state': u'RUNNING'}, + 'jobReference': {'jobId': "testJob"}}, + {'status': {'state': u'DONE'}, + 'jobReference': {'jobId': "testJob"}}] + + def side_effect(*args, **kwargs): + return return_values.pop(0) + + self.api_mock.jobs().get().execute.side_effect = side_effect + + job_resource = self.client.wait_for_job(1234567, + interval=.01, + timeout=600) + + self.assertEqual(self.api_mock.jobs().get().execute.call_count, 2) + self.assertIsInstance(job_resource, dict) + class TestImportDataFromURIs(unittest.TestCase): @@ -667,7 +891,7 @@ def test_json_job_body_constructed_correctly(self): body = { "jobReference": { "projectId": self.project_id, - "jobId": "job" + "jobId": "job", }, "configuration": { "load": { @@ -857,8 +1081,8 @@ def test_export(self, mock_generate_hex): body = { "jobReference": { "projectId": self.project_id, - "jobId": "%s-%s-destinationuri" % - (self.dataset_id, self.table_id) + "jobId": "%s-%s-destinationuri" % (self.dataset_id, + self.table_id) }, "configuration": { "extract": { @@ -941,8 +1165,11 @@ def setUp(self): self.project_id = 'project' self.dataset_id = 'dataset' self.table_id = 'table' + self.maximum_billing_tier = 1000 + self.external_udf_uris = ['gs://bucket/external_udf.js'] self.use_query_cache = False self.priority = "INTERACTIVE" + self.flatten_results = False self.client = client.BigQueryClient(self.mock_api, self.project_id) @@ -961,8 +1188,12 @@ def test_write(self): "tableId": self.table_id }, "query": self.query, + "userDefinedFunctionResources": [{ + "resourceUri": self.external_udf_uris[0] + }], "useQueryCache": self.use_query_cache, "priority": self.priority, + "flattenResults": self.flatten_results, } } } @@ -971,7 +1202,9 @@ def test_write(self): result = self.client.write_to_table(self.query, 
self.dataset_id, self.table_id, + external_udf_uris=self.external_udf_uris, use_query_cache=False, + flatten=False, priority=self.priority) self.mock_api.jobs().insert.assert_called_with( @@ -981,6 +1214,44 @@ def test_write(self): self.assertEqual(result, expected_result) + def test_write_maxbilltier(self): + """ Ensure that write is working when maximumBillingTier is set""" + expected_result = { + 'status': {'state': u'RUNNING'}, + } + + body = { + "configuration": { + "query": { + "destinationTable": { + "projectId": self.project_id, + "datasetId": self.dataset_id, + "tableId": self.table_id + }, + "query": self.query, + "userDefinedFunctionResources": [{ + "resourceUri": self.external_udf_uris[0] + }], + "useQueryCache": self.use_query_cache, + "priority": self.priority, + "maximumBillingTier": self.maximum_billing_tier + } + } + } + + self.mock_api.jobs().insert().execute.return_value = expected_result + result = self.client.write_to_table( + self.query, self.dataset_id, self.table_id, priority=self.priority, + external_udf_uris=self.external_udf_uris, use_query_cache=False, + maximum_billing_tier=self.maximum_billing_tier) + + self.mock_api.jobs().insert.assert_called_with( + projectId=self.project_id, + body=body + ) + + self.assertEqual(result, expected_result) + def test_write_http_error(self): """ Test write with http error""" expected_result = { @@ -1042,8 +1313,9 @@ def test_multi_inside_range(self): }, 1370002000, 1370000000) self.assertEqual( - ['Daenerys Targaryen', 'William Shatner', 'Gordon Freeman'], - tables + sorted( + ['Daenerys Targaryen', 'William Shatner', 'Gordon Freeman']), + sorted(tables) ) def test_not_inside_range(self): @@ -1103,6 +1375,15 @@ def test_not_inside_range(self): "kind": "bigquery#tableList", "etag": "\"GSclnjk0zID1ucM3F-xYinOm1oE/cn58Rpu8v8pB4eoJQaiTe11lPQc\"", "tables": [ + { + "kind": "bigquery#table", + "id": "project:dataset.notanappspottable_20130515_0261", + "tableReference": { + "projectId": "project", + 
"datasetId": "dataset", + "tableId": "notanappspottable_20130515_0261" + } + }, { "kind": "bigquery#table", "id": "project:dataset.2013_05_appspot_1", @@ -1166,16 +1447,25 @@ def test_not_inside_range(self): "tableId": "appspot_6_2013_06" } }, + { + "kind": "bigquery#table", + "id": "project:dataset.table_not_matching_naming", + "tableReference": { + "projectId": "project", + "datasetId": "dataset", + "tableId": "table_not_matching_naming" + } + }, { "kind": "bigquery#table", "id": "bad table data" - } + }, ], - "totalItems": 8 + "totalItems": 9 } -@mock.patch('bigquery.client.BigQueryClient._get_query_results') +@mock.patch('bigquery.client.BigQueryClient.get_query_results') class TestGetQuerySchema(unittest.TestCase): def test_query_complete(self, get_query_mock): @@ -1237,21 +1527,23 @@ def test_table_exists(self): expected, self.client.get_table_schema(self.dataset, self.table)) self.mock_tables.get.assert_called_once_with( projectId=self.project, tableId=self.table, datasetId=self.dataset) - self.mock_tables.get.return_value.execute.assert_called_once_with() + self.mock_tables.get.return_value.execute. \ + assert_called_once_with(num_retries=0) def test_table_does_not_exist(self): """Ensure that None is returned if the table doesn't exist.""" self.mock_tables.get.return_value.execute.side_effect = \ - HttpError({'status': "404"}, '{}') + HttpError({'status': "404"}, '{}'.encode('utf8')) self.assertIsNone( self.client.get_table_schema(self.dataset, self.table)) self.mock_tables.get.assert_called_once_with( projectId=self.project, tableId=self.table, datasetId=self.dataset) - self.mock_tables.get.return_value.execute.assert_called_once_with() + self.mock_tables.get.return_value.execute. 
\ + assert_called_once_with(num_retries=0) -@mock.patch('bigquery.client.BigQueryClient._get_query_results') +@mock.patch('bigquery.client.BigQueryClient.get_query_results') class TestGetQueryRows(unittest.TestCase): def test_query_complete(self, get_query_mock): @@ -1281,6 +1573,77 @@ def test_query_complete(self, get_query_mock): {'foo': 'abc', 'spider': 'xyz'}] self.assertEquals(result_rows, expected_rows) + def test_query_complete_with_page_token(self, get_query_mock): + """Ensure that get_query_rows works with page token.""" + from bigquery.client import BigQueryClient + + page_one_resp = { + "jobComplete": True, + "kind": "bigquery#getQueryResultsResponse", + "pageToken": "TOKEN_TO_PAGE_2", + "schema": { + "fields": [{ + "name": "first_name", + "type": "STRING", + }, { + "name": "last_name", + "type": "STRING", + }] + }, + "rows": [{ + "f": [{ + "v": "foo", + }, { + "v": "bar" + }] + }, { + "f": [{ + "v": "abc", + }, { + "v": "xyz" + }] + }], + "totalRows": "4" + } + + page_two_resp = { + "jobComplete": True, + "kind": "bigquery#getQueryResultsResponse", + "schema": { + "fields": [{ + "name": "first_name", + "type": "STRING", + }, { + "name": "last_name", + "type": "STRING", + }] + }, + "rows": [{ + "f": [{ + "v": "the", + }, { + "v": "beatles" + }] + }, { + "f": [{ + "v": "monty", + }, { + "v": "python" + }] + }], + "totalRows": "4" + } + + bq = BigQueryClient(mock.Mock(), 'project') + get_query_mock.side_effect = [page_one_resp, page_two_resp] + result_rows = bq.get_query_rows(job_id=123, offset=0, limit=0) + + expected_rows = [{'first_name': 'foo', 'last_name': 'bar'}, + {'first_name': 'abc', 'last_name': 'xyz'}, + {'first_name': 'the', 'last_name': 'beatles'}, + {'first_name': 'monty', 'last_name': 'python'}] + self.assertEquals(result_rows, expected_rows) + def test_query_incomplete(self, get_query_mock): """Ensure that get_query_rows handles scenarios where the query is not finished. 
@@ -1323,7 +1686,7 @@ def test_table_does_not_exist(self): """Ensure that if the table does not exist, False is returned.""" self.mock_tables.get.return_value.execute.side_effect = ( - HttpError(HttpResponse(404), 'There was an error')) + HttpError(HttpResponse(404), 'There was an error'.encode('utf8'))) actual = self.client.check_table(self.dataset, self.table) @@ -1332,7 +1695,8 @@ def test_table_does_not_exist(self): self.mock_tables.get.assert_called_once_with( projectId=self.project, datasetId=self.dataset, tableId=self.table) - self.mock_tables.get.return_value.execute.assert_called_once_with() + self.mock_tables.get.return_value.execute. \ + assert_called_once_with(num_retries=0) def test_table_does_exist(self): """Ensure that if the table does exist, True is returned.""" @@ -1347,7 +1711,8 @@ def test_table_does_exist(self): self.mock_tables.get.assert_called_once_with( projectId=self.project, datasetId=self.dataset, tableId=self.table) - self.mock_tables.get.return_value.execute.assert_called_once_with() + self.mock_tables.get.return_value.execute. 
\ + assert_called_once_with(num_retries=0) class TestCreateTable(unittest.TestCase): @@ -1370,13 +1735,15 @@ def setUp(self): 'tableId': self.table, 'projectId': self.project, 'datasetId': self.dataset} } + self.expiration_time = 1437513693000 + self.time_partitioning = True def test_table_create_failed(self): """Ensure that if creating the table fails, False is returned, or if swallow_results is False an empty dict is returned.""" self.mock_tables.insert.return_value.execute.side_effect = ( - HttpError(HttpResponse(404), 'There was an error')) + HttpError(HttpResponse(404), 'There was an error'.encode('utf8'))) actual = self.client.create_table(self.dataset, self.table, self.schema) @@ -1395,7 +1762,8 @@ def test_table_create_failed(self): self.mock_tables.insert.assert_called_with( projectId=self.project, datasetId=self.dataset, body=self.body) - self.mock_tables.insert.return_value.execute.assert_called_with() + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=0) def test_table_create_success(self): """Ensure that if creating the table succeeds, True is returned, @@ -1421,81 +1789,355 @@ def test_table_create_success(self): self.mock_tables.insert.assert_called_with( projectId=self.project, datasetId=self.dataset, body=self.body) - self.mock_tables.insert.return_value.execute.assert_called_with() + self.mock_tables.insert.return_value.execute. 
\ + assert_called_with(num_retries=0) + def test_table_create_body_with_expiration_time(self): + """Ensure that if expiration_time has specified, + it passed to the body.""" -class TestDeleteTable(unittest.TestCase): + self.mock_tables.insert.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + self.client.create_table(self.dataset, self.table, + self.schema, self.expiration_time) + + body = self.body.copy() + body.update({ + 'expirationTime': self.expiration_time + }) + + self.mock_tables.insert.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=body) + + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=0) + + def test_table_create_body_with_time_partitioning(self): + """Ensure that if time_partitioning has specified, + it passed to the body.""" + + self.mock_tables.insert.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + self.client.create_table(self.dataset, self.table, + self.schema, + time_partitioning=self.time_partitioning) + + body = self.body.copy() + body.update({ + 'timePartitioning': {'type': 'DAY'} + }) + + self.mock_tables.insert.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=body) + + self.mock_tables.insert.return_value.execute. 
\ + assert_called_with(num_retries=0) + + +class TestUpdateTable(unittest.TestCase): def setUp(self): self.mock_bq_service = mock.Mock() self.mock_tables = mock.Mock() self.mock_bq_service.tables.return_value = self.mock_tables self.table = 'table' + self.schema = [ + {'name': 'foo', 'type': 'STRING', 'mode': 'nullable'}, + {'name': 'bar', 'type': 'FLOAT', 'mode': 'nullable'} + ] self.project = 'project' self.dataset = 'dataset' self.client = client.BigQueryClient(self.mock_bq_service, self.project) + self.body = { + 'schema': {'fields': self.schema}, + 'tableReference': { + 'tableId': self.table, 'projectId': self.project, + 'datasetId': self.dataset} + } + self.expiration_time = 1437513693000 - def test_delete_table_fail(self): - """Ensure that if deleting table fails, False is returned, - or the actual response is swallow_results is False.""" + def test_table_update_failed(self): + """Ensure that if updating the table fails, False is returned, + or if swallow_results is False an empty dict is returned.""" - self.mock_tables.delete.return_value.execute.side_effect = ( - HttpError(HttpResponse(404), 'There was an error')) + self.mock_tables.update.return_value.execute.side_effect = ( + HttpError(HttpResponse(404), 'There was an error'.encode('utf8'))) - actual = self.client.delete_table(self.dataset, self.table) + actual = self.client.update_table(self.dataset, self.table, + self.schema) self.assertFalse(actual) self.client.swallow_results = False - actual = self.client.delete_table(self.dataset, self.table) + actual = self.client.update_table(self.dataset, self.table, + self.schema) self.assertEqual(actual, {}) self.client.swallow_results = True - self.mock_tables.delete.assert_called_with( - projectId=self.project, datasetId=self.dataset, tableId=self.table) + self.mock_tables.update.assert_called_with( + projectId=self.project, tableId=self.table, datasetId=self.dataset, + body=self.body) - self.mock_tables.delete.return_value.execute.assert_called_with() + 
self.mock_tables.update.return_value.execute. \ + assert_called_with(num_retries=0) - def test_delete_table_success(self): - """Ensure that if deleting table succeeds, True is returned, - or the actual response if swallow_results is False.""" + def test_table_update_success(self): + """Ensure that if updating the table succeeds, True is returned, + or if swallow_results is False the actual response is returned.""" - self.mock_tables.delete.return_value.execute.side_effect = [{ + self.mock_tables.update.return_value.execute.side_effect = [{ 'status': 'foo'}, {'status': 'bar'}] - actual = self.client.delete_table(self.dataset, self.table) + actual = self.client.update_table(self.dataset, self.table, + self.schema) self.assertTrue(actual) self.client.swallow_results = False - actual = self.client.delete_table(self.dataset, self.table) + actual = self.client.update_table(self.dataset, self.table, + self.schema) self.assertEqual(actual, {'status': 'bar'}) self.client.swallow_results = True - self.mock_tables.delete.assert_called_with( - projectId=self.project, datasetId=self.dataset, tableId=self.table) + self.mock_tables.update.assert_called_with( + projectId=self.project, tableId=self.table, datasetId=self.dataset, + body=self.body) - self.mock_tables.delete.return_value.execute.assert_called_with() + self.mock_tables.update.return_value.execute. 
\ + assert_called_with(num_retries=0) -class TestParseTableListReponse(unittest.TestCase): +class TestPatchTable(unittest.TestCase): - def test_full_parse(self): - """Ensures we can parse a full list response.""" + def setUp(self): + self.mock_bq_service = mock.Mock() + self.mock_tables = mock.Mock() + self.mock_bq_service.tables.return_value = self.mock_tables + self.table = 'table' + self.schema = [ + {'name': 'foo', 'type': 'STRING', 'mode': 'nullable'}, + {'name': 'bar', 'type': 'FLOAT', 'mode': 'nullable'} + ] + self.project = 'project' + self.dataset = 'dataset' + self.client = client.BigQueryClient(self.mock_bq_service, self.project) + self.body = { + 'schema': {'fields': self.schema}, + } + self.expiration_time = 1437513693000 - bq = client.BigQueryClient(None, 'project') + def test_table_patch_failed(self): + """Ensure that if patching the table fails, False is returned, + or if swallow_results is False an empty dict is returned.""" - tables = bq._parse_table_list_response(FULL_TABLE_LIST_RESPONSE) + self.mock_tables.patch.return_value.execute.side_effect = ( + HttpError(HttpResponse(404), 'There was an error'.encode('utf8'))) - expected_result = { - 'appspot-3': {'2013_06_appspot_3': 1370044800}, - 'appspot-2': {'2013_06_appspot_2': 1370044800}, + actual = self.client.patch_table(self.dataset, self.table, + self.schema) + + self.assertFalse(actual) + + self.client.swallow_results = False + + actual = self.client.patch_table(self.dataset, self.table, + self.schema) + + self.assertEqual(actual, {}) + + self.client.swallow_results = True + + self.mock_tables.patch.assert_called_with( + projectId=self.project, datasetId=self.dataset, + tableId=self.table, body=self.body) + + self.mock_tables.patch.return_value.execute. 
\ + assert_called_with(num_retries=0) + + def test_table_patch_success(self): + """Ensure that if patching the table succeeds, True is returned, + or if swallow_results is False the actual response is returned.""" + + self.mock_tables.patch.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.patch_table(self.dataset, self.table, + self.schema) + + self.assertTrue(actual) + + self.client.swallow_results = False + + actual = self.client.patch_table(self.dataset, self.table, + self.schema) + + self.assertEqual(actual, {'status': 'bar'}) + + self.client.swallow_results = True + + self.mock_tables.patch.assert_called_with( + projectId=self.project, datasetId=self.dataset, + tableId=self.table, body=self.body) + + self.mock_tables.patch.return_value.execute. \ + assert_called_with(num_retries=0) + + +class TestCreateView(unittest.TestCase): + + def setUp(self): + self.mock_bq_service = mock.Mock() + self.mock_tables = mock.Mock() + self.mock_bq_service.tables.return_value = self.mock_tables + self.table = 'table' + self.project = 'project' + self.dataset = 'dataset' + self.query = 'SELECT "bar" foo, "foo" bar' + self.client = client.BigQueryClient(self.mock_bq_service, self.project) + self.body = { + 'view': {'query': self.query}, + 'tableReference': { + 'tableId': self.table, 'projectId': self.project, + 'datasetId': self.dataset} + } + + def test_view_create_failed(self): + """Ensure that if creating the table fails, False is returned, + or if swallow_results is False an empty dict is returned.""" + + self.mock_tables.insert.return_value.execute.side_effect = ( + HttpError(HttpResponse(404), 'There was an error'.encode('utf8'))) + + actual = self.client.create_view(self.dataset, self.table, + self.query) + + self.assertFalse(actual) + + self.client.swallow_results = False + + actual = self.client.create_view(self.dataset, self.table, + self.query) + + self.assertEqual(actual, {}) + + self.client.swallow_results = True + 
+ self.mock_tables.insert.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=self.body) + + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=0) + + def test_view_create_success(self): + """Ensure that if creating the table succeeds, True is returned, + or if swallow_results is False the actual response is returned.""" + + self.mock_tables.insert.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.create_view(self.dataset, self.table, + self.query) + + self.assertTrue(actual) + + self.client.swallow_results = False + + actual = self.client.create_view(self.dataset, self.table, + self.query) + + self.assertEqual(actual, {'status': 'bar'}) + + self.client.swallow_results = True + + self.mock_tables.insert.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=self.body) + + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=0) + + +class TestDeleteTable(unittest.TestCase): + + def setUp(self): + self.mock_bq_service = mock.Mock() + self.mock_tables = mock.Mock() + self.mock_bq_service.tables.return_value = self.mock_tables + self.table = 'table' + self.project = 'project' + self.dataset = 'dataset' + self.client = client.BigQueryClient(self.mock_bq_service, self.project) + + def test_delete_table_fail(self): + """Ensure that if deleting table fails, False is returned, + or the actual response is swallow_results is False.""" + + self.mock_tables.delete.return_value.execute.side_effect = ( + HttpError(HttpResponse(404), 'There was an error'.encode('utf8'))) + + actual = self.client.delete_table(self.dataset, self.table) + + self.assertFalse(actual) + + self.client.swallow_results = False + + actual = self.client.delete_table(self.dataset, self.table) + + self.assertEqual(actual, {}) + + self.client.swallow_results = True + + self.mock_tables.delete.assert_called_with( + projectId=self.project, datasetId=self.dataset, 
tableId=self.table) + + self.mock_tables.delete.return_value.execute. \ + assert_called_with(num_retries=0) + + def test_delete_table_success(self): + """Ensure that if deleting table succeeds, True is returned, + or the actual response if swallow_results is False.""" + + self.mock_tables.delete.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.delete_table(self.dataset, self.table) + + self.assertTrue(actual) + + self.client.swallow_results = False + + actual = self.client.delete_table(self.dataset, self.table) + + self.assertEqual(actual, {'status': 'bar'}) + + self.client.swallow_results = True + + self.mock_tables.delete.assert_called_with( + projectId=self.project, datasetId=self.dataset, tableId=self.table) + + self.mock_tables.delete.return_value.execute. \ + assert_called_with(num_retries=0) + + +class TestParseTableListReponse(unittest.TestCase): + + def test_full_parse(self): + """Ensures we can parse a full list response.""" + + bq = client.BigQueryClient(None, 'project') + + tables = bq._parse_table_list_response(FULL_TABLE_LIST_RESPONSE) + + expected_result = { + 'appspot-3': {'2013_06_appspot_3': 1370044800}, + 'appspot-2': {'2013_06_appspot_2': 1370044800}, 'appspot-1': {'2013_06_appspot_1': 1370044800}, 'appspot-6': {'appspot_6_2013_06': 1370044800}, 'appspot-5': {'2013_06_appspot_5': 1370044800}, @@ -1617,7 +2259,7 @@ def test_push_failed(self): projectId=self.project, datasetId=self.dataset, tableId=self.table, body=self.data) - execute_calls = [mock.call()] + execute_calls = [mock.call(num_retries=0)] self.mock_table_data.insertAll.return_value.execute.assert_has_calls( execute_calls) @@ -1642,7 +2284,7 @@ def test_push_failed_swallow_results_false(self): def test_push_exception(self): """Ensure that if insertAll raises an exception, False is returned.""" - e = HttpError(HttpResponse(404), 'There was an error') + e = HttpError(HttpResponse(404), 'There was an error'.encode('utf8')) 
self.mock_table_data.insertAll.return_value.execute.side_effect = e actual = self.client.push_rows(self.dataset, self.table, self.rows, @@ -1671,7 +2313,7 @@ def test_push_exception(self): projectId=self.project, datasetId=self.dataset, tableId=self.table, body=self.data) - execute_calls = [mock.call()] + execute_calls = [mock.call(num_retries=0)] self.mock_table_data.insertAll.return_value.execute.assert_has_calls( execute_calls) @@ -1703,14 +2345,104 @@ def test_push_success(self): projectId=self.project, datasetId=self.dataset, tableId=self.table, body=self.data) - execute_calls = [mock.call()] + execute_calls = [mock.call(num_retries=0)] self.mock_table_data.insertAll.return_value.execute.assert_has_calls( execute_calls) + def test_request_data_with_options(self): + """Ensure that insertAll body has optional property only when + the optional parameter of push_rows passed. + """ + expected_body = self.data.copy() + + self.client.push_rows( + self.dataset, self.table, self.rows, + insert_id_key='one') + self.mock_table_data.insertAll.assert_called_with( + projectId=self.project, + datasetId=self.dataset, + tableId=self.table, + body=expected_body) + + self.client.push_rows( + self.dataset, self.table, self.rows, + insert_id_key='one', + ignore_unknown_values=False, + skip_invalid_rows=False) + expected_body['ignoreUnknownValues'] = False + expected_body['skipInvalidRows'] = False + self.mock_table_data.insertAll.assert_called_with( + projectId=self.project, + datasetId=self.dataset, + tableId=self.table, + body=expected_body) + + self.client.push_rows( + self.dataset, self.table, self.rows, + insert_id_key='one', + ignore_unknown_values=True, + skip_invalid_rows=True, + template_suffix='20160428' + ) + expected_body['ignoreUnknownValues'] = True + expected_body['skipInvalidRows'] = True + expected_body['templateSuffix'] = '20160428' + self.mock_table_data.insertAll.assert_called_with( + projectId=self.project, + datasetId=self.dataset, + tableId=self.table, + 
body=expected_body) + + def test_insert_id_key_with_nested_column(self): + """Ensure that dot separated insert_id_key properly extracted with nested column value.""" + rows = [ + {'nested': {'col': 'nested_col1'}, 'val': 1}, + {'nested': {'col': 'nested_col2'}, 'val': 2}, + ] + expected_body = self.data.copy() + expected_body['rows'] = [ + {'insertId': 'nested_col1', 'json': {'nested': {'col': 'nested_col1'}, 'val': 1}}, + {'insertId': 'nested_col2', 'json': {'nested': {'col': 'nested_col2'}, 'val': 2}}, + ] + + self.client.push_rows(self.dataset, self.table, rows, + insert_id_key='nested.col') + self.mock_table_data.insertAll.assert_called_with( + projectId=self.project, + datasetId=self.dataset, + tableId=self.table, + body=expected_body) + + expected_body = self.data.copy() + expected_body['rows'] = [ + {'insertId': 1, 'json': {'nested': {'col': 'nested_col1'}, 'val': 1}}, + {'insertId': 2, 'json': {'nested': {'col': 'nested_col2'}, 'val': 2}}, + ] + self.client.push_rows(self.dataset, self.table, rows, + insert_id_key='val') + self.mock_table_data.insertAll.assert_called_with( + projectId=self.project, + datasetId=self.dataset, + tableId=self.table, + body=expected_body) + + expected_body = self.data.copy() + expected_body['rows'] = [ + {'json': {'nested': {'col': 'nested_col1'}, 'val': 1}}, + {'json': {'nested': {'col': 'nested_col2'}, 'val': 2}}, + ] + self.client.push_rows(self.dataset, self.table, rows, + insert_id_key='no_such.column') + self.mock_table_data.insertAll.assert_called_with( + projectId=self.project, + datasetId=self.dataset, + tableId=self.table, + body=expected_body) + class TestGetAllTables(unittest.TestCase): - def test_get_tables(self): + def test_get_all_tables(self): """Ensure get_all_tables fetches table names from BigQuery.""" mock_execute = mock.Mock() @@ -1724,6 +2456,29 @@ def test_get_tables(self): bq = client.BigQueryClient(mock_bq_service, 'project') + expected_result = [ + 'notanappspottable_20130515_0261', '2013_05_appspot', 
'2013_06_appspot_1', '2013_06_appspot_2', + '2013_06_appspot_3', '2013_06_appspot_4', '2013_06_appspot_5', + 'appspot_6_2013_06', 'table_not_matching_naming' + ] + + tables = bq.get_all_tables('dataset') + self.assertEquals(expected_result, tables) + + def test_get_tables(self): + """Ensure _get_all_tables fetches table names from BigQuery.""" + + mock_execute = mock.Mock() + mock_execute.execute.return_value = FULL_TABLE_LIST_RESPONSE + + mock_tables = mock.Mock() + mock_tables.list.return_value = mock_execute + + mock_bq_service = mock.Mock() + mock_bq_service.tables.return_value = mock_tables + + bq = client.BigQueryClient(mock_bq_service, 'project') + expected_result = { 'appspot-3': {'2013_06_appspot_3': 1370044800}, 'appspot-2': {'2013_06_appspot_2': 1370044800}, @@ -1831,7 +2586,7 @@ def test_get_tables(self): bq = client.BigQueryClient(mock_bq_service, 'project') tables = bq.get_tables('dataset', 'appspot-1', 0, 10000000000) - self.assertItemsEqual(tables, ['2013_06_appspot_1']) + six.assertCountEqual(self, tables, ['2013_06_appspot_1']) def test_get_tables_from_datetimes(self): """Ensure tables falling in the time window, specified with datetimes, @@ -1854,7 +2609,7 @@ def test_get_tables_from_datetimes(self): end = datetime(2013, 7, 10) tables = bq.get_tables('dataset', 'appspot-1', start, end) - self.assertItemsEqual(tables, ['2013_06_appspot_1']) + six.assertCountEqual(self, tables, ['2013_06_appspot_1']) # @@ -1885,7 +2640,7 @@ def test_dataset_create_failed(self): """Ensure that if creating the table fails, False is returned.""" self.mock_datasets.insert.return_value.execute.side_effect = \ - HttpError(HttpResponse(404), 'There was an error') + HttpError(HttpResponse(404), 'There was an error'.encode('utf8')) actual = self.client.create_dataset(self.dataset, friendly_name=self.friendly_name, @@ -1908,7 +2663,7 @@ def test_dataset_create_failed(self): projectId=self.project, body=self.body) self.mock_datasets.insert.return_value.execute. 
\ - assert_called_with() + assert_called_with(num_retries=0) def test_dataset_create_success(self): """Ensure that if creating the table fails, False is returned.""" @@ -1937,7 +2692,7 @@ def test_dataset_create_success(self): projectId=self.project, body=self.body) self.mock_datasets.insert.return_value.execute. \ - assert_called_with() + assert_called_with(num_retries=0) class TestDeleteDataset(unittest.TestCase): @@ -1954,7 +2709,7 @@ def test_delete_datasets_fail(self): """Ensure that if deleting table fails, False is returned.""" self.mock_datasets.delete.return_value.execute.side_effect = \ - HttpError(HttpResponse(404), 'There was an error') + HttpError(HttpResponse(404), 'There was an error'.encode('utf8')) actual = self.client.delete_dataset(self.dataset) @@ -1973,7 +2728,7 @@ def test_delete_datasets_fail(self): self.client.swallow_results = True self.mock_datasets.delete.return_value.execute. \ - assert_called_with() + assert_called_with(num_retries=0) def test_delete_datasets_success(self): """Ensure that if deleting table succeeds, True is returned.""" @@ -1998,7 +2753,7 @@ def test_delete_datasets_success(self): deleteContents=False) self.mock_datasets.delete.return_value.execute. \ - assert_called_with() + assert_called_with(num_retries=0) def test_delete_datasets_delete_contents_success(self): """Ensure that if deleting table succeeds, True is returned.""" @@ -2023,7 +2778,7 @@ def test_delete_datasets_delete_contents_success(self): deleteContents=True) self.mock_datasets.delete.return_value.execute. 
\ - assert_called_with() + assert_called_with(num_retries=0) FULL_DATASET_LIST_RESPONSE = { @@ -2112,7 +2867,8 @@ def test_get_datasets(self): bq = client.BigQueryClient(mock_bq_service, 'project') datasets = bq.get_datasets() - self.assertItemsEqual(datasets, FULL_DATASET_LIST_RESPONSE['datasets']) + six.assertCountEqual(self, datasets, + FULL_DATASET_LIST_RESPONSE['datasets']) def test_get_datasets_returns_no_list(self): """Ensure we handle the no datasets case""" @@ -2131,7 +2887,7 @@ def test_get_datasets_returns_no_list(self): bq = client.BigQueryClient(mock_bq_service, 'project') datasets = bq.get_datasets() - self.assertItemsEqual(datasets, []) + six.assertCountEqual(self, datasets, []) class TestUpdateDataset(unittest.TestCase): @@ -2159,7 +2915,7 @@ def test_dataset_update_failed(self): """Ensure that if creating the table fails, False is returned.""" self.mock_datasets.update.return_value.execute.side_effect = \ - HttpError(HttpResponse(404), 'There was an error') + HttpError(HttpResponse(404), 'There was an error'.encode('utf8')) actual = self.client.update_dataset(self.dataset, friendly_name=self.friendly_name, @@ -2182,7 +2938,7 @@ def test_dataset_update_failed(self): projectId=self.project, datasetId=self.dataset, body=self.body) self.mock_datasets.update.return_value.execute. 
\ - assert_called_with() + assert_called_with(num_retries=0) def test_dataset_update_success(self): """Ensure that if creating the table fails, False is returned.""" @@ -2190,18 +2946,18 @@ def test_dataset_update_success(self): self.mock_datasets.update.return_value.execute.side_effect = [{ 'status': 'foo'}, {'status': 'bar'}] - actual = self.client.update_dataset(self.dataset, - self.friendly_name, - self.description, - self.access) + actual = self.client.update_dataset(self.dataset, + friendly_name=self.friendly_name, + description=self.description, + access=self.access) self.assertTrue(actual) self.client.swallow_results = False - actual = self.client.update_dataset(self.dataset, - self.friendly_name, - self.description, - self.access) + actual = self.client.update_dataset(self.dataset, + friendly_name=self.friendly_name, + description=self.description, + access=self.access) self.assertEqual(actual, {'status': 'bar'}) @@ -2211,4 +2967,219 @@ def test_dataset_update_success(self): projectId=self.project, datasetId=self.dataset, body=self.body) self.mock_datasets.update.return_value.execute. 
\ - assert_called_with() + assert_called_with(num_retries=0) + + +class TestNumRetries(unittest.TestCase): + + def setUp(self): + client._bq_client = None + + self.mock_bq_service = mock.Mock() + self.mock_tables = mock.Mock() + self.mock_job_collection = mock.Mock() + self.mock_datasets = mock.Mock() + self.mock_table_data = mock.Mock() + self.mock_bq_service.tables.return_value = self.mock_tables + self.mock_bq_service.jobs.return_value = self.mock_job_collection + self.mock_bq_service.datasets.return_value = self.mock_datasets + self.mock_bq_service.tabledata.return_value = self.mock_table_data + + self.project_id = 'project' + self.num_retries = 5 + self.client = client.BigQueryClient(self.mock_bq_service, + self.project_id, + num_retries=self.num_retries) + self.dataset = 'dataset' + self.project = 'project' + self.table = 'table' + self.schema = [ + {'name': 'foo', 'type': 'STRING', 'mode': 'nullable'}, + {'name': 'bar', 'type': 'FLOAT', 'mode': 'nullable'} + ] + self.friendly_name = "friendly name" + self.description = "description" + self.access = [{'userByEmail': "bob@gmail.com"}] + self.query = 'SELECT "bar" foo, "foo" bar' + self.rows = [ + {'one': 'uno', 'two': 'dos'}, {'one': 'ein', 'two': 'zwei'}, + {'two': 'kiwi'}] + self.data = { + "kind": "bigquery#tableDataInsertAllRequest", + "rows": [{'insertId': "uno", 'json': {'one': 'uno', 'two': 'dos'}}, + {'insertId': "ein", 'json': + {'one': 'ein', 'two': 'zwei'}}, + {'json': {'two': 'kiwi'}}] + } + + def test_get_response(self): + job_id = 'bar' + + mock_query_job = mock.Mock() + mock_query_reply = mock.Mock() + mock_query_job.execute.return_value = mock_query_reply + self.mock_job_collection.getQueryResults.return_value = mock_query_job + + offset = 5 + limit = 10 + page_token = "token" + timeout = 1 + + self.client.get_query_results(job_id, offset, limit, + page_token, timeout) + + mock_query_job.execute. 
\ + assert_called_once_with(num_retries=self.num_retries) + + def test_table_exists(self): + expected = [ + {'type': 'FLOAT', 'name': 'foo', 'mode': 'NULLABLE'}, + {'type': 'INTEGER', 'name': 'bar', 'mode': 'NULLABLE'}, + {'type': 'INTEGER', 'name': 'baz', 'mode': 'NULLABLE'}, + ] + + self.mock_tables.get.return_value.execute.return_value = \ + {'schema': {'fields': expected}} + + self.client.get_table_schema(self.dataset, self.table) + self.mock_tables.get.return_value.execute. \ + assert_called_once_with(num_retries=self.num_retries) + + def test_table_create(self): + self.mock_tables.insert.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + self.client.create_table(self.dataset, self.table, + self.schema) + + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=self.num_retries) + + def test_table_update(self): + self.mock_tables.update.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + self.client.update_table(self.dataset, self.table, + self.schema) + + self.mock_tables.update.return_value.execute. \ + assert_called_with(num_retries=self.num_retries) + + def test_table_patch(self): + self.mock_tables.patch.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + self.client.patch_table(self.dataset, self.table, + self.schema) + + self.mock_tables.patch.return_value.execute. 
\ + assert_called_with(num_retries=self.num_retries) + + def test_view_create(self): + body = { + 'view': {'query': self.query}, + 'tableReference': { + 'tableId': self.table, 'projectId': self.project, + 'datasetId': self.dataset + } + } + + self.mock_tables.insert.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.create_view(self.dataset, self.table, + self.query) + + self.assertTrue(actual) + + self.mock_tables.insert.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=body) + + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=self.num_retries) + + def test_delete_table(self): + self.mock_tables.delete.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.delete_table(self.dataset, self.table) + + self.assertTrue(actual) + + self.mock_tables.delete.assert_called_with( + projectId=self.project, datasetId=self.dataset, tableId=self.table) + + self.mock_tables.delete.return_value.execute. 
\ + assert_called_with(num_retries=self.num_retries) + + def test_push(self): + self.mock_table_data.insertAll.return_value.execute.return_value = { + 'status': 'foo'} + + actual = self.client.push_rows(self.dataset, self.table, self.rows, + 'one') + + self.assertTrue(actual) + + self.mock_bq_service.tabledata.assert_called_with() + + self.mock_table_data.insertAll.assert_called_with( + projectId=self.project, datasetId=self.dataset, tableId=self.table, + body=self.data) + + execute_calls = [mock.call(num_retries=self.num_retries)] + self.mock_table_data.insertAll.return_value.execute.assert_has_calls( + execute_calls) + + def test_dataset_create(self): + body = { + 'datasetReference': { + 'datasetId': self.dataset, + 'projectId': self.project}, + 'friendlyName': self.friendly_name, + 'description': self.description, + 'access': self.access + } + + self.mock_datasets.insert.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.create_dataset(self.dataset, + self.friendly_name, + self.description, + self.access) + self.assertTrue(actual) + + self.mock_datasets.insert.assert_called_with( + projectId=self.project, body=body) + + self.mock_datasets.insert.return_value.execute. \ + assert_called_with(num_retries=self.num_retries) + + def test_delete_datasets(self): + self.mock_datasets.delete.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.delete_dataset(self.dataset) + + self.assertTrue(actual) + + self.mock_datasets.delete.assert_called_with( + projectId=self.project, datasetId=self.dataset, + deleteContents=False) + + self.mock_datasets.delete.return_value.execute. 
\ + assert_called_with(num_retries=self.num_retries) + + def test_dataset_update(self): + self.mock_datasets.update.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.update_dataset(self.dataset, + self.friendly_name, + self.description, + self.access) + self.assertTrue(actual) + + self.mock_datasets.update.return_value.execute. \ + assert_called_with(num_retries=self.num_retries) diff --git a/bigquery/tests/test_query_builder.py b/bigquery/tests/test_query_builder.py index b2e2de1..6e9e9ee 100644 --- a/bigquery/tests/test_query_builder.py +++ b/bigquery/tests/test_query_builder.py @@ -1,5 +1,8 @@ +import six import unittest +unittest.TestCase.maxDiff = None + class TestRenderSelect(unittest.TestCase): @@ -18,11 +21,13 @@ def test_multiple_selects(self): 'ip': {'alias': 'IP'}, 'app_logs': {'alias': 'AppLogs'}}) - expected = 'SELECT status as Status, latency as Latency, ' \ - 'max_log_level as MaxLogLevel, resource as URL, user as ' \ - 'User, ip as IP, start_time as TimeStamp, version_id as ' \ - 'Version, app_logs as AppLogs' - self.assertEqual(expected, result) + expected = ('SELECT status as Status, latency as Latency, ' + 'max_log_level as MaxLogLevel, resource as URL, user as ' + 'User, ip as IP, start_time as TimeStamp, version_id as ' + 'Version, app_logs as AppLogs') + six.assertCountEqual( + self, sorted(expected[len('SELECT '):].split(', ')), + sorted(result[len('SELECT '):].split(', '))) def test_casting(self): """Ensure that render select can handle custom casting.""" @@ -75,6 +80,22 @@ def test_no_dataset(self): self.assertEqual(result, 'FROM [.man], [.pig], [.bro]') + def test_tables_in_date_range(self): + """Ensure that render sources can handle tables in DATE RANGE.""" + from bigquery.query_builder import _render_sources + + tables = { + 'date_range': True, + 'from_date': '2015-08-23', + 'to_date': '2015-10-10', + 'table': 'pets_' + } + + result = _render_sources('animals', tables) + + 
self.assertEqual(result, "FROM (TABLE_DATE_RANGE([animals.pets_], " + "TIMESTAMP('2015-08-23'), TIMESTAMP('2015-10-10'))) ") + class TestRenderConditions(unittest.TestCase): @@ -202,14 +223,52 @@ def test_in_comparator(self): } ]) - self.assertEqual(result, "WHERE ((foobar IN (STRING('a'), STRING('b'))" - " AND foobar IN (STRING('c'), STRING('d')) " - "AND foobar IN (STRING('e'), STRING('f')) AND" - " foobar IN (STRING('g'))) AND (NOT foobar IN" - " (STRING('h'), STRING('i')) AND NOT foobar " - "IN (STRING('k'), STRING('j')) AND NOT foobar" - " IN (STRING('l'), STRING('m')) AND NOT " - "foobar IN (STRING('n'))))") + six.assertCountEqual(self, result[len('WHERE '):].split(' AND '), + "WHERE ((foobar IN (STRING('a'), STRING('b'))" + " AND foobar IN (STRING('c'), STRING('d')) " + "AND foobar IN (STRING('e'), STRING('f')) AND" + " foobar IN (STRING('g'))) AND (NOT foobar IN" + " (STRING('h'), STRING('i')) AND NOT foobar " + "IN (STRING('j'), STRING('k')) AND NOT foobar" + " IN (STRING('l'), STRING('m')) AND NOT " + "foobar IN (STRING('n'))))" [len('WHERE '):] + .split(' AND ')) + + def test_between_comparator(self): + """Ensure that render conditions can handle "BETWEEN" condition.""" + from bigquery.query_builder import _render_conditions + + result = _render_conditions([ + { + 'field': 'foobar', + 'type': 'STRING', + 'comparators': [ + {'condition': 'BETWEEN', 'negate': False, + 'value': ['a', 'b']}, + {'condition': 'BETWEEN', 'negate': False, + 'value': {'c', 'd'}}, + {'condition': 'BETWEEN', 'negate': False, + 'value': ('e', 'f')}, + {'condition': 'BETWEEN', 'negate': True, + 'value': ['h', 'i']}, + {'condition': 'BETWEEN', 'negate': True, + 'value': {'j', 'k'}}, + {'condition': 'BETWEEN', 'negate': True, + 'value': ('l', 'm')} + ] + } + ]) + + six.assertCountEqual(self, result[len('WHERE '):].split(' AND '), + "WHERE ((foobar BETWEEN STRING('a') AND " + "STRING('b') AND foobar BETWEEN STRING('c') " + "AND STRING('d') AND foobar BETWEEN " + "STRING('e') AND 
STRING('f')) AND (NOT foobar " + "BETWEEN STRING('h') AND STRING('i') AND NOT " + "foobar BETWEEN STRING('j') AND STRING('k') " + "AND NOT foobar BETWEEN STRING('l') AND " + "STRING('m')))" [len('WHERE '):] + .split(' AND ')) class TestRenderOrder(unittest.TestCase): @@ -218,7 +277,7 @@ def test_order(self): """Ensure that render order can work under expected conditions.""" from bigquery.query_builder import _render_order - result = _render_order({'field': 'foo', 'direction': 'desc'}) + result = _render_order({'fields': ['foo'], 'direction': 'desc'}) self.assertEqual(result, "ORDER BY foo desc") @@ -252,6 +311,56 @@ def test_no_fields(self): self.assertEqual(result, "") +class TestRenderHaving(unittest.TestCase): + + def test_mutliple_fields(self): + """Ensure that render having works with multiple fields.""" + from bigquery.query_builder \ + import _render_having + + result = _render_having([ + { + 'field': 'bar', + 'type': 'STRING', + 'comparators': [ + {'condition': '>=', 'negate': False, 'value': '1'} + ] + } + ]) + + self.assertEqual(result, "HAVING (bar >= STRING('1'))") + + def test_no_fields(self): + """Ensure that render having can work with out any arguments.""" + from bigquery.query_builder \ + import _render_having + + result = _render_having(None) + + self.assertEqual(result, "") + + +class TestLimit(unittest.TestCase): + + def test_with_limit(self): + """Ensure that render limit works.""" + from bigquery.query_builder \ + import _render_limit + + result = _render_limit(8) + + self.assertEqual(result, "LIMIT 8") + + def test_no_fields(self): + """Ensure that render limit can work without any arguments.""" + from bigquery.query_builder \ + import _render_limit + + result = _render_limit(None) + + self.assertEqual(result, "") + + class TestRenderQuery(unittest.TestCase): def test_full_query(self): @@ -291,14 +400,36 @@ def test_full_query(self): } ], groupings=['timestamp', 'status'], - order_by={'field': 'timestamp', 'direction': 'desc'}) + having=[ + { 
+ 'field': 'status', + 'comparators': [ + { + 'condition': '==', + 'value': 1, + 'negate': False + } + ], + 'type': 'INTEGER' + } + ], + order_by={'fields': ['timestamp'], 'direction': 'desc'}, + limit=10) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM [dataset.2013_06_appspot_1]" " WHERE (start_time <= INTEGER('1371566954')) AND " "(start_time >= INTEGER('1371556954')) GROUP BY " - "timestamp, status ORDER BY timestamp desc") - self.assertEqual(result, expected_query) + "timestamp, status HAVING (status == INTEGER('1')) " + "ORDER BY timestamp desc LIMIT 10") + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_empty_conditions(self): """Ensure that render query can handle an empty list of conditions.""" @@ -313,13 +444,21 @@ def test_empty_conditions(self): 'resource': {'alias': 'url'} }, conditions=[], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " - "[dataset.2013_06_appspot_1] ORDER BY " - "timestamp desc") - self.assertEqual(result, expected_query) + "[dataset.2013_06_appspot_1] ORDER BY " + "timestamp desc ") + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + 
six.assertCountEqual(self, expected_from, result_from) def test_incorrect_conditions(self): """Ensure that render query can handle incorrectly formatted @@ -342,13 +481,20 @@ def test_incorrect_conditions(self): 'negate': False}, 'compoorattor': '>=', 'type': 'INTEGER'} ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " - "[dataset.2013_06_appspot_1] ORDER BY " - "timestamp desc") - self.assertEqual(result, expected_query) + "[dataset.2013_06_appspot_1] ORDER BY " + "timestamp desc ") + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_multiple_condition_values(self): """Ensure that render query can handle conditions with multiple values. 
@@ -383,7 +529,7 @@ def test_multiple_condition_values(self): 'negate': False}], 'type': 'STRING'} ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " @@ -392,8 +538,15 @@ def test_multiple_condition_values(self): "INTEGER('1371556954')) AND " "((resource CONTAINS STRING('foo') AND resource " "CONTAINS STRING('baz')) AND (NOT resource CONTAINS " - "STRING('bar'))) ORDER BY timestamp desc") - self.assertEqual(result, expected_query) + "STRING('bar'))) ORDER BY timestamp desc ") + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_negated_condition_value(self): """Ensure that render query can handle conditions with negated values. 
@@ -414,13 +567,20 @@ def test_negated_condition_value(self): 'negate': True}], 'type': 'STRING'} ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (NOT resource " - "CONTAINS STRING('foo')) ORDER BY timestamp desc") - self.assertEqual(result, expected_query) + "CONTAINS STRING('foo')) ORDER BY timestamp desc ") + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_multiple_negated_condition_values(self): """Ensure that render query can handle conditions with multiple negated @@ -448,15 +608,22 @@ def test_multiple_negated_condition_values(self): 'negate': True}], 'type': 'STRING'} ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (NOT resource " "CONTAINS STRING('foo') AND NOT resource CONTAINS " "STRING('baz') AND NOT resource CONTAINS " - "STRING('bar')) ORDER BY timestamp desc") - self.assertEqual(result, expected_query) + "STRING('bar')) ORDER BY timestamp desc ") + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, 
expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_empty_order(self): """Ensure that render query can handle an empty formatted order.""" @@ -486,8 +653,15 @@ def test_empty_order(self): "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ") - self.assertEqual(result, expected_query) + "INTEGER('1371556954')) ") + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_incorrect_order(self): """Ensure that render query can handle inccorectly formatted order.""" @@ -517,8 +691,15 @@ def test_incorrect_order(self): "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ") - self.assertEqual(result, expected_query) + "INTEGER('1371556954')) ") + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_empty_select(self): """Ensure that render query corrently handles no selection.""" @@ -538,12 +719,12 @@ def test_empty_select(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT 
* FROM [dataset.2013_06_appspot_1] " "WHERE (start_time <= INTEGER('1371566954')) AND " - "(start_time >= INTEGER('1371556954')) ORDER BY " - "timestamp desc") + "(start_time >= INTEGER('1371556954')) ORDER BY " + "timestamp desc ") self.assertEqual(result, expected_query) def test_no_alias(self): @@ -568,13 +749,23 @@ def test_no_alias(self): 'negate': False}], 'type': 'INTEGER'} ], - order_by={'field': 'start_time', 'direction': 'desc'}) + order_by={'fields': ['start_time'], 'direction': 'desc'}) expected_query = ("SELECT status , start_time , resource FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ORDER BY start_time desc") - self.assertEqual(result, expected_query) + "INTEGER('1371556954')) ORDER BY start_time desc ") + expected_select = (field.strip() for field in + expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = (expected_query[len('SELECT '):].split('FROM')[1] + .strip()) + result_select = (field.strip() for field in + result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1].strip() + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_formatting(self): """Ensure that render query runs with formatting a select.""" @@ -601,15 +792,22 @@ def test_formatting(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, " "FORMAT_UTC_USEC(INTEGER(start_time)) as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ORDER BY timestamp desc") - self.assertEqual(result, expected_query) + "INTEGER('1371556954')) ORDER BY timestamp desc ") + expected_select = 
(expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_formatting_duplicate_columns(self): """Ensure that render query runs with formatting a select for a @@ -645,7 +843,7 @@ def test_formatting_duplicate_columns(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, " "FORMAT_UTC_USEC(INTEGER(start_time)) as timestamp, " @@ -653,9 +851,16 @@ def test_formatting_duplicate_columns(self): "10) as day, resource as url FROM " "[dataset.2013_06_appspot_1] WHERE " "(start_time <= INTEGER('1371566954')) AND " - "(start_time >= INTEGER('1371556954')) ORDER BY " - "timestamp desc") - self.assertEqual(result, expected_query) + "(start_time >= INTEGER('1371556954')) ORDER BY " + "timestamp desc ") + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_sec_to_micro_formatting(self): """Ensure that render query runs sec_to_micro formatting on a @@ -684,15 +889,22 @@ def test_sec_to_micro_formatting(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, " 
"SEC_TO_TIMESTAMP(INTEGER(start_time*1000000)) as " "timestamp, resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ORDER BY timestamp desc") - self.assertEqual(result, expected_query) + "INTEGER('1371556954')) ORDER BY timestamp desc ") + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_no_table_or_dataset(self): """Ensure that render query returns None if there is no dataset or @@ -718,7 +930,8 @@ def test_no_table_or_dataset(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}, + limit=10) self.assertIsNone(result) @@ -735,13 +948,20 @@ def test_empty_groupings(self): 'resource': {'alias': 'url'} }, groupings=[], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " - "[dataset.2013_06_appspot_1] ORDER BY " - "timestamp desc") - self.assertEqual(result, expected_query) + "[dataset.2013_06_appspot_1] ORDER BY " + "timestamp desc ") + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, 
result_from) def test_multi_tables(self): """Ensure that render query arguments work with multiple tables.""" @@ -766,13 +986,20 @@ def test_multi_tables(self): 'type': 'INTEGER'}, ], groupings=['timestamp', 'status'], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1], " "[dataset.2013_07_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) GROUP BY timestamp, status " - "ORDER BY timestamp desc") - self.assertEqual(result, expected_query) + "INTEGER('1371556954')) GROUP BY timestamp, status " + "ORDER BY timestamp desc ") + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) diff --git a/bigquery/tests/test_schema_builder.py b/bigquery/tests/test_schema_builder.py index 2207060..060162b 100644 --- a/bigquery/tests/test_schema_builder.py +++ b/bigquery/tests/test_schema_builder.py @@ -1,7 +1,8 @@ +from six.moves.builtins import object from datetime import datetime import unittest - +import six from bigquery.schema_builder import schema_from_record from bigquery.schema_builder import describe_field from bigquery.schema_builder import bigquery_type @@ -11,48 +12,49 @@ class TestBigQueryTypes(unittest.TestCase): def test_str_is_string(self): - self.assertItemsEqual(bigquery_type("Bob"), 'string') + six.assertCountEqual(self, bigquery_type("Bob"), 'string') def test_unicode_is_string(self): - self.assertItemsEqual(bigquery_type(u"Here is a happy face \u263A"), - 'string') + 
six.assertCountEqual(self, bigquery_type(u"Here is a happy face \u263A"), + 'string') def test_int_is_integer(self): - self.assertItemsEqual(bigquery_type(123), 'integer') + six.assertCountEqual(self, bigquery_type(123), 'integer') def test_datetime_is_timestamp(self): - self.assertItemsEqual(bigquery_type(datetime.now()), 'timestamp') + six.assertCountEqual(self, bigquery_type(datetime.now()), 'timestamp') def test_isoformat_timestring(self): - self.assertItemsEqual(bigquery_type(datetime.now().isoformat()), - 'timestamp') + six.assertCountEqual(self, bigquery_type(datetime.now().isoformat()), + 'timestamp') def test_timestring_feb_20_1973(self): - self.assertItemsEqual(bigquery_type("February 20th 1973"), 'timestamp') + six.assertCountEqual(self, bigquery_type("February 20th 1973"), + 'timestamp') def test_timestring_thu_1_july_2004_22_30_00(self): - self.assertItemsEqual(bigquery_type("Thu, 1 July 2004 22:30:00"), - 'timestamp') + six.assertCountEqual(self, bigquery_type("Thu, 1 July 2004 22:30:00"), + 'timestamp') def test_today_is_not_timestring(self): - self.assertItemsEqual(bigquery_type("today"), 'string') + six.assertCountEqual(self, bigquery_type("today"), 'string') def test_timestring_next_thursday(self): - self.assertItemsEqual(bigquery_type("February 20th 1973"), 'timestamp') + six.assertCountEqual(self, bigquery_type("February 20th 1973"), 'timestamp') def test_timestring_arbitrary_fn_success(self): - self.assertItemsEqual( - bigquery_type("whatever", timestamp_parser=lambda x: True), + six.assertCountEqual( + self, bigquery_type("whatever", timestamp_parser=lambda x: True), 'timestamp') def test_timestring_arbitrary_fn_fail(self): - self.assertItemsEqual( - bigquery_type("February 20th 1973", - timestamp_parser=lambda x: False), + six.assertCountEqual( + self, bigquery_type("February 20th 1973", + timestamp_parser=lambda x: False), 'string') def test_class_instance_is_invalid_type(self): - class SomeClass: + class SomeClass(object): pass 
self.assertIsNone(bigquery_type(SomeClass())) @@ -61,15 +63,15 @@ def test_list_is_invalid_type(self): self.assertIsNone(bigquery_type([1, 2, 3])) def test_dict_is_record(self): - self.assertItemsEqual(bigquery_type({"a": 1}), 'record') + six.assertCountEqual(self, bigquery_type({"a": 1}), 'record') class TestFieldDescription(unittest.TestCase): def test_simple_string_field(self): - self.assertItemsEqual(describe_field("user", "Bob"), - {"name": "user", "type": "string", "mode": - "nullable"}) + six.assertCountEqual(self, describe_field("user", "Bob"), + {"name": "user", "type": "string", "mode": + "nullable"}) class TestSchemaGenerator(unittest.TestCase): @@ -79,7 +81,7 @@ def test_simple_record(self): schema = [{"name": "username", "type": "string", "mode": "nullable"}, {"name": "id", "type": "integer", "mode": "nullable"}] - self.assertItemsEqual(schema_from_record(record), schema) + six.assertCountEqual(self, schema_from_record(record), schema) def test_hierarchical_record(self): record = {"user": {"username": "Bob", "id": 123}} @@ -87,14 +89,42 @@ def test_hierarchical_record(self): "fields": [{"name": "username", "type": "string", "mode": "nullable"}, {"name": "id", "type": "integer", "mode": "nullable"}]}] - - self.assertItemsEqual(schema_from_record(record), schema) + generated_schema = schema_from_record(record) + schema_fields = schema[0].pop('fields') + generated_fields = generated_schema[0].pop('fields') + six.assertCountEqual(self, schema_fields, generated_fields) + six.assertCountEqual(self, generated_schema, schema) + + def test_hierarchical_record_with_timestamps(self): + record = {"global": "2001-01-01", "user": {"local": "2001-01-01"}} + + schema_with_ts = [ + {"name": "global", "type": "timestamp", "mode": "nullable"}, + {"name": "user", "type": "record", "mode": "nullable", + "fields": [{ + "name": "local", + "type": "timestamp", + "mode": "nullable"}]}] + + schema_without_ts = [ + {"name": "global", "type": "string", "mode": "nullable"}, + 
{"name": "user", "type": "record", "mode": "nullable", + "fields": [{ + "name": "local", + "type": "string", + "mode": "nullable"}]}] + + six.assertCountEqual(self, schema_from_record(record), schema_with_ts) + + six.assertCountEqual( + self, schema_from_record(record, timestamp_parser=lambda x: False), + schema_without_ts) def test_repeated_field(self): record = {"ids": [1, 2, 3, 4, 5]} schema = [{"name": "ids", "type": "integer", "mode": "repeated"}] - self.assertItemsEqual(schema_from_record(record), schema) + six.assertCountEqual(self, schema_from_record(record), schema) def test_nested_invalid_type_reported_correctly(self): key = "wrong answer" @@ -102,7 +132,7 @@ def test_nested_invalid_type_reported_correctly(self): try: schema_from_record({"a": {"b": [{"c": None}]}}) - except InvalidTypeException, e: + except InvalidTypeException as e: key = e.key value = e.value diff --git a/bigquery/version.py b/bigquery/version.py new file mode 100644 index 0000000..1c19d78 --- /dev/null +++ b/bigquery/version.py @@ -0,0 +1 @@ +__version__ = '1.15.0' diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..3f83b08 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,216 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. 
+PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . + +.PHONY: help +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " applehelp to make an Apple Help Book" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + @echo " coverage to run coverage check of the documentation (if enabled)" + +.PHONY: clean +clean: + rm -rf $(BUILDDIR)/* + +.PHONY: html +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
+ +.PHONY: dirhtml +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +.PHONY: singlehtml +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +.PHONY: pickle +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +.PHONY: json +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +.PHONY: htmlhelp +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +.PHONY: qthelp +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/BigQuery-Python.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/BigQuery-Python.qhc" + +.PHONY: applehelp +applehelp: + $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp + @echo + @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." + @echo "N.B. You won't be able to view it unless you put it in" \ + "~/Library/Documentation/Help or install it in your application" \ + "bundle." + +.PHONY: devhelp +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." 
+ @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/BigQuery-Python" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/BigQuery-Python" + @echo "# devhelp" + +.PHONY: epub +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +.PHONY: latex +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +.PHONY: latexpdf +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +.PHONY: latexpdfja +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +.PHONY: text +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +.PHONY: man +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +.PHONY: texinfo +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +.PHONY: info +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." 
+ make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +.PHONY: gettext +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +.PHONY: changes +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +.PHONY: linkcheck +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +.PHONY: doctest +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +.PHONY: coverage +coverage: + $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage + @echo "Testing of coverage in the sources finished, look at the " \ + "results in $(BUILDDIR)/coverage/python.txt." + +.PHONY: xml +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +.PHONY: pseudoxml +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..a97fc34 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# BigQuery-Python documentation build configuration file, created by +# sphinx-quickstart on Sat Apr 9 13:11:15 2016. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. 
+# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os + +#numpydoc_show_class_members = False + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +sys.path.insert(0, os.path.abspath('../')) +import bigquery + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.mathjax', + 'numpydoc', + 'sphinx.ext.autosummary' +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = 'BigQuery-Python' +copyright = '2016, Tyler Treat' +author = 'Tyler Treat' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = bigquery.__version__ +# The full version, including alpha/beta/rc tags. +release = bigquery.__version__ + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. 
+# Usually you set "language" from the command line for these cases. +language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'sphinxdoc' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". 
+#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
+#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr' +#html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. +# Now only 'ja' uses this config value +#html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +#html_search_scorer = 'scorer.js' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'BigQuery-Pythondoc' + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', + +# Latex figure (float) alignment +#'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'BigQuery-Python.tex', 'BigQuery-Python Documentation', + 'Tyler Treat', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. 
+#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'bigquery-python', 'BigQuery-Python Documentation', + [author], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'BigQuery-Python', 'BigQuery-Python Documentation', + author, 'BigQuery-Python', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..0708835 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,29 @@ +.. BigQuery-Python documentation master file, created by + sphinx-quickstart on Sat Apr 9 13:11:15 2016. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to BigQuery-Python's documentation! +=========================================== + +Content +------- + +.. 
toctree:: + + pages/client + pages/query_builder + pages/schema_builder + +References +---------- +* `BigQuery-Python Source Code `_ +* `BigQuery API Reference `_ + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..2b8c095 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,263 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=_build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . +set I18NSPHINXOPTS=%SPHINXOPTS% . +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^` where ^ is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. xml to make Docutils-native XML files + echo. pseudoxml to make pseudoxml-XML files for display purposes + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + echo. 
coverage to run coverage check of the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + + +REM Check if sphinx-build is available and fallback to Python version if any +%SPHINXBUILD% 1>NUL 2>NUL +if errorlevel 9009 goto sphinx_python +goto sphinx_ok + +:sphinx_python + +set SPHINXBUILD=python -m sphinx.__init__ +%SPHINXBUILD% 2> nul +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +:sphinx_ok + + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. 
+ echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\BigQuery-Python.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\BigQuery-Python.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdf" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf + cd %~dp0 + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdfja" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf-ja + cd %~dp0 + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. 
+ goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +if "%1" == "coverage" ( + %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage + if errorlevel 1 exit /b 1 + echo. + echo.Testing of coverage in the sources finished, look at the ^ +results in %BUILDDIR%/coverage/python.txt. + goto end +) + +if "%1" == "xml" ( + %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The XML files are in %BUILDDIR%/xml. + goto end +) + +if "%1" == "pseudoxml" ( + %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. + goto end +) + +:end diff --git a/docs/pages/client.rst b/docs/pages/client.rst new file mode 100644 index 0000000..f21a864 --- /dev/null +++ b/docs/pages/client.rst @@ -0,0 +1,13 @@ +.. _client: + +client +====== + +.. 
automodule:: bigquery.client
+ :members:
+
+:mod:`BigQueryClient` Class
+---------------------------
+
+.. autoclass:: bigquery.client.BigQueryClient
+ :members:
diff --git a/docs/pages/query_builder.rst b/docs/pages/query_builder.rst
new file mode 100644
index 0000000..4053073
--- /dev/null
+++ b/docs/pages/query_builder.rst
@@ -0,0 +1,7 @@
+.. _query_builder:
+
+query_builder
+=============
+
+.. automodule:: bigquery.query_builder
+ :members:
diff --git a/docs/pages/schema_builder.rst b/docs/pages/schema_builder.rst
new file mode 100644
index 0000000..0d16def
--- /dev/null
+++ b/docs/pages/schema_builder.rst
@@ -0,0 +1,7 @@
+.. _schema_builder:
+
+schema_builder
+==============
+
+.. automodule:: bigquery.schema_builder
+ :members:
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 00c05ef..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-google-api-python-client
-httplib2
-pyopenssl
-python-dateutil
\ No newline at end of file
diff --git a/requirements_dev.txt b/requirements_dev.txt
index a36ba42..1040dea 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -1,5 +1,6 @@
 nose
 rednose
-mock
+mock==4.0.2
 coverage
 nose-exclude
+tox
diff --git a/setup.py b/setup.py
index 5220cfd..fc1c5de 100644
--- a/setup.py
+++ b/setup.py
@@ -1,22 +1,29 @@
+from distutils.util import convert_path
 from setuptools import find_packages
 from setuptools import setup
 
-VERSION = '1.0.0'
+ns = {}
+version_path = convert_path('bigquery/version.py')
+with open(version_path) as version_file:
+    exec(version_file.read(), ns)
 
 setup_args = dict(
     name='BigQuery-Python',
     description='Simple Python client for interacting with Google BigQuery.',
     url='https://github.com/tylertreat/BigQuery-Python',
-    version=VERSION,
+    version=ns['__version__'],
     license='Apache',
     packages=find_packages(),
     include_package_data=True,
-    install_requires=['google-api-python-client', 'pyopenssl', 'httplib2',
-                      'python-dateutil'],
+    install_requires=[
+
'google-api-python-client',
+        'httplib2',
+        'python-dateutil'
+    ],
     author='Tyler Treat',
     author_email='ttreat31@gmail.com',
     classifiers=[
-        'Development Status :: 4 - Beta',
+        'Development Status :: 5 - Production/Stable',
         'Environment :: Web Environment',
         'Intended Audience :: Developers',
         'Operating System :: OS Independent',
@@ -26,4 +33,3 @@
 
 if __name__ == '__main__':
     setup(**setup_args)
-
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..58dadc9
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,12 @@
+# Tox (http://tox.testrun.org/) is a tool for running tests
+# in multiple virtualenvs. This configuration file will run the
+# test suite on all supported python versions. To use it, "pip install tox"
+# and then run "tox" from this directory.
+
+[tox]
+envlist = py27, py35, py36, nightly, pypy
+skip_missing_interpreters = True
+
+[testenv]
+commands = nosetests --logging-level=ERROR -a slow --with-coverage --cover-package=bigquery
+deps = -rrequirements_dev.txt