Merge remote-tracking branch 'upstream/master' into bigquery-b2
diff --git a/bigquery/.coveragerc b/bigquery/.coveragerc
new file mode 100644
index 0000000..d097511
--- /dev/null
+++ b/bigquery/.coveragerc
@@ -0,0 +1,13 @@
+[run]
+branch = True
+
+[report]
+fail_under = 100
+show_missing = True
+exclude_lines =
+ # Re-enable the standard pragma
+ pragma: NO COVER
+ # Ignore debug-only repr
+ def __repr__
+ # Ignore abstract methods
+ raise NotImplementedError
diff --git a/bigquery/README.rst b/bigquery/README.rst
new file mode 100644
index 0000000..01a1194
--- /dev/null
+++ b/bigquery/README.rst
@@ -0,0 +1,117 @@
+Python Client for Google BigQuery
+=================================
+
+ Python idiomatic client for `Google BigQuery`_
+
+.. _Google BigQuery: https://cloud.google.com/bigquery/what-is-bigquery
+
+|pypi| |versions|
+
+- `Documentation`_
+
+.. _Documentation: https://googlecloudplatform.github.io/google-cloud-python/latest/bigquery/usage.html
+
+Quick Start
+-----------
+
+.. code-block:: console
+
+ $ pip install --upgrade google-cloud-bigquery
+
+For more information on setting up your Python development environment, such as installing ``pip`` and ``virtualenv`` on your system, please refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform.
+
+.. _Python Development Environment Setup Guide: https://cloud.google.com/python/setup
+
+Authentication
+--------------
+
+With ``google-cloud-python`` we try to make authentication as painless as
+possible. Check out the `Authentication section`_ in our documentation to
+learn more. You may also find the `authentication document`_ shared by all
+the ``google-cloud-*`` libraries to be helpful.
+
+.. _Authentication section: https://google-cloud-python.readthedocs.io/en/latest/core/auth.html
+.. _authentication document: https://github.com/GoogleCloudPlatform/google-cloud-common/tree/master/authentication
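+
+For example, here is a minimal sketch of authenticating with an explicit
+service account key file (the key file path and project ID below are
+placeholders):
+
+.. code:: python
+
+ from google.cloud import bigquery
+ from google.oauth2 import service_account
+
+ credentials = service_account.Credentials.from_service_account_file(
+ '/path/to/service-account-key.json')
+ client = bigquery.Client(
+ project='your-project-id', credentials=credentials)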
+
+Using the API
+-------------
+
+Querying massive datasets can be time consuming and expensive without the
+right hardware and infrastructure. Google `BigQuery`_ (`BigQuery API docs`_)
+solves this problem by enabling super-fast, SQL queries against
+append-mostly tables, using the processing power of Google's infrastructure.
+
+.. _BigQuery: https://cloud.google.com/bigquery/what-is-bigquery
+.. _BigQuery API docs: https://cloud.google.com/bigquery/docs/reference/v2/
+
+Create a dataset
+~~~~~~~~~~~~~~~~
+
+.. code:: python
+
+ from google.cloud import bigquery
+ from google.cloud.bigquery import Dataset
+
+ client = bigquery.Client()
+
+ dataset_ref = client.dataset('dataset_name')
+ dataset = Dataset(dataset_ref)
+ dataset.description = 'my dataset'
+ dataset = client.create_dataset(dataset) # API request
+
+Load data from CSV
+~~~~~~~~~~~~~~~~~~
+
+.. code:: python
+
+ import csv
+
+ from google.cloud import bigquery
+ from google.cloud.bigquery import LoadJobConfig
+ from google.cloud.bigquery import SchemaField
+
+ client = bigquery.Client()
+
+ SCHEMA = [
+ SchemaField('full_name', 'STRING', mode='required'),
+ SchemaField('age', 'INTEGER', mode='required'),
+ ]
+ table_ref = client.dataset('dataset_name').table('table_name')
+
+ load_config = LoadJobConfig()
+ load_config.skip_leading_rows = 1
+ load_config.schema = SCHEMA
+
+ # Contents of csv_file.csv:
+ # Name,Age
+ # Tim,99
+ with open('csv_file.csv', 'rb') as readable:
+ client.load_table_from_file(
+ readable, table_ref, job_config=load_config) # API request
+
+Perform a query
+~~~~~~~~~~~~~~~
+
+.. code:: python
+
+ # Perform a query.
+ QUERY = (
+ 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` '
+ 'WHERE state = "TX" '
+ 'LIMIT 100')
+ query_job = client.query(QUERY) # API request
+ rows = query_job.result() # Waits for query to finish
+
+ for row in rows:
+ print(row.name)
+
+
+See the ``google-cloud-python`` API `BigQuery documentation`_ to learn how
+to connect to BigQuery using this Client Library.
+
+.. _BigQuery documentation: https://googlecloudplatform.github.io/google-cloud-python/latest/bigquery/usage.html
+
+.. |pypi| image:: https://img.shields.io/pypi/v/google-cloud-bigquery.svg
+ :target: https://pypi.org/project/google-cloud-bigquery/
+.. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg
+ :target: https://pypi.org/project/google-cloud-bigquery/
diff --git a/bigquery/google/cloud/bigquery/__init__.py b/bigquery/google/cloud/bigquery/__init__.py
new file mode 100644
index 0000000..4c3fcd7
--- /dev/null
+++ b/bigquery/google/cloud/bigquery/__init__.py
@@ -0,0 +1,78 @@
+# Copyright 2015 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Google BigQuery API wrapper.
+
+The main concepts with this API are:
+
+- :class:`~google.cloud.bigquery.dataset.Dataset` represents a
+ collection of tables.
+
+- :class:`~google.cloud.bigquery.table.Table` represents a single "relation".
+"""
+
+
+from pkg_resources import get_distribution
+__version__ = get_distribution('google-cloud-bigquery').version
+
+from google.cloud.bigquery._helpers import Row
+from google.cloud.bigquery._helpers import DEFAULT_RETRY
+from google.cloud.bigquery.client import Client
+from google.cloud.bigquery.dataset import AccessEntry
+from google.cloud.bigquery.dataset import Dataset
+from google.cloud.bigquery.dataset import DatasetReference
+from google.cloud.bigquery.job import CopyJobConfig
+from google.cloud.bigquery.job import ExtractJobConfig
+from google.cloud.bigquery.job import QueryJobConfig
+from google.cloud.bigquery.job import LoadJobConfig
+from google.cloud.bigquery.query import ArrayQueryParameter
+from google.cloud.bigquery.query import ScalarQueryParameter
+from google.cloud.bigquery.query import StructQueryParameter
+from google.cloud.bigquery.query import UDFResource
+from google.cloud.bigquery.schema import SchemaField
+from google.cloud.bigquery.table import Table
+from google.cloud.bigquery.table import TableReference
+from google.cloud.bigquery.external_config import ExternalConfig
+from google.cloud.bigquery.external_config import BigtableOptions
+from google.cloud.bigquery.external_config import BigtableColumnFamily
+from google.cloud.bigquery.external_config import BigtableColumn
+from google.cloud.bigquery.external_config import CSVOptions
+from google.cloud.bigquery.external_config import GoogleSheetsOptions
+
+__all__ = [
+ '__version__',
+ 'AccessEntry',
+ 'ArrayQueryParameter',
+ 'Client',
+ 'Dataset',
+ 'DatasetReference',
+ 'CopyJobConfig',
+ 'ExtractJobConfig',
+ 'QueryJobConfig',
+ 'Row',
+ 'LoadJobConfig',
+ 'ScalarQueryParameter',
+ 'SchemaField',
+ 'StructQueryParameter',
+ 'Table',
+ 'TableReference',
+ 'UDFResource',
+ 'DEFAULT_RETRY',
+ 'ExternalConfig',
+ 'BigtableOptions',
+ 'BigtableColumnFamily',
+ 'BigtableColumn',
+ 'CSVOptions',
+ 'GoogleSheetsOptions',
+]
diff --git a/bigquery/google/cloud/bigquery/_helpers.py b/bigquery/google/cloud/bigquery/_helpers.py
new file mode 100644
index 0000000..1ba9233
--- /dev/null
+++ b/bigquery/google/cloud/bigquery/_helpers.py
@@ -0,0 +1,562 @@
+# Copyright 2015 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Shared helper functions for BigQuery API classes."""
+
+import base64
+import datetime
+import operator
+
+import six
+
+from google.api_core import retry
+from google.cloud._helpers import UTC
+from google.cloud._helpers import _date_from_iso8601_date
+from google.cloud._helpers import _datetime_from_microseconds
+from google.cloud._helpers import _microseconds_from_datetime
+from google.cloud._helpers import _RFC3339_NO_FRACTION
+from google.cloud._helpers import _time_from_iso8601_time_naive
+from google.cloud._helpers import _to_bytes
+
+_RFC3339_MICROS_NO_ZULU = '%Y-%m-%dT%H:%M:%S.%f'
+
+
+def _not_null(value, field):
+ """Check whether 'value' should be coerced to 'field' type."""
+ return value is not None or field.mode != 'NULLABLE'
+
+
+def _int_from_json(value, field):
+ """Coerce 'value' to an int, if set or not nullable."""
+ if _not_null(value, field):
+ return int(value)
+
+
+def _float_from_json(value, field):
+ """Coerce 'value' to a float, if set or not nullable."""
+ if _not_null(value, field):
+ return float(value)
+
+
+def _bool_from_json(value, field):
+ """Coerce 'value' to a bool, if set or not nullable."""
+ if _not_null(value, field):
+ return value.lower() in ['t', 'true', '1']
+
+
+def _string_from_json(value, _):
+ """NOOP string -> string coercion"""
+ return value
+
+
+def _bytes_from_json(value, field):
+ """Base64-decode value"""
+ if _not_null(value, field):
+ return base64.standard_b64decode(_to_bytes(value))
+
+
+def _timestamp_from_json(value, field):
+ """Coerce 'value' to a datetime, if set or not nullable."""
+ if _not_null(value, field):
+ # value will be a float in seconds, to microsecond precision, in UTC.
+ return _datetime_from_microseconds(1e6 * float(value))
+
+
+def _timestamp_query_param_from_json(value, field):
+ """Coerce 'value' to a datetime, if set or not nullable.
+
+ Args:
+ value (str): The timestamp.
+ field (.SchemaField): The field corresponding to the value.
+
+ Returns:
+ Optional[datetime.datetime]: The parsed datetime object from
+ ``value`` if the ``field`` is not null (otherwise it is
+ :data:`None`).
+ """
+ if _not_null(value, field):
+ # Canonical formats for timestamps in BigQuery are flexible. See:
+ # g.co/cloud/bigquery/docs/reference/standard-sql/data-types#timestamp-type
+ # The separator between the date and time can be 'T' or ' '.
+ value = value.replace(' ', 'T', 1)
+ # The UTC timezone may be formatted as Z or +00:00.
+ value = value.replace('Z', '')
+ value = value.replace('+00:00', '')
+
+ if '.' in value:
+ # YYYY-MM-DDTHH:MM:SS.ffffff
+ return datetime.datetime.strptime(
+ value, _RFC3339_MICROS_NO_ZULU).replace(tzinfo=UTC)
+ else:
+ # YYYY-MM-DDTHH:MM:SS
+ return datetime.datetime.strptime(
+ value, _RFC3339_NO_FRACTION).replace(tzinfo=UTC)
+ else:
+ return None
+
+
+def _datetime_from_json(value, field):
+ """Coerce 'value' to a datetime, if set or not nullable.
+
+ Args:
+ value (str): The timestamp.
+ field (.SchemaField): The field corresponding to the value.
+
+ Returns:
+ Optional[datetime.datetime]: The parsed datetime object from
+ ``value`` if the ``field`` is not null (otherwise it is
+ :data:`None`).
+ """
+ if _not_null(value, field):
+ if '.' in value:
+ # YYYY-MM-DDTHH:MM:SS.ffffff
+ return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU)
+ else:
+ # YYYY-MM-DDTHH:MM:SS
+ return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION)
+ else:
+ return None
+
+
+def _date_from_json(value, field):
+ """Coerce 'value' to a datetime date, if set or not nullable"""
+ if _not_null(value, field):
+ # value will be a string, in YYYY-MM-DD form.
+ return _date_from_iso8601_date(value)
+
+
+def _time_from_json(value, field):
+ """Coerce 'value' to a datetime date, if set or not nullable"""
+ if _not_null(value, field):
+ # value will be a string, in HH:MM:SS form.
+ return _time_from_iso8601_time_naive(value)
+
+
+def _record_from_json(value, field):
+ """Coerce 'value' to a mapping, if set or not nullable."""
+ if _not_null(value, field):
+ record = {}
+ record_iter = zip(field.fields, value['f'])
+ for subfield, cell in record_iter:
+ converter = _CELLDATA_FROM_JSON[subfield.field_type]
+ if subfield.mode == 'REPEATED':
+ value = [converter(item['v'], subfield) for item in cell['v']]
+ else:
+ value = converter(cell['v'], subfield)
+ record[subfield.name] = value
+ return record
+
+
+_CELLDATA_FROM_JSON = {
+ 'INTEGER': _int_from_json,
+ 'INT64': _int_from_json,
+ 'FLOAT': _float_from_json,
+ 'FLOAT64': _float_from_json,
+ 'BOOLEAN': _bool_from_json,
+ 'BOOL': _bool_from_json,
+ 'STRING': _string_from_json,
+ 'BYTES': _bytes_from_json,
+ 'TIMESTAMP': _timestamp_from_json,
+ 'DATETIME': _datetime_from_json,
+ 'DATE': _date_from_json,
+ 'TIME': _time_from_json,
+ 'RECORD': _record_from_json,
+}
+
+_QUERY_PARAMS_FROM_JSON = dict(_CELLDATA_FROM_JSON)
+_QUERY_PARAMS_FROM_JSON['TIMESTAMP'] = _timestamp_query_param_from_json
+
+
+class Row(object):
+ """A BigQuery row.
+
+ Values can be accessed by position (index), by key like a dict,
+ or as properties.
+
+ :type values: tuple
+ :param values: the row values
+
+ :type field_to_index: dict
+ :param field_to_index: a mapping from schema field names to indexes
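+
+ Example (an illustrative sketch; the values and field-to-index mapping
+ below are made up)::
+
+ >>> row = Row(('Phred', 32), {'name': 0, 'age': 1})
+ >>> row[0]
+ 'Phred'
+ >>> row['age']
+ 32
+ >>> row.name
+ 'Phred'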
+ """
+
+ # Choose unusual field names to try to avoid conflict with schema fields.
+ __slots__ = ('_xxx_values', '_xxx_field_to_index')
+
+ def __init__(self, values, field_to_index):
+ self._xxx_values = values
+ self._xxx_field_to_index = field_to_index
+
+ def values(self):
+ return self._xxx_values
+
+ def __getattr__(self, name):
+ i = self._xxx_field_to_index.get(name)
+ if i is None:
+ raise AttributeError('no row field "%s"' % name)
+ return self._xxx_values[i]
+
+ def __len__(self):
+ return len(self._xxx_values)
+
+ def __getitem__(self, key):
+ if isinstance(key, six.string_types):
+ i = self._xxx_field_to_index.get(key)
+ if i is None:
+ raise KeyError('no row field "%s"' % key)
+ key = i
+ return self._xxx_values[key]
+
+ def __eq__(self, other):
+ if not isinstance(other, Row):
+ return NotImplemented
+ return (
+ self._xxx_values == other._xxx_values and
+ self._xxx_field_to_index == other._xxx_field_to_index)
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __repr__(self):
+ # sort field dict by value, for determinism
+ items = sorted(self._xxx_field_to_index.items(),
+ key=operator.itemgetter(1))
+ f2i = '{' + ', '.join('%r: %d' % i for i in items) + '}'
+ return 'Row({}, {})'.format(self._xxx_values, f2i)
+
+
+def _field_to_index_mapping(schema):
+ """Create a mapping from schema field name to index of field."""
+ return {f.name: i for i, f in enumerate(schema)}
+
+
+def _row_tuple_from_json(row, schema):
+ """Convert JSON row data to row with appropriate types.
+
+ Note: ``row['f']`` and ``schema`` are presumed to be of the same length.
+
+ :type row: dict
+ :param row: A JSON response row to be converted.
+
+ :type schema: tuple
+ :param schema: A tuple of
+ :class:`~google.cloud.bigquery.schema.SchemaField`.
+
+ :rtype: tuple
+ :returns: A tuple of data converted to native types.
+ """
+ row_data = []
+ for field, cell in zip(schema, row['f']):
+ converter = _CELLDATA_FROM_JSON[field.field_type]
+ if field.mode == 'REPEATED':
+ row_data.append([converter(item['v'], field)
+ for item in cell['v']])
+ else:
+ row_data.append(converter(cell['v'], field))
+
+ return tuple(row_data)
+
+
+def _rows_from_json(values, schema):
+ """Convert JSON row data to rows with appropriate types."""
+ field_to_index = _field_to_index_mapping(schema)
+ return [Row(_row_tuple_from_json(r, schema), field_to_index)
+ for r in values]
+
+
+def _int_to_json(value):
+ """Coerce 'value' to an JSON-compatible representation."""
+ if isinstance(value, int):
+ value = str(value)
+ return value
+
+
+def _float_to_json(value):
+ """Coerce 'value' to an JSON-compatible representation."""
+ return value
+
+
+def _bool_to_json(value):
+ """Coerce 'value' to an JSON-compatible representation."""
+ if isinstance(value, bool):
+ value = 'true' if value else 'false'
+ return value
+
+
+def _bytes_to_json(value):
+ """Coerce 'value' to an JSON-compatible representation."""
+ if isinstance(value, bytes):
+ value = base64.standard_b64encode(value).decode('ascii')
+ return value
+
+
+def _timestamp_to_json_parameter(value):
+ """Coerce 'value' to an JSON-compatible representation.
+
+ This version returns the string representation used in query parameters.
+ """
+ if isinstance(value, datetime.datetime):
+ if value.tzinfo not in (None, UTC):
+ # Convert to UTC and remove the time zone info.
+ value = value.replace(tzinfo=None) - value.utcoffset()
+ value = '%s %s+00:00' % (
+ value.date().isoformat(), value.time().isoformat())
+ return value
+
+
+def _timestamp_to_json_row(value):
+ """Coerce 'value' to an JSON-compatible representation.
+
+ This version returns floating-point seconds value used in row data.
+ """
+ if isinstance(value, datetime.datetime):
+ value = _microseconds_from_datetime(value) * 1e-6
+ return value
+
+
+def _datetime_to_json(value):
+ """Coerce 'value' to an JSON-compatible representation."""
+ if isinstance(value, datetime.datetime):
+ value = value.strftime(_RFC3339_MICROS_NO_ZULU)
+ return value
+
+
+def _date_to_json(value):
+ """Coerce 'value' to an JSON-compatible representation."""
+ if isinstance(value, datetime.date):
+ value = value.isoformat()
+ return value
+
+
+def _time_to_json(value):
+ """Coerce 'value' to an JSON-compatible representation."""
+ if isinstance(value, datetime.time):
+ value = value.isoformat()
+ return value
+
+
+# Converters used for scalar values marshalled as row data.
+_SCALAR_VALUE_TO_JSON_ROW = {
+ 'INTEGER': _int_to_json,
+ 'INT64': _int_to_json,
+ 'FLOAT': _float_to_json,
+ 'FLOAT64': _float_to_json,
+ 'BOOLEAN': _bool_to_json,
+ 'BOOL': _bool_to_json,
+ 'BYTES': _bytes_to_json,
+ 'TIMESTAMP': _timestamp_to_json_row,
+ 'DATETIME': _datetime_to_json,
+ 'DATE': _date_to_json,
+ 'TIME': _time_to_json,
+}
+
+
+# Converters used for scalar values marshalled as query parameters.
+_SCALAR_VALUE_TO_JSON_PARAM = _SCALAR_VALUE_TO_JSON_ROW.copy()
+_SCALAR_VALUE_TO_JSON_PARAM['TIMESTAMP'] = _timestamp_to_json_parameter
+
+
+def _snake_to_camel_case(value):
+ """Convert snake case string to camel case."""
+ words = value.split('_')
+ return words[0] + ''.join(map(str.capitalize, words[1:]))
+
+
+class _ApiResourceProperty(object):
+ """Base property implementation.
+
+ Values will be stored on a `_properties` helper attribute of the
+ property's job instance.
+
+ :type name: str
+ :param name: name of the property
+
+ :type resource_name: str
+ :param resource_name: name of the property in the resource dictionary
+ """
+
+ def __init__(self, name, resource_name):
+ self.name = name
+ self.resource_name = resource_name
+
+ def __get__(self, instance, owner):
+ """Descriptor protocol: accessor"""
+ if instance is None:
+ return self
+ return instance._properties.get(self.resource_name)
+
+ def _validate(self, value):
+ """Subclasses override to impose validation policy."""
+ pass
+
+ def __set__(self, instance, value):
+ """Descriptor protocol: mutator"""
+ self._validate(value)
+ instance._properties[self.resource_name] = value
+
+ def __delete__(self, instance):
+ """Descriptor protocol: deleter"""
+ del instance._properties[self.resource_name]
+
+
+class _TypedApiResourceProperty(_ApiResourceProperty):
+ """Property implementation: validates based on value type.
+
+ :type name: str
+ :param name: name of the property
+
+ :type resource_name: str
+ :param resource_name: name of the property in the resource dictionary
+
+ :type property_type: type or sequence of types
+ :param property_type: type to be validated
+ """
+ def __init__(self, name, resource_name, property_type):
+ super(_TypedApiResourceProperty, self).__init__(
+ name, resource_name)
+ self.property_type = property_type
+
+ def _validate(self, value):
+ """Ensure that 'value' is of the appropriate type.
+
+ :raises: ValueError on a type mismatch.
+ """
+ if value is None:
+ return
+ if not isinstance(value, self.property_type):
+ raise ValueError('Required type: %s' % (self.property_type,))
+
+
+class _ListApiResourceProperty(_ApiResourceProperty):
+ """Property implementation: validates based on value type.
+
+ :type name: str
+ :param name: name of the property
+
+ :type resource_name: str
+ :param resource_name: name of the property in the resource dictionary
+
+ :type property_type: type or sequence of types
+ :param property_type: type to be validated
+ """
+ def __init__(self, name, resource_name, property_type):
+ super(_ListApiResourceProperty, self).__init__(
+ name, resource_name)
+ self.property_type = property_type
+
+ def __get__(self, instance, owner):
+ """Descriptor protocol: accessor"""
+ if instance is None:
+ return self
+ return instance._properties.get(self.resource_name, [])
+
+ def _validate(self, value):
+ """Ensure that 'value' is of the appropriate type.
+
+ :raises: ValueError on a type mismatch.
+ """
+ if value is None:
+ raise ValueError((
+ 'Required type: list of {}. '
+ 'To unset, use del or set to empty list').format(
+ self.property_type,))
+ if not all(isinstance(item, self.property_type) for item in value):
+ raise ValueError(
+ 'Required type: list of %s' % (self.property_type,))
+
+
+class _EnumApiResourceProperty(_ApiResourceProperty):
+ """Pseudo-enumeration class.
+
+ :type name: str
+ :param name: name of the property.
+
+ :type resource_name: str
+ :param resource_name: name of the property in the resource dictionary
+ """
+
+
+def _item_to_row(iterator, resource):
+ """Convert a JSON row to the native object.
+
+ .. note::
+
+ This assumes that the ``schema`` attribute has been
+ added to the iterator after being created, which
+ should be done by the caller.
+
+ :type iterator: :class:`~google.api_core.page_iterator.Iterator`
+ :param iterator: The iterator that is currently in use.
+
+ :type resource: dict
+ :param resource: An item to be converted to a row.
+
+ :rtype: :class:`Row`
+ :returns: The next row in the page.
+ """
+ return Row(_row_tuple_from_json(resource, iterator.schema),
+ iterator._field_to_index)
+
+
+# pylint: disable=unused-argument
+def _rows_page_start(iterator, page, response):
+ """Grab total rows when :class:`~google.cloud.iterator.Page` starts.
+
+ :type iterator: :class:`~google.api_core.page_iterator.Iterator`
+ :param iterator: The iterator that is currently in use.
+
+ :type page: :class:`~google.api_core.page_iterator.Page`
+ :param page: The page that was just created.
+
+ :type response: dict
+ :param response: The JSON API response for a page of rows in a table.
+ """
+ total_rows = response.get('totalRows')
+ if total_rows is not None:
+ total_rows = int(total_rows)
+ iterator.total_rows = total_rows
+# pylint: enable=unused-argument
+
+
+def _should_retry(exc):
+ """Predicate for determining when to retry.
+
+ We retry if and only if the 'reason' is 'backendError'
+ or 'rateLimitExceeded'.
+ """
+ if not hasattr(exc, 'errors'):
+ return False
+ if len(exc.errors) == 0:
+ return False
+ reason = exc.errors[0]['reason']
+ return reason == 'backendError' or reason == 'rateLimitExceeded'
+
+
+DEFAULT_RETRY = retry.Retry(predicate=_should_retry)
+"""The default retry object.
+
+Any method with a ``retry`` parameter will be retried automatically,
+with reasonable defaults. To disable retry, pass ``retry=None``.
+To modify the default retry behavior, call a ``with_XXX`` method
+on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds,
+pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``.
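+
+For example, a minimal sketch (``client`` and ``table_ref`` are assumed to
+already exist)::
+
+ # Retry transient errors for at most 30 seconds.
+ retry_30s = DEFAULT_RETRY.with_deadline(30)
+ table = client.get_table(table_ref, retry=retry_30s)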
+"""
+
+
+def _int_or_none(value):
+ """Helper: deserialize int value from JSON string."""
+ if isinstance(value, int):
+ return value
+ if value is not None:
+ return int(value)
diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py
new file mode 100644
index 0000000..712b218
--- /dev/null
+++ b/bigquery/google/cloud/bigquery/client.py
@@ -0,0 +1,1357 @@
+# Copyright 2015 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Client for interacting with the Google BigQuery API."""
+
+from __future__ import absolute_import
+
+import collections
+import functools
+import os
+import uuid
+
+import six
+
+from google import resumable_media
+from google.resumable_media.requests import MultipartUpload
+from google.resumable_media.requests import ResumableUpload
+
+from google.api_core import page_iterator
+
+from google.cloud import exceptions
+from google.cloud.client import ClientWithProject
+from google.cloud.bigquery._http import Connection
+from google.cloud.bigquery.dataset import Dataset
+from google.cloud.bigquery.dataset import DatasetReference
+from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA
+from google.cloud.bigquery.table import TableReference
+from google.cloud.bigquery.table import _row_from_mapping
+from google.cloud.bigquery.job import CopyJob
+from google.cloud.bigquery.job import ExtractJob
+from google.cloud.bigquery.job import LoadJob
+from google.cloud.bigquery.job import QueryJob, QueryJobConfig
+from google.cloud.bigquery.query import QueryResults
+from google.cloud.bigquery._helpers import _item_to_row
+from google.cloud.bigquery._helpers import _rows_page_start
+from google.cloud.bigquery._helpers import _field_to_index_mapping
+from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW
+from google.cloud.bigquery._helpers import DEFAULT_RETRY
+from google.cloud.bigquery._helpers import _snake_to_camel_case
+
+
+_DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB
+_MAX_MULTIPART_SIZE = 5 * 1024 * 1024
+_DEFAULT_NUM_RETRIES = 6
+_BASE_UPLOAD_TEMPLATE = (
+ u'https://www.googleapis.com/upload/bigquery/v2/projects/'
+ u'{project}/jobs?uploadType=')
+_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'multipart'
+_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u'resumable'
+_GENERIC_CONTENT_TYPE = u'*/*'
+_READ_LESS_THAN_SIZE = (
+ 'Size {:d} was specified but the file-like object only had '
+ '{:d} bytes remaining.')
+
+
+class Project(object):
+ """Wrapper for resource describing a BigQuery project.
+
+ :type project_id: str
+ :param project_id: Opaque ID of the project
+
+ :type numeric_id: int
+ :param numeric_id: Numeric ID of the project
+
+ :type friendly_name: str
+ :param friendly_name: Display name of the project
+ """
+ def __init__(self, project_id, numeric_id, friendly_name):
+ self.project_id = project_id
+ self.numeric_id = numeric_id
+ self.friendly_name = friendly_name
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct an instance from a resource dict."""
+ return cls(
+ resource['id'], resource['numericId'], resource['friendlyName'])
+
+
+class Client(ClientWithProject):
+ """Client to bundle configuration needed for API requests.
+
+ :type project: str
+ :param project: the project which the client acts on behalf of. Will be
+ passed when creating a dataset / job. If not passed,
+ falls back to the default inferred from the environment.
+
+ :type credentials: :class:`~google.auth.credentials.Credentials`
+ :param credentials: (Optional) The OAuth2 Credentials to use for this
+ client. If not passed (and if no ``_http`` object is
+ passed), falls back to the default inferred from the
+ environment.
+
+ :type _http: :class:`~requests.Session`
+ :param _http: (Optional) HTTP object to make requests. Can be any object
+ that defines ``request()`` with the same interface as
+ :meth:`requests.Session.request`. If not passed, an
+ ``_http`` object is created that is bound to the
+ ``credentials`` for the current object.
+ This parameter should be considered private, and could
+ change in the future.
+ """
+
+ SCOPE = ('https://www.googleapis.com/auth/bigquery',
+ 'https://www.googleapis.com/auth/cloud-platform')
+ """The scopes required for authenticating as a BigQuery consumer."""
+
+ def __init__(self, project=None, credentials=None, _http=None):
+ super(Client, self).__init__(
+ project=project, credentials=credentials, _http=_http)
+ self._connection = Connection(self)
+
+ def list_projects(self, max_results=None, page_token=None,
+ retry=DEFAULT_RETRY):
+ """List projects for the project associated with this client.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/projects/list
+
+ :type max_results: int
+ :param max_results: maximum number of projects to return. If not
+ passed, defaults to a value set by the API.
+
+ :type page_token: str
+ :param page_token: opaque marker for the next "page" of projects. If
+ not passed, the API will return the first page of
+ projects.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: :class:`~google.api_core.page_iterator.Iterator`
+ :returns: Iterator of :class:`~google.cloud.bigquery.client.Project`
+ accessible to the current client.
+ """
+ return page_iterator.HTTPIterator(
+ client=self,
+ api_request=functools.partial(self._call_api, retry),
+ path='/projects',
+ item_to_value=_item_to_project,
+ items_key='projects',
+ page_token=page_token,
+ max_results=max_results)
+
+ def list_datasets(self, include_all=False, filter=None, max_results=None,
+ page_token=None, retry=DEFAULT_RETRY):
+ """List datasets for the project associated with this client.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list
+
+ :type include_all: bool
+ :param include_all: True if results include hidden datasets.
+
+ :type filter: str
+ :param filter: an expression for filtering the results by label.
+ For syntax, see
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list#filter.
+
+ :type max_results: int
+ :param max_results: maximum number of datasets to return. If not
+ passed, defaults to a value set by the API.
+
+ :type page_token: str
+ :param page_token: opaque marker for the next "page" of datasets. If
+ not passed, the API will return the first page of
+ datasets.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: :class:`~google.api_core.page_iterator.Iterator`
+ :returns: Iterator of :class:`~google.cloud.bigquery.dataset.Dataset`.
+ accessible to the current client.
+ """
+ extra_params = {}
+ if include_all:
+ extra_params['all'] = True
+ if filter:
+ # TODO: consider supporting a dict of label -> value for filter,
+ # and converting it into a string here.
+ extra_params['filter'] = filter
+ path = '/projects/%s/datasets' % (self.project,)
+ return page_iterator.HTTPIterator(
+ client=self,
+ api_request=functools.partial(self._call_api, retry),
+ path=path,
+ item_to_value=_item_to_dataset,
+ items_key='datasets',
+ page_token=page_token,
+ max_results=max_results,
+ extra_params=extra_params)
+
+ def dataset(self, dataset_id, project=None):
+ """Construct a reference to a dataset.
+
+ :type dataset_id: str
+ :param dataset_id: ID of the dataset.
+
+ :type project: str
+ :param project: (Optional) project ID for the dataset (defaults to
+ the project of the client).
+
+ :rtype: :class:`google.cloud.bigquery.dataset.DatasetReference`
+ :returns: a new ``DatasetReference`` instance
+ """
+ if project is None:
+ project = self.project
+
+ return DatasetReference(project, dataset_id)
+
+ def create_dataset(self, dataset):
+ """API call: create the dataset via a PUT request.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert
+
+ :type dataset: :class:`~google.cloud.bigquery.dataset.Dataset`
+ :param dataset: A ``Dataset`` populated with the desired initial state.
+ If project is missing, it defaults to the project of
+ the client.
+
+ :rtype: :class:`~google.cloud.bigquery.dataset.Dataset`
+ :returns: a new ``Dataset`` returned from the service.
+ """
+ path = '/projects/%s/datasets' % (dataset.project,)
+ api_response = self._connection.api_request(
+ method='POST', path=path, data=dataset._build_resource())
+ return Dataset.from_api_repr(api_response)
+
+ def create_table(self, table):
+ """API call: create a table via a PUT request
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert
+
+ :type table: :class:`~google.cloud.bigquery.table.Table`
+ :param table: A ``Table`` populated with the desired initial state.
+
+ :rtype: :class:`~google.cloud.bigquery.table.Table`
+ :returns: a new ``Table`` returned from the service.
+ """
+ path = '/projects/%s/datasets/%s/tables' % (
+ table.project, table.dataset_id)
+ resource = table._build_resource(Table.all_fields)
+ doomed = [field for field in resource if resource[field] is None]
+ for field in doomed:
+ del resource[field]
+ api_response = self._connection.api_request(
+ method='POST', path=path, data=resource)
+ return Table.from_api_repr(api_response)
+
+ def _call_api(self, retry, **kwargs):
+ call = functools.partial(self._connection.api_request, **kwargs)
+ if retry:
+ call = retry(call)
+ return call()
+
+ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY):
+ """Fetch the dataset referenced by ``dataset_ref``
+
+ :type dataset_ref:
+ :class:`google.cloud.bigquery.dataset.DatasetReference`
+ :param dataset_ref: the dataset to use.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: :class:`google.cloud.bigquery.dataset.Dataset`
+ :returns: a ``Dataset`` instance
+ """
+ api_response = self._call_api(retry,
+ method='GET',
+ path=dataset_ref.path)
+ return Dataset.from_api_repr(api_response)
+
+ def get_table(self, table_ref, retry=DEFAULT_RETRY):
+ """Fetch the table referenced by ``table_ref``
+
+ :type table_ref:
+ :class:`google.cloud.bigquery.table.TableReference`
+ :param table_ref: the table to use.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: :class:`google.cloud.bigquery.table.Table`
+ :returns: a ``Table`` instance
+ """
+ api_response = self._call_api(retry, method='GET', path=table_ref.path)
+ return Table.from_api_repr(api_response)
+
+ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY):
+ """Change some fields of a dataset.
+
+ Use ``fields`` to specify which fields to update. At least one field
+ must be provided. If a field is listed in ``fields`` and is ``None`` in
+ ``dataset``, it will be deleted.
+
+ If ``dataset.etag`` is not ``None``, the update will only
+ succeed if the dataset on the server has the same ETag. Thus
+ reading a dataset with ``get_dataset``, changing its fields,
+ and then passing it to ``update_dataset`` will ensure that the changes
+ will only be saved if no modifications to the dataset occurred
+ since the read.
+
+ :type dataset: :class:`google.cloud.bigquery.dataset.Dataset`
+ :param dataset: the dataset to update.
+
+ :type fields: sequence of string
+ :param fields: the fields of ``dataset`` to change, spelled as the
+ Dataset properties (e.g. "friendly_name").
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: :class:`google.cloud.bigquery.dataset.Dataset`
+ :returns: the modified ``Dataset`` instance
+ """
+ path = '/projects/%s/datasets/%s' % (dataset.project,
+ dataset.dataset_id)
+ partial = {}
+ for f in fields:
+ if not hasattr(dataset, f):
+ raise ValueError('No Dataset field %s' % f)
+ # All dataset attributes are trivially convertible to JSON except
+ # for access entries.
+ if f == 'access_entries':
+ attr = dataset._build_access_resource()
+ api_field = 'access'
+ else:
+ attr = getattr(dataset, f)
+ api_field = _snake_to_camel_case(f)
+ partial[api_field] = attr
+ if dataset.etag is not None:
+ headers = {'If-Match': dataset.etag}
+ else:
+ headers = None
+ api_response = self._call_api(
+ retry, method='PATCH', path=path, data=partial, headers=headers)
+ return Dataset.from_api_repr(api_response)
+
+ def update_table(self, table, properties, retry=DEFAULT_RETRY):
+ """API call: update table properties via a PUT request
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/update
+
+ :type table:
+ :class:`google.cloud.bigquery.table.Table`
+ :param table: the table to update.
+
+ :type properties: sequence of string
+ :param properties: the fields of ``table`` to change, spelled as the
+ Table properties (e.g. "friendly_name").
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: :class:`google.cloud.bigquery.table.Table`
+ :returns: a ``Table`` instance
+ """
+ partial = table._build_resource(properties)
+ if table.etag is not None:
+ headers = {'If-Match': table.etag}
+ else:
+ headers = None
+ api_response = self._call_api(
+ retry,
+ method='PATCH', path=table.path, data=partial, headers=headers)
+ return Table.from_api_repr(api_response)
+
+ def list_dataset_tables(self, dataset, max_results=None, page_token=None,
+ retry=DEFAULT_RETRY):
+ """List tables in the dataset.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list
+
+ :type dataset: One of:
+ :class:`~google.cloud.bigquery.dataset.Dataset`
+ :class:`~google.cloud.bigquery.dataset.DatasetReference`
+ :param dataset: the dataset whose tables to list, or a reference to it.
+
+ :type max_results: int
+ :param max_results: (Optional) Maximum number of tables to return.
+ If not passed, defaults to a value set by the API.
+
+ :type page_token: str
+ :param page_token: (Optional) Opaque marker for the next "page" of
+ tables. If not passed, the API will return the
+ first page of tables.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: :class:`~google.api_core.page_iterator.Iterator`
+ :returns: Iterator of :class:`~google.cloud.bigquery.table.Table`
+ contained within the current dataset.
+ """
+ if not isinstance(dataset, (Dataset, DatasetReference)):
+ raise TypeError('dataset must be a Dataset or a DatasetReference')
+ path = '%s/tables' % dataset.path
+ result = page_iterator.HTTPIterator(
+ client=self,
+ api_request=functools.partial(self._call_api, retry),
+ path=path,
+ item_to_value=_item_to_table,
+ items_key='tables',
+ page_token=page_token,
+ max_results=max_results)
+ result.dataset = dataset
+ return result
+
+ def delete_dataset(self, dataset, retry=DEFAULT_RETRY):
+ """Delete a dataset.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete
+
+ :type dataset: One of:
+ :class:`~google.cloud.bigquery.dataset.Dataset`
+ :class:`~google.cloud.bigquery.dataset.DatasetReference`
+ :param dataset: the dataset to delete, or a reference to it.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+ """
+ if not isinstance(dataset, (Dataset, DatasetReference)):
+ raise TypeError('dataset must be a Dataset or a DatasetReference')
+ self._call_api(retry, method='DELETE', path=dataset.path)
+
+ def delete_table(self, table, retry=DEFAULT_RETRY):
+ """Delete a table
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete
+
+ :type table: One of:
+ :class:`~google.cloud.bigquery.table.Table`
+ :class:`~google.cloud.bigquery.table.TableReference`
+ :param table: the table to delete, or a reference to it.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+ """
+ if not isinstance(table, (Table, TableReference)):
+ raise TypeError('table must be a Table or a TableReference')
+ self._call_api(retry, method='DELETE', path=table.path)
+
+ def _get_query_results(self, job_id, retry, project=None, timeout_ms=None):
+ """Get the query results object for a query job.
+
+ :type job_id: str
+ :param job_id: Name of the query job.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :type project: str
+ :param project:
+ (Optional) project ID for the query job (defaults to the project of
+ the client).
+
+ :type timeout_ms: int
+ :param timeout_ms:
+ (Optional) number of milliseconds the API call should wait for
+ the query to complete before the request times out.
+
+ :rtype: :class:`google.cloud.bigquery.query.QueryResults`
+ :returns: a new ``QueryResults`` instance
+ """
+
+ extra_params = {'maxResults': 0}
+
+ if project is None:
+ project = self.project
+
+ if timeout_ms is not None:
+ extra_params['timeoutMs'] = timeout_ms
+
+ path = '/projects/{}/queries/{}'.format(project, job_id)
+
+ # This call is typically made in a polling loop that checks whether the
+ # job is complete (from QueryJob.done(), called ultimately from
+ # QueryJob.result()). So we don't need to poll here.
+ resource = self._call_api(
+ retry, method='GET', path=path, query_params=extra_params)
+ return QueryResults.from_api_repr(resource)
+
+ def job_from_resource(self, resource):
+ """Detect correct job type from resource and instantiate.
+
+ :type resource: dict
+ :param resource: one job resource from API response
+
+ :rtype: One of:
+ :class:`google.cloud.bigquery.job.LoadJob`,
+ :class:`google.cloud.bigquery.job.CopyJob`,
+ :class:`google.cloud.bigquery.job.ExtractJob`,
+ :class:`google.cloud.bigquery.job.QueryJob`
+ :returns: the job instance, constructed via the resource
+ """
+ config = resource['configuration']
+ if 'load' in config:
+ return LoadJob.from_api_repr(resource, self)
+ elif 'copy' in config:
+ return CopyJob.from_api_repr(resource, self)
+ elif 'extract' in config:
+ return ExtractJob.from_api_repr(resource, self)
+ elif 'query' in config:
+ return QueryJob.from_api_repr(resource, self)
+ raise ValueError('Cannot parse job resource')
+
+ def get_job(self, job_id, project=None, retry=DEFAULT_RETRY):
+ """Fetch a job for the project associated with this client.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get
+
+ :type job_id: str
+ :param job_id: Name of the job.
+
+ :type project: str
+ :param project:
+ project ID owning the job (defaults to the client's project)
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: :class:`~google.cloud.bigquery.job._AsyncJob`
+ :returns:
+ Concrete job instance, based on the resource returned by the API.
+ """
+ extra_params = {'projection': 'full'}
+
+ if project is None:
+ project = self.project
+
+ path = '/projects/{}/jobs/{}'.format(project, job_id)
+
+ resource = self._call_api(
+ retry, method='GET', path=path, query_params=extra_params)
+
+ return self.job_from_resource(resource)
+
+ def list_jobs(self, max_results=None, page_token=None, all_users=None,
+ state_filter=None, retry=DEFAULT_RETRY):
+ """List jobs for the project associated with this client.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/list
+
+ :type max_results: int
+ :param max_results: maximum number of jobs to return. If not
+ passed, defaults to a value set by the API.
+
+ :type page_token: str
+ :param page_token: opaque marker for the next "page" of jobs. If
+ not passed, the API will return the first page of
+ jobs.
+
+ :type all_users: bool
+ :param all_users: if true, include jobs owned by all users in the
+ project.
+
+ :type state_filter: str
+ :param state_filter: if passed, include only jobs matching the given
+ state. One of
+
+ * ``"done"``
+ * ``"pending"``
+ * ``"running"``
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: :class:`~google.api_core.page_iterator.Iterator`
+ :returns: Iterable of job instances.
+ """
+ extra_params = {'projection': 'full'}
+
+ if all_users is not None:
+ extra_params['allUsers'] = all_users
+
+ if state_filter is not None:
+ extra_params['stateFilter'] = state_filter
+
+ path = '/projects/%s/jobs' % (self.project,)
+ return page_iterator.HTTPIterator(
+ client=self,
+ api_request=functools.partial(self._call_api, retry),
+ path=path,
+ item_to_value=_item_to_job,
+ items_key='jobs',
+ page_token=page_token,
+ max_results=max_results,
+ extra_params=extra_params)
+
+ def load_table_from_uri(self, source_uris, destination,
+ job_id=None, job_id_prefix=None,
+ job_config=None, retry=DEFAULT_RETRY):
+ """Starts a job for loading data into a table from CloudStorage.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load
+
+ :type source_uris: One of:
+ str
+ sequence of string
+ :param source_uris: URIs of data files to be loaded; in format
+ ``gs://<bucket_name>/<object_name_or_glob>``.
+
+ :type destination: :class:`google.cloud.bigquery.table.TableReference`
+ :param destination: Table into which data is to be loaded.
+
+ :type job_id: str
+ :param job_id: (Optional) Name of the job.
+
+ :type job_id_prefix: str or ``NoneType``
+ :param job_id_prefix: (Optional) the user-provided prefix for a
+ randomly generated job ID. This parameter will be
+ ignored if a ``job_id`` is also given.
+
+ :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig`
+ :param job_config: (Optional) Extra configuration options for the job.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: :class:`google.cloud.bigquery.job.LoadJob`
+ :returns: a new ``LoadJob`` instance
+ """
+ job_id = _make_job_id(job_id, job_id_prefix)
+ if isinstance(source_uris, six.string_types):
+ source_uris = [source_uris]
+ job = LoadJob(job_id, source_uris, destination, self, job_config)
+ job.begin(retry=retry)
+ return job
+
+ def load_table_from_file(self, file_obj, destination,
+ rewind=False,
+ size=None,
+ num_retries=_DEFAULT_NUM_RETRIES,
+ job_id=None, job_id_prefix=None, job_config=None):
+ """Upload the contents of this table from a file-like object.
+
+ Like load_table_from_uri, this creates, starts and returns
+ a ``LoadJob``.
+
+ :type file_obj: file
+ :param file_obj: A file handle opened in binary mode for reading.
+
+ :type destination: :class:`google.cloud.bigquery.table.TableReference`
+ :param destination: Table into which data is to be loaded.
+
+ :type rewind: bool
+ :param rewind: If True, seek to the beginning of the file handle before
+ reading the file.
+
+ :type size: int
+ :param size: The number of bytes to read from the file handle.
+ If size is ``None`` or at least 5 MB, a resumable upload
+ will be used. Otherwise, a multipart upload will be used.
+
+ :type num_retries: int
+ :param num_retries: Number of upload retries. Defaults to 6.
+
+ :type job_id: str
+ :param job_id: (Optional) Name of the job.
+
+ :type job_id_prefix: str or ``NoneType``
+ :param job_id_prefix: (Optional) the user-provided prefix for a
+ randomly generated job ID. This parameter will be
+ ignored if a ``job_id`` is also given.
+
+ :type job_config: :class:`google.cloud.bigquery.job.LoadJobConfig`
+ :param job_config: (Optional) Extra configuration options for the job.
+
+ :rtype: :class:`~google.cloud.bigquery.job.LoadJob`
+
+ :returns: the job instance used to load the data (e.g., for
+ querying status). Note that the job is already started:
+ do not call ``job.begin()``.
+ :raises: :class:`ValueError` if ``size`` is not passed in and cannot
+ be determined, or if ``file_obj`` is detected to be opened
+ in text mode.
+ """
+ job_id = _make_job_id(job_id, job_id_prefix)
+ job = LoadJob(job_id, None, destination, self, job_config)
+ job_resource = job._build_resource()
+ if rewind:
+ file_obj.seek(0, os.SEEK_SET)
+ _check_mode(file_obj)
+ try:
+ if size is None or size >= _MAX_MULTIPART_SIZE:
+ response = self._do_resumable_upload(
+ file_obj, job_resource, num_retries)
+ else:
+ response = self._do_multipart_upload(
+ file_obj, job_resource, size, num_retries)
+ except resumable_media.InvalidResponse as exc:
+ raise exceptions.from_http_response(exc.response)
+ return self.job_from_resource(response.json())
+
+ def _do_resumable_upload(self, stream, metadata, num_retries):
+ """Perform a resumable upload.
+
+ :type stream: IO[bytes]
+ :param stream: A bytes IO object open for reading.
+
+ :type metadata: dict
+ :param metadata: The metadata associated with the upload.
+
+ :type num_retries: int
+ :param num_retries: Number of upload retries. (Deprecated: This
+ argument will be removed in a future release.)
+
+ :rtype: :class:`~requests.Response`
+ :returns: The "200 OK" response object returned after the final chunk
+ is uploaded.
+ """
+ upload, transport = self._initiate_resumable_upload(
+ stream, metadata, num_retries)
+
+ while not upload.finished:
+ response = upload.transmit_next_chunk(transport)
+
+ return response
+
+ def _initiate_resumable_upload(self, stream, metadata, num_retries):
+ """Initiate a resumable upload.
+
+ :type stream: IO[bytes]
+ :param stream: A bytes IO object open for reading.
+
+ :type metadata: dict
+ :param metadata: The metadata associated with the upload.
+
+ :type num_retries: int
+ :param num_retries: Number of upload retries. (Deprecated: This
+ argument will be removed in a future release.)
+
+ :rtype: tuple
+ :returns:
+ Pair of
+
+ * The :class:`~google.resumable_media.requests.ResumableUpload`
+ that was created
+ * The ``transport`` used to initiate the upload.
+ """
+ chunk_size = _DEFAULT_CHUNKSIZE
+ transport = self._http
+ headers = _get_upload_headers(self._connection.USER_AGENT)
+ upload_url = _RESUMABLE_URL_TEMPLATE.format(project=self.project)
+ # TODO: modify ResumableUpload to take a retry.Retry object
+ # that it can use for the initial RPC.
+ upload = ResumableUpload(upload_url, chunk_size, headers=headers)
+
+ if num_retries is not None:
+ upload._retry_strategy = resumable_media.RetryStrategy(
+ max_retries=num_retries)
+
+ upload.initiate(
+ transport, stream, metadata, _GENERIC_CONTENT_TYPE,
+ stream_final=False)
+
+ return upload, transport
+
+ def _do_multipart_upload(self, stream, metadata, size, num_retries):
+ """Perform a multipart upload.
+
+ :type stream: IO[bytes]
+ :param stream: A bytes IO object open for reading.
+
+ :type metadata: dict
+ :param metadata: The metadata associated with the upload.
+
+ :type size: int
+ :param size: The number of bytes to be uploaded (which will be read
+ from ``stream``). If not provided, the upload will be
+ concluded once ``stream`` is exhausted (or :data:`None`).
+
+ :type num_retries: int
+ :param num_retries: Number of upload retries. (Deprecated: This
+ argument will be removed in a future release.)
+
+ :rtype: :class:`~requests.Response`
+ :returns: The "200 OK" response object returned after the multipart
+ upload request.
+ :raises: :exc:`ValueError` if the ``stream`` has fewer than ``size``
+ bytes remaining.
+ """
+ data = stream.read(size)
+ if len(data) < size:
+ msg = _READ_LESS_THAN_SIZE.format(size, len(data))
+ raise ValueError(msg)
+
+ headers = _get_upload_headers(self._connection.USER_AGENT)
+
+ upload_url = _MULTIPART_URL_TEMPLATE.format(project=self.project)
+ upload = MultipartUpload(upload_url, headers=headers)
+
+ if num_retries is not None:
+ upload._retry_strategy = resumable_media.RetryStrategy(
+ max_retries=num_retries)
+
+ response = upload.transmit(
+ self._http, data, metadata, _GENERIC_CONTENT_TYPE)
+
+ return response
+
+ def copy_table(self, sources, destination, job_id=None, job_id_prefix=None,
+ job_config=None, retry=DEFAULT_RETRY):
+ """Start a job for copying one or more tables into another table.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy
+
+ :type sources: One of:
+ :class:`~google.cloud.bigquery.table.TableReference`
+ sequence of
+ :class:`~google.cloud.bigquery.table.TableReference`
+ :param sources: Table or tables to be copied.
+
+
+ :type destination: :class:`google.cloud.bigquery.table.TableReference`
+ :param destination: Table into which data is to be copied.
+
+ :type job_id: str
+ :param job_id: (Optional) The ID of the job.
+
+ :type job_id_prefix: str or ``NoneType``
+ :param job_id_prefix: (Optional) the user-provided prefix for a
+ randomly generated job ID. This parameter will be
+ ignored if a ``job_id`` is also given.
+
+ :type job_config: :class:`google.cloud.bigquery.job.CopyJobConfig`
+ :param job_config: (Optional) Extra configuration options for the job.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: :class:`google.cloud.bigquery.job.CopyJob`
+ :returns: a new ``CopyJob`` instance
+ """
+ job_id = _make_job_id(job_id, job_id_prefix)
+
+ if not isinstance(sources, collections.Sequence):
+ sources = [sources]
+ job = CopyJob(job_id, sources, destination, client=self,
+ job_config=job_config)
+ job.begin(retry=retry)
+ return job
+
+ def extract_table(
+ self, source, destination_uris, job_config=None, job_id=None,
+ job_id_prefix=None, retry=DEFAULT_RETRY):
+ """Start a job to extract a table into Cloud Storage files.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract
+
+ :type source: :class:`google.cloud.bigquery.table.TableReference`
+ :param source: table to be extracted.
+
+ :type destination_uris: One of:
+ str or
+ sequence of str
+ :param destination_uris:
+ URIs of Cloud Storage file(s) into which table data is to be
+ extracted; in format ``gs://<bucket_name>/<object_name_or_glob>``.
+
+ :type job_id: str
+ :param job_id: (Optional) The ID of the job.
+
+ :type job_id_prefix: str or ``NoneType``
+ :param job_id_prefix: (Optional) the user-provided prefix for a
+ randomly generated job ID. This parameter will be
+ ignored if a ``job_id`` is also given.
+
+ :type job_config: :class:`google.cloud.bigquery.job.ExtractJobConfig`
+ :param job_config: (Optional) Extra configuration options for the job.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: :class:`google.cloud.bigquery.job.ExtractJob`
+ :returns: a new ``ExtractJob`` instance
+ """
+ job_id = _make_job_id(job_id, job_id_prefix)
+
+ if isinstance(destination_uris, six.string_types):
+ destination_uris = [destination_uris]
+
+ job = ExtractJob(
+ job_id, source, destination_uris, client=self,
+ job_config=job_config)
+ job.begin(retry=retry)
+ return job
+
+ def query(self, query, job_config=None, job_id=None, job_id_prefix=None,
+ retry=DEFAULT_RETRY):
+ """Start a job that runs a SQL query.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query
+
+ :type query: str
+ :param query:
+ SQL query to be executed. Defaults to the standard SQL dialect.
+ Use the ``job_config`` parameter to change dialects.
+
+ :type job_config: :class:`google.cloud.bigquery.job.QueryJobConfig`
+ :param job_config: (Optional) Extra configuration options for the job.
+
+ :type job_id: str
+ :param job_id: (Optional) ID to use for the query job.
+
+ :type job_id_prefix: str or ``NoneType``
+ :param job_id_prefix: (Optional) the user-provided prefix for a
+ randomly generated job ID. This parameter will be
+ ignored if a ``job_id`` is also given.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: :class:`google.cloud.bigquery.job.QueryJob`
+ :returns: a new ``QueryJob`` instance
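+
+ Example (an illustrative sketch; assumes ``client`` is an existing
+ :class:`Client` and that the legacy SQL dialect is wanted)::
+
+ config = QueryJobConfig()
+ config.use_legacy_sql = True
+ job = client.query(
+ 'SELECT COUNT(*) FROM [my-project:my_dataset.my_table]',
+ job_config=config)
+ rows = job.result() # waits for the query to finish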
+ """
+ job_id = _make_job_id(job_id, job_id_prefix)
+ job = QueryJob(job_id, query, client=self, job_config=job_config)
+ job.begin(retry=retry)
+ return job
+
+ def create_rows(self, table, rows, selected_fields=None, **kwargs):
+ """API call: insert table data via a POST request
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll
+
+ :type table: One of:
+ :class:`~google.cloud.bigquery.table.Table`
+ :class:`~google.cloud.bigquery.table.TableReference`
+ :param table: the destination table for the row data, or a reference
+ to it.
+
+ :type rows: One of:
+ list of tuples
+ list of dictionaries
+ :param rows: Row data to be inserted. If a list of tuples is given,
+ each tuple should contain data for each schema field on
+ the current table and in the same order as the schema
+ fields. If a list of dictionaries is given, the keys must
+ include all required fields in the schema. Keys which do
+ not correspond to a field in the schema are ignored.
+
+ :type selected_fields: list of :class:`SchemaField`
+ :param selected_fields:
+ The fields to return. Required if ``table`` is a
+ :class:`~google.cloud.bigquery.table.TableReference`.
+
+ :type kwargs: dict
+ :param kwargs: Keyword arguments to
+ `~google.cloud.bigquery.client.Client.create_rows_json`
+
+ :rtype: list of mappings
+ :returns: One mapping per row with insert errors: the "index" key
+ identifies the row, and the "errors" key contains a list
+ of the mappings describing one or more problems with the
+ row.
+ :raises: ValueError if table's schema is not set
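+
+ Example (illustrative sketch; assumes ``table`` was fetched with
+ ``client.get_table`` and has ``full_name`` and ``age`` fields)::
+
+     rows = [('Phred Phlyntstone', 32), ('Wylma Phlyntstone', 29)]
+     errors = client.create_rows(table, rows)
+     assert errors == []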
+ """
+ if selected_fields is not None:
+ schema = selected_fields
+ elif isinstance(table, TableReference):
+ raise ValueError('need selected_fields with TableReference')
+ elif isinstance(table, Table):
+ if len(table._schema) == 0:
+ raise ValueError(_TABLE_HAS_NO_SCHEMA)
+ schema = table.schema
+ else:
+ raise TypeError('table should be Table or TableReference')
+
+ json_rows = []
+
+ for index, row in enumerate(rows):
+ if isinstance(row, dict):
+ row = _row_from_mapping(row, schema)
+ json_row = {}
+
+ for field, value in zip(schema, row):
+ converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type)
+ if converter is not None: # STRING doesn't need converting
+ value = converter(value)
+ json_row[field.name] = value
+
+ json_rows.append(json_row)
+
+ return self.create_rows_json(table, json_rows, **kwargs)
+
+ def create_rows_json(self, table, json_rows, row_ids=None,
+ skip_invalid_rows=None, ignore_unknown_values=None,
+ template_suffix=None, retry=DEFAULT_RETRY):
+ """API call: insert table data via a POST request
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll
+
+ :type table: One of:
+ :class:`~google.cloud.bigquery.table.Table`
+ :class:`~google.cloud.bigquery.table.TableReference`
+ :param table: the destination table for the row data, or a reference
+ to it.
+
+ :type json_rows: list of dictionaries
+ :param json_rows: Row data to be inserted. Keys must match the table
+ schema fields and values must be JSON-compatible
+ representations.
+
+ :type row_ids: list of string
+ :param row_ids: (Optional) Unique ids, one per row being inserted.
+ If omitted, unique IDs are created.
+
+ :type skip_invalid_rows: bool
+ :param skip_invalid_rows: (Optional) Insert all valid rows of a
+ request, even if invalid rows exist.
+ The default value is False, which causes
+ the entire request to fail if any invalid
+ rows exist.
+
+ :type ignore_unknown_values: bool
+ :param ignore_unknown_values: (Optional) Accept rows that contain
+ values that do not match the schema.
+ The unknown values are ignored. Default
+ is False, which treats unknown values as
+ errors.
+
+ :type template_suffix: str
+ :param template_suffix:
+ (Optional) treat ``name`` as a template table and provide a suffix.
+ BigQuery will create the table ``<name> + <template_suffix>`` based
+ on the schema of the template table. See
+ https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: list of mappings
+ :returns: One mapping per row with insert errors: the "index" key
+ identifies the row, and the "errors" key contains a list
+ of the mappings describing one or more problems with the
+ row.
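+
+ Example (illustrative sketch; keys mirror the table schema)::
+
+     json_rows = [{'full_name': 'Phred Phlyntstone', 'age': 32}]
+     errors = client.create_rows_json(table, json_rows)
+     assert errors == []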
+ """
+ rows_info = []
+ data = {'rows': rows_info}
+
+ for index, row in enumerate(json_rows):
+ info = {'json': row}
+ if row_ids is not None:
+ info['insertId'] = row_ids[index]
+ else:
+ info['insertId'] = str(uuid.uuid4())
+ rows_info.append(info)
+
+ if skip_invalid_rows is not None:
+ data['skipInvalidRows'] = skip_invalid_rows
+
+ if ignore_unknown_values is not None:
+ data['ignoreUnknownValues'] = ignore_unknown_values
+
+ if template_suffix is not None:
+ data['templateSuffix'] = template_suffix
+
+ # We can always retry, because every row has an insert ID.
+ response = self._call_api(
+ retry,
+ method='POST',
+ path='%s/insertAll' % table.path,
+ data=data)
+ errors = []
+
+ for error in response.get('insertErrors', ()):
+ errors.append({'index': int(error['index']),
+ 'errors': error['errors']})
+
+ return errors
+
+ def query_rows(self, query, job_config=None, job_id=None, timeout=None,
+ retry=DEFAULT_RETRY):
+ """Start a query job and wait for the results.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query
+
+ :type query: str
+ :param query:
+ SQL query to be executed. Defaults to the standard SQL dialect.
+ Use the ``job_config`` parameter to change dialects.
+
+ :type job_config: :class:`google.cloud.bigquery.job.QueryJobConfig`
+ :param job_config: (Optional) Extra configuration options for the job.
+
+ :type job_id: str
+ :param job_id: (Optional) ID to use for the query job.
+
+ :type timeout: float
+ :param timeout:
+ (Optional) How long (in seconds) to wait for job to complete
+ before raising a :class:`TimeoutError`.
+
+ :rtype: :class:`~google.api_core.page_iterator.Iterator`
+ :returns:
+ Iterator of row data :class:`tuple`s. During each page, the
+ iterator will have the ``total_rows`` attribute set, which counts
+ the total number of rows **in the result set** (this is distinct
+ from the total number of rows in the current page:
+ ``iterator.page.num_items``).
+
+ :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the job
+ failed or :class:`TimeoutError` if the job did not complete in the
+ given timeout.
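+
+ Example (minimal sketch)::
+
+     for row in client.query_rows('SELECT 1 AS one'):
+         print(row)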
+ """
+ job = self.query(
+ query, job_config=job_config, job_id=job_id, retry=retry)
+ return job.result(timeout=timeout)
+
+ def list_rows(self, table, selected_fields=None, max_results=None,
+ page_token=None, start_index=None, retry=DEFAULT_RETRY):
+ """List the rows of the table.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list
+
+ .. note::
+
+ This method assumes that the provided schema is up-to-date with the
+ schema as defined on the back-end: if the two schemas are not
+ identical, the values returned may be incomplete. To ensure that the
+ local copy of the schema is up-to-date, call ``client.get_table``.
+
+ :type table: One of:
+ :class:`~google.cloud.bigquery.table.Table`
+ :class:`~google.cloud.bigquery.table.TableReference`
+ :param table: the table to list, or a reference to it.
+
+ :type selected_fields: list of :class:`SchemaField`
+ :param selected_fields:
+ The fields to return. Required if ``table`` is a
+ :class:`~google.cloud.bigquery.table.TableReference`.
+
+ :type max_results: int
+ :param max_results: maximum number of rows to return.
+
+ :type page_token: str
+ :param page_token: (Optional) Token representing a cursor into the
+ table's rows.
+
+ :type start_index: int
+ :param start_index: (Optional) The zero-based index of the starting
+ row to read.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: :class:`~google.api_core.page_iterator.Iterator`
+ :returns: Iterator of row data :class:`tuple`s. During each page, the
+ iterator will have the ``total_rows`` attribute set,
+ which counts the total number of rows **in the table**
+ (this is distinct from the total number of rows in the
+ current page: ``iterator.page.num_items``).
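+
+ Example (illustrative sketch; the dataset and table names are
+ placeholders)::
+
+     table_ref = client.dataset('my_dataset').table('my_table')
+     table = client.get_table(table_ref)  # fetch an up-to-date schema
+     for row in client.list_rows(table, max_results=10):
+         print(row)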
+
+ """
+ if selected_fields is not None:
+ schema = selected_fields
+ elif isinstance(table, TableReference):
+ raise ValueError('need selected_fields with TableReference')
+ elif isinstance(table, Table):
+ if len(table._schema) == 0:
+ raise ValueError(_TABLE_HAS_NO_SCHEMA)
+ schema = table.schema
+ else:
+ raise TypeError('table should be Table or TableReference')
+
+ params = {}
+ if selected_fields is not None:
+ params['selectedFields'] = ','.join(
+ field.name for field in selected_fields)
+
+ if start_index is not None:
+ params['startIndex'] = start_index
+
+ iterator = page_iterator.HTTPIterator(
+ client=self,
+ api_request=functools.partial(self._call_api, retry),
+ path='%s/data' % (table.path,),
+ item_to_value=_item_to_row,
+ items_key='rows',
+ page_token=page_token,
+ next_token='pageToken',
+ max_results=max_results,
+ page_start=_rows_page_start,
+ extra_params=params)
+ iterator.schema = schema
+ iterator._field_to_index = _field_to_index_mapping(schema)
+ return iterator
+
+ def list_partitions(self, table, retry=DEFAULT_RETRY):
+ """List the partitions in a table.
+
+ :type table: One of:
+ :class:`~google.cloud.bigquery.table.Table`
+ :class:`~google.cloud.bigquery.table.TableReference`
+ :param table: the table to list, or a reference to it.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: list
+ :returns: a list of time partitions
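+
+ Example (illustrative sketch; assumes a time-partitioned table)::
+
+     table_ref = client.dataset('my_dataset').table('my_partitioned_table')
+     partition_ids = client.list_partitions(table_ref)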
+ """
+ config = QueryJobConfig()
+ config.use_legacy_sql = True # required for '$' syntax
+ rows = self.query_rows(
+ 'SELECT partition_id from [%s:%s.%s$__PARTITIONS_SUMMARY__]' %
+ (table.project, table.dataset_id, table.table_id),
+ job_config=config,
+ retry=retry)
+ return [row[0] for row in rows]
+
+
+# pylint: disable=unused-argument
+def _item_to_project(iterator, resource):
+ """Convert a JSON project to the native object.
+
+ :type iterator: :class:`~google.api_core.page_iterator.Iterator`
+ :param iterator: The iterator that is currently in use.
+
+ :type resource: dict
+ :param resource: An item to be converted to a project.
+
+ :rtype: :class:`.Project`
+ :returns: The next project in the page.
+ """
+ return Project.from_api_repr(resource)
+# pylint: enable=unused-argument
+
+
+def _item_to_dataset(iterator, resource):
+ """Convert a JSON dataset to the native object.
+
+ :type iterator: :class:`~google.api_core.page_iterator.Iterator`
+ :param iterator: The iterator that is currently in use.
+
+ :type resource: dict
+ :param resource: An item to be converted to a dataset.
+
+ :rtype: :class:`.Dataset`
+ :returns: The next dataset in the page.
+ """
+ return Dataset.from_api_repr(resource)
+
+
+def _item_to_job(iterator, resource):
+ """Convert a JSON job to the native object.
+
+ :type iterator: :class:`~google.api_core.page_iterator.Iterator`
+ :param iterator: The iterator that is currently in use.
+
+ :type resource: dict
+ :param resource: An item to be converted to a job.
+
+ :rtype: job instance.
+ :returns: The next job in the page.
+ """
+ return iterator.client.job_from_resource(resource)
+
+
+def _item_to_table(iterator, resource):
+ """Convert a JSON table to the native object.
+
+ :type iterator: :class:`~google.api_core.page_iterator.Iterator`
+ :param iterator: The iterator that is currently in use.
+
+ :type resource: dict
+ :param resource: An item to be converted to a table.
+
+ :rtype: :class:`~google.cloud.bigquery.table.Table`
+ :returns: The next table in the page.
+ """
+ return Table.from_api_repr(resource)
+
+
+def _make_job_id(job_id, prefix=None):
+ """Construct an ID for a new job.
+
+ :type job_id: str or ``NoneType``
+ :param job_id: the user-provided job ID
+
+ :type prefix: str or ``NoneType``
+ :param prefix: (Optional) the user-provided prefix for a job ID
+
+ :rtype: str
+ :returns: A job ID
+ """
+ if job_id is not None:
+ return job_id
+ elif prefix is not None:
+ return str(prefix) + str(uuid.uuid4())
+ else:
+ return str(uuid.uuid4())
+
+
+def _check_mode(stream):
+ """Check that a stream was opened in read-binary mode.
+
+ :type stream: IO[bytes]
+ :param stream: A bytes IO object open for reading.
+
+ :raises: :exc:`ValueError` if ``stream.mode`` is set and is not one of
+ ``rb``, ``r+b`` or ``rb+``.
+ """
+ mode = getattr(stream, 'mode', None)
+
+ if mode is not None and mode not in ('rb', 'r+b', 'rb+'):
+ raise ValueError(
+ "Cannot upload files opened in text mode: use "
+ "open(filename, mode='rb') or open(filename, mode='r+b')")
+
+
+def _get_upload_headers(user_agent):
+ """Get the headers for an upload request.
+
+ :type user_agent: str
+ :param user_agent: The user-agent for requests.
+
+ :rtype: dict
+ :returns: The headers to be used for the request.
+ """
+ return {
+ 'Accept': 'application/json',
+ 'Accept-Encoding': 'gzip, deflate',
+ 'User-Agent': user_agent,
+ 'content-type': 'application/json',
+ }
diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py
new file mode 100644
index 0000000..e464fcf
--- /dev/null
+++ b/bigquery/google/cloud/bigquery/dataset.py
@@ -0,0 +1,536 @@
+# Copyright 2015 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Define API Datasets."""
+
+from __future__ import absolute_import
+
+import six
+
+from google.cloud._helpers import _datetime_from_microseconds
+from google.cloud.bigquery.table import TableReference
+
+
+class AccessEntry(object):
+ """Represent grant of an access role to an entity.
+
+ Every entry in the access list has exactly one of
+ ``userByEmail``, ``groupByEmail``, ``domain``, ``specialGroup`` or
+ ``view`` set. If anything other than ``view`` is set, the entry also
+ has a ``role`` specified. ``role`` is omitted for a ``view``, since
+ views are always read-only.
+
+ See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets.
+
+ :type role: str
+ :param role: Role granted to the entity. One of
+
+ * ``'OWNER'``
+ * ``'WRITER'``
+ * ``'READER'``
+
+ May also be ``None`` if the ``entity_type`` is ``view``.
+
+ :type entity_type: str
+ :param entity_type: Type of entity being granted the role. One of
+ :attr:`ENTITY_TYPES`.
+
+ :type entity_id: str
+ :param entity_id: ID of entity being granted the role.
+
+ :raises: :class:`ValueError` if the ``entity_type`` is not among
+ :attr:`ENTITY_TYPES`, if a ``view`` has a ``role`` set, or
+ if a non-``view`` entry **does not** have a ``role`` set.
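+
+ Example (illustrative sketch)::
+
+     entry = AccessEntry('OWNER', 'userByEmail', 'owner@example.com')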
+ """
+
+ ENTITY_TYPES = frozenset(['userByEmail', 'groupByEmail', 'domain',
+ 'specialGroup', 'view'])
+ """Allowed entity types."""
+
+ def __init__(self, role, entity_type, entity_id):
+ if entity_type not in self.ENTITY_TYPES:
+ message = 'Entity type %r not among: %s' % (
+ entity_type, ', '.join(self.ENTITY_TYPES))
+ raise ValueError(message)
+ if entity_type == 'view':
+ if role is not None:
+ raise ValueError('Role must be None for a view. Received '
+ 'role: %r' % (role,))
+ else:
+ if role is None:
+ raise ValueError('Role must be set for entity '
+ 'type %r' % (entity_type,))
+
+ self.role = role
+ self.entity_type = entity_type
+ self.entity_id = entity_id
+
+ def __eq__(self, other):
+ if not isinstance(other, AccessEntry):
+ return NotImplemented
+ return (
+ self.role == other.role and
+ self.entity_type == other.entity_type and
+ self.entity_id == other.entity_id)
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __repr__(self):
+ return '<AccessEntry: role=%s, %s=%s>' % (
+ self.role, self.entity_type, self.entity_id)
+
+
+class DatasetReference(object):
+ """DatasetReferences are pointers to datasets.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets
+
+ :type project: str
+ :param project: the ID of the project
+
+ :type dataset_id: str
+ :param dataset_id: the ID of the dataset
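+
+ Example (illustrative sketch; the project and dataset IDs are
+ placeholders)::
+
+     dataset_ref = DatasetReference('my-project', 'my_dataset')
+     table_ref = dataset_ref.table('my_table')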
+ """
+
+ def __init__(self, project, dataset_id):
+ if not isinstance(project, six.string_types):
+ raise ValueError("Pass a string for project")
+ if not isinstance(dataset_id, six.string_types):
+ raise ValueError("Pass a string for dataset_id")
+ self._project = project
+ self._dataset_id = dataset_id
+
+ @property
+ def project(self):
+ """Project ID of the dataset.
+
+ :rtype: str
+ :returns: the project ID.
+ """
+ return self._project
+
+ @property
+ def dataset_id(self):
+ """Dataset ID.
+
+ :rtype: str
+ :returns: the dataset ID.
+ """
+ return self._dataset_id
+
+ @property
+ def path(self):
+ """URL path for the dataset's APIs.
+
+ :rtype: str
+ :returns: the path based on project and dataset name.
+ """
+ return '/projects/%s/datasets/%s' % (self.project, self.dataset_id)
+
+ def table(self, table_id):
+ """Constructs a TableReference.
+
+ :type table_id: str
+ :param table_id: the ID of the table.
+
+ :rtype: :class:`google.cloud.bigquery.table.TableReference`
+ :returns: a TableReference for a table in this dataset.
+ """
+ return TableReference(self, table_id)
+
+ @classmethod
+ def from_api_repr(cls, resource):
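+ """Factory: construct a dataset reference given its API representation
+
+ :type resource: dict
+ :param resource: dataset reference representation returned from the API
+
+ :rtype: :class:`google.cloud.bigquery.dataset.DatasetReference`
+ :returns: Dataset reference parsed from ``resource``.
+ """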
+ project = resource['projectId']
+ dataset_id = resource['datasetId']
+ return cls(project, dataset_id)
+
+ def to_api_repr(self):
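+ """Construct the API resource representation of this dataset reference.
+
+ :rtype: dict
+ :returns: dataset reference represented as an API resource.
+ """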
+ return {
+ 'projectId': self._project,
+ 'datasetId': self._dataset_id,
+ }
+
+ def _key(self):
+ """A tuple key that uniquely describes this field.
+
+ Used to compute this instance's hashcode and evaluate equality.
+
+ Returns:
+ tuple: The contents of this :class:`DatasetReference`.
+ """
+ return (
+ self._project,
+ self._dataset_id,
+ )
+
+ def __eq__(self, other):
+ if not isinstance(other, DatasetReference):
+ return NotImplemented
+ return self._key() == other._key()
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __hash__(self):
+ return hash(self._key())
+
+ def __repr__(self):
+ return 'DatasetReference{}'.format(self._key())
+
+
+class Dataset(object):
+ """Datasets are containers for tables.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets
+
+ :type dataset_ref: :class:`~google.cloud.bigquery.dataset.DatasetReference`
+ :param dataset_ref: a pointer to a dataset
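+
+ Example (illustrative sketch; the project and dataset IDs are
+ placeholders)::
+
+     dataset = Dataset(DatasetReference('my-project', 'my_dataset'))
+     dataset.default_table_expiration_ms = 24 * 60 * 60 * 1000  # one day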
+ """
+
+ def __init__(self, dataset_ref):
+ self._project = dataset_ref.project
+ self._dataset_id = dataset_ref.dataset_id
+ self._properties = {'labels': {}}
+ self._access_entries = ()
+
+ @property
+ def project(self):
+ """Project bound to the dataset.
+
+ :rtype: str
+ :returns: the project.
+ """
+ return self._project
+
+ @property
+ def path(self):
+ """URL path for the dataset's APIs.
+
+ :rtype: str
+ :returns: the path based on project and dataset ID.
+ """
+ return '/projects/%s/datasets/%s' % (self.project, self.dataset_id)
+
+ @property
+ def access_entries(self):
+ """Dataset's access entries.
+
+ :rtype: list of :class:`AccessEntry`
+ :returns: roles granted to entities for this dataset
+ """
+ return list(self._access_entries)
+
+ @access_entries.setter
+ def access_entries(self, value):
+ """Update dataset's access entries
+
+ :type value: list of :class:`AccessEntry`
+ :param value: roles granted to entities for this dataset
+
+ :raises: TypeError if 'value' is not a sequence, or ValueError if
+ any item in the sequence is not an AccessEntry
+ """
+ if not all(isinstance(field, AccessEntry) for field in value):
+ raise ValueError('Values must be AccessEntry instances')
+ self._access_entries = tuple(value)
+
+ @property
+ def created(self):
+ """Datetime at which the dataset was created.
+
+ :rtype: ``datetime.datetime``, or ``NoneType``
+ :returns: the creation time (None until set from the server).
+ """
+ creation_time = self._properties.get('creationTime')
+ if creation_time is not None:
+ # creation_time will be in milliseconds.
+ return _datetime_from_microseconds(1000.0 * creation_time)
+
+ @property
+ def dataset_id(self):
+ """Dataset ID.
+
+ :rtype: str
+ :returns: the dataset ID.
+ """
+ return self._dataset_id
+
+ @property
+ def full_dataset_id(self):
+ """ID for the dataset resource, in the form "project_id:dataset_id".
+
+ :rtype: str, or ``NoneType``
+ :returns: the ID (None until set from the server).
+ """
+ return self._properties.get('id')
+
+ @property
+ def etag(self):
+ """ETag for the dataset resource.
+
+ :rtype: str, or ``NoneType``
+ :returns: the ETag (None until set from the server).
+ """
+ return self._properties.get('etag')
+
+ @property
+ def modified(self):
+ """Datetime at which the dataset was last modified.
+
+ :rtype: ``datetime.datetime``, or ``NoneType``
+ :returns: the modification time (None until set from the server).
+ """
+ modified_time = self._properties.get('lastModifiedTime')
+ if modified_time is not None:
+ # modified_time will be in milliseconds.
+ return _datetime_from_microseconds(1000.0 * modified_time)
+
+ @property
+ def self_link(self):
+ """URL for the dataset resource.
+
+ :rtype: str, or ``NoneType``
+ :returns: the URL (None until set from the server).
+ """
+ return self._properties.get('selfLink')
+
+ @property
+ def default_table_expiration_ms(self):
+ """Default expiration time for tables in the dataset.
+
+ :rtype: int, or ``NoneType``
+ :returns: The time in milliseconds, or None (the default).
+ """
+ return self._properties.get('defaultTableExpirationMs')
+
+ @default_table_expiration_ms.setter
+ def default_table_expiration_ms(self, value):
+ """Update default expiration time for tables in the dataset.
+
+ :type value: int
+ :param value: (Optional) new default time, in milliseconds
+
+ :raises: ValueError for invalid value types.
+ """
+ if not isinstance(value, six.integer_types) and value is not None:
+ raise ValueError("Pass an integer, or None")
+ self._properties['defaultTableExpirationMs'] = value
+
+ @property
+ def description(self):
+ """Description of the dataset.
+
+ :rtype: str, or ``NoneType``
+ :returns: The description as set by the user, or None (the default).
+ """
+ return self._properties.get('description')
+
+ @description.setter
+ def description(self, value):
+ """Update description of the dataset.
+
+ :type value: str
+ :param value: (Optional) new description
+
+ :raises: ValueError for invalid value types.
+ """
+ if not isinstance(value, six.string_types) and value is not None:
+ raise ValueError("Pass a string, or None")
+ self._properties['description'] = value
+
+ @property
+ def friendly_name(self):
+ """Title of the dataset.
+
+ :rtype: str, or ``NoneType``
+ :returns: The name as set by the user, or None (the default).
+ """
+ return self._properties.get('friendlyName')
+
+ @friendly_name.setter
+ def friendly_name(self, value):
+ """Update title of the dataset.
+
+ :type value: str
+ :param value: (Optional) new title
+
+ :raises: ValueError for invalid value types.
+ """
+ if not isinstance(value, six.string_types) and value is not None:
+ raise ValueError("Pass a string, or None")
+ self._properties['friendlyName'] = value
+
+ @property
+ def location(self):
+ """Location in which the dataset is hosted.
+
+ :rtype: str, or ``NoneType``
+ :returns: The location as set by the user, or None (the default).
+ """
+ return self._properties.get('location')
+
+ @location.setter
+ def location(self, value):
+ """Update location in which the dataset is hosted.
+
+ :type value: str
+ :param value: (Optional) new location
+
+ :raises: ValueError for invalid value types.
+ """
+ if not isinstance(value, six.string_types) and value is not None:
+ raise ValueError("Pass a string, or None")
+ self._properties['location'] = value
+
+ @property
+ def labels(self):
+ """Labels for the dataset.
+
+ This method always returns a dict. To change a dataset's labels,
+ modify the dict, then call ``Client.update_dataset``. To delete a
+ label, set its value to ``None`` before updating.
+
+ :rtype: dict, {str -> str}
+ :returns: A dict of the dataset's labels.
+ """
+ return self._properties['labels']
+
+ @labels.setter
+ def labels(self, value):
+ """Update labels for the dataset.
+
+ :type value: dict, {str -> str}
+ :param value: new labels
+
+ :raises: ValueError for invalid value types.
+ """
+ if not isinstance(value, dict):
+ raise ValueError("Pass a dict")
+ self._properties['labels'] = value
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct a dataset given its API representation
+
+ :type resource: dict
+ :param resource: dataset resource representation returned from the API
+
+ :rtype: :class:`google.cloud.bigquery.dataset.Dataset`
+ :returns: Dataset parsed from ``resource``.
+ """
+ dsr = resource.get('datasetReference')
+ if dsr is None or 'datasetId' not in dsr:
+ raise KeyError('Resource lacks required identity information:'
+ '["datasetReference"]["datasetId"]')
+ dataset_id = dsr['datasetId']
+ dataset = cls(DatasetReference(dsr['projectId'], dataset_id))
+ dataset._set_properties(resource)
+ return dataset
+
+ @staticmethod
+ def _parse_access_entries(access):
+ """Parse a resource fragment into a set of access entries.
+
+ ``role`` augments the entity type and is present **unless** the entity
+ type is ``view``.
+
+ :type access: list of mappings
+ :param access: each mapping represents a single access entry.
+
+ :rtype: list of :class:`AccessEntry`
+ :returns: a list of parsed entries.
+ :raises: :class:`ValueError` if an entry in ``access`` has any keys
+ other than ``role`` and a single entity key.
+ """
+ result = []
+ for entry in access:
+ entry = entry.copy()
+ role = entry.pop('role', None)
+ entity_type, entity_id = entry.popitem()
+ if len(entry) != 0:
+ raise ValueError('Entry has unexpected keys remaining.', entry)
+ result.append(
+ AccessEntry(role, entity_type, entity_id))
+ return result
+
+ def _set_properties(self, api_response):
+ """Update properties from resource in body of ``api_response``
+
+ :type api_response: dict
+ :param api_response: response returned from an API call.
+ """
+ self._properties.clear()
+ cleaned = api_response.copy()
+ access = cleaned.pop('access', ())
+ self.access_entries = self._parse_access_entries(access)
+ if 'creationTime' in cleaned:
+ cleaned['creationTime'] = float(cleaned['creationTime'])
+ if 'lastModifiedTime' in cleaned:
+ cleaned['lastModifiedTime'] = float(cleaned['lastModifiedTime'])
+ if 'defaultTableExpirationMs' in cleaned:
+ cleaned['defaultTableExpirationMs'] = int(
+ cleaned['defaultTableExpirationMs'])
+ if 'labels' not in cleaned:
+ cleaned['labels'] = {}
+ self._properties.update(cleaned)
+
+ def _build_access_resource(self):
+ """Generate a resource fragment for dataset's access entries."""
+ result = []
+ for entry in self.access_entries:
+ info = {entry.entity_type: entry.entity_id}
+ if entry.role is not None:
+ info['role'] = entry.role
+ result.append(info)
+ return result
+
+ def _build_resource(self):
+ """Generate a resource for ``create`` or ``update``."""
+ resource = {
+ 'datasetReference': {
+ 'projectId': self.project, 'datasetId': self.dataset_id},
+ }
+ if self.default_table_expiration_ms is not None:
+ value = self.default_table_expiration_ms
+ resource['defaultTableExpirationMs'] = value
+
+ if self.description is not None:
+ resource['description'] = self.description
+
+ if self.friendly_name is not None:
+ resource['friendlyName'] = self.friendly_name
+
+ if self.location is not None:
+ resource['location'] = self.location
+
+ if len(self.access_entries) > 0:
+ resource['access'] = self._build_access_resource()
+
+ resource['labels'] = self.labels # labels is never None
+
+ return resource
+
+ def table(self, table_id):
+ """Constructs a TableReference.
+
+ :type table_id: str
+ :param table_id: the ID of the table.
+
+ :rtype: :class:`google.cloud.bigquery.table.TableReference`
+ :returns: a TableReference for a table in this dataset.
+ """
+ return TableReference(self, table_id)
diff --git a/bigquery/google/cloud/bigquery/dbapi/_helpers.py b/bigquery/google/cloud/bigquery/dbapi/_helpers.py
new file mode 100644
index 0000000..a2cee9c
--- /dev/null
+++ b/bigquery/google/cloud/bigquery/dbapi/_helpers.py
@@ -0,0 +1,108 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import collections
+import datetime
+import numbers
+
+import six
+
+from google.cloud import bigquery
+from google.cloud.bigquery.dbapi import exceptions
+
+
+def scalar_to_query_parameter(value, name=None):
+ """Convert a scalar value into a query parameter.
+
+ :type value: any
+ :param value: A scalar value to convert into a query parameter.
+
+ :type name: str
+ :param name: (Optional) Name of the query parameter.
+
+ :rtype: :class:`~google.cloud.bigquery.ScalarQueryParameter`
+ :returns:
+ A query parameter corresponding with the type and value of the plain
+ Python object.
+ :raises: :class:`~google.cloud.bigquery.dbapi.exceptions.ProgrammingError`
+ if the type cannot be determined.
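+
+ Example (illustrative sketch)::
+
+     param = scalar_to_query_parameter(42, name='answer')
+     # equivalent to bigquery.ScalarQueryParameter('answer', 'INT64', 42)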
+ """
+ parameter_type = None
+
+ if isinstance(value, bool):
+ parameter_type = 'BOOL'
+ elif isinstance(value, numbers.Integral):
+ parameter_type = 'INT64'
+ elif isinstance(value, numbers.Real):
+ parameter_type = 'FLOAT64'
+ elif isinstance(value, six.text_type):
+ parameter_type = 'STRING'
+ elif isinstance(value, six.binary_type):
+ parameter_type = 'BYTES'
+ elif isinstance(value, datetime.datetime):
+ parameter_type = 'DATETIME' if value.tzinfo is None else 'TIMESTAMP'
+ elif isinstance(value, datetime.date):
+ parameter_type = 'DATE'
+ elif isinstance(value, datetime.time):
+ parameter_type = 'TIME'
+ else:
+ raise exceptions.ProgrammingError(
+ 'encountered parameter {} with value {} of unexpected type'.format(
+ name, value))
+ return bigquery.ScalarQueryParameter(name, parameter_type, value)
+
+
+def to_query_parameters_list(parameters):
+ """Converts a sequence of parameter values into query parameters.
+
+ :type parameters: Sequence[Any]
+ :param parameters: Sequence of query parameter values.
+
+ :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter]
+ :returns: A list of query parameters.
+ """
+ return [scalar_to_query_parameter(value) for value in parameters]
+
+
+def to_query_parameters_dict(parameters):
+ """Converts a dictionary of parameter values into query parameters.
+
+ :type parameters: Mapping[str, Any]
+ :param parameters: Dictionary of query parameter values.
+
+ :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter]
+ :returns: A list of named query parameters.
+ """
+ return [
+ scalar_to_query_parameter(value, name=name)
+ for name, value
+ in six.iteritems(parameters)]
+
+
+def to_query_parameters(parameters):
+ """Converts DB-API parameter values into query parameters.
+
+ :type parameters: Mapping[str, Any] or Sequence[Any]
+ :param parameters: A dictionary or sequence of query parameter values.
+
+ :rtype: List[google.cloud.bigquery.query._AbstractQueryParameter]
+ :returns: A list of query parameters.
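+
+ Example (illustrative sketch)::
+
+     to_query_parameters({'age': 29})
+     # -> [ScalarQueryParameter('age', 'INT64', 29)]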
+ """
+ if parameters is None:
+ return []
+
+ if isinstance(parameters, collections.Mapping):
+ return to_query_parameters_dict(parameters)
+
+ return to_query_parameters_list(parameters)
diff --git a/bigquery/google/cloud/bigquery/dbapi/cursor.py b/bigquery/google/cloud/bigquery/dbapi/cursor.py
new file mode 100644
index 0000000..914d2e0
--- /dev/null
+++ b/bigquery/google/cloud/bigquery/dbapi/cursor.py
@@ -0,0 +1,340 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Cursor for the Google BigQuery DB-API."""
+
+import collections
+
+import six
+
+from google.cloud.bigquery import job
+from google.cloud.bigquery.dbapi import _helpers
+from google.cloud.bigquery.dbapi import exceptions
+import google.cloud.exceptions
+
+# Per PEP 249: A 7-item sequence containing information describing one result
+# column. The first two items (name and type_code) are mandatory, the other
+# five are optional and are set to None if no meaningful values can be
+# provided.
+Column = collections.namedtuple(
+ 'Column',
+ [
+ 'name', 'type_code', 'display_size', 'internal_size', 'precision',
+ 'scale', 'null_ok',
+ ])
+
+
+class Cursor(object):
+ """DB-API Cursor to Google BigQuery.
+
+ :type connection: :class:`~google.cloud.bigquery.dbapi.Connection`
+ :param connection: A DB-API connection to Google BigQuery.
+ """
+ def __init__(self, connection):
+ self.connection = connection
+ self.description = None
+ # Per PEP 249: The attribute is -1 in case no .execute*() has been
+ # performed on the cursor or the rowcount of the last operation
+ # cannot be determined by the interface.
+ self.rowcount = -1
+ # Per PEP 249: The arraysize attribute defaults to 1, meaning to fetch
+ # a single row at a time.
+ self.arraysize = 1
+ self._query_data = None
+ self._query_job = None
+
+ def close(self):
+ """No-op."""
+
+ def _set_description(self, schema):
+ """Set description from schema.
+
+ :type schema: Sequence[google.cloud.bigquery.schema.SchemaField]
+ :param schema: A description of fields in the schema.
+ """
+ if schema is None:
+ self.description = None
+ return
+
+ self.description = tuple([
+ Column(
+ name=field.name,
+ type_code=field.field_type,
+ display_size=None,
+ internal_size=None,
+ precision=None,
+ scale=None,
+ null_ok=field.is_nullable)
+ for field in schema])
+
+ def _set_rowcount(self, query_results):
+ """Set the rowcount from query results.
+
+ Normally, this sets rowcount to the number of rows returned by the
+ query, but if it was a DML statement, it sets rowcount to the number
+ of modified rows.
+
+ :type query_results:
+ :class:`~google.cloud.bigquery.query.QueryResults`
+ :param query_results: results of a query
+ """
+ total_rows = 0
+ num_dml_affected_rows = query_results.num_dml_affected_rows
+
+ if (query_results.total_rows is not None
+ and query_results.total_rows > 0):
+ total_rows = query_results.total_rows
+ if num_dml_affected_rows is not None and num_dml_affected_rows > 0:
+ total_rows = num_dml_affected_rows
+ self.rowcount = total_rows
+
+ def execute(self, operation, parameters=None, job_id=None):
+ """Prepare and execute a database operation.
+
+ .. note::
+ When setting query parameters, values which are "text"
+ (``unicode`` in Python2, ``str`` in Python3) will use
+ the 'STRING' BigQuery type. Values which are "bytes" (``str`` in
+ Python2, ``bytes`` in Python3) will use the 'BYTES' type.
+
+ A `~datetime.datetime` parameter without timezone information uses
+ the 'DATETIME' BigQuery type (example: Global Pi Day Celebration
+ March 14, 2017 at 1:59pm). A `~datetime.datetime` parameter with
+ timezone information uses the 'TIMESTAMP' BigQuery type (example:
+ a wedding on April 29, 2011 at 11am, British Summer Time).
+
+ For more information about BigQuery data types, see:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
+
+ ``STRUCT``/``RECORD`` and ``REPEATED`` query parameters are not
+ yet supported. See:
+ https://github.com/GoogleCloudPlatform/google-cloud-python/issues/3524
+
+ :type operation: str
+ :param operation: A Google BigQuery query string.
+
+ :type parameters: Mapping[str, Any] or Sequence[Any]
+ :param parameters:
+ (Optional) dictionary or sequence of parameter values.
+
+ :type job_id: str
+ :param job_id: (Optional) The job_id to use. If not set, a job ID
+ is generated at random.
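+
+ Example (illustrative sketch; uses the ``pyformat`` parameter style)::
+
+     cursor.execute('SELECT %(answer)s AS answer', {'answer': 42})
+     print(cursor.fetchone())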
+ """
+ self._query_data = None
+ self._query_job = None
+ client = self.connection._client
+
+ # The DB-API uses the pyformat parameter style because BigQuery's
+ # native query-parameter syntax is not one of the standard DB-API
+ # options. Convert both the query and the parameters to the format
+ # expected by the client libraries.
+ formatted_operation = _format_operation(
+ operation, parameters=parameters)
+ query_parameters = _helpers.to_query_parameters(parameters)
+
+ config = job.QueryJobConfig()
+ config.query_parameters = query_parameters
+ config.use_legacy_sql = False
+ self._query_job = client.query(
+ formatted_operation, job_config=config, job_id=job_id)
+
+ # Wait for the query to finish.
+ try:
+ self._query_job.result()
+ except google.cloud.exceptions.GoogleCloudError:
+ raise exceptions.DatabaseError(self._query_job.errors)
+
+ query_results = self._query_job.query_results()
+ self._set_rowcount(query_results)
+ self._set_description(query_results.schema)
+
+ def executemany(self, operation, seq_of_parameters):
+ """Prepare and execute a database operation multiple times.
+
+ :type operation: str
+ :param operation: A Google BigQuery query string.
+
+ :type seq_of_parameters: Sequence[Mapping[str, Any] or Sequence[Any]]
+ :param seq_of_parameters: Sequence of parameter-value sets, one set
+ per execution of ``operation``.
+ """
+ for parameters in seq_of_parameters:
+ self.execute(operation, parameters)
+
+ def _try_fetch(self, size=None):
+ """Try to start fetching data, if not yet started.
+
+ Mutates self to indicate that iteration has started.
+ """
+ if self._query_job is None:
+ raise exceptions.InterfaceError(
+ 'No query results: execute() must be called before fetch.')
+
+ is_dml = (
+ self._query_job.statement_type
+ and self._query_job.statement_type.upper() != 'SELECT')
+ if is_dml:
+ self._query_data = iter([])
+ return
+
+ if self._query_data is None:
+ client = self.connection._client
+ # TODO(tswast): pass in page size to list_rows based on arraysize
+ rows_iter = client.list_rows(
+ self._query_job.destination,
+ selected_fields=self._query_job.query_results().schema)
+ self._query_data = iter(rows_iter)
+
+ def fetchone(self):
+ """Fetch a single row from the results of the last ``execute*()`` call.
+
+ :rtype: tuple
+ :returns:
+ A tuple representing a row or ``None`` if no more data is
+ available.
+ :raises: :class:`~google.cloud.bigquery.dbapi.InterfaceError`
+ if called before ``execute()``.
+ """
+ self._try_fetch()
+ try:
+ return six.next(self._query_data)
+ except StopIteration:
+ return None
+
+ def fetchmany(self, size=None):
+ """Fetch multiple results from the last ``execute*()`` call.
+
+ .. note::
+ The size parameter is not used for the request/response size.
+ Set the ``arraysize`` attribute before calling ``execute()`` to
+ set the batch size.
+
+ :type size: int
+ :param size:
+ (Optional) Maximum number of rows to return. Defaults to the
+ ``arraysize`` property value.
+
+ :rtype: List[tuple]
+ :returns: A list of rows.
+ :raises: :class:`~google.cloud.bigquery.dbapi.InterfaceError`
+ if called before ``execute()``.
+ """
+ if size is None:
+ size = self.arraysize
+
+ self._try_fetch(size=size)
+ rows = []
+
+ for row in self._query_data:
+ rows.append(row)
+ if len(rows) >= size:
+ break
+
+ return rows
+
+ def fetchall(self):
+ """Fetch all remaining results from the last ``execute*()`` call.
+
+ :rtype: List[tuple]
+ :returns: A list of all the rows in the results.
+ :raises: :class:`~google.cloud.bigquery.dbapi.InterfaceError`
+ if called before ``execute()``.
+ """
+ self._try_fetch()
+ return list(self._query_data)
+
+ def setinputsizes(self, sizes):
+ """No-op."""
+
+ def setoutputsize(self, size, column=None):
+ """No-op."""
+
+
+def _format_operation_list(operation, parameters):
+ """Formats parameters in operation in the way BigQuery expects.
+
+ The input operation will be a query like ``SELECT %s`` and the output
+ will be a query like ``SELECT ?``.
+
+ :type operation: str
+ :param operation: A Google BigQuery query string.
+
+ :type parameters: Sequence[Any]
+ :param parameters: Sequence of parameter values.
+
+ :rtype: str
+ :returns: A formatted query string.
+ :raises: :class:`~google.cloud.bigquery.dbapi.ProgrammingError`
+ if a parameter used in the operation is not found in the
+ ``parameters`` argument.
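+
+ Example (illustrative sketch)::
+
+     _format_operation_list('SELECT %s, %s', ('a', 'b'))
+     # -> 'SELECT ?, ?'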
+ """
+ formatted_params = ['?' for _ in parameters]
+
+ try:
+ return operation % tuple(formatted_params)
+ except TypeError as exc:
+ raise exceptions.ProgrammingError(exc)
+
+
+def _format_operation_dict(operation, parameters):
+ """Formats parameters in operation in the way BigQuery expects.
+
+ The input operation will be a query like ``SELECT %(namedparam)s`` and
+ the output will be a query like ``SELECT @namedparam``.
+
+ :type operation: str
+ :param operation: A Google BigQuery query string.
+
+ :type parameters: Mapping[str, Any]
+ :param parameters: Dictionary of parameter values.
+
+ :rtype: str
+ :returns: A formatted query string.
+ :raises: :class:`~google.cloud.bigquery.dbapi.ProgrammingError`
+ if a parameter used in the operation is not found in the
+ ``parameters`` argument.
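+
+ Example (illustrative sketch)::
+
+     _format_operation_dict('SELECT %(name)s', {'name': 'Wylma'})
+     # -> 'SELECT @`name`'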
+ """
+ formatted_params = {}
+ for name in parameters:
+ escaped_name = name.replace('`', r'\`')
+ formatted_params[name] = '@`{}`'.format(escaped_name)
+
+ try:
+ return operation % formatted_params
+ except KeyError as exc:
+ raise exceptions.ProgrammingError(exc)
+
+
+def _format_operation(operation, parameters=None):
+ """Formats parameters in operation in way BigQuery expects.
+
+ :type: str
+ :param operation: A Google BigQuery query string.
+
+ :type: Mapping[str, Any] or Sequence[Any]
+ :param parameters: Optional parameter values.
+
+ :rtype: str
+ :returns: A formatted query string.
+ :raises: :class:`~google.cloud.bigquery.dbapi.ProgrammingError`
+ if a parameter used in the operation is not found in the
+ ``parameters`` argument.
+ """
+ if parameters is None:
+ return operation
+
+ if isinstance(parameters, collections.Mapping):
+ return _format_operation_dict(operation, parameters)
+
+ return _format_operation_list(operation, parameters)
diff --git a/bigquery/google/cloud/bigquery/external_config.py b/bigquery/google/cloud/bigquery/external_config.py
new file mode 100644
index 0000000..e356022
--- /dev/null
+++ b/bigquery/google/cloud/bigquery/external_config.py
@@ -0,0 +1,492 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Define classes that describe external data sources.
+
+ These are used for both Table.externalDataConfiguration and
+ Job.configuration.query.tableDefinitions.
+"""
+
+from __future__ import absolute_import
+
+import base64
+import copy
+
+import six
+
+from google.cloud.bigquery._helpers import _to_bytes
+from google.cloud.bigquery._helpers import _bytes_to_json
+from google.cloud.bigquery._helpers import _TypedApiResourceProperty
+from google.cloud.bigquery._helpers import _ListApiResourceProperty
+from google.cloud.bigquery._helpers import _int_or_none
+from google.cloud.bigquery.schema import SchemaField
+from google.cloud.bigquery.table import _build_schema_resource
+from google.cloud.bigquery.table import _parse_schema_resource
+
+
+class BigtableColumn(object):
+ """Options for a Bigtable column."""
+
+ def __init__(self):
+ self._properties = {}
+
+ encoding = _TypedApiResourceProperty(
+ 'encoding', 'encoding', six.string_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.encoding
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.encoding
+ """
+
+ field_name = _TypedApiResourceProperty(
+ 'field_name', 'fieldName', six.string_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.field_name
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.field_name
+ """
+
+ only_read_latest = _TypedApiResourceProperty(
+ 'only_read_latest', 'onlyReadLatest', bool)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.only_read_latest
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.only_read_latest
+ """
+
+ qualifier_encoded = _TypedApiResourceProperty(
+ 'qualifier_encoded', 'qualifierEncoded', six.binary_type)
+ """The qualifier encoded in binary. The type is ``str`` (Python 2.x) or
+ ``bytes`` (Python 3.x). The module will handle base64 encoding for you.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.qualifier_encoded
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifier_encoded
+ """
+
+ qualifier_string = _TypedApiResourceProperty(
+ 'qualifier_string', 'qualifierString', six.string_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.qualifier_string
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.qualifier_string
+ """
+
+ type_ = _TypedApiResourceProperty('type_', 'type', six.string_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns.type
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns.type
+ """
+
+ def to_api_repr(self):
+ """Build an API representation of this object.
+
+ :rtype: dict
+ :returns: A dictionary in the format used by the BigQuery API.
+ """
+ config = copy.deepcopy(self._properties)
+ qe = config.get('qualifierEncoded')
+ if qe is not None:
+ config['qualifierEncoded'] = _bytes_to_json(qe)
+ return config
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct a BigtableColumn given its API representation
+
+ :type resource: dict
+ :param resource:
+ A column in the same representation as is returned from the API.
+
+ :rtype: :class:`google.cloud.bigquery.external_config.BigtableColumn`
+ :returns: Configuration parsed from ``resource``.
+ """
+ config = cls()
+ config._properties = copy.deepcopy(resource)
+ qe = resource.get('qualifierEncoded')
+ if qe:
+ config.qualifier_encoded = base64.standard_b64decode(_to_bytes(qe))
+ return config
+
+
+class BigtableColumnFamily(object):
+ """Options for a Bigtable column family."""
+
+ def __init__(self):
+ self._properties = {}
+
+ encoding = _TypedApiResourceProperty(
+ 'encoding', 'encoding', six.string_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.encoding
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.encoding
+ """
+
+ family_id = _TypedApiResourceProperty(
+ 'family_id', 'familyId', six.string_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.familyId
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.familyId
+ """
+
+ only_read_latest = _TypedApiResourceProperty(
+ 'only_read_latest', 'onlyReadLatest', bool)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.onlyReadLatest
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.onlyReadLatest
+ """
+
+ type_ = _TypedApiResourceProperty('type_', 'type', six.string_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.type
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.type
+ """
+
+ columns = _ListApiResourceProperty(
+ 'columns', 'columns', BigtableColumn)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies.columns
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies.columns
+ """
+
+ def to_api_repr(self):
+ """Build an API representation of this object.
+
+ :rtype: dict
+ :returns: A dictionary in the format used by the BigQuery API.
+ """
+ config = copy.deepcopy(self._properties)
+ config['columns'] = [c.to_api_repr() for c in config['columns']]
+ return config
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct a BigtableColumnFamily given its
+ API representation
+
+ :type resource: dict
+ :param resource:
+ A column family in the same representation as is returned
+ from the API.
+
+ :rtype:
+ :class:`google.cloud.bigquery.external_config.BigtableColumnFamily`
+ :returns: Configuration parsed from ``resource``.
+ """
+ config = cls()
+ config._properties = copy.deepcopy(resource)
+ config.columns = [BigtableColumn.from_api_repr(c)
+ for c in resource['columns']]
+ return config
+
+
+class BigtableOptions(object):
+ """Options that describe how to treat Bigtable tables
+ as BigQuery tables."""
+
+ _SOURCE_FORMAT = 'BIGTABLE'
+ _RESOURCE_NAME = 'bigtableOptions'
+
+ def __init__(self):
+ self._properties = {}
+
+ ignore_unspecified_column_families = _TypedApiResourceProperty(
+ 'ignore_unspecified_column_families',
+ 'ignoreUnspecifiedColumnFamilies', bool)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.ignoreUnspecifiedColumnFamilies
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.ignoreUnspecifiedColumnFamilies
+ """
+
+ read_rowkey_as_string = _TypedApiResourceProperty(
+ 'read_rowkey_as_string', 'readRowkeyAsString', bool)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.readRowkeyAsString
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.readRowkeyAsString
+ """
+
+ column_families = _ListApiResourceProperty(
+ 'column_families', 'columnFamilies', BigtableColumnFamily)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).bigtableOptions.columnFamilies
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.bigtableOptions.columnFamilies
+ """
+
+ def to_api_repr(self):
+ """Build an API representation of this object.
+
+ :rtype: dict
+ :returns: A dictionary in the format used by the BigQuery API.
+ """
+ config = copy.deepcopy(self._properties)
+ config['columnFamilies'] = [cf.to_api_repr()
+ for cf in config['columnFamilies']]
+ return config
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct a BigtableOptions given its API representation
+
+ :type resource: dict
+ :param resource:
+ A BigtableOptions in the same representation as is returned
+ from the API.
+
+ :rtype: :class:`google.cloud.bigquery.external_config.BigtableOptions`
+ :returns: Configuration parsed from ``resource``.
+ """
+ config = cls()
+ config._properties = copy.deepcopy(resource)
+ config.column_families = [BigtableColumnFamily.from_api_repr(cf)
+ for cf in resource['columnFamilies']]
+ return config
+
+
+class CSVOptions(object):
+ """Options that describe how to treat CSV files as BigQuery tables."""
+
+ _SOURCE_FORMAT = 'CSV'
+ _RESOURCE_NAME = 'csvOptions'
+
+ def __init__(self):
+ self._properties = {}
+
+ allow_jagged_rows = _TypedApiResourceProperty(
+ 'allow_jagged_rows', 'allowJaggedRows', bool)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowJaggedRows
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.allowJaggedRows
+ """
+
+ allow_quoted_newlines = _TypedApiResourceProperty(
+ 'allow_quoted_newlines', 'allowQuotedNewlines', bool)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.allowQuotedNewlines
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.allowQuotedNewlines
+ """
+
+ encoding = _TypedApiResourceProperty(
+ 'encoding', 'encoding', six.string_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.encoding
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.encoding
+ """
+
+ field_delimiter = _TypedApiResourceProperty(
+ 'field_delimiter', 'fieldDelimiter', six.string_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.fieldDelimiter
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.fieldDelimiter
+ """
+
+ quote_character = _TypedApiResourceProperty(
+ 'quote_character', 'quote', six.string_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.quote
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.quote
+ """
+
+ skip_leading_rows = _TypedApiResourceProperty(
+ 'skip_leading_rows', 'skipLeadingRows', six.integer_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.skipLeadingRows
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.skipLeadingRows
+ """
+
+ def to_api_repr(self):
+ """Build an API representation of this object.
+
+ :rtype: dict
+ :returns: A dictionary in the format used by the BigQuery API.
+ """
+ config = copy.deepcopy(self._properties)
+ slr = config.pop('skipLeadingRows', None)
+ if slr is not None:
+ config['skipLeadingRows'] = str(slr)
+ return config
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct a CSVOptions given its API representation
+
+ :type resource: dict
+ :param resource:
+ A CSVOptions in the same representation as is
+ returned from the API.
+
+ :rtype: :class:`google.cloud.bigquery.external_config.CSVOptions`
+ :returns: Configuration parsed from ``resource``.
+ """
+ slr = resource.get('skipLeadingRows')
+ config = cls()
+ config._properties = copy.deepcopy(resource)
+ config.skip_leading_rows = _int_or_none(slr)
+ return config
+
+
+class GoogleSheetsOptions(object):
+ """Options that describe how to treat Google Sheets as BigQuery tables."""
+
+ _SOURCE_FORMAT = 'GOOGLE_SHEETS'
+ _RESOURCE_NAME = 'googleSheetsOptions'
+
+ def __init__(self):
+ self._properties = {}
+
+ skip_leading_rows = _TypedApiResourceProperty(
+ 'skip_leading_rows', 'skipLeadingRows', six.integer_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).googleSheetsOptions.skipLeadingRows
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.googleSheetsOptions.skipLeadingRows
+ """
+
+ def to_api_repr(self):
+ """Build an API representation of this object.
+
+ :rtype: dict
+ :returns: A dictionary in the format used by the BigQuery API.
+ """
+ config = copy.deepcopy(self._properties)
+ slr = config.pop('skipLeadingRows', None)
+ if slr is not None:
+ config['skipLeadingRows'] = str(slr)
+ return config
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct a GoogleSheetsOptions given its API representation
+
+ :type resource: dict
+ :param resource:
+ A GoogleSheetsOptions in the same representation as is
+ returned from the API.
+
+ :rtype:
+ :class:`google.cloud.bigquery.external_config.GoogleSheetsOptions`
+ :returns: Configuration parsed from ``resource``.
+ """
+ slr = resource.get('skipLeadingRows')
+ config = cls()
+ config._properties = copy.deepcopy(resource)
+ config.skip_leading_rows = _int_or_none(slr)
+ return config
+
+
+_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions)
+
+
+class ExternalConfig(object):
+ """Description of an external data source.
+
+ :type source_format: str
+ :param source_format: the format of the external data. See
+ the ``source_format`` property on this class.
+ """
+
+ def __init__(self, source_format):
+ self._properties = {'sourceFormat': source_format}
+ self._options = None
+ for optcls in _OPTION_CLASSES:
+ if source_format == optcls._SOURCE_FORMAT:
+ self._options = optcls()
+ break
+
+ @property
+ def source_format(self):
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceFormat
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceFormat
+ """
+ return self._properties['sourceFormat']
+
+ @property
+ def options(self):
+ """Source-specific options."""
+ return self._options
+
+ autodetect = _TypedApiResourceProperty(
+ 'autodetect', 'autodetect', bool)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).autodetect
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.autodetect
+ """
+
+ compression = _TypedApiResourceProperty(
+ 'compression', 'compression', six.string_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).compression
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.compression
+ """
+
+ ignore_unknown_values = _TypedApiResourceProperty(
+ 'ignore_unknown_values', 'ignoreUnknownValues', bool)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).ignoreUnknownValues
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.ignoreUnknownValues
+ """
+
+ max_bad_records = _TypedApiResourceProperty(
+ 'max_bad_records', 'maxBadRecords', six.integer_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).maxBadRecords
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.maxBadRecords
+ """
+
+ source_uris = _ListApiResourceProperty(
+ 'source_uris', 'sourceUris', six.string_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceUris
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.sourceUris
+ """
+
+ schema = _ListApiResourceProperty('schema', 'schema', SchemaField)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).schema
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.schema
+ """
+
+ def to_api_repr(self):
+ """Build an API representation of this object.
+
+ :rtype: dict
+ :returns: A dictionary in the format used by the BigQuery API.
+ """
+ config = copy.deepcopy(self._properties)
+ if self.schema:
+ config['schema'] = {'fields': _build_schema_resource(self.schema)}
+ if self.options is not None:
+ r = self.options.to_api_repr()
+ if r != {}:
+ config[self.options._RESOURCE_NAME] = r
+ return config
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct a CSVOptions given its API representation
+
+ :type resource: dict
+ :param resource:
+ An extract job configuration in the same representation as is
+ returned from the API.
+
+ :rtype: :class:`google.cloud.bigquery.external_config.CSVOptions`
+ :returns: Configuration parsed from ``resource``.
+ """
+ config = cls(resource['sourceFormat'])
+ schema = resource.get('schema')
+ for optcls in _OPTION_CLASSES:
+ opts = resource.get(optcls._RESOURCE_NAME)
+ if opts is not None:
+ config._options = optcls.from_api_repr(opts)
+ break
+ config._properties = copy.deepcopy(resource)
+ if schema:
+ config.schema = _parse_schema_resource(schema)
+ return config
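+
+# A minimal usage sketch (comment only, not executed): describing a CSV file
+# in Cloud Storage as an external source. The bucket and object names are
+# hypothetical, and the expected output assumes the CSV option class maps to
+# the ``csvOptions`` resource key:
+#
+#   ec = ExternalConfig('CSV')
+#   ec.source_uris = ['gs://bucket-name/data.csv']
+#   ec.schema = [SchemaField('full_name', 'STRING')]
+#   ec.options.skip_leading_rows = 1
+#   ec.to_api_repr()
+#   # {'sourceFormat': 'CSV',
+#   #  'sourceUris': ['gs://bucket-name/data.csv'],
+#   #  'schema': {'fields': [...]},
+#   #  'csvOptions': {'skipLeadingRows': '1'}}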
diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py
new file mode 100644
index 0000000..5c7ffd3
--- /dev/null
+++ b/bigquery/google/cloud/bigquery/job.py
@@ -0,0 +1,2085 @@
+# Copyright 2015 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Define API Jobs."""
+
+import copy
+import threading
+
+import six
+from six.moves import http_client
+
+import google.api_core.future.polling
+from google.cloud import exceptions
+from google.cloud.exceptions import NotFound
+from google.cloud._helpers import _datetime_from_microseconds
+from google.cloud.bigquery.dataset import DatasetReference
+from google.cloud.bigquery.external_config import ExternalConfig
+from google.cloud.bigquery.query import _AbstractQueryParameter
+from google.cloud.bigquery.query import _query_param_from_api_repr
+from google.cloud.bigquery.query import ArrayQueryParameter
+from google.cloud.bigquery.query import ScalarQueryParameter
+from google.cloud.bigquery.query import StructQueryParameter
+from google.cloud.bigquery.query import UDFResource
+from google.cloud.bigquery.schema import SchemaField
+from google.cloud.bigquery.table import TableReference
+from google.cloud.bigquery.table import _build_schema_resource
+from google.cloud.bigquery.table import _parse_schema_resource
+from google.cloud.bigquery._helpers import _EnumApiResourceProperty
+from google.cloud.bigquery._helpers import _ListApiResourceProperty
+from google.cloud.bigquery._helpers import _TypedApiResourceProperty
+from google.cloud.bigquery._helpers import DEFAULT_RETRY
+from google.cloud.bigquery._helpers import _int_or_none
+
+_DONE_STATE = 'DONE'
+_STOPPED_REASON = 'stopped'
+_TIMEOUT_BUFFER_SECS = 0.1
+
+_ERROR_REASON_TO_EXCEPTION = {
+ 'accessDenied': http_client.FORBIDDEN,
+ 'backendError': http_client.INTERNAL_SERVER_ERROR,
+ 'billingNotEnabled': http_client.FORBIDDEN,
+ 'billingTierLimitExceeded': http_client.BAD_REQUEST,
+ 'blocked': http_client.FORBIDDEN,
+ 'duplicate': http_client.CONFLICT,
+ 'internalError': http_client.INTERNAL_SERVER_ERROR,
+ 'invalid': http_client.BAD_REQUEST,
+ 'invalidQuery': http_client.BAD_REQUEST,
+ 'notFound': http_client.NOT_FOUND,
+ 'notImplemented': http_client.NOT_IMPLEMENTED,
+ 'quotaExceeded': http_client.FORBIDDEN,
+ 'rateLimitExceeded': http_client.FORBIDDEN,
+ 'resourceInUse': http_client.BAD_REQUEST,
+ 'resourcesExceeded': http_client.BAD_REQUEST,
+ 'responseTooLarge': http_client.FORBIDDEN,
+ 'stopped': http_client.OK,
+ 'tableUnavailable': http_client.BAD_REQUEST,
+}
+
+
+def _error_result_to_exception(error_result):
+ """Maps BigQuery error reasons to an exception.
+
+ The reasons and their matching HTTP status codes are documented on
+ the `troubleshooting errors`_ page.
+
+ .. _troubleshooting errors: https://cloud.google.com/bigquery\
+ /troubleshooting-errors
+
+ :type error_result: Mapping[str, str]
+ :param error_result: The error result from BigQuery.
+
+    :rtype: google.cloud.exceptions.GoogleCloudError
+ :returns: The mapped exception.
+ """
+ reason = error_result.get('reason')
+ status_code = _ERROR_REASON_TO_EXCEPTION.get(
+ reason, http_client.INTERNAL_SERVER_ERROR)
+ return exceptions.from_http_status(
+ status_code, error_result.get('message', ''), errors=[error_result])
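+
+# A minimal sketch (comment only, not executed): an 'accessDenied' reason
+# maps to HTTP 403, so the returned exception is expected to be the Forbidden
+# subclass of GoogleCloudError; unknown reasons fall back to a 500-based
+# exception.
+#
+#   exc = _error_result_to_exception(
+#       {'reason': 'accessDenied', 'message': 'no access'})
+#   exc.code   # 403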
+
+
+class Compression(_EnumApiResourceProperty):
+ """Pseudo-enum for ``compression`` properties."""
+ GZIP = 'GZIP'
+ NONE = 'NONE'
+
+
+class CreateDisposition(_EnumApiResourceProperty):
+ """Pseudo-enum for ``create_disposition`` properties."""
+ CREATE_IF_NEEDED = 'CREATE_IF_NEEDED'
+ CREATE_NEVER = 'CREATE_NEVER'
+
+
+class DestinationFormat(_EnumApiResourceProperty):
+ """Pseudo-enum for ``destination_format`` properties."""
+ CSV = 'CSV'
+ NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON'
+ AVRO = 'AVRO'
+
+
+class Encoding(_EnumApiResourceProperty):
+ """Pseudo-enum for ``encoding`` properties."""
+ UTF_8 = 'UTF-8'
+    ISO_8859_1 = 'ISO-8859-1'
+
+
+class QueryPriority(_EnumApiResourceProperty):
+ """Pseudo-enum for ``QueryJob.priority`` property."""
+ INTERACTIVE = 'INTERACTIVE'
+ BATCH = 'BATCH'
+
+
+class SourceFormat(_EnumApiResourceProperty):
+ """Pseudo-enum for ``source_format`` properties."""
+ CSV = 'CSV'
+ DATASTORE_BACKUP = 'DATASTORE_BACKUP'
+ NEWLINE_DELIMITED_JSON = 'NEWLINE_DELIMITED_JSON'
+ AVRO = 'AVRO'
+
+
+class WriteDisposition(_EnumApiResourceProperty):
+ """Pseudo-enum for ``write_disposition`` properties."""
+ WRITE_APPEND = 'WRITE_APPEND'
+ WRITE_TRUNCATE = 'WRITE_TRUNCATE'
+ WRITE_EMPTY = 'WRITE_EMPTY'
+
+
+class AutoDetectSchema(_TypedApiResourceProperty):
+ """Property for ``autodetect`` properties.
+
+ :raises ValueError: on ``set`` operation if ``instance.schema``
+ is already defined.
+ """
+ def __set__(self, instance, value):
+ self._validate(value)
+ instance._properties[self.resource_name] = value
+
+
+class _AsyncJob(google.api_core.future.polling.PollingFuture):
+ """Base class for asynchronous jobs.
+
+ :type job_id: str
+ :param job_id: the job's ID in the project associated with the client.
+
+ :type client: :class:`google.cloud.bigquery.client.Client`
+ :param client: A client which holds credentials and project configuration.
+ """
+ def __init__(self, job_id, client):
+ super(_AsyncJob, self).__init__()
+ self.job_id = job_id
+ self._client = client
+ self._properties = {}
+ self._result_set = False
+ self._completion_lock = threading.Lock()
+
+ @property
+ def project(self):
+ """Project bound to the job.
+
+ :rtype: str
+ :returns: the project (derived from the client).
+ """
+ return self._client.project
+
+ def _require_client(self, client):
+ """Check client or verify over-ride.
+
+ :type client: :class:`~google.cloud.bigquery.client.Client` or
+ ``NoneType``
+ :param client: the client to use. If not passed, falls back to the
+ ``client`` stored on the current dataset.
+
+ :rtype: :class:`google.cloud.bigquery.client.Client`
+ :returns: The client passed in or the currently bound client.
+ """
+ if client is None:
+ client = self._client
+ return client
+
+ @property
+ def job_type(self):
+ """Type of job
+
+ :rtype: str
+ :returns: one of 'load', 'copy', 'extract', 'query'
+ """
+ return self._JOB_TYPE
+
+ @property
+ def path(self):
+ """URL path for the job's APIs.
+
+ :rtype: str
+ :returns: the path based on project and job ID.
+ """
+ return '/projects/%s/jobs/%s' % (self.project, self.job_id)
+
+ @property
+ def etag(self):
+ """ETag for the job resource.
+
+ :rtype: str, or ``NoneType``
+ :returns: the ETag (None until set from the server).
+ """
+ return self._properties.get('etag')
+
+ @property
+ def self_link(self):
+ """URL for the job resource.
+
+ :rtype: str, or ``NoneType``
+ :returns: the URL (None until set from the server).
+ """
+ return self._properties.get('selfLink')
+
+ @property
+ def user_email(self):
+ """E-mail address of user who submitted the job.
+
+ :rtype: str, or ``NoneType``
+        :returns: the e-mail address (None until set from the server).
+ """
+ return self._properties.get('user_email')
+
+ @property
+ def created(self):
+ """Datetime at which the job was created.
+
+ :rtype: ``datetime.datetime``, or ``NoneType``
+ :returns: the creation time (None until set from the server).
+ """
+ statistics = self._properties.get('statistics')
+ if statistics is not None:
+ millis = statistics.get('creationTime')
+ if millis is not None:
+ return _datetime_from_microseconds(millis * 1000.0)
+
+ @property
+ def started(self):
+ """Datetime at which the job was started.
+
+ :rtype: ``datetime.datetime``, or ``NoneType``
+ :returns: the start time (None until set from the server).
+ """
+ statistics = self._properties.get('statistics')
+ if statistics is not None:
+ millis = statistics.get('startTime')
+ if millis is not None:
+ return _datetime_from_microseconds(millis * 1000.0)
+
+ @property
+ def ended(self):
+ """Datetime at which the job finished.
+
+ :rtype: ``datetime.datetime``, or ``NoneType``
+ :returns: the end time (None until set from the server).
+ """
+ statistics = self._properties.get('statistics')
+ if statistics is not None:
+ millis = statistics.get('endTime')
+ if millis is not None:
+ return _datetime_from_microseconds(millis * 1000.0)
+
+ def _job_statistics(self):
+ """Helper for job-type specific statistics-based properties."""
+ statistics = self._properties.get('statistics', {})
+ return statistics.get(self._JOB_TYPE, {})
+
+ @property
+ def error_result(self):
+ """Error information about the job as a whole.
+
+ :rtype: mapping, or ``NoneType``
+ :returns: the error information (None until set from the server).
+ """
+ status = self._properties.get('status')
+ if status is not None:
+ return status.get('errorResult')
+
+ @property
+ def errors(self):
+ """Information about individual errors generated by the job.
+
+ :rtype: list of mappings, or ``NoneType``
+ :returns: the error information (None until set from the server).
+ """
+ status = self._properties.get('status')
+ if status is not None:
+ return status.get('errors')
+
+ @property
+ def state(self):
+ """Status of the job.
+
+ :rtype: str, or ``NoneType``
+ :returns: the state (None until set from the server).
+ """
+ status = self._properties.get('status')
+ if status is not None:
+ return status.get('state')
+
+ def _scrub_local_properties(self, cleaned):
+ """Helper: handle subclass properties in cleaned."""
+ pass
+
+ def _copy_configuration_properties(self, configuration):
+ """Helper: assign subclass configuration properties in cleaned."""
+ raise NotImplementedError("Abstract")
+
+ def _set_properties(self, api_response):
+ """Update properties from resource in body of ``api_response``
+
+ :type api_response: dict
+ :param api_response: response returned from an API call
+ """
+ cleaned = api_response.copy()
+ self._scrub_local_properties(cleaned)
+
+ statistics = cleaned.get('statistics', {})
+ if 'creationTime' in statistics:
+ statistics['creationTime'] = float(statistics['creationTime'])
+ if 'startTime' in statistics:
+ statistics['startTime'] = float(statistics['startTime'])
+ if 'endTime' in statistics:
+ statistics['endTime'] = float(statistics['endTime'])
+
+ self._properties.clear()
+ self._properties.update(cleaned)
+ configuration = cleaned['configuration'][self._JOB_TYPE]
+ self._copy_configuration_properties(configuration)
+
+ # For Future interface
+ self._set_future_result()
+
+ @classmethod
+ def _get_resource_config(cls, resource):
+ """Helper for :meth:`from_api_repr`
+
+ :type resource: dict
+ :param resource: resource for the job
+
+        :rtype: tuple
+ :returns: tuple (string, dict), where the first element is the
+ job ID and the second contains job-specific configuration.
+ :raises: :class:`KeyError` if the resource has no identifier, or
+ is missing the appropriate configuration.
+ """
+ if ('jobReference' not in resource or
+ 'jobId' not in resource['jobReference']):
+ raise KeyError('Resource lacks required identity information: '
+ '["jobReference"]["jobId"]')
+ job_id = resource['jobReference']['jobId']
+ if ('configuration' not in resource or
+ cls._JOB_TYPE not in resource['configuration']):
+ raise KeyError('Resource lacks required configuration: '
+ '["configuration"]["%s"]' % cls._JOB_TYPE)
+ config = resource['configuration'][cls._JOB_TYPE]
+ return job_id, config
+
+ def begin(self, client=None, retry=DEFAULT_RETRY):
+ """API call: begin the job via a POST request
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert
+
+ :type client: :class:`~google.cloud.bigquery.client.Client` or
+ ``NoneType``
+ :param client: the client to use. If not passed, falls back to the
+ ``client`` stored on the current dataset.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+        :raises: :exc:`ValueError` if the job has already begun.
+ """
+ if self.state is not None:
+ raise ValueError("Job already begun.")
+
+ client = self._require_client(client)
+ path = '/projects/%s/jobs' % (self.project,)
+
+ # jobs.insert is idempotent because we ensure that every new
+ # job has an ID.
+ api_response = client._call_api(
+ retry,
+ method='POST', path=path, data=self._build_resource())
+ self._set_properties(api_response)
+
+ def exists(self, client=None, retry=DEFAULT_RETRY):
+ """API call: test for the existence of the job via a GET request
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get
+
+ :type client: :class:`~google.cloud.bigquery.client.Client` or
+ ``NoneType``
+ :param client: the client to use. If not passed, falls back to the
+ ``client`` stored on the current dataset.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: bool
+ :returns: Boolean indicating existence of the job.
+ """
+ client = self._require_client(client)
+
+ try:
+ client._call_api(retry,
+ method='GET', path=self.path,
+ query_params={'fields': 'id'})
+ except NotFound:
+ return False
+ else:
+ return True
+
+ def reload(self, client=None, retry=DEFAULT_RETRY):
+ """API call: refresh job properties via a GET request.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get
+
+ :type client: :class:`~google.cloud.bigquery.client.Client` or
+ ``NoneType``
+ :param client: the client to use. If not passed, falls back to the
+ ``client`` stored on the current dataset.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+ """
+ client = self._require_client(client)
+
+ api_response = client._call_api(retry, method='GET', path=self.path)
+ self._set_properties(api_response)
+
+ def cancel(self, client=None):
+ """API call: cancel job via a POST request
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel
+
+ :type client: :class:`~google.cloud.bigquery.client.Client` or
+ ``NoneType``
+ :param client: the client to use. If not passed, falls back to the
+ ``client`` stored on the current dataset.
+
+ :rtype: bool
+ :returns: Boolean indicating that the cancel request was sent.
+ """
+ client = self._require_client(client)
+
+ api_response = client._connection.api_request(
+ method='POST', path='%s/cancel' % (self.path,))
+ self._set_properties(api_response['job'])
+ # The Future interface requires that we return True if the *attempt*
+ # to cancel was successful.
+ return True
+
+ # The following methods implement the PollingFuture interface. Note that
+ # the methods above are from the pre-Future interface and are left for
+ # compatibility. The only "overloaded" method is :meth:`cancel`, which
+ # satisfies both interfaces.
+
+ def _set_future_result(self):
+ """Set the result or exception from the job if it is complete."""
+ # This must be done in a lock to prevent the polling thread
+ # and main thread from both executing the completion logic
+ # at the same time.
+ with self._completion_lock:
+ # If the operation isn't complete or if the result has already been
+ # set, do not call set_result/set_exception again.
+ # Note: self._result_set is set to True in set_result and
+ # set_exception, in case those methods are invoked directly.
+ if self.state != _DONE_STATE or self._result_set:
+ return
+
+ if self.error_result is not None:
+ exception = _error_result_to_exception(self.error_result)
+ self.set_exception(exception)
+ else:
+ self.set_result(self)
+
+ def done(self, retry=DEFAULT_RETRY):
+ """Refresh the job and checks if it is complete.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: bool
+ :returns: True if the job is complete, False otherwise.
+ """
+        # Do not refresh if the state is already done, as the job will not
+ # change once complete.
+ if self.state != _DONE_STATE:
+ self.reload(retry=retry)
+ return self.state == _DONE_STATE
+
+ def result(self, timeout=None):
+ """Start the job and wait for it to complete and get the result.
+
+ :type timeout: int
+ :param timeout: How long to wait for job to complete before raising
+ a :class:`TimeoutError`.
+
+ :rtype: _AsyncJob
+ :returns: This instance.
+
+ :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the job
+ failed or :class:`TimeoutError` if the job did not complete in the
+ given timeout.
+ """
+ if self.state is None:
+ self.begin()
+ # TODO: modify PollingFuture so it can pass a retry argument to done().
+ return super(_AsyncJob, self).result(timeout=timeout)
+
+ def cancelled(self):
+ """Check if the job has been cancelled.
+
+ This always returns False. It's not possible to check if a job was
+ cancelled in the API. This method is here to satisfy the interface
+ for :class:`google.api_core.future.Future`.
+
+ :rtype: bool
+ :returns: False
+ """
+ return (self.error_result is not None
+ and self.error_result.get('reason') == _STOPPED_REASON)
+
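+# A minimal sketch (comment only, not executed) of the future-style lifecycle
+# shared by the concrete job classes defined below, assuming ``job`` is an
+# unstarted instance of one of them (for example a LoadJob):
+#
+#   job.begin()             # POST to the jobs.insert endpoint
+#   job.done()              # reload and check for the 'DONE' state
+#   job.result(timeout=60)  # block until complete; raises on job errors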
+
+class LoadJobConfig(object):
+ """Configuration options for load jobs.
+
+ All properties in this class are optional. Values which are ``None`` ->
+ server defaults.
+ """
+
+ def __init__(self):
+ self._properties = {}
+ self._schema = ()
+
+ allow_jagged_rows = _TypedApiResourceProperty(
+ 'allow_jagged_rows', 'allowJaggedRows', bool)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowJaggedRows
+ """
+
+ allow_quoted_newlines = _TypedApiResourceProperty(
+ 'allow_quoted_newlines', 'allowQuotedNewlines', bool)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.allowQuotedNewlines
+ """
+
+ autodetect = AutoDetectSchema('autodetect', 'autodetect', bool)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.autodetect
+ """
+
+ create_disposition = CreateDisposition('create_disposition',
+ 'createDisposition')
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.createDisposition
+ """
+
+ encoding = Encoding('encoding', 'encoding')
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.encoding
+ """
+
+ field_delimiter = _TypedApiResourceProperty(
+ 'field_delimiter', 'fieldDelimiter', six.string_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.fieldDelimiter
+ """
+
+ ignore_unknown_values = _TypedApiResourceProperty(
+ 'ignore_unknown_values', 'ignoreUnknownValues', bool)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.ignoreUnknownValues
+ """
+
+ max_bad_records = _TypedApiResourceProperty(
+ 'max_bad_records', 'maxBadRecords', six.integer_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.maxBadRecords
+ """
+
+ null_marker = _TypedApiResourceProperty(
+ 'null_marker', 'nullMarker', six.string_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.nullMarker
+ """
+
+ quote_character = _TypedApiResourceProperty(
+ 'quote_character', 'quote', six.string_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.quote
+ """
+
+ skip_leading_rows = _TypedApiResourceProperty(
+ 'skip_leading_rows', 'skipLeadingRows', six.integer_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.skipLeadingRows
+ """
+
+ source_format = SourceFormat('source_format', 'sourceFormat')
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat
+ """
+
+ write_disposition = WriteDisposition('write_disposition',
+ 'writeDisposition')
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition
+ """
+
+ @property
+ def schema(self):
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema
+ """
+ return list(self._schema)
+
+ @schema.setter
+ def schema(self, value):
+ if not all(isinstance(field, SchemaField) for field in value):
+ raise ValueError('Schema items must be fields')
+ self._schema = tuple(value)
+
+ def to_api_repr(self):
+ """Build an API representation of the load job config.
+
+ :rtype: dict
+ :returns: A dictionary in the format used by the BigQuery API.
+ """
+ config = copy.deepcopy(self._properties)
+ if len(self.schema) > 0:
+ config['schema'] = {'fields': _build_schema_resource(self.schema)}
+ # skipLeadingRows is a string because it's defined as an int64, which
+ # can't be represented as a JSON number.
+ slr = config.get('skipLeadingRows')
+ if slr is not None:
+ config['skipLeadingRows'] = str(slr)
+ return config
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct a job configuration given its API representation
+
+ :type resource: dict
+ :param resource:
+            A load job configuration in the same representation as is
+            returned from the API.
+
+        :rtype: :class:`google.cloud.bigquery.job.LoadJobConfig`
+ :returns: Configuration parsed from ``resource``.
+ """
+ schema = resource.pop('schema', {'fields': ()})
+ slr = resource.pop('skipLeadingRows', None)
+ config = cls()
+ config._properties = copy.deepcopy(resource)
+ config.schema = _parse_schema_resource(schema)
+        config.skip_leading_rows = _int_or_none(slr)
+        return config
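+
+# A minimal sketch (comment only, not executed) of the round trip performed
+# by this factory: the serialized form carries ``skipLeadingRows`` as a
+# string, and the parsed config exposes it as an int again (assuming the
+# enum descriptor simply returns the stored string for ``source_format``).
+#
+#   config = LoadJobConfig.from_api_repr(
+#       {'sourceFormat': 'CSV', 'skipLeadingRows': '1'})
+#   config.skip_leading_rows   # 1
+#   config.source_format       # 'CSV'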
+
+
+class LoadJob(_AsyncJob):
+ """Asynchronous job for loading data into a table.
+
+ Can load from Google Cloud Storage URIs or from a file.
+
+ :type job_id: str
+ :param job_id: the job's ID
+
+ :type source_uris: sequence of string or ``NoneType``
+ :param source_uris:
+ URIs of one or more data files to be loaded. See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceUris
+ for supported URI formats. Pass None for jobs that load from a file.
+
+ :type destination: :class:`google.cloud.bigquery.table.TableReference`
+ :param destination: reference to table into which data is to be loaded.
+
+ :type client: :class:`google.cloud.bigquery.client.Client`
+ :param client: A client which holds credentials and project configuration
+ for the dataset (which requires a project).
+ """
+
+ _JOB_TYPE = 'load'
+
+ def __init__(self, job_id, source_uris, destination, client,
+ job_config=None):
+ super(LoadJob, self).__init__(job_id, client)
+
+ if job_config is None:
+ job_config = LoadJobConfig()
+
+ self.source_uris = source_uris
+ self.destination = destination
+ self._configuration = job_config
+
+ @property
+ def allow_jagged_rows(self):
+ """See
+ :class:`~google.cloud.bigquery.job.LoadJobConfig.allow_jagged_rows`.
+ """
+ return self._configuration.allow_jagged_rows
+
+ @property
+ def allow_quoted_newlines(self):
+ """See
+ :class:`~google.cloud.bigquery.job.LoadJobConfig.allow_quoted_newlines`.
+ """
+ return self._configuration.allow_quoted_newlines
+
+ @property
+ def autodetect(self):
+ """See
+ :class:`~google.cloud.bigquery.job.LoadJobConfig.autodetect`.
+ """
+ return self._configuration.autodetect
+
+ @property
+ def create_disposition(self):
+ """See
+ :class:`~google.cloud.bigquery.job.LoadJobConfig.create_disposition`.
+ """
+ return self._configuration.create_disposition
+
+ @property
+ def encoding(self):
+ """See
+ :class:`~google.cloud.bigquery.job.LoadJobConfig.encoding`.
+ """
+ return self._configuration.encoding
+
+ @property
+ def field_delimiter(self):
+ """See
+ :class:`~google.cloud.bigquery.job.LoadJobConfig.field_delimiter`.
+ """
+ return self._configuration.field_delimiter
+
+ @property
+ def ignore_unknown_values(self):
+ """See
+ :class:`~google.cloud.bigquery.job.LoadJobConfig.ignore_unknown_values`.
+ """
+ return self._configuration.ignore_unknown_values
+
+ @property
+ def max_bad_records(self):
+ """See
+ :class:`~google.cloud.bigquery.job.LoadJobConfig.max_bad_records`.
+ """
+ return self._configuration.max_bad_records
+
+ @property
+ def null_marker(self):
+ """See
+ :class:`~google.cloud.bigquery.job.LoadJobConfig.null_marker`.
+ """
+ return self._configuration.null_marker
+
+ @property
+ def quote_character(self):
+ """See
+ :class:`~google.cloud.bigquery.job.LoadJobConfig.quote_character`.
+ """
+ return self._configuration.quote_character
+
+ @property
+ def skip_leading_rows(self):
+ """See
+ :class:`~google.cloud.bigquery.job.LoadJobConfig.skip_leading_rows`.
+ """
+ return self._configuration.skip_leading_rows
+
+ @property
+ def source_format(self):
+ """See
+ :class:`~google.cloud.bigquery.job.LoadJobConfig.source_format`.
+ """
+ return self._configuration.source_format
+
+ @property
+ def write_disposition(self):
+ """See
+ :class:`~google.cloud.bigquery.job.LoadJobConfig.write_disposition`.
+ """
+ return self._configuration.write_disposition
+
+ @property
+ def schema(self):
+ """See
+ :class:`~google.cloud.bigquery.job.LoadJobConfig.schema`.
+ """
+ return self._configuration.schema
+
+ @property
+ def input_file_bytes(self):
+ """Count of bytes loaded from source files.
+
+ :rtype: int, or ``NoneType``
+ :returns: the count (None until set from the server).
+ """
+ statistics = self._properties.get('statistics')
+ if statistics is not None:
+ return int(statistics['load']['inputFileBytes'])
+
+ @property
+ def input_files(self):
+ """Count of source files.
+
+ :rtype: int, or ``NoneType``
+ :returns: the count (None until set from the server).
+ """
+ statistics = self._properties.get('statistics')
+ if statistics is not None:
+ return int(statistics['load']['inputFiles'])
+
+ @property
+ def output_bytes(self):
+ """Count of bytes saved to destination table.
+
+ :rtype: int, or ``NoneType``
+ :returns: the count (None until set from the server).
+ """
+ statistics = self._properties.get('statistics')
+ if statistics is not None:
+ return int(statistics['load']['outputBytes'])
+
+ @property
+ def output_rows(self):
+ """Count of rows saved to destination table.
+
+ :rtype: int, or ``NoneType``
+ :returns: the count (None until set from the server).
+ """
+ statistics = self._properties.get('statistics')
+ if statistics is not None:
+ return int(statistics['load']['outputRows'])
+
+ def _build_resource(self):
+ """Generate a resource for :meth:`begin`."""
+ configuration = self._configuration.to_api_repr()
+ if self.source_uris is not None:
+ configuration['sourceUris'] = self.source_uris
+ configuration['destinationTable'] = self.destination.to_api_repr()
+
+ return {
+ 'jobReference': {
+ 'projectId': self.project,
+ 'jobId': self.job_id,
+ },
+ 'configuration': {
+ self._JOB_TYPE: configuration,
+ },
+ }
+
+ def _copy_configuration_properties(self, configuration):
+ """Helper: assign subclass configuration properties in cleaned."""
+ self._configuration._properties = copy.deepcopy(configuration)
+
+ @classmethod
+ def from_api_repr(cls, resource, client):
+ """Factory: construct a job given its API representation
+
+ .. note:
+
+ This method assumes that the project found in the resource matches
+ the client's project.
+
+ :type resource: dict
+ :param resource: dataset job representation returned from the API
+
+ :type client: :class:`google.cloud.bigquery.client.Client`
+ :param client: Client which holds credentials and project
+ configuration for the dataset.
+
+ :rtype: :class:`google.cloud.bigquery.job.LoadJob`
+ :returns: Job parsed from ``resource``.
+ """
+ job_id, config_resource = cls._get_resource_config(resource)
+ config = LoadJobConfig.from_api_repr(config_resource)
+ dest_config = config_resource['destinationTable']
+ ds_ref = DatasetReference(dest_config['projectId'],
+ dest_config['datasetId'],)
+ destination = TableReference(ds_ref, dest_config['tableId'])
+ # sourceUris will be absent if this is a file upload.
+ source_uris = config_resource.get('sourceUris')
+ job = cls(job_id, source_uris, destination, client, config)
+ job._set_properties(resource)
+ return job
+
+
+class CopyJobConfig(object):
+ """Configuration options for copy jobs.
+
+ All properties in this class are optional. Values which are ``None`` ->
+ server defaults.
+ """
+
+ def __init__(self):
+ self._properties = {}
+
+ create_disposition = CreateDisposition('create_disposition',
+ 'createDisposition')
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.createDisposition
+ """
+
+ write_disposition = WriteDisposition('write_disposition',
+ 'writeDisposition')
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.copy.writeDisposition
+ """
+
+ def to_api_repr(self):
+ """Build an API representation of the copy job config.
+
+ :rtype: dict
+ :returns: A dictionary in the format used by the BigQuery API.
+ """
+ return copy.deepcopy(self._properties)
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct a job configuration given its API representation
+
+ :type resource: dict
+ :param resource:
+            A copy job configuration in the same representation as is
+            returned from the API.
+
+        :rtype: :class:`google.cloud.bigquery.job.CopyJobConfig`
+ :returns: Configuration parsed from ``resource``.
+ """
+ config = cls()
+ config._properties = copy.deepcopy(resource)
+ return config
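+
+# A minimal sketch (comment only, not executed): the two dispositions a copy
+# job exposes, both optional. The expected output assumes the enum
+# descriptors store the plain string values shown.
+#
+#   config = CopyJobConfig()
+#   config.create_disposition = CreateDisposition.CREATE_IF_NEEDED
+#   config.write_disposition = WriteDisposition.WRITE_TRUNCATE
+#   config.to_api_repr()
+#   # {'createDisposition': 'CREATE_IF_NEEDED',
+#   #  'writeDisposition': 'WRITE_TRUNCATE'}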
+
+
+class CopyJob(_AsyncJob):
+ """Asynchronous job: copy data into a table from other tables.
+
+ :type job_id: str
+ :param job_id: the job's ID, within the project belonging to ``client``.
+
+ :type sources: list of :class:`google.cloud.bigquery.table.TableReference`
+    :param sources: Tables from which data is to be copied.
+
+    :type destination: :class:`google.cloud.bigquery.table.TableReference`
+    :param destination: Table into which data is to be copied.
+
+ :type client: :class:`google.cloud.bigquery.client.Client`
+ :param client: A client which holds credentials and project configuration
+ for the dataset (which requires a project).
+
+ :type job_config: :class:`~google.cloud.bigquery.job.CopyJobConfig`
+ :param job_config:
+ (Optional) Extra configuration options for the copy job.
+ """
+ _JOB_TYPE = 'copy'
+
+ def __init__(self, job_id, sources, destination, client, job_config=None):
+ super(CopyJob, self).__init__(job_id, client)
+
+ if job_config is None:
+ job_config = CopyJobConfig()
+
+ self.destination = destination
+ self.sources = sources
+ self._configuration = job_config
+
+ @property
+ def create_disposition(self):
+ """See
+ :class:`~google.cloud.bigquery.job.CopyJobConfig.create_disposition`.
+ """
+ return self._configuration.create_disposition
+
+ @property
+ def write_disposition(self):
+ """See
+ :class:`~google.cloud.bigquery.job.CopyJobConfig.write_disposition`.
+ """
+ return self._configuration.write_disposition
+
+ def _build_resource(self):
+ """Generate a resource for :meth:`begin`."""
+
+ source_refs = [{
+ 'projectId': table.project,
+ 'datasetId': table.dataset_id,
+ 'tableId': table.table_id,
+ } for table in self.sources]
+
+ configuration = self._configuration.to_api_repr()
+ configuration['sourceTables'] = source_refs
+ configuration['destinationTable'] = {
+ 'projectId': self.destination.project,
+ 'datasetId': self.destination.dataset_id,
+ 'tableId': self.destination.table_id,
+ }
+
+ return {
+ 'jobReference': {
+ 'projectId': self.project,
+ 'jobId': self.job_id,
+ },
+ 'configuration': {
+ self._JOB_TYPE: configuration,
+ },
+ }
+
+ def _copy_configuration_properties(self, configuration):
+ """Helper: assign subclass configuration properties in cleaned."""
+ self._configuration._properties = copy.deepcopy(configuration)
+
+ @classmethod
+ def from_api_repr(cls, resource, client):
+ """Factory: construct a job given its API representation
+
+ .. note:
+
+ This method assumes that the project found in the resource matches
+ the client's project.
+
+ :type resource: dict
+ :param resource: dataset job representation returned from the API
+
+ :type client: :class:`google.cloud.bigquery.client.Client`
+ :param client: Client which holds credentials and project
+ configuration for the dataset.
+
+ :rtype: :class:`google.cloud.bigquery.job.CopyJob`
+ :returns: Job parsed from ``resource``.
+ """
+ job_id, config_resource = cls._get_resource_config(resource)
+ config = CopyJobConfig.from_api_repr(config_resource)
+ destination = TableReference.from_api_repr(
+ config_resource['destinationTable'])
+ sources = []
+ source_configs = config_resource.get('sourceTables')
+ if source_configs is None:
+ single = config_resource.get('sourceTable')
+ if single is None:
+ raise KeyError(
+ "Resource missing 'sourceTables' / 'sourceTable'")
+ source_configs = [single]
+ for source_config in source_configs:
+ table_ref = TableReference.from_api_repr(source_config)
+ sources.append(table_ref)
+ job = cls(
+ job_id, sources, destination, client=client, job_config=config)
+ job._set_properties(resource)
+ return job
+
+
+class ExtractJobConfig(object):
+ """Configuration options for extract jobs.
+
+ All properties in this class are optional. Values which are ``None`` ->
+ server defaults.
+ """
+
+ def __init__(self):
+ self._properties = {}
+
+ compression = Compression('compression', 'compression')
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.compression
+ """
+
+ destination_format = DestinationFormat(
+ 'destination_format', 'destinationFormat')
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.destinationFormat
+ """
+
+ field_delimiter = _TypedApiResourceProperty(
+ 'field_delimiter', 'fieldDelimiter', six.string_types)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.fieldDelimiter
+ """
+
+ print_header = _TypedApiResourceProperty(
+ 'print_header', 'printHeader', bool)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.printHeader
+ """
+
+ def to_api_repr(self):
+ """Build an API representation of the extract job config.
+
+ :rtype: dict
+ :returns: A dictionary in the format used by the BigQuery API.
+ """
+ return copy.deepcopy(self._properties)
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct a job configuration given its API representation
+
+ :type resource: dict
+ :param resource:
+ An extract job configuration in the same representation as is
+ returned from the API.
+
+ :rtype: :class:`google.cloud.bigquery.job.ExtractJobConfig`
+ :returns: Configuration parsed from ``resource``.
+ """
+ config = cls()
+ config._properties = copy.deepcopy(resource)
+ return config
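+
+# A minimal sketch (comment only, not executed): exporting gzipped CSV
+# without a header row. The resulting config is passed as ``job_config``
+# when creating the ExtractJob defined below; expected output assumes the
+# enum descriptors store the plain string values shown.
+#
+#   config = ExtractJobConfig()
+#   config.compression = Compression.GZIP
+#   config.destination_format = DestinationFormat.CSV
+#   config.print_header = False
+#   config.to_api_repr()
+#   # {'compression': 'GZIP', 'destinationFormat': 'CSV',
+#   #  'printHeader': False}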
+
+
+class ExtractJob(_AsyncJob):
+ """Asynchronous job: extract data from a table into Cloud Storage.
+
+ :type job_id: str
+ :param job_id: the job's ID
+
+ :type source: :class:`google.cloud.bigquery.table.TableReference`
+    :param source: Table from which data is to be extracted.
+
+ :type destination_uris: list of string
+ :param destination_uris:
+ URIs describing where the extracted data will be written in Cloud
+ Storage, using the format ``gs://<bucket_name>/<object_name_or_glob>``.
+
+ :type client: :class:`google.cloud.bigquery.client.Client`
+ :param client:
+ A client which holds credentials and project configuration.
+
+ :type job_config: :class:`~google.cloud.bigquery.job.ExtractJobConfig`
+ :param job_config:
+ (Optional) Extra configuration options for the extract job.
+ """
+ _JOB_TYPE = 'extract'
+
+ def __init__(
+ self, job_id, source, destination_uris, client, job_config=None):
+ super(ExtractJob, self).__init__(job_id, client)
+
+ if job_config is None:
+ job_config = ExtractJobConfig()
+
+ self.source = source
+ self.destination_uris = destination_uris
+ self._configuration = job_config
+
+ @property
+ def compression(self):
+ """See
+ :class:`~google.cloud.bigquery.job.ExtractJobConfig.compression`.
+ """
+ return self._configuration.compression
+
+ @property
+ def destination_format(self):
+ """See
+ :class:`~google.cloud.bigquery.job.ExtractJobConfig.destination_format`.
+ """
+ return self._configuration.destination_format
+
+ @property
+ def field_delimiter(self):
+ """See
+ :class:`~google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`.
+ """
+ return self._configuration.field_delimiter
+
+ @property
+ def print_header(self):
+ """See
+ :class:`~google.cloud.bigquery.job.ExtractJobConfig.print_header`.
+ """
+ return self._configuration.print_header
+
+ @property
+ def destination_uri_file_counts(self):
+ """Return file counts from job statistics, if present.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.extract.destinationUriFileCounts
+
+ :rtype: int or None
+        :returns: number of files written to the destination URI(s), or None
+            if the job is not yet complete.
+ """
+ result = self._job_statistics().get('destinationUriFileCounts')
+ if result is not None:
+ result = int(result)
+ return result
+
+ def _build_resource(self):
+ """Generate a resource for :meth:`begin`."""
+
+ source_ref = {
+ 'projectId': self.source.project,
+ 'datasetId': self.source.dataset_id,
+ 'tableId': self.source.table_id,
+ }
+
+ configuration = self._configuration.to_api_repr()
+ configuration['sourceTable'] = source_ref
+ configuration['destinationUris'] = self.destination_uris
+
+ resource = {
+ 'jobReference': {
+ 'projectId': self.project,
+ 'jobId': self.job_id,
+ },
+ 'configuration': {
+ self._JOB_TYPE: configuration,
+ },
+ }
+
+ return resource
+
+ def _copy_configuration_properties(self, configuration):
+ """Helper: assign subclass configuration properties in cleaned."""
+ self._configuration._properties = copy.deepcopy(configuration)
+
+ @classmethod
+ def from_api_repr(cls, resource, client):
+ """Factory: construct a job given its API representation
+
+ .. note:
+
+ This method assumes that the project found in the resource matches
+ the client's project.
+
+ :type resource: dict
+ :param resource: dataset job representation returned from the API
+
+ :type client: :class:`google.cloud.bigquery.client.Client`
+ :param client: Client which holds credentials and project
+ configuration for the dataset.
+
+ :rtype: :class:`google.cloud.bigquery.job.ExtractJob`
+ :returns: Job parsed from ``resource``.
+ """
+ job_id, config_resource = cls._get_resource_config(resource)
+ config = ExtractJobConfig.from_api_repr(config_resource)
+ source_config = config_resource['sourceTable']
+ dataset = DatasetReference(
+ source_config['projectId'], source_config['datasetId'])
+ source = dataset.table(source_config['tableId'])
+ destination_uris = config_resource['destinationUris']
+
+ job = cls(
+ job_id, source, destination_uris, client=client, job_config=config)
+ job._set_properties(resource)
+ return job
+
+
+def _from_api_repr_query_parameters(resource):
+ return [
+ _query_param_from_api_repr(mapping)
+ for mapping in resource
+ ]
+
+
+def _to_api_repr_query_parameters(value):
+ return [
+ query_parameter.to_api_repr()
+ for query_parameter in value
+ ]
+
+
+def _from_api_repr_udf_resources(resource):
+ udf_resources = []
+ for udf_mapping in resource:
+ for udf_type, udf_value in udf_mapping.items():
+ udf_resources.append(UDFResource(udf_type, udf_value))
+ return udf_resources
+
+
+def _to_api_repr_udf_resources(value):
+ return [
+ {udf_resource.udf_type: udf_resource.value}
+ for udf_resource in value
+ ]
+
+
+def _from_api_repr_table_defs(resource):
+ return {k: ExternalConfig.from_api_repr(v) for k, v in resource.items()}
+
+
+def _to_api_repr_table_defs(value):
+ return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()}
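+
+# A minimal sketch (comment only, not executed): these helpers convert the
+# ``tableDefinitions`` mapping between ExternalConfig objects and their API
+# dict form. The table name is hypothetical.
+#
+#   _to_api_repr_table_defs({'ext_table': ExternalConfig('CSV')})
+#   # {'ext_table': {'sourceFormat': 'CSV'}}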
+
+
+class QueryJobConfig(object):
+ """Configuration options for query jobs.
+
+ All properties in this class are optional. Values which are ``None`` ->
+ server defaults.
+ """
+
+ _QUERY_PARAMETERS_KEY = 'queryParameters'
+ _UDF_RESOURCES_KEY = 'userDefinedFunctionResources'
+
+ def __init__(self):
+ self._properties = {}
+
+ def to_api_repr(self):
+ """Build an API representation of the copy job config.
+
+ :rtype: dict
+ :returns: A dictionary in the format used by the BigQuery API.
+ """
+ resource = copy.deepcopy(self._properties)
+
+        # Query parameters have an additional property associated with them
+ # to indicate if the query is using named or positional parameters.
+ query_parameters = resource.get(self._QUERY_PARAMETERS_KEY)
+ if query_parameters:
+ if query_parameters[0].name is None:
+ resource['parameterMode'] = 'POSITIONAL'
+ else:
+ resource['parameterMode'] = 'NAMED'
+
+ for prop, convert in self._NESTED_PROPERTIES.items():
+ _, to_resource = convert
+ nested_resource = resource.get(prop)
+ if nested_resource is not None:
+ resource[prop] = to_resource(nested_resource)
+
+ return resource
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct a job configuration given its API representation
+
+ :type resource: dict
+ :param resource:
+            A query job configuration in the same representation as is
+            returned from the API.
+
+        :rtype: :class:`google.cloud.bigquery.job.QueryJobConfig`
+ :returns: Configuration parsed from ``resource``.
+ """
+ config = cls()
+ config._properties = copy.deepcopy(resource)
+
+ for prop, convert in cls._NESTED_PROPERTIES.items():
+ from_resource, _ = convert
+ nested_resource = resource.get(prop)
+ if nested_resource is not None:
+ config._properties[prop] = from_resource(nested_resource)
+
+ return config
+
+ allow_large_results = _TypedApiResourceProperty(
+ 'allow_large_results', 'allowLargeResults', bool)
+ """See
+ https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.allowLargeResults
+ """
+
+ create_disposition = CreateDisposition(
+ 'create_disposition', 'createDisposition')
+ """See
+ https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.createDisposition
+ """
+
+ default_dataset = _TypedApiResourceProperty(
+ 'default_dataset', 'defaultDataset', DatasetReference)
+ """See
+ https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.defaultDataset
+ """
+
+ destination = _TypedApiResourceProperty(
+ 'destination', 'destinationTable', TableReference)
+ """See
+ https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.destinationTable
+ """
+
+ dry_run = _TypedApiResourceProperty('dry_run', 'dryRun', bool)
+ """See
+ https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.dryRun
+ """
+
+ flatten_results = _TypedApiResourceProperty(
+ 'flatten_results', 'flattenResults', bool)
+ """See
+ https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.flattenResults
+ """
+
+ maximum_billing_tier = _TypedApiResourceProperty(
+ 'maximum_billing_tier', 'maximumBillingTier', int)
+ """See
+ https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBillingTier
+ """
+
+ maximum_bytes_billed = _TypedApiResourceProperty(
+ 'maximum_bytes_billed', 'maximumBytesBilled', int)
+ """See
+ https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.maximumBytesBilled
+ """
+
+ priority = QueryPriority('priority', 'priority')
+ """See
+ https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.priority
+ """
+
+ query_parameters = _ListApiResourceProperty(
+ 'query_parameters', _QUERY_PARAMETERS_KEY, _AbstractQueryParameter)
+ """
+ A list of
+ :class:`google.cloud.bigquery.query.ArrayQueryParameter`,
+ :class:`google.cloud.bigquery.query.ScalarQueryParameter`, or
+ :class:`google.cloud.bigquery.query.StructQueryParameter`
+ (empty by default)
+
+ See:
+ https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.queryParameters
+ """
+
+ udf_resources = _ListApiResourceProperty(
+ 'udf_resources', _UDF_RESOURCES_KEY, UDFResource)
+ """
+ A list of :class:`google.cloud.bigquery.query.UDFResource` (empty
+ by default)
+
+ See:
+ https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.userDefinedFunctionResources
+ """
+
+ use_legacy_sql = _TypedApiResourceProperty(
+ 'use_legacy_sql', 'useLegacySql', bool)
+ """See
+ https://g.co/cloud/bigquery/docs/reference/v2/jobs#configuration.query.useLegacySql
+ """
+
+ use_query_cache = _TypedApiResourceProperty(
+ 'use_query_cache', 'useQueryCache', bool)
+ """See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.useQueryCache
+ """
+
+ write_disposition = WriteDisposition(
+ 'write_disposition', 'writeDisposition')
+ """See
+ https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.writeDisposition
+ """
+
+ table_definitions = _TypedApiResourceProperty(
+ 'table_definitions', 'tableDefinitions', dict)
+ """
+ Definitions for external tables. A dictionary from table names (strings)
+ to :class:`google.cloud.bigquery.external_config.ExternalConfig`.
+
+ See
+ https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions
+ """
+
+ _maximum_billing_tier = None
+ _maximum_bytes_billed = None
+
+ _NESTED_PROPERTIES = {
+ 'defaultDataset': (
+ DatasetReference.from_api_repr, DatasetReference.to_api_repr),
+ 'destinationTable': (
+ TableReference.from_api_repr, TableReference.to_api_repr),
+ 'maximumBytesBilled': (int, str),
+ 'tableDefinitions': (_from_api_repr_table_defs,
+ _to_api_repr_table_defs),
+ _QUERY_PARAMETERS_KEY: (
+ _from_api_repr_query_parameters, _to_api_repr_query_parameters),
+ _UDF_RESOURCES_KEY: (
+ _from_api_repr_udf_resources, _to_api_repr_udf_resources),
+ }
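+
+# A minimal sketch (comment only, not executed): named query parameters,
+# assuming the usual (name, type, value) constructor order for
+# ScalarQueryParameter. Because the first parameter has a name,
+# ``to_api_repr`` adds ``'parameterMode': 'NAMED'`` next to the serialized
+# parameter list.
+#
+#   config = QueryJobConfig()
+#   config.use_legacy_sql = False
+#   config.query_parameters = [
+#       ScalarQueryParameter('min_age', 'INT64', 18)]
+#   resource = config.to_api_repr()
+#   # resource['parameterMode'] == 'NAMED'
+#   # resource['useLegacySql'] is False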
+
+
+class QueryJob(_AsyncJob):
+ """Asynchronous job: query tables.
+
+ :type job_id: str
+ :param job_id: the job's ID, within the project belonging to ``client``.
+
+ :type query: str
+ :param query: SQL query string
+
+ :type client: :class:`google.cloud.bigquery.client.Client`
+ :param client: A client which holds credentials and project configuration
+ for the dataset (which requires a project).
+
+ :type job_config: :class:`~google.cloud.bigquery.job.QueryJobConfig`
+ :param job_config:
+ (Optional) Extra configuration options for the query job.
+ """
+ _JOB_TYPE = 'query'
+ _UDF_KEY = 'userDefinedFunctionResources'
+
+ def __init__(self, job_id, query, client, job_config=None):
+ super(QueryJob, self).__init__(job_id, client)
+
+ if job_config is None:
+ job_config = QueryJobConfig()
+ if job_config.use_legacy_sql is None:
+ job_config.use_legacy_sql = False
+
+ self.query = query
+ self._configuration = job_config
+ self._query_results = None
+ self._done_timeout = None
+
+ @property
+ def allow_large_results(self):
+ """See
+ :class:`~google.cloud.bigquery.job.QueryJobConfig.allow_large_results`.
+ """
+ return self._configuration.allow_large_results
+
+ @property
+ def create_disposition(self):
+ """See
+ :class:`~google.cloud.bigquery.job.QueryJobConfig.create_disposition`.
+ """
+ return self._configuration.create_disposition
+
+ @property
+ def default_dataset(self):
+ """See
+ :class:`~google.cloud.bigquery.job.QueryJobConfig.default_dataset`.
+ """
+ return self._configuration.default_dataset
+
+ @property
+ def destination(self):
+ """See
+ :class:`~google.cloud.bigquery.job.QueryJobConfig.destination`.
+ """
+ return self._configuration.destination
+
+ @property
+ def dry_run(self):
+ """See
+ :class:`~google.cloud.bigquery.job.QueryJobConfig.dry_run`.
+ """
+ return self._configuration.dry_run
+
+ @property
+ def flatten_results(self):
+ """See
+ :class:`~google.cloud.bigquery.job.QueryJobConfig.flatten_results`.
+ """
+ return self._configuration.flatten_results
+
+ @property
+ def priority(self):
+ """See
+ :class:`~google.cloud.bigquery.job.QueryJobConfig.priority`.
+ """
+ return self._configuration.priority
+
+ @property
+ def query_parameters(self):
+ """See
+ :class:`~google.cloud.bigquery.job.QueryJobConfig.query_parameters`.
+ """
+ return self._configuration.query_parameters
+
+ @property
+ def udf_resources(self):
+ """See
+ :class:`~google.cloud.bigquery.job.QueryJobConfig.udf_resources`.
+ """
+ return self._configuration.udf_resources
+
+ @property
+ def use_legacy_sql(self):
+ """See
+ :class:`~google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`.
+ """
+ return self._configuration.use_legacy_sql
+
+ @property
+ def use_query_cache(self):
+ """See
+ :class:`~google.cloud.bigquery.job.QueryJobConfig.use_query_cache`.
+ """
+ return self._configuration.use_query_cache
+
+ @property
+ def write_disposition(self):
+ """See
+ :class:`~google.cloud.bigquery.job.QueryJobConfig.write_disposition`.
+ """
+ return self._configuration.write_disposition
+
+ @property
+ def maximum_billing_tier(self):
+ """See
+ :class:`~google.cloud.bigquery.job.QueryJobConfig.maximum_billing_tier`.
+ """
+ return self._configuration.maximum_billing_tier
+
+ @property
+ def maximum_bytes_billed(self):
+ """See
+ :class:`~google.cloud.bigquery.job.QueryJobConfig.maximum_bytes_billed`.
+ """
+ return self._configuration.maximum_bytes_billed
+
+ @property
+ def table_definitions(self):
+ """See
+ :class:`~google.cloud.bigquery.job.QueryJobConfig.table_definitions`.
+ """
+ return self._configuration.table_definitions
+
+ def _build_resource(self):
+ """Generate a resource for :meth:`begin`."""
+ configuration = self._configuration.to_api_repr()
+
+ resource = {
+ 'jobReference': {
+ 'projectId': self.project,
+ 'jobId': self.job_id,
+ },
+ 'configuration': {
+ self._JOB_TYPE: configuration,
+ },
+ }
+
+ # The dryRun property only applies to query jobs, but it is defined at
+ # a level higher up. We need to remove it from the query config.
+ if 'dryRun' in configuration:
+ dry_run = configuration['dryRun']
+ del configuration['dryRun']
+ resource['configuration']['dryRun'] = dry_run
+
+ configuration['query'] = self.query
+
+ return resource
+
+ def _scrub_local_properties(self, cleaned):
+ """Helper: handle subclass properties in cleaned.
+
+ .. note:
+
+ This method assumes that the project found in the resource matches
+ the client's project.
+ """
+ configuration = cleaned['configuration']['query']
+ self.query = configuration['query']
+
+ # The dryRun property only applies to query jobs, but it is defined at
+ # a level higher up. We need to copy it to the query config.
+ self._configuration.dry_run = cleaned['configuration'].get('dryRun')
+
+ def _copy_configuration_properties(self, configuration):
+ """Helper: assign subclass configuration properties in cleaned."""
+ # The dryRun property only applies to query jobs, but it is defined at
+ # a level higher up. We need to copy it to the query config.
+ # It should already be correctly set by the _scrub_local_properties()
+ # method.
+ dry_run = self.dry_run
+ self._configuration = QueryJobConfig.from_api_repr(configuration)
+ self._configuration.dry_run = dry_run
+
+ @classmethod
+ def from_api_repr(cls, resource, client):
+ """Factory: construct a job given its API representation
+
+ :type resource: dict
+ :param resource: dataset job representation returned from the API
+
+ :type client: :class:`google.cloud.bigquery.client.Client`
+ :param client: Client which holds credentials and project
+ configuration for the dataset.
+
+        :rtype: :class:`google.cloud.bigquery.job.QueryJob`
+ :returns: Job parsed from ``resource``.
+ """
+ job_id, config = cls._get_resource_config(resource)
+ query = config['query']
+ job = cls(job_id, query, client=client)
+ job._set_properties(resource)
+ return job
+
+ @property
+ def query_plan(self):
+ """Return query plan from job statistics, if present.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.queryPlan
+
+ :rtype: list of :class:`QueryPlanEntry`
+        :returns: plan entries describing the query plan, or an empty list
+ if the query has not yet completed.
+ """
+ plan_entries = self._job_statistics().get('queryPlan', ())
+ return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries]
+
+ @property
+ def total_bytes_processed(self):
+ """Return total bytes processed from job statistics, if present.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.totalBytesProcessed
+
+ :rtype: int or None
+ :returns: total bytes processed by the job, or None if job is not
+ yet complete.
+ """
+ result = self._job_statistics().get('totalBytesProcessed')
+ if result is not None:
+ result = int(result)
+ return result
+
+ @property
+ def total_bytes_billed(self):
+ """Return total bytes billed from job statistics, if present.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.totalBytesBilled
+
+ :rtype: int or None
+ :returns: total bytes billed for the job, or None if job is not
+ yet complete.
+ """
+ result = self._job_statistics().get('totalBytesBilled')
+ if result is not None:
+ result = int(result)
+ return result
+
+ @property
+ def billing_tier(self):
+ """Return billing tier from job statistics, if present.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.billingTier
+
+ :rtype: int or None
+ :returns: billing tier used by the job, or None if job is not
+ yet complete.
+ """
+ return self._job_statistics().get('billingTier')
+
+ @property
+ def cache_hit(self):
+ """Return billing tier from job statistics, if present.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.cacheHit
+
+ :rtype: bool or None
+ :returns: whether the query results were returned from cache, or None
+ if job is not yet complete.
+ """
+ return self._job_statistics().get('cacheHit')
+
+ @property
+ def num_dml_affected_rows(self):
+ """Return total bytes billed from job statistics, if present.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.numDmlAffectedRows
+
+ :rtype: int or None
+ :returns: number of DML rows affected by the job, or None if job is not
+ yet complete.
+ """
+ result = self._job_statistics().get('numDmlAffectedRows')
+ if result is not None:
+ result = int(result)
+ return result
+
+ @property
+ def statement_type(self):
+ """Return statement type from job statistics, if present.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.statementType
+
+ :rtype: str or None
+ :returns: type of statement used by the job, or None if job is not
+ yet complete.
+ """
+ return self._job_statistics().get('statementType')
+
+ @property
+ def referenced_tables(self):
+ """Return referenced tables from job statistics, if present.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.referencedTables
+
+ :rtype: list of :class:`~google.cloud.bigquery.table.TableReference`
+ :returns: references to tables read by the query, or an empty list
+ if the query has not yet completed.
+ """
+ tables = []
+ datasets_by_project_name = {}
+
+ for table in self._job_statistics().get('referencedTables', ()):
+
+ t_project = table['projectId']
+
+ ds_id = table['datasetId']
+ t_dataset = datasets_by_project_name.get((t_project, ds_id))
+ if t_dataset is None:
+ t_dataset = DatasetReference(t_project, ds_id)
+ datasets_by_project_name[(t_project, ds_id)] = t_dataset
+
+ t_name = table['tableId']
+ tables.append(t_dataset.table(t_name))
+
+ return tables
+
+ @property
+ def undeclared_query_parameters(self):
+ """Return undeclared query parameters from job statistics, if present.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.undeclaredQueryParameters
+
+ :rtype:
+ list of
+ :class:`~google.cloud.bigquery.query.ArrayQueryParameter`,
+ :class:`~google.cloud.bigquery.query.ScalarQueryParameter`, or
+ :class:`~google.cloud.bigquery.query.StructQueryParameter`
+ :returns: undeclared parameters, or an empty list if the query has
+ not yet completed.
+ """
+ parameters = []
+ undeclared = self._job_statistics().get('undeclaredQueryParameters', ())
+
+ for parameter in undeclared:
+ p_type = parameter['parameterType']
+
+ if 'arrayType' in p_type:
+ klass = ArrayQueryParameter
+ elif 'structTypes' in p_type:
+ klass = StructQueryParameter
+ else:
+ klass = ScalarQueryParameter
+
+ parameters.append(klass.from_api_repr(parameter))
+
+ return parameters
+
+ def query_results(self, retry=DEFAULT_RETRY):
+ """Construct a QueryResults instance, bound to this job.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the RPC.
+
+ :rtype: :class:`~google.cloud.bigquery.query.QueryResults`
+ :returns: results instance
+ """
+ if not self._query_results:
+ self._query_results = self._client._get_query_results(
+ self.job_id, retry, project=self.project)
+ return self._query_results
+
+ def done(self, retry=DEFAULT_RETRY):
+ """Refresh the job and checks if it is complete.
+
+ :rtype: bool
+ :returns: True if the job is complete, False otherwise.
+ """
+ # Since the API to getQueryResults can hang up to the timeout value
+ # (default of 10 seconds), set the timeout parameter to ensure that
+ # the timeout from the futures API is respected. See:
+ # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135
+ timeout_ms = None
+ if self._done_timeout is not None:
+ # Subtract a buffer for context switching, network latency, etc.
+ timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS
+ timeout = max(min(timeout, 10), 0)
+ self._done_timeout -= timeout
+ self._done_timeout = max(0, self._done_timeout)
+ timeout_ms = int(timeout * 1000)
+
+ # Do not refresh if the state is already done, as the job will not
+ # change once complete.
+ if self.state != _DONE_STATE:
+ self._query_results = self._client._get_query_results(
+ self.job_id, retry,
+ project=self.project, timeout_ms=timeout_ms)
+
+ # Only reload the job once we know the query is complete.
+ # This will ensure that fields such as the destination table are
+ # correctly populated.
+ if self._query_results.complete:
+ self.reload(retry=retry)
+
+ return self.state == _DONE_STATE
+
+ def _blocking_poll(self, timeout=None):
+ self._done_timeout = timeout
+ super(QueryJob, self)._blocking_poll(timeout=timeout)
+
+ def result(self, timeout=None, retry=DEFAULT_RETRY):
+ """Start the job and wait for it to complete and get the result.
+
+ :type timeout: float
+ :param timeout:
+ How long to wait for job to complete before raising a
+ :class:`TimeoutError`.
+
+ :type retry: :class:`google.api_core.retry.Retry`
+ :param retry: (Optional) How to retry the call that retrieves rows.
+
+ :rtype: :class:`~google.api_core.page_iterator.Iterator`
+ :returns:
+ Iterator of row data :class:`tuple`s. During each page, the
+ iterator will have the ``total_rows`` attribute set, which counts
+ the total number of rows **in the result set** (this is distinct
+ from the total number of rows in the current page:
+ ``iterator.page.num_items``).
+
+ :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the job
+ failed or :class:`TimeoutError` if the job did not complete in the
+ given timeout.
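+
+ Example (illustrative sketch; assumes ``job`` is a started
+ :class:`QueryJob` whose query has a small result set)::
+
+     for row in job.result():
+         print(row)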
+ """
+ super(QueryJob, self).result(timeout=timeout)
+ # Return an iterator instead of returning the job.
+ schema = self.query_results().schema
+ dest_table = self.destination
+ return self._client.list_rows(dest_table, selected_fields=schema,
+ retry=retry)
+
+
+class QueryPlanEntryStep(object):
+ """Map a single step in a query plan entry.
+
+ :type kind: str
+ :param kind: step type
+
+ :type substeps: list of str
+ :param substeps: names of substeps
+ """
+ def __init__(self, kind, substeps):
+ self.kind = kind
+ self.substeps = list(substeps)
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct instance from the JSON repr.
+
+ :type resource: dict
+ :param resource: JSON representation of the entry
+
+ :rtype: :class:`QueryPlanEntryStep`
+ :return: new instance built from the resource
+ """
+ return cls(
+ kind=resource.get('kind'),
+ substeps=resource.get('substeps', ()),
+ )
+
+ def __eq__(self, other):
+ if not isinstance(other, self.__class__):
+ return NotImplemented
+ return self.kind == other.kind and self.substeps == other.substeps
+
+
+class QueryPlanEntry(object):
+ """Map a single entry in a query plan.
+
+ :type name: str
+ :param name: name of the entry
+
+ :type entry_id: int
+ :param entry_id: ID of the entry
+
+ :type wait_ratio_avg: float
+ :param wait_ratio_avg: average wait ratio
+
+ :type wait_ratio_max: float
+ :param wait_ratio_max: maximum wait ratio
+
+ :type read_ratio_avg: float
+ :param read_ratio_avg: average read ratio
+
+ :type read_ratio_max: float
+ :param read_ratio_max: maximum read ratio
+
+ :type compute_ratio_avg: float
+ :param compute_ratio_avg: average compute ratio
+
+ :type compute_ratio_max: float
+ :param compute_ratio_max: maximum compute ratio
+
+ :type write_ratio_avg: float
+ :param write_ratio_avg: average write ratio
+
+ :type write_ratio_max: float
+ :param write_ratio_max: maximum write ratio
+
+ :type records_read: int
+ :param records_read: number of records read
+
+ :type records_written: int
+ :param records_written: number of records written
+
+ :type status: str
+ :param status: entry status
+
+ :type steps: List(QueryPlanEntryStep)
+ :param steps: steps in the entry
+ """
+ def __init__(self,
+ name,
+ entry_id,
+ wait_ratio_avg,
+ wait_ratio_max,
+ read_ratio_avg,
+ read_ratio_max,
+ compute_ratio_avg,
+ compute_ratio_max,
+ write_ratio_avg,
+ write_ratio_max,
+ records_read,
+ records_written,
+ status,
+ steps):
+ self.name = name
+ self.entry_id = entry_id
+ self.wait_ratio_avg = wait_ratio_avg
+ self.wait_ratio_max = wait_ratio_max
+ self.read_ratio_avg = read_ratio_avg
+ self.read_ratio_max = read_ratio_max
+ self.compute_ratio_avg = compute_ratio_avg
+ self.compute_ratio_max = compute_ratio_max
+ self.write_ratio_avg = write_ratio_avg
+ self.write_ratio_max = write_ratio_max
+ self.records_read = records_read
+ self.records_written = records_written
+ self.status = status
+ self.steps = steps
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct instance from the JSON repr.
+
+ :type resource: dict
+ :param resource: JSON representation of the entry
+
+ :rtype: :class:`QueryPlanEntry`
+ :return: new instance built from the resource
+ """
+ records_read = resource.get('recordsRead')
+ if records_read is not None:
+ records_read = int(records_read)
+
+ records_written = resource.get('recordsWritten')
+ if records_written is not None:
+ records_written = int(records_written)
+
+ return cls(
+ name=resource.get('name'),
+ entry_id=resource.get('id'),
+ wait_ratio_avg=resource.get('waitRatioAvg'),
+ wait_ratio_max=resource.get('waitRatioMax'),
+ read_ratio_avg=resource.get('readRatioAvg'),
+ read_ratio_max=resource.get('readRatioMax'),
+ compute_ratio_avg=resource.get('computeRatioAvg'),
+ compute_ratio_max=resource.get('computeRatioMax'),
+ write_ratio_avg=resource.get('writeRatioAvg'),
+ write_ratio_max=resource.get('writeRatioMax'),
+ records_read=records_read,
+ records_written=records_written,
+ status=resource.get('status'),
+ steps=[QueryPlanEntryStep.from_api_repr(step)
+ for step in resource.get('steps', ())],
+ )
diff --git a/bigquery/google/cloud/bigquery/query.py b/bigquery/google/cloud/bigquery/query.py
new file mode 100644
index 0000000..9577fa5
--- /dev/null
+++ b/bigquery/google/cloud/bigquery/query.py
@@ -0,0 +1,633 @@
+# Copyright 2015 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""BigQuery query processing."""
+
+from collections import OrderedDict
+import copy
+
+from google.cloud.bigquery.table import _parse_schema_resource
+from google.cloud.bigquery._helpers import _rows_from_json
+from google.cloud.bigquery._helpers import _QUERY_PARAMS_FROM_JSON
+from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_PARAM
+
+
+class UDFResource(object):
+ """Describe a single user-defined function (UDF) resource.
+
+ :type udf_type: str
+ :param udf_type: the type of the resource ('inlineCode' or 'resourceUri')
+
+ :type value: str
+ :param value: the inline code or resource URI.
+
+ See
+ https://cloud.google.com/bigquery/user-defined-functions#api
+ """
+ def __init__(self, udf_type, value):
+ self.udf_type = udf_type
+ self.value = value
+
+ def __eq__(self, other):
+ if not isinstance(other, UDFResource):
+ return NotImplemented
+ return (
+ self.udf_type == other.udf_type and
+ self.value == other.value)
+
+ def __ne__(self, other):
+ return not self == other
+
+
+class _AbstractQueryParameter(object):
+ """Base class for named / positional query parameters.
+ """
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct parameter from JSON resource.
+
+ :type resource: dict
+ :param resource: JSON mapping of parameter
+
+ :rtype: :class:`ScalarQueryParameter`
+ """
+ raise NotImplementedError
+
+ def to_api_repr(self):
+ """Construct JSON API representation for the parameter.
+
+ :rtype: dict
+ """
+ raise NotImplementedError
+
+
+class ScalarQueryParameter(_AbstractQueryParameter):
+ """Named / positional query parameters for scalar values.
+
+ :type name: str or None
+ :param name: Parameter name, used via ``@foo`` syntax. If None, the
+ parameter can only be addressed via position (``?``).
+
+ :type type_: str
+ :param type_: name of parameter type. One of 'STRING', 'INT64',
+ 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'.
+
+ :type value: str, int, float, bool, :class:`datetime.datetime`, or
+ :class:`datetime.date`.
+ :param value: the scalar parameter value.
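+
+ Example (illustrative sketch; the parameter name and value here are
+ arbitrary choices for illustration)::
+
+     param = ScalarQueryParameter('min_age', 'INT64', 18)
+     resource = param.to_api_repr()  # JSON mapping sent to the API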
+ """
+ def __init__(self, name, type_, value):
+ self.name = name
+ self.type_ = type_
+ self.value = value
+
+ @classmethod
+ def positional(cls, type_, value):
+ """Factory for positional paramater.
+
+ :type type_: str
+ :param type_:
+ name of parameter type. One of 'STRING', 'INT64',
+ 'FLOAT64', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'.
+
+ :type value: str, int, float, bool, :class:`datetime.datetime`, or
+ :class:`datetime.date`.
+ :param value: the scalar parameter value.
+
+ :rtype: :class:`ScalarQueryParameter`
+ :returns: instance without name
+ """
+ return cls(None, type_, value)
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct parameter from JSON resource.
+
+ :type resource: dict
+ :param resource: JSON mapping of parameter
+
+ :rtype: :class:`ScalarQueryParameter`
+ :returns: instance
+ """
+ name = resource.get('name')
+ type_ = resource['parameterType']['type']
+ value = resource['parameterValue']['value']
+ converted = _QUERY_PARAMS_FROM_JSON[type_](value, None)
+ return cls(name, type_, converted)
+
+ def to_api_repr(self):
+ """Construct JSON API representation for the parameter.
+
+ :rtype: dict
+ :returns: JSON mapping
+ """
+ value = self.value
+ converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_)
+ if converter is not None:
+ value = converter(value)
+ resource = {
+ 'parameterType': {
+ 'type': self.type_,
+ },
+ 'parameterValue': {
+ 'value': value,
+ },
+ }
+ if self.name is not None:
+ resource['name'] = self.name
+ return resource
+
+ def _key(self):
+ """A tuple key that uniquely describes this field.
+
+ Used to compute this instance's hashcode and evaluate equality.
+
+ Returns:
+ tuple: The contents of this :class:`ScalarQueryParameter`.
+ """
+ return (
+ self.name,
+ self.type_.upper(),
+ self.value,
+ )
+
+ def __eq__(self, other):
+ if not isinstance(other, ScalarQueryParameter):
+ return NotImplemented
+ return self._key() == other._key()
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __repr__(self):
+ return 'ScalarQueryParameter{}'.format(self._key())
+
+
+class ArrayQueryParameter(_AbstractQueryParameter):
+ """Named / positional query parameters for array values.
+
+ :type name: str or None
+ :param name: Parameter name, used via ``@foo`` syntax. If None, the
+ parameter can only be addressed via position (``?``).
+
+ :type array_type: str
+ :param array_type:
+ name of type of array elements. One of `'STRING'`, `'INT64'`,
+ `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`.
+
+ :type values: list of appropriate scalar type.
+ :param values: the parameter array values.
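+
+ Example (illustrative sketch; the parameter name and values here are
+ arbitrary choices for illustration)::
+
+     param = ArrayQueryParameter('ages', 'INT64', [27, 29, 32])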
+ """
+ def __init__(self, name, array_type, values):
+ self.name = name
+ self.array_type = array_type
+ self.values = values
+
+ @classmethod
+ def positional(cls, array_type, values):
+ """Factory for positional parameters.
+
+ :type array_type: str
+ :param array_type:
+ name of type of array elements. One of `'STRING'`, `'INT64'`,
+ `'FLOAT64'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`.
+
+ :type values: list of appropriate scalar type
+ :param values: the parameter array values.
+
+ :rtype: :class:`ArrayQueryParameter`
+ :returns: instance without name
+ """
+ return cls(None, array_type, values)
+
+ @classmethod
+ def _from_api_repr_struct(cls, resource):
+ name = resource.get('name')
+ converted = []
+ # We need to flatten the array to use the StructQueryParameter
+ # parse code.
+ resource_template = {
+ # The arrayType includes all the types of the fields of the STRUCT
+ 'parameterType': resource['parameterType']['arrayType']
+ }
+ for array_value in resource['parameterValue']['arrayValues']:
+ struct_resource = copy.deepcopy(resource_template)
+ struct_resource['parameterValue'] = array_value
+ struct_value = StructQueryParameter.from_api_repr(struct_resource)
+ converted.append(struct_value)
+ return cls(name, 'STRUCT', converted)
+
+ @classmethod
+ def _from_api_repr_scalar(cls, resource):
+ name = resource.get('name')
+ array_type = resource['parameterType']['arrayType']['type']
+ values = [
+ value['value']
+ for value
+ in resource['parameterValue']['arrayValues']]
+ converted = [
+ _QUERY_PARAMS_FROM_JSON[array_type](value, None)
+ for value in values
+ ]
+ return cls(name, array_type, converted)
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct parameter from JSON resource.
+
+ :type resource: dict
+ :param resource: JSON mapping of parameter
+
+ :rtype: :class:`ArrayQueryParameter`
+ :returns: instance
+ """
+ array_type = resource['parameterType']['arrayType']['type']
+ if array_type == 'STRUCT':
+ return cls._from_api_repr_struct(resource)
+ return cls._from_api_repr_scalar(resource)
+
+ def to_api_repr(self):
+ """Construct JSON API representation for the parameter.
+
+ :rtype: dict
+ :returns: JSON mapping
+ """
+ values = self.values
+ if self.array_type == 'RECORD' or self.array_type == 'STRUCT':
+ reprs = [value.to_api_repr() for value in values]
+ a_type = reprs[0]['parameterType']
+ a_values = [repr_['parameterValue'] for repr_ in reprs]
+ else:
+ a_type = {'type': self.array_type}
+ converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.array_type)
+ if converter is not None:
+ values = [converter(value) for value in values]
+ a_values = [{'value': value} for value in values]
+ resource = {
+ 'parameterType': {
+ 'type': 'ARRAY',
+ 'arrayType': a_type,
+ },
+ 'parameterValue': {
+ 'arrayValues': a_values,
+ },
+ }
+ if self.name is not None:
+ resource['name'] = self.name
+ return resource
+
+ def _key(self):
+ """A tuple key that uniquely describes this field.
+
+ Used to compute this instance's hashcode and evaluate equality.
+
+ Returns:
+ tuple: The contents of this :class:`ArrayQueryParameter`.
+ """
+ return (
+ self.name,
+ self.array_type.upper(),
+ self.values,
+ )
+
+ def __eq__(self, other):
+ if not isinstance(other, ArrayQueryParameter):
+ return NotImplemented
+ return self._key() == other._key()
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __repr__(self):
+ return 'ArrayQueryParameter{}'.format(self._key())
+
+
+class StructQueryParameter(_AbstractQueryParameter):
+ """Named / positional query parameters for struct values.
+
+ :type name: str or None
+ :param name: Parameter name, used via ``@foo`` syntax. If None, the
+ parameter can only be addressed via position (``?``).
+
+ :type sub_params: tuple of :class:`ScalarQueryParameter`
+ :param sub_params: the sub-parameters for the struct
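+
+ Example (illustrative sketch; the field names and values here are
+ arbitrary choices for illustration)::
+
+     name = ScalarQueryParameter('name', 'STRING', 'Phred')
+     age = ScalarQueryParameter('age', 'INT64', 32)
+     person = StructQueryParameter('person', name, age)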
+ """
+ def __init__(self, name, *sub_params):
+ self.name = name
+ types = self.struct_types = OrderedDict()
+ values = self.struct_values = {}
+ for sub in sub_params:
+ if isinstance(sub, self.__class__):
+ types[sub.name] = 'STRUCT'
+ values[sub.name] = sub
+ elif isinstance(sub, ArrayQueryParameter):
+ types[sub.name] = 'ARRAY'
+ values[sub.name] = sub
+ else:
+ types[sub.name] = sub.type_
+ values[sub.name] = sub.value
+
+ @classmethod
+ def positional(cls, *sub_params):
+ """Factory for positional parameters.
+
+ :type sub_params: tuple of :class:`ScalarQueryParameter`
+ :param sub_params: the sub-parameters for the struct
+
+ :rtype: :class:`StructQueryParameter`
+ :returns: instance without name
+ """
+ return cls(None, *sub_params)
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct parameter from JSON resource.
+
+ :type resource: dict
+ :param resource: JSON mapping of parameter
+
+ :rtype: :class:`StructQueryParameter`
+ :returns: instance
+ """
+ name = resource.get('name')
+ instance = cls(name)
+ type_resources = {}
+ types = instance.struct_types
+ for item in resource['parameterType']['structTypes']:
+ types[item['name']] = item['type']['type']
+ type_resources[item['name']] = item['type']
+ struct_values = resource['parameterValue']['structValues']
+ for key, value in struct_values.items():
+ type_ = types[key]
+ converted = None
+ if type_ == 'STRUCT':
+ struct_resource = {
+ 'name': key,
+ 'parameterType': type_resources[key],
+ 'parameterValue': value,
+ }
+ converted = StructQueryParameter.from_api_repr(struct_resource)
+ elif type_ == 'ARRAY':
+ struct_resource = {
+ 'name': key,
+ 'parameterType': type_resources[key],
+ 'parameterValue': value,
+ }
+ converted = ArrayQueryParameter.from_api_repr(struct_resource)
+ else:
+ value = value['value']
+ converted = _QUERY_PARAMS_FROM_JSON[type_](value, None)
+ instance.struct_values[key] = converted
+ return instance
+
+ def to_api_repr(self):
+ """Construct JSON API representation for the parameter.
+
+ :rtype: dict
+ :returns: JSON mapping
+ """
+ s_types = {}
+ values = {}
+ for name, value in self.struct_values.items():
+ type_ = self.struct_types[name]
+ if type_ in ('STRUCT', 'ARRAY'):
+ repr_ = value.to_api_repr()
+ s_types[name] = {'name': name, 'type': repr_['parameterType']}
+ values[name] = repr_['parameterValue']
+ else:
+ s_types[name] = {'name': name, 'type': {'type': type_}}
+ converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_)
+ if converter is not None:
+ value = converter(value)
+ values[name] = {'value': value}
+
+ resource = {
+ 'parameterType': {
+ 'type': 'STRUCT',
+ 'structTypes': [s_types[key] for key in self.struct_types],
+ },
+ 'parameterValue': {
+ 'structValues': values,
+ },
+ }
+ if self.name is not None:
+ resource['name'] = self.name
+ return resource
+
+ def _key(self):
+ """A tuple key that uniquely describes this field.
+
+ Used to compute this instance's hashcode and evaluate equality.
+
+ Returns:
+ tuple: The contents of this :class:`StructQueryParameter`.
+ """
+ return (
+ self.name,
+ self.struct_types,
+ self.struct_values,
+ )
+
+ def __eq__(self, other):
+ if not isinstance(other, StructQueryParameter):
+ return NotImplemented
+ return self._key() == other._key()
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __repr__(self):
+ return 'StructQueryParameter{}'.format(self._key())
+
+
+class QueryResults(object):
+ """Results of a query.
+
+ See:
+ https://g.co/cloud/bigquery/docs/reference/rest/v2/jobs/getQueryResults
+ """
+
+ def __init__(self, properties):
+ self._properties = {}
+ self._set_properties(properties)
+
+ @classmethod
+ def from_api_repr(cls, api_response):
+ return cls(api_response)
+
+ @property
+ def project(self):
+ """Project bound to the query job.
+
+ :rtype: str
+ :returns: the project that the query job is associated with.
+ """
+ return self._properties.get('jobReference', {}).get('projectId')
+
+ @property
+ def cache_hit(self):
+ """Query results served from cache.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#cacheHit
+
+ :rtype: bool or ``NoneType``
+ :returns: True if the query results were served from cache (None
+ until set by the server).
+ """
+ return self._properties.get('cacheHit')
+
+ @property
+ def complete(self):
+ """Server completed query.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#jobComplete
+
+ :rtype: bool or ``NoneType``
+ :returns: True if the query completed on the server (None
+ until set by the server).
+ """
+ return self._properties.get('jobComplete')
+
+ @property
+ def errors(self):
+ """Errors generated by the query.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#errors
+
+ :rtype: list of mapping, or ``NoneType``
+ :returns: Mappings describing errors generated on the server (None
+ until set by the server).
+ """
+ return self._properties.get('errors')
+
+ @property
+ def job_id(self):
+ """Job ID of the query job these results are from.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#jobReference
+
+ :rtype: str
+ :returns: Job ID of the query job.
+ """
+ return self._properties.get('jobReference', {}).get('jobId')
+
+ @property
+ def page_token(self):
+ """Token for fetching next bach of results.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#pageToken
+
+ :rtype: str, or ``NoneType``
+ :returns: Token generated on the server (None until set by the server).
+ """
+ return self._properties.get('pageToken')
+
+ @property
+ def total_rows(self):
+ """Total number of rows returned by the query.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#totalRows
+
+ :rtype: int, or ``NoneType``
+ :returns: Count generated on the server (None until set by the server).
+ """
+ total_rows = self._properties.get('totalRows')
+ if total_rows is not None:
+ return int(total_rows)
+
+ @property
+ def total_bytes_processed(self):
+ """Total number of bytes processed by the query.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#totalBytesProcessed
+
+ :rtype: int, or ``NoneType``
+ :returns: Count generated on the server (None until set by the server).
+ """
+ total_bytes_processed = self._properties.get('totalBytesProcessed')
+ if total_bytes_processed is not None:
+ return int(total_bytes_processed)
+
+ @property
+ def num_dml_affected_rows(self):
+ """Total number of rows affected by a DML query.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#numDmlAffectedRows
+
+ :rtype: int, or ``NoneType``
+ :returns: Count generated on the server (None until set by the server).
+ """
+ num_dml_affected_rows = self._properties.get('numDmlAffectedRows')
+ if num_dml_affected_rows is not None:
+ return int(num_dml_affected_rows)
+
+ @property
+ def rows(self):
+ """Query results.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#rows
+
+ :rtype: list of :class:`~google.cloud.bigquery.Row`
+ :returns: row data parsed from the response (an empty list until set by the server).
+ """
+ return _rows_from_json(self._properties.get('rows', ()), self.schema)
+
+ @property
+ def schema(self):
+ """Schema for query results.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#schema
+
+ :rtype: list of :class:`SchemaField`, or ``NoneType``
+ :returns: fields describing the schema (None until set by the server).
+ """
+ return _parse_schema_resource(self._properties.get('schema', {}))
+
+ def _set_properties(self, api_response):
+ """Update properties from resource in body of ``api_response``
+
+ :type api_response: dict
+ :param api_response: response returned from an API call
+ """
+ job_id_present = (
+ 'jobReference' in api_response
+ and 'jobId' in api_response['jobReference']
+ and 'projectId' in api_response['jobReference'])
+ if not job_id_present:
+ raise ValueError('QueryResult requires a job reference')
+
+ self._properties.clear()
+ self._properties.update(copy.deepcopy(api_response))
+
+
+def _query_param_from_api_repr(resource):
+ """Helper: construct concrete query parameter from JSON resource."""
+ qp_type = resource['parameterType']
+ if 'arrayType' in qp_type:
+ klass = ArrayQueryParameter
+ elif 'structTypes' in qp_type:
+ klass = StructQueryParameter
+ else:
+ klass = ScalarQueryParameter
+ return klass.from_api_repr(resource)
diff --git a/bigquery/google/cloud/bigquery/schema.py b/bigquery/google/cloud/bigquery/schema.py
new file mode 100644
index 0000000..1aa9527
--- /dev/null
+++ b/bigquery/google/cloud/bigquery/schema.py
@@ -0,0 +1,203 @@
+# Copyright 2015 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Schemas for BigQuery tables / queries."""
+
+
+class SchemaField(object):
+ """Describe a single field within a table schema.
+
+ :type name: str
+ :param name: the name of the field.
+
+ :type field_type: str
+ :param field_type: the type of the field (one of 'STRING', 'INTEGER',
+ 'FLOAT', 'BOOLEAN', 'TIMESTAMP' or 'RECORD').
+
+ :type mode: str
+ :param mode: the mode of the field (one of 'NULLABLE', 'REQUIRED',
+ or 'REPEATED').
+
+ :type description: str
+ :param description: optional description for the field.
+
+ :type fields: tuple of :class:`SchemaField`
+ :param fields: subfields (requires ``field_type`` of 'RECORD').
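+
+ Example (illustrative sketch; the field names here are arbitrary
+ choices for illustration)::
+
+     full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+     address = SchemaField(
+         'address', 'RECORD',
+         fields=[SchemaField('city', 'STRING'),
+                 SchemaField('zip_code', 'STRING')])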
+ """
+ def __init__(self, name, field_type, mode='NULLABLE',
+ description=None, fields=()):
+ self._name = name
+ self._field_type = field_type
+ self._mode = mode
+ self._description = description
+ self._fields = tuple(fields)
+
+ @classmethod
+ def from_api_repr(cls, api_repr):
+ """Return a ``SchemaField`` object deserialized from a dictionary.
+
+ Args:
+ api_repr (Mapping[str, str]): The serialized representation
+ of the SchemaField, such as what is output by
+ :meth:`to_api_repr`.
+
+ Returns:
+ SchemaField: The ``SchemaField`` object.
+ """
+ return cls(
+ field_type=api_repr['type'].upper(),
+ fields=[cls.from_api_repr(f) for f in api_repr.get('fields', ())],
+ mode=api_repr['mode'].upper(),
+ name=api_repr['name'],
+ )
+
+ @property
+ def name(self):
+ """str: The name of the field."""
+ return self._name
+
+ @property
+ def field_type(self):
+ """str: The type of the field.
+
+ Will be one of 'STRING', 'INTEGER', 'FLOAT', 'BOOLEAN',
+ 'TIMESTAMP' or 'RECORD'.
+ """
+ return self._field_type
+
+ @property
+ def mode(self):
+ """str: The mode of the field.
+
+ Will be one of 'NULLABLE', 'REQUIRED', or 'REPEATED'.
+ """
+ return self._mode
+
+ @property
+ def is_nullable(self):
+ """Check whether 'mode' is 'nullable'."""
+ return self._mode == 'NULLABLE'
+
+ @property
+ def description(self):
+ """Optional[str]: Description for the field."""
+ return self._description
+
+ @property
+ def fields(self):
+ """tuple: Subfields contained in this field.
+
+ If ``field_type`` is not 'RECORD', this property must be
+ empty / unset.
+ """
+ return self._fields
+
+ def to_api_repr(self):
+ """Return a dictionary representing this schema field.
+
+ Returns:
+ dict: A dictionary representing the SchemaField in a serialized
+ form.
+ """
+ # Put together the basic representation. See http://bit.ly/2hOAT5u.
+ answer = {
+ 'mode': self.mode.lower(),
+ 'name': self.name,
+ 'type': self.field_type.lower(),
+ }
+
+ # If this is a RECORD type, then sub-fields are also included,
+ # add this to the serialized representation.
+ if self.field_type.upper() == 'RECORD':
+ answer['fields'] = [f.to_api_repr() for f in self.fields]
+
+ # Done; return the serialized dictionary.
+ return answer
+
+ def _key(self):
+ """A tuple key that uniquely describes this field.
+
+ Used to compute this instance's hashcode and evaluate equality.
+
+ Returns:
+ tuple: The contents of this :class:`SchemaField`.
+ """
+ return (
+ self._name,
+ self._field_type.lower(),
+ self._mode,
+ self._description,
+ self._fields,
+ )
+
+ def __eq__(self, other):
+ if not isinstance(other, SchemaField):
+ return NotImplemented
+ return self._key() == other._key()
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __hash__(self):
+ return hash(self._key())
+
+ def __repr__(self):
+ return 'SchemaField{}'.format(self._key())
+
+
+def _parse_schema_resource(info):
+ """Parse a resource fragment into a schema field.
+
+ :type info: mapping
+ :param info: should contain a "fields" key to be parsed
+
+ :rtype: list of :class:`SchemaField`
+ :returns: a list of parsed fields, or an empty list if no "fields" key
+ is present in ``info``.
+ """
+ if 'fields' not in info:
+ return ()
+
+ schema = []
+ for r_field in info['fields']:
+ name = r_field['name']
+ field_type = r_field['type']
+ mode = r_field.get('mode', 'NULLABLE')
+ description = r_field.get('description')
+ sub_fields = _parse_schema_resource(r_field)
+ schema.append(
+ SchemaField(name, field_type, mode, description, sub_fields))
+ return schema
+
+
+def _build_schema_resource(fields):
+ """Generate a resource fragment for a schema.
+
+ :type fields: sequence of :class:`SchemaField`
+ :param fields: schema to be dumped
+
+ :rtype: mapping
+ :returns: a mapping describing the schema of the supplied fields.
+ """
+ infos = []
+ for field in fields:
+ info = {'name': field.name,
+ 'type': field.field_type,
+ 'mode': field.mode}
+ if field.description is not None:
+ info['description'] = field.description
+ if field.fields:
+ info['fields'] = _build_schema_resource(field.fields)
+ infos.append(info)
+ return infos
diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py
new file mode 100644
index 0000000..2b9dea0
--- /dev/null
+++ b/bigquery/google/cloud/bigquery/table.py
@@ -0,0 +1,760 @@
+# Copyright 2015 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Define API Datasets."""
+
+from __future__ import absolute_import
+
+import datetime
+
+import six
+
+from google.cloud._helpers import _datetime_from_microseconds
+from google.cloud._helpers import _millis_from_datetime
+from google.cloud.bigquery._helpers import _snake_to_camel_case
+from google.cloud.bigquery.schema import SchemaField
+from google.cloud.bigquery.schema import _build_schema_resource
+from google.cloud.bigquery.schema import _parse_schema_resource
+from google.cloud.bigquery.external_config import ExternalConfig
+
+
+_TABLE_HAS_NO_SCHEMA = "Table has no schema: call 'client.get_table()'"
+_MARKER = object()
+
+
+class TableReference(object):
+ """TableReferences are pointers to tables.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables
+
+ :type dataset_ref: :class:`google.cloud.bigquery.dataset.DatasetReference`
+ :param dataset_ref: a pointer to the dataset
+
+ :type table_id: str
+ :param table_id: the ID of the table
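+
+ Example (illustrative sketch; assumes
+ :class:`~google.cloud.bigquery.dataset.DatasetReference` is imported and
+ the project, dataset, and table IDs are arbitrary)::
+
+     dataset_ref = DatasetReference('my-project', 'my_dataset')
+     table_ref = TableReference(dataset_ref, 'my_table')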
+ """
+
+ def __init__(self, dataset_ref, table_id):
+ self._project = dataset_ref.project
+ self._dataset_id = dataset_ref.dataset_id
+ self._table_id = table_id
+
+ @property
+ def project(self):
+ """Project bound to the table.
+
+ :rtype: str
+ :returns: the project (derived from the dataset reference).
+ """
+ return self._project
+
+ @property
+ def dataset_id(self):
+ """ID of dataset containing the table.
+
+ :rtype: str
+ :returns: the ID (derived from the dataset reference).
+ """
+ return self._dataset_id
+
+ @property
+ def table_id(self):
+ """Table ID.
+
+ :rtype: str
+ :returns: the table ID.
+ """
+ return self._table_id
+
+ @property
+ def path(self):
+ """URL path for the table's APIs.
+
+ :rtype: str
+ :returns: the path based on project, dataset and table IDs.
+ """
+ return '/projects/%s/datasets/%s/tables/%s' % (
+ self._project, self._dataset_id, self._table_id)
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct a table reference given its API representation
+
+ :type resource: dict
+ :param resource: table reference representation returned from the API
+
+ :rtype: :class:`google.cloud.bigquery.table.TableReference`
+ :returns: Table reference parsed from ``resource``.
+ """
+ from google.cloud.bigquery.dataset import DatasetReference
+
+ project = resource['projectId']
+ dataset_id = resource['datasetId']
+ table_id = resource['tableId']
+ return cls(DatasetReference(project, dataset_id), table_id)
+
+ def to_api_repr(self):
+ """Construct the API resource representation of this table reference.
+
+ :rtype: dict
+ :returns: Table reference as represented as an API resource
+ """
+ return {
+ 'projectId': self._project,
+ 'datasetId': self._dataset_id,
+ 'tableId': self._table_id,
+ }
+
+ def _key(self):
+ """A tuple key that uniquely describes this field.
+
+ Used to compute this instance's hashcode and evaluate equality.
+
+ Returns:
+ tuple: The contents of this :class:`TableReference`.
+ """
+ return (
+ self._project,
+ self._dataset_id,
+ self._table_id,
+ )
+
+ def __eq__(self, other):
+ if not isinstance(other, TableReference):
+ return NotImplemented
+ return self._key() == other._key()
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __hash__(self):
+ return hash(self._key())
+
+ def __repr__(self):
+ return 'TableReference{}'.format(self._key())
+
+
+class Table(object):
+ """Tables represent a set of rows whose values correspond to a schema.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables
+
+ :type table_ref: :class:`google.cloud.bigquery.table.TableReference`
+ :param table_ref: a pointer to a table
+
+ :type schema: list of :class:`SchemaField`
+ :param schema: The table's schema
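+
+ Example (illustrative sketch; assumes ``dataset_ref`` is a
+ :class:`~google.cloud.bigquery.dataset.DatasetReference` and the schema
+ fields are arbitrary)::
+
+     table_ref = dataset_ref.table('my_table')
+     table = Table(table_ref, schema=[
+         SchemaField('full_name', 'STRING', mode='REQUIRED'),
+         SchemaField('age', 'INTEGER', mode='REQUIRED'),
+     ])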
+ """
+
+ _schema = None
+
+ all_fields = [
+ 'description', 'friendly_name', 'expires', 'location',
+ 'partitioning_type', 'view_use_legacy_sql', 'view_query', 'schema',
+ 'external_data_configuration', 'labels',
+ ]
+
+ def __init__(self, table_ref, schema=()):
+ self._project = table_ref.project
+ self._table_id = table_ref.table_id
+ self._dataset_id = table_ref.dataset_id
+ self._external_config = None
+ self._properties = {'labels': {}}
+ # Let the @property do validation.
+ self.schema = schema
+
+ @property
+ def project(self):
+ """Project bound to the table.
+
+ :rtype: str
+ :returns: the project (derived from the dataset).
+ """
+ return self._project
+
+ @property
+ def dataset_id(self):
+ """ID of dataset containing the table.
+
+ :rtype: str
+ :returns: the ID (derived from the dataset).
+ """
+ return self._dataset_id
+
+ @property
+ def table_id(self):
+ """ID of the table.
+
+ :rtype: str
+ :returns: the table ID.
+ """
+ return self._table_id
+
+ @property
+ def path(self):
+ """URL path for the table's APIs.
+
+ :rtype: str
+ :returns: the path based on project, dataset and table IDs.
+ """
+ return '/projects/%s/datasets/%s/tables/%s' % (
+ self._project, self._dataset_id, self._table_id)
+
+ @property
+ def schema(self):
+ """Table's schema.
+
+ :rtype: list of :class:`SchemaField`
+ :returns: fields describing the schema
+ """
+ return list(self._schema)
+
+ @schema.setter
+ def schema(self, value):
+ """Update table's schema
+
+ :type value: list of :class:`SchemaField`
+ :param value: fields describing the schema
+
+ :raises: TypeError if 'value' is not a sequence, or ValueError if
+ any item in the sequence is not a SchemaField
+ """
+ if value is None:
+ self._schema = ()
+ elif not all(isinstance(field, SchemaField) for field in value):
+ raise ValueError('Schema items must be fields')
+ else:
+ self._schema = tuple(value)
+
+ @property
+ def labels(self):
+ """Labels for the table.
+
+ This method always returns a dict. To change a table's labels,
+ modify the dict, then call ``Client.update_table``. To delete a
+ label, set its value to ``None`` before updating.
+
+ :rtype: dict, {str -> str}
+ :returns: A dict of the table's labels.
+ """
+ return self._properties['labels']
+
+ @labels.setter
+ def labels(self, value):
+ """Update labels for the table.
+
+ :type value: dict, {str -> str}
+ :param value: new labels
+
+ :raises: ValueError for invalid value types.
+ """
+ if not isinstance(value, dict):
+ raise ValueError("Pass a dict")
+ self._properties['labels'] = value
+
+ @property
+ def created(self):
+ """Datetime at which the table was created.
+
+ :rtype: ``datetime.datetime``, or ``NoneType``
+ :returns: the creation time (None until set from the server).
+ """
+ creation_time = self._properties.get('creationTime')
+ if creation_time is not None:
+ # creation_time will be in milliseconds.
+ return _datetime_from_microseconds(1000.0 * creation_time)
+
+ @property
+ def etag(self):
+ """ETag for the table resource.
+
+ :rtype: str, or ``NoneType``
+ :returns: the ETag (None until set from the server).
+ """
+ return self._properties.get('etag')
+
+ @property
+ def modified(self):
+ """Datetime at which the table was last modified.
+
+ :rtype: ``datetime.datetime``, or ``NoneType``
+ :returns: the modification time (None until set from the server).
+ """
+ modified_time = self._properties.get('lastModifiedTime')
+ if modified_time is not None:
+ # modified_time will be in milliseconds.
+ return _datetime_from_microseconds(1000.0 * modified_time)
+
+ @property
+ def num_bytes(self):
+ """The size of the table in bytes.
+
+ :rtype: int, or ``NoneType``
+ :returns: the byte count (None until set from the server).
+ """
+ num_bytes_as_str = self._properties.get('numBytes')
+ if num_bytes_as_str is not None:
+ return int(num_bytes_as_str)
+
+ @property
+ def num_rows(self):
+ """The number of rows in the table.
+
+ :rtype: int, or ``NoneType``
+ :returns: the row count (None until set from the server).
+ """
+ num_rows_as_str = self._properties.get('numRows')
+ if num_rows_as_str is not None:
+ return int(num_rows_as_str)
+
+ @property
+ def self_link(self):
+ """URL for the table resource.
+
+ :rtype: str, or ``NoneType``
+ :returns: the URL (None until set from the server).
+ """
+ return self._properties.get('selfLink')
+
+ @property
+ def full_table_id(self):
+ """ID for the table, in the form ``project_id:dataset_id:table_id``.
+
+ :rtype: str, or ``NoneType``
+ :returns: the full ID (None until set from the server).
+ """
+ return self._properties.get('id')
+
+ @property
+ def table_type(self):
+ """The type of the table.
+
+ Possible values are "TABLE", "VIEW", or "EXTERNAL".
+
+ :rtype: str, or ``NoneType``
+ :returns: the table type (None until set from the server).
+ """
+ return self._properties.get('type')
+
+ @property
+ def partitioning_type(self):
+ """Time partitioning of the table.
+ :rtype: str, or ``NoneType``
+ :returns: Returns type if the table is partitioned, None otherwise.
+ """
+ return self._properties.get('timePartitioning', {}).get('type')
+
+ @partitioning_type.setter
+ def partitioning_type(self, value):
+ """Update the partitioning type of the table
+
+ :type value: str
+ :param value: partitioning type; only "DAY" is currently supported.
+ """
+ if value not in ('DAY', None):
+ raise ValueError("value must be one of ['DAY', None]")
+
+ if value is None:
+ self._properties.pop('timePartitioning', None)
+ else:
+ time_part = self._properties.setdefault('timePartitioning', {})
+ time_part['type'] = value.upper()
+
+ @property
+ def partition_expiration(self):
+ """Expiration time in ms for a partition
+ :rtype: int, or ``NoneType``
+ :returns: Returns the time in ms for partition expiration
+ """
+ return self._properties.get('timePartitioning', {}).get('expirationMs')
+
+ @partition_expiration.setter
+ def partition_expiration(self, value):
+ """Update the experation time in ms for a partition
+
+ :type value: int
+ :param value: partition expiration time in ms
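+
+ Example (illustrative sketch; assumes ``table`` is a :class:`Table`
+ and the 90-day figure is arbitrary)::
+
+     table.partitioning_type = 'DAY'
+     table.partition_expiration = 90 * 24 * 60 * 60 * 1000  # ms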
+ """
+ if not isinstance(value, (int, type(None))):
+ raise ValueError(
+ "must be an integer representing millisseconds or None")
+
+ if value is None:
+ if 'timePartitioning' in self._properties:
+ self._properties['timePartitioning'].pop('expirationMs', None)
+ else:
+ try:
+ self._properties['timePartitioning']['expirationMs'] = value
+ except KeyError:
+ self._properties['timePartitioning'] = {'type': 'DAY'}
+ self._properties['timePartitioning']['expirationMs'] = value
+
+ @property
+ def description(self):
+ """Description of the table.
+
+ :rtype: str, or ``NoneType``
+ :returns: The description as set by the user, or None (the default).
+ """
+ return self._properties.get('description')
+
+ @description.setter
+ def description(self, value):
+ """Update description of the table.
+
+ :type value: str
+ :param value: (Optional) new description
+
+ :raises: ValueError for invalid value types.
+ """
+ if not isinstance(value, six.string_types) and value is not None:
+ raise ValueError("Pass a string, or None")
+ self._properties['description'] = value
+
+ @property
+ def expires(self):
+ """Datetime at which the table will be removed.
+
+ :rtype: ``datetime.datetime``, or ``NoneType``
+ :returns: the expiration time, or None
+ """
+ expiration_time = self._properties.get('expirationTime')
+ if expiration_time is not None:
+ # expiration_time will be in milliseconds.
+ return _datetime_from_microseconds(1000.0 * expiration_time)
+
+ @expires.setter
+ def expires(self, value):
+ """Update datetime at which the table will be removed.
+
+ :type value: ``datetime.datetime``
+ :param value: (Optional) the new expiration time, or None
+ """
+ if not isinstance(value, datetime.datetime) and value is not None:
+ raise ValueError("Pass a datetime, or None")
+ self._properties['expirationTime'] = _millis_from_datetime(value)
+
+ @property
+ def friendly_name(self):
+ """Title of the table.
+
+ :rtype: str, or ``NoneType``
+ :returns: The name as set by the user, or None (the default).
+ """
+ return self._properties.get('friendlyName')
+
+ @friendly_name.setter
+ def friendly_name(self, value):
+ """Update title of the table.
+
+ :type value: str
+ :param value: (Optional) new title
+
+ :raises: ValueError for invalid value types.
+ """
+ if not isinstance(value, six.string_types) and value is not None:
+ raise ValueError("Pass a string, or None")
+ self._properties['friendlyName'] = value
+
+ @property
+ def location(self):
+ """Location in which the table is hosted.
+
+ :rtype: str, or ``NoneType``
+ :returns: The location as set by the user, or None (the default).
+ """
+ return self._properties.get('location')
+
+ @location.setter
+ def location(self, value):
+ """Update location in which the table is hosted.
+
+ :type value: str
+ :param value: (Optional) new location
+
+ :raises: ValueError for invalid value types.
+ """
+ if not isinstance(value, six.string_types) and value is not None:
+ raise ValueError("Pass a string, or None")
+ self._properties['location'] = value
+
+ @property
+ def view_query(self):
+ """SQL query defining the table as a view.
+
+ By default, the query is treated as Standard SQL. To use Legacy
+ SQL, set view_use_legacy_sql to True.
+
+ :rtype: str, or ``NoneType``
+ :returns: The query as set by the user, or None (the default).
+ """
+ view = self._properties.get('view')
+ if view is not None:
+ return view.get('query')
+
+ @view_query.setter
+ def view_query(self, value):
+ """Update SQL query defining the table as a view.
+
+ :type value: str
+ :param value: new query
+
+ :raises: ValueError for invalid value types.
+ """
+ if not isinstance(value, six.string_types):
+ raise ValueError("Pass a string")
+ view = self._properties.get('view')
+ if view is None:
+ view = self._properties['view'] = {}
+ view['query'] = value
+ # The service defaults useLegacySql to True, but this
+ # client uses Standard SQL by default.
+ if view.get('useLegacySql') is None:
+ view['useLegacySql'] = False
+
+ @view_query.deleter
+ def view_query(self):
+ """Delete SQL query defining the table as a view."""
+ self._properties.pop('view', None)
+
+ @property
+ def view_use_legacy_sql(self):
+ """Specifies whether to execute the view with Legacy or Standard SQL.
+
+ The default is False for views (use Standard SQL).
+ If this table is not a view, None is returned.
+
+ :rtype: bool or ``NoneType``
+ :returns: The boolean for view.useLegacySql, or None if not a view.
+ """
+ view = self._properties.get('view')
+ if view is not None:
+ # useLegacySql is never missing from the view dict if this table
+ # was created client-side, because the view_query setter populates
+ # it. So a missing or None can only come from the server, whose
+ # default is True.
+ return view.get('useLegacySql', True)
+
+ @view_use_legacy_sql.setter
+ def view_use_legacy_sql(self, value):
+ """Update the view sub-property 'useLegacySql'.
+
+ This boolean specifies whether to execute the view with Legacy SQL
+ (True) or Standard SQL (False). The default, if not specified, is
+ 'False'.
+
+ :type value: bool
+ :param value: The boolean for view.useLegacySql
+
+ :raises: ValueError for invalid value types.
+ """
+ if not isinstance(value, bool):
+ raise ValueError("Pass a boolean")
+ if self._properties.get('view') is None:
+ self._properties['view'] = {}
+ self._properties['view']['useLegacySql'] = value
+
+ @property
+ def streaming_buffer(self):
+ """Information about a table's streaming buffer.
+
+ :rtype: :class:`StreamingBuffer`
+ :returns: Streaming buffer information, returned from get_table.
+ """
+ sb = self._properties.get('streamingBuffer')
+ if sb is not None:
+ return StreamingBuffer(sb)
+
+ @property
+ def external_data_configuration(self):
+ """Configuration for an external data source.
+
+ If not set, None is returned.
+
+ :rtype: :class:`ExternalConfig`, or ``NoneType``
+ :returns: The external configuration, or None (the default).
+ """
+ return self._external_config
+
+ @external_data_configuration.setter
+ def external_data_configuration(self, value):
+ """Sets the configuration for an external data source.
+
+ :type value: :class:`ExternalConfig`, or ``NoneType``
+ :param value: The ExternalConfig, or None to unset.
+ """
+ if not (value is None or isinstance(value, ExternalConfig)):
+ raise ValueError("Pass an ExternalConfig or None")
+ self._external_config = value
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct a table given its API representation
+
+ :type resource: dict
+ :param resource: table resource representation returned from the API
+
+ :rtype: :class:`google.cloud.bigquery.table.Table`
+ :returns: Table parsed from ``resource``.
+ """
+ from google.cloud.bigquery import dataset
+
+ if ('tableReference' not in resource or
+ 'tableId' not in resource['tableReference']):
+ raise KeyError('Resource lacks required identity information:'
+ '["tableReference"]["tableId"]')
+ project_id = resource['tableReference']['projectId']
+ table_id = resource['tableReference']['tableId']
+ dataset_id = resource['tableReference']['datasetId']
+ dataset_ref = dataset.DatasetReference(project_id, dataset_id)
+
+ table = cls(dataset_ref.table(table_id))
+ table._set_properties(resource)
+ return table
+
+ def _set_properties(self, api_response):
+ """Update properties from resource in body of ``api_response``
+
+ :type api_response: dict
+ :param api_response: response returned from an API call
+ """
+ self._properties.clear()
+ cleaned = api_response.copy()
+ schema = cleaned.pop('schema', {'fields': ()})
+ self.schema = _parse_schema_resource(schema)
+ ec = cleaned.pop('externalDataConfiguration', None)
+ if ec:
+ self.external_data_configuration = ExternalConfig.from_api_repr(ec)
+ if 'creationTime' in cleaned:
+ cleaned['creationTime'] = float(cleaned['creationTime'])
+ if 'lastModifiedTime' in cleaned:
+ cleaned['lastModifiedTime'] = float(cleaned['lastModifiedTime'])
+ if 'expirationTime' in cleaned:
+ cleaned['expirationTime'] = float(cleaned['expirationTime'])
+ if 'labels' not in cleaned:
+ cleaned['labels'] = {}
+ self._properties.update(cleaned)
+
+ def _populate_expires_resource(self, resource):
+ resource['expirationTime'] = _millis_from_datetime(self.expires)
+
+ def _populate_partitioning_type_resource(self, resource):
+ resource['timePartitioning'] = self._properties.get('timePartitioning')
+
+ def _populate_view_use_legacy_sql_resource(self, resource):
+ if 'view' not in resource:
+ resource['view'] = {}
+ resource['view']['useLegacySql'] = self.view_use_legacy_sql
+
+ def _populate_view_query_resource(self, resource):
+ if self.view_query is None:
+ resource['view'] = None
+ return
+ if 'view' not in resource:
+ resource['view'] = {}
+ resource['view']['query'] = self.view_query
+
+ def _populate_schema_resource(self, resource):
+ if not self._schema:
+ resource['schema'] = None
+ else:
+ resource['schema'] = {
+ 'fields': _build_schema_resource(self._schema),
+ }
+
+ def _populate_external_config(self, resource):
+ if not self.external_data_configuration:
+ resource['externalDataConfiguration'] = None
+ else:
+ resource['externalDataConfiguration'] = ExternalConfig.to_api_repr(
+ self.external_data_configuration)
+
+ custom_resource_fields = {
+ 'expires': _populate_expires_resource,
+ 'partitioning_type': _populate_partitioning_type_resource,
+ 'view_query': _populate_view_query_resource,
+ 'view_use_legacy_sql': _populate_view_use_legacy_sql_resource,
+ 'schema': _populate_schema_resource,
+ 'external_data_configuration': _populate_external_config,
+ }
+
+ def _build_resource(self, filter_fields):
+ """Generate a resource for ``create`` or ``update``."""
+ resource = {
+ 'tableReference': {
+ 'projectId': self._project,
+ 'datasetId': self._dataset_id,
+ 'tableId': self.table_id},
+ }
+ for f in filter_fields:
+ if f in self.custom_resource_fields:
+ self.custom_resource_fields[f](self, resource)
+ else:
+ api_field = _snake_to_camel_case(f)
+ resource[api_field] = getattr(self, f)
+ return resource
+
+
+def _row_from_mapping(mapping, schema):
+ """Convert a mapping to a row tuple using the schema.
+
+ :type mapping: dict
+ :param mapping: Mapping of row data: must contain keys for all
+ required fields in the schema. Keys which do not correspond
+ to a field in the schema are ignored.
+
+ :type schema: list of :class:`SchemaField`
+ :param schema: The schema of the table destination for the rows
+
+ :rtype: tuple
+ :returns: Tuple whose elements are ordered according to the schema.
+ :raises: ValueError if schema is empty
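+
+ Example (illustrative sketch; uses an arbitrary two-field schema)::
+
+     schema = [SchemaField('full_name', 'STRING', mode='REQUIRED'),
+               SchemaField('age', 'INTEGER', mode='REQUIRED')]
+     _row_from_mapping({'full_name': 'Phred', 'age': 32}, schema)
+     # -> ('Phred', 32)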
+ """
+ if len(schema) == 0:
+ raise ValueError(_TABLE_HAS_NO_SCHEMA)
+
+ row = []
+ for field in schema:
+ if field.mode == 'REQUIRED':
+ row.append(mapping[field.name])
+ elif field.mode == 'REPEATED':
+ row.append(mapping.get(field.name, ()))
+ elif field.mode == 'NULLABLE':
+ row.append(mapping.get(field.name))
+ else:
+ raise ValueError(
+ "Unknown field mode: {}".format(field.mode))
+ return tuple(row)
+
+
+class StreamingBuffer(object):
+ """Information about a table's streaming buffer.
+
+ See https://cloud.google.com/bigquery/streaming-data-into-bigquery.
+
+ :type resource: dict
+ :param resource: streaming buffer representation returned from the API
+ """
+
+ def __init__(self, resource):
+ self.estimated_bytes = int(resource['estimatedBytes'])
+ self.estimated_rows = int(resource['estimatedRows'])
+ # time is in milliseconds since the epoch.
+ self.oldest_entry_time = _datetime_from_microseconds(
+ 1000.0 * int(resource['oldestEntryTime']))
diff --git a/bigquery/tests/system.py b/bigquery/tests/system.py
new file mode 100644
index 0000000..335beda
--- /dev/null
+++ b/bigquery/tests/system.py
@@ -0,0 +1,1431 @@
+# Copyright 2015 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import base64
+import csv
+import datetime
+import json
+import operator
+import os
+import time
+import unittest
+import uuid
+
+import six
+
+from google.api_core.exceptions import PreconditionFailed
+from google.cloud import bigquery
+from google.cloud.bigquery.dataset import Dataset, DatasetReference
+from google.cloud.bigquery.table import Table
+from google.cloud._helpers import UTC
+from google.cloud.bigquery import dbapi
+from google.cloud.exceptions import Forbidden, NotFound
+
+from test_utils.retry import RetryErrors
+from test_utils.retry import RetryInstanceState
+from test_utils.retry import RetryResult
+from test_utils.system import unique_resource_id
+
+
+JOB_TIMEOUT = 120 # 2 minutes
+WHERE = os.path.abspath(os.path.dirname(__file__))
+
+# Common table data used for many tests.
+ROWS = [
+ ('Phred Phlyntstone', 32),
+ ('Bharney Rhubble', 33),
+ ('Wylma Phlyntstone', 29),
+ ('Bhettye Rhubble', 27),
+]
+HEADER_ROW = ('Full Name', 'Age')
+SCHEMA = [
+ bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'),
+ bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
+]
+
+
+def _has_rows(result):
+ return len(result) > 0
+
+
+def _make_dataset_id(prefix):
+ return '%s%s' % (prefix, unique_resource_id())
+
+
+def _load_json_schema(filename='data/schema.json'):
+ from google.cloud.bigquery.table import _parse_schema_resource
+
+ json_filename = os.path.join(WHERE, filename)
+
+ with open(json_filename, 'r') as schema_file:
+ return _parse_schema_resource(json.load(schema_file))
+
+
+def _rate_limit_exceeded(forbidden):
+ """Predicate: pass only exceptions with 'rateLimitExceeded' as reason."""
+ return any(error['reason'] == 'rateLimitExceeded'
+ for error in forbidden._errors)
+
+
+# We need to retry with backoff to stay within the rate limits.
+# Exceeding them yields a 403 Forbidden response, which the API returns
+# instead of the more appropriate 429.
+# See https://cloud.google.com/bigquery/quota-policy
+retry_403 = RetryErrors(Forbidden, error_predicate=_rate_limit_exceeded)
+
+
+class Config(object):
+ """Run-time configuration to be modified at set-up.
+
+ This is a mutable stand-in to allow test set-up to modify
+ global state.
+ """
+ CLIENT = None
+ CURSOR = None
+
+
+def setUpModule():
+ Config.CLIENT = bigquery.Client()
+ Config.CURSOR = dbapi.connect(Config.CLIENT).cursor()
+
+
+class TestBigQuery(unittest.TestCase):
+
+ def setUp(self):
+ self.to_delete = []
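+        # tearDown deletes these in list order; tests insert tables at
+        # index 0 so they are removed before their parent datasets.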
+
+ def tearDown(self):
+ from google.cloud.storage import Bucket
+ from google.cloud.exceptions import BadRequest
+ from google.cloud.exceptions import Conflict
+
+ def _still_in_use(bad_request):
+ return any(error['reason'] == 'resourceInUse'
+ for error in bad_request._errors)
+
+ retry_in_use = RetryErrors(BadRequest, error_predicate=_still_in_use)
+ retry_409 = RetryErrors(Conflict)
+ for doomed in self.to_delete:
+ if isinstance(doomed, Bucket):
+ retry_409(doomed.delete)(force=True)
+ elif isinstance(doomed, Dataset):
+ retry_in_use(Config.CLIENT.delete_dataset)(doomed)
+ elif isinstance(doomed, Table):
+ retry_in_use(Config.CLIENT.delete_table)(doomed)
+ else:
+ doomed.delete()
+
+ def test_create_dataset(self):
+ DATASET_ID = _make_dataset_id('create_dataset')
+ dataset = self.temp_dataset(DATASET_ID)
+
+ self.assertTrue(_dataset_exists(dataset))
+ self.assertEqual(dataset.dataset_id, DATASET_ID)
+ self.assertEqual(dataset.project, Config.CLIENT.project)
+
+ def test_get_dataset(self):
+ DATASET_ID = _make_dataset_id('get_dataset')
+ client = Config.CLIENT
+ dataset_arg = Dataset(client.dataset(DATASET_ID))
+ dataset_arg.friendly_name = 'Friendly'
+ dataset_arg.description = 'Description'
+ dataset = retry_403(client.create_dataset)(dataset_arg)
+ self.to_delete.append(dataset)
+ dataset_ref = client.dataset(DATASET_ID)
+
+ got = client.get_dataset(dataset_ref)
+
+ self.assertEqual(got.friendly_name, 'Friendly')
+ self.assertEqual(got.description, 'Description')
+
+ def test_update_dataset(self):
+ dataset = self.temp_dataset(_make_dataset_id('update_dataset'))
+ self.assertTrue(_dataset_exists(dataset))
+ self.assertIsNone(dataset.friendly_name)
+ self.assertIsNone(dataset.description)
+        self.assertEqual(dataset.labels, {})
+
+ dataset.friendly_name = 'Friendly'
+ dataset.description = 'Description'
+ dataset.labels = {'priority': 'high', 'color': 'blue'}
+ ds2 = Config.CLIENT.update_dataset(
+ dataset,
+ ('friendly_name', 'description', 'labels'))
+ self.assertEqual(ds2.friendly_name, 'Friendly')
+ self.assertEqual(ds2.description, 'Description')
+ self.assertEqual(ds2.labels, {'priority': 'high', 'color': 'blue'})
+
+ ds2.labels = {
+ 'color': 'green', # change
+ 'shape': 'circle', # add
+ 'priority': None, # delete
+ }
+ ds3 = Config.CLIENT.update_dataset(ds2, ['labels'])
+ self.assertEqual(ds3.labels, {'color': 'green', 'shape': 'circle'})
+
+        # If we try to update using ds2 again, it will fail because the
+        # previous update changed the ETag.
+ ds2.description = 'no good'
+ with self.assertRaises(PreconditionFailed):
+ Config.CLIENT.update_dataset(ds2, ['description'])
+
+ def test_list_datasets(self):
+ datasets_to_create = [
+ 'new' + unique_resource_id(),
+ 'newer' + unique_resource_id(),
+ 'newest' + unique_resource_id(),
+ ]
+ for dataset_id in datasets_to_create:
+ self.temp_dataset(dataset_id)
+
+ # Retrieve the datasets.
+ iterator = Config.CLIENT.list_datasets()
+ all_datasets = list(iterator)
+ self.assertIsNone(iterator.next_page_token)
+ created = [dataset for dataset in all_datasets
+ if dataset.dataset_id in datasets_to_create and
+ dataset.project == Config.CLIENT.project]
+ self.assertEqual(len(created), len(datasets_to_create))
+
+ def test_create_table(self):
+ dataset = self.temp_dataset(_make_dataset_id('create_table'))
+ table_id = 'test_table'
+ table_arg = Table(dataset.table(table_id), schema=SCHEMA)
+ self.assertFalse(_table_exists(table_arg))
+
+ table = retry_403(Config.CLIENT.create_table)(table_arg)
+ self.to_delete.insert(0, table)
+
+ self.assertTrue(_table_exists(table))
+ self.assertEqual(table.table_id, table_id)
+
+ def test_get_table_w_public_dataset(self):
+ PUBLIC = 'bigquery-public-data'
+ DATASET_ID = 'samples'
+ TABLE_ID = 'shakespeare'
+ table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_ID)
+
+ table = Config.CLIENT.get_table(table_ref)
+
+ self.assertEqual(table.table_id, TABLE_ID)
+ self.assertEqual(table.dataset_id, DATASET_ID)
+ self.assertEqual(table.project, PUBLIC)
+ schema_names = [field.name for field in table.schema]
+ self.assertEqual(
+ schema_names, ['word', 'word_count', 'corpus', 'corpus_date'])
+
+ def test_list_dataset_tables(self):
+ DATASET_ID = _make_dataset_id('list_tables')
+ dataset = self.temp_dataset(DATASET_ID)
+ # Retrieve tables before any are created for the dataset.
+ iterator = Config.CLIENT.list_dataset_tables(dataset)
+ all_tables = list(iterator)
+ self.assertEqual(all_tables, [])
+ self.assertIsNone(iterator.next_page_token)
+
+ # Insert some tables to be listed.
+ tables_to_create = [
+ 'new' + unique_resource_id(),
+ 'newer' + unique_resource_id(),
+ 'newest' + unique_resource_id(),
+ ]
+ for table_name in tables_to_create:
+ table = Table(dataset.table(table_name), schema=SCHEMA)
+ created_table = retry_403(Config.CLIENT.create_table)(table)
+ self.to_delete.insert(0, created_table)
+
+ # Retrieve the tables.
+ iterator = Config.CLIENT.list_dataset_tables(dataset)
+ all_tables = list(iterator)
+ self.assertIsNone(iterator.next_page_token)
+ created = [table for table in all_tables
+ if (table.table_id in tables_to_create and
+ table.dataset_id == DATASET_ID)]
+ self.assertEqual(len(created), len(tables_to_create))
+
+ def test_update_table(self):
+ dataset = self.temp_dataset(_make_dataset_id('update_table'))
+
+ TABLE_NAME = 'test_table'
+ table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA)
+ self.assertFalse(_table_exists(table_arg))
+ table = retry_403(Config.CLIENT.create_table)(table_arg)
+ self.to_delete.insert(0, table)
+ self.assertTrue(_table_exists(table))
+ self.assertIsNone(table.friendly_name)
+ self.assertIsNone(table.description)
+        self.assertEqual(table.labels, {})
+ table.friendly_name = 'Friendly'
+ table.description = 'Description'
+ table.labels = {'priority': 'high', 'color': 'blue'}
+
+ table2 = Config.CLIENT.update_table(
+ table, ['friendly_name', 'description', 'labels'])
+
+ self.assertEqual(table2.friendly_name, 'Friendly')
+ self.assertEqual(table2.description, 'Description')
+ self.assertEqual(table2.labels, {'priority': 'high', 'color': 'blue'})
+
+ table2.description = None
+ table2.labels = {
+ 'color': 'green', # change
+ 'shape': 'circle', # add
+ 'priority': None, # delete
+ }
+ table3 = Config.CLIENT.update_table(table2, ['description', 'labels'])
+ self.assertIsNone(table3.description)
+ self.assertEqual(table3.labels, {'color': 'green', 'shape': 'circle'})
+
+ # If we try to update using table2 again, it will fail because the
+ # previous update changed the ETag.
+ table2.description = 'no good'
+ with self.assertRaises(PreconditionFailed):
+ Config.CLIENT.update_table(table2, ['description'])
+
+ def test_update_table_schema(self):
+ dataset = self.temp_dataset(_make_dataset_id('update_table'))
+
+ TABLE_NAME = 'test_table'
+ table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA)
+ self.assertFalse(_table_exists(table_arg))
+ table = retry_403(Config.CLIENT.create_table)(table_arg)
+ self.to_delete.insert(0, table)
+ self.assertTrue(_table_exists(table))
+ voter = bigquery.SchemaField('voter', 'BOOLEAN', mode='NULLABLE')
+ schema = table.schema
+ schema.append(voter)
+ table.schema = schema
+
+ updated_table = Config.CLIENT.update_table(table, ['schema'])
+
+ self.assertEqual(len(updated_table.schema), len(schema))
+ for found, expected in zip(updated_table.schema, schema):
+ self.assertEqual(found.name, expected.name)
+ self.assertEqual(found.field_type, expected.field_type)
+ self.assertEqual(found.mode, expected.mode)
+
+ @staticmethod
+ def _fetch_single_page(table, selected_fields=None):
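+        # Only the first page of results is needed for these assertions.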
+ iterator = Config.CLIENT.list_rows(
+ table, selected_fields=selected_fields)
+ page = six.next(iterator.pages)
+ return list(page)
+
+ def test_create_rows_then_dump_table(self):
+ NOW_SECONDS = 1448911495.484366
+ NOW = datetime.datetime.utcfromtimestamp(
+ NOW_SECONDS).replace(tzinfo=UTC)
+ ROWS = [
+ ('Phred Phlyntstone', 32, NOW),
+ ('Bharney Rhubble', 33, NOW + datetime.timedelta(seconds=10)),
+ ('Wylma Phlyntstone', 29, NOW + datetime.timedelta(seconds=20)),
+ ('Bhettye Rhubble', 27, None),
+ ]
+ ROW_IDS = range(len(ROWS))
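+        # Passing explicit row IDs lets the streaming insert API
+        # de-duplicate retried requests (best effort).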
+
+ dataset = self.temp_dataset(_make_dataset_id('create_rows_then_dump'))
+ TABLE_ID = 'test_table'
+ schema = [
+ bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'),
+ bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
+ bigquery.SchemaField('now', 'TIMESTAMP'),
+ ]
+ table_arg = Table(dataset.table(TABLE_ID), schema=schema)
+ self.assertFalse(_table_exists(table_arg))
+ table = retry_403(Config.CLIENT.create_table)(table_arg)
+ self.to_delete.insert(0, table)
+ self.assertTrue(_table_exists(table))
+
+ errors = Config.CLIENT.create_rows(table, ROWS, row_ids=ROW_IDS)
+ self.assertEqual(len(errors), 0)
+
+ rows = ()
+
+ # Allow for "warm up" before rows visible. See
+ # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
+ # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
+ retry = RetryResult(_has_rows, max_tries=8)
+ rows = retry(self._fetch_single_page)(table)
+ row_tuples = [r.values() for r in rows]
+ by_age = operator.itemgetter(1)
+ self.assertEqual(sorted(row_tuples, key=by_age),
+ sorted(ROWS, key=by_age))
+
+ def test_load_table_from_local_file_then_dump_table(self):
+ from google.cloud._testing import _NamedTemporaryFile
+
+ TABLE_NAME = 'test_table'
+
+ dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump'))
+ table_ref = dataset.table(TABLE_NAME)
+ table_arg = Table(table_ref, schema=SCHEMA)
+ table = retry_403(Config.CLIENT.create_table)(table_arg)
+ self.to_delete.insert(0, table)
+
+ with _NamedTemporaryFile() as temp:
+ with open(temp.name, 'w') as csv_write:
+ writer = csv.writer(csv_write)
+ writer.writerow(HEADER_ROW)
+ writer.writerows(ROWS)
+
+ with open(temp.name, 'rb') as csv_read:
+ config = bigquery.LoadJobConfig()
+ config.source_format = 'CSV'
+ config.skip_leading_rows = 1
+ config.create_disposition = 'CREATE_NEVER'
+ config.write_disposition = 'WRITE_EMPTY'
+ config.schema = table.schema
+ job = Config.CLIENT.load_table_from_file(
+ csv_read, table_ref, job_config=config)
+
+ # Retry until done.
+ job.result(timeout=JOB_TIMEOUT)
+
+ self.assertEqual(job.output_rows, len(ROWS))
+
+ rows = self._fetch_single_page(table)
+ row_tuples = [r.values() for r in rows]
+ by_age = operator.itemgetter(1)
+ self.assertEqual(sorted(row_tuples, key=by_age),
+ sorted(ROWS, key=by_age))
+
+ def test_load_table_from_local_avro_file_then_dump_table(self):
+ TABLE_NAME = 'test_table_avro'
+ ROWS = [
+ ("violet", 400),
+ ("indigo", 445),
+ ("blue", 475),
+ ("green", 510),
+ ("yellow", 570),
+ ("orange", 590),
+ ("red", 650)]
+
+ dataset = self.temp_dataset(_make_dataset_id('load_local_then_dump'))
+ table_ref = dataset.table(TABLE_NAME)
+ table = Table(table_ref)
+ self.to_delete.insert(0, table)
+
+ with open(os.path.join(WHERE, 'data', 'colors.avro'), 'rb') as avrof:
+ config = bigquery.LoadJobConfig()
+ config.source_format = 'AVRO'
+ config.write_disposition = 'WRITE_TRUNCATE'
+ job = Config.CLIENT.load_table_from_file(
+ avrof, table_ref, job_config=config)
+ # Retry until done.
+ job.result(timeout=JOB_TIMEOUT)
+
+ self.assertEqual(job.output_rows, len(ROWS))
+
+ table = Config.CLIENT.get_table(table)
+ rows = self._fetch_single_page(table)
+ row_tuples = [r.values() for r in rows]
+ by_wavelength = operator.itemgetter(1)
+ self.assertEqual(sorted(row_tuples, key=by_wavelength),
+ sorted(ROWS, key=by_wavelength))
+
+ def test_load_table_from_uri_then_dump_table(self):
+ TABLE_ID = 'test_table'
+ GS_URL = self._write_csv_to_storage(
+ 'bq_load_test' + unique_resource_id(), 'person_ages.csv',
+ HEADER_ROW, ROWS)
+
+ dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump'))
+
+ table_arg = Table(dataset.table(TABLE_ID), schema=SCHEMA)
+ table = retry_403(Config.CLIENT.create_table)(table_arg)
+ self.to_delete.insert(0, table)
+
+ config = bigquery.LoadJobConfig()
+ config.create_disposition = 'CREATE_NEVER'
+ config.skip_leading_rows = 1
+ config.source_format = 'CSV'
+ config.write_disposition = 'WRITE_EMPTY'
+ job = Config.CLIENT.load_table_from_uri(
+ GS_URL, dataset.table(TABLE_ID), job_config=config)
+
+        # Allow up to ~127 seconds of "warm up" before rows are visible. See
+ # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
+ # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
+ retry = RetryInstanceState(_job_done, max_tries=8)
+ retry(job.reload)()
+
+ rows = self._fetch_single_page(table)
+ row_tuples = [r.values() for r in rows]
+ by_age = operator.itemgetter(1)
+ self.assertEqual(sorted(row_tuples, key=by_age),
+ sorted(ROWS, key=by_age))
+
+ def test_load_table_from_uri_w_autodetect_schema_then_get_job(self):
+ from google.cloud.bigquery import SchemaField
+ from google.cloud.bigquery.job import LoadJob
+
+ rows = ROWS * 100
+        # BigQuery uses the first 100 rows of the data to detect the schema.
+
+ gs_url = self._write_csv_to_storage(
+ 'bq_load_test' + unique_resource_id(), 'person_ages.csv',
+ HEADER_ROW, rows)
+ dataset = self.temp_dataset(_make_dataset_id('load_gcs_then_dump'))
+ table_ref = dataset.table('test_table')
+ JOB_ID = 'load_table_w_autodetect_{}'.format(str(uuid.uuid4()))
+
+ config = bigquery.LoadJobConfig()
+ config.autodetect = True
+ job = Config.CLIENT.load_table_from_uri(
+ gs_url, table_ref, job_config=config, job_id=JOB_ID)
+
+        # Allow up to ~127 seconds of "warm up" before rows are visible. See
+ # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
+ # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
+ retry = RetryInstanceState(_job_done, max_tries=8)
+ retry(job.reload)()
+
+ table = Config.CLIENT.get_table(table_ref)
+ self.to_delete.insert(0, table)
+ field_name = SchemaField(
+ u'Full_Name', u'string', u'NULLABLE', None, ())
+ field_age = SchemaField(u'Age', u'integer', u'NULLABLE', None, ())
+ self.assertEqual(table.schema, [field_name, field_age])
+
+ actual_rows = self._fetch_single_page(table)
+ actual_row_tuples = [r.values() for r in actual_rows]
+ by_age = operator.itemgetter(1)
+ self.assertEqual(
+ sorted(actual_row_tuples, key=by_age), sorted(rows, key=by_age))
+
+ fetched_job = Config.CLIENT.get_job(JOB_ID)
+
+ self.assertIsInstance(fetched_job, LoadJob)
+ self.assertEqual(fetched_job.job_id, JOB_ID)
+ self.assertEqual(fetched_job.autodetect, True)
+
+ def _write_csv_to_storage(self, bucket_name, blob_name, header_row,
+ data_rows):
+ from google.cloud._testing import _NamedTemporaryFile
+ from google.cloud.storage import Client as StorageClient
+
+ storage_client = StorageClient()
+
+ # In the **very** rare case the bucket name is reserved, this
+ # fails with a ConnectionError.
+ bucket = storage_client.create_bucket(bucket_name)
+ self.to_delete.append(bucket)
+
+ blob = bucket.blob(blob_name)
+
+ with _NamedTemporaryFile() as temp:
+ with open(temp.name, 'w') as csv_write:
+ writer = csv.writer(csv_write)
+ writer.writerow(header_row)
+ writer.writerows(data_rows)
+
+ with open(temp.name, 'rb') as csv_read:
+ blob.upload_from_file(csv_read, content_type='text/csv')
+
+ self.to_delete.insert(0, blob)
+
+ return 'gs://{}/{}'.format(bucket_name, blob_name)
+
+ def _load_table_for_extract_table(
+ self, storage_client, rows, bucket_name, blob_name, table):
+ from google.cloud._testing import _NamedTemporaryFile
+
+ gs_url = 'gs://{}/{}'.format(bucket_name, blob_name)
+
+ # In the **very** rare case the bucket name is reserved, this
+ # fails with a ConnectionError.
+ bucket = storage_client.create_bucket(bucket_name)
+ self.to_delete.append(bucket)
+ blob = bucket.blob(blob_name)
+
+ with _NamedTemporaryFile() as temp:
+ with open(temp.name, 'w') as csv_write:
+ writer = csv.writer(csv_write)
+ writer.writerow(HEADER_ROW)
+ writer.writerows(rows)
+
+ with open(temp.name, 'rb') as csv_read:
+ blob.upload_from_file(csv_read, content_type='text/csv')
+ self.to_delete.insert(0, blob)
+
+ dataset = self.temp_dataset(table.dataset_id)
+ table_ref = dataset.table(table.table_id)
+ config = bigquery.LoadJobConfig()
+ config.autodetect = True
+ job = Config.CLIENT.load_table_from_uri(gs_url, table_ref,
+ job_config=config)
+ # TODO(jba): do we need this retry now that we have job.result()?
+        # Allow up to ~127 seconds of "warm up" before rows are visible. See
+ # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
+ # 8 tries -> 1 + 2 + 4 + 8 + 16 + 32 + 64 = 127 seconds
+ retry = RetryInstanceState(_job_done, max_tries=8)
+ retry(job.reload)()
+
+ def test_extract_table(self):
+ from google.cloud.storage import Client as StorageClient
+
+ storage_client = StorageClient()
+ local_id = unique_resource_id()
+ bucket_name = 'bq_extract_test' + local_id
+ blob_name = 'person_ages.csv'
+ dataset_id = _make_dataset_id('load_gcs_then_extract')
+ table_id = 'test_table'
+ table_ref = Config.CLIENT.dataset(dataset_id).table(table_id)
+ table = Table(table_ref)
+ self.to_delete.insert(0, table)
+ self._load_table_for_extract_table(
+ storage_client, ROWS, bucket_name, blob_name, table_ref)
+ bucket = storage_client.bucket(bucket_name)
+ destination_blob_name = 'person_ages_out.csv'
+ destination = bucket.blob(destination_blob_name)
+ destination_uri = 'gs://{}/person_ages_out.csv'.format(bucket_name)
+
+ job = Config.CLIENT.extract_table(table_ref, destination_uri)
+ job.result(timeout=100)
+
+ self.to_delete.insert(0, destination)
+ got = destination.download_as_string().decode('utf-8')
+ self.assertIn('Bharney Rhubble', got)
+
+ def test_extract_table_w_job_config(self):
+ from google.cloud.storage import Client as StorageClient
+
+ storage_client = StorageClient()
+ local_id = unique_resource_id()
+ bucket_name = 'bq_extract_test' + local_id
+ blob_name = 'person_ages.csv'
+ dataset_id = _make_dataset_id('load_gcs_then_extract')
+ table_id = 'test_table'
+ table_ref = Config.CLIENT.dataset(dataset_id).table(table_id)
+ table = Table(table_ref)
+ self.to_delete.insert(0, table)
+ self._load_table_for_extract_table(
+ storage_client, ROWS, bucket_name, blob_name, table_ref)
+ bucket = storage_client.bucket(bucket_name)
+ destination_blob_name = 'person_ages_out.csv'
+ destination = bucket.blob(destination_blob_name)
+ destination_uri = 'gs://{}/person_ages_out.csv'.format(bucket_name)
+
+ job_config = bigquery.ExtractJobConfig()
+ job_config.destination_format = 'NEWLINE_DELIMITED_JSON'
+ job = Config.CLIENT.extract_table(
+ table, destination_uri, job_config=job_config)
+ job.result()
+
+ self.to_delete.insert(0, destination)
+ got = destination.download_as_string().decode('utf-8')
+ self.assertIn('"Bharney Rhubble"', got)
+
+ def test_copy_table(self):
+ # If we create a new table to copy from, the test won't work
+ # because the new rows will be stored in the streaming buffer,
+ # and copy jobs don't read the streaming buffer.
+ # We could wait for the streaming buffer to empty, but that could
+ # take minutes. Instead we copy a small public table.
+ source_dataset = DatasetReference('bigquery-public-data', 'samples')
+ source_ref = source_dataset.table('shakespeare')
+ dest_dataset = self.temp_dataset(_make_dataset_id('copy_table'))
+ dest_ref = dest_dataset.table('destination_table')
+ job_config = bigquery.CopyJobConfig()
+ job = Config.CLIENT.copy_table(
+ source_ref, dest_ref, job_config=job_config)
+ job.result()
+
+ dest_table = Config.CLIENT.get_table(dest_ref)
+ self.to_delete.insert(0, dest_table)
+ # Just check that we got some rows.
+ got_rows = self._fetch_single_page(dest_table)
+ self.assertTrue(len(got_rows) > 0)
+
+ def test_job_cancel(self):
+ DATASET_ID = _make_dataset_id('job_cancel')
+ JOB_ID_PREFIX = 'fetch_' + DATASET_ID
+ TABLE_NAME = 'test_table'
+ QUERY = 'SELECT * FROM %s.%s' % (DATASET_ID, TABLE_NAME)
+
+ dataset = self.temp_dataset(DATASET_ID)
+
+ table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA)
+ table = retry_403(Config.CLIENT.create_table)(table_arg)
+ self.to_delete.insert(0, table)
+
+ job = Config.CLIENT.query(QUERY, job_id_prefix=JOB_ID_PREFIX)
+ job.cancel()
+
+ retry = RetryInstanceState(_job_done, max_tries=8)
+ retry(job.reload)()
+
+        # The `cancel` API doesn't leave any reliable traces on the status
+        # of the job resource, so we can't really assert on it here. The
+        # best we can do is note that the API call didn't raise an error,
+        # and that the job completed (in the `retry()` above).
+
+ def test_query_rows_w_legacy_sql_types(self):
+ naive = datetime.datetime(2016, 12, 5, 12, 41, 9)
+ stamp = '%s %s' % (naive.date().isoformat(), naive.time().isoformat())
+ zoned = naive.replace(tzinfo=UTC)
+ examples = [
+ {
+ 'sql': 'SELECT 1',
+ 'expected': 1,
+ },
+ {
+ 'sql': 'SELECT 1.3',
+ 'expected': 1.3,
+ },
+ {
+ 'sql': 'SELECT TRUE',
+ 'expected': True,
+ },
+ {
+ 'sql': 'SELECT "ABC"',
+ 'expected': 'ABC',
+ },
+ {
+ 'sql': 'SELECT CAST("foo" AS BYTES)',
+ 'expected': b'foo',
+ },
+ {
+ 'sql': 'SELECT CAST("%s" AS TIMESTAMP)' % (stamp,),
+ 'expected': zoned,
+ },
+ ]
+ for example in examples:
+ job_config = bigquery.QueryJobConfig()
+ job_config.use_legacy_sql = True
+ rows = list(Config.CLIENT.query_rows(
+ example['sql'], job_config=job_config))
+ self.assertEqual(len(rows), 1)
+ self.assertEqual(len(rows[0]), 1)
+ self.assertEqual(rows[0][0], example['expected'])
+
+ def _generate_standard_sql_types_examples(self):
+ naive = datetime.datetime(2016, 12, 5, 12, 41, 9)
+ naive_microseconds = datetime.datetime(2016, 12, 5, 12, 41, 9, 250000)
+ stamp = '%s %s' % (naive.date().isoformat(), naive.time().isoformat())
+ stamp_microseconds = stamp + '.250000'
+ zoned = naive.replace(tzinfo=UTC)
+ zoned_microseconds = naive_microseconds.replace(tzinfo=UTC)
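+        # Unnamed STRUCT members come back as '_field_1', '_field_2', ...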
+ return [
+ {
+ 'sql': 'SELECT 1',
+ 'expected': 1,
+ },
+ {
+ 'sql': 'SELECT 1.3',
+ 'expected': 1.3,
+ },
+ {
+ 'sql': 'SELECT TRUE',
+ 'expected': True,
+ },
+ {
+ 'sql': 'SELECT "ABC"',
+ 'expected': 'ABC',
+ },
+ {
+ 'sql': 'SELECT CAST("foo" AS BYTES)',
+ 'expected': b'foo',
+ },
+ {
+ 'sql': 'SELECT TIMESTAMP "%s"' % (stamp,),
+ 'expected': zoned,
+ },
+ {
+ 'sql': 'SELECT TIMESTAMP "%s"' % (stamp_microseconds,),
+ 'expected': zoned_microseconds,
+ },
+ {
+ 'sql': 'SELECT DATETIME(TIMESTAMP "%s")' % (stamp,),
+ 'expected': naive,
+ },
+ {
+ 'sql': 'SELECT DATETIME(TIMESTAMP "%s")' % (
+ stamp_microseconds,),
+ 'expected': naive_microseconds,
+ },
+ {
+ 'sql': 'SELECT DATE(TIMESTAMP "%s")' % (stamp,),
+ 'expected': naive.date(),
+ },
+ {
+ 'sql': 'SELECT TIME(TIMESTAMP "%s")' % (stamp,),
+ 'expected': naive.time(),
+ },
+ {
+ 'sql': 'SELECT (1, 2)',
+ 'expected': {'_field_1': 1, '_field_2': 2},
+ },
+ {
+ 'sql': 'SELECT ((1, 2), (3, 4), 5)',
+ 'expected': {
+ '_field_1': {'_field_1': 1, '_field_2': 2},
+ '_field_2': {'_field_1': 3, '_field_2': 4},
+ '_field_3': 5,
+ },
+ },
+ {
+ 'sql': 'SELECT [1, 2, 3]',
+ 'expected': [1, 2, 3],
+ },
+ {
+ 'sql': 'SELECT ([1, 2], 3, [4, 5])',
+ 'expected':
+ {'_field_1': [1, 2], '_field_2': 3, '_field_3': [4, 5]},
+ },
+ {
+ 'sql': 'SELECT [(1, 2, 3), (4, 5, 6)]',
+ 'expected': [
+ {'_field_1': 1, '_field_2': 2, '_field_3': 3},
+ {'_field_1': 4, '_field_2': 5, '_field_3': 6},
+ ],
+ },
+ {
+ 'sql': 'SELECT [([1, 2, 3], 4), ([5, 6], 7)]',
+ 'expected': [
+ {u'_field_1': [1, 2, 3], u'_field_2': 4},
+ {u'_field_1': [5, 6], u'_field_2': 7},
+ ],
+ },
+ {
+ 'sql': 'SELECT ARRAY(SELECT STRUCT([1, 2]))',
+ 'expected': [{u'_field_1': [1, 2]}],
+ },
+ ]
+
+ def test_query_rows_w_standard_sql_types(self):
+ examples = self._generate_standard_sql_types_examples()
+ for example in examples:
+ rows = list(Config.CLIENT.query_rows(example['sql']))
+ self.assertEqual(len(rows), 1)
+ self.assertEqual(len(rows[0]), 1)
+ self.assertEqual(rows[0][0], example['expected'])
+
+ def test_query_rows_w_failed_query(self):
+ from google.api_core.exceptions import BadRequest
+
+ with self.assertRaises(BadRequest):
+ Config.CLIENT.query_rows('invalid syntax;')
+ # TODO(swast): Ensure that job ID is surfaced in the exception.
+
+ def test_dbapi_w_standard_sql_types(self):
+ examples = self._generate_standard_sql_types_examples()
+ for example in examples:
+ Config.CURSOR.execute(example['sql'])
+ self.assertEqual(Config.CURSOR.rowcount, 1)
+ row = Config.CURSOR.fetchone()
+ self.assertEqual(len(row), 1)
+ self.assertEqual(row[0], example['expected'])
+ row = Config.CURSOR.fetchone()
+ self.assertIsNone(row)
+
+ def test_dbapi_fetchall(self):
+ query = 'SELECT * FROM UNNEST([(1, 2), (3, 4), (5, 6)])'
+
+ for arraysize in range(1, 5):
+ Config.CURSOR.execute(query)
+ self.assertEqual(Config.CURSOR.rowcount, 3, "expected 3 rows")
+ Config.CURSOR.arraysize = arraysize
+ rows = Config.CURSOR.fetchall()
+ row_tuples = [r.values() for r in rows]
+ self.assertEqual(row_tuples, [(1, 2), (3, 4), (5, 6)])
+
+ def _load_table_for_dml(self, rows, dataset_id, table_id):
+ from google.cloud._testing import _NamedTemporaryFile
+
+ dataset = self.temp_dataset(dataset_id)
+ greeting = bigquery.SchemaField(
+ 'greeting', 'STRING', mode='NULLABLE')
+ table_ref = dataset.table(table_id)
+ table_arg = Table(table_ref, schema=[greeting])
+ table = retry_403(Config.CLIENT.create_table)(table_arg)
+ self.to_delete.insert(0, table)
+
+ with _NamedTemporaryFile() as temp:
+ with open(temp.name, 'w') as csv_write:
+ writer = csv.writer(csv_write)
+ writer.writerow(('Greeting',))
+ writer.writerows(rows)
+
+ with open(temp.name, 'rb') as csv_read:
+ config = bigquery.LoadJobConfig()
+ config.source_format = 'CSV'
+ config.skip_leading_rows = 1
+ config.create_disposition = 'CREATE_NEVER'
+ config.write_disposition = 'WRITE_EMPTY'
+ job = Config.CLIENT.load_table_from_file(
+ csv_read, table_ref, job_config=config)
+
+ # Retry until done.
+ job.result(timeout=JOB_TIMEOUT)
+ self._fetch_single_page(table)
+
+ def test_query_w_dml(self):
+ dataset_name = _make_dataset_id('dml_tests')
+ table_name = 'test_table'
+ self._load_table_for_dml([('Hello World',)], dataset_name, table_name)
+ query_template = """UPDATE {}.{}
+ SET greeting = 'Guten Tag'
+ WHERE greeting = 'Hello World'
+ """
+
+ query_job = Config.CLIENT.query(
+ query_template.format(dataset_name, table_name),
+ job_id_prefix='test_query_w_dml_')
+ query_job.result()
+
+ self.assertEqual(query_job.num_dml_affected_rows, 1)
+
+ def test_dbapi_w_dml(self):
+ dataset_name = _make_dataset_id('dml_tests')
+ table_name = 'test_table'
+ self._load_table_for_dml([('Hello World',)], dataset_name, table_name)
+ query_template = """UPDATE {}.{}
+ SET greeting = 'Guten Tag'
+ WHERE greeting = 'Hello World'
+ """
+
+ Config.CURSOR.execute(
+ query_template.format(dataset_name, table_name),
+ job_id='test_dbapi_w_dml_{}'.format(str(uuid.uuid4())))
+ self.assertEqual(Config.CURSOR.rowcount, 1)
+ self.assertIsNone(Config.CURSOR.fetchone())
+
+ def test_query_w_query_params(self):
+ from google.cloud.bigquery.job import QueryJobConfig
+ from google.cloud.bigquery.query import ArrayQueryParameter
+ from google.cloud.bigquery.query import ScalarQueryParameter
+ from google.cloud.bigquery.query import StructQueryParameter
+ question = 'What is the answer to life, the universe, and everything?'
+ question_param = ScalarQueryParameter(
+ name='question', type_='STRING', value=question)
+ answer = 42
+ answer_param = ScalarQueryParameter(
+ name='answer', type_='INT64', value=answer)
+ pi = 3.1415926
+ pi_param = ScalarQueryParameter(
+ name='pi', type_='FLOAT64', value=pi)
+ truthy = True
+ truthy_param = ScalarQueryParameter(
+ name='truthy', type_='BOOL', value=truthy)
+ beef = b'DEADBEEF'
+ beef_param = ScalarQueryParameter(
+ name='beef', type_='BYTES', value=beef)
+ naive = datetime.datetime(2016, 12, 5, 12, 41, 9)
+ naive_param = ScalarQueryParameter(
+ name='naive', type_='DATETIME', value=naive)
+ naive_date_param = ScalarQueryParameter(
+ name='naive_date', type_='DATE', value=naive.date())
+ naive_time_param = ScalarQueryParameter(
+ name='naive_time', type_='TIME', value=naive.time())
+ zoned = naive.replace(tzinfo=UTC)
+ zoned_param = ScalarQueryParameter(
+ name='zoned', type_='TIMESTAMP', value=zoned)
+ array_param = ArrayQueryParameter(
+ name='array_param', array_type='INT64', values=[1, 2])
+ struct_param = StructQueryParameter(
+ 'hitchhiker', question_param, answer_param)
+ phred_name = 'Phred Phlyntstone'
+ phred_name_param = ScalarQueryParameter(
+ name='name', type_='STRING', value=phred_name)
+ phred_age = 32
+ phred_age_param = ScalarQueryParameter(
+ name='age', type_='INT64', value=phred_age)
+ phred_param = StructQueryParameter(
+ None, phred_name_param, phred_age_param)
+ bharney_name = 'Bharney Rhubbyl'
+ bharney_name_param = ScalarQueryParameter(
+ name='name', type_='STRING', value=bharney_name)
+ bharney_age = 31
+ bharney_age_param = ScalarQueryParameter(
+ name='age', type_='INT64', value=bharney_age)
+ bharney_param = StructQueryParameter(
+ None, bharney_name_param, bharney_age_param)
+ characters_param = ArrayQueryParameter(
+ name=None, array_type='RECORD',
+ values=[phred_param, bharney_param])
+ hero_param = StructQueryParameter(
+ 'hero', phred_name_param, phred_age_param)
+ sidekick_param = StructQueryParameter(
+ 'sidekick', bharney_name_param, bharney_age_param)
+ roles_param = StructQueryParameter(
+ 'roles', hero_param, sidekick_param)
+ friends_param = ArrayQueryParameter(
+ name='friends', array_type='STRING',
+ values=[phred_name, bharney_name])
+ with_friends_param = StructQueryParameter(None, friends_param)
+ top_left_param = StructQueryParameter(
+ 'top_left',
+ ScalarQueryParameter('x', 'INT64', 12),
+ ScalarQueryParameter('y', 'INT64', 102))
+ bottom_right_param = StructQueryParameter(
+ 'bottom_right',
+ ScalarQueryParameter('x', 'INT64', 22),
+ ScalarQueryParameter('y', 'INT64', 92))
+ rectangle_param = StructQueryParameter(
+ 'rectangle', top_left_param, bottom_right_param)
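+        # Parameters constructed with name=None are positional; the SQL
+        # below references them with '?' rather than '@name'.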
+ examples = [
+ {
+ 'sql': 'SELECT @question',
+ 'expected': question,
+ 'query_parameters': [question_param],
+ },
+ {
+ 'sql': 'SELECT @answer',
+ 'expected': answer,
+ 'query_parameters': [answer_param],
+ },
+ {
+ 'sql': 'SELECT @pi',
+ 'expected': pi,
+ 'query_parameters': [pi_param],
+ },
+ {
+ 'sql': 'SELECT @truthy',
+ 'expected': truthy,
+ 'query_parameters': [truthy_param],
+ },
+ {
+ 'sql': 'SELECT @beef',
+ 'expected': beef,
+ 'query_parameters': [beef_param],
+ },
+ {
+ 'sql': 'SELECT @naive',
+ 'expected': naive,
+ 'query_parameters': [naive_param],
+ },
+ {
+ 'sql': 'SELECT @naive_date',
+ 'expected': naive.date(),
+ 'query_parameters': [naive_date_param],
+ },
+ {
+ 'sql': 'SELECT @naive_time',
+ 'expected': naive.time(),
+ 'query_parameters': [naive_time_param],
+ },
+ {
+ 'sql': 'SELECT @zoned',
+ 'expected': zoned,
+ 'query_parameters': [zoned_param],
+ },
+ {
+ 'sql': 'SELECT @array_param',
+ 'expected': [1, 2],
+ 'query_parameters': [array_param],
+ },
+ {
+ 'sql': 'SELECT (@hitchhiker.question, @hitchhiker.answer)',
+ 'expected': ({'_field_1': question, '_field_2': answer}),
+ 'query_parameters': [struct_param],
+ },
+ {
+ 'sql':
+ 'SELECT '
+ '((@rectangle.bottom_right.x - @rectangle.top_left.x) '
+ '* (@rectangle.top_left.y - @rectangle.bottom_right.y))',
+ 'expected': 100,
+ 'query_parameters': [rectangle_param],
+ },
+ {
+ 'sql': 'SELECT ?',
+ 'expected': [
+ {'name': phred_name, 'age': phred_age},
+ {'name': bharney_name, 'age': bharney_age},
+ ],
+ 'query_parameters': [characters_param],
+ },
+ {
+ 'sql': 'SELECT @roles',
+ 'expected': {
+ 'hero': {'name': phred_name, 'age': phred_age},
+ 'sidekick': {'name': bharney_name, 'age': bharney_age},
+ },
+ 'query_parameters': [roles_param],
+ },
+ {
+ 'sql': 'SELECT ?',
+ 'expected': {
+ 'friends': [phred_name, bharney_name],
+ },
+ 'query_parameters': [with_friends_param],
+ },
+ ]
+ for example in examples:
+ jconfig = QueryJobConfig()
+ jconfig.query_parameters = example['query_parameters']
+ query_job = Config.CLIENT.query(
+ example['sql'],
+ job_config=jconfig,
+ job_id_prefix='test_query_w_query_params')
+ rows = list(query_job.result())
+ self.assertEqual(len(rows), 1)
+ self.assertEqual(len(rows[0]), 1)
+ self.assertEqual(rows[0][0], example['expected'])
+
+ def test_dbapi_w_query_parameters(self):
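+        # The DB-API cursor accepts both named ('%(name)s', pyformat) and
+        # positional ('%s', format) placeholders; both are exercised here.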
+ examples = [
+ {
+ 'sql': 'SELECT %(boolval)s',
+ 'expected': True,
+ 'query_parameters': {
+ 'boolval': True,
+ },
+ },
+ {
+ 'sql': 'SELECT %(a "very" weird `name`)s',
+ 'expected': True,
+ 'query_parameters': {
+ 'a "very" weird `name`': True,
+ },
+ },
+ {
+ 'sql': 'SELECT %(select)s',
+ 'expected': True,
+ 'query_parameters': {
+ 'select': True, # this name is a keyword
+ },
+ },
+ {
+ 'sql': 'SELECT %s',
+ 'expected': False,
+ 'query_parameters': [False],
+ },
+ {
+ 'sql': 'SELECT %(intval)s',
+ 'expected': 123,
+ 'query_parameters': {
+ 'intval': 123,
+ },
+ },
+ {
+ 'sql': 'SELECT %s',
+ 'expected': -123456789,
+ 'query_parameters': [-123456789],
+ },
+ {
+ 'sql': 'SELECT %(floatval)s',
+ 'expected': 1.25,
+ 'query_parameters': {
+ 'floatval': 1.25,
+ },
+ },
+ {
+ 'sql': 'SELECT LOWER(%(strval)s)',
+ 'query_parameters': {
+ 'strval': 'I Am A String',
+ },
+ 'expected': 'i am a string',
+ },
+ {
+ 'sql': 'SELECT DATE_SUB(%(dateval)s, INTERVAL 1 DAY)',
+ 'query_parameters': {
+ 'dateval': datetime.date(2017, 4, 2),
+ },
+ 'expected': datetime.date(2017, 4, 1),
+ },
+ {
+ 'sql': 'SELECT TIME_ADD(%(timeval)s, INTERVAL 4 SECOND)',
+ 'query_parameters': {
+ 'timeval': datetime.time(12, 34, 56),
+ },
+ 'expected': datetime.time(12, 35, 0),
+ },
+ {
+ 'sql': (
+ 'SELECT DATETIME_ADD(%(datetimeval)s, INTERVAL 53 SECOND)'
+ ),
+ 'query_parameters': {
+ 'datetimeval': datetime.datetime(2012, 3, 4, 5, 6, 7),
+ },
+ 'expected': datetime.datetime(2012, 3, 4, 5, 7, 0),
+ },
+ {
+ 'sql': 'SELECT TIMESTAMP_TRUNC(%(zoned)s, MINUTE)',
+ 'query_parameters': {
+ 'zoned': datetime.datetime(
+ 2012, 3, 4, 5, 6, 7, tzinfo=UTC),
+ },
+ 'expected': datetime.datetime(2012, 3, 4, 5, 6, 0, tzinfo=UTC),
+ },
+ {
+ 'sql': 'SELECT TIMESTAMP_TRUNC(%(zoned)s, MINUTE)',
+ 'query_parameters': {
+ 'zoned': datetime.datetime(
+ 2012, 3, 4, 5, 6, 7, 250000, tzinfo=UTC),
+ },
+ 'expected': datetime.datetime(2012, 3, 4, 5, 6, 0, tzinfo=UTC),
+ },
+ ]
+ for example in examples:
+ msg = 'sql: {} query_parameters: {}'.format(
+ example['sql'], example['query_parameters'])
+
+ Config.CURSOR.execute(example['sql'], example['query_parameters'])
+
+ self.assertEqual(Config.CURSOR.rowcount, 1, msg=msg)
+ row = Config.CURSOR.fetchone()
+ self.assertEqual(len(row), 1, msg=msg)
+ self.assertEqual(row[0], example['expected'], msg=msg)
+ row = Config.CURSOR.fetchone()
+ self.assertIsNone(row, msg=msg)
+
+ def test_dump_table_w_public_data(self):
+ PUBLIC = 'bigquery-public-data'
+ DATASET_ID = 'samples'
+ TABLE_NAME = 'natality'
+
+ table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_NAME)
+ table = Config.CLIENT.get_table(table_ref)
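+        # Smoke test: fetching a page of rows without error is the check.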
+ self._fetch_single_page(table)
+
+ def test_dump_table_w_public_data_selected_fields(self):
+ PUBLIC = 'bigquery-public-data'
+ DATASET_ID = 'samples'
+ TABLE_NAME = 'natality'
+ selected_fields = [
+ bigquery.SchemaField('year', 'INTEGER', mode='NULLABLE'),
+ bigquery.SchemaField('month', 'INTEGER', mode='NULLABLE'),
+ bigquery.SchemaField('day', 'INTEGER', mode='NULLABLE'),
+ ]
+ table_ref = DatasetReference(PUBLIC, DATASET_ID).table(TABLE_NAME)
+
+ rows = self._fetch_single_page(
+ table_ref, selected_fields=selected_fields)
+
+ self.assertGreater(len(rows), 0)
+ self.assertEqual(len(rows[0]), 3)
+
+ def test_large_query_w_public_data(self):
+ PUBLIC = 'bigquery-public-data'
+ DATASET_ID = 'samples'
+ TABLE_NAME = 'natality'
+ LIMIT = 1000
+ SQL = 'SELECT * from `{}.{}.{}` LIMIT {}'.format(
+ PUBLIC, DATASET_ID, TABLE_NAME, LIMIT)
+
+ iterator = Config.CLIENT.query_rows(SQL)
+
+ rows = list(iterator)
+ self.assertEqual(len(rows), LIMIT)
+
+ def test_query_future(self):
+ query_job = Config.CLIENT.query('SELECT 1')
+ iterator = query_job.result(timeout=JOB_TIMEOUT)
+ row_tuples = [r.values() for r in iterator]
+ self.assertEqual(row_tuples, [(1,)])
+
+ def test_query_table_def(self):
+ gs_url = self._write_csv_to_storage(
+ 'bq_external_test' + unique_resource_id(), 'person_ages.csv',
+ HEADER_ROW, ROWS)
+
+ job_config = bigquery.QueryJobConfig()
+ table_id = 'flintstones'
+ ec = bigquery.ExternalConfig('CSV')
+ ec.source_uris = [gs_url]
+ ec.schema = SCHEMA
+ ec.options.skip_leading_rows = 1 # skip the header row
+ job_config.table_definitions = {table_id: ec}
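+        # table_definitions exposes the CSV in GCS to the query as a
+        # temporary external table; no permanent table is created.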
+ sql = 'SELECT * FROM %s' % table_id
+
+ got_rows = Config.CLIENT.query_rows(sql, job_config=job_config)
+
+ row_tuples = [r.values() for r in got_rows]
+ by_age = operator.itemgetter(1)
+ self.assertEqual(sorted(row_tuples, key=by_age),
+ sorted(ROWS, key=by_age))
+
+ def test_query_external_table(self):
+ gs_url = self._write_csv_to_storage(
+ 'bq_external_test' + unique_resource_id(), 'person_ages.csv',
+ HEADER_ROW, ROWS)
+ dataset_id = _make_dataset_id('query_external_table')
+ dataset = self.temp_dataset(dataset_id)
+ table_id = 'flintstones'
+ table_arg = Table(dataset.table(table_id), schema=SCHEMA)
+ ec = bigquery.ExternalConfig('CSV')
+ ec.source_uris = [gs_url]
+ ec.options.skip_leading_rows = 1 # skip the header row
+ table_arg.external_data_configuration = ec
+ table = Config.CLIENT.create_table(table_arg)
+ self.to_delete.insert(0, table)
+
+ sql = 'SELECT * FROM %s.%s' % (dataset_id, table_id)
+
+ got_rows = Config.CLIENT.query_rows(sql)
+
+ row_tuples = [r.values() for r in got_rows]
+ by_age = operator.itemgetter(1)
+ self.assertEqual(sorted(row_tuples, key=by_age),
+ sorted(ROWS, key=by_age))
+
+ def test_create_rows_nested_nested(self):
+ # See #2951
+ SF = bigquery.SchemaField
+ schema = [
+ SF('string_col', 'STRING', mode='NULLABLE'),
+ SF('record_col', 'RECORD', mode='NULLABLE', fields=[
+ SF('nested_string', 'STRING', mode='NULLABLE'),
+ SF('nested_repeated', 'INTEGER', mode='REPEATED'),
+ SF('nested_record', 'RECORD', mode='NULLABLE', fields=[
+ SF('nested_nested_string', 'STRING', mode='NULLABLE'),
+ ]),
+ ]),
+ ]
+ record = {
+ 'nested_string': 'another string value',
+ 'nested_repeated': [0, 1, 2],
+ 'nested_record': {'nested_nested_string': 'some deep insight'},
+ }
+ to_insert = [
+ ('Some value', record)
+ ]
+ table_id = 'test_table'
+ dataset = self.temp_dataset(_make_dataset_id('issue_2951'))
+ table_arg = Table(dataset.table(table_id), schema=schema)
+ table = retry_403(Config.CLIENT.create_table)(table_arg)
+ self.to_delete.insert(0, table)
+
+ Config.CLIENT.create_rows(table, to_insert)
+
+ retry = RetryResult(_has_rows, max_tries=8)
+ rows = retry(self._fetch_single_page)(table)
+ row_tuples = [r.values() for r in rows]
+ self.assertEqual(row_tuples, to_insert)
+
+ def test_create_rows_nested_nested_dictionary(self):
+ # See #2951
+ SF = bigquery.SchemaField
+ schema = [
+ SF('string_col', 'STRING', mode='NULLABLE'),
+ SF('record_col', 'RECORD', mode='NULLABLE', fields=[
+ SF('nested_string', 'STRING', mode='NULLABLE'),
+ SF('nested_repeated', 'INTEGER', mode='REPEATED'),
+ SF('nested_record', 'RECORD', mode='NULLABLE', fields=[
+ SF('nested_nested_string', 'STRING', mode='NULLABLE'),
+ ]),
+ ]),
+ ]
+ record = {
+ 'nested_string': 'another string value',
+ 'nested_repeated': [0, 1, 2],
+ 'nested_record': {'nested_nested_string': 'some deep insight'},
+ }
+ to_insert = [
+ {'string_col': 'Some value', 'record_col': record}
+ ]
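+        # Unlike the tuple-based test above, these rows are mappings keyed
+        # by field name; create_rows accepts either form.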
+ table_id = 'test_table'
+ dataset = self.temp_dataset(_make_dataset_id('issue_2951'))
+ table_arg = Table(dataset.table(table_id), schema=schema)
+ table = retry_403(Config.CLIENT.create_table)(table_arg)
+ self.to_delete.insert(0, table)
+
+ Config.CLIENT.create_rows(table, to_insert)
+
+ retry = RetryResult(_has_rows, max_tries=8)
+ rows = retry(self._fetch_single_page)(table)
+ row_tuples = [r.values() for r in rows]
+ expected_rows = [('Some value', record)]
+ self.assertEqual(row_tuples, expected_rows)
+
+ def test_create_table_rows_fetch_nested_schema(self):
+ table_name = 'test_table'
+ dataset = self.temp_dataset(
+ _make_dataset_id('create_table_nested_schema'))
+ schema = _load_json_schema()
+ table_arg = Table(dataset.table(table_name), schema=schema)
+ table = retry_403(Config.CLIENT.create_table)(table_arg)
+ self.to_delete.insert(0, table)
+ self.assertTrue(_table_exists(table))
+ self.assertEqual(table.table_id, table_name)
+
+ to_insert = []
+ # Data is in "JSON Lines" format, see http://jsonlines.org/
+ json_filename = os.path.join(WHERE, 'data', 'characters.jsonl')
+ with open(json_filename) as rows_file:
+ for line in rows_file:
+ to_insert.append(json.loads(line))
+
+ errors = Config.CLIENT.create_rows_json(table, to_insert)
+ self.assertEqual(len(errors), 0)
+
+ retry = RetryResult(_has_rows, max_tries=8)
+ fetched = retry(self._fetch_single_page)(table)
+ fetched_tuples = [f.values() for f in fetched]
+
+ self.assertEqual(len(fetched), len(to_insert))
+
+ for found, expected in zip(sorted(fetched_tuples), to_insert):
+ self.assertEqual(found[0], expected['Name'])
+ self.assertEqual(found[1], int(expected['Age']))
+ self.assertEqual(found[2], expected['Weight'])
+ self.assertEqual(found[3], expected['IsMagic'])
+
+ self.assertEqual(len(found[4]), len(expected['Spells']))
+ for f_spell, e_spell in zip(found[4], expected['Spells']):
+ self.assertEqual(f_spell['Name'], e_spell['Name'])
+ parts = time.strptime(
+ e_spell['LastUsed'], '%Y-%m-%d %H:%M:%S UTC')
+ e_used = datetime.datetime(*parts[0:6], tzinfo=UTC)
+ self.assertEqual(f_spell['LastUsed'], e_used)
+ self.assertEqual(f_spell['DiscoveredBy'],
+ e_spell['DiscoveredBy'])
+ self.assertEqual(f_spell['Properties'], e_spell['Properties'])
+
+ e_icon = base64.standard_b64decode(
+ e_spell['Icon'].encode('ascii'))
+ self.assertEqual(f_spell['Icon'], e_icon)
+
+ parts = time.strptime(expected['TeaTime'], '%H:%M:%S')
+ e_teatime = datetime.time(*parts[3:6])
+ self.assertEqual(found[5], e_teatime)
+
+ parts = time.strptime(expected['NextVacation'], '%Y-%m-%d')
+ e_nextvac = datetime.date(*parts[0:3])
+ self.assertEqual(found[6], e_nextvac)
+
+ parts = time.strptime(expected['FavoriteTime'],
+ '%Y-%m-%dT%H:%M:%S')
+ e_favtime = datetime.datetime(*parts[0:6])
+ self.assertEqual(found[7], e_favtime)
+
+ def temp_dataset(self, dataset_id):
+ dataset = retry_403(Config.CLIENT.create_dataset)(
+ Dataset(Config.CLIENT.dataset(dataset_id)))
+ self.to_delete.append(dataset)
+ return dataset
+
+
+def _job_done(instance):
+ return instance.state.lower() == 'done'
+
+
+def _dataset_exists(ds):
+ try:
+ Config.CLIENT.get_dataset(DatasetReference(ds.project, ds.dataset_id))
+ return True
+ except NotFound:
+ return False
+
+
+def _table_exists(t):
+ try:
+ tr = DatasetReference(t.project, t.dataset_id).table(t.table_id)
+ Config.CLIENT.get_table(tr)
+ return True
+ except NotFound:
+ return False
diff --git a/bigquery/tests/unit/test__helpers.py b/bigquery/tests/unit/test__helpers.py
new file mode 100644
index 0000000..15a6210
--- /dev/null
+++ b/bigquery/tests/unit/test__helpers.py
@@ -0,0 +1,903 @@
+# Copyright 2015 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import base64
+import datetime
+import unittest
+
+
+class Test_not_null(unittest.TestCase):
+
+ def _call_fut(self, value, field):
+ from google.cloud.bigquery._helpers import _not_null
+
+ return _not_null(value, field)
+
+ def test_w_none_nullable(self):
+ self.assertFalse(self._call_fut(None, _Field('NULLABLE')))
+
+ def test_w_none_required(self):
+ self.assertTrue(self._call_fut(None, _Field('REQUIRED')))
+
+ def test_w_value(self):
+ self.assertTrue(self._call_fut(object(), object()))
+
+
+class Test_int_from_json(unittest.TestCase):
+
+ def _call_fut(self, value, field):
+ from google.cloud.bigquery._helpers import _int_from_json
+
+ return _int_from_json(value, field)
+
+ def test_w_none_nullable(self):
+ self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
+
+ def test_w_none_required(self):
+ with self.assertRaises(TypeError):
+ self._call_fut(None, _Field('REQUIRED'))
+
+ def test_w_string_value(self):
+ coerced = self._call_fut('42', object())
+ self.assertEqual(coerced, 42)
+
+ def test_w_float_value(self):
+ coerced = self._call_fut(42, object())
+ self.assertEqual(coerced, 42)
+
+
+class Test_float_from_json(unittest.TestCase):
+
+ def _call_fut(self, value, field):
+ from google.cloud.bigquery._helpers import _float_from_json
+
+ return _float_from_json(value, field)
+
+ def test_w_none_nullable(self):
+ self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
+
+ def test_w_none_required(self):
+ with self.assertRaises(TypeError):
+ self._call_fut(None, _Field('REQUIRED'))
+
+ def test_w_string_value(self):
+ coerced = self._call_fut('3.1415', object())
+ self.assertEqual(coerced, 3.1415)
+
+ def test_w_float_value(self):
+ coerced = self._call_fut(3.1415, object())
+ self.assertEqual(coerced, 3.1415)
+
+
+class Test_bool_from_json(unittest.TestCase):
+
+ def _call_fut(self, value, field):
+ from google.cloud.bigquery._helpers import _bool_from_json
+
+ return _bool_from_json(value, field)
+
+ def test_w_none_nullable(self):
+ self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
+
+ def test_w_none_required(self):
+ with self.assertRaises(AttributeError):
+ self._call_fut(None, _Field('REQUIRED'))
+
+ def test_w_value_t(self):
+ coerced = self._call_fut('T', object())
+ self.assertTrue(coerced)
+
+ def test_w_value_true(self):
+ coerced = self._call_fut('True', object())
+ self.assertTrue(coerced)
+
+ def test_w_value_1(self):
+ coerced = self._call_fut('1', object())
+ self.assertTrue(coerced)
+
+ def test_w_value_other(self):
+ coerced = self._call_fut('f', object())
+ self.assertFalse(coerced)
+
+
+class Test_string_from_json(unittest.TestCase):
+
+ def _call_fut(self, value, field):
+ from google.cloud.bigquery._helpers import _string_from_json
+
+ return _string_from_json(value, field)
+
+ def test_w_none_nullable(self):
+ self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
+
+ def test_w_none_required(self):
+ self.assertIsNone(self._call_fut(None, _Field('REQUIRED')))
+
+ def test_w_string_value(self):
+ coerced = self._call_fut('Wonderful!', object())
+ self.assertEqual(coerced, 'Wonderful!')
+
+
+class Test_bytes_from_json(unittest.TestCase):
+
+ def _call_fut(self, value, field):
+ from google.cloud.bigquery._helpers import _bytes_from_json
+
+ return _bytes_from_json(value, field)
+
+ def test_w_none_nullable(self):
+ self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
+
+ def test_w_none_required(self):
+ with self.assertRaises(TypeError):
+ self._call_fut(None, _Field('REQUIRED'))
+
+ def test_w_base64_encoded_bytes(self):
+ expected = b'Wonderful!'
+ encoded = base64.standard_b64encode(expected)
+ coerced = self._call_fut(encoded, object())
+ self.assertEqual(coerced, expected)
+
+ def test_w_base64_encoded_text(self):
+ expected = b'Wonderful!'
+ encoded = base64.standard_b64encode(expected).decode('ascii')
+ coerced = self._call_fut(encoded, object())
+ self.assertEqual(coerced, expected)
+
+
+class Test_timestamp_query_param_from_json(unittest.TestCase):
+
+ def _call_fut(self, value, field):
+ from google.cloud.bigquery import _helpers
+
+ return _helpers._timestamp_query_param_from_json(value, field)
+
+ def test_w_none_nullable(self):
+ self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
+
+ def test_w_timestamp_valid(self):
+ from google.cloud._helpers import UTC
+
+ samples = [
+ (
+ '2016-12-20 15:58:27.339328+00:00',
+ datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC)
+ ),
+ (
+ '2016-12-20 15:58:27+00:00',
+ datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC)
+ ),
+ (
+ '2016-12-20T15:58:27.339328+00:00',
+ datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC)
+ ),
+ (
+ '2016-12-20T15:58:27+00:00',
+ datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC)
+ ),
+ (
+ '2016-12-20 15:58:27.339328Z',
+ datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC)
+ ),
+ (
+ '2016-12-20 15:58:27Z',
+ datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC)
+ ),
+ (
+ '2016-12-20T15:58:27.339328Z',
+ datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC)
+ ),
+ (
+ '2016-12-20T15:58:27Z',
+ datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC)
+ ),
+ ]
+ for timestamp_str, expected_result in samples:
+ self.assertEqual(
+ self._call_fut(timestamp_str, _Field('NULLABLE')),
+ expected_result)
+
+ def test_w_timestamp_invalid(self):
+ with self.assertRaises(ValueError):
+ self._call_fut('definitely-not-a-timestamp', _Field('NULLABLE'))
+
+
+class Test_timestamp_from_json(unittest.TestCase):
+
+ def _call_fut(self, value, field):
+ from google.cloud.bigquery._helpers import _timestamp_from_json
+
+ return _timestamp_from_json(value, field)
+
+ def test_w_none_nullable(self):
+ self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
+
+ def test_w_none_required(self):
+ with self.assertRaises(TypeError):
+ self._call_fut(None, _Field('REQUIRED'))
+
+ def test_w_string_value(self):
+ from google.cloud._helpers import _EPOCH
+
+ coerced = self._call_fut('1.234567', object())
+ self.assertEqual(
+ coerced,
+ _EPOCH + datetime.timedelta(seconds=1, microseconds=234567))
+
+ def test_w_float_value(self):
+ from google.cloud._helpers import _EPOCH
+
+ coerced = self._call_fut(1.234567, object())
+ self.assertEqual(
+ coerced,
+ _EPOCH + datetime.timedelta(seconds=1, microseconds=234567))
+
+
+class Test_datetime_from_json(unittest.TestCase):
+
+ def _call_fut(self, value, field):
+ from google.cloud.bigquery._helpers import _datetime_from_json
+
+ return _datetime_from_json(value, field)
+
+ def test_w_none_nullable(self):
+ self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
+
+ def test_w_none_required(self):
+ with self.assertRaises(TypeError):
+ self._call_fut(None, _Field('REQUIRED'))
+
+ def test_w_string_value(self):
+ coerced = self._call_fut('2016-12-02T18:51:33', object())
+ self.assertEqual(
+ coerced,
+ datetime.datetime(2016, 12, 2, 18, 51, 33))
+
+ def test_w_microseconds(self):
+ coerced = self._call_fut('2015-05-22T10:11:12.987654', object())
+ self.assertEqual(
+ coerced,
+ datetime.datetime(2015, 5, 22, 10, 11, 12, 987654))
+
+
+class Test_date_from_json(unittest.TestCase):
+
+ def _call_fut(self, value, field):
+ from google.cloud.bigquery._helpers import _date_from_json
+
+ return _date_from_json(value, field)
+
+ def test_w_none_nullable(self):
+ self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
+
+ def test_w_none_required(self):
+ with self.assertRaises(TypeError):
+ self._call_fut(None, _Field('REQUIRED'))
+
+ def test_w_string_value(self):
+ coerced = self._call_fut('1987-09-22', object())
+ self.assertEqual(
+ coerced,
+ datetime.date(1987, 9, 22))
+
+
+class Test_time_from_json(unittest.TestCase):
+
+ def _call_fut(self, value, field):
+ from google.cloud.bigquery._helpers import _time_from_json
+
+ return _time_from_json(value, field)
+
+ def test_w_none_nullable(self):
+ self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
+
+ def test_w_none_required(self):
+ with self.assertRaises(TypeError):
+ self._call_fut(None, _Field('REQUIRED'))
+
+ def test_w_string_value(self):
+ coerced = self._call_fut('12:12:27', object())
+ self.assertEqual(
+ coerced,
+ datetime.time(12, 12, 27))
+
+
+class Test_record_from_json(unittest.TestCase):
+
+ def _call_fut(self, value, field):
+ from google.cloud.bigquery._helpers import _record_from_json
+
+ return _record_from_json(value, field)
+
+ def test_w_none_nullable(self):
+ self.assertIsNone(self._call_fut(None, _Field('NULLABLE')))
+
+ def test_w_none_required(self):
+ with self.assertRaises(TypeError):
+ self._call_fut(None, _Field('REQUIRED'))
+
+ def test_w_nullable_subfield_none(self):
+ subfield = _Field('NULLABLE', 'age', 'INTEGER')
+ field = _Field('REQUIRED', fields=[subfield])
+ value = {'f': [{'v': None}]}
+ coerced = self._call_fut(value, field)
+ self.assertEqual(coerced, {'age': None})
+
+ def test_w_scalar_subfield(self):
+ subfield = _Field('REQUIRED', 'age', 'INTEGER')
+ field = _Field('REQUIRED', fields=[subfield])
+ value = {'f': [{'v': 42}]}
+ coerced = self._call_fut(value, field)
+ self.assertEqual(coerced, {'age': 42})
+
+ def test_w_repeated_subfield(self):
+ subfield = _Field('REPEATED', 'color', 'STRING')
+ field = _Field('REQUIRED', fields=[subfield])
+ value = {'f': [{'v': [{'v': 'red'}, {'v': 'yellow'}, {'v': 'blue'}]}]}
+ coerced = self._call_fut(value, field)
+ self.assertEqual(coerced, {'color': ['red', 'yellow', 'blue']})
+
+ def test_w_record_subfield(self):
+ full_name = _Field('REQUIRED', 'full_name', 'STRING')
+ area_code = _Field('REQUIRED', 'area_code', 'STRING')
+ local_number = _Field('REQUIRED', 'local_number', 'STRING')
+ rank = _Field('REQUIRED', 'rank', 'INTEGER')
+ phone = _Field('NULLABLE', 'phone', 'RECORD',
+ fields=[area_code, local_number, rank])
+ person = _Field('REQUIRED', 'person', 'RECORD',
+ fields=[full_name, phone])
+ value = {
+ 'f': [
+ {'v': 'Phred Phlyntstone'},
+ {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}},
+ ],
+ }
+ expected = {
+ 'full_name': 'Phred Phlyntstone',
+ 'phone': {
+ 'area_code': '800',
+ 'local_number': '555-1212',
+ 'rank': 1,
+ }
+ }
+ coerced = self._call_fut(value, person)
+ self.assertEqual(coerced, expected)
+
+
+class Test_row_tuple_from_json(unittest.TestCase):
+
+ def _call_fut(self, row, schema):
+ from google.cloud.bigquery._helpers import _row_tuple_from_json
+
+ return _row_tuple_from_json(row, schema)
+
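+    # The REST API encodes each row as {'f': [{'v': <value>}, ...]}, one
+    # cell per schema field; these tests feed that wire format directly.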
+ def test_w_single_scalar_column(self):
+ # SELECT 1 AS col
+ col = _Field('REQUIRED', 'col', 'INTEGER')
+ row = {u'f': [{u'v': u'1'}]}
+ self.assertEqual(self._call_fut(row, schema=[col]), (1,))
+
+ def test_w_single_struct_column(self):
+ # SELECT (1, 2) AS col
+ sub_1 = _Field('REQUIRED', 'sub_1', 'INTEGER')
+ sub_2 = _Field('REQUIRED', 'sub_2', 'INTEGER')
+ col = _Field('REQUIRED', 'col', 'RECORD', fields=[sub_1, sub_2])
+ row = {u'f': [{u'v': {u'f': [{u'v': u'1'}, {u'v': u'2'}]}}]}
+ self.assertEqual(self._call_fut(row, schema=[col]),
+ ({'sub_1': 1, 'sub_2': 2},))
+
+ def test_w_single_array_column(self):
+ # SELECT [1, 2, 3] as col
+ col = _Field('REPEATED', 'col', 'INTEGER')
+ row = {u'f': [{u'v': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]}]}
+ self.assertEqual(self._call_fut(row, schema=[col]),
+ ([1, 2, 3],))
+
+ def test_w_struct_w_nested_array_column(self):
+ # SELECT ([1, 2], 3, [4, 5]) as col
+ first = _Field('REPEATED', 'first', 'INTEGER')
+ second = _Field('REQUIRED', 'second', 'INTEGER')
+ third = _Field('REPEATED', 'third', 'INTEGER')
+ col = _Field('REQUIRED', 'col', 'RECORD',
+ fields=[first, second, third])
+ row = {
+ u'f': [
+ {u'v': {
+ u'f': [
+ {u'v': [{u'v': u'1'}, {u'v': u'2'}]},
+ {u'v': u'3'},
+ {u'v': [{u'v': u'4'}, {u'v': u'5'}]}
+ ]
+ }},
+ ]
+ }
+ self.assertEqual(
+ self._call_fut(row, schema=[col]),
+ ({u'first': [1, 2], u'second': 3, u'third': [4, 5]},))
+
+ def test_w_array_of_struct(self):
+ # SELECT [(1, 2, 3), (4, 5, 6)] as col
+ first = _Field('REQUIRED', 'first', 'INTEGER')
+ second = _Field('REQUIRED', 'second', 'INTEGER')
+ third = _Field('REQUIRED', 'third', 'INTEGER')
+ col = _Field('REPEATED', 'col', 'RECORD',
+ fields=[first, second, third])
+ row = {u'f': [{u'v': [
+ {u'v': {u'f': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]}},
+ {u'v': {u'f': [{u'v': u'4'}, {u'v': u'5'}, {u'v': u'6'}]}},
+ ]}]}
+ self.assertEqual(
+ self._call_fut(row, schema=[col]),
+ ([
+ {u'first': 1, u'second': 2, u'third': 3},
+ {u'first': 4, u'second': 5, u'third': 6},
+ ],))
+
+ def test_w_array_of_struct_w_array(self):
+ # SELECT [([1, 2, 3], 4), ([5, 6], 7)]
+ first = _Field('REPEATED', 'first', 'INTEGER')
+ second = _Field('REQUIRED', 'second', 'INTEGER')
+ col = _Field('REPEATED', 'col', 'RECORD', fields=[first, second])
+ row = {u'f': [{u'v': [
+ {u'v': {u'f': [
+ {u'v': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]},
+ {u'v': u'4'}
+ ]}},
+ {u'v': {u'f': [
+ {u'v': [{u'v': u'5'}, {u'v': u'6'}]},
+ {u'v': u'7'}
+ ]}}
+ ]}]}
+ self.assertEqual(
+ self._call_fut(row, schema=[col]),
+ ([
+ {u'first': [1, 2, 3], u'second': 4},
+ {u'first': [5, 6], u'second': 7},
+ ],))
+
+ def test_row(self):
+ from google.cloud.bigquery._helpers import Row
+
+ VALUES = (1, 2, 3)
+ r = Row(VALUES, {'a': 0, 'b': 1, 'c': 2})
+ self.assertEqual(r.a, 1)
+ self.assertEqual(r[1], 2)
+ self.assertEqual(r['c'], 3)
+ self.assertEqual(len(r), 3)
+ self.assertEqual(r.values(), VALUES)
+ self.assertEqual(repr(r),
+ "Row((1, 2, 3), {'a': 0, 'b': 1, 'c': 2})")
+ self.assertFalse(r != r)
+ self.assertFalse(r == 3)
+ with self.assertRaises(AttributeError):
+ r.z
+ with self.assertRaises(KeyError):
+ r['z']
+
+
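+# _rows_from_json is expected to wrap each decoded row in a Row, pairing the
+# value tuple with a field-name-to-index mapping so that cells are reachable
+# by attribute, by name, or by position (see test_row above).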
+class Test_rows_from_json(unittest.TestCase):
+
+ def _call_fut(self, rows, schema):
+ from google.cloud.bigquery._helpers import _rows_from_json
+
+ return _rows_from_json(rows, schema)
+
+ def test_w_record_subfield(self):
+ from google.cloud.bigquery._helpers import Row
+
+ full_name = _Field('REQUIRED', 'full_name', 'STRING')
+ area_code = _Field('REQUIRED', 'area_code', 'STRING')
+ local_number = _Field('REQUIRED', 'local_number', 'STRING')
+ rank = _Field('REQUIRED', 'rank', 'INTEGER')
+ phone = _Field('NULLABLE', 'phone', 'RECORD',
+ fields=[area_code, local_number, rank])
+ color = _Field('REPEATED', 'color', 'STRING')
+ schema = [full_name, phone, color]
+ rows = [
+ {'f': [
+ {'v': 'Phred Phlyntstone'},
+ {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}},
+ {'v': [{'v': 'orange'}, {'v': 'black'}]},
+ ]},
+ {'f': [
+ {'v': 'Bharney Rhubble'},
+ {'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}},
+ {'v': [{'v': 'brown'}]},
+ ]},
+ {'f': [
+ {'v': 'Wylma Phlyntstone'},
+ {'v': None},
+ {'v': []},
+ ]},
+ ]
+ phred_phone = {
+ 'area_code': '800',
+ 'local_number': '555-1212',
+ 'rank': 1,
+ }
+ bharney_phone = {
+ 'area_code': '877',
+ 'local_number': '768-5309',
+ 'rank': 2,
+ }
+ f2i = {'full_name': 0, 'phone': 1, 'color': 2}
+ expected = [
+ Row(('Phred Phlyntstone', phred_phone, ['orange', 'black']), f2i),
+ Row(('Bharney Rhubble', bharney_phone, ['brown']), f2i),
+ Row(('Wylma Phlyntstone', None, []), f2i),
+ ]
+ coerced = self._call_fut(rows, schema)
+ self.assertEqual(coerced, expected)
+
+ def test_w_int64_float64_bool(self):
+ from google.cloud.bigquery._helpers import Row
+
+ # "Standard" SQL dialect uses 'INT64', 'FLOAT64', 'BOOL'.
+ candidate = _Field('REQUIRED', 'candidate', 'STRING')
+ votes = _Field('REQUIRED', 'votes', 'INT64')
+ percentage = _Field('REQUIRED', 'percentage', 'FLOAT64')
+ incumbent = _Field('REQUIRED', 'incumbent', 'BOOL')
+ schema = [candidate, votes, percentage, incumbent]
+ rows = [
+ {'f': [
+ {'v': 'Phred Phlyntstone'},
+ {'v': 8},
+ {'v': 0.25},
+ {'v': 'true'},
+ ]},
+ {'f': [
+ {'v': 'Bharney Rhubble'},
+ {'v': 4},
+ {'v': 0.125},
+ {'v': 'false'},
+ ]},
+ {'f': [
+ {'v': 'Wylma Phlyntstone'},
+ {'v': 20},
+ {'v': 0.625},
+ {'v': 'false'},
+ ]},
+ ]
+ f2i = {'candidate': 0, 'votes': 1, 'percentage': 2, 'incumbent': 3}
+ expected = [
+ Row(('Phred Phlyntstone', 8, 0.25, True), f2i),
+ Row(('Bharney Rhubble', 4, 0.125, False), f2i),
+ Row(('Wylma Phlyntstone', 20, 0.625, False), f2i),
+ ]
+ coerced = self._call_fut(rows, schema)
+ self.assertEqual(coerced, expected)
+
+
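+# The *_to_json helpers below serialize Python values into the wire
+# representations the REST API expects: integers as decimal strings,
+# booleans as 'true'/'false', and bytes as base64-encoded text.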
+class Test_int_to_json(unittest.TestCase):
+
+ def _call_fut(self, value):
+ from google.cloud.bigquery._helpers import _int_to_json
+
+ return _int_to_json(value)
+
+ def test_w_int(self):
+ self.assertEqual(self._call_fut(123), '123')
+
+ def test_w_string(self):
+ self.assertEqual(self._call_fut('123'), '123')
+
+
+class Test_float_to_json(unittest.TestCase):
+
+ def _call_fut(self, value):
+ from google.cloud.bigquery._helpers import _float_to_json
+
+ return _float_to_json(value)
+
+ def test_w_float(self):
+ self.assertEqual(self._call_fut(1.23), 1.23)
+
+
+class Test_bool_to_json(unittest.TestCase):
+
+ def _call_fut(self, value):
+ from google.cloud.bigquery._helpers import _bool_to_json
+
+ return _bool_to_json(value)
+
+ def test_w_true(self):
+ self.assertEqual(self._call_fut(True), 'true')
+
+ def test_w_false(self):
+ self.assertEqual(self._call_fut(False), 'false')
+
+ def test_w_string(self):
+ self.assertEqual(self._call_fut('false'), 'false')
+
+
+class Test_bytes_to_json(unittest.TestCase):
+
+ def _call_fut(self, value):
+ from google.cloud.bigquery._helpers import _bytes_to_json
+
+ return _bytes_to_json(value)
+
+ def test_w_non_bytes(self):
+ non_bytes = object()
+ self.assertIs(self._call_fut(non_bytes), non_bytes)
+
+ def test_w_bytes(self):
+ source = b'source'
+ expected = u'c291cmNl'
+ converted = self._call_fut(source)
+ self.assertEqual(converted, expected)
+
+
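+# Timestamps are serialized differently depending on context: datetime query
+# parameters are rendered as '+00:00'-suffixed strings, while datetime row
+# values are rendered as seconds since the epoch (see
+# Test_timestamp_to_json_row below).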
+class Test_timestamp_to_json_parameter(unittest.TestCase):
+
+ def _call_fut(self, value):
+ from google.cloud.bigquery._helpers import _timestamp_to_json_parameter
+
+ return _timestamp_to_json_parameter(value)
+
+ def test_w_float(self):
+ self.assertEqual(self._call_fut(1.234567), 1.234567)
+
+ def test_w_string(self):
+ ZULU = '2016-12-20 15:58:27.339328+00:00'
+ self.assertEqual(self._call_fut(ZULU), ZULU)
+
+ def test_w_datetime_wo_zone(self):
+ ZULU = '2016-12-20 15:58:27.339328+00:00'
+ when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328)
+ self.assertEqual(self._call_fut(when), ZULU)
+
+ def test_w_datetime_w_non_utc_zone(self):
+ class _Zone(datetime.tzinfo):
+
+ def utcoffset(self, _):
+ return datetime.timedelta(minutes=-240)
+
+ ZULU = '2016-12-20 19:58:27.339328+00:00'
+ when = datetime.datetime(
+ 2016, 12, 20, 15, 58, 27, 339328, tzinfo=_Zone())
+ self.assertEqual(self._call_fut(when), ZULU)
+
+ def test_w_datetime_w_utc_zone(self):
+ from google.cloud._helpers import UTC
+
+ ZULU = '2016-12-20 15:58:27.339328+00:00'
+ when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC)
+ self.assertEqual(self._call_fut(when), ZULU)
+
+
+class Test_timestamp_to_json_row(unittest.TestCase):
+
+ def _call_fut(self, value):
+ from google.cloud.bigquery._helpers import _timestamp_to_json_row
+
+ return _timestamp_to_json_row(value)
+
+ def test_w_float(self):
+ self.assertEqual(self._call_fut(1.234567), 1.234567)
+
+ def test_w_string(self):
+ ZULU = '2016-12-20 15:58:27.339328+00:00'
+ self.assertEqual(self._call_fut(ZULU), ZULU)
+
+ def test_w_datetime(self):
+ from google.cloud._helpers import _microseconds_from_datetime
+
+ when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328)
+ self.assertEqual(
+ self._call_fut(when), _microseconds_from_datetime(when) / 1e6)
+
+
+class Test_datetime_to_json(unittest.TestCase):
+
+ def _call_fut(self, value):
+ from google.cloud.bigquery._helpers import _datetime_to_json
+
+ return _datetime_to_json(value)
+
+ def test_w_string(self):
+ RFC3339 = '2016-12-03T14:14:51Z'
+ self.assertEqual(self._call_fut(RFC3339), RFC3339)
+
+ def test_w_datetime(self):
+ from google.cloud._helpers import UTC
+
+ when = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456, tzinfo=UTC)
+ self.assertEqual(self._call_fut(when), '2016-12-03T14:11:27.123456')
+
+
+class Test_date_to_json(unittest.TestCase):
+
+ def _call_fut(self, value):
+ from google.cloud.bigquery._helpers import _date_to_json
+
+ return _date_to_json(value)
+
+ def test_w_string(self):
+ RFC3339 = '2016-12-03'
+ self.assertEqual(self._call_fut(RFC3339), RFC3339)
+
+ def test_w_datetime(self):
+ when = datetime.date(2016, 12, 3)
+ self.assertEqual(self._call_fut(when), '2016-12-03')
+
+
+class Test_time_to_json(unittest.TestCase):
+
+ def _call_fut(self, value):
+ from google.cloud.bigquery._helpers import _time_to_json
+
+ return _time_to_json(value)
+
+ def test_w_string(self):
+ RFC3339 = '12:13:41'
+ self.assertEqual(self._call_fut(RFC3339), RFC3339)
+
+ def test_w_datetime(self):
+ when = datetime.time(12, 13, 41)
+ self.assertEqual(self._call_fut(when), '12:13:41')
+
+
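+# _snake_to_camel_case maps snake_case property names onto the camelCase keys
+# used in BigQuery REST resources; input already in camelCase passes through
+# unchanged.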
+class Test_snake_to_camel_case(unittest.TestCase):
+
+ def _call_fut(self, value):
+ from google.cloud.bigquery._helpers import _snake_to_camel_case
+
+ return _snake_to_camel_case(value)
+
+ def test_w_snake_case_string(self):
+ self.assertEqual(self._call_fut('friendly_name'), 'friendlyName')
+
+ def test_w_camel_case_string(self):
+ self.assertEqual(self._call_fut('friendlyName'), 'friendlyName')
+
+
+class Test_TypedApiResourceProperty(unittest.TestCase):
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery._helpers import _TypedApiResourceProperty
+
+ return _TypedApiResourceProperty
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_it(self):
+
+ class Wrapper(object):
+ attr = self._make_one('attr', 'back', int)
+
+ def __init__(self):
+ self._properties = {}
+
+ self.assertIsNotNone(Wrapper.attr)
+
+ wrapper = Wrapper()
+ with self.assertRaises(ValueError):
+ wrapper.attr = 'BOGUS'
+
+ wrapper.attr = 42
+ self.assertEqual(wrapper.attr, 42)
+ self.assertEqual(wrapper._properties['back'], 42)
+
+ wrapper.attr = None
+ self.assertIsNone(wrapper.attr)
+ self.assertIsNone(wrapper._properties['back'])
+
+ wrapper.attr = 23
+ self.assertEqual(wrapper.attr, 23)
+ self.assertEqual(wrapper._properties['back'], 23)
+
+ del wrapper.attr
+ self.assertIsNone(wrapper.attr)
+ with self.assertRaises(KeyError):
+ wrapper._properties['back']
+
+
+class Test_ListApiResourceProperty(unittest.TestCase):
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery._helpers import _ListApiResourceProperty
+
+ return _ListApiResourceProperty
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def _descriptor_and_klass(self):
+ from google.cloud.bigquery.query import _AbstractQueryParameter
+
+ descriptor = self._make_one(
+ 'query_parameters', 'queryParameters', _AbstractQueryParameter)
+
+ class _Test(object):
+ def __init__(self):
+ self._properties = {}
+
+ query_parameters = descriptor
+
+ return descriptor, _Test
+
+ def test_class_getter(self):
+ descriptor, klass = self._descriptor_and_klass()
+ self.assertIs(klass.query_parameters, descriptor)
+
+ def test_instance_getter_empty(self):
+ _, klass = self._descriptor_and_klass()
+ instance = klass()
+ self.assertEqual(instance.query_parameters, [])
+
+ def test_instance_getter_w_non_empty_list(self):
+ from google.cloud.bigquery.query import ScalarQueryParameter
+
+ query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)]
+ _, klass = self._descriptor_and_klass()
+ instance = klass()
+ instance._properties['queryParameters'] = query_parameters
+
+ self.assertEqual(instance.query_parameters, query_parameters)
+
+ def test_instance_setter_w_empty_list(self):
+ from google.cloud.bigquery.query import ScalarQueryParameter
+
+ query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)]
+ _, klass = self._descriptor_and_klass()
+ instance = klass()
+        instance.query_parameters = query_parameters
+
+ instance.query_parameters = []
+
+ self.assertEqual(instance.query_parameters, [])
+
+ def test_instance_setter_w_none(self):
+ from google.cloud.bigquery.query import ScalarQueryParameter
+
+ query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)]
+ _, klass = self._descriptor_and_klass()
+ instance = klass()
+        instance.query_parameters = query_parameters
+
+ with self.assertRaises(ValueError):
+ instance.query_parameters = None
+
+    def test_instance_setter_w_valid_query_parameter(self):
+ from google.cloud.bigquery.query import ScalarQueryParameter
+
+ query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)]
+ _, klass = self._descriptor_and_klass()
+ instance = klass()
+
+ instance.query_parameters = query_parameters
+
+ self.assertEqual(instance.query_parameters, query_parameters)
+
+    def test_instance_setter_w_bad_query_parameters(self):
+ _, klass = self._descriptor_and_klass()
+ instance = klass()
+
+ with self.assertRaises(ValueError):
+ instance.query_parameters = ["foo"]
+
+ self.assertEqual(instance.query_parameters, [])
+
+
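+# _Field is a minimal stand-in for SchemaField, exposing only the attributes
+# (mode, name, field_type, fields) that the helpers under test consult.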
+class _Field(object):
+
+ def __init__(self, mode, name='unknown', field_type='UNKNOWN', fields=()):
+ self.mode = mode
+ self.name = name
+ self.field_type = field_type
+ self.fields = fields
diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py
new file mode 100644
index 0000000..c71847e
--- /dev/null
+++ b/bigquery/tests/unit/test_client.py
@@ -0,0 +1,3236 @@
+# Copyright 2015 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+import email
+import io
+import json
+import unittest
+
+import mock
+import six
+from six.moves import http_client
+import pytest
+
+from google.cloud.bigquery.dataset import DatasetReference
+
+
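+# These tests never touch the network: credentials are mocked out below, and
+# HTTP traffic goes through a _Connection stub (defined later in this module)
+# that records each request and replays canned responses.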
+def _make_credentials():
+ import google.auth.credentials
+
+ return mock.Mock(spec=google.auth.credentials.Credentials)
+
+
+class TestClient(unittest.TestCase):
+
+ PROJECT = 'PROJECT'
+ DS_ID = 'DATASET_ID'
+ TABLE_ID = 'TABLE_ID'
+ TABLE_REF = DatasetReference(PROJECT, DS_ID).table(TABLE_ID)
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.client import Client
+
+ return Client
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_ctor(self):
+ from google.cloud.bigquery._http import Connection
+
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ self.assertIsInstance(client._connection, Connection)
+ self.assertIs(client._connection.credentials, creds)
+ self.assertIs(client._connection.http, http)
+
+ def test__get_query_results_miss_w_explicit_project_and_timeout(self):
+ from google.cloud.exceptions import NotFound
+
+ creds = _make_credentials()
+ client = self._make_one(self.PROJECT, creds)
+ conn = client._connection = _Connection()
+
+ with self.assertRaises(NotFound):
+ client._get_query_results(
+ 'nothere', None, project='other-project', timeout_ms=500)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(
+ req['path'], '/projects/other-project/queries/nothere')
+ self.assertEqual(
+ req['query_params'], {'maxResults': 0, 'timeoutMs': 500})
+
+ def test__get_query_results_hit(self):
+ job_id = 'query_job'
+ data = {
+ 'kind': 'bigquery#getQueryResultsResponse',
+ 'etag': 'some-tag',
+ 'schema': {
+ 'fields': [
+ {
+ 'name': 'title',
+ 'type': 'STRING',
+ 'mode': 'NULLABLE'
+ },
+ {
+ 'name': 'unique_words',
+ 'type': 'INTEGER',
+ 'mode': 'NULLABLE'
+ }
+ ]
+ },
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': job_id,
+ },
+ 'totalRows': '10',
+ 'totalBytesProcessed': '2464625',
+ 'jobComplete': True,
+ 'cacheHit': False,
+ }
+
+ creds = _make_credentials()
+ client = self._make_one(self.PROJECT, creds)
+ client._connection = _Connection(data)
+ query_results = client._get_query_results(job_id, None)
+
+ self.assertEqual(query_results.total_rows, 10)
+ self.assertTrue(query_results.complete)
+
+ def test_list_projects_defaults(self):
+ from google.cloud.bigquery.client import Project
+
+ PROJECT_1 = 'PROJECT_ONE'
+ PROJECT_2 = 'PROJECT_TWO'
+ PATH = 'projects'
+ TOKEN = 'TOKEN'
+ DATA = {
+ 'nextPageToken': TOKEN,
+ 'projects': [
+ {'kind': 'bigquery#project',
+ 'id': PROJECT_1,
+ 'numericId': 1,
+ 'projectReference': {'projectId': PROJECT_1},
+ 'friendlyName': 'One'},
+ {'kind': 'bigquery#project',
+ 'id': PROJECT_2,
+ 'numericId': 2,
+ 'projectReference': {'projectId': PROJECT_2},
+ 'friendlyName': 'Two'},
+ ]
+ }
+ creds = _make_credentials()
+ client = self._make_one(PROJECT_1, creds)
+ conn = client._connection = _Connection(DATA)
+
+ iterator = client.list_projects()
+ page = six.next(iterator.pages)
+ projects = list(page)
+ token = iterator.next_page_token
+
+ self.assertEqual(len(projects), len(DATA['projects']))
+ for found, expected in zip(projects, DATA['projects']):
+ self.assertIsInstance(found, Project)
+ self.assertEqual(found.project_id, expected['id'])
+ self.assertEqual(found.numeric_id, expected['numericId'])
+ self.assertEqual(found.friendly_name, expected['friendlyName'])
+ self.assertEqual(token, TOKEN)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], '/%s' % PATH)
+
+ def test_list_projects_explicit_response_missing_projects_key(self):
+ PATH = 'projects'
+ TOKEN = 'TOKEN'
+ DATA = {}
+ creds = _make_credentials()
+ client = self._make_one(self.PROJECT, creds)
+ conn = client._connection = _Connection(DATA)
+
+ iterator = client.list_projects(max_results=3, page_token=TOKEN)
+ page = six.next(iterator.pages)
+ projects = list(page)
+ token = iterator.next_page_token
+
+ self.assertEqual(len(projects), 0)
+ self.assertIsNone(token)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], '/%s' % PATH)
+ self.assertEqual(req['query_params'],
+ {'maxResults': 3, 'pageToken': TOKEN})
+
+ def test_list_datasets_defaults(self):
+ from google.cloud.bigquery.dataset import Dataset
+
+ DATASET_1 = 'dataset_one'
+ DATASET_2 = 'dataset_two'
+ PATH = 'projects/%s/datasets' % self.PROJECT
+ TOKEN = 'TOKEN'
+ DATA = {
+ 'nextPageToken': TOKEN,
+ 'datasets': [
+ {'kind': 'bigquery#dataset',
+ 'id': '%s:%s' % (self.PROJECT, DATASET_1),
+ 'datasetReference': {'datasetId': DATASET_1,
+ 'projectId': self.PROJECT},
+ 'friendlyName': None},
+ {'kind': 'bigquery#dataset',
+ 'id': '%s:%s' % (self.PROJECT, DATASET_2),
+ 'datasetReference': {'datasetId': DATASET_2,
+ 'projectId': self.PROJECT},
+ 'friendlyName': 'Two'},
+ ]
+ }
+ creds = _make_credentials()
+ client = self._make_one(self.PROJECT, creds)
+ conn = client._connection = _Connection(DATA)
+
+ iterator = client.list_datasets()
+ page = six.next(iterator.pages)
+ datasets = list(page)
+ token = iterator.next_page_token
+
+ self.assertEqual(len(datasets), len(DATA['datasets']))
+ for found, expected in zip(datasets, DATA['datasets']):
+ self.assertIsInstance(found, Dataset)
+ self.assertEqual(found.full_dataset_id, expected['id'])
+ self.assertEqual(found.friendly_name, expected['friendlyName'])
+ self.assertEqual(token, TOKEN)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], '/%s' % PATH)
+
+ def test_list_datasets_explicit_response_missing_datasets_key(self):
+ PATH = 'projects/%s/datasets' % self.PROJECT
+ TOKEN = 'TOKEN'
+ FILTER = 'FILTER'
+ DATA = {}
+ creds = _make_credentials()
+ client = self._make_one(self.PROJECT, creds)
+ conn = client._connection = _Connection(DATA)
+
+ iterator = client.list_datasets(
+ include_all=True, filter=FILTER,
+ max_results=3, page_token=TOKEN)
+ page = six.next(iterator.pages)
+ datasets = list(page)
+ token = iterator.next_page_token
+
+ self.assertEqual(len(datasets), 0)
+ self.assertIsNone(token)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], '/%s' % PATH)
+ self.assertEqual(req['query_params'],
+ {'all': True, 'filter': FILTER,
+ 'maxResults': 3, 'pageToken': TOKEN})
+
+ def test_dataset_with_specified_project(self):
+ from google.cloud.bigquery.dataset import DatasetReference
+
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ dataset = client.dataset(self.DS_ID, self.PROJECT)
+ self.assertIsInstance(dataset, DatasetReference)
+ self.assertEqual(dataset.dataset_id, self.DS_ID)
+ self.assertEqual(dataset.project, self.PROJECT)
+
+ def test_dataset_with_default_project(self):
+ from google.cloud.bigquery.dataset import DatasetReference
+
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ dataset = client.dataset(self.DS_ID)
+ self.assertIsInstance(dataset, DatasetReference)
+ self.assertEqual(dataset.dataset_id, self.DS_ID)
+ self.assertEqual(dataset.project, self.PROJECT)
+
+ def test_get_dataset(self):
+ from google.cloud.exceptions import ServerError
+
+ path = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID)
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ resource = {
+ 'id': '%s:%s' % (self.PROJECT, self.DS_ID),
+ 'datasetReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ },
+ }
+ conn = client._connection = _Connection(resource)
+ dataset_ref = client.dataset(self.DS_ID)
+
+ dataset = client.get_dataset(dataset_ref)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], '/%s' % path)
+ self.assertEqual(dataset.dataset_id, self.DS_ID)
+
+ # Test retry.
+
+ # Not a cloud API exception (missing 'errors' field).
+ client._connection = _Connection(Exception(''), resource)
+ with self.assertRaises(Exception):
+ client.get_dataset(dataset_ref)
+
+ # Zero-length errors field.
+ client._connection = _Connection(ServerError(''), resource)
+ with self.assertRaises(ServerError):
+ client.get_dataset(dataset_ref)
+
+ # Non-retryable reason.
+ client._connection = _Connection(
+ ServerError('', errors=[{'reason': 'serious'}]),
+ resource)
+ with self.assertRaises(ServerError):
+ client.get_dataset(dataset_ref)
+
+ # Retryable reason, but retry is disabled.
+ client._connection = _Connection(
+ ServerError('', errors=[{'reason': 'backendError'}]),
+ resource)
+ with self.assertRaises(ServerError):
+ client.get_dataset(dataset_ref, retry=None)
+
+ # Retryable reason, default retry: success.
+ client._connection = _Connection(
+ ServerError('', errors=[{'reason': 'backendError'}]),
+ resource)
+ dataset = client.get_dataset(dataset_ref)
+ self.assertEqual(dataset.dataset_id, self.DS_ID)
+
+ def test_create_dataset_minimal(self):
+ from google.cloud.bigquery.dataset import Dataset
+
+ PATH = 'projects/%s/datasets' % self.PROJECT
+ RESOURCE = {
+ 'datasetReference':
+ {'projectId': self.PROJECT, 'datasetId': self.DS_ID},
+ 'etag': "etag",
+ 'id': "%s:%s" % (self.PROJECT, self.DS_ID),
+ }
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ conn = client._connection = _Connection(RESOURCE)
+ ds = client.create_dataset(Dataset(client.dataset(self.DS_ID)))
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/%s' % PATH)
+ SENT = {
+ 'datasetReference':
+ {'projectId': self.PROJECT, 'datasetId': self.DS_ID},
+ 'labels': {},
+ }
+ self.assertEqual(req['data'], SENT)
+ self.assertEqual(ds.dataset_id, self.DS_ID)
+ self.assertEqual(ds.project, self.PROJECT)
+ self.assertEqual(ds.etag, RESOURCE['etag'])
+ self.assertEqual(ds.full_dataset_id, RESOURCE['id'])
+
+ def test_create_dataset_w_attrs(self):
+ from google.cloud.bigquery.dataset import Dataset, AccessEntry
+
+ PATH = 'projects/%s/datasets' % self.PROJECT
+ DESCRIPTION = 'DESC'
+ FRIENDLY_NAME = 'FN'
+ LOCATION = 'US'
+ USER_EMAIL = 'phred@example.com'
+ LABELS = {'color': 'red'}
+ VIEW = {
+ 'projectId': 'my-proj',
+ 'datasetId': 'starry-skies',
+ 'tableId': 'northern-hemisphere',
+ }
+ RESOURCE = {
+ 'datasetReference':
+ {'projectId': self.PROJECT, 'datasetId': self.DS_ID},
+ 'etag': "etag",
+ 'id': "%s:%s" % (self.PROJECT, self.DS_ID),
+ 'description': DESCRIPTION,
+ 'friendlyName': FRIENDLY_NAME,
+ 'location': LOCATION,
+ 'defaultTableExpirationMs': 3600,
+ 'labels': LABELS,
+ 'access': [
+ {'role': 'OWNER', 'userByEmail': USER_EMAIL},
+ {'view': VIEW}],
+ }
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ conn = client._connection = _Connection(RESOURCE)
+ entries = [AccessEntry('OWNER', 'userByEmail', USER_EMAIL),
+ AccessEntry(None, 'view', VIEW)]
+ ds_arg = Dataset(client.dataset(self.DS_ID))
+ ds_arg.access_entries = entries
+ ds_arg.description = DESCRIPTION
+ ds_arg.friendly_name = FRIENDLY_NAME
+ ds_arg.default_table_expiration_ms = 3600
+ ds_arg.location = LOCATION
+ ds_arg.labels = LABELS
+ ds = client.create_dataset(ds_arg)
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/%s' % PATH)
+ SENT = {
+ 'datasetReference':
+ {'projectId': self.PROJECT, 'datasetId': self.DS_ID},
+ 'description': DESCRIPTION,
+ 'friendlyName': FRIENDLY_NAME,
+ 'location': LOCATION,
+ 'defaultTableExpirationMs': 3600,
+ 'access': [
+ {'role': 'OWNER', 'userByEmail': USER_EMAIL},
+ {'view': VIEW}],
+ 'labels': LABELS,
+ }
+ self.assertEqual(req['data'], SENT)
+ self.assertEqual(ds.dataset_id, self.DS_ID)
+ self.assertEqual(ds.project, self.PROJECT)
+ self.assertEqual(ds.etag, RESOURCE['etag'])
+ self.assertEqual(ds.full_dataset_id, RESOURCE['id'])
+ self.assertEqual(ds.description, DESCRIPTION)
+ self.assertEqual(ds.friendly_name, FRIENDLY_NAME)
+ self.assertEqual(ds.location, LOCATION)
+ self.assertEqual(ds.default_table_expiration_ms, 3600)
+ self.assertEqual(ds.labels, LABELS)
+
+ def test_create_table_w_day_partition(self):
+ from google.cloud.bigquery.table import Table
+
+ path = 'projects/%s/datasets/%s/tables' % (
+ self.PROJECT, self.DS_ID)
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ resource = {
+ 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID
+ },
+ }
+ conn = client._connection = _Connection(resource)
+ table = Table(self.TABLE_REF)
+ table.partitioning_type = 'DAY'
+
+ got = client.create_table(table)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/%s' % path)
+ sent = {
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID
+ },
+ 'timePartitioning': {'type': 'DAY'},
+ 'labels': {},
+ }
+ self.assertEqual(req['data'], sent)
+ self.assertEqual(table.partitioning_type, "DAY")
+ self.assertEqual(got.table_id, self.TABLE_ID)
+
+ def test_create_table_w_day_partition_and_expire(self):
+ from google.cloud.bigquery.table import Table
+
+ path = 'projects/%s/datasets/%s/tables' % (
+ self.PROJECT, self.DS_ID)
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ resource = {
+ 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID
+ },
+ }
+ conn = client._connection = _Connection(resource)
+ table = Table(self.TABLE_REF)
+ table.partitioning_type = 'DAY'
+ table.partition_expiration = 100
+
+ got = client.create_table(table)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/%s' % path)
+ sent = {
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID
+ },
+ 'timePartitioning': {'type': 'DAY', 'expirationMs': 100},
+ 'labels': {},
+ }
+ self.assertEqual(req['data'], sent)
+ self.assertEqual(table.partitioning_type, "DAY")
+ self.assertEqual(table.partition_expiration, 100)
+ self.assertEqual(got.table_id, self.TABLE_ID)
+
+ def test_create_table_w_schema_and_query(self):
+ from google.cloud.bigquery.table import Table, SchemaField
+
+ path = 'projects/%s/datasets/%s/tables' % (
+ self.PROJECT, self.DS_ID)
+ query = 'SELECT * from %s:%s' % (self.DS_ID, self.TABLE_ID)
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ resource = {
+ 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID
+ },
+ 'schema': {'fields': [
+ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
+ {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]
+ },
+ 'view': {'query': query},
+ }
+ schema = [
+ SchemaField('full_name', 'STRING', mode='REQUIRED'),
+ SchemaField('age', 'INTEGER', mode='REQUIRED')
+ ]
+ conn = client._connection = _Connection(resource)
+ table = Table(self.TABLE_REF, schema=schema)
+ table.view_query = query
+
+ got = client.create_table(table)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/%s' % path)
+ sent = {
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID,
+ },
+ 'schema': {'fields': [
+ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
+ {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]
+ },
+ 'view': {'query': query, 'useLegacySql': False},
+ 'labels': {},
+ }
+ self.assertEqual(req['data'], sent)
+ self.assertEqual(got.table_id, self.TABLE_ID)
+ self.assertEqual(got.project, self.PROJECT)
+ self.assertEqual(got.dataset_id, self.DS_ID)
+ self.assertEqual(got.schema, schema)
+ self.assertEqual(got.view_query, query)
+
+ def test_create_table_w_external(self):
+ from google.cloud.bigquery.table import Table
+ from google.cloud.bigquery.external_config import ExternalConfig
+
+ path = 'projects/%s/datasets/%s/tables' % (
+ self.PROJECT, self.DS_ID)
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ resource = {
+ 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID
+ },
+ 'externalDataConfiguration': {
+ 'sourceFormat': 'CSV',
+ 'autodetect': True,
+ },
+ }
+ conn = client._connection = _Connection(resource)
+ table = Table(self.TABLE_REF)
+ ec = ExternalConfig('CSV')
+ ec.autodetect = True
+ table.external_data_configuration = ec
+
+ got = client.create_table(table)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/%s' % path)
+ sent = {
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID,
+ },
+ 'externalDataConfiguration': {
+ 'sourceFormat': 'CSV',
+ 'autodetect': True,
+ },
+ 'labels': {},
+ }
+ self.assertEqual(req['data'], sent)
+ self.assertEqual(got.table_id, self.TABLE_ID)
+ self.assertEqual(got.project, self.PROJECT)
+ self.assertEqual(got.dataset_id, self.DS_ID)
+ self.assertEqual(got.external_data_configuration.source_format, 'CSV')
+ self.assertEqual(got.external_data_configuration.autodetect, True)
+
+ def test_get_table(self):
+ path = 'projects/%s/datasets/%s/tables/%s' % (
+ self.PROJECT, self.DS_ID, self.TABLE_ID)
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ resource = {
+ 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID,
+ },
+ }
+ conn = client._connection = _Connection(resource)
+ table = client.get_table(self.TABLE_REF)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], '/%s' % path)
+ self.assertEqual(table.table_id, self.TABLE_ID)
+
+ def test_update_dataset_w_invalid_field(self):
+ from google.cloud.bigquery.dataset import Dataset
+
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ with self.assertRaises(ValueError):
+ client.update_dataset(Dataset(client.dataset(self.DS_ID)), ["foo"])
+
+ def test_update_dataset(self):
+ from google.cloud.bigquery.dataset import Dataset, AccessEntry
+
+ PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID)
+ DESCRIPTION = 'DESCRIPTION'
+ FRIENDLY_NAME = 'TITLE'
+ LOCATION = 'loc'
+ LABELS = {'priority': 'high'}
+ ACCESS = [
+ {'role': 'OWNER', 'userByEmail': 'phred@example.com'},
+ ]
+ EXP = 17
+ RESOURCE = {
+ 'datasetReference':
+ {'projectId': self.PROJECT, 'datasetId': self.DS_ID},
+ 'etag': "etag",
+ 'description': DESCRIPTION,
+ 'friendlyName': FRIENDLY_NAME,
+ 'location': LOCATION,
+ 'defaultTableExpirationMs': EXP,
+ 'labels': LABELS,
+ 'access': ACCESS,
+ }
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ conn = client._connection = _Connection(RESOURCE, RESOURCE)
+ ds = Dataset(client.dataset(self.DS_ID))
+ ds.description = DESCRIPTION
+ ds.friendly_name = FRIENDLY_NAME
+ ds.location = LOCATION
+ ds.default_table_expiration_ms = EXP
+ ds.labels = LABELS
+ ds.access_entries = [
+ AccessEntry('OWNER', 'userByEmail', 'phred@example.com')]
+ ds2 = client.update_dataset(
+ ds, ['description', 'friendly_name', 'location', 'labels',
+ 'access_entries'])
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'PATCH')
+ SENT = {
+ 'description': DESCRIPTION,
+ 'friendlyName': FRIENDLY_NAME,
+ 'location': LOCATION,
+ 'labels': LABELS,
+ 'access': ACCESS,
+ }
+ self.assertEqual(req['data'], SENT)
+ self.assertEqual(req['path'], '/' + PATH)
+ self.assertIsNone(req['headers'])
+ self.assertEqual(ds2.description, ds.description)
+ self.assertEqual(ds2.friendly_name, ds.friendly_name)
+ self.assertEqual(ds2.location, ds.location)
+ self.assertEqual(ds2.labels, ds.labels)
+ self.assertEqual(ds2.access_entries, ds.access_entries)
+
+ # ETag becomes If-Match header.
+ ds._properties['etag'] = 'etag'
+ client.update_dataset(ds, [])
+ req = conn._requested[1]
+ self.assertEqual(req['headers']['If-Match'], 'etag')
+
+ def test_update_table(self):
+ from google.cloud.bigquery.table import Table, SchemaField
+
+ path = 'projects/%s/datasets/%s/tables/%s' % (
+ self.PROJECT, self.DS_ID, self.TABLE_ID)
+ description = 'description'
+ title = 'title'
+ resource = {
+ 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID
+ },
+ 'schema': {'fields': [
+ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
+ {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]
+ },
+ 'etag': 'etag',
+ 'description': description,
+ 'friendlyName': title,
+ 'labels': {'x': 'y'},
+ }
+ schema = [
+ SchemaField('full_name', 'STRING', mode='REQUIRED'),
+ SchemaField('age', 'INTEGER', mode='REQUIRED')
+ ]
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ conn = client._connection = _Connection(resource, resource)
+ table = Table(self.TABLE_REF, schema=schema)
+ table.description = description
+ table.friendly_name = title
+ table.labels = {'x': 'y'}
+
+ updated_table = client.update_table(
+ table, ['schema', 'description', 'friendly_name', 'labels'])
+
+ sent = {
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID
+ },
+ 'schema': {'fields': [
+ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
+ {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]},
+ 'description': description,
+ 'friendlyName': title,
+ 'labels': {'x': 'y'},
+ }
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'PATCH')
+ self.assertEqual(req['data'], sent)
+ self.assertEqual(req['path'], '/' + path)
+ self.assertIsNone(req['headers'])
+ self.assertEqual(updated_table.description, table.description)
+ self.assertEqual(updated_table.friendly_name, table.friendly_name)
+ self.assertEqual(updated_table.schema, table.schema)
+ self.assertEqual(updated_table.labels, table.labels)
+
+ # ETag becomes If-Match header.
+ table._properties['etag'] = 'etag'
+ client.update_table(table, [])
+ req = conn._requested[1]
+ self.assertEqual(req['headers']['If-Match'], 'etag')
+
+ def test_update_table_only_use_legacy_sql(self):
+ from google.cloud.bigquery.table import Table
+
+ path = 'projects/%s/datasets/%s/tables/%s' % (
+ self.PROJECT, self.DS_ID, self.TABLE_ID)
+ resource = {
+ 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID
+ },
+ 'view': {'useLegacySql': True}
+ }
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ conn = client._connection = _Connection(resource)
+ table = Table(self.TABLE_REF)
+ table.view_use_legacy_sql = True
+
+ updated_table = client.update_table(table, ['view_use_legacy_sql'])
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'PATCH')
+ self.assertEqual(req['path'], '/%s' % path)
+ sent = {
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID
+ },
+ 'view': {'useLegacySql': True}
+ }
+ self.assertEqual(req['data'], sent)
+ self.assertEqual(
+ updated_table.view_use_legacy_sql, table.view_use_legacy_sql)
+
+ def test_update_table_w_query(self):
+ import datetime
+ from google.cloud._helpers import UTC
+ from google.cloud._helpers import _millis
+ from google.cloud.bigquery.table import Table, SchemaField
+
+ path = 'projects/%s/datasets/%s/tables/%s' % (
+ self.PROJECT, self.DS_ID, self.TABLE_ID)
+ query = 'select fullname, age from person_ages'
+ location = 'EU'
+ exp_time = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC)
+ schema_resource = {'fields': [
+ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
+ {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}
+ schema = [
+ SchemaField('full_name', 'STRING', mode='REQUIRED'),
+ SchemaField('age', 'INTEGER', mode='REQUIRED')
+ ]
+ resource = {
+ 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID
+ },
+ 'schema': schema_resource,
+ 'view': {'query': query, 'useLegacySql': True},
+ 'location': location,
+ 'expirationTime': _millis(exp_time)
+ }
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ conn = client._connection = _Connection(resource)
+ table = Table(self.TABLE_REF, schema=schema)
+ table.location = location
+ table.expires = exp_time
+ table.view_query = query
+ table.view_use_legacy_sql = True
+ updated_properties = ['schema', 'view_query', 'location',
+ 'expires', 'view_use_legacy_sql']
+
+ updated_table = client.update_table(table, updated_properties)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'PATCH')
+ self.assertEqual(req['path'], '/%s' % path)
+ sent = {
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID
+ },
+ 'view': {'query': query, 'useLegacySql': True},
+ 'location': location,
+ 'expirationTime': _millis(exp_time),
+ 'schema': schema_resource,
+ }
+ self.assertEqual(req['data'], sent)
+ self.assertEqual(updated_table.schema, table.schema)
+ self.assertEqual(updated_table.view_query, table.view_query)
+ self.assertEqual(updated_table.location, table.location)
+ self.assertEqual(updated_table.expires, table.expires)
+ self.assertEqual(
+ updated_table.view_use_legacy_sql, table.view_use_legacy_sql)
+
+ def test_update_table_w_schema_None(self):
+        # Simulate deleting the schema. It is unclear whether the back end
+        # accepts this operation, but the API spec marks the field optional.
+ path = 'projects/%s/datasets/%s/tables/%s' % (
+ self.PROJECT, self.DS_ID, self.TABLE_ID)
+ resource1 = {
+ 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID},
+ 'schema': {'fields': [
+ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
+ {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]}
+ }
+ resource2 = {
+ 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID},
+ 'schema': {'fields': []},
+ }
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ conn = client._connection = _Connection(resource1, resource2)
+ table = client.get_table(self.TABLE_REF)
+ table.schema = None
+
+ updated_table = client.update_table(table, ['schema'])
+
+ self.assertEqual(len(conn._requested), 2)
+ req = conn._requested[1]
+ self.assertEqual(req['method'], 'PATCH')
+ sent = {
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID
+ },
+ 'schema': None
+ }
+ self.assertEqual(req['data'], sent)
+ self.assertEqual(req['path'], '/%s' % path)
+ self.assertEqual(updated_table.schema, table.schema)
+
+ def test_update_table_delete_property(self):
+ from google.cloud.bigquery.table import Table
+
+ description = 'description'
+ title = 'title'
+ path = 'projects/%s/datasets/%s/tables/%s' % (
+ self.PROJECT, self.DS_ID, self.TABLE_ID)
+ resource1 = {
+ 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID
+ },
+ 'description': description,
+ 'friendlyName': title,
+ }
+ resource2 = {
+ 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_ID),
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID
+ },
+ 'description': None,
+ }
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ conn = client._connection = _Connection(resource1, resource2)
+ table = Table(self.TABLE_REF)
+ table.description = description
+ table.friendly_name = title
+ table2 = client.update_table(table, ['description', 'friendly_name'])
+ self.assertEqual(table2.description, table.description)
+ table2.description = None
+
+ table3 = client.update_table(table2, ['description'])
+ self.assertEqual(len(conn._requested), 2)
+ req = conn._requested[1]
+ self.assertEqual(req['method'], 'PATCH')
+ self.assertEqual(req['path'], '/%s' % path)
+ sent = {
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID
+ },
+ 'description': None,
+ }
+ self.assertEqual(req['data'], sent)
+ self.assertIsNone(table3.description)
+
+ def test_list_dataset_tables_empty(self):
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ conn = client._connection = _Connection({})
+
+ dataset = client.dataset(self.DS_ID)
+ iterator = client.list_dataset_tables(dataset)
+ self.assertIs(iterator.dataset, dataset)
+ page = six.next(iterator.pages)
+ tables = list(page)
+ token = iterator.next_page_token
+
+ self.assertEqual(tables, [])
+ self.assertIsNone(token)
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID)
+ self.assertEqual(req['path'], '/%s' % PATH)
+
+ def test_list_dataset_tables_defaults(self):
+ from google.cloud.bigquery.table import Table
+
+ TABLE_1 = 'table_one'
+ TABLE_2 = 'table_two'
+ PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID)
+ TOKEN = 'TOKEN'
+ DATA = {
+ 'nextPageToken': TOKEN,
+ 'tables': [
+ {'kind': 'bigquery#table',
+ 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1),
+ 'tableReference': {'tableId': TABLE_1,
+ 'datasetId': self.DS_ID,
+ 'projectId': self.PROJECT},
+ 'type': 'TABLE'},
+ {'kind': 'bigquery#table',
+ 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2),
+ 'tableReference': {'tableId': TABLE_2,
+ 'datasetId': self.DS_ID,
+ 'projectId': self.PROJECT},
+ 'type': 'TABLE'},
+ ]
+ }
+
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ conn = client._connection = _Connection(DATA)
+ dataset = client.dataset(self.DS_ID)
+
+ iterator = client.list_dataset_tables(dataset)
+ self.assertIs(iterator.dataset, dataset)
+ page = six.next(iterator.pages)
+ tables = list(page)
+ token = iterator.next_page_token
+
+ self.assertEqual(len(tables), len(DATA['tables']))
+ for found, expected in zip(tables, DATA['tables']):
+ self.assertIsInstance(found, Table)
+ self.assertEqual(found.full_table_id, expected['id'])
+ self.assertEqual(found.table_type, expected['type'])
+ self.assertEqual(token, TOKEN)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], '/%s' % PATH)
+
+ def test_list_dataset_tables_explicit(self):
+ from google.cloud.bigquery.table import Table
+
+ TABLE_1 = 'table_one'
+ TABLE_2 = 'table_two'
+ PATH = 'projects/%s/datasets/%s/tables' % (self.PROJECT, self.DS_ID)
+ TOKEN = 'TOKEN'
+ DATA = {
+ 'tables': [
+            {'kind': 'bigquery#table',
+ 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_1),
+ 'tableReference': {'tableId': TABLE_1,
+ 'datasetId': self.DS_ID,
+ 'projectId': self.PROJECT},
+ 'type': 'TABLE'},
+            {'kind': 'bigquery#table',
+ 'id': '%s:%s.%s' % (self.PROJECT, self.DS_ID, TABLE_2),
+ 'tableReference': {'tableId': TABLE_2,
+ 'datasetId': self.DS_ID,
+ 'projectId': self.PROJECT},
+ 'type': 'TABLE'},
+ ]
+ }
+
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ conn = client._connection = _Connection(DATA)
+ dataset = client.dataset(self.DS_ID)
+
+ iterator = client.list_dataset_tables(
+ dataset, max_results=3, page_token=TOKEN)
+ self.assertIs(iterator.dataset, dataset)
+ page = six.next(iterator.pages)
+ tables = list(page)
+ token = iterator.next_page_token
+
+ self.assertEqual(len(tables), len(DATA['tables']))
+ for found, expected in zip(tables, DATA['tables']):
+ self.assertIsInstance(found, Table)
+ self.assertEqual(found.full_table_id, expected['id'])
+ self.assertEqual(found.table_type, expected['type'])
+ self.assertIsNone(token)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], '/%s' % PATH)
+ self.assertEqual(req['query_params'],
+ {'maxResults': 3, 'pageToken': TOKEN})
+
+ def test_list_dataset_tables_wrong_type(self):
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ with self.assertRaises(TypeError):
+ client.list_dataset_tables(client.dataset(self.DS_ID).table("foo"))
+
+ def test_delete_dataset(self):
+ from google.cloud.bigquery.dataset import Dataset
+
+ PATH = 'projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID)
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ conn = client._connection = _Connection({}, {})
+ ds_ref = client.dataset(self.DS_ID)
+ for arg in (ds_ref, Dataset(ds_ref)):
+ client.delete_dataset(arg)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'DELETE')
+ self.assertEqual(req['path'], '/%s' % PATH)
+
+ def test_delete_dataset_wrong_type(self):
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ with self.assertRaises(TypeError):
+ client.delete_dataset(client.dataset(self.DS_ID).table("foo"))
+
+ def test_delete_table(self):
+ from google.cloud.bigquery.table import Table
+
+ path = 'projects/%s/datasets/%s/tables/%s' % (
+ self.PROJECT, self.DS_ID, self.TABLE_ID)
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection({}, {})
+
+ for arg in (self.TABLE_REF, Table(self.TABLE_REF)):
+ client.delete_table(arg)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'DELETE')
+ self.assertEqual(req['path'], '/%s' % path)
+
+ def test_delete_table_w_wrong_type(self):
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ with self.assertRaises(TypeError):
+ client.delete_table(client.dataset(self.DS_ID))
+
+ def test_job_from_resource_unknown_type(self):
+ creds = _make_credentials()
+ client = self._make_one(self.PROJECT, creds)
+ with self.assertRaises(ValueError):
+ client.job_from_resource({'configuration': {'nonesuch': {}}})
+
+    def test_get_job_miss_w_explicit_project(self):
+ from google.cloud.exceptions import NotFound
+
+ OTHER_PROJECT = 'OTHER_PROJECT'
+ JOB_ID = 'NONESUCH'
+ creds = _make_credentials()
+ client = self._make_one(self.PROJECT, creds)
+ conn = client._connection = _Connection()
+
+ with self.assertRaises(NotFound):
+ client.get_job(JOB_ID, project=OTHER_PROJECT)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], '/projects/OTHER_PROJECT/jobs/NONESUCH')
+ self.assertEqual(req['query_params'], {'projection': 'full'})
+
+ def test_get_job_hit(self):
+ from google.cloud.bigquery.job import QueryJob
+
+ JOB_ID = 'query_job'
+ QUERY_DESTINATION_TABLE = 'query_destination_table'
+ QUERY = 'SELECT * from test_dataset:test_table'
+ ASYNC_QUERY_DATA = {
+ 'id': '{}:{}'.format(self.PROJECT, JOB_ID),
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': 'query_job',
+ },
+ 'state': 'DONE',
+ 'configuration': {
+ 'query': {
+ 'query': QUERY,
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': QUERY_DESTINATION_TABLE,
+ },
+ 'createDisposition': 'CREATE_IF_NEEDED',
+ 'writeDisposition': 'WRITE_TRUNCATE',
+ }
+ },
+ }
+ creds = _make_credentials()
+ client = self._make_one(self.PROJECT, creds)
+ conn = client._connection = _Connection(ASYNC_QUERY_DATA)
+
+ job = client.get_job(JOB_ID)
+
+ self.assertIsInstance(job, QueryJob)
+ self.assertEqual(job.job_id, JOB_ID)
+ self.assertEqual(job.create_disposition, 'CREATE_IF_NEEDED')
+ self.assertEqual(job.write_disposition, 'WRITE_TRUNCATE')
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], '/projects/PROJECT/jobs/query_job')
+ self.assertEqual(req['query_params'], {'projection': 'full'})
+
+ def test_list_jobs_defaults(self):
+ from google.cloud.bigquery.job import LoadJob
+ from google.cloud.bigquery.job import CopyJob
+ from google.cloud.bigquery.job import ExtractJob
+ from google.cloud.bigquery.job import QueryJob
+
+ SOURCE_TABLE = 'source_table'
+ DESTINATION_TABLE = 'destination_table'
+ QUERY_DESTINATION_TABLE = 'query_destination_table'
+ SOURCE_URI = 'gs://test_bucket/src_object*'
+ DESTINATION_URI = 'gs://test_bucket/dst_object*'
+ JOB_TYPES = {
+ 'load_job': LoadJob,
+ 'copy_job': CopyJob,
+ 'extract_job': ExtractJob,
+ 'query_job': QueryJob,
+ }
+ PATH = 'projects/%s/jobs' % self.PROJECT
+ TOKEN = 'TOKEN'
+ QUERY = 'SELECT * from test_dataset:test_table'
+ ASYNC_QUERY_DATA = {
+ 'id': '%s:%s' % (self.PROJECT, 'query_job'),
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': 'query_job',
+ },
+ 'state': 'DONE',
+ 'configuration': {
+ 'query': {
+ 'query': QUERY,
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': QUERY_DESTINATION_TABLE,
+ },
+ 'createDisposition': 'CREATE_IF_NEEDED',
+ 'writeDisposition': 'WRITE_TRUNCATE',
+ }
+ },
+ }
+ EXTRACT_DATA = {
+ 'id': '%s:%s' % (self.PROJECT, 'extract_job'),
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': 'extract_job',
+ },
+ 'state': 'DONE',
+ 'configuration': {
+ 'extract': {
+ 'sourceTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': SOURCE_TABLE,
+ },
+ 'destinationUris': [DESTINATION_URI],
+ }
+ },
+ }
+ COPY_DATA = {
+ 'id': '%s:%s' % (self.PROJECT, 'copy_job'),
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': 'copy_job',
+ },
+ 'state': 'DONE',
+ 'configuration': {
+ 'copy': {
+ 'sourceTables': [{
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': SOURCE_TABLE,
+ }],
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': DESTINATION_TABLE,
+ },
+ }
+ },
+ }
+ LOAD_DATA = {
+ 'id': '%s:%s' % (self.PROJECT, 'load_job'),
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': 'load_job',
+ },
+ 'state': 'DONE',
+ 'configuration': {
+ 'load': {
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': SOURCE_TABLE,
+ },
+ 'sourceUris': [SOURCE_URI],
+ }
+ },
+ }
+ DATA = {
+ 'nextPageToken': TOKEN,
+ 'jobs': [
+ ASYNC_QUERY_DATA,
+ EXTRACT_DATA,
+ COPY_DATA,
+ LOAD_DATA,
+ ]
+ }
+ creds = _make_credentials()
+ client = self._make_one(self.PROJECT, creds)
+ conn = client._connection = _Connection(DATA)
+
+ iterator = client.list_jobs()
+ page = six.next(iterator.pages)
+ jobs = list(page)
+ token = iterator.next_page_token
+
+ self.assertEqual(len(jobs), len(DATA['jobs']))
+ for found, expected in zip(jobs, DATA['jobs']):
+ name = expected['jobReference']['jobId']
+ self.assertIsInstance(found, JOB_TYPES[name])
+ self.assertEqual(found.job_id, name)
+ self.assertEqual(token, TOKEN)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], '/%s' % PATH)
+ self.assertEqual(req['query_params'], {'projection': 'full'})
+
+ def test_list_jobs_load_job_wo_sourceUris(self):
+ from google.cloud.bigquery.job import LoadJob
+
+ SOURCE_TABLE = 'source_table'
+ JOB_TYPES = {
+ 'load_job': LoadJob,
+ }
+ PATH = 'projects/%s/jobs' % self.PROJECT
+ TOKEN = 'TOKEN'
+ LOAD_DATA = {
+ 'id': '%s:%s' % (self.PROJECT, 'load_job'),
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': 'load_job',
+ },
+ 'state': 'DONE',
+ 'configuration': {
+ 'load': {
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': SOURCE_TABLE,
+ },
+ }
+ },
+ }
+ DATA = {
+ 'nextPageToken': TOKEN,
+ 'jobs': [
+ LOAD_DATA,
+ ]
+ }
+ creds = _make_credentials()
+ client = self._make_one(self.PROJECT, creds)
+ conn = client._connection = _Connection(DATA)
+
+ iterator = client.list_jobs()
+ page = six.next(iterator.pages)
+ jobs = list(page)
+ token = iterator.next_page_token
+
+ self.assertEqual(len(jobs), len(DATA['jobs']))
+ for found, expected in zip(jobs, DATA['jobs']):
+ name = expected['jobReference']['jobId']
+ self.assertIsInstance(found, JOB_TYPES[name])
+ self.assertEqual(found.job_id, name)
+ self.assertEqual(token, TOKEN)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], '/%s' % PATH)
+ self.assertEqual(req['query_params'], {'projection': 'full'})
+
+ def test_list_jobs_explicit_missing(self):
+ PATH = 'projects/%s/jobs' % self.PROJECT
+ DATA = {}
+ TOKEN = 'TOKEN'
+ creds = _make_credentials()
+ client = self._make_one(self.PROJECT, creds)
+ conn = client._connection = _Connection(DATA)
+
+ iterator = client.list_jobs(max_results=1000, page_token=TOKEN,
+ all_users=True, state_filter='done')
+ page = six.next(iterator.pages)
+ jobs = list(page)
+ token = iterator.next_page_token
+
+ self.assertEqual(len(jobs), 0)
+ self.assertIsNone(token)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], '/%s' % PATH)
+ self.assertEqual(req['query_params'],
+ {'projection': 'full',
+ 'maxResults': 1000,
+ 'pageToken': TOKEN,
+ 'allUsers': True,
+ 'stateFilter': 'done'})
+
+ def test_load_table_from_uri(self):
+ from google.cloud.bigquery.job import LoadJob
+
+ JOB = 'job_name'
+ DESTINATION = 'destination_table'
+ SOURCE_URI = 'http://example.com/source.csv'
+ RESOURCE = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': JOB,
+ },
+ 'configuration': {
+ 'load': {
+ 'sourceUris': [SOURCE_URI],
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': DESTINATION,
+ },
+ },
+ },
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection(RESOURCE)
+ destination = client.dataset(self.DS_ID).table(DESTINATION)
+
+ job = client.load_table_from_uri(SOURCE_URI, destination, job_id=JOB)
+
+ # Check that load_table_from_uri actually starts the job.
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/projects/%s/jobs' % self.PROJECT)
+
+ self.assertIsInstance(job, LoadJob)
+ self.assertIs(job._client, client)
+ self.assertEqual(job.job_id, JOB)
+ self.assertEqual(list(job.source_uris), [SOURCE_URI])
+ self.assertIs(job.destination, destination)
+
+ conn = client._connection = _Connection(RESOURCE)
+
+ job = client.load_table_from_uri([SOURCE_URI], destination, job_id=JOB)
+ self.assertIsInstance(job, LoadJob)
+ self.assertIs(job._client, client)
+ self.assertEqual(job.job_id, JOB)
+ self.assertEqual(list(job.source_uris), [SOURCE_URI])
+ self.assertIs(job.destination, destination)
+
+ @staticmethod
+ def _mock_requests_response(status_code, headers, content=b''):
+ return mock.Mock(
+ content=content, headers=headers, status_code=status_code,
+ spec=['content', 'headers', 'status_code'])
+
+ def _mock_transport(self, status_code, headers, content=b''):
+ fake_transport = mock.Mock(spec=['request'])
+ fake_response = self._mock_requests_response(
+ status_code, headers, content=content)
+ fake_transport.request.return_value = fake_response
+ return fake_transport
+
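+    # Shared helper for the resumable-upload tests below: drives
+    # Client._initiate_resumable_upload against a fake transport and checks
+    # the returned upload object, its retry strategy, and the outgoing
+    # initiation request.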
+ def _initiate_resumable_upload_helper(self, num_retries=None):
+ from google.resumable_media.requests import ResumableUpload
+ from google.cloud.bigquery.client import _DEFAULT_CHUNKSIZE
+ from google.cloud.bigquery.client import _GENERIC_CONTENT_TYPE
+ from google.cloud.bigquery.client import _get_upload_headers
+ from google.cloud.bigquery.job import LoadJob, LoadJobConfig
+
+ # Create mocks to be checked for doing transport.
+ resumable_url = 'http://test.invalid?upload_id=hey-you'
+ response_headers = {'location': resumable_url}
+ fake_transport = self._mock_transport(
+ http_client.OK, response_headers)
+ client = self._make_one(project=self.PROJECT, _http=fake_transport)
+ conn = client._connection = _Connection()
+
+ # Create some mock arguments and call the method under test.
+ data = b'goodbye gudbi gootbee'
+ stream = io.BytesIO(data)
+ config = LoadJobConfig()
+ config.source_format = 'CSV'
+ job = LoadJob(None, None, self.TABLE_REF, client, job_config=config)
+ metadata = job._build_resource()
+ upload, transport = client._initiate_resumable_upload(
+ stream, metadata, num_retries)
+
+ # Check the returned values.
+ self.assertIsInstance(upload, ResumableUpload)
+ upload_url = (
+ 'https://www.googleapis.com/upload/bigquery/v2/projects/' +
+ self.PROJECT +
+ '/jobs?uploadType=resumable')
+ self.assertEqual(upload.upload_url, upload_url)
+ expected_headers = _get_upload_headers(conn.USER_AGENT)
+ self.assertEqual(upload._headers, expected_headers)
+ self.assertFalse(upload.finished)
+ self.assertEqual(upload._chunk_size, _DEFAULT_CHUNKSIZE)
+ self.assertIs(upload._stream, stream)
+ self.assertIsNone(upload._total_bytes)
+ self.assertEqual(upload._content_type, _GENERIC_CONTENT_TYPE)
+ self.assertEqual(upload.resumable_url, resumable_url)
+
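+        # Without an explicit num_retries the default time-bounded retry
+        # strategy applies; otherwise retries are capped by count instead.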
+ retry_strategy = upload._retry_strategy
+ self.assertEqual(retry_strategy.max_sleep, 64.0)
+ if num_retries is None:
+ self.assertEqual(retry_strategy.max_cumulative_retry, 600.0)
+ self.assertIsNone(retry_strategy.max_retries)
+ else:
+ self.assertIsNone(retry_strategy.max_cumulative_retry)
+ self.assertEqual(retry_strategy.max_retries, num_retries)
+ self.assertIs(transport, fake_transport)
+ # Make sure we never read from the stream.
+ self.assertEqual(stream.tell(), 0)
+
+ # Check the mocks.
+ request_headers = expected_headers.copy()
+ request_headers['x-upload-content-type'] = _GENERIC_CONTENT_TYPE
+ fake_transport.request.assert_called_once_with(
+ 'POST',
+ upload_url,
+ data=json.dumps(metadata).encode('utf-8'),
+ headers=request_headers,
+ )
+
+ def test__initiate_resumable_upload(self):
+ self._initiate_resumable_upload_helper()
+
+ def test__initiate_resumable_upload_with_retry(self):
+ self._initiate_resumable_upload_helper(num_retries=11)
+
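+    # Shared helper for the multipart-upload tests below: drives
+    # Client._do_multipart_upload against a fake transport and verifies the
+    # reconstructed multipart payload and headers.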
+ def _do_multipart_upload_success_helper(
+ self, get_boundary, num_retries=None):
+ from google.cloud.bigquery.client import _get_upload_headers
+ from google.cloud.bigquery.job import LoadJob, LoadJobConfig
+
+ fake_transport = self._mock_transport(http_client.OK, {})
+ client = self._make_one(project=self.PROJECT, _http=fake_transport)
+ conn = client._connection = _Connection()
+
+ # Create some mock arguments.
+ data = b'Bzzzz-zap \x00\x01\xf4'
+ stream = io.BytesIO(data)
+ config = LoadJobConfig()
+ config.source_format = 'CSV'
+ job = LoadJob(None, None, self.TABLE_REF, client, job_config=config)
+ metadata = job._build_resource()
+ size = len(data)
+ response = client._do_multipart_upload(
+ stream, metadata, size, num_retries)
+
+ # Check the mocks and the returned value.
+ self.assertIs(response, fake_transport.request.return_value)
+ self.assertEqual(stream.tell(), size)
+ get_boundary.assert_called_once_with()
+
+ upload_url = (
+ 'https://www.googleapis.com/upload/bigquery/v2/projects/' +
+ self.PROJECT +
+ '/jobs?uploadType=multipart')
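+        # get_boundary is patched to return b'==0==' in the calling tests,
+        # so the expected multipart body can be reconstructed byte-for-byte.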
+ payload = (
+ b'--==0==\r\n' +
+ b'content-type: application/json; charset=UTF-8\r\n\r\n' +
+ json.dumps(metadata).encode('utf-8') + b'\r\n' +
+ b'--==0==\r\n' +
+ b'content-type: */*\r\n\r\n' +
+ data + b'\r\n' +
+ b'--==0==--')
+ headers = _get_upload_headers(conn.USER_AGENT)
+ headers['content-type'] = b'multipart/related; boundary="==0=="'
+ fake_transport.request.assert_called_once_with(
+ 'POST',
+ upload_url,
+ data=payload,
+ headers=headers,
+ )
+
+ @mock.patch(u'google.resumable_media._upload.get_boundary',
+ return_value=b'==0==')
+ def test__do_multipart_upload(self, get_boundary):
+ self._do_multipart_upload_success_helper(get_boundary)
+
+ @mock.patch(u'google.resumable_media._upload.get_boundary',
+ return_value=b'==0==')
+ def test__do_multipart_upload_with_retry(self, get_boundary):
+ self._do_multipart_upload_success_helper(get_boundary, num_retries=8)
+
+ def test_copy_table(self):
+ from google.cloud.bigquery.job import CopyJob
+
+ JOB = 'job_name'
+ SOURCE = 'source_table'
+ DESTINATION = 'destination_table'
+ RESOURCE = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': JOB,
+ },
+ 'configuration': {
+ 'copy': {
+ 'sourceTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': SOURCE,
+ },
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': DESTINATION,
+ },
+ },
+ },
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection(RESOURCE)
+ dataset = client.dataset(self.DS_ID)
+ source = dataset.table(SOURCE)
+ destination = dataset.table(DESTINATION)
+
+ job = client.copy_table(source, destination, job_id=JOB)
+
+ # Check that copy_table actually starts the job.
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/projects/%s/jobs' % self.PROJECT)
+
+ self.assertIsInstance(job, CopyJob)
+ self.assertIs(job._client, client)
+ self.assertEqual(job.job_id, JOB)
+ self.assertEqual(list(job.sources), [source])
+ self.assertIs(job.destination, destination)
+
+ conn = client._connection = _Connection(RESOURCE)
+ source2 = dataset.table(SOURCE + '2')
+ job = client.copy_table([source, source2], destination, job_id=JOB)
+ self.assertIsInstance(job, CopyJob)
+ self.assertIs(job._client, client)
+ self.assertEqual(job.job_id, JOB)
+ self.assertEqual(list(job.sources), [source, source2])
+ self.assertIs(job.destination, destination)
+
+ def test_extract_table(self):
+ from google.cloud.bigquery.job import ExtractJob
+
+ JOB = 'job_id'
+ SOURCE = 'source_table'
+ DESTINATION = 'gs://bucket_name/object_name'
+ RESOURCE = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': JOB,
+ },
+ 'configuration': {
+ 'extract': {
+ 'sourceTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': SOURCE,
+ },
+ 'destinationUris': [DESTINATION],
+ },
+ },
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection(RESOURCE)
+ dataset = client.dataset(self.DS_ID)
+ source = dataset.table(SOURCE)
+
+ job = client.extract_table(source, DESTINATION, job_id=JOB)
+
+ # Check that extract_table actually starts the job.
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/projects/PROJECT/jobs')
+
+ # Check the job resource.
+ self.assertIsInstance(job, ExtractJob)
+ self.assertIs(job._client, client)
+ self.assertEqual(job.job_id, JOB)
+ self.assertEqual(job.source, source)
+ self.assertEqual(list(job.destination_uris), [DESTINATION])
+
+ def test_extract_table_generated_job_id(self):
+ from google.cloud.bigquery.job import ExtractJob
+ from google.cloud.bigquery.job import ExtractJobConfig
+ from google.cloud.bigquery.job import DestinationFormat
+
+ JOB = 'job_id'
+ SOURCE = 'source_table'
+ DESTINATION = 'gs://bucket_name/object_name'
+ RESOURCE = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': JOB,
+ },
+ 'configuration': {
+ 'extract': {
+ 'sourceTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': SOURCE,
+ },
+ 'destinationUris': [DESTINATION],
+ 'destinationFormat': 'NEWLINE_DELIMITED_JSON',
+ },
+ },
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection(RESOURCE)
+ dataset = client.dataset(self.DS_ID)
+ source = dataset.table(SOURCE)
+ job_config = ExtractJobConfig()
+ job_config.destination_format = (
+ DestinationFormat.NEWLINE_DELIMITED_JSON)
+
+ job = client.extract_table(source, DESTINATION, job_config=job_config)
+
+ # Check that extract_table actually starts the job.
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/projects/PROJECT/jobs')
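+        # No job_id was passed, so the client generated a random one.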
+ self.assertIsInstance(
+ req['data']['jobReference']['jobId'], six.string_types)
+
+ # Check the job resource.
+ self.assertIsInstance(job, ExtractJob)
+ self.assertIs(job._client, client)
+ self.assertEqual(job.source, source)
+ self.assertEqual(list(job.destination_uris), [DESTINATION])
+
+ def test_extract_table_w_destination_uris(self):
+ from google.cloud.bigquery.job import ExtractJob
+
+ JOB = 'job_id'
+ SOURCE = 'source_table'
+ DESTINATION1 = 'gs://bucket_name/object_one'
+ DESTINATION2 = 'gs://bucket_name/object_two'
+ RESOURCE = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': JOB,
+ },
+ 'configuration': {
+ 'extract': {
+ 'sourceTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': SOURCE,
+ },
+ 'destinationUris': [
+ DESTINATION1,
+ DESTINATION2,
+ ],
+ },
+ },
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection(RESOURCE)
+ dataset = client.dataset(self.DS_ID)
+ source = dataset.table(SOURCE)
+
+ job = client.extract_table(
+ source, [DESTINATION1, DESTINATION2], job_id=JOB)
+
+ # Check that extract_table actually starts the job.
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/projects/PROJECT/jobs')
+
+ # Check the job resource.
+ self.assertIsInstance(job, ExtractJob)
+ self.assertIs(job._client, client)
+ self.assertEqual(job.job_id, JOB)
+ self.assertEqual(job.source, source)
+ self.assertEqual(
+ list(job.destination_uris), [DESTINATION1, DESTINATION2])
+
+ def test_query_defaults(self):
+ from google.cloud.bigquery.job import QueryJob
+
+ QUERY = 'select count(*) from persons'
+ RESOURCE = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': 'some-random-id',
+ },
+ 'configuration': {
+ 'query': {
+ 'query': QUERY,
+ 'useLegacySql': False,
+ },
+ },
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection(RESOURCE)
+
+ job = client.query(QUERY)
+
+ self.assertIsInstance(job, QueryJob)
+ self.assertIsInstance(job.job_id, six.string_types)
+ self.assertIs(job._client, client)
+ self.assertEqual(job.query, QUERY)
+ self.assertEqual(job.udf_resources, [])
+ self.assertEqual(job.query_parameters, [])
+
+ # Check that query actually starts the job.
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/projects/PROJECT/jobs')
+ sent = req['data']
+ self.assertIsInstance(
+ sent['jobReference']['jobId'], six.string_types)
+ sent_config = sent['configuration']['query']
+ self.assertEqual(sent_config['query'], QUERY)
+ self.assertFalse(sent_config['useLegacySql'])
+
+ def test_query_w_udf_resources(self):
+ from google.cloud.bigquery.job import QueryJob
+ from google.cloud.bigquery.job import QueryJobConfig
+ from google.cloud.bigquery.query import UDFResource
+
+ RESOURCE_URI = 'gs://some-bucket/js/lib.js'
+ JOB = 'job_name'
+ QUERY = 'select count(*) from persons'
+ RESOURCE = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': JOB,
+ },
+ 'configuration': {
+ 'query': {
+ 'query': QUERY,
+ 'useLegacySql': True,
+ 'userDefinedFunctionResources': [
+ {'resourceUri': RESOURCE_URI},
+ ],
+ },
+ },
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection(RESOURCE)
+ udf_resources = [UDFResource("resourceUri", RESOURCE_URI)]
+ config = QueryJobConfig()
+ config.udf_resources = udf_resources
+ config.use_legacy_sql = True
+
+ job = client.query(QUERY, job_config=config, job_id=JOB)
+
+ self.assertIsInstance(job, QueryJob)
+ self.assertIs(job._client, client)
+ self.assertEqual(job.job_id, JOB)
+ self.assertEqual(job.query, QUERY)
+ self.assertEqual(job.udf_resources, udf_resources)
+ self.assertEqual(job.query_parameters, [])
+
+ # Check that query actually starts the job.
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/projects/PROJECT/jobs')
+ sent = req['data']
+ self.assertIsInstance(
+ sent['jobReference']['jobId'], six.string_types)
+ sent_config = sent['configuration']['query']
+ self.assertEqual(sent_config['query'], QUERY)
+ self.assertTrue(sent_config['useLegacySql'])
+ self.assertEqual(
+ sent_config['userDefinedFunctionResources'][0],
+ {'resourceUri': RESOURCE_URI})
+
+ def test_query_w_query_parameters(self):
+ from google.cloud.bigquery.job import QueryJob
+ from google.cloud.bigquery.job import QueryJobConfig
+ from google.cloud.bigquery.query import ScalarQueryParameter
+
+ JOB = 'job_name'
+ QUERY = 'select count(*) from persons'
+ RESOURCE = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': JOB,
+ },
+ 'configuration': {
+ 'query': {
+ 'query': QUERY,
+ 'useLegacySql': False,
+ 'queryParameters': [
+ {
+ 'name': 'foo',
+ 'parameterType': {'type': 'INT64'},
+ 'parameterValue': {'value': '123'}
+ },
+ ],
+ },
+ },
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection(RESOURCE)
+ query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)]
+ config = QueryJobConfig()
+ config.query_parameters = query_parameters
+
+ job = client.query(QUERY, job_config=config, job_id=JOB)
+
+ self.assertIsInstance(job, QueryJob)
+ self.assertIs(job._client, client)
+ self.assertEqual(job.job_id, JOB)
+ self.assertEqual(job.query, QUERY)
+ self.assertEqual(job.udf_resources, [])
+ self.assertEqual(job.query_parameters, query_parameters)
+
+ # Check that query actually starts the job.
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/projects/PROJECT/jobs')
+ sent = req['data']
+ self.assertEqual(sent['jobReference']['jobId'], JOB)
+ sent_config = sent['configuration']['query']
+ self.assertEqual(sent_config['query'], QUERY)
+ self.assertFalse(sent_config['useLegacySql'])
+ self.assertEqual(
+ sent_config['queryParameters'][0],
+ {
+ 'name': 'foo',
+ 'parameterType': {'type': 'INT64'},
+ 'parameterValue': {'value': '123'}
+ })
+
+ def test_create_rows_wo_schema(self):
+ from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA
+
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ table = Table(self.TABLE_REF)
+ ROWS = [
+ ('Phred Phlyntstone', 32),
+ ('Bharney Rhubble', 33),
+ ('Wylma Phlyntstone', 29),
+ ('Bhettye Rhubble', 27),
+ ]
+
+ with self.assertRaises(ValueError) as exc:
+ client.create_rows(table, ROWS)
+
+ self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,))
+
+ def test_create_rows_w_schema(self):
+ import datetime
+ from google.cloud._helpers import UTC
+ from google.cloud._helpers import _datetime_to_rfc3339
+ from google.cloud._helpers import _microseconds_from_datetime
+ from google.cloud.bigquery.table import Table, SchemaField
+
+ WHEN_TS = 1437767599.006
+ WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(
+ tzinfo=UTC)
+ PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % (
+ self.PROJECT, self.DS_ID, self.TABLE_ID)
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection({})
+ schema = [
+ SchemaField('full_name', 'STRING', mode='REQUIRED'),
+ SchemaField('age', 'INTEGER', mode='REQUIRED'),
+ SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'),
+ ]
+ table = Table(self.TABLE_REF, schema=schema)
+ ROWS = [
+ ('Phred Phlyntstone', 32, _datetime_to_rfc3339(WHEN)),
+ ('Bharney Rhubble', 33, WHEN + datetime.timedelta(seconds=1)),
+ ('Wylma Phlyntstone', 29, WHEN + datetime.timedelta(seconds=2)),
+ ('Bhettye Rhubble', 27, None),
+ ]
+
+ def _row_data(row):
+ joined = row[2]
+ if isinstance(row[2], datetime.datetime):
+ joined = _microseconds_from_datetime(joined) * 1e-6
+ return {'full_name': row[0],
+ 'age': str(row[1]),
+ 'joined': joined}
+
+ SENT = {
+ 'rows': [{
+ 'json': _row_data(row),
+ 'insertId': str(i),
+ } for i, row in enumerate(ROWS)],
+ }
+
+ with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))):
+ errors = client.create_rows(table, ROWS)
+
+ self.assertEqual(len(errors), 0)
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/%s' % PATH)
+ self.assertEqual(req['data'], SENT)
+
+ def test_create_rows_w_list_of_dictionaries(self):
+ import datetime
+ from google.cloud._helpers import UTC
+ from google.cloud._helpers import _datetime_to_rfc3339
+ from google.cloud._helpers import _microseconds_from_datetime
+ from google.cloud.bigquery.table import Table, SchemaField
+
+ WHEN_TS = 1437767599.006
+ WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(
+ tzinfo=UTC)
+ PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % (
+ self.PROJECT, self.DS_ID, self.TABLE_ID)
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection({})
+ schema = [
+ SchemaField('full_name', 'STRING', mode='REQUIRED'),
+ SchemaField('age', 'INTEGER', mode='REQUIRED'),
+ SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'),
+ ]
+ table = Table(self.TABLE_REF, schema=schema)
+ ROWS = [
+ {
+ 'full_name': 'Phred Phlyntstone', 'age': 32,
+ 'joined': _datetime_to_rfc3339(WHEN)
+ },
+ {
+ 'full_name': 'Bharney Rhubble', 'age': 33,
+ 'joined': WHEN + datetime.timedelta(seconds=1)
+ },
+ {
+ 'full_name': 'Wylma Phlyntstone', 'age': 29,
+ 'joined': WHEN + datetime.timedelta(seconds=2)
+ },
+ {
+ 'full_name': 'Bhettye Rhubble', 'age': 27, 'joined': None
+ },
+ ]
+
+ def _row_data(row):
+ joined = row['joined']
+ if isinstance(joined, datetime.datetime):
+ row['joined'] = _microseconds_from_datetime(joined) * 1e-6
+ row['age'] = str(row['age'])
+ return row
+
+ SENT = {
+ 'rows': [{
+ 'json': _row_data(row),
+ 'insertId': str(i),
+ } for i, row in enumerate(ROWS)],
+ }
+
+ with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))):
+ errors = client.create_rows(table, ROWS)
+
+ self.assertEqual(len(errors), 0)
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/%s' % PATH)
+ self.assertEqual(req['data'], SENT)
+
+ def test_create_rows_w_list_of_Rows(self):
+ from google.cloud.bigquery._helpers import Row
+ from google.cloud.bigquery.table import Table, SchemaField
+
+ PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % (
+ self.PROJECT, self.DS_ID, self.TABLE_ID)
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection({})
+ schema = [
+ SchemaField('full_name', 'STRING', mode='REQUIRED'),
+ SchemaField('age', 'INTEGER', mode='REQUIRED'),
+ ]
+ table = Table(self.TABLE_REF, schema=schema)
+ f2i = {'full_name': 0, 'age': 1}
+ ROWS = [
+ Row(('Phred Phlyntstone', 32), f2i),
+ Row(('Bharney Rhubble', 33), f2i),
+ Row(('Wylma Phlyntstone', 29), f2i),
+ Row(('Bhettye Rhubble', 27), f2i),
+ ]
+
+ def _row_data(row):
+ return {'full_name': row[0], 'age': str(row[1])}
+
+ SENT = {
+ 'rows': [{
+ 'json': _row_data(row),
+ 'insertId': str(i),
+ } for i, row in enumerate(ROWS)],
+ }
+
+ with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))):
+ errors = client.create_rows(table, ROWS)
+
+ self.assertEqual(len(errors), 0)
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/%s' % PATH)
+ self.assertEqual(req['data'], SENT)
+
+ def test_create_rows_w_skip_invalid_and_ignore_unknown(self):
+ from google.cloud.bigquery.table import Table, SchemaField
+
+ PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % (
+ self.PROJECT, self.DS_ID, self.TABLE_ID)
+ RESPONSE = {
+ 'insertErrors': [
+ {'index': 1,
+ 'errors': [
+ {'reason': 'REASON',
+ 'location': 'LOCATION',
+ 'debugInfo': 'INFO',
+ 'message': 'MESSAGE'}
+ ]},
+ ]}
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection(RESPONSE)
+ schema = [
+ SchemaField('full_name', 'STRING', mode='REQUIRED'),
+ SchemaField('age', 'INTEGER', mode='REQUIRED'),
+ SchemaField('voter', 'BOOLEAN', mode='NULLABLE'),
+ ]
+ table = Table(self.TABLE_REF, schema=schema)
+ ROWS = [
+ ('Phred Phlyntstone', 32, True),
+ ('Bharney Rhubble', 33, False),
+ ('Wylma Phlyntstone', 29, True),
+ ('Bhettye Rhubble', 27, True),
+ ]
+
+ def _row_data(row):
+ return {
+ 'full_name': row[0],
+ 'age': str(row[1]),
+                'voter': 'true' if row[2] else 'false',
+ }
+
+ SENT = {
+ 'skipInvalidRows': True,
+ 'ignoreUnknownValues': True,
+ 'templateSuffix': '20160303',
+ 'rows': [{'insertId': index, 'json': _row_data(row)}
+ for index, row in enumerate(ROWS)],
+ }
+
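+        # Explicit row_ids are supplied, so no uuid-based insertIds need to
+        # be generated (and no uuid.uuid4 patch is required).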
+ errors = client.create_rows(
+ table,
+ ROWS,
+ row_ids=[index for index, _ in enumerate(ROWS)],
+ skip_invalid_rows=True,
+ ignore_unknown_values=True,
+ template_suffix='20160303',
+ )
+
+ self.assertEqual(len(errors), 1)
+ self.assertEqual(errors[0]['index'], 1)
+ self.assertEqual(len(errors[0]['errors']), 1)
+ self.assertEqual(errors[0]['errors'][0],
+ RESPONSE['insertErrors'][0]['errors'][0])
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/%s' % PATH)
+ self.assertEqual(req['data'], SENT)
+
+ def test_create_rows_w_repeated_fields(self):
+ from google.cloud.bigquery.table import Table, SchemaField
+
+ PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % (
+ self.PROJECT, self.DS_ID, self.TABLE_ID)
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection({})
+ full_name = SchemaField('color', 'STRING', mode='REPEATED')
+ index = SchemaField('index', 'INTEGER', 'REPEATED')
+ score = SchemaField('score', 'FLOAT', 'REPEATED')
+ struct = SchemaField('struct', 'RECORD', mode='REPEATED',
+ fields=[index, score])
+ table = Table(self.TABLE_REF, schema=[full_name, struct])
+ ROWS = [
+ (['red', 'green'], [{'index': [1, 2], 'score': [3.1415, 1.414]}]),
+ ]
+
+ def _row_data(row):
+ return {'color': row[0],
+ 'struct': row[1]}
+
+ SENT = {
+ 'rows': [{
+ 'json': _row_data(row),
+ 'insertId': str(i),
+ } for i, row in enumerate(ROWS)],
+ }
+
+ with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))):
+ errors = client.create_rows(table, ROWS)
+
+ self.assertEqual(len(errors), 0)
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/%s' % PATH)
+ self.assertEqual(req['data'], SENT)
+
+ def test_create_rows_w_record_schema(self):
+ from google.cloud.bigquery.table import SchemaField
+
+ PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % (
+ self.PROJECT, self.DS_ID, self.TABLE_ID)
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection({})
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ area_code = SchemaField('area_code', 'STRING', 'REQUIRED')
+ local_number = SchemaField('local_number', 'STRING', 'REQUIRED')
+ rank = SchemaField('rank', 'INTEGER', 'REQUIRED')
+ phone = SchemaField('phone', 'RECORD', mode='NULLABLE',
+ fields=[area_code, local_number, rank])
+ ROWS = [
+ ('Phred Phlyntstone', {'area_code': '800',
+ 'local_number': '555-1212',
+ 'rank': 1}),
+ ('Bharney Rhubble', {'area_code': '877',
+ 'local_number': '768-5309',
+ 'rank': 2}),
+ ('Wylma Phlyntstone', None),
+ ]
+
+ def _row_data(row):
+ return {'full_name': row[0],
+ 'phone': row[1]}
+
+ SENT = {
+ 'rows': [{
+ 'json': _row_data(row),
+ 'insertId': str(i),
+ } for i, row in enumerate(ROWS)],
+ }
+
+ with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))):
+ errors = client.create_rows(self.TABLE_REF, ROWS,
+ selected_fields=[full_name, phone])
+
+ self.assertEqual(len(errors), 0)
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/%s' % PATH)
+ self.assertEqual(req['data'], SENT)
+
+ def test_create_rows_errors(self):
+ from google.cloud.bigquery.table import Table
+
+ ROWS = [
+ ('Phred Phlyntstone', 32, True),
+ ('Bharney Rhubble', 33, False),
+ ('Wylma Phlyntstone', 29, True),
+ ('Bhettye Rhubble', 27, True),
+ ]
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+
+ # table ref with no selected fields
+ with self.assertRaises(ValueError):
+ client.create_rows(self.TABLE_REF, ROWS)
+
+ # table with no schema
+ with self.assertRaises(ValueError):
+ client.create_rows(Table(self.TABLE_REF), ROWS)
+
+ # neither Table nor tableReference
+ with self.assertRaises(TypeError):
+ client.create_rows(1, ROWS)
+
+ def test_create_rows_json(self):
+ from google.cloud.bigquery.table import Table, SchemaField
+ from google.cloud.bigquery.dataset import DatasetReference
+
+ PROJECT = 'PROJECT'
+ DS_ID = 'DS_ID'
+ TABLE_ID = 'TABLE_ID'
+ PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % (
+ PROJECT, DS_ID, TABLE_ID)
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=PROJECT, credentials=creds, _http=http)
+ conn = client._connection = _Connection({})
+ table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID)
+ schema = [
+ SchemaField('full_name', 'STRING', mode='REQUIRED'),
+ SchemaField('age', 'INTEGER', mode='REQUIRED'),
+ SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'),
+ ]
+ table = Table(table_ref, schema=schema)
+ ROWS = [
+ {
+ 'full_name': 'Phred Phlyntstone', 'age': '32',
+ 'joined': '2015-07-24T19:53:19.006000Z'
+ },
+ {
+ 'full_name': 'Bharney Rhubble', 'age': '33',
+ 'joined': 1437767600.006
+ },
+ {
+ 'full_name': 'Wylma Phlyntstone', 'age': '29',
+ 'joined': 1437767601.006
+ },
+ {
+ 'full_name': 'Bhettye Rhubble', 'age': '27', 'joined': None
+ },
+ ]
+
+ SENT = {
+ 'rows': [{
+ 'json': row,
+ 'insertId': str(i),
+ } for i, row in enumerate(ROWS)],
+ }
+
+ with mock.patch('uuid.uuid4', side_effect=map(str, range(len(ROWS)))):
+ errors = client.create_rows_json(table, ROWS)
+
+ self.assertEqual(len(errors), 0)
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/%s' % PATH)
+ self.assertEqual(req['data'], SENT)
+
+ def test_query_rows_defaults(self):
+ from google.api_core.page_iterator import HTTPIterator
+ from google.cloud.bigquery._helpers import Row
+
+ JOB = 'job-id'
+ QUERY = 'SELECT COUNT(*) FROM persons'
+ RESOURCE = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': JOB,
+ },
+ 'configuration': {
+ 'query': {
+ 'query': QUERY,
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': '_temp_dataset',
+ 'tableId': '_temp_table',
+ },
+ },
+ },
+ 'status': {
+ 'state': 'DONE',
+ },
+ }
+ RESULTS_RESOURCE = {
+ 'jobReference': RESOURCE['jobReference'],
+ 'jobComplete': True,
+ 'schema': {
+ 'fields': [
+ {'name': 'field0', 'type': 'INTEGER', 'mode': 'NULLABLE'},
+ ]
+ },
+ 'totalRows': '3',
+ 'pageToken': 'next-page',
+ }
+ FIRST_PAGE = copy.deepcopy(RESULTS_RESOURCE)
+ FIRST_PAGE['rows'] = [
+ {'f': [{'v': '1'}]},
+ {'f': [{'v': '2'}]},
+ ]
+ LAST_PAGE = copy.deepcopy(RESULTS_RESOURCE)
+ LAST_PAGE['rows'] = [
+ {'f': [{'v': '3'}]},
+ ]
+ del LAST_PAGE['pageToken']
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection(
+ RESOURCE, RESULTS_RESOURCE, FIRST_PAGE, LAST_PAGE)
+
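+        # The stubbed connection replays four responses: the job insert, an
+        # initial results poll, and the two row pages.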
+ rows_iter = client.query_rows(QUERY)
+ rows = list(rows_iter)
+
+ self.assertEqual(rows, [Row((i,), {'field0': 0}) for i in (1, 2, 3)])
+ self.assertIs(rows_iter.client, client)
+ self.assertIsInstance(rows_iter, HTTPIterator)
+ self.assertEqual(len(conn._requested), 4)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/projects/PROJECT/jobs')
+ self.assertIsInstance(
+ req['data']['jobReference']['jobId'], six.string_types)
+
+ def test_query_rows_w_job_id(self):
+ from google.api_core.page_iterator import HTTPIterator
+
+ JOB = 'job-id'
+ QUERY = 'SELECT COUNT(*) FROM persons'
+ RESOURCE = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': JOB,
+ },
+ 'configuration': {
+ 'query': {
+ 'query': QUERY,
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': '_temp_dataset',
+ 'tableId': '_temp_table',
+ },
+ },
+ },
+ 'status': {
+ 'state': 'DONE',
+ },
+ }
+ RESULTS_RESOURCE = {
+ 'jobReference': RESOURCE['jobReference'],
+ 'jobComplete': True,
+ 'schema': {
+ 'fields': [
+ {'name': 'field0', 'type': 'INTEGER', 'mode': 'NULLABLE'},
+ ]
+ },
+ 'totalRows': '0',
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection(
+ RESOURCE, RESULTS_RESOURCE, RESULTS_RESOURCE)
+
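+        # The stubbed connection replays three responses: the job insert
+        # followed by two result fetches for the empty result set.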
+ rows_iter = client.query_rows(QUERY, job_id=JOB)
+ rows = list(rows_iter)
+
+ self.assertEqual(rows, [])
+ self.assertIs(rows_iter.client, client)
+ self.assertIsInstance(rows_iter, HTTPIterator)
+ self.assertEqual(len(conn._requested), 3)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/projects/PROJECT/jobs')
+ self.assertEqual(req['data']['jobReference']['jobId'], JOB)
+
+ def test_query_rows_w_job_config(self):
+ from google.cloud.bigquery.job import QueryJobConfig
+ from google.api_core.page_iterator import HTTPIterator
+
+ JOB = 'job-id'
+ QUERY = 'SELECT COUNT(*) FROM persons'
+ RESOURCE = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': JOB,
+ },
+ 'configuration': {
+ 'query': {
+ 'query': QUERY,
+ 'useLegacySql': True,
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': '_temp_dataset',
+ 'tableId': '_temp_table',
+ },
+ },
+ 'dryRun': True,
+ },
+ 'status': {
+ 'state': 'DONE',
+ },
+ }
+ RESULTS_RESOURCE = {
+ 'jobReference': RESOURCE['jobReference'],
+ 'jobComplete': True,
+ 'schema': {
+ 'fields': [
+ {'name': 'field0', 'type': 'INTEGER', 'mode': 'NULLABLE'},
+ ]
+ },
+ 'totalRows': '0',
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection(
+ RESOURCE, RESULTS_RESOURCE, RESULTS_RESOURCE)
+
+ job_config = QueryJobConfig()
+ job_config.use_legacy_sql = True
+ job_config.dry_run = True
+ rows_iter = client.query_rows(QUERY, job_id=JOB, job_config=job_config)
+
+ self.assertIsInstance(rows_iter, HTTPIterator)
+ self.assertEqual(len(conn._requested), 2)
+ req = conn._requested[0]
+ configuration = req['data']['configuration']
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], '/projects/PROJECT/jobs')
+ self.assertEqual(req['data']['jobReference']['jobId'], JOB)
+ self.assertEqual(configuration['query']['useLegacySql'], True)
+ self.assertEqual(configuration['dryRun'], True)
+
+ def test_list_rows(self):
+ import datetime
+ from google.cloud._helpers import UTC
+ from google.cloud.bigquery.table import Table, SchemaField
+ from google.cloud.bigquery._helpers import Row
+
+ PATH = 'projects/%s/datasets/%s/tables/%s/data' % (
+ self.PROJECT, self.DS_ID, self.TABLE_ID)
+ WHEN_TS = 1437767599.006
+ WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(
+ tzinfo=UTC)
+ WHEN_1 = WHEN + datetime.timedelta(seconds=1)
+ WHEN_2 = WHEN + datetime.timedelta(seconds=2)
+ ROWS = 1234
+ TOKEN = 'TOKEN'
+
+ def _bigquery_timestamp_float_repr(ts_float):
+            # Mimic the BigQuery API, which returns TIMESTAMP values as
+            # scientific-notation strings while preserving microsecond
+            # precision.
+ return '%0.15E' % (ts_float,)
+
+ DATA = {
+ 'totalRows': str(ROWS),
+ 'pageToken': TOKEN,
+ 'rows': [
+ {'f': [
+ {'v': 'Phred Phlyntstone'},
+ {'v': '32'},
+ {'v': _bigquery_timestamp_float_repr(WHEN_TS)},
+ ]},
+ {'f': [
+ {'v': 'Bharney Rhubble'},
+ {'v': '33'},
+ {'v': _bigquery_timestamp_float_repr(WHEN_TS + 1)},
+ ]},
+ {'f': [
+ {'v': 'Wylma Phlyntstone'},
+ {'v': '29'},
+ {'v': _bigquery_timestamp_float_repr(WHEN_TS + 2)},
+ ]},
+ {'f': [
+ {'v': 'Bhettye Rhubble'},
+ {'v': None},
+ {'v': None},
+ ]},
+ ]
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection(DATA, DATA)
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ age = SchemaField('age', 'INTEGER', mode='NULLABLE')
+ joined = SchemaField('joined', 'TIMESTAMP', mode='NULLABLE')
+ table = Table(self.TABLE_REF, schema=[full_name, age, joined])
+
+ iterator = client.list_rows(table)
+ page = six.next(iterator.pages)
+ rows = list(page)
+ total_rows = iterator.total_rows
+ page_token = iterator.next_page_token
+
+ f2i = {'full_name': 0, 'age': 1, 'joined': 2}
+ self.assertEqual(len(rows), 4)
+ self.assertEqual(rows[0], Row(('Phred Phlyntstone', 32, WHEN), f2i))
+ self.assertEqual(rows[1], Row(('Bharney Rhubble', 33, WHEN_1), f2i))
+ self.assertEqual(rows[2], Row(('Wylma Phlyntstone', 29, WHEN_2), f2i))
+ self.assertEqual(rows[3], Row(('Bhettye Rhubble', None, None), f2i))
+ self.assertEqual(total_rows, ROWS)
+ self.assertEqual(page_token, TOKEN)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], '/%s' % PATH)
+ self.assertEqual(req['query_params'], {})
+
+ def test_list_rows_query_params(self):
+ from google.cloud.bigquery.table import Table, SchemaField
+
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ table = Table(self.TABLE_REF,
+ schema=[SchemaField('age', 'INTEGER', mode='NULLABLE')])
+ tests = [
+ ({}, {}),
+ ({'start_index': 1}, {'startIndex': 1}),
+ ({'max_results': 2}, {'maxResults': 2}),
+ ({'start_index': 1, 'max_results': 2},
+ {'startIndex': 1, 'maxResults': 2}),
+ ]
+        conn = client._connection = _Connection(*[{} for _ in tests])
+ for i, test in enumerate(tests):
+ iterator = client.list_rows(table, **test[0])
+ six.next(iterator.pages)
+ req = conn._requested[i]
+ self.assertEqual(req['query_params'], test[1],
+ 'for kwargs %s' % test[0])
+
+ def test_list_rows_repeated_fields(self):
+ from google.cloud.bigquery.table import SchemaField
+
+ PATH = 'projects/%s/datasets/%s/tables/%s/data' % (
+ self.PROJECT, self.DS_ID, self.TABLE_ID)
+ ROWS = 1234
+ TOKEN = 'TOKEN'
+ DATA = {
+ 'totalRows': ROWS,
+ 'pageToken': TOKEN,
+ 'rows': [
+ {'f': [
+ {'v': [{'v': 'red'}, {'v': 'green'}]},
+ {'v': [{
+ 'v': {
+ 'f': [
+ {'v': [{'v': '1'}, {'v': '2'}]},
+ {'v': [{'v': '3.1415'}, {'v': '1.414'}]},
+ ]}
+ }]},
+ ]},
+ ]
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection(DATA)
+ color = SchemaField('color', 'STRING', mode='REPEATED')
+ index = SchemaField('index', 'INTEGER', 'REPEATED')
+ score = SchemaField('score', 'FLOAT', 'REPEATED')
+ struct = SchemaField('struct', 'RECORD', mode='REPEATED',
+ fields=[index, score])
+
+ iterator = client.list_rows(self.TABLE_REF,
+ selected_fields=[color, struct])
+ page = six.next(iterator.pages)
+ rows = list(page)
+ total_rows = iterator.total_rows
+ page_token = iterator.next_page_token
+
+ self.assertEqual(len(rows), 1)
+ self.assertEqual(rows[0][0], ['red', 'green'])
+ self.assertEqual(rows[0][1], [{'index': [1, 2],
+ 'score': [3.1415, 1.414]}])
+ self.assertEqual(total_rows, ROWS)
+ self.assertEqual(page_token, TOKEN)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], '/%s' % PATH)
+
+ def test_list_rows_w_record_schema(self):
+ from google.cloud.bigquery.table import Table, SchemaField
+
+ PATH = 'projects/%s/datasets/%s/tables/%s/data' % (
+ self.PROJECT, self.DS_ID, self.TABLE_ID)
+ ROWS = 1234
+ TOKEN = 'TOKEN'
+ DATA = {
+ 'totalRows': ROWS,
+ 'pageToken': TOKEN,
+ 'rows': [
+ {'f': [
+ {'v': 'Phred Phlyntstone'},
+ {'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}},
+ ]},
+ {'f': [
+ {'v': 'Bharney Rhubble'},
+ {'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}},
+ ]},
+ {'f': [
+ {'v': 'Wylma Phlyntstone'},
+ {'v': None},
+ ]},
+ ]
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ conn = client._connection = _Connection(DATA)
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ area_code = SchemaField('area_code', 'STRING', 'REQUIRED')
+ local_number = SchemaField('local_number', 'STRING', 'REQUIRED')
+ rank = SchemaField('rank', 'INTEGER', 'REQUIRED')
+ phone = SchemaField('phone', 'RECORD', mode='NULLABLE',
+ fields=[area_code, local_number, rank])
+ table = Table(self.TABLE_REF, schema=[full_name, phone])
+
+ iterator = client.list_rows(table)
+ page = six.next(iterator.pages)
+ rows = list(page)
+ total_rows = iterator.total_rows
+ page_token = iterator.next_page_token
+
+ self.assertEqual(len(rows), 3)
+ self.assertEqual(rows[0][0], 'Phred Phlyntstone')
+ self.assertEqual(rows[0][1], {'area_code': '800',
+ 'local_number': '555-1212',
+ 'rank': 1})
+ self.assertEqual(rows[1][0], 'Bharney Rhubble')
+ self.assertEqual(rows[1][1], {'area_code': '877',
+ 'local_number': '768-5309',
+ 'rank': 2})
+ self.assertEqual(rows[2][0], 'Wylma Phlyntstone')
+ self.assertIsNone(rows[2][1])
+ self.assertEqual(total_rows, ROWS)
+ self.assertEqual(page_token, TOKEN)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], '/%s' % PATH)
+
+ def test_list_rows_errors(self):
+ from google.cloud.bigquery.table import Table
+
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+
+ # table ref with no selected fields
+ with self.assertRaises(ValueError):
+ client.list_rows(self.TABLE_REF)
+
+ # table with no schema
+ with self.assertRaises(ValueError):
+ client.list_rows(Table(self.TABLE_REF))
+
+ # neither Table nor tableReference
+ with self.assertRaises(TypeError):
+ client.list_rows(1)
+
+ def test_list_partitions(self):
+ RESOURCE = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': 'JOB_ID',
+ },
+ 'configuration': {
+ 'query': {
+ 'query': 'q',
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': 'DS_ID',
+ 'tableId': 'TABLE_ID',
+ },
+ },
+ },
+ 'status': {
+ 'state': 'DONE',
+ },
+ }
+ RESULTS_RESOURCE = {
+ 'jobReference': RESOURCE['jobReference'],
+ 'jobComplete': True,
+ 'schema': {
+ 'fields': [
+ {'name': 'partition_id', 'type': 'INTEGER',
+ 'mode': 'REQUIRED'},
+ ]
+ },
+ 'totalRows': '2',
+ 'pageToken': 'next-page',
+ }
+ FIRST_PAGE = copy.deepcopy(RESULTS_RESOURCE)
+ FIRST_PAGE['rows'] = [
+ {'f': [{'v': 20160804}]},
+ {'f': [{'v': 20160805}]},
+ ]
+ del FIRST_PAGE['pageToken']
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds,
+ _http=http)
+ client._connection = _Connection(
+ RESOURCE, RESULTS_RESOURCE, FIRST_PAGE)
+ self.assertEqual(client.list_partitions(self.TABLE_REF),
+ [20160804, 20160805])
+
+
+class Test_make_job_id(unittest.TestCase):
+ def _call_fut(self, job_id, prefix=None):
+ from google.cloud.bigquery.client import _make_job_id
+
+ return _make_job_id(job_id, prefix=prefix)
+
+ def test__make_job_id_wo_suffix(self):
+ job_id = self._call_fut('job_id')
+
+ self.assertEqual(job_id, 'job_id')
+
+ def test__make_job_id_w_suffix(self):
+ with mock.patch('uuid.uuid4', side_effect=['212345']):
+ job_id = self._call_fut(None, prefix='job_id')
+
+ self.assertEqual(job_id, 'job_id212345')
+
+ def test__make_random_job_id(self):
+ with mock.patch('uuid.uuid4', side_effect=['212345']):
+ job_id = self._call_fut(None)
+
+ self.assertEqual(job_id, '212345')
+
+ def test__make_job_id_w_job_id_overrides_prefix(self):
+ job_id = self._call_fut('job_id', prefix='unused_prefix')
+
+ self.assertEqual(job_id, 'job_id')
+
+
+class TestClientUpload(object):
+ # NOTE: This is a "partner" to `TestClient` meant to test some of the
+ # "load_table_from_file" portions of `Client`. It also uses
+ # `pytest`-style tests rather than `unittest`-style.
+
+ TABLE_REF = DatasetReference(
+ 'project_id', 'test_dataset').table('test_table')
+
+ @staticmethod
+ def _make_client(transport=None):
+ from google.cloud.bigquery import _http
+ from google.cloud.bigquery import client
+
+ cl = client.Client(project='project_id',
+ credentials=_make_credentials(),
+ _http=transport)
+ cl._connection = mock.create_autospec(_http.Connection, instance=True)
+ return cl
+
+ @staticmethod
+ def _make_response(status_code, content='', headers={}):
+ """Make a mock HTTP response."""
+ import requests
+ response = requests.Response()
+ response.request = requests.Request(
+ 'POST', 'http://example.com').prepare()
+ response._content = content.encode('utf-8')
+ response.headers.update(headers)
+ response.status_code = status_code
+ return response
+
+ @classmethod
+ def _make_do_upload_patch(cls, client, method,
+ resource={}, side_effect=None):
+ """Patches the low-level upload helpers."""
+ if side_effect is None:
+ side_effect = [cls._make_response(
+ http_client.OK,
+ json.dumps(resource),
+ {'Content-Type': 'application/json'})]
+ return mock.patch.object(
+ client, method, side_effect=side_effect, autospec=True)
+
+ EXPECTED_CONFIGURATION = {
+ 'jobReference': {'projectId': 'project_id', 'jobId': 'job_id'},
+ 'configuration': {
+ 'load': {
+ 'sourceFormat': 'CSV',
+ 'destinationTable': {
+ 'projectId': 'project_id',
+ 'datasetId': 'test_dataset',
+ 'tableId': 'test_table'
+ }
+ }
+ }
+ }
+
+ @staticmethod
+ def _make_file_obj():
+ return io.BytesIO(b'hello, is it me you\'re looking for?')
+
+ @staticmethod
+ def _make_config():
+ from google.cloud.bigquery.job import LoadJobConfig
+
+ config = LoadJobConfig()
+ config.source_format = 'CSV'
+ return config
+
+ # High-level tests
+
+ def test_load_table_from_file_resumable(self):
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+
+ client = self._make_client()
+ file_obj = self._make_file_obj()
+
+ do_upload_patch = self._make_do_upload_patch(
+ client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION)
+ with do_upload_patch as do_upload:
+ client.load_table_from_file(file_obj, self.TABLE_REF,
+ job_id='job_id',
+ job_config=self._make_config())
+
+ do_upload.assert_called_once_with(
+ file_obj,
+ self.EXPECTED_CONFIGURATION,
+ _DEFAULT_NUM_RETRIES)
+
+ def test_load_table_from_file_resumable_metadata(self):
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+
+ client = self._make_client()
+ file_obj = self._make_file_obj()
+
+ config = self._make_config()
+ config.allow_jagged_rows = False
+ config.allow_quoted_newlines = False
+ config.create_disposition = 'CREATE_IF_NEEDED'
+ config.encoding = 'utf8'
+ config.field_delimiter = ','
+ config.ignore_unknown_values = False
+ config.max_bad_records = 0
+ config.quote_character = '"'
+ config.skip_leading_rows = 1
+ config.write_disposition = 'WRITE_APPEND'
+ config.null_marker = r'\N'
+
+ expected_config = {
+ 'jobReference': {'projectId': 'project_id', 'jobId': 'job_id'},
+ 'configuration': {
+ 'load': {
+ 'destinationTable': {
+ 'projectId': self.TABLE_REF.project,
+ 'datasetId': self.TABLE_REF.dataset_id,
+ 'tableId': self.TABLE_REF.table_id,
+ },
+ 'sourceFormat': config.source_format,
+ 'allowJaggedRows': config.allow_jagged_rows,
+ 'allowQuotedNewlines': config.allow_quoted_newlines,
+ 'createDisposition': config.create_disposition,
+ 'encoding': config.encoding,
+ 'fieldDelimiter': config.field_delimiter,
+ 'ignoreUnknownValues': config.ignore_unknown_values,
+ 'maxBadRecords': config.max_bad_records,
+ 'quote': config.quote_character,
+ 'skipLeadingRows': str(config.skip_leading_rows),
+ 'writeDisposition': config.write_disposition,
+ 'nullMarker': config.null_marker,
+ },
+ },
+ }
+
+ do_upload_patch = self._make_do_upload_patch(
+ client, '_do_resumable_upload', expected_config)
+ with do_upload_patch as do_upload:
+ client.load_table_from_file(
+ file_obj, self.TABLE_REF, job_id='job_id', job_config=config)
+
+ do_upload.assert_called_once_with(
+ file_obj,
+ expected_config,
+ _DEFAULT_NUM_RETRIES)
+
+ def test_load_table_from_file_multipart(self):
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+
+ client = self._make_client()
+ file_obj = self._make_file_obj()
+ file_obj_size = 10
+ config = self._make_config()
+
+ do_upload_patch = self._make_do_upload_patch(
+ client, '_do_multipart_upload', self.EXPECTED_CONFIGURATION)
+ with do_upload_patch as do_upload:
+ client.load_table_from_file(
+ file_obj, self.TABLE_REF, job_id='job_id', job_config=config,
+ size=file_obj_size)
+
+ do_upload.assert_called_once_with(
+ file_obj,
+ self.EXPECTED_CONFIGURATION,
+ file_obj_size,
+ _DEFAULT_NUM_RETRIES)
+
+ def test_load_table_from_file_with_retries(self):
+ client = self._make_client()
+ file_obj = self._make_file_obj()
+ num_retries = 20
+
+ do_upload_patch = self._make_do_upload_patch(
+ client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION)
+ with do_upload_patch as do_upload:
+ client.load_table_from_file(
+ file_obj, self.TABLE_REF, num_retries=num_retries,
+ job_id='job_id', job_config=self._make_config())
+
+ do_upload.assert_called_once_with(
+ file_obj,
+ self.EXPECTED_CONFIGURATION,
+ num_retries)
+
+ def test_load_table_from_file_with_rewind(self):
+ client = self._make_client()
+ file_obj = self._make_file_obj()
+ file_obj.seek(2)
+
+ with self._make_do_upload_patch(
+ client, '_do_resumable_upload', self.EXPECTED_CONFIGURATION):
+ client.load_table_from_file(
+ file_obj, self.TABLE_REF, rewind=True)
+
+ assert file_obj.tell() == 0
+
+ def test_load_table_from_file_failure(self):
+ from google.resumable_media import InvalidResponse
+ from google.cloud import exceptions
+
+ client = self._make_client()
+ file_obj = self._make_file_obj()
+
+ response = self._make_response(
+ content='Someone is already in this spot.',
+ status_code=http_client.CONFLICT)
+
+ do_upload_patch = self._make_do_upload_patch(
+ client, '_do_resumable_upload',
+ side_effect=InvalidResponse(response))
+
+ with do_upload_patch, pytest.raises(exceptions.Conflict) as exc_info:
+ client.load_table_from_file(
+ file_obj, self.TABLE_REF, rewind=True)
+
+ assert response.text in exc_info.value.message
+ assert exc_info.value.errors == []
+
+ def test_load_table_from_file_bad_mode(self):
+ client = self._make_client()
+ file_obj = mock.Mock(spec=['mode'])
+ file_obj.mode = 'x'
+
+ with pytest.raises(ValueError):
+ client.load_table_from_file(file_obj, self.TABLE_REF)
+
+ # Low-level tests
+
+ @classmethod
+ def _make_resumable_upload_responses(cls, size):
+ """Make a series of responses for a successful resumable upload."""
+ from google import resumable_media
+
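+        # Three responses model the resumable flow: the initiation response
+        # carrying the session URL, a 308 acknowledging the uploaded bytes,
+        # and the final 200 with a JSON body.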
+ resumable_url = 'http://test.invalid?upload_id=and-then-there-was-1'
+ initial_response = cls._make_response(
+ http_client.OK, '', {'location': resumable_url})
+ data_response = cls._make_response(
+ resumable_media.PERMANENT_REDIRECT,
+ '', {'range': 'bytes=0-{:d}'.format(size - 1)})
+ final_response = cls._make_response(
+ http_client.OK,
+ json.dumps({'size': size}),
+ {'Content-Type': 'application/json'})
+ return [initial_response, data_response, final_response]
+
+ @staticmethod
+ def _make_transport(responses=None):
+ import google.auth.transport.requests
+
+ transport = mock.create_autospec(
+ google.auth.transport.requests.AuthorizedSession, instance=True)
+ transport.request.side_effect = responses
+ return transport
+
+ def test__do_resumable_upload(self):
+ file_obj = self._make_file_obj()
+ file_obj_len = len(file_obj.getvalue())
+ transport = self._make_transport(
+ self._make_resumable_upload_responses(file_obj_len))
+ client = self._make_client(transport)
+
+ result = client._do_resumable_upload(
+ file_obj,
+ self.EXPECTED_CONFIGURATION,
+ None)
+
+ content = result.content.decode('utf-8')
+ assert json.loads(content) == {'size': file_obj_len}
+
+ # Verify that configuration data was passed in with the initial
+ # request.
+ transport.request.assert_any_call(
+ 'POST',
+ mock.ANY,
+ data=json.dumps(self.EXPECTED_CONFIGURATION).encode('utf-8'),
+ headers=mock.ANY)
+
+ def test__do_multipart_upload(self):
+        transport = self._make_transport(
+            [self._make_response(http_client.OK)])
+ client = self._make_client(transport)
+ file_obj = self._make_file_obj()
+ file_obj_len = len(file_obj.getvalue())
+
+ client._do_multipart_upload(
+ file_obj,
+ self.EXPECTED_CONFIGURATION,
+ file_obj_len,
+ None)
+
+ # Verify that configuration data was passed in with the initial
+ # request.
+ request_args = transport.request.mock_calls[0][2]
+ request_data = request_args['data'].decode('utf-8')
+ request_headers = request_args['headers']
+
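+        # Parse the multipart/related body with the email module so the JSON
+        # configuration and the binary payload can be checked separately.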
+ request_content = email.message_from_string(
+ 'Content-Type: {}\r\n{}'.format(
+ request_headers['content-type'].decode('utf-8'),
+ request_data))
+
+        # There should be two payloads: the configuration and the binary data.
+ configuration_data = request_content.get_payload(0).get_payload()
+ binary_data = request_content.get_payload(1).get_payload()
+
+ assert json.loads(configuration_data) == self.EXPECTED_CONFIGURATION
+ assert binary_data.encode('utf-8') == file_obj.getvalue()
+
+ def test__do_multipart_upload_wrong_size(self):
+ client = self._make_client()
+ file_obj = self._make_file_obj()
+ file_obj_len = len(file_obj.getvalue())
+
+ with pytest.raises(ValueError):
+ client._do_multipart_upload(
+ file_obj,
+ {},
+                file_obj_len + 1,
+ None)
+
+
+class _Connection(object):
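+    # Stub connection used throughout these tests: records every api_request
+    # call in ``_requested`` and replays the canned responses in order,
+    # raising NotFound once they are exhausted.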
+
+ USER_AGENT = 'testing 1.2.3'
+
+ def __init__(self, *responses):
+ self._responses = responses
+ self._requested = []
+
+ def api_request(self, **kw):
+ from google.cloud.exceptions import NotFound
+ self._requested.append(kw)
+
+ if len(self._responses) == 0:
+ raise NotFound('miss')
+
+ response, self._responses = self._responses[0], self._responses[1:]
+ if isinstance(response, Exception):
+ raise response
+ return response
diff --git a/bigquery/tests/unit/test_dataset.py b/bigquery/tests/unit/test_dataset.py
new file mode 100644
index 0000000..c04d154
--- /dev/null
+++ b/bigquery/tests/unit/test_dataset.py
@@ -0,0 +1,459 @@
+# Copyright 2015 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import mock
+
+
+class TestAccessEntry(unittest.TestCase):
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.dataset import AccessEntry
+
+ return AccessEntry
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_ctor_defaults(self):
+ entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com')
+ self.assertEqual(entry.role, 'OWNER')
+ self.assertEqual(entry.entity_type, 'userByEmail')
+ self.assertEqual(entry.entity_id, 'phred@example.com')
+
+ def test_ctor_bad_entity_type(self):
+ with self.assertRaises(ValueError):
+ self._make_one(None, 'unknown', None)
+
+ def test_ctor_view_with_role(self):
+ role = 'READER'
+ entity_type = 'view'
+ with self.assertRaises(ValueError):
+ self._make_one(role, entity_type, None)
+
+ def test_ctor_view_success(self):
+ role = None
+ entity_type = 'view'
+ entity_id = object()
+ entry = self._make_one(role, entity_type, entity_id)
+ self.assertEqual(entry.role, role)
+ self.assertEqual(entry.entity_type, entity_type)
+ self.assertEqual(entry.entity_id, entity_id)
+
+ def test_ctor_nonview_without_role(self):
+ role = None
+ entity_type = 'userByEmail'
+ with self.assertRaises(ValueError):
+ self._make_one(role, entity_type, None)
+
+ def test___eq___role_mismatch(self):
+ entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com')
+ other = self._make_one('WRITER', 'userByEmail', 'phred@example.com')
+ self.assertNotEqual(entry, other)
+
+ def test___eq___entity_type_mismatch(self):
+ entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com')
+ other = self._make_one('OWNER', 'groupByEmail', 'phred@example.com')
+ self.assertNotEqual(entry, other)
+
+ def test___eq___entity_id_mismatch(self):
+ entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com')
+ other = self._make_one('OWNER', 'userByEmail', 'bharney@example.com')
+ self.assertNotEqual(entry, other)
+
+ def test___eq___hit(self):
+ entry = self._make_one('OWNER', 'userByEmail', 'phred@example.com')
+ other = self._make_one('OWNER', 'userByEmail', 'phred@example.com')
+ self.assertEqual(entry, other)
+
+    def test___eq___type_mismatch(self):
+ entry = self._make_one('OWNER', 'userByEmail', 'silly@example.com')
+ self.assertNotEqual(entry, object())
+ self.assertEqual(entry, mock.ANY)
+
+
+class TestDatasetReference(unittest.TestCase):
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.dataset import DatasetReference
+
+ return DatasetReference
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_ctor_defaults(self):
+ dataset_ref = self._make_one('some-project-1', 'dataset_1')
+ self.assertEqual(dataset_ref.project, 'some-project-1')
+ self.assertEqual(dataset_ref.dataset_id, 'dataset_1')
+
+ def test_ctor_bad_args(self):
+ with self.assertRaises(ValueError):
+ self._make_one(1, 'd')
+ with self.assertRaises(ValueError):
+ self._make_one('p', 2)
+
+ def test_table(self):
+ dataset_ref = self._make_one('some-project-1', 'dataset_1')
+ table_ref = dataset_ref.table('table_1')
+ self.assertEqual(table_ref.dataset_id, 'dataset_1')
+ self.assertEqual(table_ref.project, 'some-project-1')
+ self.assertEqual(table_ref.table_id, 'table_1')
+
+ def test_to_api_repr(self):
+ dataset = self._make_one('project_1', 'dataset_1')
+
+ resource = dataset.to_api_repr()
+
+ self.assertEqual(
+ resource,
+ {
+ 'projectId': 'project_1',
+ 'datasetId': 'dataset_1',
+ })
+
+ def test_from_api_repr(self):
+ from google.cloud.bigquery.dataset import DatasetReference
+ expected = self._make_one('project_1', 'dataset_1')
+
+ got = DatasetReference.from_api_repr(
+ {
+ 'projectId': 'project_1',
+ 'datasetId': 'dataset_1',
+ })
+
+ self.assertEqual(expected, got)
+
+ def test___eq___wrong_type(self):
+ dataset = self._make_one('project_1', 'dataset_1')
+ other = object()
+ self.assertNotEqual(dataset, other)
+ self.assertEqual(dataset, mock.ANY)
+
+ def test___eq___project_mismatch(self):
+ dataset = self._make_one('project_1', 'dataset_1')
+ other = self._make_one('project_2', 'dataset_1')
+ self.assertNotEqual(dataset, other)
+
+ def test___eq___dataset_mismatch(self):
+ dataset = self._make_one('project_1', 'dataset_1')
+ other = self._make_one('project_1', 'dataset_2')
+ self.assertNotEqual(dataset, other)
+
+ def test___eq___equality(self):
+ dataset = self._make_one('project_1', 'dataset_1')
+ other = self._make_one('project_1', 'dataset_1')
+ self.assertEqual(dataset, other)
+
+ def test___hash__set_equality(self):
+ dataset1 = self._make_one('project_1', 'dataset_1')
+ dataset2 = self._make_one('project_1', 'dataset_2')
+ set_one = {dataset1, dataset2}
+ set_two = {dataset1, dataset2}
+ self.assertEqual(set_one, set_two)
+
+ def test___hash__not_equals(self):
+ dataset1 = self._make_one('project_1', 'dataset_1')
+ dataset2 = self._make_one('project_1', 'dataset_2')
+ set_one = {dataset1}
+ set_two = {dataset2}
+ self.assertNotEqual(set_one, set_two)
+
+ def test___repr__(self):
+ dataset = self._make_one('project1', 'dataset1')
+ expected = "DatasetReference('project1', 'dataset1')"
+ self.assertEqual(repr(dataset), expected)
+
+
+class TestDataset(unittest.TestCase):
+ from google.cloud.bigquery.dataset import DatasetReference
+
+ PROJECT = 'project'
+ DS_ID = 'dataset-id'
+ DS_REF = DatasetReference(PROJECT, DS_ID)
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.dataset import Dataset
+
+ return Dataset
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def _setUpConstants(self):
+ import datetime
+ from google.cloud._helpers import UTC
+
+ self.WHEN_TS = 1437767599.006
+ self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(
+ tzinfo=UTC)
+ self.ETAG = 'ETAG'
+ self.DS_FULL_ID = '%s:%s' % (self.PROJECT, self.DS_ID)
+ self.RESOURCE_URL = 'http://example.com/path/to/resource'
+
+ def _makeResource(self):
+ self._setUpConstants()
+ USER_EMAIL = 'phred@example.com'
+ GROUP_EMAIL = 'group-name@lists.example.com'
+ return {
+ 'creationTime': self.WHEN_TS * 1000,
+ 'datasetReference':
+ {'projectId': self.PROJECT, 'datasetId': self.DS_ID},
+ 'etag': self.ETAG,
+ 'id': self.DS_FULL_ID,
+ 'lastModifiedTime': self.WHEN_TS * 1000,
+ 'location': 'US',
+ 'selfLink': self.RESOURCE_URL,
+ 'defaultTableExpirationMs': 3600,
+ 'access': [
+ {'role': 'OWNER', 'userByEmail': USER_EMAIL},
+ {'role': 'OWNER', 'groupByEmail': GROUP_EMAIL},
+ {'role': 'WRITER', 'specialGroup': 'projectWriters'},
+ {'role': 'READER', 'specialGroup': 'projectReaders'}],
+ }
+
+ def _verify_access_entry(self, access_entries, resource):
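+        # Flatten each raw entry ({'role': ..., '<entity_type>': <entity_id>})
+        # into a dict that can be compared field-by-field with an AccessEntry.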
+ r_entries = []
+ for r_entry in resource['access']:
+ role = r_entry.pop('role')
+ for entity_type, entity_id in sorted(r_entry.items()):
+ r_entries.append({
+ 'role': role,
+ 'entity_type': entity_type,
+ 'entity_id': entity_id})
+
+ self.assertEqual(len(access_entries), len(r_entries))
+ for a_entry, r_entry in zip(access_entries, r_entries):
+ self.assertEqual(a_entry.role, r_entry['role'])
+ self.assertEqual(a_entry.entity_type, r_entry['entity_type'])
+ self.assertEqual(a_entry.entity_id, r_entry['entity_id'])
+
+ def _verify_readonly_resource_properties(self, dataset, resource):
+
+ self.assertEqual(dataset.dataset_id, self.DS_ID)
+
+ if 'creationTime' in resource:
+ self.assertEqual(dataset.created, self.WHEN)
+ else:
+ self.assertIsNone(dataset.created)
+ if 'etag' in resource:
+ self.assertEqual(dataset.etag, self.ETAG)
+ else:
+ self.assertIsNone(dataset.etag)
+ if 'lastModifiedTime' in resource:
+ self.assertEqual(dataset.modified, self.WHEN)
+ else:
+ self.assertIsNone(dataset.modified)
+ if 'selfLink' in resource:
+ self.assertEqual(dataset.self_link, self.RESOURCE_URL)
+ else:
+ self.assertIsNone(dataset.self_link)
+
+ def _verify_resource_properties(self, dataset, resource):
+
+ self._verify_readonly_resource_properties(dataset, resource)
+
+ if 'defaultTableExpirationMs' in resource:
+ self.assertEqual(dataset.default_table_expiration_ms,
+ int(resource.get('defaultTableExpirationMs')))
+ else:
+ self.assertIsNone(dataset.default_table_expiration_ms)
+ self.assertEqual(dataset.description, resource.get('description'))
+ self.assertEqual(dataset.friendly_name, resource.get('friendlyName'))
+ self.assertEqual(dataset.location, resource.get('location'))
+
+ if 'access' in resource:
+ self._verify_access_entry(dataset.access_entries, resource)
+ else:
+ self.assertEqual(dataset.access_entries, [])
+
+ def test_ctor_defaults(self):
+ dataset = self._make_one(self.DS_REF)
+ self.assertEqual(dataset.dataset_id, self.DS_ID)
+ self.assertEqual(dataset.project, self.PROJECT)
+ self.assertEqual(
+ dataset.path,
+ '/projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID))
+ self.assertEqual(dataset.access_entries, [])
+
+ self.assertIsNone(dataset.created)
+ self.assertIsNone(dataset.full_dataset_id)
+ self.assertIsNone(dataset.etag)
+ self.assertIsNone(dataset.modified)
+ self.assertIsNone(dataset.self_link)
+
+ self.assertIsNone(dataset.default_table_expiration_ms)
+ self.assertIsNone(dataset.description)
+ self.assertIsNone(dataset.friendly_name)
+ self.assertIsNone(dataset.location)
+
+ def test_ctor_explicit(self):
+ from google.cloud.bigquery.dataset import DatasetReference, AccessEntry
+
+ phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com')
+ bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com')
+ entries = [phred, bharney]
+ OTHER_PROJECT = 'foo-bar-123'
+ dataset = self._make_one(DatasetReference(OTHER_PROJECT, self.DS_ID))
+ dataset.access_entries = entries
+ self.assertEqual(dataset.dataset_id, self.DS_ID)
+ self.assertEqual(dataset.project, OTHER_PROJECT)
+ self.assertEqual(
+ dataset.path,
+ '/projects/%s/datasets/%s' % (OTHER_PROJECT, self.DS_ID))
+ self.assertEqual(dataset.access_entries, entries)
+
+ self.assertIsNone(dataset.created)
+ self.assertIsNone(dataset.full_dataset_id)
+ self.assertIsNone(dataset.etag)
+ self.assertIsNone(dataset.modified)
+ self.assertIsNone(dataset.self_link)
+
+ self.assertIsNone(dataset.default_table_expiration_ms)
+ self.assertIsNone(dataset.description)
+ self.assertIsNone(dataset.friendly_name)
+ self.assertIsNone(dataset.location)
+
+ def test_access_entries_setter_non_list(self):
+ dataset = self._make_one(self.DS_REF)
+ with self.assertRaises(TypeError):
+ dataset.access_entries = object()
+
+ def test_access_entries_setter_invalid_field(self):
+ from google.cloud.bigquery.dataset import AccessEntry
+
+ dataset = self._make_one(self.DS_REF)
+ phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com')
+ with self.assertRaises(ValueError):
+ dataset.access_entries = [phred, object()]
+
+ def test_access_entries_setter(self):
+ from google.cloud.bigquery.dataset import AccessEntry
+
+ dataset = self._make_one(self.DS_REF)
+ phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com')
+ bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com')
+ dataset.access_entries = [phred, bharney]
+ self.assertEqual(dataset.access_entries, [phred, bharney])
+
+ def test_default_table_expiration_ms_setter_bad_value(self):
+ dataset = self._make_one(self.DS_REF)
+ with self.assertRaises(ValueError):
+ dataset.default_table_expiration_ms = 'bogus'
+
+ def test_default_table_expiration_ms_setter(self):
+ dataset = self._make_one(self.DS_REF)
+ dataset.default_table_expiration_ms = 12345
+ self.assertEqual(dataset.default_table_expiration_ms, 12345)
+
+ def test_description_setter_bad_value(self):
+ dataset = self._make_one(self.DS_REF)
+ with self.assertRaises(ValueError):
+ dataset.description = 12345
+
+ def test_description_setter(self):
+ dataset = self._make_one(self.DS_REF)
+ dataset.description = 'DESCRIPTION'
+ self.assertEqual(dataset.description, 'DESCRIPTION')
+
+ def test_friendly_name_setter_bad_value(self):
+ dataset = self._make_one(self.DS_REF)
+ with self.assertRaises(ValueError):
+ dataset.friendly_name = 12345
+
+ def test_friendly_name_setter(self):
+ dataset = self._make_one(self.DS_REF)
+ dataset.friendly_name = 'FRIENDLY'
+ self.assertEqual(dataset.friendly_name, 'FRIENDLY')
+
+ def test_location_setter_bad_value(self):
+ dataset = self._make_one(self.DS_REF)
+ with self.assertRaises(ValueError):
+ dataset.location = 12345
+
+ def test_location_setter(self):
+ dataset = self._make_one(self.DS_REF)
+ dataset.location = 'LOCATION'
+ self.assertEqual(dataset.location, 'LOCATION')
+
+ def test_labels_setter(self):
+ dataset = self._make_one(self.DS_REF)
+ dataset.labels = {'color': 'green'}
+ self.assertEqual(dataset.labels, {'color': 'green'})
+
+ def test_labels_setter_bad_value(self):
+ dataset = self._make_one(self.DS_REF)
+ with self.assertRaises(ValueError):
+ dataset.labels = None
+
+ def test_from_api_repr_missing_identity(self):
+ self._setUpConstants()
+ RESOURCE = {}
+ klass = self._get_target_class()
+ with self.assertRaises(KeyError):
+ klass.from_api_repr(RESOURCE)
+
+ def test_from_api_repr_bare(self):
+ self._setUpConstants()
+ RESOURCE = {
+ 'id': '%s:%s' % (self.PROJECT, self.DS_ID),
+ 'datasetReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ }
+ }
+ klass = self._get_target_class()
+ dataset = klass.from_api_repr(RESOURCE)
+ self._verify_resource_properties(dataset, RESOURCE)
+
+ def test_from_api_repr_w_properties(self):
+ RESOURCE = self._makeResource()
+ klass = self._get_target_class()
+ dataset = klass.from_api_repr(RESOURCE)
+ self._verify_resource_properties(dataset, RESOURCE)
+
+ def test__parse_access_entries_w_unknown_entity_type(self):
+ ACCESS = [
+ {'role': 'READER', 'unknown': 'UNKNOWN'},
+ ]
+ dataset = self._make_one(self.DS_REF)
+ with self.assertRaises(ValueError):
+ dataset._parse_access_entries(ACCESS)
+
+ def test__parse_access_entries_w_extra_keys(self):
+ USER_EMAIL = 'phred@example.com'
+ ACCESS = [
+ {
+ 'role': 'READER',
+ 'specialGroup': 'projectReaders',
+ 'userByEmail': USER_EMAIL,
+ },
+ ]
+ dataset = self._make_one(self.DS_REF)
+ with self.assertRaises(ValueError):
+ dataset._parse_access_entries(ACCESS)
+
+ def test_table(self):
+ from google.cloud.bigquery.table import TableReference
+
+ dataset = self._make_one(self.DS_REF)
+ table = dataset.table('table_id')
+ self.assertIsInstance(table, TableReference)
+ self.assertEqual(table.table_id, 'table_id')
+ self.assertEqual(table.dataset_id, self.DS_ID)
+ self.assertEqual(table.project, self.PROJECT)
diff --git a/bigquery/tests/unit/test_dbapi_cursor.py b/bigquery/tests/unit/test_dbapi_cursor.py
new file mode 100644
index 0000000..a16b7b4
--- /dev/null
+++ b/bigquery/tests/unit/test_dbapi_cursor.py
@@ -0,0 +1,308 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import mock
+
+
+class TestCursor(unittest.TestCase):
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.dbapi import Cursor
+ return Cursor
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def _mock_client(
+ self, rows=None, schema=None, num_dml_affected_rows=None):
+ from google.cloud.bigquery import client
+
+ if rows is None:
+ total_rows = 0
+ else:
+ total_rows = len(rows)
+
+ mock_client = mock.create_autospec(client.Client)
+ mock_client.query.return_value = self._mock_job(
+ total_rows=total_rows,
+ schema=schema,
+ num_dml_affected_rows=num_dml_affected_rows)
+ mock_client.list_rows.return_value = rows
+ return mock_client
+
+ def _mock_job(
+ self, total_rows=0, schema=None, num_dml_affected_rows=None):
+ from google.cloud.bigquery import job
+ mock_job = mock.create_autospec(job.QueryJob)
+ mock_job.error_result = None
+ mock_job.state = 'DONE'
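+        # result() returns the mock job itself, emulating a finished job.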
+ mock_job.result.return_value = mock_job
+
+ if num_dml_affected_rows is None:
+ mock_job.statement_type = None # API sends back None for SELECT
+ else:
+ mock_job.statement_type = 'UPDATE'
+
+ mock_job.query_results.return_value = self._mock_results(
+ total_rows=total_rows, schema=schema,
+ num_dml_affected_rows=num_dml_affected_rows)
+ return mock_job
+
+ def _mock_results(
+ self, total_rows=0, schema=None, num_dml_affected_rows=None):
+ from google.cloud.bigquery import query
+ mock_results = mock.create_autospec(query.QueryResults)
+ mock_results.schema = schema
+ mock_results.num_dml_affected_rows = num_dml_affected_rows
+ mock_results.total_rows = total_rows
+ return mock_results
+
+ def test_ctor(self):
+ from google.cloud.bigquery.dbapi import connect
+ from google.cloud.bigquery.dbapi import Cursor
+ connection = connect(self._mock_client())
+ cursor = self._make_one(connection)
+ self.assertIsInstance(cursor, Cursor)
+ self.assertIs(cursor.connection, connection)
+
+ def test_close(self):
+ from google.cloud.bigquery.dbapi import connect
+ connection = connect(self._mock_client())
+ cursor = connection.cursor()
+        # close() is a no-op; there is nothing to test.
+ cursor.close()
+
+ def test_fetchone_wo_execute_raises_error(self):
+ from google.cloud.bigquery import dbapi
+ connection = dbapi.connect(self._mock_client())
+ cursor = connection.cursor()
+ self.assertRaises(dbapi.Error, cursor.fetchone)
+
+ def test_fetchone_w_row(self):
+ from google.cloud.bigquery import dbapi
+ connection = dbapi.connect(
+ self._mock_client(rows=[(1,)]))
+ cursor = connection.cursor()
+ cursor.execute('SELECT 1;')
+ row = cursor.fetchone()
+ self.assertEqual(row, (1,))
+ self.assertIsNone(cursor.fetchone())
+
+ def test_fetchmany_wo_execute_raises_error(self):
+ from google.cloud.bigquery import dbapi
+ connection = dbapi.connect(self._mock_client())
+ cursor = connection.cursor()
+ self.assertRaises(dbapi.Error, cursor.fetchmany)
+
+ def test_fetchmany_w_row(self):
+ from google.cloud.bigquery import dbapi
+ connection = dbapi.connect(
+ self._mock_client(rows=[(1,)]))
+ cursor = connection.cursor()
+ cursor.execute('SELECT 1;')
+ rows = cursor.fetchmany()
+ self.assertEqual(len(rows), 1)
+ self.assertEqual(rows[0], (1,))
+
+ def test_fetchmany_w_size(self):
+ from google.cloud.bigquery import dbapi
+ connection = dbapi.connect(
+ self._mock_client(
+ rows=[
+ (1, 2, 3),
+ (4, 5, 6),
+ (7, 8, 9),
+ ]))
+ cursor = connection.cursor()
+ cursor.execute('SELECT a, b, c;')
+ rows = cursor.fetchmany(size=2)
+ self.assertEqual(len(rows), 2)
+ self.assertEqual(rows[0], (1, 2, 3))
+ self.assertEqual(rows[1], (4, 5, 6))
+ second_page = cursor.fetchmany(size=2)
+ self.assertEqual(len(second_page), 1)
+ self.assertEqual(second_page[0], (7, 8, 9))
+ third_page = cursor.fetchmany(size=2)
+ self.assertEqual(third_page, [])
+
+ def test_fetchmany_w_arraysize(self):
+ from google.cloud.bigquery import dbapi
+ connection = dbapi.connect(
+ self._mock_client(
+ rows=[
+ (1, 2, 3),
+ (4, 5, 6),
+ (7, 8, 9),
+ ]))
+ cursor = connection.cursor()
+ cursor.execute('SELECT a, b, c;')
+ cursor.arraysize = 2
+ rows = cursor.fetchmany()
+ self.assertEqual(len(rows), 2)
+ self.assertEqual(rows[0], (1, 2, 3))
+ self.assertEqual(rows[1], (4, 5, 6))
+ second_page = cursor.fetchmany()
+ self.assertEqual(len(second_page), 1)
+ self.assertEqual(second_page[0], (7, 8, 9))
+ third_page = cursor.fetchmany()
+ self.assertEqual(third_page, [])
+
+ def test_fetchall_wo_execute_raises_error(self):
+ from google.cloud.bigquery import dbapi
+ connection = dbapi.connect(self._mock_client())
+ cursor = connection.cursor()
+ self.assertRaises(dbapi.Error, cursor.fetchall)
+
+ def test_fetchall_w_row(self):
+ from google.cloud.bigquery import dbapi
+ connection = dbapi.connect(
+ self._mock_client(rows=[(1,)]))
+ cursor = connection.cursor()
+ cursor.execute('SELECT 1;')
+ self.assertIsNone(cursor.description)
+ self.assertEqual(cursor.rowcount, 1)
+ rows = cursor.fetchall()
+ self.assertEqual(len(rows), 1)
+ self.assertEqual(rows[0], (1,))
+
+ def test_execute_custom_job_id(self):
+ from google.cloud.bigquery.dbapi import connect
+ client = self._mock_client(rows=[], num_dml_affected_rows=0)
+ connection = connect(client)
+ cursor = connection.cursor()
+ cursor.execute('SELECT 1;', job_id='foo')
+ args, kwargs = client.query.call_args
+ self.assertEqual(args[0], 'SELECT 1;')
+ self.assertEqual(kwargs['job_id'], 'foo')
+
+ def test_execute_w_dml(self):
+ from google.cloud.bigquery.dbapi import connect
+ connection = connect(
+ self._mock_client(rows=[], num_dml_affected_rows=12))
+ cursor = connection.cursor()
+ cursor.execute('DELETE FROM UserSessions WHERE user_id = \'test\';')
+ rows = cursor.fetchall()
+ self.assertIsNone(cursor.description)
+ self.assertEqual(cursor.rowcount, 12)
+ self.assertEqual(rows, [])
+
+ def test_execute_w_query(self):
+ from google.cloud.bigquery.schema import SchemaField
+ from google.cloud.bigquery import dbapi
+
+ connection = dbapi.connect(self._mock_client(
+ rows=[('hello', 'world', 1), ('howdy', 'y\'all', 2)],
+ schema=[
+ SchemaField('a', 'STRING', mode='NULLABLE'),
+ SchemaField('b', 'STRING', mode='REQUIRED'),
+ SchemaField('c', 'INTEGER', mode='NULLABLE')]))
+ cursor = connection.cursor()
+ cursor.execute('SELECT a, b, c FROM hello_world WHERE d > 3;')
+
+ # Verify the description.
+ self.assertEqual(len(cursor.description), 3)
+ a_name, a_type, _, _, _, _, a_null_ok = cursor.description[0]
+ self.assertEqual(a_name, 'a')
+ self.assertEqual(a_type, 'STRING')
+ self.assertEqual(a_type, dbapi.STRING)
+ self.assertTrue(a_null_ok)
+ b_name, b_type, _, _, _, _, b_null_ok = cursor.description[1]
+ self.assertEqual(b_name, 'b')
+ self.assertEqual(b_type, 'STRING')
+ self.assertEqual(b_type, dbapi.STRING)
+ self.assertFalse(b_null_ok)
+ c_name, c_type, _, _, _, _, c_null_ok = cursor.description[2]
+ self.assertEqual(c_name, 'c')
+ self.assertEqual(c_type, 'INTEGER')
+ self.assertEqual(c_type, dbapi.NUMBER)
+ self.assertTrue(c_null_ok)
+
+ # Verify the results.
+ self.assertEqual(cursor.rowcount, 2)
+ row = cursor.fetchone()
+ self.assertEqual(row, ('hello', 'world', 1))
+ row = cursor.fetchone()
+ self.assertEqual(row, ('howdy', 'y\'all', 2))
+ row = cursor.fetchone()
+ self.assertIsNone(row)
+
+ def test_execute_raises_if_result_raises(self):
+ import google.cloud.exceptions
+
+ from google.cloud.bigquery import client
+ from google.cloud.bigquery import job
+ from google.cloud.bigquery.dbapi import connect
+ from google.cloud.bigquery.dbapi import exceptions
+
+        mock_job = mock.create_autospec(job.QueryJob)
+        mock_job.result.side_effect = google.cloud.exceptions.GoogleCloudError('')
+        mock_client = mock.create_autospec(client.Client)
+        mock_client.query.return_value = mock_job
+        connection = connect(mock_client)
+ cursor = connection.cursor()
+
+ with self.assertRaises(exceptions.DatabaseError):
+ cursor.execute('SELECT 1')
+
+ def test_executemany_w_dml(self):
+ from google.cloud.bigquery.dbapi import connect
+ connection = connect(
+ self._mock_client(rows=[], num_dml_affected_rows=12))
+ cursor = connection.cursor()
+ cursor.executemany(
+ 'DELETE FROM UserSessions WHERE user_id = %s;',
+ (('test',), ('anothertest',)))
+ self.assertIsNone(cursor.description)
+ self.assertEqual(cursor.rowcount, 12)
+
+ def test__format_operation_w_dict(self):
+ from google.cloud.bigquery.dbapi import cursor
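+        # pyformat %(name)s placeholders are rewritten as named query parameters.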
+ formatted_operation = cursor._format_operation(
+ 'SELECT %(somevalue)s, %(a `weird` one)s;',
+ {
+ 'somevalue': 'hi',
+ 'a `weird` one': 'world',
+ })
+ self.assertEqual(
+ formatted_operation, 'SELECT @`somevalue`, @`a \\`weird\\` one`;')
+
+ def test__format_operation_w_wrong_dict(self):
+ from google.cloud.bigquery import dbapi
+ from google.cloud.bigquery.dbapi import cursor
+ self.assertRaises(
+ dbapi.ProgrammingError,
+ cursor._format_operation,
+ 'SELECT %(somevalue)s, %(othervalue)s;',
+ {
+ 'somevalue-not-here': 'hi',
+ 'othervalue': 'world',
+ })
+
+ def test__format_operation_w_sequence(self):
+ from google.cloud.bigquery.dbapi import cursor
+ formatted_operation = cursor._format_operation(
+ 'SELECT %s, %s;', ('hello', 'world'))
+ self.assertEqual(formatted_operation, 'SELECT ?, ?;')
+
+ def test__format_operation_w_too_short_sequence(self):
+ from google.cloud.bigquery import dbapi
+ from google.cloud.bigquery.dbapi import cursor
+ self.assertRaises(
+ dbapi.ProgrammingError,
+ cursor._format_operation,
+ 'SELECT %s, %s;',
+ ('hello',))
diff --git a/bigquery/tests/unit/test_external_config.py b/bigquery/tests/unit/test_external_config.py
new file mode 100644
index 0000000..b788742
--- /dev/null
+++ b/bigquery/tests/unit/test_external_config.py
@@ -0,0 +1,212 @@
+# Copyright 2017 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import base64
+import copy
+import unittest
+
+from google.cloud.bigquery.external_config import ExternalConfig
+
+
+class TestExternalConfig(unittest.TestCase):
+
+ SOURCE_URIS = ['gs://foo', 'gs://bar']
+
+ BASE_RESOURCE = {
+ 'sourceFormat': '',
+ 'sourceUris': SOURCE_URIS,
+ 'maxBadRecords': 17,
+ 'autodetect': True,
+ 'ignoreUnknownValues': False,
+ 'compression': 'compression',
+ }
+
+ def test_api_repr_base(self):
+ from google.cloud.bigquery.schema import SchemaField
+
+ resource = copy.deepcopy(self.BASE_RESOURCE)
+ ec = ExternalConfig.from_api_repr(resource)
+ self._verify_base(ec)
+ self.assertEqual(ec.schema, [])
+ self.assertIsNone(ec.options)
+
+ got_resource = ec.to_api_repr()
+ self.assertEqual(got_resource, self.BASE_RESOURCE)
+
+ resource = _copy_and_update(self.BASE_RESOURCE, {
+ 'schema': {
+ 'fields': [
+ {
+ 'name': 'full_name',
+ 'type': 'STRING',
+ 'mode': 'REQUIRED',
+ },
+ ],
+ },
+ })
+ ec = ExternalConfig.from_api_repr(resource)
+ self._verify_base(ec)
+ self.assertEqual(ec.schema,
+ [SchemaField('full_name', 'STRING', mode='REQUIRED')])
+ self.assertIsNone(ec.options)
+
+ got_resource = ec.to_api_repr()
+ self.assertEqual(got_resource, resource)
+
+ def _verify_base(self, ec):
+ self.assertEqual(ec.autodetect, True)
+ self.assertEqual(ec.compression, 'compression')
+ self.assertEqual(ec.ignore_unknown_values, False)
+ self.assertEqual(ec.max_bad_records, 17)
+ self.assertEqual(ec.source_uris, self.SOURCE_URIS)
+
+ def test_to_api_repr_source_format(self):
+ ec = ExternalConfig('CSV')
+ got = ec.to_api_repr()
+ want = {'sourceFormat': 'CSV'}
+ self.assertEqual(got, want)
+
+ def test_api_repr_sheets(self):
+ from google.cloud.bigquery.external_config import GoogleSheetsOptions
+
+ resource = _copy_and_update(self.BASE_RESOURCE, {
+ 'sourceFormat': 'GOOGLE_SHEETS',
+ 'googleSheetsOptions': {'skipLeadingRows': '123'},
+ })
+
+ ec = ExternalConfig.from_api_repr(resource)
+
+ self._verify_base(ec)
+ self.assertEqual(ec.source_format, 'GOOGLE_SHEETS')
+ self.assertIsInstance(ec.options, GoogleSheetsOptions)
+ self.assertEqual(ec.options.skip_leading_rows, 123)
+
+ got_resource = ec.to_api_repr()
+
+ self.assertEqual(got_resource, resource)
+
+ del resource['googleSheetsOptions']['skipLeadingRows']
+ ec = ExternalConfig.from_api_repr(resource)
+ self.assertIsNone(ec.options.skip_leading_rows)
+ got_resource = ec.to_api_repr()
+ self.assertEqual(got_resource, resource)
+
+ def test_api_repr_csv(self):
+ from google.cloud.bigquery.external_config import CSVOptions
+
+ resource = _copy_and_update(self.BASE_RESOURCE, {
+ 'sourceFormat': 'CSV',
+ 'csvOptions': {
+ 'fieldDelimiter': 'fieldDelimiter',
+ 'skipLeadingRows': '123',
+ 'quote': 'quote',
+ 'allowQuotedNewlines': True,
+ 'allowJaggedRows': False,
+ 'encoding': 'encoding',
+ },
+ })
+
+ ec = ExternalConfig.from_api_repr(resource)
+
+ self._verify_base(ec)
+ self.assertEqual(ec.source_format, 'CSV')
+ self.assertIsInstance(ec.options, CSVOptions)
+ self.assertEqual(ec.options.field_delimiter, 'fieldDelimiter')
+ self.assertEqual(ec.options.skip_leading_rows, 123)
+ self.assertEqual(ec.options.quote_character, 'quote')
+ self.assertEqual(ec.options.allow_quoted_newlines, True)
+ self.assertEqual(ec.options.allow_jagged_rows, False)
+ self.assertEqual(ec.options.encoding, 'encoding')
+
+ got_resource = ec.to_api_repr()
+
+ self.assertEqual(got_resource, resource)
+
+ del resource['csvOptions']['skipLeadingRows']
+ ec = ExternalConfig.from_api_repr(resource)
+ self.assertIsNone(ec.options.skip_leading_rows)
+ got_resource = ec.to_api_repr()
+ self.assertEqual(got_resource, resource)
+
+ def test_api_repr_bigtable(self):
+ from google.cloud.bigquery.external_config import BigtableOptions
+ from google.cloud.bigquery.external_config import BigtableColumnFamily
+
+ QUALIFIER_ENCODED = base64.standard_b64encode(b'q').decode('ascii')
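+        # qualifierEncoded travels as base64 text; the property exposes raw bytes.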
+ resource = _copy_and_update(self.BASE_RESOURCE, {
+ 'sourceFormat': 'BIGTABLE',
+ 'bigtableOptions': {
+ 'ignoreUnspecifiedColumnFamilies': True,
+ 'readRowkeyAsString': False,
+ 'columnFamilies': [
+ {
+ 'familyId': 'familyId',
+ 'type': 'type',
+ 'encoding': 'encoding',
+ 'columns': [
+ {
+ 'qualifierString': 'q',
+ 'fieldName': 'fieldName1',
+ 'type': 'type1',
+ 'encoding': 'encoding1',
+ 'onlyReadLatest': True,
+ },
+ {
+ 'qualifierEncoded': QUALIFIER_ENCODED,
+ 'fieldName': 'fieldName2',
+ 'type': 'type2',
+ 'encoding': 'encoding2',
+ },
+
+ ],
+ 'onlyReadLatest': False,
+ }
+ ],
+ },
+ })
+
+ ec = ExternalConfig.from_api_repr(resource)
+
+ self._verify_base(ec)
+ self.assertEqual(ec.source_format, 'BIGTABLE')
+ self.assertIsInstance(ec.options, BigtableOptions)
+ self.assertEqual(ec.options.ignore_unspecified_column_families, True)
+ self.assertEqual(ec.options.read_rowkey_as_string, False)
+ self.assertEqual(len(ec.options.column_families), 1)
+ fam1 = ec.options.column_families[0]
+ self.assertIsInstance(fam1, BigtableColumnFamily)
+ self.assertEqual(fam1.family_id, 'familyId')
+ self.assertEqual(fam1.type_, 'type')
+ self.assertEqual(fam1.encoding, 'encoding')
+ self.assertEqual(len(fam1.columns), 2)
+ col1 = fam1.columns[0]
+ self.assertEqual(col1.qualifier_string, 'q')
+ self.assertEqual(col1.field_name, 'fieldName1')
+ self.assertEqual(col1.type_, 'type1')
+ self.assertEqual(col1.encoding, 'encoding1')
+ col2 = ec.options.column_families[0].columns[1]
+ self.assertEqual(col2.qualifier_encoded, b'q')
+ self.assertEqual(col2.field_name, 'fieldName2')
+ self.assertEqual(col2.type_, 'type2')
+ self.assertEqual(col2.encoding, 'encoding2')
+
+ got_resource = ec.to_api_repr()
+
+ self.assertEqual(got_resource, resource)
+
+
+def _copy_and_update(d, u):
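+    # Return a deep copy of d updated with u, leaving the original untouched.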
+ d = copy.deepcopy(d)
+ d.update(u)
+ return d
diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py
new file mode 100644
index 0000000..34c5c92
--- /dev/null
+++ b/bigquery/tests/unit/test_job.py
@@ -0,0 +1,2937 @@
+# Copyright 2015 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+
+from six.moves import http_client
+import unittest
+
+from google.cloud.bigquery.job import ExtractJobConfig, CopyJobConfig
+from google.cloud.bigquery.job import LoadJobConfig
+from google.cloud.bigquery.dataset import DatasetReference
+
+import mock
+
+
+def _make_credentials():
+ import google.auth.credentials
+
+ return mock.Mock(spec=google.auth.credentials.Credentials)
+
+
+def _make_client(project='test-project', connection=None):
+ from google.cloud.bigquery.client import Client
+
+ if connection is None:
+ connection = _Connection()
+
+ client = Client(
+ project=project, credentials=_make_credentials(), _http=object())
+ client._connection = connection
+ return client
+
+
+class Test__int_or_none(unittest.TestCase):
+
+ def _call_fut(self, *args, **kwargs):
+ from google.cloud.bigquery import job
+
+ return job._int_or_none(*args, **kwargs)
+
+ def test_w_int(self):
+ self.assertEqual(self._call_fut(13), 13)
+
+ def test_w_none(self):
+ self.assertIsNone(self._call_fut(None))
+
+ def test_w_str(self):
+ self.assertEqual(self._call_fut('13'), 13)
+
+
+class Test__error_result_to_exception(unittest.TestCase):
+
+ def _call_fut(self, *args, **kwargs):
+ from google.cloud.bigquery import job
+
+ return job._error_result_to_exception(*args, **kwargs)
+
+ def test_simple(self):
+ error_result = {
+ 'reason': 'invalid',
+ 'message': 'bad request'
+ }
+ exception = self._call_fut(error_result)
+ self.assertEqual(exception.code, http_client.BAD_REQUEST)
+ self.assertTrue(exception.message.startswith('bad request'))
+ self.assertIn(error_result, exception.errors)
+
+ def test_missing_reason(self):
+ error_result = {}
+ exception = self._call_fut(error_result)
+ self.assertEqual(exception.code, http_client.INTERNAL_SERVER_ERROR)
+
+
+class _Base(object):
+ from google.cloud.bigquery.dataset import DatasetReference
+ from google.cloud.bigquery.table import TableReference
+
+ PROJECT = 'project'
+ SOURCE1 = 'http://example.com/source1.csv'
+    DS_ID = 'dataset_id'
+ DS_REF = DatasetReference(PROJECT, DS_ID)
+ TABLE_ID = 'table_id'
+ TABLE_REF = TableReference(DS_REF, TABLE_ID)
+ JOB_ID = 'JOB_ID'
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def _setUpConstants(self):
+ import datetime
+ from google.cloud._helpers import UTC
+
+ self.WHEN_TS = 1437767599.006
+ self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(
+ tzinfo=UTC)
+ self.ETAG = 'ETAG'
+ self.FULL_JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_ID)
+ self.RESOURCE_URL = 'http://example.com/path/to/resource'
+ self.USER_EMAIL = 'phred@example.com'
+
+ def _table_ref(self, table_id):
+ from google.cloud.bigquery.table import TableReference
+
+ return TableReference(self.DS_REF, table_id)
+
+ def _makeResource(self, started=False, ended=False):
+ self._setUpConstants()
+ resource = {
+ 'configuration': {
+ self.JOB_TYPE: {
+ },
+ },
+ 'statistics': {
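+                # The API reports job timestamps in milliseconds since the epoch.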
+ 'creationTime': self.WHEN_TS * 1000,
+ self.JOB_TYPE: {
+ }
+ },
+ 'etag': self.ETAG,
+ 'id': self.FULL_JOB_ID,
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'selfLink': self.RESOURCE_URL,
+ 'user_email': self.USER_EMAIL,
+ }
+
+ if started or ended:
+ resource['statistics']['startTime'] = self.WHEN_TS * 1000
+
+ if ended:
+ resource['statistics']['endTime'] = (self.WHEN_TS + 1000) * 1000
+
+ if self.JOB_TYPE == 'query':
+ resource['configuration']['query']['destinationTable'] = {
+ 'projectId': self.PROJECT,
+ 'datasetId': '_temp_dataset',
+ 'tableId': '_temp_table',
+ }
+
+ return resource
+
+ def _verifyInitialReadonlyProperties(self, job):
+ # root elements of resource
+ self.assertIsNone(job.etag)
+ self.assertIsNone(job.self_link)
+ self.assertIsNone(job.user_email)
+
+ # derived from resource['statistics']
+ self.assertIsNone(job.created)
+ self.assertIsNone(job.started)
+ self.assertIsNone(job.ended)
+
+ # derived from resource['status']
+ self.assertIsNone(job.error_result)
+ self.assertIsNone(job.errors)
+ self.assertIsNone(job.state)
+
+ def _verifyReadonlyResourceProperties(self, job, resource):
+ from datetime import timedelta
+
+ statistics = resource.get('statistics', {})
+
+ if 'creationTime' in statistics:
+ self.assertEqual(job.created, self.WHEN)
+ else:
+ self.assertIsNone(job.created)
+
+ if 'startTime' in statistics:
+ self.assertEqual(job.started, self.WHEN)
+ else:
+ self.assertIsNone(job.started)
+
+ if 'endTime' in statistics:
+ self.assertEqual(job.ended, self.WHEN + timedelta(seconds=1000))
+ else:
+ self.assertIsNone(job.ended)
+
+ if 'etag' in resource:
+ self.assertEqual(job.etag, self.ETAG)
+ else:
+ self.assertIsNone(job.etag)
+
+ if 'selfLink' in resource:
+ self.assertEqual(job.self_link, self.RESOURCE_URL)
+ else:
+ self.assertIsNone(job.self_link)
+
+ if 'user_email' in resource:
+ self.assertEqual(job.user_email, self.USER_EMAIL)
+ else:
+ self.assertIsNone(job.user_email)
+
+
+class TestLoadJob(unittest.TestCase, _Base):
+ JOB_TYPE = 'load'
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.job import LoadJob
+
+ return LoadJob
+
+ def _setUpConstants(self):
+ super(TestLoadJob, self)._setUpConstants()
+ self.INPUT_FILES = 2
+ self.INPUT_BYTES = 12345
+ self.OUTPUT_BYTES = 23456
+ self.OUTPUT_ROWS = 345
+
+ def _makeResource(self, started=False, ended=False):
+ resource = super(TestLoadJob, self)._makeResource(
+ started, ended)
+ config = resource['configuration']['load']
+ config['sourceUris'] = [self.SOURCE1]
+ config['destinationTable'] = {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID,
+ }
+
+ if ended:
+ resource['status'] = {'state': 'DONE'}
+ resource['statistics']['load']['inputFiles'] = self.INPUT_FILES
+ resource['statistics']['load']['inputFileBytes'] = self.INPUT_BYTES
+ resource['statistics']['load']['outputBytes'] = self.OUTPUT_BYTES
+ resource['statistics']['load']['outputRows'] = self.OUTPUT_ROWS
+
+ return resource
+
+ def _verifyBooleanConfigProperties(self, job, config):
+ if 'allowJaggedRows' in config:
+ self.assertEqual(job.allow_jagged_rows,
+ config['allowJaggedRows'])
+ else:
+ self.assertIsNone(job.allow_jagged_rows)
+ if 'allowQuotedNewlines' in config:
+ self.assertEqual(job.allow_quoted_newlines,
+ config['allowQuotedNewlines'])
+ else:
+ self.assertIsNone(job.allow_quoted_newlines)
+ if 'autodetect' in config:
+ self.assertEqual(
+ job.autodetect, config['autodetect'])
+ else:
+ self.assertIsNone(job.autodetect)
+ if 'ignoreUnknownValues' in config:
+ self.assertEqual(job.ignore_unknown_values,
+ config['ignoreUnknownValues'])
+ else:
+ self.assertIsNone(job.ignore_unknown_values)
+
+ def _verifyEnumConfigProperties(self, job, config):
+ if 'createDisposition' in config:
+ self.assertEqual(job.create_disposition,
+ config['createDisposition'])
+ else:
+ self.assertIsNone(job.create_disposition)
+ if 'encoding' in config:
+ self.assertEqual(job.encoding,
+ config['encoding'])
+ else:
+ self.assertIsNone(job.encoding)
+ if 'sourceFormat' in config:
+ self.assertEqual(job.source_format,
+ config['sourceFormat'])
+ else:
+ self.assertIsNone(job.source_format)
+ if 'writeDisposition' in config:
+ self.assertEqual(job.write_disposition,
+ config['writeDisposition'])
+ else:
+ self.assertIsNone(job.write_disposition)
+
+ def _verifyResourceProperties(self, job, resource):
+ self._verifyReadonlyResourceProperties(job, resource)
+
+ config = resource.get('configuration', {}).get('load')
+
+ self._verifyBooleanConfigProperties(job, config)
+ self._verifyEnumConfigProperties(job, config)
+
+ self.assertEqual(job.source_uris, config['sourceUris'])
+
+ table_ref = config['destinationTable']
+ self.assertEqual(job.destination.project, table_ref['projectId'])
+ self.assertEqual(job.destination.dataset_id, table_ref['datasetId'])
+ self.assertEqual(job.destination.table_id, table_ref['tableId'])
+
+ if 'fieldDelimiter' in config:
+ self.assertEqual(job.field_delimiter,
+ config['fieldDelimiter'])
+ else:
+ self.assertIsNone(job.field_delimiter)
+ if 'maxBadRecords' in config:
+ self.assertEqual(job.max_bad_records,
+ config['maxBadRecords'])
+ else:
+ self.assertIsNone(job.max_bad_records)
+ if 'nullMarker' in config:
+ self.assertEqual(job.null_marker,
+ config['nullMarker'])
+ else:
+ self.assertIsNone(job.null_marker)
+ if 'quote' in config:
+ self.assertEqual(job.quote_character,
+ config['quote'])
+ else:
+ self.assertIsNone(job.quote_character)
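+        # The API returns skipLeadingRows as a string.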
+ if 'skipLeadingRows' in config:
+ self.assertEqual(str(job.skip_leading_rows),
+ config['skipLeadingRows'])
+ else:
+ self.assertIsNone(job.skip_leading_rows)
+
+ def test_ctor(self):
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF,
+ client)
+ self.assertIs(job.destination, self.TABLE_REF)
+ self.assertEqual(list(job.source_uris), [self.SOURCE1])
+ self.assertIs(job._client, client)
+ self.assertEqual(job.job_type, self.JOB_TYPE)
+ self.assertEqual(
+ job.path,
+ '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID))
+ self.assertEqual(job.schema, [])
+
+ self._verifyInitialReadonlyProperties(job)
+
+ # derived from resource['statistics']['load']
+ self.assertIsNone(job.input_file_bytes)
+ self.assertIsNone(job.input_files)
+ self.assertIsNone(job.output_bytes)
+ self.assertIsNone(job.output_rows)
+
+ # set/read from resource['configuration']['load']
+ self.assertIsNone(job.allow_jagged_rows)
+ self.assertIsNone(job.allow_quoted_newlines)
+ self.assertIsNone(job.autodetect)
+ self.assertIsNone(job.create_disposition)
+ self.assertIsNone(job.encoding)
+ self.assertIsNone(job.field_delimiter)
+ self.assertIsNone(job.ignore_unknown_values)
+ self.assertIsNone(job.max_bad_records)
+ self.assertIsNone(job.null_marker)
+ self.assertIsNone(job.quote_character)
+ self.assertIsNone(job.skip_leading_rows)
+ self.assertIsNone(job.source_format)
+ self.assertIsNone(job.write_disposition)
+
+ def test_ctor_w_config(self):
+ from google.cloud.bigquery.schema import SchemaField
+
+ client = _make_client(project=self.PROJECT)
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ age = SchemaField('age', 'INTEGER', mode='REQUIRED')
+ config = LoadJobConfig()
+ config.schema = [full_name, age]
+ job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF,
+ client, config)
+ self.assertEqual(job.schema, [full_name, age])
+
+ def test_done(self):
+ client = _make_client(project=self.PROJECT)
+ resource = self._makeResource(ended=True)
+ job = self._get_target_class().from_api_repr(resource, client)
+ self.assertTrue(job.done())
+
+ def test_result(self):
+ client = _make_client(project=self.PROJECT)
+ resource = self._makeResource(ended=True)
+ job = self._get_target_class().from_api_repr(resource, client)
+
+ result = job.result()
+
+ self.assertIs(result, job)
+
+ def test_result_invokes_begin(self):
+ begun_resource = self._makeResource()
+ done_resource = copy.deepcopy(begun_resource)
+ done_resource['status'] = {'state': 'DONE'}
+ connection = _Connection(begun_resource, done_resource)
+ client = _make_client(self.PROJECT)
+ client._connection = connection
+
+ job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF,
+ client)
+ job.result()
+
+ self.assertEqual(len(connection._requested), 2)
+ begin_request, reload_request = connection._requested
+ self.assertEqual(begin_request['method'], 'POST')
+ self.assertEqual(reload_request['method'], 'GET')
+
+ def test_schema_setter_non_list(self):
+ config = LoadJobConfig()
+ with self.assertRaises(TypeError):
+ config.schema = object()
+
+ def test_schema_setter_invalid_field(self):
+ from google.cloud.bigquery.schema import SchemaField
+
+ config = LoadJobConfig()
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ with self.assertRaises(ValueError):
+ config.schema = [full_name, object()]
+
+ def test_schema_setter(self):
+ from google.cloud.bigquery.schema import SchemaField
+
+ config = LoadJobConfig()
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ age = SchemaField('age', 'INTEGER', mode='REQUIRED')
+ config.schema = [full_name, age]
+ self.assertEqual(config.schema, [full_name, age])
+
+ def test_props_set_by_server(self):
+ import datetime
+ from google.cloud._helpers import UTC
+ from google.cloud._helpers import _millis
+
+ CREATED = datetime.datetime(2015, 8, 11, 12, 13, 22, tzinfo=UTC)
+ STARTED = datetime.datetime(2015, 8, 11, 13, 47, 15, tzinfo=UTC)
+ ENDED = datetime.datetime(2015, 8, 11, 14, 47, 15, tzinfo=UTC)
+ FULL_JOB_ID = '%s:%s' % (self.PROJECT, self.JOB_ID)
+ URL = 'http://example.com/projects/%s/jobs/%s' % (
+ self.PROJECT, self.JOB_ID)
+ EMAIL = 'phred@example.com'
+ ERROR_RESULT = {'debugInfo': 'DEBUG',
+ 'location': 'LOCATION',
+ 'message': 'MESSAGE',
+ 'reason': 'REASON'}
+
+ client = _make_client(project=self.PROJECT)
+ table = _Table()
+ job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client)
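+        # Populate _properties directly to simulate fields set by the server.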
+ job._properties['etag'] = 'ETAG'
+ job._properties['id'] = FULL_JOB_ID
+ job._properties['selfLink'] = URL
+ job._properties['user_email'] = EMAIL
+
+ statistics = job._properties['statistics'] = {}
+ statistics['creationTime'] = _millis(CREATED)
+ statistics['startTime'] = _millis(STARTED)
+ statistics['endTime'] = _millis(ENDED)
+ load_stats = statistics['load'] = {}
+ load_stats['inputFileBytes'] = 12345
+ load_stats['inputFiles'] = 1
+ load_stats['outputBytes'] = 23456
+ load_stats['outputRows'] = 345
+
+ self.assertEqual(job.etag, 'ETAG')
+ self.assertEqual(job.self_link, URL)
+ self.assertEqual(job.user_email, EMAIL)
+
+ self.assertEqual(job.created, CREATED)
+ self.assertEqual(job.started, STARTED)
+ self.assertEqual(job.ended, ENDED)
+
+ self.assertEqual(job.input_file_bytes, 12345)
+ self.assertEqual(job.input_files, 1)
+ self.assertEqual(job.output_bytes, 23456)
+ self.assertEqual(job.output_rows, 345)
+
+ status = job._properties['status'] = {}
+
+ self.assertIsNone(job.error_result)
+ self.assertIsNone(job.errors)
+ self.assertIsNone(job.state)
+
+ status['errorResult'] = ERROR_RESULT
+ status['errors'] = [ERROR_RESULT]
+ status['state'] = 'STATE'
+
+ self.assertEqual(job.error_result, ERROR_RESULT)
+ self.assertEqual(job.errors, [ERROR_RESULT])
+ self.assertEqual(job.state, 'STATE')
+
+ def test_from_api_repr_missing_identity(self):
+ self._setUpConstants()
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = {}
+ klass = self._get_target_class()
+ with self.assertRaises(KeyError):
+ klass.from_api_repr(RESOURCE, client=client)
+
+ def test_from_api_repr_missing_config(self):
+ self._setUpConstants()
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = {
+ 'id': '%s:%s' % (self.PROJECT, self.JOB_ID),
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ }
+ }
+ klass = self._get_target_class()
+ with self.assertRaises(KeyError):
+ klass.from_api_repr(RESOURCE, client=client)
+
+ def test_from_api_repr_bare(self):
+ self._setUpConstants()
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = {
+ 'id': self.FULL_JOB_ID,
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'load': {
+ 'sourceUris': [self.SOURCE1],
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID,
+ },
+ }
+ },
+ }
+ klass = self._get_target_class()
+ job = klass.from_api_repr(RESOURCE, client=client)
+ self.assertIs(job._client, client)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_from_api_repr_w_properties(self):
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = self._makeResource()
+ load_config = RESOURCE['configuration']['load']
+ load_config['createDisposition'] = 'CREATE_IF_NEEDED'
+ klass = self._get_target_class()
+ job = klass.from_api_repr(RESOURCE, client=client)
+ self.assertIs(job._client, client)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_begin_w_already_running(self):
+ conn = _Connection()
+ client = _make_client(project=self.PROJECT, connection=conn)
+ job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF,
+ client)
+ job._properties['status'] = {'state': 'RUNNING'}
+
+ with self.assertRaises(ValueError):
+ job.begin()
+
+ def test_begin_w_bound_client(self):
+ PATH = '/projects/%s/jobs' % (self.PROJECT,)
+ RESOURCE = self._makeResource()
+ # Ensure None for missing server-set props
+ del RESOURCE['statistics']['creationTime']
+ del RESOURCE['etag']
+ del RESOURCE['selfLink']
+ del RESOURCE['user_email']
+ conn = _Connection(RESOURCE)
+ client = _make_client(project=self.PROJECT, connection=conn)
+ job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF,
+ client)
+
+ job.begin()
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], PATH)
+ SENT = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'load': {
+ 'sourceUris': [self.SOURCE1],
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID,
+ },
+ },
+ },
+ }
+ self.assertEqual(req['data'], SENT)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_begin_w_autodetect(self):
+ path = '/projects/{}/jobs'.format(self.PROJECT)
+ resource = self._makeResource()
+ resource['configuration']['load']['autodetect'] = True
+ # Ensure None for missing server-set props
+ del resource['statistics']['creationTime']
+ del resource['etag']
+ del resource['selfLink']
+ del resource['user_email']
+ conn = _Connection(resource)
+ client = _make_client(project=self.PROJECT, connection=conn)
+ config = LoadJobConfig()
+ config.autodetect = True
+ job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF,
+ client, config)
+ job.begin()
+
+ sent = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'load': {
+ 'sourceUris': [self.SOURCE1],
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID,
+ },
+ 'autodetect': True
+ },
+ },
+ }
+ expected_request = {
+ 'method': 'POST',
+ 'path': path,
+ 'data': sent,
+ }
+ self.assertEqual(conn._requested, [expected_request])
+ self._verifyResourceProperties(job, resource)
+
+ def test_begin_w_alternate_client(self):
+ from google.cloud.bigquery.schema import SchemaField
+
+ PATH = '/projects/%s/jobs' % (self.PROJECT,)
+ RESOURCE = self._makeResource(ended=True)
+ LOAD_CONFIGURATION = {
+ 'sourceUris': [self.SOURCE1],
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_ID,
+ },
+ 'allowJaggedRows': True,
+ 'allowQuotedNewlines': True,
+ 'createDisposition': 'CREATE_NEVER',
+            'encoding': 'ISO-8859-1',
+ 'fieldDelimiter': '|',
+ 'ignoreUnknownValues': True,
+ 'maxBadRecords': 100,
+ 'nullMarker': r'\N',
+ 'quote': "'",
+ 'skipLeadingRows': '1',
+ 'sourceFormat': 'CSV',
+ 'writeDisposition': 'WRITE_TRUNCATE',
+ 'schema': {'fields': [
+ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
+ {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'},
+ ]}
+ }
+ RESOURCE['configuration']['load'] = LOAD_CONFIGURATION
+ conn1 = _Connection()
+ client1 = _make_client(project=self.PROJECT, connection=conn1)
+ conn2 = _Connection(RESOURCE)
+ client2 = _make_client(project=self.PROJECT, connection=conn2)
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ age = SchemaField('age', 'INTEGER', mode='REQUIRED')
+ config = LoadJobConfig()
+ config.schema = [full_name, age]
+ job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF,
+ client1, config)
+ config.allow_jagged_rows = True
+ config.allow_quoted_newlines = True
+ config.create_disposition = 'CREATE_NEVER'
+        config.encoding = 'ISO-8859-1'
+ config.field_delimiter = '|'
+ config.ignore_unknown_values = True
+ config.max_bad_records = 100
+ config.null_marker = r'\N'
+ config.quote_character = "'"
+ config.skip_leading_rows = 1
+ config.source_format = 'CSV'
+ config.write_disposition = 'WRITE_TRUNCATE'
+
+ job.begin(client=client2)
+
+ self.assertEqual(len(conn1._requested), 0)
+ self.assertEqual(len(conn2._requested), 1)
+ req = conn2._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], PATH)
+ SENT = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'load': LOAD_CONFIGURATION,
+ },
+ }
+ self.maxDiff = None
+ self.assertEqual(req['data'], SENT)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_exists_miss_w_bound_client(self):
+ PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
+ conn = _Connection()
+ client = _make_client(project=self.PROJECT, connection=conn)
+ table = _Table()
+ job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client)
+
+ self.assertFalse(job.exists())
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], PATH)
+ self.assertEqual(req['query_params'], {'fields': 'id'})
+
+ def test_exists_hit_w_alternate_client(self):
+ PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
+ conn1 = _Connection()
+ client1 = _make_client(project=self.PROJECT, connection=conn1)
+ conn2 = _Connection({})
+ client2 = _make_client(project=self.PROJECT, connection=conn2)
+ table = _Table()
+ job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1)
+
+ self.assertTrue(job.exists(client=client2))
+
+ self.assertEqual(len(conn1._requested), 0)
+ self.assertEqual(len(conn2._requested), 1)
+ req = conn2._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], PATH)
+ self.assertEqual(req['query_params'], {'fields': 'id'})
+
+ def test_reload_w_bound_client(self):
+ PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
+ RESOURCE = self._makeResource()
+ conn = _Connection(RESOURCE)
+ client = _make_client(project=self.PROJECT, connection=conn)
+ table = _Table()
+ job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client)
+
+ job.reload()
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], PATH)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_reload_w_alternate_client(self):
+ PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
+ RESOURCE = self._makeResource()
+ conn1 = _Connection()
+ client1 = _make_client(project=self.PROJECT, connection=conn1)
+ conn2 = _Connection(RESOURCE)
+ client2 = _make_client(project=self.PROJECT, connection=conn2)
+ table = _Table()
+ job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1)
+
+ job.reload(client=client2)
+
+ self.assertEqual(len(conn1._requested), 0)
+ self.assertEqual(len(conn2._requested), 1)
+ req = conn2._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], PATH)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_cancel_w_bound_client(self):
+ PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_ID)
+ RESOURCE = self._makeResource(ended=True)
+ RESPONSE = {'job': RESOURCE}
+ conn = _Connection(RESPONSE)
+ client = _make_client(project=self.PROJECT, connection=conn)
+ table = _Table()
+ job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client)
+
+ job.cancel()
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], PATH)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_cancel_w_alternate_client(self):
+ PATH = '/projects/%s/jobs/%s/cancel' % (self.PROJECT, self.JOB_ID)
+ RESOURCE = self._makeResource(ended=True)
+ RESPONSE = {'job': RESOURCE}
+ conn1 = _Connection()
+ client1 = _make_client(project=self.PROJECT, connection=conn1)
+ conn2 = _Connection(RESPONSE)
+ client2 = _make_client(project=self.PROJECT, connection=conn2)
+ table = _Table()
+ job = self._make_one(self.JOB_ID, [self.SOURCE1], table, client1)
+
+ job.cancel(client=client2)
+
+ self.assertEqual(len(conn1._requested), 0)
+ self.assertEqual(len(conn2._requested), 1)
+ req = conn2._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], PATH)
+ self._verifyResourceProperties(job, RESOURCE)
+
+
+class TestCopyJob(unittest.TestCase, _Base):
+ JOB_TYPE = 'copy'
+ SOURCE_TABLE = 'source_table'
+ DESTINATION_TABLE = 'destination_table'
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.job import CopyJob
+
+ return CopyJob
+
+ def _makeResource(self, started=False, ended=False):
+ resource = super(TestCopyJob, self)._makeResource(
+ started, ended)
+ config = resource['configuration']['copy']
+ config['sourceTables'] = [{
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.SOURCE_TABLE,
+ }]
+ config['destinationTable'] = {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.DESTINATION_TABLE,
+ }
+
+ return resource
+
+ def _verifyResourceProperties(self, job, resource):
+ self._verifyReadonlyResourceProperties(job, resource)
+
+ config = resource.get('configuration', {}).get('copy')
+
+ table_ref = config['destinationTable']
+ self.assertEqual(job.destination.project, table_ref['projectId'])
+ self.assertEqual(job.destination.dataset_id, table_ref['datasetId'])
+ self.assertEqual(job.destination.table_id, table_ref['tableId'])
+
+ sources = config.get('sourceTables')
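+        # A single-source copy may use the singular 'sourceTable' key instead.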
+ if sources is None:
+ sources = [config['sourceTable']]
+ self.assertEqual(len(sources), len(job.sources))
+ for table_ref, table in zip(sources, job.sources):
+ self.assertEqual(table.project, table_ref['projectId'])
+ self.assertEqual(table.dataset_id, table_ref['datasetId'])
+ self.assertEqual(table.table_id, table_ref['tableId'])
+
+ if 'createDisposition' in config:
+ self.assertEqual(job.create_disposition,
+ config['createDisposition'])
+ else:
+ self.assertIsNone(job.create_disposition)
+
+ if 'writeDisposition' in config:
+ self.assertEqual(job.write_disposition,
+ config['writeDisposition'])
+ else:
+ self.assertIsNone(job.write_disposition)
+
+ def test_ctor(self):
+ client = _make_client(project=self.PROJECT)
+ source = self._table_ref(self.SOURCE_TABLE)
+ destination = self._table_ref(self.DESTINATION_TABLE)
+ job = self._make_one(self.JOB_ID, [source], destination, client)
+ self.assertIs(job.destination, destination)
+ self.assertEqual(job.sources, [source])
+ self.assertIs(job._client, client)
+ self.assertEqual(job.job_type, self.JOB_TYPE)
+ self.assertEqual(
+ job.path,
+ '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID))
+
+ self._verifyInitialReadonlyProperties(job)
+
+ # set/read from resource['configuration']['copy']
+ self.assertIsNone(job.create_disposition)
+ self.assertIsNone(job.write_disposition)
+
+ def test_from_api_repr_missing_identity(self):
+ self._setUpConstants()
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = {}
+ klass = self._get_target_class()
+ with self.assertRaises(KeyError):
+ klass.from_api_repr(RESOURCE, client=client)
+
+ def test_from_api_repr_missing_config(self):
+ self._setUpConstants()
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = {
+ 'id': '%s:%s' % (self.PROJECT, self.DS_ID),
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ }
+ }
+ klass = self._get_target_class()
+ with self.assertRaises(KeyError):
+ klass.from_api_repr(RESOURCE, client=client)
+
+ def test_from_api_repr_bare(self):
+ self._setUpConstants()
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = {
+ 'id': self.JOB_ID,
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'copy': {
+ 'sourceTables': [{
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.SOURCE_TABLE,
+ }],
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.DESTINATION_TABLE,
+ },
+ }
+ },
+ }
+ klass = self._get_target_class()
+ job = klass.from_api_repr(RESOURCE, client=client)
+ self.assertIs(job._client, client)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_from_api_repr_w_sourcetable(self):
+ self._setUpConstants()
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = {
+ 'id': self.JOB_ID,
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'copy': {
+ 'sourceTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.SOURCE_TABLE,
+ },
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.DESTINATION_TABLE,
+ },
+ }
+ },
+ }
+ klass = self._get_target_class()
+ job = klass.from_api_repr(RESOURCE, client=client)
+ self.assertIs(job._client, client)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_from_api_repr_wo_sources(self):
+ self._setUpConstants()
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = {
+ 'id': self.JOB_ID,
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'copy': {
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.DESTINATION_TABLE,
+ },
+ }
+ },
+ }
+ klass = self._get_target_class()
+ with self.assertRaises(KeyError):
+ klass.from_api_repr(RESOURCE, client=client)
+
+ def test_from_api_repr_w_properties(self):
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = self._makeResource()
+ copy_config = RESOURCE['configuration']['copy']
+ copy_config['createDisposition'] = 'CREATE_IF_NEEDED'
+ klass = self._get_target_class()
+ job = klass.from_api_repr(RESOURCE, client=client)
+ self.assertIs(job._client, client)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_begin_w_bound_client(self):
+ PATH = '/projects/%s/jobs' % (self.PROJECT,)
+ RESOURCE = self._makeResource()
+ # Ensure None for missing server-set props
+ del RESOURCE['statistics']['creationTime']
+ del RESOURCE['etag']
+ del RESOURCE['selfLink']
+ del RESOURCE['user_email']
+ conn = _Connection(RESOURCE)
+ client = _make_client(project=self.PROJECT, connection=conn)
+ source = self._table_ref(self.SOURCE_TABLE)
+ destination = self._table_ref(self.DESTINATION_TABLE)
+ job = self._make_one(self.JOB_ID, [source], destination, client)
+
+ job.begin()
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], PATH)
+ SENT = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'copy': {
+ 'sourceTables': [{
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.SOURCE_TABLE
+ }],
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.DESTINATION_TABLE,
+ },
+ },
+ },
+ }
+ self.assertEqual(req['data'], SENT)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_begin_w_alternate_client(self):
+ PATH = '/projects/%s/jobs' % (self.PROJECT,)
+ RESOURCE = self._makeResource(ended=True)
+ COPY_CONFIGURATION = {
+ 'sourceTables': [{
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.SOURCE_TABLE,
+ }],
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.DESTINATION_TABLE,
+ },
+ 'createDisposition': 'CREATE_NEVER',
+ 'writeDisposition': 'WRITE_TRUNCATE',
+ }
+ RESOURCE['configuration']['copy'] = COPY_CONFIGURATION
+ conn1 = _Connection()
+ client1 = _make_client(project=self.PROJECT, connection=conn1)
+ conn2 = _Connection(RESOURCE)
+ client2 = _make_client(project=self.PROJECT, connection=conn2)
+ source = self._table_ref(self.SOURCE_TABLE)
+ destination = self._table_ref(self.DESTINATION_TABLE)
+ config = CopyJobConfig()
+ config.create_disposition = 'CREATE_NEVER'
+ config.write_disposition = 'WRITE_TRUNCATE'
+ job = self._make_one(self.JOB_ID, [source], destination, client1,
+ config)
+ job.begin(client=client2)
+
+ self.assertEqual(len(conn1._requested), 0)
+ self.assertEqual(len(conn2._requested), 1)
+ req = conn2._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], PATH)
+ SENT = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'copy': COPY_CONFIGURATION,
+ },
+ }
+ self.assertEqual(req['data'], SENT)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_exists_miss_w_bound_client(self):
+ PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
+ conn = _Connection()
+ client = _make_client(project=self.PROJECT, connection=conn)
+
+ source = self._table_ref(self.SOURCE_TABLE)
+ destination = self._table_ref(self.DESTINATION_TABLE)
+ job = self._make_one(self.JOB_ID, [source], destination, client)
+
+ self.assertFalse(job.exists())
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], PATH)
+ self.assertEqual(req['query_params'], {'fields': 'id'})
+
+ def test_exists_hit_w_alternate_client(self):
+ PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
+ conn1 = _Connection()
+ client1 = _make_client(project=self.PROJECT, connection=conn1)
+ conn2 = _Connection({})
+ client2 = _make_client(project=self.PROJECT, connection=conn2)
+ source = self._table_ref(self.SOURCE_TABLE)
+ destination = self._table_ref(self.DESTINATION_TABLE)
+ job = self._make_one(self.JOB_ID, [source], destination, client1)
+
+ self.assertTrue(job.exists(client=client2))
+
+ self.assertEqual(len(conn1._requested), 0)
+ self.assertEqual(len(conn2._requested), 1)
+ req = conn2._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], PATH)
+ self.assertEqual(req['query_params'], {'fields': 'id'})
+
+ def test_reload_w_bound_client(self):
+ PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
+ RESOURCE = self._makeResource()
+ conn = _Connection(RESOURCE)
+ client = _make_client(project=self.PROJECT, connection=conn)
+ source = self._table_ref(self.SOURCE_TABLE)
+ destination = self._table_ref(self.DESTINATION_TABLE)
+ job = self._make_one(self.JOB_ID, [source], destination, client)
+
+ job.reload()
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], PATH)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_reload_w_alternate_client(self):
+ PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
+ RESOURCE = self._makeResource()
+ conn1 = _Connection()
+ client1 = _make_client(project=self.PROJECT, connection=conn1)
+ conn2 = _Connection(RESOURCE)
+ client2 = _make_client(project=self.PROJECT, connection=conn2)
+ source = self._table_ref(self.SOURCE_TABLE)
+ destination = self._table_ref(self.DESTINATION_TABLE)
+ job = self._make_one(self.JOB_ID, [source], destination, client1)
+
+ job.reload(client=client2)
+
+ self.assertEqual(len(conn1._requested), 0)
+ self.assertEqual(len(conn2._requested), 1)
+ req = conn2._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], PATH)
+ self._verifyResourceProperties(job, RESOURCE)
+
+
+class TestExtractJob(unittest.TestCase, _Base):
+ JOB_TYPE = 'extract'
+ SOURCE_TABLE = 'source_table'
+ DESTINATION_URI = 'gs://bucket_name/object_name'
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.job import ExtractJob
+
+ return ExtractJob
+
+ def _makeResource(self, started=False, ended=False):
+ resource = super(TestExtractJob, self)._makeResource(
+ started, ended)
+ config = resource['configuration']['extract']
+ config['sourceTable'] = {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.SOURCE_TABLE,
+ }
+ config['destinationUris'] = [self.DESTINATION_URI]
+ return resource
+
+ def _verifyResourceProperties(self, job, resource):
+ self._verifyReadonlyResourceProperties(job, resource)
+
+ config = resource.get('configuration', {}).get('extract')
+
+ self.assertEqual(job.destination_uris, config['destinationUris'])
+
+ table_ref = config['sourceTable']
+ self.assertEqual(job.source.project, table_ref['projectId'])
+ self.assertEqual(job.source.dataset_id, table_ref['datasetId'])
+ self.assertEqual(job.source.table_id, table_ref['tableId'])
+
+ if 'compression' in config:
+ self.assertEqual(
+ job.compression, config['compression'])
+ else:
+ self.assertIsNone(job.compression)
+
+ if 'destinationFormat' in config:
+ self.assertEqual(
+ job.destination_format, config['destinationFormat'])
+ else:
+ self.assertIsNone(job.destination_format)
+
+ if 'fieldDelimiter' in config:
+ self.assertEqual(
+ job.field_delimiter, config['fieldDelimiter'])
+ else:
+ self.assertIsNone(job.field_delimiter)
+
+ if 'printHeader' in config:
+ self.assertEqual(
+ job.print_header, config['printHeader'])
+ else:
+ self.assertIsNone(job.print_header)
+
+ def test_ctor(self):
+ client = _make_client(project=self.PROJECT)
+ source = _Table(self.SOURCE_TABLE)
+ job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI],
+ client)
+ self.assertEqual(job.source, source)
+ self.assertEqual(job.destination_uris, [self.DESTINATION_URI])
+ self.assertIs(job._client, client)
+ self.assertEqual(job.job_type, self.JOB_TYPE)
+ self.assertEqual(
+ job.path,
+ '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID))
+
+ self._verifyInitialReadonlyProperties(job)
+
+ # set/read from resource['configuration']['extract']
+ self.assertIsNone(job.compression)
+ self.assertIsNone(job.destination_format)
+ self.assertIsNone(job.field_delimiter)
+ self.assertIsNone(job.print_header)
+
+ def test_destination_uri_file_counts(self):
+ file_counts = 23
+ client = _make_client(project=self.PROJECT)
+ source = _Table(self.SOURCE_TABLE)
+ job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI],
+ client)
+ self.assertIsNone(job.destination_uri_file_counts)
+
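+ # The property stays None until the nested 'destinationUriFileCounts' statistic is populated below.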
+ statistics = job._properties['statistics'] = {}
+ self.assertIsNone(job.destination_uri_file_counts)
+
+ extract_stats = statistics['extract'] = {}
+ self.assertIsNone(job.destination_uri_file_counts)
+
+ extract_stats['destinationUriFileCounts'] = str(file_counts)
+ self.assertEqual(job.destination_uri_file_counts, file_counts)
+
+ def test_from_api_repr_missing_identity(self):
+ self._setUpConstants()
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = {}
+ klass = self._get_target_class()
+ with self.assertRaises(KeyError):
+ klass.from_api_repr(RESOURCE, client=client)
+
+ def test_from_api_repr_missing_config(self):
+ self._setUpConstants()
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = {
+ 'id': '%s:%s' % (self.PROJECT, self.DS_ID),
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ }
+ }
+ klass = self._get_target_class()
+ with self.assertRaises(KeyError):
+ klass.from_api_repr(RESOURCE, client=client)
+
+ def test_from_api_repr_bare(self):
+ self._setUpConstants()
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = {
+ 'id': self.JOB_ID,
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'extract': {
+ 'sourceTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.SOURCE_TABLE,
+ },
+ 'destinationUris': [self.DESTINATION_URI],
+ }
+ },
+ }
+ klass = self._get_target_class()
+ job = klass.from_api_repr(RESOURCE, client=client)
+ self.assertIs(job._client, client)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_from_api_repr_w_properties(self):
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = self._makeResource()
+ extract_config = RESOURCE['configuration']['extract']
+ extract_config['compression'] = 'GZIP'
+ klass = self._get_target_class()
+ job = klass.from_api_repr(RESOURCE, client=client)
+ self.assertIs(job._client, client)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_begin_w_bound_client(self):
+ PATH = '/projects/%s/jobs' % (self.PROJECT,)
+ RESOURCE = self._makeResource()
+ # Ensure None for missing server-set props
+ del RESOURCE['statistics']['creationTime']
+ del RESOURCE['etag']
+ del RESOURCE['selfLink']
+ del RESOURCE['user_email']
+ conn = _Connection(RESOURCE)
+ client = _make_client(project=self.PROJECT, connection=conn)
+ source_dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ source = source_dataset.table(self.SOURCE_TABLE)
+ job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI],
+ client)
+
+ job.begin()
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], PATH)
+ SENT = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'extract': {
+ 'sourceTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.SOURCE_TABLE
+ },
+ 'destinationUris': [self.DESTINATION_URI],
+ },
+ },
+ }
+ self.assertEqual(req['data'], SENT)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_begin_w_alternate_client(self):
+ PATH = '/projects/%s/jobs' % (self.PROJECT,)
+ RESOURCE = self._makeResource(ended=True)
+ EXTRACT_CONFIGURATION = {
+ 'sourceTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.SOURCE_TABLE,
+ },
+ 'destinationUris': [self.DESTINATION_URI],
+ 'compression': 'GZIP',
+ 'destinationFormat': 'NEWLINE_DELIMITED_JSON',
+ 'fieldDelimiter': '|',
+ 'printHeader': False,
+ }
+ RESOURCE['configuration']['extract'] = EXTRACT_CONFIGURATION
+ conn1 = _Connection()
+ client1 = _make_client(project=self.PROJECT, connection=conn1)
+ conn2 = _Connection(RESOURCE)
+ client2 = _make_client(project=self.PROJECT, connection=conn2)
+ source_dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ source = source_dataset.table(self.SOURCE_TABLE)
+ job_config = ExtractJobConfig()
+ job_config.compression = 'GZIP'
+ job_config.destination_format = 'NEWLINE_DELIMITED_JSON'
+ job_config.field_delimiter = '|'
+ job_config.print_header = False
+ job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI],
+ client1, job_config)
+
+ job.begin(client=client2)
+
+ self.assertEqual(len(conn1._requested), 0)
+ self.assertEqual(len(conn2._requested), 1)
+ req = conn2._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], PATH)
+ SENT = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'extract': EXTRACT_CONFIGURATION,
+ },
+ }
+ self.assertEqual(req['data'], SENT)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_exists_miss_w_bound_client(self):
+ PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
+ conn = _Connection()
+ client = _make_client(project=self.PROJECT, connection=conn)
+ source = _Table(self.SOURCE_TABLE)
+ job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI],
+ client)
+
+ self.assertFalse(job.exists())
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], PATH)
+ self.assertEqual(req['query_params'], {'fields': 'id'})
+
+ def test_exists_hit_w_alternate_client(self):
+ PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
+ conn1 = _Connection()
+ client1 = _make_client(project=self.PROJECT, connection=conn1)
+ conn2 = _Connection({})
+ client2 = _make_client(project=self.PROJECT, connection=conn2)
+ source = _Table(self.SOURCE_TABLE)
+ job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI],
+ client1)
+
+ self.assertTrue(job.exists(client=client2))
+
+ self.assertEqual(len(conn1._requested), 0)
+ self.assertEqual(len(conn2._requested), 1)
+ req = conn2._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], PATH)
+ self.assertEqual(req['query_params'], {'fields': 'id'})
+
+ def test_reload_w_bound_client(self):
+ PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
+ RESOURCE = self._makeResource()
+ conn = _Connection(RESOURCE)
+ client = _make_client(project=self.PROJECT, connection=conn)
+ source_dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ source = source_dataset.table(self.SOURCE_TABLE)
+ job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI],
+ client)
+
+ job.reload()
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], PATH)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_reload_w_alternate_client(self):
+ PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
+ RESOURCE = self._makeResource()
+ conn1 = _Connection()
+ client1 = _make_client(project=self.PROJECT, connection=conn1)
+ conn2 = _Connection(RESOURCE)
+ client2 = _make_client(project=self.PROJECT, connection=conn2)
+ source_dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ source = source_dataset.table(self.SOURCE_TABLE)
+ job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI],
+ client1)
+
+ job.reload(client=client2)
+
+ self.assertEqual(len(conn1._requested), 0)
+ self.assertEqual(len(conn2._requested), 1)
+ req = conn2._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], PATH)
+ self._verifyResourceProperties(job, RESOURCE)
+
+
+class TestQueryJobConfig(unittest.TestCase, _Base):
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.job import QueryJobConfig
+
+ return QueryJobConfig
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_ctor(self):
+ config = self._make_one()
+ self.assertEqual(config._properties, {})
+
+ def test_from_api_repr_empty(self):
+ klass = self._get_target_class()
+ config = klass.from_api_repr({})
+ self.assertIsNone(config.dry_run)
+ self.assertIsNone(config.use_legacy_sql)
+ self.assertIsNone(config.default_dataset)
+
+ def test_from_api_repr_normal(self):
+ resource = {
+ 'useLegacySql': True,
+ 'query': 'no property for me',
+ 'defaultDataset': {
+ 'projectId': 'someproject',
+ 'datasetId': 'somedataset',
+ },
+ 'someNewProperty': 'I should be saved, too.',
+ }
+ klass = self._get_target_class()
+
+ config = klass.from_api_repr(resource)
+
+ self.assertTrue(config.use_legacy_sql)
+ self.assertEqual(
+ config.default_dataset,
+ DatasetReference('someproject', 'somedataset'))
+ # Make sure unknown properties propagate.
+ self.assertEqual(config._properties['query'], 'no property for me')
+ self.assertEqual(
+ config._properties['someNewProperty'], 'I should be saved, too.')
+
+ def test_to_api_repr_normal(self):
+ config = self._make_one()
+ config.use_legacy_sql = True
+ config.default_dataset = DatasetReference(
+ 'someproject', 'somedataset')
+ config._properties['someNewProperty'] = 'Woohoo, alpha stuff.'
+
+ resource = config.to_api_repr()
+
+ self.assertTrue(resource['useLegacySql'])
+ self.assertEqual(
+ resource['defaultDataset']['projectId'], 'someproject')
+ self.assertEqual(
+ resource['defaultDataset']['datasetId'], 'somedataset')
+ # Make sure unknown properties propagate to the API representation.
+ self.assertEqual(
+ resource['someNewProperty'], 'Woohoo, alpha stuff.')
+
+
+class TestQueryJob(unittest.TestCase, _Base):
+ JOB_TYPE = 'query'
+ QUERY = 'select count(*) from persons'
+ DESTINATION_TABLE = 'destination_table'
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.job import QueryJob
+
+ return QueryJob
+
+ def _makeResource(self, started=False, ended=False):
+ resource = super(TestQueryJob, self)._makeResource(
+ started, ended)
+ config = resource['configuration']['query']
+ config['query'] = self.QUERY
+
+ if ended:
+ resource['status'] = {'state': 'DONE'}
+
+ return resource
+
+ def _verifyBooleanResourceProperties(self, job, config):
+ if 'allowLargeResults' in config:
+ self.assertEqual(job.allow_large_results,
+ config['allowLargeResults'])
+ else:
+ self.assertIsNone(job.allow_large_results)
+ if 'flattenResults' in config:
+ self.assertEqual(job.flatten_results,
+ config['flattenResults'])
+ else:
+ self.assertIsNone(job.flatten_results)
+ if 'useQueryCache' in config:
+ self.assertEqual(job.use_query_cache,
+ config['useQueryCache'])
+ else:
+ self.assertIsNone(job.use_query_cache)
+ if 'useLegacySql' in config:
+ self.assertEqual(job.use_legacy_sql,
+ config['useLegacySql'])
+ else:
+ self.assertIsNone(job.use_legacy_sql)
+
+ def _verifyIntegerResourceProperties(self, job, config):
+ if 'maximumBillingTier' in config:
+ self.assertEqual(
+ job.maximum_billing_tier, config['maximumBillingTier'])
+ else:
+ self.assertIsNone(job.maximum_billing_tier)
+ if 'maximumBytesBilled' in config:
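+ # The API carries maximumBytesBilled as a string; the property exposes it as an int.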
+ self.assertEqual(
+ str(job.maximum_bytes_billed), config['maximumBytesBilled'])
+ self.assertIsInstance(job.maximum_bytes_billed, int)
+ else:
+ self.assertIsNone(job.maximum_bytes_billed)
+
+ def _verify_udf_resources(self, job, config):
+ udf_resources = config.get('userDefinedFunctionResources', ())
+ self.assertEqual(len(job.udf_resources), len(udf_resources))
+ for found, expected in zip(job.udf_resources, udf_resources):
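+ # Each UDF resource is either a 'resourceUri' or an 'inlineCode' entry.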
+ if 'resourceUri' in expected:
+ self.assertEqual(found.udf_type, 'resourceUri')
+ self.assertEqual(found.value, expected['resourceUri'])
+ else:
+ self.assertEqual(found.udf_type, 'inlineCode')
+ self.assertEqual(found.value, expected['inlineCode'])
+
+ def _verifyQueryParameters(self, job, config):
+ query_parameters = config.get('queryParameters', ())
+ self.assertEqual(len(job.query_parameters), len(query_parameters))
+ for found, expected in zip(job.query_parameters, query_parameters):
+ self.assertEqual(found.to_api_repr(), expected)
+
+ def _verify_table_definitions(self, job, config):
+ table_defs = config.get('tableDefinitions')
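+ # tableDefinitions maps external table names to their external configuration resources.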
+ if job.table_definitions is None:
+ self.assertIsNone(table_defs)
+ else:
+ self.assertEqual(len(job.table_definitions), len(table_defs))
+ for found_key, found_ec in job.table_definitions.items():
+ expected_ec = table_defs.get(found_key)
+ self.assertIsNotNone(expected_ec)
+ self.assertEqual(found_ec.to_api_repr(), expected_ec)
+
+ def _verify_configuration_properties(self, job, configuration):
+ if 'dryRun' in configuration:
+ self.assertEqual(job.dry_run,
+ configuration['dryRun'])
+ else:
+ self.assertIsNone(job.dry_run)
+
+ def _verifyResourceProperties(self, job, resource):
+ self._verifyReadonlyResourceProperties(job, resource)
+
+ configuration = resource.get('configuration', {})
+ self._verify_configuration_properties(job, configuration)
+
+ query_config = resource.get('configuration', {}).get('query')
+ self._verifyBooleanResourceProperties(job, query_config)
+ self._verifyIntegerResourceProperties(job, query_config)
+ self._verify_udf_resources(job, query_config)
+ self._verifyQueryParameters(job, query_config)
+ self._verify_table_definitions(job, query_config)
+
+ self.assertEqual(job.query, query_config['query'])
+ if 'createDisposition' in query_config:
+ self.assertEqual(job.create_disposition,
+ query_config['createDisposition'])
+ else:
+ self.assertIsNone(job.create_disposition)
+ if 'defaultDataset' in query_config:
+ ds_ref = job.default_dataset
+ ds_ref = {
+ 'projectId': ds_ref.project,
+ 'datasetId': ds_ref.dataset_id,
+ }
+ self.assertEqual(ds_ref, query_config['defaultDataset'])
+ else:
+ self.assertIsNone(job.default_dataset)
+ if 'destinationTable' in query_config:
+ table = job.destination
+ tb_ref = {
+ 'projectId': table.project,
+ 'datasetId': table.dataset_id,
+ 'tableId': table.table_id
+ }
+ self.assertEqual(tb_ref, query_config['destinationTable'])
+ else:
+ self.assertIsNone(job.destination)
+ if 'priority' in query_config:
+ self.assertEqual(job.priority,
+ query_config['priority'])
+ else:
+ self.assertIsNone(job.priority)
+ if 'writeDisposition' in query_config:
+ self.assertEqual(job.write_disposition,
+ query_config['writeDisposition'])
+ else:
+ self.assertIsNone(job.write_disposition)
+
+ def test_ctor_defaults(self):
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+ self.assertEqual(job.query, self.QUERY)
+ self.assertIs(job._client, client)
+ self.assertEqual(job.job_type, self.JOB_TYPE)
+ self.assertEqual(
+ job.path,
+ '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID))
+
+ self._verifyInitialReadonlyProperties(job)
+
+ self.assertFalse(job.use_legacy_sql)
+
+ # set/read from resource['configuration']['query']
+ self.assertIsNone(job.allow_large_results)
+ self.assertIsNone(job.create_disposition)
+ self.assertIsNone(job.default_dataset)
+ self.assertIsNone(job.destination)
+ self.assertIsNone(job.flatten_results)
+ self.assertIsNone(job.priority)
+ self.assertIsNone(job.use_query_cache)
+ self.assertIsNone(job.dry_run)
+ self.assertIsNone(job.write_disposition)
+ self.assertIsNone(job.maximum_billing_tier)
+ self.assertIsNone(job.maximum_bytes_billed)
+ self.assertIsNone(job.table_definitions)
+
+ def test_ctor_w_udf_resources(self):
+ from google.cloud.bigquery.job import QueryJobConfig
+ from google.cloud.bigquery.query import UDFResource
+
+ RESOURCE_URI = 'gs://some-bucket/js/lib.js'
+ udf_resources = [UDFResource("resourceUri", RESOURCE_URI)]
+ client = _make_client(project=self.PROJECT)
+ config = QueryJobConfig()
+ config.udf_resources = udf_resources
+ job = self._make_one(
+ self.JOB_ID, self.QUERY, client, job_config=config)
+ self.assertEqual(job.udf_resources, udf_resources)
+
+ def test_ctor_w_query_parameters(self):
+ from google.cloud.bigquery.job import QueryJobConfig
+ from google.cloud.bigquery.query import ScalarQueryParameter
+
+ query_parameters = [ScalarQueryParameter("foo", 'INT64', 123)]
+ client = _make_client(project=self.PROJECT)
+ config = QueryJobConfig()
+ config.query_parameters = query_parameters
+ job = self._make_one(
+ self.JOB_ID, self.QUERY, client, job_config=config)
+ self.assertEqual(job.query_parameters, query_parameters)
+
+ def test_from_api_repr_missing_identity(self):
+ self._setUpConstants()
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = {}
+ klass = self._get_target_class()
+ with self.assertRaises(KeyError):
+ klass.from_api_repr(RESOURCE, client=client)
+
+ def test_from_api_repr_missing_config(self):
+ self._setUpConstants()
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = {
+ 'id': '%s:%s' % (self.PROJECT, self.DS_ID),
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ }
+ }
+ klass = self._get_target_class()
+ with self.assertRaises(KeyError):
+ klass.from_api_repr(RESOURCE, client=client)
+
+ def test_from_api_repr_bare(self):
+ self._setUpConstants()
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = {
+ 'id': self.JOB_ID,
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'query': {'query': self.QUERY},
+ },
+ }
+ klass = self._get_target_class()
+ job = klass.from_api_repr(RESOURCE, client=client)
+ self.assertIs(job._client, client)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_from_api_repr_w_properties(self):
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = self._makeResource()
+ query_config = RESOURCE['configuration']['query']
+ query_config['createDisposition'] = 'CREATE_IF_NEEDED'
+ query_config['writeDisposition'] = 'WRITE_TRUNCATE'
+ query_config['destinationTable'] = {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.DESTINATION_TABLE,
+ }
+ klass = self._get_target_class()
+ job = klass.from_api_repr(RESOURCE, client=client)
+ self.assertIs(job._client, client)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_cancelled(self):
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
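+ # A cancelled job surfaces as DONE with an errorResult whose reason is 'stopped'.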
+ job._properties['status'] = {
+ 'state': 'DONE',
+ 'errorResult': {
+ 'reason': 'stopped'
+ }
+ }
+
+ self.assertTrue(job.cancelled())
+
+ def test_done(self):
+ client = _make_client(project=self.PROJECT)
+ resource = self._makeResource(ended=True)
+ job = self._get_target_class().from_api_repr(resource, client)
+ self.assertTrue(job.done())
+
+ def test_query_plan(self):
+ from google.cloud.bigquery.job import QueryPlanEntry
+ from google.cloud.bigquery.job import QueryPlanEntryStep
+
+ plan_entries = [{
+ 'name': 'NAME',
+ 'id': 1234,
+ 'waitRatioAvg': 2.71828,
+ 'waitRatioMax': 3.14159,
+ 'readRatioAvg': 1.41421,
+ 'readRatioMax': 1.73205,
+ 'computeRatioAvg': 0.69315,
+ 'computeRatioMax': 1.09861,
+ 'writeRatioAvg': 3.32193,
+ 'writeRatioMax': 2.30258,
+ 'recordsRead': '100',
+ 'recordsWritten': '1',
+ 'status': 'STATUS',
+ 'steps': [{
+ 'kind': 'KIND',
+ 'substeps': ['SUBSTEP1', 'SUBSTEP2'],
+ }],
+ }]
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+ self.assertEqual(job.query_plan, [])
+
+ statistics = job._properties['statistics'] = {}
+ self.assertEqual(job.query_plan, [])
+
+ query_stats = statistics['query'] = {}
+ self.assertEqual(job.query_plan, [])
+
+ query_stats['queryPlan'] = plan_entries
+
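+ # recordsRead and recordsWritten arrive as strings; QueryPlanEntry exposes them as ints.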
+ self.assertEqual(len(job.query_plan), len(plan_entries))
+ for found, expected in zip(job.query_plan, plan_entries):
+ self.assertIsInstance(found, QueryPlanEntry)
+ self.assertEqual(found.name, expected['name'])
+ self.assertEqual(found.entry_id, expected['id'])
+ self.assertEqual(found.wait_ratio_avg, expected['waitRatioAvg'])
+ self.assertEqual(found.wait_ratio_max, expected['waitRatioMax'])
+ self.assertEqual(found.read_ratio_avg, expected['readRatioAvg'])
+ self.assertEqual(found.read_ratio_max, expected['readRatioMax'])
+ self.assertEqual(
+ found.compute_ratio_avg, expected['computeRatioAvg'])
+ self.assertEqual(
+ found.compute_ratio_max, expected['computeRatioMax'])
+ self.assertEqual(found.write_ratio_avg, expected['writeRatioAvg'])
+ self.assertEqual(found.write_ratio_max, expected['writeRatioMax'])
+ self.assertEqual(
+ found.records_read, int(expected['recordsRead']))
+ self.assertEqual(
+ found.records_written, int(expected['recordsWritten']))
+ self.assertEqual(found.status, expected['status'])
+
+ self.assertEqual(len(found.steps), len(expected['steps']))
+ for f_step, e_step in zip(found.steps, expected['steps']):
+ self.assertIsInstance(f_step, QueryPlanEntryStep)
+ self.assertEqual(f_step.kind, e_step['kind'])
+ self.assertEqual(f_step.substeps, e_step['substeps'])
+
+ def test_total_bytes_processed(self):
+ total_bytes = 1234
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+ self.assertIsNone(job.total_bytes_processed)
+
+ statistics = job._properties['statistics'] = {}
+ self.assertIsNone(job.total_bytes_processed)
+
+ query_stats = statistics['query'] = {}
+ self.assertIsNone(job.total_bytes_processed)
+
+ query_stats['totalBytesProcessed'] = str(total_bytes)
+ self.assertEqual(job.total_bytes_processed, total_bytes)
+
+ def test_total_bytes_billed(self):
+ total_bytes = 1234
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+ self.assertIsNone(job.total_bytes_billed)
+
+ statistics = job._properties['statistics'] = {}
+ self.assertIsNone(job.total_bytes_billed)
+
+ query_stats = statistics['query'] = {}
+ self.assertIsNone(job.total_bytes_billed)
+
+ query_stats['totalBytesBilled'] = str(total_bytes)
+ self.assertEqual(job.total_bytes_billed, total_bytes)
+
+ def test_billing_tier(self):
+ billing_tier = 1
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+ self.assertIsNone(job.billing_tier)
+
+ statistics = job._properties['statistics'] = {}
+ self.assertIsNone(job.billing_tier)
+
+ query_stats = statistics['query'] = {}
+ self.assertIsNone(job.billing_tier)
+
+ query_stats['billingTier'] = billing_tier
+ self.assertEqual(job.billing_tier, billing_tier)
+
+ def test_cache_hit(self):
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+ self.assertIsNone(job.cache_hit)
+
+ statistics = job._properties['statistics'] = {}
+ self.assertIsNone(job.cache_hit)
+
+ query_stats = statistics['query'] = {}
+ self.assertIsNone(job.cache_hit)
+
+ query_stats['cacheHit'] = True
+ self.assertTrue(job.cache_hit)
+
+ def test_num_dml_affected_rows(self):
+ num_rows = 1234
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+ self.assertIsNone(job.num_dml_affected_rows)
+
+ statistics = job._properties['statistics'] = {}
+ self.assertIsNone(job.num_dml_affected_rows)
+
+ query_stats = statistics['query'] = {}
+ self.assertIsNone(job.num_dml_affected_rows)
+
+ query_stats['numDmlAffectedRows'] = str(num_rows)
+ self.assertEqual(job.num_dml_affected_rows, num_rows)
+
+ def test_statement_type(self):
+ statement_type = 'SELECT'
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+ self.assertIsNone(job.statement_type)
+
+ statistics = job._properties['statistics'] = {}
+ self.assertIsNone(job.statement_type)
+
+ query_stats = statistics['query'] = {}
+ self.assertIsNone(job.statement_type)
+
+ query_stats['statementType'] = statement_type
+ self.assertEqual(job.statement_type, statement_type)
+
+ def test_referenced_tables(self):
+ from google.cloud.bigquery.table import TableReference
+
+ ref_tables_resource = [{
+ 'projectId': self.PROJECT,
+ 'datasetId': 'dataset',
+ 'tableId': 'local1',
+ }, {
+ 'projectId': self.PROJECT,
+ 'datasetId': 'dataset',
+ 'tableId': 'local2',
+ }, {
+ 'projectId': 'other-project-123',
+ 'datasetId': 'other-dataset',
+ 'tableId': 'other-table',
+ }]
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+ self.assertEqual(job.referenced_tables, [])
+
+ statistics = job._properties['statistics'] = {}
+ self.assertEqual(job.referenced_tables, [])
+
+ query_stats = statistics['query'] = {}
+ self.assertEqual(job.referenced_tables, [])
+
+ query_stats['referencedTables'] = ref_tables_resource
+
+ local1, local2, remote = job.referenced_tables
+
+ self.assertIsInstance(local1, TableReference)
+ self.assertEqual(local1.table_id, 'local1')
+ self.assertEqual(local1.dataset_id, 'dataset')
+ self.assertEqual(local1.project, self.PROJECT)
+
+ self.assertIsInstance(local2, TableReference)
+ self.assertEqual(local2.table_id, 'local2')
+ self.assertEqual(local2.dataset_id, 'dataset')
+ self.assertEqual(local2.project, self.PROJECT)
+
+ self.assertIsInstance(remote, TableReference)
+ self.assertEqual(remote.table_id, 'other-table')
+ self.assertEqual(remote.dataset_id, 'other-dataset')
+ self.assertEqual(remote.project, 'other-project-123')
+
+ def test_undeclared_query_paramters(self):
+ from google.cloud.bigquery.query import ArrayQueryParameter
+ from google.cloud.bigquery.query import ScalarQueryParameter
+ from google.cloud.bigquery.query import StructQueryParameter
+
+ undeclared = [{
+ 'name': 'my_scalar',
+ 'parameterType': {
+ 'type': 'STRING',
+ },
+ 'parameterValue': {
+ 'value': 'value',
+ },
+ }, {
+ 'name': 'my_array',
+ 'parameterType': {
+ 'type': 'ARRAY',
+ 'arrayType': {
+ 'type': 'INT64',
+ },
+ },
+ 'parameterValue': {
+ 'arrayValues': [
+ {'value': '1066'},
+ {'value': '1745'},
+ ],
+ },
+ }, {
+ 'name': 'my_struct',
+ 'parameterType': {
+ 'type': 'STRUCT',
+ 'structTypes': [{
+ 'name': 'count',
+ 'type': {
+ 'type': 'INT64',
+ }
+ }],
+ },
+ 'parameterValue': {
+ 'structValues': {
+ 'count': {
+ 'value': '123',
+ },
+ }
+ },
+ }]
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+ self.assertEqual(job.undeclared_query_paramters, [])
+
+ statistics = job._properties['statistics'] = {}
+ self.assertEqual(job.undeclared_query_paramters, [])
+
+ query_stats = statistics['query'] = {}
+ self.assertEqual(job.undeclared_query_paramters, [])
+
+ query_stats['undeclaredQueryParamters'] = undeclared
+
+ scalar, array, struct = job.undeclared_query_paramters
+
+ self.assertIsInstance(scalar, ScalarQueryParameter)
+ self.assertEqual(scalar.name, 'my_scalar')
+ self.assertEqual(scalar.type_, 'STRING')
+ self.assertEqual(scalar.value, 'value')
+
+ self.assertIsInstance(array, ArrayQueryParameter)
+ self.assertEqual(array.name, 'my_array')
+ self.assertEqual(array.array_type, 'INT64')
+ self.assertEqual(array.values, [1066, 1745])
+
+ self.assertIsInstance(struct, StructQueryParameter)
+ self.assertEqual(struct.name, 'my_struct')
+ self.assertEqual(struct.struct_types, {'count': 'INT64'})
+ self.assertEqual(struct.struct_values, {'count': 123})
+
+ def test_query_results(self):
+ from google.cloud.bigquery.query import QueryResults
+
+ query_resource = {
+ 'jobComplete': True,
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ }
+ connection = _Connection(query_resource)
+ client = _make_client(self.PROJECT, connection=connection)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+ results = job.query_results()
+ self.assertIsInstance(results, QueryResults)
+
+ def test_query_results_w_cached_value(self):
+ from google.cloud.bigquery.query import QueryResults
+
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+ resource = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ }
+ query_results = QueryResults(resource)
+ job._query_results = query_results
+
+ results = job.query_results()
+
+ self.assertIs(results, query_results)
+
+ def test_result(self):
+ query_resource = {
+ 'jobComplete': True,
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ }
+ connection = _Connection(query_resource, query_resource)
+ client = _make_client(self.PROJECT, connection=connection)
+ resource = self._makeResource(ended=True)
+ job = self._get_target_class().from_api_repr(resource, client)
+
+ result = job.result()
+
+ self.assertEqual(list(result), [])
+
+ def test_result_invokes_begins(self):
+ begun_resource = self._makeResource()
+ incomplete_resource = {
+ 'jobComplete': False,
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ }
+ query_resource = copy.deepcopy(incomplete_resource)
+ query_resource['jobComplete'] = True
+ done_resource = copy.deepcopy(begun_resource)
+ done_resource['status'] = {'state': 'DONE'}
+ connection = _Connection(
+ begun_resource, incomplete_resource, query_resource, done_resource,
+ query_resource)
+ client = _make_client(project=self.PROJECT, connection=connection)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+
+ job.result()
+
+ self.assertEqual(len(connection._requested), 4)
+ begin_request, _, query_request, reload_request = connection._requested
+ self.assertEqual(begin_request['method'], 'POST')
+ self.assertEqual(query_request['method'], 'GET')
+ self.assertEqual(reload_request['method'], 'GET')
+
+ def test_result_w_timeout(self):
+ begun_resource = self._makeResource()
+ query_resource = {
+ 'jobComplete': True,
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ }
+ done_resource = copy.deepcopy(begun_resource)
+ done_resource['status'] = {'state': 'DONE'}
+ connection = _Connection(
+ begun_resource, query_resource, done_resource)
+ client = _make_client(project=self.PROJECT, connection=connection)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+
+ job.result(timeout=1.0)
+
+ self.assertEqual(len(connection._requested), 3)
+ begin_request, query_request, reload_request = connection._requested
+ self.assertEqual(begin_request['method'], 'POST')
+ self.assertEqual(query_request['method'], 'GET')
+ self.assertEqual(
+ query_request['path'],
+ '/projects/{}/queries/{}'.format(self.PROJECT, self.JOB_ID))
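+ # The 1.0 s budget is trimmed to a 900 ms timeoutMs, presumably to leave headroom for the surrounding requests.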
+ self.assertEqual(query_request['query_params']['timeoutMs'], 900)
+ self.assertEqual(reload_request['method'], 'GET')
+
+ def test_result_error(self):
+ from google.cloud import exceptions
+
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+ error_result = {
+ 'debugInfo': 'DEBUG',
+ 'location': 'LOCATION',
+ 'message': 'MESSAGE',
+ 'reason': 'invalid'
+ }
+ job._properties['status'] = {
+ 'errorResult': error_result,
+ 'errors': [error_result],
+ 'state': 'DONE'
+ }
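+ # Resolve the future from the DONE-with-error status so that result() raises the mapped exception.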
+ job._set_future_result()
+
+ with self.assertRaises(exceptions.GoogleCloudError) as exc_info:
+ job.result()
+
+ self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError)
+ self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST)
+
+ def test_begin_w_bound_client(self):
+ from google.cloud.bigquery.dataset import DatasetReference
+ from google.cloud.bigquery.job import QueryJobConfig
+
+ PATH = '/projects/%s/jobs' % (self.PROJECT,)
+ DS_ID = 'DATASET'
+ RESOURCE = self._makeResource()
+ # Ensure None for missing server-set props
+ del RESOURCE['statistics']['creationTime']
+ del RESOURCE['etag']
+ del RESOURCE['selfLink']
+ del RESOURCE['user_email']
+ conn = _Connection(RESOURCE)
+ client = _make_client(project=self.PROJECT, connection=conn)
+
+ config = QueryJobConfig()
+ config.default_dataset = DatasetReference(self.PROJECT, DS_ID)
+ job = self._make_one(
+ self.JOB_ID, self.QUERY, client, job_config=config)
+
+ job.begin()
+
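+ # begin() refreshes the job from the server response, which here has neither a defaultDataset nor UDF resources.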
+ self.assertIsNone(job.default_dataset)
+ self.assertEqual(job.udf_resources, [])
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], PATH)
+ SENT = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'query': {
+ 'query': self.QUERY,
+ 'useLegacySql': False,
+ 'defaultDataset': {
+ 'projectId': self.PROJECT,
+ 'datasetId': DS_ID,
+ },
+ },
+ },
+ }
+ self._verifyResourceProperties(job, RESOURCE)
+ self.assertEqual(req['data'], SENT)
+
+ def test_begin_w_alternate_client(self):
+ from google.cloud.bigquery.dataset import DatasetReference
+ from google.cloud.bigquery.job import QueryJobConfig
+
+ PATH = '/projects/%s/jobs' % (self.PROJECT,)
+ TABLE = 'TABLE'
+ DS_ID = 'DATASET'
+ RESOURCE = self._makeResource(ended=True)
+ QUERY_CONFIGURATION = {
+ 'query': self.QUERY,
+ 'allowLargeResults': True,
+ 'createDisposition': 'CREATE_NEVER',
+ 'defaultDataset': {
+ 'projectId': self.PROJECT,
+ 'datasetId': DS_ID,
+ },
+ 'destinationTable': {
+ 'projectId': self.PROJECT,
+ 'datasetId': DS_ID,
+ 'tableId': TABLE,
+ },
+ 'flattenResults': True,
+ 'priority': 'INTERACTIVE',
+ 'useQueryCache': True,
+ 'useLegacySql': True,
+ 'writeDisposition': 'WRITE_TRUNCATE',
+ 'maximumBillingTier': 4,
+ 'maximumBytesBilled': '123456'
+ }
+ RESOURCE['configuration']['query'] = QUERY_CONFIGURATION
+ RESOURCE['configuration']['dryRun'] = True
+ conn1 = _Connection()
+ client1 = _make_client(project=self.PROJECT, connection=conn1)
+ conn2 = _Connection(RESOURCE)
+ client2 = _make_client(project=self.PROJECT, connection=conn2)
+ dataset_ref = DatasetReference(self.PROJECT, DS_ID)
+ table_ref = dataset_ref.table(TABLE)
+
+ config = QueryJobConfig()
+ config.allow_large_results = True
+ config.create_disposition = 'CREATE_NEVER'
+ config.default_dataset = dataset_ref
+ config.destination = table_ref
+ config.dry_run = True
+ config.flatten_results = True
+ config.maximum_billing_tier = 4
+ config.priority = 'INTERACTIVE'
+ config.use_legacy_sql = True
+ config.use_query_cache = True
+ config.write_disposition = 'WRITE_TRUNCATE'
+ config.maximum_bytes_billed = 123456
+ job = self._make_one(
+ self.JOB_ID, self.QUERY, client1, job_config=config)
+
+ job.begin(client=client2)
+
+ self.assertEqual(len(conn1._requested), 0)
+ self.assertEqual(len(conn2._requested), 1)
+ req = conn2._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], PATH)
+ SENT = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'dryRun': True,
+ 'query': QUERY_CONFIGURATION,
+ },
+ }
+ self._verifyResourceProperties(job, RESOURCE)
+ self.assertEqual(req['data'], SENT)
+
+ def test_begin_w_udf(self):
+ from google.cloud.bigquery.job import QueryJobConfig
+ from google.cloud.bigquery.query import UDFResource
+
+ RESOURCE_URI = 'gs://some-bucket/js/lib.js'
+ INLINE_UDF_CODE = 'var someCode = "here";'
+ PATH = '/projects/%s/jobs' % (self.PROJECT,)
+ RESOURCE = self._makeResource()
+ # Ensure None for missing server-set props
+ del RESOURCE['statistics']['creationTime']
+ del RESOURCE['etag']
+ del RESOURCE['selfLink']
+ del RESOURCE['user_email']
+ RESOURCE['configuration']['query']['userDefinedFunctionResources'] = [
+ {'resourceUri': RESOURCE_URI},
+ {'inlineCode': INLINE_UDF_CODE},
+ ]
+ conn = _Connection(RESOURCE)
+ client = _make_client(project=self.PROJECT, connection=conn)
+ udf_resources = [
+ UDFResource("resourceUri", RESOURCE_URI),
+ UDFResource("inlineCode", INLINE_UDF_CODE),
+ ]
+ config = QueryJobConfig()
+ config.udf_resources = udf_resources
+ config.use_legacy_sql = True
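+ # userDefinedFunctionResources apply to legacy SQL queries, so legacy SQL is enabled here.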
+ job = self._make_one(
+ self.JOB_ID, self.QUERY, client, job_config=config)
+
+ job.begin()
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], PATH)
+ self.assertEqual(job.udf_resources, udf_resources)
+ SENT = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'query': {
+ 'query': self.QUERY,
+ 'useLegacySql': True,
+ 'userDefinedFunctionResources': [
+ {'resourceUri': RESOURCE_URI},
+ {'inlineCode': INLINE_UDF_CODE},
+ ]
+ },
+ },
+ }
+ self._verifyResourceProperties(job, RESOURCE)
+ self.assertEqual(req['data'], SENT)
+
+ def test_begin_w_named_query_parameter(self):
+ from google.cloud.bigquery.job import QueryJobConfig
+ from google.cloud.bigquery.query import ScalarQueryParameter
+
+ query_parameters = [ScalarQueryParameter('foo', 'INT64', 123)]
+ PATH = '/projects/%s/jobs' % (self.PROJECT,)
+ RESOURCE = self._makeResource()
+ # Ensure None for missing server-set props
+ del RESOURCE['statistics']['creationTime']
+ del RESOURCE['etag']
+ del RESOURCE['selfLink']
+ del RESOURCE['user_email']
+ config = RESOURCE['configuration']['query']
+ config['parameterMode'] = 'NAMED'
+ config['queryParameters'] = [
+ {
+ 'name': 'foo',
+ 'parameterType': {
+ 'type': 'INT64',
+ },
+ 'parameterValue': {
+ 'value': '123',
+ },
+ },
+ ]
+ conn = _Connection(RESOURCE)
+ client = _make_client(project=self.PROJECT, connection=conn)
+ jconfig = QueryJobConfig()
+ jconfig.query_parameters = query_parameters
+ job = self._make_one(
+ self.JOB_ID, self.QUERY, client, job_config=jconfig)
+
+ job.begin()
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], PATH)
+ self.assertEqual(job.query_parameters, query_parameters)
+ SENT = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'query': {
+ 'query': self.QUERY,
+ 'useLegacySql': False,
+ 'parameterMode': 'NAMED',
+ 'queryParameters': config['queryParameters'],
+ },
+ },
+ }
+ self._verifyResourceProperties(job, RESOURCE)
+ self.assertEqual(req['data'], SENT)
+
+ def test_begin_w_positional_query_parameter(self):
+ from google.cloud.bigquery.job import QueryJobConfig
+ from google.cloud.bigquery.query import ScalarQueryParameter
+
+ query_parameters = [ScalarQueryParameter.positional('INT64', 123)]
+ PATH = '/projects/%s/jobs' % (self.PROJECT,)
+ RESOURCE = self._makeResource()
+ # Ensure None for missing server-set props
+ del RESOURCE['statistics']['creationTime']
+ del RESOURCE['etag']
+ del RESOURCE['selfLink']
+ del RESOURCE['user_email']
+ config = RESOURCE['configuration']['query']
+ config['parameterMode'] = 'POSITIONAL'
+ config['queryParameters'] = [
+ {
+ 'parameterType': {
+ 'type': 'INT64',
+ },
+ 'parameterValue': {
+ 'value': '123',
+ },
+ },
+ ]
+ conn = _Connection(RESOURCE)
+ client = _make_client(project=self.PROJECT, connection=conn)
+ jconfig = QueryJobConfig()
+ jconfig.query_parameters = query_parameters
+ job = self._make_one(
+ self.JOB_ID, self.QUERY, client, job_config=jconfig)
+
+ job.begin()
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], PATH)
+ self.assertEqual(job.query_parameters, query_parameters)
+ SENT = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'query': {
+ 'query': self.QUERY,
+ 'useLegacySql': False,
+ 'parameterMode': 'POSITIONAL',
+ 'queryParameters': config['queryParameters'],
+ },
+ },
+ }
+ self._verifyResourceProperties(job, RESOURCE)
+ self.assertEqual(req['data'], SENT)
+
+ def test_begin_w_table_defs(self):
+ from google.cloud.bigquery.job import QueryJobConfig
+ from google.cloud.bigquery.external_config import ExternalConfig
+ from google.cloud.bigquery.external_config import BigtableColumn
+ from google.cloud.bigquery.external_config import BigtableColumnFamily
+
+ PATH = '/projects/%s/jobs' % (self.PROJECT,)
+ RESOURCE = self._makeResource()
+ # Ensure None for missing server-set props
+ del RESOURCE['statistics']['creationTime']
+ del RESOURCE['etag']
+ del RESOURCE['selfLink']
+ del RESOURCE['user_email']
+
+ bt_config = ExternalConfig('BIGTABLE')
+ bt_config.ignore_unknown_values = True
+ bt_config.options.read_rowkey_as_string = True
+ cf = BigtableColumnFamily()
+ cf.family_id = 'cf'
+ col = BigtableColumn()
+ col.field_name = 'fn'
+ cf.columns = [col]
+ bt_config.options.column_families = [cf]
+ BT_CONFIG_RESOURCE = {
+ 'sourceFormat': 'BIGTABLE',
+ 'ignoreUnknownValues': True,
+ 'bigtableOptions': {
+ 'readRowkeyAsString': True,
+ 'columnFamilies': [{
+ 'familyId': 'cf',
+ 'columns': [{'fieldName': 'fn'}],
+ }],
+ },
+ }
+ CSV_CONFIG_RESOURCE = {
+ 'sourceFormat': 'CSV',
+ 'maxBadRecords': 8,
+ 'csvOptions': {
+ 'allowJaggedRows': True,
+ },
+ }
+ csv_config = ExternalConfig('CSV')
+ csv_config.max_bad_records = 8
+ csv_config.options.allow_jagged_rows = True
+ bt_table = 'bigtable-table'
+ csv_table = 'csv-table'
+ RESOURCE['configuration']['query']['tableDefinitions'] = {
+ bt_table: BT_CONFIG_RESOURCE,
+ csv_table: CSV_CONFIG_RESOURCE,
+ }
+ want_resource = copy.deepcopy(RESOURCE)
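+ # Snapshot the expected resource before handing RESOURCE to the fake connection, in case it is mutated downstream.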
+ conn = _Connection(RESOURCE)
+ client = _make_client(project=self.PROJECT, connection=conn)
+ config = QueryJobConfig()
+ config.table_definitions = {
+ bt_table: bt_config,
+ csv_table: csv_config,
+ }
+ config.use_legacy_sql = True
+ job = self._make_one(
+ self.JOB_ID, self.QUERY, client, job_config=config)
+
+ job.begin()
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], PATH)
+ SENT = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'query': {
+ 'query': self.QUERY,
+ 'useLegacySql': True,
+ 'tableDefinitions': {
+ bt_table: BT_CONFIG_RESOURCE,
+ csv_table: CSV_CONFIG_RESOURCE,
+ },
+ },
+ },
+ }
+ self._verifyResourceProperties(job, want_resource)
+ self.assertEqual(req['data'], SENT)
+
+ def test_dry_run_query(self):
+ from google.cloud.bigquery.job import QueryJobConfig
+
+ PATH = '/projects/%s/jobs' % (self.PROJECT,)
+ RESOURCE = self._makeResource()
+ # Ensure None for missing server-set props
+ del RESOURCE['statistics']['creationTime']
+ del RESOURCE['etag']
+ del RESOURCE['selfLink']
+ del RESOURCE['user_email']
+ RESOURCE['configuration']['dryRun'] = True
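+ # dryRun lives at the top level of 'configuration', not inside the 'query' sub-configuration.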
+ conn = _Connection(RESOURCE)
+ client = _make_client(project=self.PROJECT, connection=conn)
+ config = QueryJobConfig()
+ config.dry_run = True
+ job = self._make_one(
+ self.JOB_ID, self.QUERY, client, job_config=config)
+
+ job.begin()
+ self.assertEqual(job.udf_resources, [])
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'POST')
+ self.assertEqual(req['path'], PATH)
+ SENT = {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ 'configuration': {
+ 'query': {
+ 'query': self.QUERY,
+ 'useLegacySql': False,
+ },
+ 'dryRun': True,
+ },
+ }
+ self._verifyResourceProperties(job, RESOURCE)
+ self.assertEqual(req['data'], SENT)
+
+ def test_exists_miss_w_bound_client(self):
+ PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
+ conn = _Connection()
+ client = _make_client(project=self.PROJECT, connection=conn)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+
+ self.assertFalse(job.exists())
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], PATH)
+ self.assertEqual(req['query_params'], {'fields': 'id'})
+
+ def test_exists_hit_w_alternate_client(self):
+ PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
+ conn1 = _Connection()
+ client1 = _make_client(project=self.PROJECT, connection=conn1)
+ conn2 = _Connection({})
+ client2 = _make_client(project=self.PROJECT, connection=conn2)
+ job = self._make_one(self.JOB_ID, self.QUERY, client1)
+
+ self.assertTrue(job.exists(client=client2))
+
+ self.assertEqual(len(conn1._requested), 0)
+ self.assertEqual(len(conn2._requested), 1)
+ req = conn2._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], PATH)
+ self.assertEqual(req['query_params'], {'fields': 'id'})
+
+ def test_reload_w_bound_client(self):
+ from google.cloud.bigquery.dataset import DatasetReference
+ from google.cloud.bigquery.job import QueryJobConfig
+
+ PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
+ DS_ID = 'DATASET'
+ DEST_TABLE = 'dest_table'
+ RESOURCE = self._makeResource()
+ conn = _Connection(RESOURCE)
+ client = _make_client(project=self.PROJECT, connection=conn)
+ dataset_ref = DatasetReference(self.PROJECT, DS_ID)
+ table_ref = dataset_ref.table(DEST_TABLE)
+ config = QueryJobConfig()
+ config.destination = table_ref
+ job = self._make_one(self.JOB_ID, None, client, job_config=config)
+
+ job.reload()
+
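+ # reload() replaces the local configuration with the server's resource, which carries no destination table.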
+ self.assertNotEqual(job.destination, table_ref)
+
+ self.assertEqual(len(conn._requested), 1)
+ req = conn._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], PATH)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_reload_w_alternate_client(self):
+ PATH = '/projects/%s/jobs/%s' % (self.PROJECT, self.JOB_ID)
+ DS_ID = 'DATASET'
+ DEST_TABLE = 'dest_table'
+ RESOURCE = self._makeResource()
+ q_config = RESOURCE['configuration']['query']
+ q_config['destinationTable'] = {
+ 'projectId': self.PROJECT,
+ 'datasetId': DS_ID,
+ 'tableId': DEST_TABLE,
+ }
+ conn1 = _Connection()
+ client1 = _make_client(project=self.PROJECT, connection=conn1)
+ conn2 = _Connection(RESOURCE)
+ client2 = _make_client(project=self.PROJECT, connection=conn2)
+ job = self._make_one(self.JOB_ID, self.QUERY, client1)
+
+ job.reload(client=client2)
+
+ self.assertEqual(len(conn1._requested), 0)
+ self.assertEqual(len(conn2._requested), 1)
+ req = conn2._requested[0]
+ self.assertEqual(req['method'], 'GET')
+ self.assertEqual(req['path'], PATH)
+ self._verifyResourceProperties(job, RESOURCE)
+
+
+class TestQueryPlanEntryStep(unittest.TestCase, _Base):
+ KIND = 'KIND'
+ SUBSTEPS = ('SUB1', 'SUB2')
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.job import QueryPlanEntryStep
+
+ return QueryPlanEntryStep
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_ctor(self):
+ step = self._make_one(self.KIND, self.SUBSTEPS)
+ self.assertEqual(step.kind, self.KIND)
+ self.assertEqual(step.substeps, list(self.SUBSTEPS))
+
+ def test_from_api_repr_empty(self):
+ klass = self._get_target_class()
+ step = klass.from_api_repr({})
+ self.assertIsNone(step.kind)
+ self.assertEqual(step.substeps, [])
+
+ def test_from_api_repr_normal(self):
+ resource = {
+ 'kind': self.KIND,
+ 'substeps': self.SUBSTEPS,
+ }
+ klass = self._get_target_class()
+ step = klass.from_api_repr(resource)
+ self.assertEqual(step.kind, self.KIND)
+ self.assertEqual(step.substeps, list(self.SUBSTEPS))
+
+ def test___eq___mismatched_type(self):
+ step = self._make_one(self.KIND, self.SUBSTEPS)
+ self.assertNotEqual(step, object())
+
+ def test___eq___mismatch_kind(self):
+ step = self._make_one(self.KIND, self.SUBSTEPS)
+ other = self._make_one('OTHER', self.SUBSTEPS)
+ self.assertNotEqual(step, other)
+
+ def test___eq___mismatch_substeps(self):
+ step = self._make_one(self.KIND, self.SUBSTEPS)
+ other = self._make_one(self.KIND, ())
+ self.assertNotEqual(step, other)
+
+ def test___eq___hit(self):
+ step = self._make_one(self.KIND, self.SUBSTEPS)
+ other = self._make_one(self.KIND, self.SUBSTEPS)
+ self.assertEqual(step, other)
+
+
+class TestQueryPlanEntry(unittest.TestCase, _Base):
+ NAME = 'NAME'
+ ENTRY_ID = 1234
+ WAIT_RATIO_AVG = 2.71828
+ WAIT_RATIO_MAX = 3.14159
+ READ_RATIO_AVG = 1.41421
+ READ_RATIO_MAX = 1.73205
+ COMPUTE_RATIO_AVG = 0.69315
+ COMPUTE_RATIO_MAX = 1.09861
+ WRITE_RATIO_AVG = 3.32193
+ WRITE_RATIO_MAX = 2.30258
+ RECORDS_READ = 100
+ RECORDS_WRITTEN = 1
+ STATUS = 'STATUS'
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.job import QueryPlanEntry
+
+ return QueryPlanEntry
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_ctor(self):
+ from google.cloud.bigquery.job import QueryPlanEntryStep
+
+ steps = [QueryPlanEntryStep(
+ kind=TestQueryPlanEntryStep.KIND,
+ substeps=TestQueryPlanEntryStep.SUBSTEPS)]
+ entry = self._make_one(
+ name=self.NAME,
+ entry_id=self.ENTRY_ID,
+ wait_ratio_avg=self.WAIT_RATIO_AVG,
+ wait_ratio_max=self.WAIT_RATIO_MAX,
+ read_ratio_avg=self.READ_RATIO_AVG,
+ read_ratio_max=self.READ_RATIO_MAX,
+ compute_ratio_avg=self.COMPUTE_RATIO_AVG,
+ compute_ratio_max=self.COMPUTE_RATIO_MAX,
+ write_ratio_avg=self.WRITE_RATIO_AVG,
+ write_ratio_max=self.WRITE_RATIO_MAX,
+ records_read=self.RECORDS_READ,
+ records_written=self.RECORDS_WRITTEN,
+ status=self.STATUS,
+ steps=steps,
+ )
+ self.assertEqual(entry.name, self.NAME)
+ self.assertEqual(entry.entry_id, self.ENTRY_ID)
+ self.assertEqual(entry.wait_ratio_avg, self.WAIT_RATIO_AVG)
+ self.assertEqual(entry.wait_ratio_max, self.WAIT_RATIO_MAX)
+ self.assertEqual(entry.read_ratio_avg, self.READ_RATIO_AVG)
+ self.assertEqual(entry.read_ratio_max, self.READ_RATIO_MAX)
+ self.assertEqual(entry.compute_ratio_avg, self.COMPUTE_RATIO_AVG)
+ self.assertEqual(entry.compute_ratio_max, self.COMPUTE_RATIO_MAX)
+ self.assertEqual(entry.write_ratio_avg, self.WRITE_RATIO_AVG)
+ self.assertEqual(entry.write_ratio_max, self.WRITE_RATIO_MAX)
+ self.assertEqual(entry.records_read, self.RECORDS_READ)
+ self.assertEqual(entry.records_written, self.RECORDS_WRITTEN)
+ self.assertEqual(entry.status, self.STATUS)
+ self.assertEqual(entry.steps, steps)
+
+ def test_from_api_repr_empty(self):
+ klass = self._get_target_class()
+
+ entry = klass.from_api_repr({})
+
+ self.assertIsNone(entry.name)
+ self.assertIsNone(entry.entry_id)
+ self.assertIsNone(entry.wait_ratio_avg)
+ self.assertIsNone(entry.wait_ratio_max)
+ self.assertIsNone(entry.read_ratio_avg)
+ self.assertIsNone(entry.read_ratio_max)
+ self.assertIsNone(entry.compute_ratio_avg)
+ self.assertIsNone(entry.compute_ratio_max)
+ self.assertIsNone(entry.write_ratio_avg)
+ self.assertIsNone(entry.write_ratio_max)
+ self.assertIsNone(entry.records_read)
+ self.assertIsNone(entry.records_written)
+ self.assertIsNone(entry.status)
+ self.assertEqual(entry.steps, [])
+
+ def test_from_api_repr_normal(self):
+ from google.cloud.bigquery.job import QueryPlanEntryStep
+
+ steps = [QueryPlanEntryStep(
+ kind=TestQueryPlanEntryStep.KIND,
+ substeps=TestQueryPlanEntryStep.SUBSTEPS)]
+ resource = {
+ 'name': self.NAME,
+ 'id': self.ENTRY_ID,
+ 'waitRatioAvg': self.WAIT_RATIO_AVG,
+ 'waitRatioMax': self.WAIT_RATIO_MAX,
+ 'readRatioAvg': self.READ_RATIO_AVG,
+ 'readRatioMax': self.READ_RATIO_MAX,
+ 'computeRatioAvg': self.COMPUTE_RATIO_AVG,
+ 'computeRatioMax': self.COMPUTE_RATIO_MAX,
+ 'writeRatioAvg': self.WRITE_RATIO_AVG,
+ 'writeRatioMax': self.WRITE_RATIO_MAX,
+ 'recordsRead': str(self.RECORDS_READ),
+ 'recordsWritten': str(self.RECORDS_WRITTEN),
+ 'status': self.STATUS,
+ 'steps': [{
+ 'kind': TestQueryPlanEntryStep.KIND,
+ 'substeps': TestQueryPlanEntryStep.SUBSTEPS,
+ }]
+ }
+ klass = self._get_target_class()
+
+ entry = klass.from_api_repr(resource)
+ self.assertEqual(entry.name, self.NAME)
+ self.assertEqual(entry.entry_id, self.ENTRY_ID)
+ self.assertEqual(entry.wait_ratio_avg, self.WAIT_RATIO_AVG)
+ self.assertEqual(entry.wait_ratio_max, self.WAIT_RATIO_MAX)
+ self.assertEqual(entry.read_ratio_avg, self.READ_RATIO_AVG)
+ self.assertEqual(entry.read_ratio_max, self.READ_RATIO_MAX)
+ self.assertEqual(entry.compute_ratio_avg, self.COMPUTE_RATIO_AVG)
+ self.assertEqual(entry.compute_ratio_max, self.COMPUTE_RATIO_MAX)
+ self.assertEqual(entry.write_ratio_avg, self.WRITE_RATIO_AVG)
+ self.assertEqual(entry.write_ratio_max, self.WRITE_RATIO_MAX)
+ self.assertEqual(entry.records_read, self.RECORDS_READ)
+ self.assertEqual(entry.records_written, self.RECORDS_WRITTEN)
+ self.assertEqual(entry.status, self.STATUS)
+ self.assertEqual(entry.steps, steps)
+
+
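+# Test double standing in for a destination table: ``table_id``, ``project``
+# and ``dataset_id`` always echo the ``TestLoadJob`` constants, regardless of
+# the ``table_id`` passed to the constructor.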
+class _Table(object):
+
+ def __init__(self, table_id=None):
+ self._table_id = table_id
+
+ @property
+ def table_id(self):
+ return TestLoadJob.TABLE_ID
+
+ @property
+ def project(self):
+ return TestLoadJob.PROJECT
+
+ @property
+ def dataset_id(self):
+ return TestLoadJob.DS_ID
+
+
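+# Fake connection used by the job tests: every ``api_request`` call is
+# recorded in ``_requested`` and answered from the canned responses passed to
+# the constructor; once those run out, ``NotFound`` is raised to simulate a
+# missing resource.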
+class _Connection(object):
+
+ def __init__(self, *responses):
+ self._responses = responses
+ self._requested = []
+
+ def api_request(self, **kw):
+ from google.cloud.exceptions import NotFound
+
+ self._requested.append(kw)
+
+ try:
+ response, self._responses = self._responses[0], self._responses[1:]
+ except IndexError:
+ raise NotFound('miss')
+ else:
+ return response
diff --git a/bigquery/tests/unit/test_query.py b/bigquery/tests/unit/test_query.py
new file mode 100644
index 0000000..e5c78ca
--- /dev/null
+++ b/bigquery/tests/unit/test_query.py
@@ -0,0 +1,1253 @@
+# Copyright 2015 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import unittest
+
+import mock
+
+
+class Test_UDFResource(unittest.TestCase):
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.query import UDFResource
+
+ return UDFResource
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_ctor(self):
+ udf = self._make_one('resourceUri', 'gs://some_bucket/some_file')
+ self.assertEqual(udf.udf_type, 'resourceUri')
+ self.assertEqual(udf.value, 'gs://some_bucket/some_file')
+
+ def test___eq__(self):
+ udf = self._make_one('resourceUri', 'gs://some_bucket/some_file')
+ self.assertEqual(udf, udf)
+ self.assertNotEqual(udf, object())
+ wrong_val = self._make_one(
+ 'resourceUri', 'gs://some_bucket/other_file')
+ self.assertNotEqual(udf, wrong_val)
+ wrong_type = self._make_one('inlineCode', udf.value)
+ self.assertNotEqual(udf, wrong_type)
+
+
+class Test__AbstractQueryParameter(unittest.TestCase):
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.query import _AbstractQueryParameter
+
+ return _AbstractQueryParameter
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_from_api_virtual(self):
+ klass = self._get_target_class()
+ with self.assertRaises(NotImplementedError):
+ klass.from_api_repr({})
+
+ def test_to_api_virtual(self):
+ param = self._make_one()
+ with self.assertRaises(NotImplementedError):
+ param.to_api_repr()
+
+
+class Test_ScalarQueryParameter(unittest.TestCase):
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.query import ScalarQueryParameter
+
+ return ScalarQueryParameter
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_ctor(self):
+ param = self._make_one(name='foo', type_='INT64', value=123)
+ self.assertEqual(param.name, 'foo')
+ self.assertEqual(param.type_, 'INT64')
+ self.assertEqual(param.value, 123)
+
+ def test___eq__(self):
+ param = self._make_one(name='foo', type_='INT64', value=123)
+ self.assertEqual(param, param)
+ self.assertNotEqual(param, object())
+ alias = self._make_one(name='bar', type_='INT64', value=123)
+ self.assertNotEqual(param, alias)
+ wrong_type = self._make_one(name='foo', type_='FLOAT64', value=123.0)
+ self.assertNotEqual(param, wrong_type)
+ wrong_val = self._make_one(name='foo', type_='INT64', value=234)
+ self.assertNotEqual(param, wrong_val)
+
+ def test_positional(self):
+ klass = self._get_target_class()
+ param = klass.positional(type_='INT64', value=123)
+ self.assertEqual(param.name, None)
+ self.assertEqual(param.type_, 'INT64')
+ self.assertEqual(param.value, 123)
+
+ def test_from_api_repr_w_name(self):
+ RESOURCE = {
+ 'name': 'foo',
+ 'parameterType': {
+ 'type': 'INT64',
+ },
+ 'parameterValue': {
+ 'value': 123,
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.from_api_repr(RESOURCE)
+ self.assertEqual(param.name, 'foo')
+ self.assertEqual(param.type_, 'INT64')
+ self.assertEqual(param.value, 123)
+
+ def test_from_api_repr_wo_name(self):
+ RESOURCE = {
+ 'parameterType': {
+ 'type': 'INT64',
+ },
+ 'parameterValue': {
+ 'value': '123',
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.from_api_repr(RESOURCE)
+ self.assertEqual(param.name, None)
+ self.assertEqual(param.type_, 'INT64')
+ self.assertEqual(param.value, 123)
+
+ def test_to_api_repr_w_name(self):
+ EXPECTED = {
+ 'name': 'foo',
+ 'parameterType': {
+ 'type': 'INT64',
+ },
+ 'parameterValue': {
+ 'value': '123',
+ },
+ }
+ param = self._make_one(name='foo', type_='INT64', value=123)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_wo_name(self):
+ EXPECTED = {
+ 'parameterType': {
+ 'type': 'INT64',
+ },
+ 'parameterValue': {
+ 'value': '123',
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.positional(type_='INT64', value=123)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_w_float(self):
+ EXPECTED = {
+ 'parameterType': {
+ 'type': 'FLOAT64',
+ },
+ 'parameterValue': {
+ 'value': 12.345,
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.positional(type_='FLOAT64', value=12.345)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_w_bool(self):
+ EXPECTED = {
+ 'parameterType': {
+ 'type': 'BOOL',
+ },
+ 'parameterValue': {
+ 'value': 'false',
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.positional(type_='BOOL', value=False)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_w_timestamp_datetime(self):
+ from google.cloud._helpers import UTC
+
+ STAMP = '2016-12-20 15:58:27.339328+00:00'
+ when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC)
+ EXPECTED = {
+ 'parameterType': {
+ 'type': 'TIMESTAMP',
+ },
+ 'parameterValue': {
+ 'value': STAMP,
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.positional(type_='TIMESTAMP', value=when)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_w_timestamp_micros(self):
+ from google.cloud._helpers import _microseconds_from_datetime
+
+ now = datetime.datetime.utcnow()
+ seconds = _microseconds_from_datetime(now) / 1.0e6
+ EXPECTED = {
+ 'parameterType': {
+ 'type': 'TIMESTAMP',
+ },
+ 'parameterValue': {
+ 'value': seconds,
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.positional(type_='TIMESTAMP', value=seconds)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_w_datetime_datetime(self):
+ from google.cloud._helpers import _datetime_to_rfc3339
+
+ now = datetime.datetime.utcnow()
+ EXPECTED = {
+ 'parameterType': {
+ 'type': 'DATETIME',
+ },
+ 'parameterValue': {
+ 'value': _datetime_to_rfc3339(now)[:-1], # strip trailing 'Z'
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.positional(type_='DATETIME', value=now)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_w_datetime_string(self):
+ from google.cloud._helpers import _datetime_to_rfc3339
+
+ now = datetime.datetime.utcnow()
+ now_str = _datetime_to_rfc3339(now)
+ EXPECTED = {
+ 'parameterType': {
+ 'type': 'DATETIME',
+ },
+ 'parameterValue': {
+ 'value': now_str,
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.positional(type_='DATETIME', value=now_str)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_w_date_date(self):
+ today = datetime.date.today()
+ EXPECTED = {
+ 'parameterType': {
+ 'type': 'DATE',
+ },
+ 'parameterValue': {
+ 'value': today.isoformat(),
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.positional(type_='DATE', value=today)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_w_date_string(self):
+ today = datetime.date.today()
+ today_str = today.isoformat()
+ EXPECTED = {
+ 'parameterType': {
+ 'type': 'DATE',
+ },
+ 'parameterValue': {
+ 'value': today_str,
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.positional(type_='DATE', value=today_str)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_w_unknown_type(self):
+ EXPECTED = {
+ 'parameterType': {
+ 'type': 'UNKNOWN',
+ },
+ 'parameterValue': {
+ 'value': 'unknown',
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.positional(type_='UNKNOWN', value='unknown')
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test___eq___wrong_type(self):
+ field = self._make_one('test', 'STRING', 'value')
+ other = object()
+ self.assertNotEqual(field, other)
+ self.assertEqual(field, mock.ANY)
+
+ def test___eq___name_mismatch(self):
+ field = self._make_one('test', 'STRING', 'value')
+ other = self._make_one('other', 'STRING', 'value')
+ self.assertNotEqual(field, other)
+
+ def test___eq___field_type_mismatch(self):
+ field = self._make_one('test', 'STRING', None)
+ other = self._make_one('test', 'INT64', None)
+ self.assertNotEqual(field, other)
+
+ def test___eq___value_mismatch(self):
+ field = self._make_one('test', 'STRING', 'hello')
+ other = self._make_one('test', 'STRING', 'world')
+ self.assertNotEqual(field, other)
+
+ def test___eq___hit(self):
+ field = self._make_one('test', 'STRING', 'gotcha')
+ other = self._make_one('test', 'STRING', 'gotcha')
+ self.assertEqual(field, other)
+
+ def test___ne___wrong_type(self):
+ field = self._make_one('toast', 'INT64', 13)
+ other = object()
+ self.assertNotEqual(field, other)
+ self.assertEqual(field, mock.ANY)
+
+ def test___ne___same_value(self):
+ field1 = self._make_one('test', 'INT64', 12)
+ field2 = self._make_one('test', 'INT64', 12)
+ # unittest ``assertEqual`` uses ``==`` not ``!=``.
+ comparison_val = (field1 != field2)
+ self.assertFalse(comparison_val)
+
+ def test___ne___different_values(self):
+ field1 = self._make_one('test', 'INT64', 11)
+ field2 = self._make_one('test', 'INT64', 12)
+ self.assertNotEqual(field1, field2)
+
+ def test___repr__(self):
+ field1 = self._make_one('field1', 'STRING', 'value')
+ expected = "ScalarQueryParameter('field1', 'STRING', 'value')"
+ self.assertEqual(repr(field1), expected)
+
+
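+# Helper shared by the array / struct parameter tests below: builds the
+# scalar sub-parameters used as struct fields and array elements.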
+def _make_subparam(name, type_, value):
+ from google.cloud.bigquery.query import ScalarQueryParameter
+
+ return ScalarQueryParameter(name, type_, value)
+
+
+class Test_ArrayQueryParameter(unittest.TestCase):
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.query import ArrayQueryParameter
+
+ return ArrayQueryParameter
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_ctor(self):
+ param = self._make_one(name='foo', array_type='INT64', values=[1, 2])
+ self.assertEqual(param.name, 'foo')
+ self.assertEqual(param.array_type, 'INT64')
+ self.assertEqual(param.values, [1, 2])
+
+ def test___eq__(self):
+ param = self._make_one(name='foo', array_type='INT64', values=[123])
+ self.assertEqual(param, param)
+ self.assertNotEqual(param, object())
+ alias = self._make_one(name='bar', array_type='INT64', values=[123])
+ self.assertNotEqual(param, alias)
+ wrong_type = self._make_one(
+ name='foo', array_type='FLOAT64', values=[123.0])
+ self.assertNotEqual(param, wrong_type)
+ wrong_val = self._make_one(
+ name='foo', array_type='INT64', values=[234])
+ self.assertNotEqual(param, wrong_val)
+
+ def test_positional(self):
+ klass = self._get_target_class()
+ param = klass.positional(array_type='INT64', values=[1, 2])
+ self.assertEqual(param.name, None)
+ self.assertEqual(param.array_type, 'INT64')
+ self.assertEqual(param.values, [1, 2])
+
+ def test_from_api_repr_w_name(self):
+ RESOURCE = {
+ 'name': 'foo',
+ 'parameterType': {
+ 'type': 'ARRAY',
+ 'arrayType': {
+ 'type': 'INT64',
+ },
+ },
+ 'parameterValue': {
+ 'arrayValues': [
+ {
+ 'value': '1',
+ },
+ {
+ 'value': '2'
+ },
+ ],
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.from_api_repr(RESOURCE)
+ self.assertEqual(param.name, 'foo')
+ self.assertEqual(param.array_type, 'INT64')
+ self.assertEqual(param.values, [1, 2])
+
+ def test_from_api_repr_wo_name(self):
+ RESOURCE = {
+ 'parameterType': {
+ 'type': 'ARRAY',
+ 'arrayType': {
+ 'type': 'INT64',
+ },
+ },
+ 'parameterValue': {
+ 'arrayValues': [
+ {
+ 'value': '1',
+ },
+ {
+ 'value': '2'
+ },
+ ],
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.from_api_repr(RESOURCE)
+ self.assertEqual(param.name, None)
+ self.assertEqual(param.array_type, 'INT64')
+ self.assertEqual(param.values, [1, 2])
+
+ def test_from_api_repr_w_struct_type(self):
+ from google.cloud.bigquery.query import StructQueryParameter
+
+ RESOURCE = {
+ 'parameterType': {
+ 'type': 'ARRAY',
+ 'arrayType': {
+ 'type': 'STRUCT',
+ 'structTypes': [
+ {
+ 'name': 'name',
+ 'type': {'type': 'STRING'},
+ },
+ {
+ 'name': 'age',
+ 'type': {'type': 'INT64'},
+ },
+ ],
+ },
+ },
+ 'parameterValue': {
+ 'arrayValues': [
+ {
+ 'structValues': {
+ 'name': {'value': 'Phred Phlyntstone'},
+ 'age': {'value': '32'},
+ },
+ },
+ {
+ 'structValues': {
+ 'name': {
+ 'value': 'Bharney Rhubbyl',
+ },
+ 'age': {'value': '31'},
+ },
+ },
+ ],
+ },
+ }
+
+ klass = self._get_target_class()
+ param = klass.from_api_repr(RESOURCE)
+
+ phred = StructQueryParameter.positional(
+ _make_subparam('name', 'STRING', 'Phred Phlyntstone'),
+ _make_subparam('age', 'INT64', 32))
+ bharney = StructQueryParameter.positional(
+ _make_subparam('name', 'STRING', 'Bharney Rhubbyl'),
+ _make_subparam('age', 'INT64', 31))
+ self.assertEqual(param.array_type, 'STRUCT')
+ self.assertEqual(param.values, [phred, bharney])
+
+ def test_to_api_repr_w_name(self):
+ EXPECTED = {
+ 'name': 'foo',
+ 'parameterType': {
+ 'type': 'ARRAY',
+ 'arrayType': {
+ 'type': 'INT64',
+ },
+ },
+ 'parameterValue': {
+ 'arrayValues': [
+ {
+ 'value': '1',
+ },
+ {
+ 'value': '2'
+ },
+ ],
+ },
+ }
+ param = self._make_one(name='foo', array_type='INT64', values=[1, 2])
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_wo_name(self):
+ EXPECTED = {
+ 'parameterType': {
+ 'type': 'ARRAY',
+ 'arrayType': {
+ 'type': 'INT64',
+ },
+ },
+ 'parameterValue': {
+ 'arrayValues': [
+ {
+ 'value': '1',
+ },
+ {
+ 'value': '2'
+ },
+ ],
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.positional(array_type='INT64', values=[1, 2])
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_w_unknown_type(self):
+ EXPECTED = {
+ 'parameterType': {
+ 'type': 'ARRAY',
+ 'arrayType': {
+ 'type': 'UNKNOWN',
+ },
+ },
+ 'parameterValue': {
+ 'arrayValues': [
+ {
+ 'value': 'unknown',
+ }
+ ],
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.positional(array_type='UNKNOWN', values=['unknown'])
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_w_record_type(self):
+ from google.cloud.bigquery.query import StructQueryParameter
+
+ EXPECTED = {
+ 'parameterType': {
+ 'type': 'ARRAY',
+ 'arrayType': {
+ 'type': 'STRUCT',
+ 'structTypes': [
+ {'name': 'foo', 'type': {'type': 'STRING'}},
+ {'name': 'bar', 'type': {'type': 'INT64'}},
+ ],
+ },
+ },
+ 'parameterValue': {
+ 'arrayValues': [{
+ 'structValues': {
+ 'foo': {'value': 'Foo'},
+ 'bar': {'value': '123'},
+ }
+ }]
+ },
+ }
+ one = _make_subparam('foo', 'STRING', 'Foo')
+ another = _make_subparam('bar', 'INT64', 123)
+ struct = StructQueryParameter.positional(one, another)
+ klass = self._get_target_class()
+ param = klass.positional(array_type='RECORD', values=[struct])
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test___eq___wrong_type(self):
+ field = self._make_one('test', 'STRING', ['value'])
+ other = object()
+ self.assertNotEqual(field, other)
+ self.assertEqual(field, mock.ANY)
+
+ def test___eq___name_mismatch(self):
+ field = self._make_one('field', 'STRING', ['value'])
+ other = self._make_one('other', 'STRING', ['value'])
+ self.assertNotEqual(field, other)
+
+ def test___eq___field_type_mismatch(self):
+ field = self._make_one('test', 'STRING', [])
+ other = self._make_one('test', 'INT64', [])
+ self.assertNotEqual(field, other)
+
+ def test___eq___value_mismatch(self):
+ field = self._make_one('test', 'STRING', ['hello'])
+ other = self._make_one('test', 'STRING', ['hello', 'world'])
+ self.assertNotEqual(field, other)
+
+ def test___eq___hit(self):
+ field = self._make_one('test', 'STRING', ['gotcha'])
+ other = self._make_one('test', 'STRING', ['gotcha'])
+ self.assertEqual(field, other)
+
+ def test___ne___wrong_type(self):
+ field = self._make_one('toast', 'INT64', [13])
+ other = object()
+ self.assertNotEqual(field, other)
+ self.assertEqual(field, mock.ANY)
+
+ def test___ne___same_value(self):
+ field1 = self._make_one('test', 'INT64', [12])
+ field2 = self._make_one('test', 'INT64', [12])
+ # unittest ``assertEqual`` uses ``==`` not ``!=``.
+ comparison_val = (field1 != field2)
+ self.assertFalse(comparison_val)
+
+ def test___ne___different_values(self):
+ field1 = self._make_one('test', 'INT64', [11])
+ field2 = self._make_one('test', 'INT64', [12])
+ self.assertNotEqual(field1, field2)
+
+ def test___repr__(self):
+ field1 = self._make_one('field1', 'STRING', ['value'])
+ expected = "ArrayQueryParameter('field1', 'STRING', ['value'])"
+ self.assertEqual(repr(field1), expected)
+
+
+class Test_StructQueryParameter(unittest.TestCase):
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.query import StructQueryParameter
+
+ return StructQueryParameter
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_ctor(self):
+ sub_1 = _make_subparam('bar', 'INT64', 123)
+ sub_2 = _make_subparam('baz', 'STRING', 'abc')
+ param = self._make_one('foo', sub_1, sub_2)
+ self.assertEqual(param.name, 'foo')
+ self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'})
+ self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'})
+
+ def test___eq__(self):
+ sub_1 = _make_subparam('bar', 'INT64', 123)
+ sub_2 = _make_subparam('baz', 'STRING', 'abc')
+ sub_3 = _make_subparam('baz', 'STRING', 'def')
+ sub_1_float = _make_subparam('bar', 'FLOAT64', 123.0)
+ param = self._make_one('foo', sub_1, sub_2)
+ self.assertEqual(param, param)
+ self.assertNotEqual(param, object())
+ alias = self._make_one('bar', sub_1, sub_2)
+ self.assertNotEqual(param, alias)
+ wrong_type = self._make_one('foo', sub_1_float, sub_2)
+ self.assertNotEqual(param, wrong_type)
+ wrong_val = self._make_one('foo', sub_2, sub_3)
+ self.assertNotEqual(param, wrong_val)
+
+ def test_positional(self):
+ sub_1 = _make_subparam('bar', 'INT64', 123)
+ sub_2 = _make_subparam('baz', 'STRING', 'abc')
+ klass = self._get_target_class()
+ param = klass.positional(sub_1, sub_2)
+ self.assertEqual(param.name, None)
+ self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'})
+ self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'})
+
+ def test_from_api_repr_w_name(self):
+ RESOURCE = {
+ 'name': 'foo',
+ 'parameterType': {
+ 'type': 'STRUCT',
+ 'structTypes': [
+ {'name': 'bar', 'type': {'type': 'INT64'}},
+ {'name': 'baz', 'type': {'type': 'STRING'}},
+ ],
+ },
+ 'parameterValue': {
+ 'structValues': {
+ 'bar': {'value': 123},
+ 'baz': {'value': 'abc'},
+ },
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.from_api_repr(RESOURCE)
+ self.assertEqual(param.name, 'foo')
+ self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'})
+ self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'})
+
+ def test_from_api_repr_wo_name(self):
+ RESOURCE = {
+ 'parameterType': {
+ 'type': 'STRUCT',
+ 'structTypes': [
+ {'name': 'bar', 'type': {'type': 'INT64'}},
+ {'name': 'baz', 'type': {'type': 'STRING'}},
+ ],
+ },
+ 'parameterValue': {
+ 'structValues': {
+ 'bar': {'value': 123},
+ 'baz': {'value': 'abc'},
+ },
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.from_api_repr(RESOURCE)
+ self.assertEqual(param.name, None)
+ self.assertEqual(param.struct_types, {'bar': 'INT64', 'baz': 'STRING'})
+ self.assertEqual(param.struct_values, {'bar': 123, 'baz': 'abc'})
+
+ def test_from_api_repr_w_nested_array(self):
+ from google.cloud.bigquery.query import ArrayQueryParameter
+
+ RESOURCE = {
+ 'name': 'foo',
+ 'parameterType': {
+ 'type': 'STRUCT',
+ 'structTypes': [
+ {'name': 'bar', 'type': {'type': 'STRING'}},
+ {'name': 'baz', 'type': {
+ 'type': 'ARRAY',
+ 'arrayType': {'type': 'INT64'},
+ }},
+ ],
+ },
+ 'parameterValue': {
+ 'structValues': {
+ 'bar': {'value': 'abc'},
+ 'baz': {'arrayValues': [
+ {'value': '123'},
+ {'value': '456'},
+ ]},
+ },
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.from_api_repr(RESOURCE)
+ self.assertEqual(
+ param,
+ self._make_one(
+ 'foo',
+ _make_subparam('bar', 'STRING', 'abc'),
+ ArrayQueryParameter('baz', 'INT64', [123, 456])))
+
+ def test_from_api_repr_w_nested_struct(self):
+ RESOURCE = {
+ 'name': 'foo',
+ 'parameterType': {
+ 'type': 'STRUCT',
+ 'structTypes': [
+ {'name': 'bar', 'type': {'type': 'STRING'}},
+ {'name': 'baz', 'type': {
+ 'type': 'STRUCT',
+ 'structTypes': [
+ {'name': 'qux', 'type': {'type': 'INT64'}},
+ {'name': 'spam', 'type': {'type': 'BOOL'}},
+ ],
+ }},
+ ],
+ },
+ 'parameterValue': {
+ 'structValues': {
+ 'bar': {'value': 'abc'},
+ 'baz': {'structValues': {
+ 'qux': {'value': '123'},
+ 'spam': {'value': 'true'},
+ }},
+ },
+ },
+ }
+
+ klass = self._get_target_class()
+ param = klass.from_api_repr(RESOURCE)
+
+ expected = self._make_one(
+ 'foo',
+ _make_subparam('bar', 'STRING', 'abc'),
+ self._make_one(
+ 'baz',
+ _make_subparam('qux', 'INT64', 123),
+ _make_subparam('spam', 'BOOL', True)))
+ self.assertEqual(param.name, 'foo')
+ self.assertEqual(param.struct_types, expected.struct_types)
+ self.assertEqual(param.struct_values, expected.struct_values)
+
+ def test_to_api_repr_w_name(self):
+ EXPECTED = {
+ 'name': 'foo',
+ 'parameterType': {
+ 'type': 'STRUCT',
+ 'structTypes': [
+ {'name': 'bar', 'type': {'type': 'INT64'}},
+ {'name': 'baz', 'type': {'type': 'STRING'}},
+ ],
+ },
+ 'parameterValue': {
+ 'structValues': {
+ 'bar': {'value': '123'},
+ 'baz': {'value': 'abc'},
+ },
+ },
+ }
+ sub_1 = _make_subparam('bar', 'INT64', 123)
+ sub_2 = _make_subparam('baz', 'STRING', 'abc')
+ param = self._make_one('foo', sub_1, sub_2)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_wo_name(self):
+ EXPECTED = {
+ 'parameterType': {
+ 'type': 'STRUCT',
+ 'structTypes': [
+ {'name': 'bar', 'type': {'type': 'INT64'}},
+ {'name': 'baz', 'type': {'type': 'STRING'}},
+ ],
+ },
+ 'parameterValue': {
+ 'structValues': {
+ 'bar': {'value': '123'},
+ 'baz': {'value': 'abc'},
+ },
+ },
+ }
+ sub_1 = _make_subparam('bar', 'INT64', 123)
+ sub_2 = _make_subparam('baz', 'STRING', 'abc')
+ klass = self._get_target_class()
+ param = klass.positional(sub_1, sub_2)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_w_nested_array(self):
+ from google.cloud.bigquery.query import ArrayQueryParameter
+
+ EXPECTED = {
+ 'name': 'foo',
+ 'parameterType': {
+ 'type': 'STRUCT',
+ 'structTypes': [
+ {'name': 'bar', 'type': {'type': 'STRING'}},
+ {'name': 'baz', 'type': {
+ 'type': 'ARRAY',
+ 'arrayType': {'type': 'INT64'},
+ }},
+ ],
+ },
+ 'parameterValue': {
+ 'structValues': {
+ 'bar': {'value': 'abc'},
+ 'baz': {'arrayValues': [
+ {'value': '123'},
+ {'value': '456'},
+ ]},
+ },
+ },
+ }
+ scalar = _make_subparam('bar', 'STRING', 'abc')
+ array = ArrayQueryParameter('baz', 'INT64', [123, 456])
+ param = self._make_one('foo', scalar, array)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_w_nested_struct(self):
+ EXPECTED = {
+ 'name': 'foo',
+ 'parameterType': {
+ 'type': 'STRUCT',
+ 'structTypes': [
+ {'name': 'bar', 'type': {'type': 'STRING'}},
+ {'name': 'baz', 'type': {
+ 'type': 'STRUCT',
+ 'structTypes': [
+ {'name': 'qux', 'type': {'type': 'INT64'}},
+ {'name': 'spam', 'type': {'type': 'BOOL'}},
+ ],
+ }},
+ ],
+ },
+ 'parameterValue': {
+ 'structValues': {
+ 'bar': {'value': 'abc'},
+ 'baz': {'structValues': {
+ 'qux': {'value': '123'},
+ 'spam': {'value': 'true'},
+ }},
+ },
+ },
+ }
+ scalar_1 = _make_subparam('bar', 'STRING', 'abc')
+ scalar_2 = _make_subparam('qux', 'INT64', 123)
+ scalar_3 = _make_subparam('spam', 'BOOL', True)
+ sub = self._make_one('baz', scalar_2, scalar_3)
+ param = self._make_one('foo', scalar_1, sub)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test___eq___wrong_type(self):
+ field = self._make_one(
+ 'test', _make_subparam('bar', 'STRING', 'abc'))
+ other = object()
+ self.assertNotEqual(field, other)
+ self.assertEqual(field, mock.ANY)
+
+ def test___eq___name_mismatch(self):
+ field = self._make_one(
+ 'test', _make_subparam('bar', 'STRING', 'abc'))
+ other = self._make_one(
+ 'other', _make_subparam('bar', 'STRING', 'abc'))
+ self.assertNotEqual(field, other)
+
+ def test___eq___field_type_mismatch(self):
+ field = self._make_one(
+ 'test', _make_subparam('bar', 'STRING', None))
+ other = self._make_one(
+ 'test', _make_subparam('bar', 'INT64', None))
+ self.assertNotEqual(field, other)
+
+ def test___eq___value_mismatch(self):
+ field = self._make_one(
+ 'test', _make_subparam('bar', 'STRING', 'hello'))
+ other = self._make_one(
+ 'test', _make_subparam('bar', 'STRING', 'world'))
+ self.assertNotEqual(field, other)
+
+ def test___eq___hit(self):
+ field = self._make_one(
+ 'test', _make_subparam('bar', 'STRING', 'gotcha'))
+ other = self._make_one(
+ 'test', _make_subparam('bar', 'STRING', 'gotcha'))
+ self.assertEqual(field, other)
+
+ def test___ne___wrong_type(self):
+ field = self._make_one(
+ 'test', _make_subparam('bar', 'STRING', 'hello'))
+ other = object()
+ self.assertNotEqual(field, other)
+ self.assertEqual(field, mock.ANY)
+
+ def test___ne___same_value(self):
+ field1 = self._make_one(
+ 'test', _make_subparam('bar', 'STRING', 'hello'))
+ field2 = self._make_one(
+ 'test', _make_subparam('bar', 'STRING', 'hello'))
+ # unittest ``assertEqual`` uses ``==`` not ``!=``.
+ comparison_val = (field1 != field2)
+ self.assertFalse(comparison_val)
+
+ def test___ne___different_values(self):
+ field1 = self._make_one(
+ 'test', _make_subparam('bar', 'STRING', 'hello'))
+ field2 = self._make_one(
+ 'test', _make_subparam('bar', 'STRING', 'world'))
+ self.assertNotEqual(field1, field2)
+
+ def test___repr__(self):
+ field1 = self._make_one(
+ 'test', _make_subparam('field1', 'STRING', 'hello'))
+ got = repr(field1)
+ self.assertIn('StructQueryParameter', got)
+ self.assertIn("'field1', 'STRING'", got)
+ self.assertIn("'field1': 'hello'", got)
+
+
+class TestQueryResults(unittest.TestCase):
+ PROJECT = 'project'
+ JOB_ID = 'test-synchronous-query'
+ TOKEN = 'TOKEN'
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.query import QueryResults
+
+ return QueryResults
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
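+ # Minimal query results resource: only the required job reference; the
+ # individual tests splice in whichever fields they exercise.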
+ def _makeResource(self):
+ return {
+ 'jobReference': {
+ 'projectId': self.PROJECT,
+ 'jobId': self.JOB_ID,
+ },
+ }
+
+ def _verifySchema(self, query, resource):
+ from google.cloud.bigquery.schema import SchemaField
+
+ if 'schema' in resource:
+ fields = resource['schema']['fields']
+ self.assertEqual(len(query.schema), len(fields))
+ for found, expected in zip(query.schema, fields):
+ self.assertIsInstance(found, SchemaField)
+ self.assertEqual(found.name, expected['name'])
+ self.assertEqual(found.field_type, expected['type'])
+ self.assertEqual(found.mode, expected['mode'])
+ self.assertEqual(found.description,
+ expected.get('description'))
+ self.assertEqual(found.fields, expected.get('fields', ()))
+ else:
+ self.assertEqual(query.schema, ())
+
+ def test_ctor_defaults(self):
+ query = self._make_one(self._makeResource())
+ self.assertIsNone(query.cache_hit)
+ self.assertIsNone(query.complete)
+ self.assertIsNone(query.errors)
+ self.assertIsNone(query.page_token)
+ self.assertEqual(query.project, self.PROJECT)
+ self.assertEqual(query.rows, [])
+ self.assertEqual(query.schema, ())
+ self.assertIsNone(query.total_rows)
+ self.assertIsNone(query.total_bytes_processed)
+
+ def test_cache_hit_missing(self):
+ query = self._make_one(self._makeResource())
+ self.assertIsNone(query.cache_hit)
+
+ def test_cache_hit_present(self):
+ resource = self._makeResource()
+ resource['cacheHit'] = True
+ query = self._make_one(resource)
+ self.assertTrue(query.cache_hit)
+
+ def test_complete_missing(self):
+ query = self._make_one(self._makeResource())
+ self.assertIsNone(query.complete)
+
+ def test_complete_present(self):
+ resource = self._makeResource()
+ resource['jobComplete'] = True
+ query = self._make_one(resource)
+ self.assertTrue(query.complete)
+
+ def test_errors_missing(self):
+ query = self._make_one(self._makeResource())
+ self.assertIsNone(query.errors)
+
+ def test_errors_present(self):
+ ERRORS = [
+ {'reason': 'testing'},
+ ]
+ resource = self._makeResource()
+ resource['errors'] = ERRORS
+ query = self._make_one(resource)
+ self.assertEqual(query.errors, ERRORS)
+
+ def test_job_id_missing(self):
+ with self.assertRaises(ValueError):
+ self._make_one({})
+
+ def test_job_id_broken_job_reference(self):
+ resource = {'jobReference': {'bogus': 'BOGUS'}}
+ with self.assertRaises(ValueError):
+ self._make_one(resource)
+
+ def test_job_id_present(self):
+ resource = self._makeResource()
+ resource['jobReference']['jobId'] = 'custom-job'
+ query = self._make_one(resource)
+ self.assertEqual(query.job_id, 'custom-job')
+
+ def test_page_token_missing(self):
+ query = self._make_one(self._makeResource())
+ self.assertIsNone(query.page_token)
+
+ def test_page_token_present(self):
+ resource = self._makeResource()
+ resource['pageToken'] = 'TOKEN'
+ query = self._make_one(resource)
+ self.assertEqual(query.page_token, 'TOKEN')
+
+ def test_total_rows_present_integer(self):
+ resource = self._makeResource()
+ resource['totalRows'] = 42
+ query = self._make_one(resource)
+ self.assertEqual(query.total_rows, 42)
+
+ def test_total_rows_present_string(self):
+ resource = self._makeResource()
+ resource['totalRows'] = '42'
+ query = self._make_one(resource)
+ self.assertEqual(query.total_rows, 42)
+
+ def test_total_bytes_processed_missing(self):
+ query = self._make_one(self._makeResource())
+ self.assertIsNone(query.total_bytes_processed)
+
+ def test_total_bytes_processed_present_integer(self):
+ resource = self._makeResource()
+ resource['totalBytesProcessed'] = 123456
+ query = self._make_one(resource)
+ self.assertEqual(query.total_bytes_processed, 123456)
+
+ def test_total_bytes_processed_present_string(self):
+ resource = self._makeResource()
+ resource['totalBytesProcessed'] = '123456'
+ query = self._make_one(resource)
+ self.assertEqual(query.total_bytes_processed, 123456)
+
+ def test_num_dml_affected_rows_missing(self):
+ query = self._make_one(self._makeResource())
+ self.assertIsNone(query.num_dml_affected_rows)
+
+ def test_num_dml_affected_rows_present_integer(self):
+ resource = self._makeResource()
+ resource['numDmlAffectedRows'] = 123456
+ query = self._make_one(resource)
+ self.assertEqual(query.num_dml_affected_rows, 123456)
+
+ def test_num_dml_affected_rows_present_string(self):
+ resource = self._makeResource()
+ resource['numDmlAffectedRows'] = '123456'
+ query = self._make_one(resource)
+ self.assertEqual(query.num_dml_affected_rows, 123456)
+
+ def test_schema(self):
+ query = self._make_one(self._makeResource())
+ self._verifySchema(query, self._makeResource())
+ resource = self._makeResource()
+ resource['schema'] = {
+ 'fields': [
+ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
+ {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'},
+ ],
+ }
+ query._set_properties(resource)
+ self._verifySchema(query, resource)
+
+
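+# Tests for the module-level helper that inspects ``parameterType`` and
+# returns the matching scalar, array, or struct query parameter instance.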
+class Test__query_param_from_api_repr(unittest.TestCase):
+
+ @staticmethod
+ def _call_fut(resource):
+ from google.cloud.bigquery.query import _query_param_from_api_repr
+
+ return _query_param_from_api_repr(resource)
+
+ def test_w_scalar(self):
+ from google.cloud.bigquery.query import ScalarQueryParameter
+
+ RESOURCE = {
+ 'name': 'foo',
+ 'parameterType': {'type': 'INT64'},
+ 'parameterValue': {'value': '123'},
+ }
+
+ parameter = self._call_fut(RESOURCE)
+
+ self.assertIsInstance(parameter, ScalarQueryParameter)
+ self.assertEqual(parameter.name, 'foo')
+ self.assertEqual(parameter.type_, 'INT64')
+ self.assertEqual(parameter.value, 123)
+
+ def test_w_scalar_timestamp(self):
+ from google.cloud._helpers import UTC
+ from google.cloud.bigquery.query import ScalarQueryParameter
+
+ RESOURCE = {
+ 'name': 'zoned',
+ 'parameterType': {'type': 'TIMESTAMP'},
+ 'parameterValue': {'value': '2012-03-04 05:06:07+00:00'},
+ }
+
+ parameter = self._call_fut(RESOURCE)
+
+ self.assertIsInstance(parameter, ScalarQueryParameter)
+ self.assertEqual(parameter.name, 'zoned')
+ self.assertEqual(parameter.type_, 'TIMESTAMP')
+ self.assertEqual(
+ parameter.value,
+ datetime.datetime(2012, 3, 4, 5, 6, 7, tzinfo=UTC))
+
+ def test_w_scalar_timestamp_micros(self):
+ from google.cloud._helpers import UTC
+ from google.cloud.bigquery.query import ScalarQueryParameter
+
+ RESOURCE = {
+ 'name': 'zoned',
+ 'parameterType': {'type': 'TIMESTAMP'},
+ 'parameterValue': {'value': '2012-03-04 05:06:07.250000+00:00'},
+ }
+
+ parameter = self._call_fut(RESOURCE)
+
+ self.assertIsInstance(parameter, ScalarQueryParameter)
+ self.assertEqual(parameter.name, 'zoned')
+ self.assertEqual(parameter.type_, 'TIMESTAMP')
+ self.assertEqual(
+ parameter.value,
+ datetime.datetime(2012, 3, 4, 5, 6, 7, 250000, tzinfo=UTC))
+
+ def test_w_array(self):
+ from google.cloud.bigquery.query import ArrayQueryParameter
+
+ RESOURCE = {
+ 'name': 'foo',
+ 'parameterType': {
+ 'type': 'ARRAY',
+ 'arrayType': {'type': 'INT64'},
+ },
+ 'parameterValue': {
+ 'arrayValues': [
+ {'value': '123'},
+ ]},
+ }
+
+ parameter = self._call_fut(RESOURCE)
+
+ self.assertIsInstance(parameter, ArrayQueryParameter)
+ self.assertEqual(parameter.name, 'foo')
+ self.assertEqual(parameter.array_type, 'INT64')
+ self.assertEqual(parameter.values, [123])
+
+ def test_w_struct(self):
+ from google.cloud.bigquery.query import StructQueryParameter
+
+ RESOURCE = {
+ 'name': 'foo',
+ 'parameterType': {
+ 'type': 'STRUCT',
+ 'structTypes': [
+ {'name': 'foo', 'type': {'type': 'STRING'}},
+ {'name': 'bar', 'type': {'type': 'INT64'}},
+ ],
+ },
+ 'parameterValue': {
+ 'structValues': {
+ 'foo': {'value': 'Foo'},
+ 'bar': {'value': '123'},
+ }
+ },
+ }
+
+ parameter = self._call_fut(RESOURCE)
+
+ self.assertIsInstance(parameter, StructQueryParameter)
+ self.assertEqual(parameter.name, 'foo')
+ self.assertEqual(
+ parameter.struct_types, {'foo': 'STRING', 'bar': 'INT64'})
+ self.assertEqual(parameter.struct_values, {'foo': 'Foo', 'bar': 123})
diff --git a/bigquery/tests/unit/test_schema.py b/bigquery/tests/unit/test_schema.py
new file mode 100644
index 0000000..84e5d30
--- /dev/null
+++ b/bigquery/tests/unit/test_schema.py
@@ -0,0 +1,367 @@
+# Copyright 2015 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import mock
+
+
+class TestSchemaField(unittest.TestCase):
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.schema import SchemaField
+
+ return SchemaField
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_constructor_defaults(self):
+ field = self._make_one('test', 'STRING')
+ self.assertEqual(field._name, 'test')
+ self.assertEqual(field._field_type, 'STRING')
+ self.assertEqual(field._mode, 'NULLABLE')
+ self.assertIsNone(field._description)
+ self.assertEqual(field._fields, ())
+
+ def test_constructor_explicit(self):
+ field = self._make_one('test', 'STRING', mode='REQUIRED',
+ description='Testing')
+ self.assertEqual(field._name, 'test')
+ self.assertEqual(field._field_type, 'STRING')
+ self.assertEqual(field._mode, 'REQUIRED')
+ self.assertEqual(field._description, 'Testing')
+ self.assertEqual(field._fields, ())
+
+ def test_constructor_subfields(self):
+ sub_field1 = self._make_one('area_code', 'STRING')
+ sub_field2 = self._make_one('local_number', 'STRING')
+ field = self._make_one(
+ 'phone_number',
+ 'RECORD',
+ fields=[sub_field1, sub_field2],
+ )
+ self.assertEqual(field._name, 'phone_number')
+ self.assertEqual(field._field_type, 'RECORD')
+ self.assertEqual(field._mode, 'NULLABLE')
+ self.assertIsNone(field._description)
+ self.assertEqual(len(field._fields), 2)
+ self.assertIs(field._fields[0], sub_field1)
+ self.assertIs(field._fields[1], sub_field2)
+
+ def test_to_api_repr(self):
+ field = self._make_one('foo', 'INTEGER', 'NULLABLE')
+ self.assertEqual(field.to_api_repr(), {
+ 'mode': 'nullable',
+ 'name': 'foo',
+ 'type': 'integer',
+ })
+
+ def test_to_api_repr_with_subfield(self):
+ subfield = self._make_one('bar', 'INTEGER', 'NULLABLE')
+ field = self._make_one('foo', 'RECORD', 'REQUIRED', fields=(subfield,))
+ self.assertEqual(field.to_api_repr(), {
+ 'fields': [{
+ 'mode': 'nullable',
+ 'name': 'bar',
+ 'type': 'integer',
+ }],
+ 'mode': 'required',
+ 'name': 'foo',
+ 'type': 'record',
+ })
+
+ def test_from_api_repr(self):
+ field = self._get_target_class().from_api_repr({
+ 'fields': [{
+ 'mode': 'nullable',
+ 'name': 'bar',
+ 'type': 'integer',
+ }],
+ 'mode': 'required',
+ 'name': 'foo',
+ 'type': 'record',
+ })
+ self.assertEqual(field.name, 'foo')
+ self.assertEqual(field.field_type, 'RECORD')
+ self.assertEqual(field.mode, 'REQUIRED')
+ self.assertEqual(len(field.fields), 1)
+ self.assertEqual(field.fields[0].name, 'bar')
+ self.assertEqual(field.fields[0].field_type, 'INTEGER')
+ self.assertEqual(field.fields[0].mode, 'NULLABLE')
+
+ def test_name_property(self):
+ name = 'lemon-ness'
+ schema_field = self._make_one(name, 'INTEGER')
+ self.assertIs(schema_field.name, name)
+
+ def test_field_type_property(self):
+ field_type = 'BOOLEAN'
+ schema_field = self._make_one('whether', field_type)
+ self.assertIs(schema_field.field_type, field_type)
+
+ def test_mode_property(self):
+ mode = 'REPEATED'
+ schema_field = self._make_one('again', 'FLOAT', mode=mode)
+ self.assertIs(schema_field.mode, mode)
+
+ def test_is_nullable(self):
+ mode = 'NULLABLE'
+ schema_field = self._make_one('test', 'FLOAT', mode=mode)
+ self.assertTrue(schema_field.is_nullable)
+
+ def test_is_not_nullable(self):
+ mode = 'REPEATED'
+ schema_field = self._make_one('test', 'FLOAT', mode=mode)
+ self.assertFalse(schema_field.is_nullable)
+
+ def test_description_property(self):
+ description = 'It holds some data.'
+ schema_field = self._make_one(
+ 'do', 'TIMESTAMP', description=description)
+ self.assertIs(schema_field.description, description)
+
+ def test_fields_property(self):
+ sub_field1 = self._make_one('one', 'STRING')
+ sub_field2 = self._make_one('fish', 'INTEGER')
+ fields = (sub_field1, sub_field2)
+ schema_field = self._make_one('boat', 'RECORD', fields=fields)
+ self.assertIs(schema_field.fields, fields)
+
+ def test___eq___wrong_type(self):
+ field = self._make_one('test', 'STRING')
+ other = object()
+ self.assertNotEqual(field, other)
+ self.assertEqual(field, mock.ANY)
+
+ def test___eq___name_mismatch(self):
+ field = self._make_one('test', 'STRING')
+ other = self._make_one('other', 'STRING')
+ self.assertNotEqual(field, other)
+
+ def test___eq___field_type_mismatch(self):
+ field = self._make_one('test', 'STRING')
+ other = self._make_one('test', 'INTEGER')
+ self.assertNotEqual(field, other)
+
+ def test___eq___mode_mismatch(self):
+ field = self._make_one('test', 'STRING', mode='REQUIRED')
+ other = self._make_one('test', 'STRING', mode='NULLABLE')
+ self.assertNotEqual(field, other)
+
+ def test___eq___description_mismatch(self):
+ field = self._make_one('test', 'STRING', description='Testing')
+ other = self._make_one('test', 'STRING', description='Other')
+ self.assertNotEqual(field, other)
+
+ def test___eq___fields_mismatch(self):
+ sub1 = self._make_one('sub1', 'STRING')
+ sub2 = self._make_one('sub2', 'STRING')
+ field = self._make_one('test', 'RECORD', fields=[sub1])
+ other = self._make_one('test', 'RECORD', fields=[sub2])
+ self.assertNotEqual(field, other)
+
+ def test___eq___hit(self):
+ field = self._make_one('test', 'STRING', mode='REQUIRED',
+ description='Testing')
+ other = self._make_one('test', 'STRING', mode='REQUIRED',
+ description='Testing')
+ self.assertEqual(field, other)
+
+ def test___eq___hit_case_diff_on_type(self):
+ field = self._make_one('test', 'STRING', mode='REQUIRED',
+ description='Testing')
+ other = self._make_one('test', 'string', mode='REQUIRED',
+ description='Testing')
+ self.assertEqual(field, other)
+
+ def test___eq___hit_w_fields(self):
+ sub1 = self._make_one('sub1', 'STRING')
+ sub2 = self._make_one('sub2', 'STRING')
+ field = self._make_one('test', 'RECORD', fields=[sub1, sub2])
+ other = self._make_one('test', 'RECORD', fields=[sub1, sub2])
+ self.assertEqual(field, other)
+
+ def test___ne___wrong_type(self):
+ field = self._make_one('toast', 'INTEGER')
+ other = object()
+ self.assertNotEqual(field, other)
+ self.assertEqual(field, mock.ANY)
+
+ def test___ne___same_value(self):
+ field1 = self._make_one('test', 'TIMESTAMP', mode='REPEATED')
+ field2 = self._make_one('test', 'TIMESTAMP', mode='REPEATED')
+ # unittest ``assertEqual`` uses ``==`` not ``!=``.
+ comparison_val = (field1 != field2)
+ self.assertFalse(comparison_val)
+
+ def test___ne___different_values(self):
+ field1 = self._make_one(
+ 'test1', 'FLOAT', mode='REPEATED', description='Not same')
+ field2 = self._make_one(
+ 'test2', 'FLOAT', mode='NULLABLE', description='Knot saym')
+ self.assertNotEqual(field1, field2)
+
+ def test___hash__set_equality(self):
+ sub1 = self._make_one('sub1', 'STRING')
+ sub2 = self._make_one('sub2', 'STRING')
+ field1 = self._make_one('test', 'RECORD', fields=[sub1])
+ field2 = self._make_one('test', 'RECORD', fields=[sub2])
+ set_one = {field1, field2}
+ set_two = {field1, field2}
+ self.assertEqual(set_one, set_two)
+
+ def test___hash__not_equals(self):
+ sub1 = self._make_one('sub1', 'STRING')
+ sub2 = self._make_one('sub2', 'STRING')
+ field1 = self._make_one('test', 'RECORD', fields=[sub1])
+ field2 = self._make_one('test', 'RECORD', fields=[sub2])
+ set_one = {field1}
+ set_two = {field2}
+ self.assertNotEqual(set_one, set_two)
+
+ def test___repr__(self):
+ field1 = self._make_one('field1', 'STRING')
+ expected = "SchemaField('field1', 'string', 'NULLABLE', None, ())"
+ self.assertEqual(repr(field1), expected)
+
+
+# TODO: dedup with the same class in test_table.py.
+class _SchemaBase(object):
+
+ def _verify_field(self, field, r_field):
+ self.assertEqual(field.name, r_field['name'])
+ self.assertEqual(field.field_type, r_field['type'])
+ self.assertEqual(field.mode, r_field.get('mode', 'NULLABLE'))
+
+ def _verifySchema(self, schema, resource):
+ r_fields = resource['schema']['fields']
+ self.assertEqual(len(schema), len(r_fields))
+
+ for field, r_field in zip(schema, r_fields):
+ self._verify_field(field, r_field)
+
+
+class Test_parse_schema_resource(unittest.TestCase, _SchemaBase):
+
+ def _call_fut(self, resource):
+ from google.cloud.bigquery.schema import _parse_schema_resource
+
+ return _parse_schema_resource(resource)
+
+ def _makeResource(self):
+ return {
+ 'schema': {'fields': [
+ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
+ {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'},
+ ]},
+ }
+
+ def test__parse_schema_resource_defaults(self):
+ RESOURCE = self._makeResource()
+ schema = self._call_fut(RESOURCE['schema'])
+ self._verifySchema(schema, RESOURCE)
+
+ def test__parse_schema_resource_subfields(self):
+ RESOURCE = self._makeResource()
+ RESOURCE['schema']['fields'].append(
+ {'name': 'phone',
+ 'type': 'RECORD',
+ 'mode': 'REPEATED',
+ 'fields': [{'name': 'type',
+ 'type': 'STRING',
+ 'mode': 'REQUIRED'},
+ {'name': 'number',
+ 'type': 'STRING',
+ 'mode': 'REQUIRED'}]})
+ schema = self._call_fut(RESOURCE['schema'])
+ self._verifySchema(schema, RESOURCE)
+
+ def test__parse_schema_resource_fields_without_mode(self):
+ RESOURCE = self._makeResource()
+ RESOURCE['schema']['fields'].append(
+ {'name': 'phone',
+ 'type': 'STRING'})
+
+ schema = self._call_fut(RESOURCE['schema'])
+ self._verifySchema(schema, RESOURCE)
+
+
+class Test_build_schema_resource(unittest.TestCase, _SchemaBase):
+
+ def _call_fut(self, resource):
+ from google.cloud.bigquery.schema import _build_schema_resource
+
+ return _build_schema_resource(resource)
+
+ def test_defaults(self):
+ from google.cloud.bigquery.schema import SchemaField
+
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ age = SchemaField('age', 'INTEGER', mode='REQUIRED')
+ resource = self._call_fut([full_name, age])
+ self.assertEqual(len(resource), 2)
+ self.assertEqual(resource[0],
+ {'name': 'full_name',
+ 'type': 'STRING',
+ 'mode': 'REQUIRED'})
+ self.assertEqual(resource[1],
+ {'name': 'age',
+ 'type': 'INTEGER',
+ 'mode': 'REQUIRED'})
+
+ def test_w_description(self):
+ from google.cloud.bigquery.schema import SchemaField
+
+ DESCRIPTION = 'DESCRIPTION'
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED',
+ description=DESCRIPTION)
+ age = SchemaField('age', 'INTEGER', mode='REQUIRED')
+ resource = self._call_fut([full_name, age])
+ self.assertEqual(len(resource), 2)
+ self.assertEqual(resource[0],
+ {'name': 'full_name',
+ 'type': 'STRING',
+ 'mode': 'REQUIRED',
+ 'description': DESCRIPTION})
+ self.assertEqual(resource[1],
+ {'name': 'age',
+ 'type': 'INTEGER',
+ 'mode': 'REQUIRED'})
+
+ def test_w_subfields(self):
+ from google.cloud.bigquery.schema import SchemaField
+
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ ph_type = SchemaField('type', 'STRING', 'REQUIRED')
+ ph_num = SchemaField('number', 'STRING', 'REQUIRED')
+ phone = SchemaField('phone', 'RECORD', mode='REPEATED',
+ fields=[ph_type, ph_num])
+ resource = self._call_fut([full_name, phone])
+ self.assertEqual(len(resource), 2)
+ self.assertEqual(resource[0],
+ {'name': 'full_name',
+ 'type': 'STRING',
+ 'mode': 'REQUIRED'})
+ self.assertEqual(resource[1],
+ {'name': 'phone',
+ 'type': 'RECORD',
+ 'mode': 'REPEATED',
+ 'fields': [{'name': 'type',
+ 'type': 'STRING',
+ 'mode': 'REQUIRED'},
+ {'name': 'number',
+ 'type': 'STRING',
+ 'mode': 'REQUIRED'}]})
diff --git a/bigquery/tests/unit/test_table.py b/bigquery/tests/unit/test_table.py
new file mode 100644
index 0000000..a40ab16
--- /dev/null
+++ b/bigquery/tests/unit/test_table.py
@@ -0,0 +1,753 @@
+# Copyright 2015 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import mock
+
+from google.cloud.bigquery.dataset import DatasetReference
+
+
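+# Shared assertions for checking a parsed schema against its resource dict
+# (duplicated in test_schema.py; see the TODO there about de-duplicating).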
+class _SchemaBase(object):
+
+ def _verify_field(self, field, r_field):
+ self.assertEqual(field.name, r_field['name'])
+ self.assertEqual(field.field_type, r_field['type'])
+ self.assertEqual(field.mode, r_field.get('mode', 'NULLABLE'))
+
+ def _verifySchema(self, schema, resource):
+ r_fields = resource['schema']['fields']
+ self.assertEqual(len(schema), len(r_fields))
+
+ for field, r_field in zip(schema, r_fields):
+ self._verify_field(field, r_field)
+
+
+class TestTableReference(unittest.TestCase):
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.table import TableReference
+
+ return TableReference
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_ctor_defaults(self):
+ from google.cloud.bigquery.dataset import DatasetReference
+ dataset_ref = DatasetReference('project_1', 'dataset_1')
+
+ table_ref = self._make_one(dataset_ref, 'table_1')
+ self.assertEqual(table_ref.dataset_id, dataset_ref.dataset_id)
+ self.assertEqual(table_ref.table_id, 'table_1')
+
+ def test_to_api_repr(self):
+ from google.cloud.bigquery.dataset import DatasetReference
+ dataset_ref = DatasetReference('project_1', 'dataset_1')
+ table_ref = self._make_one(dataset_ref, 'table_1')
+
+ resource = table_ref.to_api_repr()
+
+ self.assertEqual(
+ resource,
+ {
+ 'projectId': 'project_1',
+ 'datasetId': 'dataset_1',
+ 'tableId': 'table_1',
+ })
+
+ def test_from_api_repr(self):
+ from google.cloud.bigquery.dataset import DatasetReference
+ from google.cloud.bigquery.table import TableReference
+ dataset_ref = DatasetReference('project_1', 'dataset_1')
+ expected = self._make_one(dataset_ref, 'table_1')
+
+ got = TableReference.from_api_repr(
+ {
+ 'projectId': 'project_1',
+ 'datasetId': 'dataset_1',
+ 'tableId': 'table_1',
+ })
+
+ self.assertEqual(expected, got)
+
+ def test___eq___wrong_type(self):
+ from google.cloud.bigquery.dataset import DatasetReference
+ dataset_ref = DatasetReference('project_1', 'dataset_1')
+ table = self._make_one(dataset_ref, 'table_1')
+ other = object()
+ self.assertNotEqual(table, other)
+ self.assertEqual(table, mock.ANY)
+
+ def test___eq___project_mismatch(self):
+ from google.cloud.bigquery.dataset import DatasetReference
+ dataset = DatasetReference('project_1', 'dataset_1')
+ other_dataset = DatasetReference('project_2', 'dataset_1')
+ table = self._make_one(dataset, 'table_1')
+ other = self._make_one(other_dataset, 'table_1')
+ self.assertNotEqual(table, other)
+
+ def test___eq___dataset_mismatch(self):
+ from google.cloud.bigquery.dataset import DatasetReference
+ dataset = DatasetReference('project_1', 'dataset_1')
+ other_dataset = DatasetReference('project_1', 'dataset_2')
+ table = self._make_one(dataset, 'table_1')
+ other = self._make_one(other_dataset, 'table_1')
+ self.assertNotEqual(table, other)
+
+ def test___eq___table_mismatch(self):
+ from google.cloud.bigquery.dataset import DatasetReference
+ dataset = DatasetReference('project_1', 'dataset_1')
+ table = self._make_one(dataset, 'table_1')
+ other = self._make_one(dataset, 'table_2')
+ self.assertNotEqual(table, other)
+
+ def test___eq___equality(self):
+ from google.cloud.bigquery.dataset import DatasetReference
+ dataset = DatasetReference('project_1', 'dataset_1')
+ table = self._make_one(dataset, 'table_1')
+ other = self._make_one(dataset, 'table_1')
+ self.assertEqual(table, other)
+
+ def test___hash__set_equality(self):
+ from google.cloud.bigquery.dataset import DatasetReference
+ dataset = DatasetReference('project_1', 'dataset_1')
+ table1 = self._make_one(dataset, 'table1')
+ table2 = self._make_one(dataset, 'table2')
+ set_one = {table1, table2}
+ set_two = {table1, table2}
+ self.assertEqual(set_one, set_two)
+
+ def test___hash__not_equals(self):
+ from google.cloud.bigquery.dataset import DatasetReference
+ dataset = DatasetReference('project_1', 'dataset_1')
+ table1 = self._make_one(dataset, 'table1')
+ table2 = self._make_one(dataset, 'table2')
+ set_one = {table1}
+ set_two = {table2}
+ self.assertNotEqual(set_one, set_two)
+
+ def test___repr__(self):
+ dataset = DatasetReference('project1', 'dataset1')
+ table1 = self._make_one(dataset, 'table1')
+ expected = "TableReference('project1', 'dataset1', 'table1')"
+ self.assertEqual(repr(table1), expected)
+
+
+class TestTable(unittest.TestCase, _SchemaBase):
+
+ PROJECT = 'prahj-ekt'
+ DS_ID = 'dataset-name'
+ TABLE_NAME = 'table-name'
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.table import Table
+
+ return Table
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def _setUpConstants(self):
+ import datetime
+ from google.cloud._helpers import UTC
+
+ self.WHEN_TS = 1437767599.006
+ self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(
+ tzinfo=UTC)
+ self.ETAG = 'ETAG'
+ self.TABLE_FULL_ID = '%s:%s:%s' % (
+ self.PROJECT, self.DS_ID, self.TABLE_NAME)
+ self.RESOURCE_URL = 'http://example.com/path/to/resource'
+ self.NUM_BYTES = 12345
+ self.NUM_ROWS = 67
+ self.NUM_EST_BYTES = 1234
+ self.NUM_EST_ROWS = 23
+
+ def _makeResource(self):
+ self._setUpConstants()
+ return {
+ 'creationTime': self.WHEN_TS * 1000,
+ 'tableReference':
+ {'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_NAME},
+ 'schema': {'fields': [
+ {'name': 'full_name', 'type': 'STRING', 'mode': 'REQUIRED'},
+ {'name': 'age', 'type': 'INTEGER', 'mode': 'REQUIRED'}]},
+ 'etag': 'ETAG',
+ 'id': self.TABLE_FULL_ID,
+ 'lastModifiedTime': self.WHEN_TS * 1000,
+ 'location': 'US',
+ 'selfLink': self.RESOURCE_URL,
+ 'numRows': self.NUM_ROWS,
+ 'numBytes': self.NUM_BYTES,
+ 'type': 'TABLE',
+ 'streamingBuffer': {
+ 'estimatedRows': str(self.NUM_EST_ROWS),
+ 'estimatedBytes': str(self.NUM_EST_BYTES),
+ 'oldestEntryTime': self.WHEN_TS * 1000},
+ 'externalDataConfiguration': {
+ 'sourceFormat': 'CSV',
+ 'csvOptions': {
+ 'allowJaggedRows': True,
+ 'encoding': 'encoding'}},
+ 'labels': {'x': 'y'},
+ }
+
+ def _verifyReadonlyResourceProperties(self, table, resource):
+ if 'creationTime' in resource:
+ self.assertEqual(table.created, self.WHEN)
+ else:
+ self.assertIsNone(table.created)
+
+ if 'etag' in resource:
+ self.assertEqual(table.etag, self.ETAG)
+ else:
+ self.assertIsNone(table.etag)
+
+ if 'numRows' in resource:
+ self.assertEqual(table.num_rows, self.NUM_ROWS)
+ else:
+ self.assertIsNone(table.num_rows)
+
+ if 'numBytes' in resource:
+ self.assertEqual(table.num_bytes, self.NUM_BYTES)
+ else:
+ self.assertIsNone(table.num_bytes)
+
+ if 'selfLink' in resource:
+ self.assertEqual(table.self_link, self.RESOURCE_URL)
+ else:
+ self.assertIsNone(table.self_link)
+
+ if 'streamingBuffer' in resource:
+ self.assertEqual(table.streaming_buffer.estimated_rows,
+ self.NUM_EST_ROWS)
+ self.assertEqual(table.streaming_buffer.estimated_bytes,
+ self.NUM_EST_BYTES)
+ self.assertEqual(table.streaming_buffer.oldest_entry_time,
+ self.WHEN)
+ else:
+ self.assertIsNone(table.streaming_buffer)
+
+ self.assertEqual(table.full_table_id, self.TABLE_FULL_ID)
+ self.assertEqual(table.table_type,
+ 'TABLE' if 'view' not in resource else 'VIEW')
+
+ def _verifyResourceProperties(self, table, resource):
+ self._verifyReadonlyResourceProperties(table, resource)
+
+ if 'expirationTime' in resource:
+ self.assertEqual(table.expires, self.EXP_TIME)
+ else:
+ self.assertIsNone(table.expires)
+
+ self.assertEqual(table.description, resource.get('description'))
+ self.assertEqual(table.friendly_name, resource.get('friendlyName'))
+ self.assertEqual(table.location, resource.get('location'))
+
+ if 'view' in resource:
+ self.assertEqual(table.view_query, resource['view']['query'])
+ self.assertEqual(
+ table.view_use_legacy_sql,
+ resource['view'].get('useLegacySql', True))
+ else:
+ self.assertIsNone(table.view_query)
+ self.assertIsNone(table.view_use_legacy_sql)
+
+ if 'schema' in resource:
+ self._verifySchema(table.schema, resource)
+ else:
+ self.assertEqual(table.schema, [])
+
+ if 'externalDataConfiguration' in resource:
+ edc = table.external_data_configuration
+ self.assertEqual(edc.source_format, 'CSV')
+ self.assertEqual(edc.options.allow_jagged_rows, True)
+
+ if 'labels' in resource:
+ self.assertEqual(table.labels, {'x': 'y'})
+ else:
+ self.assertEqual(table.labels, {})
+
+ def test_ctor(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+
+ self.assertEqual(table.table_id, self.TABLE_NAME)
+ self.assertEqual(table.project, self.PROJECT)
+ self.assertEqual(table.dataset_id, self.DS_ID)
+ self.assertEqual(
+ table.path,
+ '/projects/%s/datasets/%s/tables/%s' % (
+ self.PROJECT, self.DS_ID, self.TABLE_NAME))
+ self.assertEqual(table.schema, [])
+
+ self.assertIsNone(table.created)
+ self.assertIsNone(table.etag)
+ self.assertIsNone(table.modified)
+ self.assertIsNone(table.num_bytes)
+ self.assertIsNone(table.num_rows)
+ self.assertIsNone(table.self_link)
+ self.assertIsNone(table.full_table_id)
+ self.assertIsNone(table.table_type)
+ self.assertIsNone(table.description)
+ self.assertIsNone(table.expires)
+ self.assertIsNone(table.friendly_name)
+ self.assertIsNone(table.location)
+ self.assertIsNone(table.view_query)
+ self.assertIsNone(table.view_use_legacy_sql)
+ self.assertIsNone(table.external_data_configuration)
+ self.assertEqual(table.labels, {})
+
+ def test_ctor_w_schema(self):
+ from google.cloud.bigquery.table import SchemaField
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ age = SchemaField('age', 'INTEGER', mode='REQUIRED')
+ table = self._make_one(table_ref, schema=[full_name, age])
+
+ self.assertEqual(table.schema, [full_name, age])
+
+ def test_num_bytes_getter(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+
+ # Check with no value set.
+ self.assertIsNone(table.num_bytes)
+
+ num_bytes = 1337
+ # Check with integer value set.
+ table._properties = {'numBytes': num_bytes}
+ self.assertEqual(table.num_bytes, num_bytes)
+
+ # Check with a string value set.
+ table._properties = {'numBytes': str(num_bytes)}
+ self.assertEqual(table.num_bytes, num_bytes)
+
+ # Check with invalid int value.
+ table._properties = {'numBytes': 'x'}
+ with self.assertRaises(ValueError):
+ getattr(table, 'num_bytes')
+
+ def test_num_rows_getter(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+
+ # Check with no value set.
+ self.assertIsNone(table.num_rows)
+
+ num_rows = 42
+ # Check with integer value set.
+ table._properties = {'numRows': num_rows}
+ self.assertEqual(table.num_rows, num_rows)
+
+ # Check with a string value set.
+ table._properties = {'numRows': str(num_rows)}
+ self.assertEqual(table.num_rows, num_rows)
+
+ # Check with invalid int value.
+ table._properties = {'numRows': 'x'}
+ with self.assertRaises(ValueError):
+ getattr(table, 'num_rows')
+
+ def test_schema_setter_non_list(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ with self.assertRaises(TypeError):
+ table.schema = object()
+
+ def test_schema_setter_invalid_field(self):
+ from google.cloud.bigquery.table import SchemaField
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ with self.assertRaises(ValueError):
+ table.schema = [full_name, object()]
+
+ def test_schema_setter(self):
+ from google.cloud.bigquery.table import SchemaField
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ age = SchemaField('age', 'INTEGER', mode='REQUIRED')
+ table.schema = [full_name, age]
+ self.assertEqual(table.schema, [full_name, age])
+
+ def test_props_set_by_server(self):
+ import datetime
+ from google.cloud._helpers import UTC
+ from google.cloud._helpers import _millis
+
+ CREATED = datetime.datetime(2015, 7, 29, 12, 13, 22, tzinfo=UTC)
+ MODIFIED = datetime.datetime(2015, 7, 29, 14, 47, 15, tzinfo=UTC)
+ TABLE_FULL_ID = '%s:%s:%s' % (
+ self.PROJECT, self.DS_ID, self.TABLE_NAME)
+ URL = 'http://example.com/projects/%s/datasets/%s/tables/%s' % (
+ self.PROJECT, self.DS_ID, self.TABLE_NAME)
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ table._properties['creationTime'] = _millis(CREATED)
+ table._properties['etag'] = 'ETAG'
+ table._properties['lastModifiedTime'] = _millis(MODIFIED)
+ table._properties['numBytes'] = 12345
+ table._properties['numRows'] = 66
+ table._properties['selfLink'] = URL
+ table._properties['id'] = TABLE_FULL_ID
+ table._properties['type'] = 'TABLE'
+
+ self.assertEqual(table.created, CREATED)
+ self.assertEqual(table.etag, 'ETAG')
+ self.assertEqual(table.modified, MODIFIED)
+ self.assertEqual(table.num_bytes, 12345)
+ self.assertEqual(table.num_rows, 66)
+ self.assertEqual(table.self_link, URL)
+ self.assertEqual(table.full_table_id, TABLE_FULL_ID)
+ self.assertEqual(table.table_type, 'TABLE')
+
+ def test_description_setter_bad_value(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ with self.assertRaises(ValueError):
+ table.description = 12345
+
+ def test_description_setter(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ table.description = 'DESCRIPTION'
+ self.assertEqual(table.description, 'DESCRIPTION')
+
+ def test_expires_setter_bad_value(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ with self.assertRaises(ValueError):
+ table.expires = object()
+
+ def test_expires_setter(self):
+ import datetime
+ from google.cloud._helpers import UTC
+
+ WHEN = datetime.datetime(2015, 7, 28, 16, 39, tzinfo=UTC)
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ table.expires = WHEN
+ self.assertEqual(table.expires, WHEN)
+
+ def test_friendly_name_setter_bad_value(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ with self.assertRaises(ValueError):
+ table.friendly_name = 12345
+
+ def test_friendly_name_setter(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ table.friendly_name = 'FRIENDLY'
+ self.assertEqual(table.friendly_name, 'FRIENDLY')
+
+ def test_location_setter_bad_value(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ with self.assertRaises(ValueError):
+ table.location = 12345
+
+ def test_location_setter(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ table.location = 'LOCATION'
+ self.assertEqual(table.location, 'LOCATION')
+
+ def test_view_query_setter_bad_value(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ with self.assertRaises(ValueError):
+ table.view_query = 12345
+
+ def test_view_query_setter(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ table.view_query = 'select * from foo'
+ self.assertEqual(table.view_query, 'select * from foo')
+ self.assertEqual(table.view_use_legacy_sql, False)
+
+ table.view_use_legacy_sql = True
+ self.assertEqual(table.view_use_legacy_sql, True)
+
+ def test_view_query_deleter(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ table.view_query = 'select * from foo'
+ del table.view_query
+ self.assertIsNone(table.view_query)
+ self.assertIsNone(table.view_use_legacy_sql)
+
+ def test_view_use_legacy_sql_setter_bad_value(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ with self.assertRaises(ValueError):
+ table.view_use_legacy_sql = 12345
+
+ def test_view_use_legacy_sql_setter(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ table.view_use_legacy_sql = True
+ table.view_query = 'select * from foo'
+ self.assertEqual(table.view_use_legacy_sql, True)
+ self.assertEqual(table.view_query, 'select * from foo')
+
+ def test_external_data_configuration_setter_bad_value(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ with self.assertRaises(ValueError):
+ table.external_data_configuration = 12345
+
+ def test_labels_setter_bad_value(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ with self.assertRaises(ValueError):
+ table.labels = 12345
+
+ def test_from_api_repr_missing_identity(self):
+ self._setUpConstants()
+ RESOURCE = {}
+ klass = self._get_target_class()
+ with self.assertRaises(KeyError):
+ klass.from_api_repr(RESOURCE)
+
+ def test_from_api_repr_bare(self):
+ self._setUpConstants()
+ RESOURCE = {
+ 'id': '%s:%s:%s' % (self.PROJECT, self.DS_ID, self.TABLE_NAME),
+ 'tableReference': {
+ 'projectId': self.PROJECT,
+ 'datasetId': self.DS_ID,
+ 'tableId': self.TABLE_NAME,
+ },
+ 'type': 'TABLE',
+ }
+ klass = self._get_target_class()
+ table = klass.from_api_repr(RESOURCE)
+ self.assertEqual(table.table_id, self.TABLE_NAME)
+ self._verifyResourceProperties(table, RESOURCE)
+
+ def test_from_api_repr_w_properties(self):
+ import datetime
+ from google.cloud._helpers import UTC
+ from google.cloud._helpers import _millis
+
+ RESOURCE = self._makeResource()
+ RESOURCE['view'] = {'query': 'select fullname, age from person_ages'}
+ RESOURCE['type'] = 'VIEW'
+ RESOURCE['location'] = 'EU'
+ self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC)
+ RESOURCE['expirationTime'] = _millis(self.EXP_TIME)
+ klass = self._get_target_class()
+ table = klass.from_api_repr(RESOURCE)
+ self._verifyResourceProperties(table, RESOURCE)
+
+ def test_partition_type_setter_bad_type(self):
+ from google.cloud.bigquery.table import SchemaField
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ age = SchemaField('age', 'INTEGER', mode='REQUIRED')
+ table = self._make_one(table_ref, schema=[full_name, age])
+ with self.assertRaises(ValueError):
+ table.partitioning_type = 123
+
+ def test_partition_type_setter_unknown_value(self):
+ from google.cloud.bigquery.table import SchemaField
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ age = SchemaField('age', 'INTEGER', mode='REQUIRED')
+ table = self._make_one(table_ref, schema=[full_name, age])
+ with self.assertRaises(ValueError):
+ table.partitioning_type = "HASH"
+
+ def test_partition_type_setter_w_known_value(self):
+ from google.cloud.bigquery.table import SchemaField
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ age = SchemaField('age', 'INTEGER', mode='REQUIRED')
+ table = self._make_one(table_ref, schema=[full_name, age])
+ self.assertIsNone(table.partitioning_type)
+ table.partitioning_type = 'DAY'
+ self.assertEqual(table.partitioning_type, 'DAY')
+
+ def test_partition_type_setter_w_none(self):
+ from google.cloud.bigquery.table import SchemaField
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ age = SchemaField('age', 'INTEGER', mode='REQUIRED')
+ table = self._make_one(table_ref, schema=[full_name, age])
+ table._properties['timePartitioning'] = {'type': 'DAY'}
+ table.partitioning_type = None
+ self.assertIsNone(table.partitioning_type)
+ self.assertFalse('timePartitioning' in table._properties)
+
+ def test_partition_expiration_bad_type(self):
+ from google.cloud.bigquery.table import SchemaField
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ age = SchemaField('age', 'INTEGER', mode='REQUIRED')
+ table = self._make_one(table_ref, schema=[full_name, age])
+ with self.assertRaises(ValueError):
+ table.partition_expiration = "NEVER"
+
+ def test_partition_expiration_w_integer(self):
+ from google.cloud.bigquery.table import SchemaField
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ age = SchemaField('age', 'INTEGER', mode='REQUIRED')
+ table = self._make_one(table_ref, schema=[full_name, age])
+ self.assertIsNone(table.partition_expiration)
+ table.partition_expiration = 100
+ self.assertEqual(table.partitioning_type, "DAY")
+ self.assertEqual(table.partition_expiration, 100)
+
+ def test_partition_expiration_w_none(self):
+ from google.cloud.bigquery.table import SchemaField
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ age = SchemaField('age', 'INTEGER', mode='REQUIRED')
+ table = self._make_one(table_ref, schema=[full_name, age])
+ self.assertIsNone(table.partition_expiration)
+ table._properties['timePartitioning'] = {
+ 'type': 'DAY',
+ 'expirationMs': 100,
+ }
+ table.partition_expiration = None
+ self.assertEqual(table.partitioning_type, "DAY")
+ self.assertIsNone(table.partition_expiration)
+
+ def test_partition_expiration_w_none_no_partition_set(self):
+ from google.cloud.bigquery.table import SchemaField
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ age = SchemaField('age', 'INTEGER', mode='REQUIRED')
+ table = self._make_one(table_ref, schema=[full_name, age])
+ self.assertIsNone(table.partition_expiration)
+ table.partition_expiration = None
+ self.assertIsNone(table.partitioning_type)
+ self.assertIsNone(table.partition_expiration)
+
+
+class Test_row_from_mapping(unittest.TestCase, _SchemaBase):
+
+ PROJECT = 'prahj-ekt'
+ DS_ID = 'dataset-name'
+ TABLE_NAME = 'table-name'
+
+ def _call_fut(self, mapping, schema):
+ from google.cloud.bigquery.table import _row_from_mapping
+
+ return _row_from_mapping(mapping, schema)
+
+ def test__row_from_mapping_wo_schema(self):
+ from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA
+ MAPPING = {'full_name': 'Phred Phlyntstone', 'age': 32}
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = Table(table_ref)
+
+ with self.assertRaises(ValueError) as exc:
+ self._call_fut(MAPPING, table.schema)
+
+ self.assertEqual(exc.exception.args, (_TABLE_HAS_NO_SCHEMA,))
+
+ def test__row_from_mapping_w_invalid_schema(self):
+ from google.cloud.bigquery.table import Table, SchemaField
+ MAPPING = {
+ 'full_name': 'Phred Phlyntstone',
+ 'age': 32,
+ 'colors': ['red', 'green'],
+ 'bogus': 'WHATEVER',
+ }
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ age = SchemaField('age', 'INTEGER', mode='REQUIRED')
+ colors = SchemaField('colors', 'DATETIME', mode='REPEATED')
+ bogus = SchemaField('joined', 'STRING', mode='BOGUS')
+ table = Table(table_ref, schema=[full_name, age, colors, bogus])
+
+ with self.assertRaises(ValueError) as exc:
+ self._call_fut(MAPPING, table.schema)
+
+ self.assertIn('Unknown field mode: BOGUS', str(exc.exception))
+
+ def test__row_from_mapping_w_schema(self):
+ from google.cloud.bigquery.table import Table, SchemaField
+ MAPPING = {
+ 'full_name': 'Phred Phlyntstone',
+ 'age': 32,
+ 'colors': ['red', 'green'],
+ 'extra': 'IGNORED',
+ }
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
+ age = SchemaField('age', 'INTEGER', mode='REQUIRED')
+ colors = SchemaField('colors', 'DATETIME', mode='REPEATED')
+ joined = SchemaField('joined', 'STRING', mode='NULLABLE')
+ table = Table(table_ref, schema=[full_name, age, colors, joined])
+
+ self.assertEqual(
+ self._call_fut(MAPPING, table.schema),
+ ('Phred Phlyntstone', 32, ['red', 'green'], None))
diff --git a/docs/bigquery/snippets.py b/docs/bigquery/snippets.py
new file mode 100644
index 0000000..3ae8486
--- /dev/null
+++ b/docs/bigquery/snippets.py
@@ -0,0 +1,639 @@
+# Copyright 2016 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Testable usage examples for Google BigQuery API wrapper
+
+Each example function takes a ``client`` argument (which must be an instance
+of :class:`google.cloud.bigquery.client.Client`) and uses it to perform a task
+with the API.
+
+To facilitate running the examples as system tests, each example is also passed
+a ``to_delete`` list; the function adds to the list any objects created which
+need to be deleted during teardown.
+"""
+
+import time
+
+import pytest
+import six
+
+from google.cloud import bigquery
+
+ORIGINAL_FRIENDLY_NAME = 'Original friendly name'
+ORIGINAL_DESCRIPTION = 'Original description'
+LOCALLY_CHANGED_FRIENDLY_NAME = 'Locally-changed friendly name'
+LOCALLY_CHANGED_DESCRIPTION = 'Locally-changed description'
+UPDATED_FRIENDLY_NAME = 'Updated friendly name'
+UPDATED_DESCRIPTION = 'Updated description'
+
+SCHEMA = [
+ bigquery.SchemaField('full_name', 'STRING', mode='required'),
+ bigquery.SchemaField('age', 'INTEGER', mode='required'),
+]
+
+ROWS = [
+ ('Phred Phlyntstone', 32),
+ ('Bharney Rhubble', 33),
+ ('Wylma Phlyntstone', 29),
+ ('Bhettye Rhubble', 27),
+]
+
+QUERY = (
+ 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` '
+ 'WHERE state = "TX"')
+
+
+@pytest.fixture(scope='module')
+def client():
+ return bigquery.Client()
+
+
+@pytest.fixture
+def to_delete(client):
+ doomed = []
+ yield doomed
+ for item in doomed:
+ if isinstance(item, bigquery.Dataset):
+ client.delete_dataset(item)
+ elif isinstance(item, bigquery.Table):
+ client.delete_table(item)
+ else:
+ item.delete()
+
+
+def _millis():
+ return time.time() * 1000
+
+
+class _CloseOnDelete(object):
+
+ def __init__(self, wrapped):
+ self._wrapped = wrapped
+
+ def delete(self):
+ self._wrapped.close()
+
+
+def test_client_list_datasets(client):
+ """List datasets for a project."""
+
+ def do_something_with(_):
+ pass
+
+ # [START client_list_datasets]
+ for dataset in client.list_datasets(): # API request(s)
+ do_something_with(dataset)
+ # [END client_list_datasets]
+
+
+def test_create_dataset(client, to_delete):
+ """Create a dataset."""
+ DATASET_ID = 'create_dataset_%d' % (_millis(),)
+
+ # [START create_dataset]
+ # DATASET_ID = 'dataset_ids_are_strings'
+ dataset_ref = client.dataset(DATASET_ID)
+ dataset = bigquery.Dataset(dataset_ref)
+ dataset.description = 'my dataset'
+ dataset = client.create_dataset(dataset) # API request
+ # [END create_dataset]
+
+ to_delete.append(dataset)
+
+
+def test_get_dataset(client, to_delete):
+ """Reload a dataset's metadata."""
+ DATASET_ID = 'get_dataset_%d' % (_millis(),)
+ dataset_ref = client.dataset(DATASET_ID)
+ dataset = bigquery.Dataset(dataset_ref)
+ dataset.description = ORIGINAL_DESCRIPTION
+ dataset = client.create_dataset(dataset) # API request
+ to_delete.append(dataset)
+
+ # [START get_dataset]
+ assert dataset.description == ORIGINAL_DESCRIPTION
+ dataset.description = LOCALLY_CHANGED_DESCRIPTION
+ assert dataset.description == LOCALLY_CHANGED_DESCRIPTION
+ dataset = client.get_dataset(dataset) # API request
+ assert dataset.description == ORIGINAL_DESCRIPTION
+ # [END get_dataset]
+
+
+def test_update_dataset_simple(client, to_delete):
+ """Update a dataset's metadata."""
+ DATASET_ID = 'update_dataset_simple_%d' % (_millis(),)
+ dataset = bigquery.Dataset(client.dataset(DATASET_ID))
+ dataset.description = ORIGINAL_DESCRIPTION
+ client.create_dataset(dataset)
+ to_delete.append(dataset)
+
+ # [START update_dataset_simple]
+ assert dataset.description == ORIGINAL_DESCRIPTION
+ dataset.description = UPDATED_DESCRIPTION
+
+ dataset = client.update_dataset(dataset, ['description']) # API request
+
+ assert dataset.description == UPDATED_DESCRIPTION
+ # [END update_dataset_simple]
+
+
+def test_update_dataset_multiple_properties(client, to_delete):
+ """Update a dataset's metadata."""
+ DATASET_ID = 'update_dataset_multiple_properties_%d' % (_millis(),)
+ dataset = bigquery.Dataset(client.dataset(DATASET_ID))
+ dataset.description = ORIGINAL_DESCRIPTION
+ dataset = client.create_dataset(dataset)
+ to_delete.append(dataset)
+
+ # [START update_dataset_multiple_properties]
+ assert dataset.description == ORIGINAL_DESCRIPTION
+ assert dataset.default_table_expiration_ms is None
+ entry = bigquery.AccessEntry(
+ role='READER', entity_type='domain', entity_id='example.com')
+ assert entry not in dataset.access_entries
+ ONE_DAY_MS = 24 * 60 * 60 * 1000 # in milliseconds
+ dataset.description = UPDATED_DESCRIPTION
+ dataset.default_table_expiration_ms = ONE_DAY_MS
+ entries = list(dataset.access_entries)
+ entries.append(entry)
+ dataset.access_entries = entries
+
+ dataset = client.update_dataset(
+ dataset,
+ ['description', 'default_table_expiration_ms', 'access_entries']
+ ) # API request
+
+ assert dataset.description == UPDATED_DESCRIPTION
+ assert dataset.default_table_expiration_ms == ONE_DAY_MS
+ assert entry in dataset.access_entries
+ # [END update_dataset_multiple_properties]
+
+
+def test_delete_dataset(client):
+ """Delete a dataset."""
+ DATASET_ID = 'delete_dataset_%d' % (_millis(),)
+ dataset = bigquery.Dataset(client.dataset(DATASET_ID))
+ client.create_dataset(dataset)
+
+ # [START delete_dataset]
+ from google.cloud.exceptions import NotFound
+
+ client.delete_dataset(dataset) # API request
+
+ with pytest.raises(NotFound):
+ client.get_dataset(dataset) # API request
+ # [END delete_dataset]
+
+
+def test_list_dataset_tables(client, to_delete):
+ """List tables within a dataset."""
+ DATASET_ID = 'list_dataset_tables_dataset_%d' % (_millis(),)
+ dataset = bigquery.Dataset(client.dataset(DATASET_ID))
+ dataset = client.create_dataset(dataset)
+ to_delete.append(dataset)
+
+ # [START list_dataset_tables]
+ tables = list(client.list_dataset_tables(dataset)) # API request(s)
+ assert len(tables) == 0
+
+ table_ref = dataset.table('my_table')
+ table = bigquery.Table(table_ref)
+ table.view_query = QUERY
+ client.create_table(table) # API request
+ tables = list(client.list_dataset_tables(dataset)) # API request(s)
+
+ assert len(tables) == 1
+ assert tables[0].table_id == 'my_table'
+ # [END list_dataset_tables]
+
+ to_delete.insert(0, table)
+
+
+def test_create_table(client, to_delete):
+ """Create a table."""
+ DATASET_ID = 'create_table_dataset_%d' % (_millis(),)
+ dataset = bigquery.Dataset(client.dataset(DATASET_ID))
+ client.create_dataset(dataset)
+ to_delete.append(dataset)
+
+ # [START create_table]
+ SCHEMA = [
+ bigquery.SchemaField('full_name', 'STRING', mode='required'),
+ bigquery.SchemaField('age', 'INTEGER', mode='required'),
+ ]
+ table_ref = dataset.table('my_table')
+ table = bigquery.Table(table_ref, schema=SCHEMA)
+ table = client.create_table(table) # API request
+
+ assert table.table_id == 'my_table'
+ # [END create_table]
+
+ to_delete.insert(0, table)
+
+
+def test_get_table(client, to_delete):
+ """Reload a table's metadata."""
+ DATASET_ID = 'get_table_dataset_%d' % (_millis(),)
+ TABLE_ID = 'get_table_table_%d' % (_millis(),)
+ dataset = bigquery.Dataset(client.dataset(DATASET_ID))
+ dataset = client.create_dataset(dataset)
+ to_delete.append(dataset)
+
+ table = bigquery.Table(dataset.table(TABLE_ID), schema=SCHEMA)
+ table.description = ORIGINAL_DESCRIPTION
+ table = client.create_table(table)
+ to_delete.insert(0, table)
+
+ # [START get_table]
+ assert table.description == ORIGINAL_DESCRIPTION
+ table.description = LOCALLY_CHANGED_DESCRIPTION
+ table = client.get_table(table) # API request
+ assert table.description == ORIGINAL_DESCRIPTION
+ # [END get_table]
+
+
+def test_update_table_simple(client, to_delete):
+ """Patch a table's metadata."""
+ DATASET_ID = 'update_table_simple_dataset_%d' % (_millis(),)
+ TABLE_ID = 'update_table_simple_table_%d' % (_millis(),)
+ dataset = bigquery.Dataset(client.dataset(DATASET_ID))
+ dataset.description = ORIGINAL_DESCRIPTION
+ client.create_dataset(dataset)
+ to_delete.append(dataset)
+
+ table = bigquery.Table(dataset.table(TABLE_ID), schema=SCHEMA)
+ table.description = ORIGINAL_DESCRIPTION
+ table = client.create_table(table)
+ to_delete.insert(0, table)
+
+ # [START update_table_simple]
+ assert table.description == ORIGINAL_DESCRIPTION
+ table.description = UPDATED_DESCRIPTION
+
+ table = client.update_table(table, ['description']) # API request
+
+ assert table.description == UPDATED_DESCRIPTION
+ # [END update_table_simple]
+
+
+def test_update_table_multiple_properties(client, to_delete):
+ """Update a table's metadata."""
+ DATASET_ID = 'update_table_multiple_properties_dataset_%d' % (_millis(),)
+ TABLE_ID = 'update_table_multiple_properties_table_%d' % (_millis(),)
+ dataset = bigquery.Dataset(client.dataset(DATASET_ID))
+ dataset.description = ORIGINAL_DESCRIPTION
+ client.create_dataset(dataset)
+ to_delete.append(dataset)
+
+ table = bigquery.Table(dataset.table(TABLE_ID), schema=SCHEMA)
+ table.friendly_name = ORIGINAL_FRIENDLY_NAME
+ table.description = ORIGINAL_DESCRIPTION
+ table = client.create_table(table)
+ to_delete.insert(0, table)
+
+ # [START update_table_multiple_properties]
+ assert table.friendly_name == ORIGINAL_FRIENDLY_NAME
+ assert table.description == ORIGINAL_DESCRIPTION
+
+ NEW_SCHEMA = list(table.schema)
+ NEW_SCHEMA.append(bigquery.SchemaField('phone', 'STRING'))
+ table.friendly_name = UPDATED_FRIENDLY_NAME
+ table.description = UPDATED_DESCRIPTION
+ table.schema = NEW_SCHEMA
+ table = client.update_table(
+ table,
+ ['schema', 'friendly_name', 'description']
+ ) # API request
+
+ assert table.friendly_name == UPDATED_FRIENDLY_NAME
+ assert table.description == UPDATED_DESCRIPTION
+ assert table.schema == NEW_SCHEMA
+ # [END update_table_multiple_properties]
+
+
+def test_table_create_rows(client, to_delete):
+ """Insert / fetch table data."""
+ DATASET_ID = 'table_create_rows_dataset_%d' % (_millis(),)
+ TABLE_ID = 'table_create_rows_table_%d' % (_millis(),)
+ dataset = bigquery.Dataset(client.dataset(DATASET_ID))
+ dataset = client.create_dataset(dataset)
+ to_delete.append(dataset)
+
+ table = bigquery.Table(dataset.table(TABLE_ID), schema=SCHEMA)
+ table = client.create_table(table)
+ to_delete.insert(0, table)
+
+ # [START table_create_rows]
+ ROWS_TO_INSERT = [
+ (u'Phred Phlyntstone', 32),
+ (u'Wylma Phlyntstone', 29),
+ ]
+
+ errors = client.create_rows(table, ROWS_TO_INSERT) # API request
+
+ assert errors == []
+ # [END table_create_rows]
+
+
+def test_load_table_from_file(client, to_delete):
+ """Upload table data from a CSV file."""
+ DATASET_ID = 'table_upload_from_file_dataset_%d' % (_millis(),)
+ TABLE_ID = 'table_upload_from_file_table_%d' % (_millis(),)
+ dataset = bigquery.Dataset(client.dataset(DATASET_ID))
+ client.create_dataset(dataset)
+ to_delete.append(dataset)
+
+ table_ref = dataset.table(TABLE_ID)
+ table = bigquery.Table(table_ref, schema=SCHEMA)
+ table = client.create_table(table)
+ to_delete.insert(0, table)
+
+ # [START load_table_from_file]
+ csv_file = six.BytesIO(b"""full_name,age
+Phred Phlyntstone,32
+Wylma Phlyntstone,29
+""")
+
+ table_ref = dataset.table(TABLE_ID)
+ job_config = bigquery.LoadJobConfig()
+ job_config.source_format = 'CSV'
+ job_config.skip_leading_rows = 1
+ job = client.load_table_from_file(
+ csv_file, table_ref, job_config=job_config) # API request
+ job.result() # Waits for table load to complete.
+ # [END load_table_from_file]
+
+ found_rows = []
+
+ def do_something(row):
+ found_rows.append(row)
+
+ # [START table_list_rows]
+ for row in client.list_rows(table): # API request
+ do_something(row)
+ # [END table_list_rows]
+
+ assert len(found_rows) == 2
+
+ # [START table_list_rows_iterator_properties]
+ iterator = client.list_rows(table) # API request
+ page = six.next(iterator.pages)
+ rows = list(page)
+ total = iterator.total_rows
+ token = iterator.next_page_token
+ # [END table_list_rows_iterator_properties]
+
+ row_tuples = [r.values() for r in rows]
+ assert len(rows) == total == 2
+ assert token is None
+ assert (u'Phred Phlyntstone', 32) in row_tuples
+ assert (u'Wylma Phlyntstone', 29) in row_tuples
+
+
+def test_load_table_from_uri(client, to_delete):
+ ROWS = [
+ ('Phred Phlyntstone', 32),
+ ('Bharney Rhubble', 33),
+ ('Wylma Phlyntstone', 29),
+ ('Bhettye Rhubble', 27),
+ ]
+ HEADER_ROW = ('Full Name', 'Age')
+ bucket_name = 'gs_bq_load_test_%d' % (_millis(),)
+ blob_name = 'person_ages.csv'
+ bucket, blob = _write_csv_to_storage(
+ bucket_name, blob_name, HEADER_ROW, ROWS)
+ to_delete.extend((blob, bucket))
+ DATASET_ID = 'delete_table_dataset_%d' % (_millis(),)
+ dataset = bigquery.Dataset(client.dataset(DATASET_ID))
+ client.create_dataset(dataset)
+ to_delete.append(dataset)
+
+ # [START load_table_from_uri]
+ table_ref = dataset.table('person_ages')
+ table = bigquery.Table(table_ref)
+ table.schema = [
+ bigquery.SchemaField('full_name', 'STRING', mode='required'),
+ bigquery.SchemaField('age', 'INTEGER', mode='required')
+ ]
+ client.create_table(table) # API request
+ GS_URL = 'gs://{}/{}'.format(bucket_name, blob_name)
+ job_id_prefix = "my_job"
+ job_config = bigquery.LoadJobConfig()
+ job_config.create_disposition = 'CREATE_NEVER'
+ job_config.skip_leading_rows = 1
+ job_config.source_format = 'CSV'
+ job_config.write_disposition = 'WRITE_EMPTY'
+ load_job = client.load_table_from_uri(
+ GS_URL, table_ref, job_config=job_config,
+ job_id_prefix=job_id_prefix) # API request
+
+ assert load_job.state == 'RUNNING'
+ assert load_job.job_type == 'load'
+
+ load_job.result() # Waits for table load to complete.
+
+ assert load_job.state == 'DONE'
+ assert load_job.job_id.startswith(job_id_prefix)
+ # [END load_table_from_uri]
+
+ to_delete.insert(0, table)
+
+
+def _write_csv_to_storage(bucket_name, blob_name, header_row, data_rows):
+ import csv
+ from google.cloud._testing import _NamedTemporaryFile
+ from google.cloud.storage import Client as StorageClient
+
+ storage_client = StorageClient()
+
+ # In the **very** rare case the bucket name is reserved, this
+ # fails with a ConnectionError.
+ bucket = storage_client.create_bucket(bucket_name)
+
+ blob = bucket.blob(blob_name)
+
+ with _NamedTemporaryFile() as temp:
+ with open(temp.name, 'w') as csv_write:
+ writer = csv.writer(csv_write)
+ writer.writerow(header_row)
+ writer.writerows(data_rows)
+
+ with open(temp.name, 'rb') as csv_read:
+ blob.upload_from_file(csv_read, content_type='text/csv')
+
+ return bucket, blob
+
+
+def test_copy_table(client, to_delete):
+ DATASET_ID = 'copy_table_dataset_%d' % (_millis(),)
+ # [START copy_table]
+ source_dataset = bigquery.DatasetReference(
+ 'bigquery-public-data', 'samples')
+ source_table_ref = source_dataset.table('shakespeare')
+
+ dest_dataset = bigquery.Dataset(client.dataset(DATASET_ID))
+ dest_dataset = client.create_dataset(dest_dataset) # API request
+ dest_table_ref = dest_dataset.table('destination_table')
+
+ job_config = bigquery.CopyJobConfig()
+ job = client.copy_table(
+ source_table_ref, dest_table_ref, job_config=job_config) # API request
+ job.result() # Waits for job to complete.
+
+ assert job.state == 'DONE'
+ dest_table = client.get_table(dest_table_ref) # API request
+ assert dest_table.table_id == 'destination_table'
+ # [END copy_table]
+
+ to_delete.append(dest_dataset)
+ to_delete.insert(0, dest_table)
+
+
+def test_extract_table(client, to_delete):
+ DATASET_ID = 'export_data_dataset_%d' % (_millis(),)
+ dataset = bigquery.Dataset(client.dataset(DATASET_ID))
+ client.create_dataset(dataset)
+ to_delete.append(dataset)
+
+ table_ref = dataset.table('person_ages')
+ table = client.create_table(bigquery.Table(table_ref, schema=SCHEMA))
+ to_delete.insert(0, table)
+ client.create_rows(table, ROWS)
+
+ bucket_name = 'extract_person_ages_job_%d' % (_millis(),)
+ # [START extract_table]
+ from google.cloud.storage import Client as StorageClient
+
+ storage_client = StorageClient()
+ bucket = storage_client.create_bucket(bucket_name) # API request
+ destination_blob_name = 'person_ages_out.csv'
+ destination = bucket.blob(destination_blob_name)
+
+ destination_uri = 'gs://{}/{}'.format(bucket_name, destination_blob_name)
+ extract_job = client.extract_table(
+ table_ref, destination_uri) # API request
+ extract_job.result(timeout=100) # Waits for job to complete.
+
+ got = destination.download_as_string().decode('utf-8') # API request
+ assert 'Bharney Rhubble' in got
+ # [END extract_table]
+ to_delete.append(bucket)
+ to_delete.insert(0, destination)
+
+
+def test_delete_table(client, to_delete):
+ """Delete a table."""
+ DATASET_ID = 'delete_table_dataset_%d' % (_millis(),)
+ TABLE_ID = 'delete_table_table_%d' % (_millis(),)
+ dataset_ref = client.dataset(DATASET_ID)
+ dataset = client.create_dataset(bigquery.Dataset(dataset_ref))
+ to_delete.append(dataset)
+
+ table_ref = dataset.table(TABLE_ID)
+ table = bigquery.Table(table_ref, schema=SCHEMA)
+ client.create_table(table)
+ # [START delete_table]
+ from google.cloud.exceptions import NotFound
+
+ client.delete_table(table) # API request
+
+ with pytest.raises(NotFound):
+ client.get_table(table) # API request
+ # [END delete_table]
+
+
+def test_client_query(client):
+ """Run a query"""
+
+ # [START client_query]
+ QUERY = (
+ 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` '
+ 'WHERE state = "TX" '
+ 'LIMIT 100')
+ TIMEOUT = 30 # in seconds
+ query_job = client.query(QUERY) # API request - starts the query
+ assert query_job.state == 'RUNNING'
+
+ # Waits for the query to finish
+ iterator = query_job.result(timeout=TIMEOUT)
+ rows = list(iterator)
+
+ assert query_job.state == 'DONE'
+ assert len(rows) == 100
+ row = rows[0]
+ assert row[0] == row.name == row['name']
+ # [END client_query]
+
+
+def test_client_query_w_param(client):
+ """Run a query using a query parameter"""
+
+ # [START client_query_w_param]
+ QUERY_W_PARAM = (
+ 'SELECT name, state '
+ 'FROM `bigquery-public-data.usa_names.usa_1910_2013` '
+ 'WHERE state = @state '
+ 'LIMIT 100')
+ TIMEOUT = 30 # in seconds
+ param = bigquery.ScalarQueryParameter('state', 'STRING', 'TX')
+ job_config = bigquery.QueryJobConfig()
+ job_config.query_parameters = [param]
+ query_job = client.query(
+ QUERY_W_PARAM, job_config=job_config) # API request - starts the query
+ assert query_job.state == 'RUNNING'
+
+ # Waits for the query to finish
+ iterator = query_job.result(timeout=TIMEOUT)
+ rows = list(iterator)
+
+ assert query_job.state == 'DONE'
+ assert len(rows) == 100
+ row = rows[0]
+ assert row[0] == row.name == row['name']
+ assert row.state == 'TX'
+ # [END client_query_w_param]
+
+
+def test_client_query_rows(client):
+ """Run a simple query."""
+
+ # [START client_query_rows]
+ QUERY = (
+ 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` '
+ 'WHERE state = "TX" '
+ 'LIMIT 100')
+ TIMEOUT = 30 # in seconds
+ rows = list(client.query_rows(QUERY, timeout=TIMEOUT)) # API request
+
+ assert len(rows) == 100
+ row = rows[0]
+ assert row[0] == row.name == row['name']
+ # [END client_query_rows]
+
+
+def test_client_list_jobs(client):
+ """List jobs for a project."""
+
+ def do_something_with(_):
+ pass
+
+ # [START client_list_jobs]
+ job_iterator = client.list_jobs() # API request(s)
+ for job in job_iterator:
+ do_something_with(job)
+ # [END client_list_jobs]
+
+
+if __name__ == '__main__':
+ pytest.main()
diff --git a/docs/bigquery/usage.rst b/docs/bigquery/usage.rst
new file mode 100644
index 0000000..9a5e7f3
--- /dev/null
+++ b/docs/bigquery/usage.rst
@@ -0,0 +1,266 @@
+BigQuery
+========
+
+.. toctree::
+ :maxdepth: 2
+ :hidden:
+
+ client
+ dataset
+ job
+ query
+ schema
+ table
+
+Authentication / Configuration
+------------------------------
+
+- Use :class:`Client <google.cloud.bigquery.client.Client>` objects to configure
+ your applications.
+
+- :class:`Client <google.cloud.bigquery.client.Client>` objects hold both a ``project``
+ and an authenticated connection to the BigQuery service.
+
+- The authentication credentials can be implicitly determined from the
+ environment or directly via
+ :meth:`from_service_account_json <google.cloud.bigquery.client.Client.from_service_account_json>`
+ and
+ :meth:`from_service_account_p12 <google.cloud.bigquery.client.Client.from_service_account_p12>`.
+
+- After setting :envvar:`GOOGLE_APPLICATION_CREDENTIALS` and
+ :envvar:`GOOGLE_CLOUD_PROJECT` environment variables, create an instance of
+ :class:`Client <google.cloud.bigquery.client.Client>`.
+
+ .. code-block:: python
+
+ >>> from google.cloud import bigquery
+ >>> client = bigquery.Client()
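+
+Credentials can also be supplied explicitly. As a minimal sketch (the
+key-file path below is a placeholder), the
+:meth:`from_service_account_json <google.cloud.bigquery.client.Client.from_service_account_json>`
+factory builds a client from a service account JSON key file:
+
+.. code-block:: python
+
+   >>> from google.cloud import bigquery
+   >>> # The path to the downloaded key file is a placeholder.
+   >>> client = bigquery.Client.from_service_account_json(
+   ...     '/path/to/service-account.json')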
+
+
+Projects
+--------
+
+A project is the top-level container in the ``BigQuery`` API: it is tied
+closely to billing, and can provide default access control across all its
+datasets. If no ``project`` is passed to the client container, the library
+attempts to infer a project using the environment (including explicit
+environment variables, GAE, and GCE).
+
+To override the project inferred from the environment, pass an explicit
+``project`` to the constructor, or to either of the alternative
+``classmethod`` factories:
+
+.. code-block:: python
+
+ >>> from google.cloud import bigquery
+ >>> client = bigquery.Client(project='PROJECT_ID')
+
+
+Project ACLs
+~~~~~~~~~~~~
+
+Each project has an access control list granting reader / writer / owner
+permission to one or more entities. This list cannot be queried or set
+via the API; it must be managed using the Google Developer Console.
+
+
+Datasets
+--------
+
+A dataset represents a collection of tables, and applies several default
+policies to tables as they are created:
+
+- An access control list (ACL). When created, a dataset has an ACL
+ which maps to the ACL inherited from its project.
+
+- A default table expiration period. If set, tables created within the
+ dataset will use that value as their expiration period (see the sketch
+ below).
+
+See BigQuery documentation for more information on
+`Datasets <https://cloud.google.com/bigquery/docs/datasets>`_.
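+
+As an illustrative sketch (the dataset ID below is a placeholder), a default
+table expiration can be set on the dataset before it is created, using the
+same ``default_table_expiration_ms`` property exercised in the update example
+below:
+
+.. code-block:: python
+
+   >>> from google.cloud import bigquery
+   >>> client = bigquery.Client()
+   >>> dataset = bigquery.Dataset(client.dataset('my_dataset'))
+   >>> # Tables created in this dataset will expire after one day.
+   >>> dataset.default_table_expiration_ms = 24 * 60 * 60 * 1000
+   >>> dataset = client.create_dataset(dataset)  # API request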
+
+
+Dataset operations
+~~~~~~~~~~~~~~~~~~
+
+List datasets for the client's project:
+
+.. literalinclude:: snippets.py
+ :start-after: [START client_list_datasets]
+ :end-before: [END client_list_datasets]
+
+Create a new dataset for the client's project:
+
+.. literalinclude:: snippets.py
+ :start-after: [START create_dataset]
+ :end-before: [END create_dataset]
+
+Refresh metadata for a dataset (to pick up changes made by another client):
+
+.. literalinclude:: snippets.py
+ :start-after: [START get_dataset]
+ :end-before: [END get_dataset]
+
+Update a property in a dataset's metadata:
+
+.. literalinclude:: snippets.py
+ :start-after: [START update_dataset_simple]
+ :end-before: [END update_dataset_simple]
+
+Update multiple properties in a dataset's metadata:
+
+.. literalinclude:: snippets.py
+ :start-after: [START update_dataset_multiple_properties]
+ :end-before: [END update_dataset_multiple_properties]
+
+Delete a dataset:
+
+.. literalinclude:: snippets.py
+ :start-after: [START delete_dataset]
+ :end-before: [END delete_dataset]
+
+
+Tables
+------
+
+Tables exist within datasets. See BigQuery documentation for more information
+on `Tables <https://cloud.google.com/bigquery/docs/tables>`_.
+
+Table operations
+~~~~~~~~~~~~~~~~~~
+
+List tables for the dataset:
+
+.. literalinclude:: snippets.py
+ :start-after: [START list_dataset_tables]
+ :end-before: [END list_dataset_tables]
+
+Create a table:
+
+.. literalinclude:: snippets.py
+ :start-after: [START create_table]
+ :end-before: [END create_table]
+
+Get a table:
+
+.. literalinclude:: snippets.py
+ :start-after: [START get_table]
+ :end-before: [END get_table]
+
+Update a property in a table's metadata:
+
+.. literalinclude:: snippets.py
+ :start-after: [START update_table_simple]
+ :end-before: [END update_table_simple]
+
+Update multiple properties in a table's metadata:
+
+.. literalinclude:: snippets.py
+ :start-after: [START update_table_multiple_properties]
+ :end-before: [END update_table_multiple_properties]
+
+Get rows from a table's data:
+
+.. literalinclude:: snippets.py
+ :start-after: [START table_list_rows]
+ :end-before: [END table_list_rows]
+
+Use the iterator properties returned with the row data:
+
+.. literalinclude:: snippets.py
+ :start-after: [START table_list_rows_iterator_properties]
+ :end-before: [END table_list_rows_iterator_properties]
+
+Insert rows into a table's data:
+
+.. literalinclude:: snippets.py
+ :start-after: [START table_create_rows]
+ :end-before: [END table_create_rows]
+
+Upload table data from a file:
+
+.. literalinclude:: snippets.py
+ :start-after: [START load_table_from_file]
+ :end-before: [END load_table_from_file]
+
+Load table data from Google Cloud Storage:
+
+.. literalinclude:: snippets.py
+ :start-after: [START load_table_from_uri]
+ :end-before: [END load_table_from_uri]
+
+Copy a table:
+
+.. literalinclude:: snippets.py
+ :start-after: [START copy_table]
+ :end-before: [END copy_table]
+
+Extract a table to Google Cloud Storage:
+
+.. literalinclude:: snippets.py
+ :start-after: [START extract_table]
+ :end-before: [END extract_table]
+
+Delete a table:
+
+.. literalinclude:: snippets.py
+ :start-after: [START delete_table]
+ :end-before: [END delete_table]
+
+
+Queries
+-------
+
+Querying data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Run a query and fetch its results:
+
+.. literalinclude:: snippets.py
+ :start-after: [START client_query]
+ :end-before: [END client_query]
+
+.. note::
+
+ - Use of the ``timeout`` parameter is optional. The query will continue to
+ run in the background even if it takes longer than the allowed timeout.
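+
+If the timeout elapses before the query finishes, the job keeps running and
+can be re-checked or waited on again. A minimal sketch, reusing the
+``query_job`` object from the snippet above:
+
+.. code-block:: python
+
+   >>> if not query_job.done():  # API request
+   ...     rows = list(query_job.result())  # waits for completion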
+
+
+Run a query using a named query parameter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+See BigQuery documentation for more information on
+`parameterized queries <https://cloud.google.com/bigquery/docs/parameterized-queries>`_.
+
+.. literalinclude:: snippets.py
+ :start-after: [START client_query_w_param]
+ :end-before: [END client_query_w_param]
+
+
+Querying Table Rows
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Run a query and wait for it to finish:
+
+.. literalinclude:: snippets.py
+ :start-after: [START client_query_rows]
+ :end-before: [END client_query_rows]
+
+.. note::
+
+ - Use of the ``timeout`` parameter is optional. The query will continue to
+ run in the background even if it takes longer than the allowed timeout. The
+ job may be retrieved later, using its job ID, via
+ :meth:`~google.cloud.bigquery.client.Client.get_job`.
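+
+A minimal sketch of retrieving such a job later and waiting on it
+(``saved_job_id`` is a placeholder for a job ID recorded earlier):
+
+.. code-block:: python
+
+   >>> job = client.get_job(saved_job_id)  # API request
+   >>> rows = list(job.result())  # waits for the job to finish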
+
+
+List jobs for a project
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Jobs describe actions performed on data in BigQuery tables:
+
+- Load data into a table
+- Run a query against data in one or more tables
+- Extract data from a table
+- Copy a table
+
+.. literalinclude:: snippets.py
+ :start-after: [START client_list_jobs]
+ :end-before: [END client_list_jobs]